Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
9.2.2023: Due to updates, GitLab will be unavailable for a few minutes between 9:00 and 11:00.
Open sidebar
badw-it
DHParser
Commits
d702fc24
Commit
d702fc24
authored
Nov 15, 2018
by
eckhart
Browse files
Corrections of mypy-TypeErrors
parent
4b248f94
Changes
8
Hide whitespace changes
Inline
Side-by-side
DHParser/compile.py
View file @
d702fc24
...
...
@@ -39,7 +39,7 @@ import os
import
re
from
DHParser.preprocess
import
strip_tokens
,
with_source_mapping
,
PreprocessorFunc
from
DHParser.syntaxtree
import
Node
,
RootNode
,
StrictResultType
from
DHParser.syntaxtree
import
Node
,
RootNode
,
ZOMBIE_ROOTNODE
,
StrictResultType
from
DHParser.transform
import
TransformationFunc
from
DHParser.parse
import
Grammar
from
DHParser.error
import
adjust_error_locations
,
is_error
,
Error
...
...
@@ -97,7 +97,7 @@ class Compiler:
self
.
_reset
()
def
_reset
(
self
):
self
.
tree
=
None
# type:
Optional[
RootNode
]
self
.
tree
=
ZOMBIE_ROOTNODE
# type: RootNode
self
.
context
=
[]
# type: List[Node]
self
.
_dirty_flag
=
False
...
...
@@ -116,7 +116,7 @@ class Compiler:
result
=
self
.
compile
(
root
)
return
result
def
set_grammar_name
(
self
,
grammar_name
:
str
=
""
,
grammar_source
:
str
=
""
):
def
set_grammar_name
(
self
,
grammar_name
:
str
=
""
,
grammar_source
:
str
=
""
):
"""
Changes the grammar's name and the grammar's source.
...
...
@@ -219,7 +219,7 @@ def compile_source(source: str,
parser
:
Grammar
,
# str -> Node (concrete syntax tree (CST))
transformer
:
TransformationFunc
,
# Node (CST) -> Node (abstract syntax tree (AST))
compiler
:
Compiler
,
# Node (AST) -> Any
preserve_ast
:
bool
=
False
)
->
Tuple
[
Any
,
List
[
Error
],
Node
]:
preserve_ast
:
bool
=
False
)
->
Tuple
[
Optional
[
Any
]
,
List
[
Error
],
Optional
[
Node
]
]
:
"""
Compiles a source in four stages:
1. Pre-Processing (if needed)
...
...
@@ -259,7 +259,7 @@ def compile_source(source: str,
source_mapping
=
lambda
i
:
i
else
:
source_text
,
source_mapping
=
with_source_mapping
(
preprocessor
(
original_text
))
syntax_tree
=
parser
(
source_text
)
syntax_tree
=
parser
(
source_text
)
# type: RootNode
if
is_logging
():
log_ST
(
syntax_tree
,
log_file_name
+
'.cst'
)
log_parsing_history
(
parser
,
log_file_name
)
...
...
DHParser/ebnf.py
View file @
d702fc24
...
...
@@ -247,6 +247,7 @@ EBNF_AST_transformation_table = {
def
EBNFTransform
()
->
TransformationFunc
:
return
partial
(
traverse
,
processing_table
=
EBNF_AST_transformation_table
.
copy
())
def
get_ebnf_transformer
()
->
TransformationFunc
:
global
thread_local_EBNF_transformer_singleton
try
:
...
...
@@ -550,11 +551,11 @@ class EBNFCompiler(Compiler):
# add EBNF grammar to the doc string of the parser class
article
=
'an '
if
self
.
grammar_name
[
0
:
1
]
in
"AaEeIiOoUu"
else
'a '
# what about 'hour', 'universe' etc.?
declarations
=
[
'class '
+
self
.
grammar_name
+
'Grammar(Grammar):'
,
'r"""Parser for '
+
article
+
self
.
grammar_name
+
' source file'
+
(
', with this grammar:'
if
self
.
grammar_source
else
'.'
)]
declarations
=
[
'class '
+
self
.
grammar_name
+
'Grammar(Grammar):'
,
'r"""Parser for '
+
article
+
self
.
grammar_name
+
' source file'
+
(
', with this grammar:'
if
self
.
grammar_source
else
'.'
)]
definitions
.
append
((
'parser_initialization__'
,
'"upon instantiation"'
))
if
self
.
grammar_source
:
definitions
.
append
((
'source_hash__'
,
...
...
@@ -833,7 +834,7 @@ class EBNFCompiler(Compiler):
# shift = (Node(node.parser, node.result[1].result),)
# node.result[1].result = shift + node.result[2:]
node
.
children
[
1
].
result
=
(
Node
(
node
.
children
[
1
].
parser
,
node
.
children
[
1
].
result
),)
\
+
node
.
children
[
2
:]
+
node
.
children
[
2
:]
node
.
children
[
1
].
parser
=
node
.
parser
node
.
result
=
(
node
.
children
[
0
],
node
.
children
[
1
])
...
...
DHParser/log.py
View file @
d702fc24
...
...
@@ -55,7 +55,7 @@ import os
from
DHParser.error
import
line_col
from
DHParser.stringview
import
StringView
from
DHParser.syntaxtree
import
Node
,
WHITESPACE_PTYPE
from
DHParser.syntaxtree
import
Node
from
DHParser.toolkit
import
is_filename
,
escape_control_characters
,
typing
from
typing
import
List
,
Tuple
,
Union
...
...
@@ -210,7 +210,7 @@ class HistoryRecord:
HTML_LEAD_IN
=
(
'<!DOCTYPE html>
\n
'
'<html>
\n
<head>
\n
<meta charset="utf-8"/>
\n
<style>
\n
'
'td,th {font-family:monospace; '
'border-right: thin solid grey; border-bottom: thin solid grey}
\n
'
'border-right: thin solid grey; border-bottom: thin solid grey}
\n
'
'td.line, td.column {color:darkgrey}
\n
'
# 'td.stack {}\n'
'td.status {font-weight:bold}
\n
'
'td.text {color:darkblue}
\n
'
...
...
@@ -236,7 +236,7 @@ class HistoryRecord:
def
__str__
(
self
):
return
'%4i, %2i: %s; %s; "%s"'
%
self
.
as_tuple
()
def
as_tuple
(
self
)
->
Snapshot
:
def
as_tuple
(
self
)
->
'
Snapshot
'
:
"""
Returns history record formatted as a snapshot tuple.
"""
...
...
@@ -294,7 +294,6 @@ class HistoryRecord:
def
status
(
self
)
->
str
:
return
self
.
FAIL
if
self
.
node
is
None
else
\
(
'"%s"'
%
self
.
err_msg
())
if
self
.
node
.
errors
else
self
.
MATCH
# has_errors(self.node._errors)
@
property
def
excerpt
(
self
):
...
...
@@ -344,8 +343,8 @@ class HistoryRecord:
remaining
=
-
1
result
=
None
for
record
in
history
:
if
(
record
.
status
==
HistoryRecord
.
MATCH
and
(
record
.
remaining
<
remaining
or
remaining
<
0
)):
if
(
record
.
status
==
HistoryRecord
.
MATCH
and
(
record
.
remaining
<
remaining
or
remaining
<
0
)):
result
=
record
remaining
=
record
.
remaining
return
result
...
...
@@ -376,7 +375,7 @@ LOG_SIZE_THRESHOLD = 10000 # maximum number of history records to log
LOG_TAIL_THRESHOLD
=
500
# maximum number of history records for "tail log"
def
log_parsing_history
(
grammar
,
log_file_name
:
str
=
''
,
html
:
bool
=
True
)
->
None
:
def
log_parsing_history
(
grammar
,
log_file_name
:
str
=
''
,
html
:
bool
=
True
)
->
None
:
"""
Writes a log of the parsing history of the most recently parsed document.
...
...
@@ -415,8 +414,7 @@ def log_parsing_history(grammar, log_file_name: str = '', html: bool=True) -> No
if
not
is_logging
():
raise
AssertionError
(
"Cannot log history when logging is turned off!"
)
# assert self.history__, \
# "Parser did not yet run or logging was turned off when running parser!"
if
not
log_file_name
:
name
=
grammar
.
__class__
.
__name__
log_file_name
=
name
[:
-
7
]
if
name
.
lower
().
endswith
(
'grammar'
)
else
name
...
...
@@ -424,35 +422,23 @@ def log_parsing_history(grammar, log_file_name: str = '', html: bool=True) -> No
log_file_name
=
log_file_name
[:
-
4
]
full_history
=
[
'<h1>Full parsing history of "%s"</h1>'
%
log_file_name
]
# type: List[str]
# match_history = ['<h1>Match history of parsing "%s"</h1>' % log_file_name] # type: List[str]
# errors_only = ['<h1>Errors when parsing "%s"</h1>' % log_file_name] # type: List[str]
if
len
(
grammar
.
history__
)
>
LOG_SIZE_THRESHOLD
:
warning
=
(
'Sorry, man, %iK history records is just too many! '
'Only looking at the last %iK records.'
%
(
len
(
grammar
.
history__
)
//
1000
,
LOG_SIZE_THRESHOLD
//
1000
))
%
(
len
(
grammar
.
history__
)
//
1000
,
LOG_SIZE_THRESHOLD
//
1000
))
html_warning
=
'<p><strong>'
+
warning
+
'</strong></p>'
full_history
.
append
(
html_warning
)
# match_history.append(html_warning)
# errors_only.append(html_warning)
lead_in
=
'
\n
'
.
join
([
'<table>'
,
HistoryRecord
.
COLGROUP
,
HistoryRecord
.
HEADINGS
])
full_history
.
append
(
lead_in
)
# match_history.append(lead_in)
# errors_only.append(lead_in)
for
record
in
grammar
.
history__
[
-
LOG_SIZE_THRESHOLD
:]:
line
=
record
.
as_html_tr
()
if
html
else
str
(
record
)
append_line
(
full_history
,
line
)
# if record.node and record.node.parser.ptype != WHITESPACE_PTYPE:
# append_line(match_history, line)
# if record.node.errors:
# append_line(errors_only, line)
write_log
(
full_history
,
log_file_name
+
'_full'
)
if
len
(
full_history
)
>
LOG_TAIL_THRESHOLD
+
10
:
heading
=
'<h1>Last 500 records of parsing history of "%s"</h1>'
%
log_file_name
+
lead_in
write_log
([
heading
]
+
full_history
[
-
LOG_TAIL_THRESHOLD
:],
log_file_name
+
'_full.tail'
)
# write_log(match_history, log_file_name + '_match')
# if (len(errors_only) > 3 or (len(grammar.history__) <= LOG_SIZE_THRESHOLD
# and len(errors_only) > 2)):
# write_log(errors_only, log_file_name + '_errors')
DHParser/parse.py
View file @
d702fc24
...
...
@@ -40,7 +40,7 @@ from DHParser.stringview import StringView, EMPTY_STRING_VIEW
from
DHParser.syntaxtree
import
Node
,
RootNode
,
ParserBase
,
WHITESPACE_PTYPE
,
\
TOKEN_PTYPE
,
ZOMBIE_PARSER
from
DHParser.toolkit
import
sane_parser_name
,
escape_control_characters
,
re
,
typing
from
typing
import
Callable
,
cast
,
Dict
,
DefaultDict
,
List
,
Set
,
Tuple
,
Union
,
Optional
from
typing
import
Callable
,
cast
,
List
,
Tuple
,
Set
,
Dict
,
DefaultDict
,
Union
,
Optional
__all__
=
(
'Parser'
,
...
...
@@ -263,7 +263,7 @@ class Parser(ParserBase):
"""
duplicate
=
self
.
__class__
()
duplicate
.
name
=
self
.
name
duplicate
.
ptype
=
self
.
ptype
duplicate
.
ptype
=
self
.
ptype
return
duplicate
def
reset
(
self
):
...
...
@@ -271,7 +271,7 @@ class Parser(ParserBase):
the `reset()`-method of the parent class must be called from the
`reset()`-method of the derived class."""
self
.
visited
=
dict
()
# type: Dict[int, Tuple[Optional[Node], StringView]]
self
.
recursion_counter
=
defaultdict
(
lambda
:
0
)
# type: DefaultDict[int, int]
self
.
recursion_counter
=
defaultdict
(
lambda
:
0
)
# type: DefaultDict[int, int]
self
.
cycle_detection
=
set
()
# type: Set[Callable]
def
__call__
(
self
,
text
:
StringView
)
->
Tuple
[
Optional
[
Node
],
StringView
]:
...
...
@@ -293,7 +293,10 @@ class Parser(ParserBase):
@
property
def
grammar
(
self
)
->
'Grammar'
:
return
self
.
_grammar
if
self
.
_grammar
:
return
self
.
_grammar
else
:
raise
AssertionError
(
'Grammar has not yet been set!'
)
@
grammar
.
setter
def
grammar
(
self
,
grammar
:
'Grammar'
):
...
...
@@ -301,8 +304,9 @@ class Parser(ParserBase):
self
.
_grammar
=
grammar
self
.
_grammar_assigned_notifier
()
else
:
assert
self
.
_grammar
==
grammar
,
\
"Parser has already been assigned to a different Grammar object!"
if
self
.
_grammar
!=
grammar
:
raise
AssertionError
(
"Parser has already been assigned"
"to a different Grammar object!"
)
def
_grammar_assigned_notifier
(
self
):
"""A function that notifies the parser object that it has been
...
...
@@ -564,12 +568,6 @@ class Grammar:
def
__init__
(
self
,
root
:
Parser
=
None
)
->
None
:
# if not hasattr(self.__class__, 'parser_initialization__'):
# self.__class__.parser_initialization__ = "pending"
# if not hasattr(self.__class__, 'wspL__'):
# self.wspL__ = ''
# if not hasattr(self.__class__, 'wspR__'):
# self.wspR__ = ''
self
.
all_parsers__
=
set
()
# type: Set[ParserBase]
self
.
_dirty_flag__
=
False
# type: bool
self
.
history_tracking__
=
False
# type: bool
...
...
@@ -650,7 +648,7 @@ class Grammar:
parser
.
grammar
=
self
def
__call__
(
self
,
document
:
str
,
start_parser
=
"root__"
,
track_history
=
False
)
->
Node
:
def
__call__
(
self
,
document
:
str
,
start_parser
=
"root__"
,
track_history
=
False
)
->
Root
Node
:
"""
Parses a document with parser-combinators.
...
...
DHParser/stringview.py
View file @
d702fc24
...
...
@@ -288,12 +288,12 @@ class StringView(collections.abc.Sized):
return
self
.
fullstring
.
split
(
sep
)
else
:
pieces
=
[]
l
=
len
(
sep
)
l
ength
=
len
(
sep
)
k
=
0
i
=
self
.
find
(
sep
,
k
)
while
i
>=
0
:
pieces
.
append
(
self
.
text
[
self
.
begin
+
k
:
self
.
begin
+
i
])
k
=
i
+
l
k
=
i
+
l
ength
i
=
self
.
find
(
sep
,
k
)
pieces
.
append
(
self
.
text
[
self
.
begin
+
k
:
self
.
end
])
return
pieces
...
...
DHParser/syntaxtree.py
View file @
d702fc24
...
...
@@ -28,10 +28,10 @@ import collections.abc
from
collections
import
OrderedDict
import
copy
from
DHParser.error
import
Error
,
linebreaks
,
line_col
from
DHParser.error
import
Error
,
ErrorCode
,
linebreaks
,
line_col
from
DHParser.stringview
import
StringView
from
DHParser.toolkit
import
re
,
typing
from
typing
import
Callable
,
cast
,
Iterator
,
List
,
AbstractSet
,
Set
,
Union
,
Tuple
,
Optional
from
typing
import
Callable
,
cast
,
Iterator
,
List
,
AbstractSet
,
Set
,
Dict
,
Union
,
Tuple
,
Optional
__all__
=
(
'ParserBase'
,
...
...
@@ -43,6 +43,7 @@ __all__ = ('ParserBase',
'ZOMBIE_NODE'
,
'Node'
,
'RootNode'
,
'ZOMBIE_ROOTNODE'
,
'parse_sxpr'
,
'parse_xml'
,
'flatten_sxpr'
,
...
...
@@ -70,7 +71,7 @@ class ParserBase:
self
.
ptype
=
':'
+
self
.
__class__
.
__name__
# type: str
def
__repr__
(
self
):
return
self
.
name
+
self
.
ptype
return
self
.
name
+
self
.
ptype
def
__str__
(
self
):
return
self
.
name
+
(
' = '
if
self
.
name
else
''
)
+
repr
(
self
)
...
...
@@ -200,7 +201,7 @@ def flatten_xml(xml: str) -> str:
return
re
.
sub
(
r
'\s+(?=<[\w:])'
,
''
,
re
.
sub
(
r
'(?P<closing_tag></:?\w+>)\s+'
,
tag_only
,
xml
))
RX_AMP
=
re
.
compile
(
'&(?!\w+;)'
)
RX_AMP
=
re
.
compile
(
r
'&(?!\w+;)'
)
class
Node
(
collections
.
abc
.
Sized
):
...
...
@@ -271,7 +272,7 @@ class Node(collections.abc.Sized):
# Assignment to self.result initializes the attr _result, children and _len
# The following if-clause is merely an optimization, i.e. a fast-path for leaf-Nodes
if
leafhint
:
self
.
_result
=
result
# type: StrictResultType
self
.
_result
=
result
# type: StrictResultType
# cast(StrictResultType, result)
self
.
_content
=
None
# type: Optional[str]
self
.
children
=
NoChildren
# type: ChildrenType
self
.
_len
=
-
1
# type: int # lazy evaluation
...
...
@@ -436,7 +437,7 @@ class Node(collections.abc.Sized):
self
.
_result
=
result
or
''
else
:
self
.
children
=
NoChildren
self
.
_result
=
result
self
.
_result
=
result
# cast(StrictResultType, result)
@
property
...
...
@@ -554,11 +555,11 @@ class Node(collections.abc.Sized):
subtree
=
child
.
_tree_repr
(
tab
,
open_fn
,
close_fn
,
data_fn
,
density
,
inline
,
inline_fn
)
if
subtree
:
s
ubtree
=
[
subtree
]
if
inline
else
subtree
.
split
(
'
\n
'
)
content
.
append
((
sep
+
usetab
).
join
(
s
for
s
in
s
ubtree
))
s
t
=
[
subtree
]
if
inline
else
subtree
.
split
(
'
\n
'
)
content
.
append
((
sep
+
usetab
).
join
(
s
for
s
in
s
t
))
return
head
+
usetab
+
(
sep
+
usetab
).
join
(
content
)
+
tail
res
=
self
.
content
# cast(str, self.result) # safe, because if there are no children, result is a string
res
=
self
.
content
if
not
inline
and
not
head
:
# strip whitespace for omitted non inline node, e.g. CharData in mixed elements
res
=
res
.
strip
()
...
...
@@ -593,7 +594,7 @@ class Node(collections.abc.Sized):
def
opening
(
node
)
->
str
:
"""Returns the opening string for the representation of `node`."""
txt
=
[
left_bracket
,
node
.
tag_name
]
txt
=
[
left_bracket
,
node
.
tag_name
]
# s += " '(pos %i)" % node.add_pos
if
hasattr
(
node
,
'_xml_attr'
):
txt
.
extend
(
' `(%s "%s")'
%
(
k
,
v
)
for
k
,
v
in
node
.
attr
.
items
())
...
...
@@ -620,9 +621,9 @@ class Node(collections.abc.Sized):
def
as_xml
(
self
,
src
:
str
=
None
,
showerrors
:
bool
=
True
,
indentation
:
int
=
2
,
inline_tags
:
Set
[
str
]
=
set
(),
omit_tags
:
Set
[
str
]
=
set
(),
empty_tags
:
Set
[
str
]
=
set
())
->
str
:
inline_tags
:
Set
[
str
]
=
set
(),
omit_tags
:
Set
[
str
]
=
set
(),
empty_tags
:
Set
[
str
]
=
set
())
->
str
:
"""
Returns content as XML-tree.
...
...
@@ -683,15 +684,16 @@ class Node(collections.abc.Sized):
thereby signalling that the children of this node shall not be
printed on several lines to avoid unwanted gaps in the output.
"""
return
node
.
tag_name
in
inline_tags
or
(
hasattr
(
node
,
'_xml_attr'
)
\
and
node
.
attr
.
get
(
'xml:space'
,
'default'
)
==
'preserve'
)
return
node
.
tag_name
in
inline_tags
\
or
(
hasattr
(
node
,
'_xml_attr'
)
and
node
.
attr
.
get
(
'xml:space'
,
'default'
)
==
'preserve'
)
line_breaks
=
linebreaks
(
src
)
if
src
else
[]
return
self
.
_tree_repr
(
' '
*
indentation
,
opening
,
closing
,
sanitizer
,
density
=
1
,
inline_fn
=
inlining
)
def
select
(
self
,
match_function
:
Callable
,
include_root
:
bool
=
False
,
reverse
:
bool
=
False
)
\
def
select
(
self
,
match_function
:
Callable
,
include_root
:
bool
=
False
,
reverse
:
bool
=
False
)
\
->
Iterator
[
'Node'
]:
"""
Finds nodes in the tree that fulfill a given criterion.
...
...
@@ -722,7 +724,7 @@ class Node(collections.abc.Sized):
def
select_by_tag
(
self
,
tag_names
:
Union
[
str
,
AbstractSet
[
str
]],
include_root
:
bool
=
False
)
->
Iterator
[
'Node'
]:
include_root
:
bool
=
False
)
->
Iterator
[
'Node'
]:
"""
Returns an iterator that runs through all descendants that have one
of the given tag names.
...
...
@@ -790,16 +792,16 @@ class RootNode(Node):
that occurred.
"""
def
__init__
(
self
,
node
:
Optional
[
Node
]
=
None
)
->
'RootNode'
:
def
__init__
(
self
,
node
:
Optional
[
Node
]
=
None
):
super
().
__init__
(
ZOMBIE_PARSER
,
''
)
self
.
all_errors
=
[]
self
.
all_errors
=
[]
# type: List[Error]
self
.
error_flag
=
0
if
node
is
not
None
:
self
.
swallow
(
node
)
# customization for XML-Representation
self
.
inline_tags
=
set
()
self
.
omit_tags
=
set
()
self
.
empty_tags
=
set
()
self
.
inline_tags
=
set
()
# type: Set[str]
self
.
omit_tags
=
set
()
# type: Set[str]
self
.
empty_tags
=
set
()
# type: Set[str]
def
__deepcopy__
(
self
,
memodict
=
{}):
duplicate
=
self
.
__class__
(
None
)
...
...
@@ -857,7 +859,7 @@ class RootNode(Node):
def
new_error
(
self
,
node
:
Node
,
message
:
str
,
code
:
int
=
Error
.
ERROR
)
->
'RootNode'
:
code
:
ErrorCode
=
Error
.
ERROR
)
->
'RootNode'
:
"""
Adds an error to this tree, locating it at a specific node.
Parameters:
...
...
@@ -882,11 +884,13 @@ class RootNode(Node):
See the docstring of `Node.as_xml()` for an explanation of the
customizations.
"""
return
self
.
as_xml
(
inline_tags
=
self
.
inline_tags
,
return
self
.
as_xml
(
inline_tags
=
self
.
inline_tags
,
omit_tags
=
self
.
omit_tags
,
empty_tags
=
self
.
empty_tags
)
ZOMBIE_ROOTNODE
=
RootNode
()
#######################################################################
#
# S-expression- and XML-parsers
...
...
@@ -908,7 +912,7 @@ def parse_sxpr(sxpr: str) -> Node:
"""
sxpr
=
StringView
(
sxpr
).
strip
()
mock_parsers
=
dict
()
mock_parsers
=
dict
()
# type: Dict[str, MockParser]
def
next_block
(
s
:
StringView
):
"""Generator that yields all characters until the next closing bracket
...
...
@@ -949,9 +953,9 @@ def parse_sxpr(sxpr: str) -> Node:
tagname
=
sxpr
[:
end
]
name
,
class_name
=
(
tagname
.
split
(
':'
)
+
[
''
])[:
2
]
sxpr
=
sxpr
[
end
:].
strip
()
attributes
=
OrderedDict
()
attributes
=
OrderedDict
()
# type: OrderedDict[str, str]
if
sxpr
[
0
]
==
'('
:
result
=
tuple
(
inner_parser
(
block
)
for
block
in
next_block
(
sxpr
))
result
=
tuple
(
inner_parser
(
block
)
for
block
in
next_block
(
sxpr
))
# type: ResultType
else
:
lines
=
[]
while
sxpr
and
sxpr
[
0
:
1
]
!=
')'
:
...
...
@@ -961,11 +965,12 @@ def parse_sxpr(sxpr: str) -> Node:
k
=
sxpr
.
find
(
')'
)
# read very special attribute pos
if
sxpr
[
2
:
5
]
==
"pos"
and
0
<
i
<
k
:
pos
=
int
(
sxpr
[
5
:
k
].
strip
().
split
(
' '
)[
0
])
# pos = int(sxpr[5:k].strip().split(' ')[0])
pass
# ignore very special attribute err
elif
sxpr
[
2
:
5
]
==
"err"
and
0
<=
sxpr
.
find
(
'`'
,
5
)
<
k
:
m
=
sxpr
.
find
(
'('
,
5
)
while
m
>=
0
and
m
<
k
:
while
0
<=
m
<
k
:
m
=
sxpr
.
find
(
'('
,
k
)
k
=
max
(
k
,
sxpr
.
find
(
')'
,
max
(
m
,
0
)))
# read attr
...
...
@@ -973,7 +978,7 @@ def parse_sxpr(sxpr: str) -> Node:
attr
=
sxpr
[
2
:
i
].
strip
()
value
=
sxpr
[
i
:
k
].
strip
()[
1
:
-
1
]
attributes
[
attr
]
=
value
sxpr
=
sxpr
[
k
+
1
:].
strip
()
sxpr
=
sxpr
[
k
+
1
:].
strip
()
# parse content
for
qtmark
in
[
'"""'
,
"'''"
,
'"'
,
"'"
]:
match
=
sxpr
.
match
(
re
.
compile
(
qtmark
+
r
'.*?'
+
qtmark
,
re
.
DOTALL
))
...
...
@@ -1000,12 +1005,12 @@ def parse_sxpr(sxpr: str) -> Node:
RX_WHITESPACE_TAIL
=
re
.
compile
(
r
'\s*$'
)
def
parse_xml
(
xml
:
str
)
->
Node
:
def
parse_xml
(
xml
:
Union
[
str
,
StringView
]
)
->
Node
:
"""
Generates a tree of nodes from a (Pseudo-)XML-source.
"""
xml
=
StringView
(
xml
)
xml
=
StringView
(
str
(
xml
)
)
PlainText
=
MockParser
(
''
,
TOKEN_PTYPE
)
mock_parsers
=
{
TOKEN_PTYPE
:
PlainText
}
...
...
@@ -1013,7 +1018,7 @@ def parse_xml(xml: str) -> Node:
"""Parses a sqeuence of XML-Attributes. Returns the string-slice
beginning after the end of the attr.
"""
attributes
=
OrderedDict
()
attributes
=
OrderedDict
()
# type: OrderedDict[str, str]
restart
=
0
for
match
in
s
.
finditer
(
re
.
compile
(
r
'\s*(?P<attr>\w+)\s*=\s*"(?P<value>.*)"\s*'
)):
d
=
match
.
groupdict
()
...
...
@@ -1034,7 +1039,7 @@ def parse_xml(xml: str) -> Node:
s
,
attributes
=
parse_attributes
(
section
)
i
=
s
.
find
(
'>'
)
assert
i
>=
0
return
s
[
i
+
1
:],
tagname
,
attributes
,
s
[
i
-
1
]
==
"/"
return
s
[
i
+
1
:],
tagname
,
attributes
,
s
[
i
-
1
]
==
"/"
def
parse_closing_tag
(
s
:
StringView
)
->
Tuple
[
StringView
,
str
]:
"""Parses a closing tag and returns the string segment, just after
...
...
@@ -1045,12 +1050,12 @@ def parse_xml(xml: str) -> Node:
tagname
=
match
.
groupdict
()[
'tagname'
]
return
s
[
match
.
end
()
-
s
.
begin
:],
tagname
def
parse_leaf_content
(
s
:
StringView
)
->
Tuple
[
StringView
,
s
tr
]:
def
parse_leaf_content
(
s
:
StringView
)
->
Tuple
[
StringView
,
S
tr
ingView
]:
"""Parses a piece of the content of a tag, just until the next opening,
closing or solitary tag is reached.
"""
i
=
0
while
s
[
i
]
!=
"<"
or
s
[
max
(
0
,
i
-
1
)]
==
"
\\
"
:
while
s
[
i
]
!=
"<"
or
s
[
max
(
0
,
i
-
1
)]
==
"
\\
"
:
i
=
s
.
find
(
"<"
,
i
)
return
s
[
i
:],
s
[:
i
]
...
...
@@ -1058,23 +1063,23 @@ def parse_xml(xml: str) -> Node:
"""Parses the full content of a tag, starting right at the beginning
of the opening tag and ending right after the closing tag.
"""
res
ult
=
[]
s
,
tagname
,
attributes
,
solitary
=
parse_opening_tag
(
s
)
res
=
[]
# type: List[Node]
s
,
tagname
,
_
,
solitary
=
parse_opening_tag
(
s
)
name
,
class_name
=
(
tagname
.
split
(
":"
)
+
[
''
])[:
2
]
if
not
solitary
:
while
s
and
not
s
[:
2
]
==
"</"
:
s
,
leaf
=
parse_leaf_content
(
s
)
if
leaf
and
(
leaf
.
find
(
'
\n
'
)
<
0
or
not
leaf
.
match
(
RX_WHITESPACE_TAIL
)):
res
ult
.
append
(
Node
(
PlainText
,
leaf
))
res
.
append
(
Node
(
PlainText
,
leaf
))
if
s
[:
1
]
==
"<"
and
s
[:
2
]
!=
"</"
:
s
,
child
=
parse_full_content
(
s
)
res
ult
.
append
(
child
)
res
.
append
(
child
)
s
,
closing_tagname
=
parse_closing_tag
(
s
)
assert
tagname
==
closing_tagname
if
len
(
res
ult
)
==
1
and
res
ult
[
0
].
parser
.
ptype
==
TOKEN_PTYPE
:
result
=
res
ult
[
0
].
result
if
len
(
res
)
==
1
and
res
[
0
].
parser
.
ptype
==
TOKEN_PTYPE
:
result
=
res
[
0
].
result
else
:
result
=
tuple
(
res
ult
)
result
=
tuple
(
res
)
return
s
,
Node
(
mock_parsers
.
setdefault
(
tagname
,
MockParser
(
name
,
":"
+
class_name
)),
result
)
match_header
=
xml
.
search
(
re
.
compile
(
r
'<(?!\?)'
))
...
...
dhparser.py
View file @
d702fc24
...
...
@@ -166,7 +166,7 @@ if __name__ == '__main__':
# if called with a single filename that is either an EBNF file or a known
# test file type then use the given argument
arg = argv[1]
else:
else:
# otherwise run all tests in the test directory
arg = '*_test_*.ini'
if arg.endswith('.ebnf'):
...
...
@@ -328,8 +328,8 @@ def main():
file_path
=
input
(
'Please enter a file path for compilation > '
)
if
os
.
path
.
exists
(
file_path
)
and
os
.
path
.
isfile
(
file_path
):
compiler_suite
=
input
(
'Compiler suite or ENTER (for ebnf) > '
)
if
(
not
compiler_suite
or
(
os
.
path
.
exists
(
compiler_suite
)
and
os
.
path
.
isfile
(
compiler_suite
))
)
:
if
not
compiler_suite
or
(
os
.
path
.
exists
(
compiler_suite
)
and
os
.
path
.
isfile
(
compiler_suite
)):
_errors
=
compile_on_disk
(
file_path
,
compiler_suite
)
if
_errors
:
print
(
'
\n\n
'
.
join
(
str
(
err
)
for
err
in
_errors
))
...
...
test/run.py
View file @
d702fc24
...
...
@@ -11,7 +11,7 @@ import time
def
run_tests
(
command
):
testtype
=
'DOCTEST'
if
command
.
find
(
'doctest'
)
>=
0
else
'UNITTEST'
filename
=
command
[
command
.
rfind
(
' '
)
+
1
:]
filename
=
command
[
command
.
rfind
(
' '
)
+
1
:]
print
(
'
\n
'
+
testtype
+
' '
+
filename
)
os
.
system
(
command
)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment