badw-it / DHParser / Commits / 09196c53

Commit 09196c53 authored Apr 17, 2018 by di68kap

- error handling completely refactored

parent ce68124b
Changes: 13

DHParser/compile.py

@@ -38,7 +38,7 @@ import os
 import re
 from DHParser.preprocess import strip_tokens, with_source_mapping, PreprocessorFunc
-from DHParser.syntaxtree import Node
+from DHParser.syntaxtree import Node, RootNode
 from DHParser.transform import TransformationFunc
 from DHParser.parse import Grammar
 from DHParser.error import adjust_error_locations, is_error, Error

@@ -94,10 +94,11 @@ class Compiler:
         self.set_grammar_name(grammar_name, grammar_source)

     def _reset(self):
+        self.tree = None       # type: Optional[RootNode]
         self.context = []      # type: List[Node]
         self._dirty_flag = False

-    def __call__(self, node: Node) -> Any:
+    def __call__(self, root: RootNode) -> Any:
         """
         Compiles the abstract syntax tree with the root node `node` and
         returns the compiled code. It is up to subclasses implementing

@@ -108,8 +109,8 @@ class Compiler:
         if self._dirty_flag:
             self._reset()
         self._dirty_flag = True
-        result = self.compile(node)
-        self.propagate_error_flags(node, lazy=True)
+        self.tree = root
+        result = self.compile(root)
         return result

     def set_grammar_name(self, grammar_name: str = "", grammar_source: str = ""):

@@ -128,18 +129,18 @@ class Compiler:
         self.grammar_source = load_if_file(grammar_source)
         return self

-    @staticmethod
-    def propagate_error_flags(node: Node, lazy: bool = True) -> None:
-        # See test_parser.TestCompilerClass.test_propagate_error()..
-        """Propagates error flags from children to parent nodes to make sure
-        that the parent's error flag is always greater or equal the maximum
-        of the children's error flags."""
-        if not lazy or node.error_flag < Error.HIGHEST:
-            for child in node.children:
-                Compiler.propagate_error_flags(child)
-                node.error_flag = max(node.error_flag, child.error_flag)
-                if lazy and node.error_flag >= Error.HIGHEST:
-                    return
+    # @staticmethod
+    # def propagate_error_flags(node: Node, lazy: bool = True) -> None:
+    #     # See test_parser.TestCompilerClass.test_propagate_error()..
+    #     """Propagates error flags from children to parent nodes to make sure
+    #     that the parent's error flag is always greater or equal the maximum
+    #     of the children's error flags."""
+    #     if not lazy or node.error_flag < Error.HIGHEST:
+    #         for child in node.children:
+    #             Compiler.propagate_error_flags(child)
+    #             node.error_flag = max(node.error_flag, child.error_flag)
+    #             if lazy and node.error_flag >= Error.HIGHEST:
+    #                 return

     @staticmethod
     def method_name(node_name: str) -> str:
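
The hunks above (and most of the ones that follow) replace node-local error recording with calls that go through the tree the compiler now receives. A minimal, hedged sketch of that calling convention; SketchNode, SketchRoot and SketchCompiler are illustrative stand-ins, not DHParser's actual classes:

    class SketchNode:
        def __init__(self, children=()):
            self.children = tuple(children)

    class SketchRoot(SketchNode):
        def __init__(self, children=()):
            super().__init__(children)
            self.all_errors = []      # one central error list instead of per-node lists
            self.error_flag = 0       # worst error code seen so far

        def add_error(self, node, message, code=1000):
            # remember which node the error belongs to, but store it centrally
            self.all_errors.append((node, message, code))
            self.error_flag = max(self.error_flag, code)

    class SketchCompiler:
        def __call__(self, root):
            self.tree = root          # mirrors `self.tree = root` in __call__ above
            return self.compile(root)

        def compile(self, node):
            if not node.children:
                # old style would have been node.add_error(...); the new style reports
                # through the tree and leaves the node itself untouched
                self.tree.add_error(node, "leaf nodes are not allowed here")
            for child in node.children:
                self.compile(child)

    # usage: compiling a root with one leaf child records exactly one central error
    root = SketchRoot([SketchNode()])
    SketchCompiler()(root)
    assert len(root.all_errors) == 1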

DHParser/ebnf.py

@@ -581,7 +581,8 @@ class EBNFCompiler(Compiler):
         defined_symbols = set(self.rules.keys()) | self.RESERVED_SYMBOLS
         for symbol in self.symbols:
             if symbol not in defined_symbols:
-                self.symbols[symbol].add_error("Missing definition for symbol '%s'" % symbol)
+                self.tree.add_error(self.symbols[symbol],
+                                    "Missing definition for symbol '%s'" % symbol)
                 # root_node.error_flag = True

         # check for unconnected rules

@@ -598,7 +599,7 @@ class EBNFCompiler(Compiler):
             remove_connections(self.root_symbol)
             for leftover in defined_symbols:
-                self.rules[leftover][0].add_error(
+                self.tree.add_error(self.rules[leftover][0],
                     ('Rule "%s" is not connected to parser root "%s" !') %
                     (leftover, self.root_symbol), Error.WARNING)

@@ -628,7 +629,7 @@ class EBNFCompiler(Compiler):
             else:
                 assert nd.parser.name == "directive", nd.as_sxpr()
                 self.compile(nd)
-                node.error_flag = max(node.error_flag, nd.error_flag)
+                # node.error_flag = max(node.error_flag, nd.error_flag)
         self.definitions.update(definitions)

         return self.assemble_parser(definitions, node)

@@ -639,19 +640,19 @@ class EBNFCompiler(Compiler):
         if rule in self.rules:
             first = self.rules[rule][0]
             if not first._errors:
-                first.add_error('First definition of rule "%s" '
-                                'followed by illegal redefinitions.' % rule)
-            node.add_error('A rule with name "%s" has already been defined earlier.' % rule)
+                self.tree.add_error(first, 'First definition of rule "%s" '
+                                    'followed by illegal redefinitions.' % rule)
+            self.tree.add_error(node, 'A rule "%s" has already been defined earlier.' % rule)
         elif rule in EBNFCompiler.RESERVED_SYMBOLS:
-            node.add_error('Symbol "%s" is a reserved symbol.' % rule)
+            self.tree.add_error(node, 'Symbol "%s" is a reserved symbol.' % rule)
         elif not sane_parser_name(rule):
-            node.add_error('Illegal symbol "%s". Symbols must not start or '
+            self.tree.add_error(node, 'Illegal symbol "%s". Symbols must not start or '
                            ' end with a doube underscore "__".' % rule)
         elif rule in self.directives['tokens']:
-            node.add_error('Symbol "%s" has already been defined as '
+            self.add_error(node, 'Symbol "%s" has already been defined as '
                            'a preprocessor token.' % rule)
         elif keyword.iskeyword(rule):
-            node.add_error('Python keyword "%s" may not be used as a symbol. '
+            self.add_error(node, 'Python keyword "%s" may not be used as a symbol. '
                            % rule + '(This may change in the future.)')
         try:
             self.current_symbols = [node]

@@ -668,7 +669,7 @@ class EBNFCompiler(Compiler):
             trace = str(extract_tb(error.__traceback__)[-1])
             errmsg = "%s (TypeError: %s; %s)\n%s" \
                      % (EBNFCompiler.AST_ERROR, str(error), trace, node.as_sxpr())
-            node.add_error(errmsg)
+            self.add_error(node, errmsg)
             rule, defn = rule + ':error', '"' + errmsg + '"'
         return rule, defn

@@ -684,7 +685,7 @@ class EBNFCompiler(Compiler):
         try:
             re.compile(rx)
         except Exception as re_error:
-            node.add_error("malformed regular expression %s: %s" %
+            self.add_error(node, "malformed regular expression %s: %s" %
                            (repr(rx), str(re_error)))
         return rx

@@ -695,8 +696,8 @@ class EBNFCompiler(Compiler):
         if key not in self.REPEATABLE_DIRECTIVES:
             if key in self.defined_directives:
-                node.add_error('Directive "%s" has already been defined earlier. ' % key + \
-                               'Later definition will be ignored!',
+                self.add_error(node, 'Directive "%s" has already been defined earlier. '
+                               % key + 'Later definition will be ignored!',
                                code=Error.REDEFINED_DIRECTIVE_WARNING)
                 return ""
             self.defined_directives.add(key)

@@ -704,26 +705,27 @@ class EBNFCompiler(Compiler):
         if key in {'comment', 'whitespace'}:
             if node.children[1].parser.name == "list_":
                 if len(node.children[1].result) != 1:
-                    node.add_error('Directive "%s" must have one, but not %i values.' %
-                                   (key, len(node.children[1].result)))
+                    self.add_error(node, 'Directive "%s" must have one, but not %i values.'
+                                   % (key, len(node.children[1].result)))
                 value = self.compile(node.children[1]).pop()
                 if key == 'whitespace' and value in EBNFCompiler.WHITESPACE:
                     value = EBNFCompiler.WHITESPACE[value]  # replace whitespace-name by regex
                 else:
-                    node.add_error('Value "%s" not allowed for directive "%s".' % (value, key))
+                    self.add_error(node, 'Value "%s" not allowed for directive "%s".'
+                                   % (value, key))
             else:
                 value = node.children[1].content.strip("~")  # cast(str, node.children[
                                                              # 1].result).strip("~")
                 if value != node.children[1].content:  # cast(str, node.children[1].result):
-                    node.add_error("Whitespace marker '~' not allowed in definition of "
+                    self.add_error(node, "Whitespace marker '~' not allowed in definition of "
                                    "%s regular expression." % key)
                 if value[0] + value[-1] in {'""', "''"}:
                     value = escape_re(value[1:-1])
                 elif value[0] + value[-1] == '//':
                     value = self._check_rx(node, value[1:-1])
                 if key == 'whitespace' and not re.match(value, ''):
-                    node.add_error("Implicit whitespace should always match the empty string, "
-                                   "/%s/ does not." % value)
+                    self.add_error(node, "Implicit whitespace should always "
+                                   "match the empty string, /%s/ does not." % value)
             self.directives[key] = value

         elif key == 'ignorecase':

@@ -738,9 +740,8 @@ class EBNFCompiler(Compiler):
             value = {item.lower() for item in self.compile(node.children[1])}
             if ((value - {'left', 'right', 'both', 'none'})
                     or ('none' in value and len(value) > 1)):
-                node.add_error('Directive "literalws" allows the values '
-                               '`left`, `right`, `both` or `none`, '
-                               'but not `%s`' % ", ".join(value))
+                self.add_error(node, 'Directive "literalws" allows only '
+                               '`left`, `right`, `both` or `none`, not `%s`' % ", ".join(value))
             wsp = {'left', 'right'} if 'both' in value \
                 else {} if 'none' in value else value
             self.directives[key] = list(wsp)

@@ -749,7 +750,7 @@ class EBNFCompiler(Compiler):
             tokens = self.compile(node.children[1])
             redeclared = self.directives['tokens'] & tokens
             if redeclared:
-                node.add_error('Tokens %s have already been declared earlier. '
+                self.add_error(node, 'Tokens %s have already been declared earlier. '
                                % str(redeclared) + 'Later declaration will be ignored',
                                code=Error.REDECLARED_TOKEN_WARNING)
             self.directives['tokens'] |= tokens - redeclared

@@ -757,12 +758,12 @@ class EBNFCompiler(Compiler):
         elif key.endswith('_filter'):
             filter_set = self.compile(node.children[1])
             if not isinstance(filter_set, set) or len(filter_set) != 1:
-                node.add_error('Directive "%s" accepts exactly on symbol, not %s'
+                self.add_error(node.pos, 'Directive "%s" accepts exactly on symbol, not %s'
                                % (key, str(filter_set)))
             self.directives['filter'][key[:-7]] = filter_set.pop()

         else:
-            node.add_error('Unknown directive %s ! (Known ones are %s .)' %
+            self.add_error(node, 'Unknown directive %s ! (Known ones are %s .)' %
                            (key, ', '.join(list(self.directives.keys()))))
         return ""

@@ -773,7 +774,7 @@ class EBNFCompiler(Compiler):
        name for the particular non-terminal.
        """
        arguments = [self.compile(r) for r in node.children] + custom_args
-       node.error_flag = max(node.error_flag, max(t.error_flag for t in node.children))
+       # node.error_flag = max(node.error_flag, max(t.error_flag for t in node.children))
        return parser_class + '(' + ', '.join(arguments) + ')'

@@ -793,11 +794,11 @@ class EBNFCompiler(Compiler):
             if nd.parser.ptype == TOKEN_PTYPE and nd.content == "§":
                 mandatory_marker.append(len(filtered_children))
                 # if len(filtered_children) == 0:
-                #     nd.add_error('First item of a series should not be mandatory.',
-                #                  Error.WARNING)
+                #     self.add_error(nd.pos, 'First item of a series should not be mandatory.',
+                #                    Error.WARNING)
                 if len(mandatory_marker) > 1:
-                    nd.add_error('One mandatory marker (§) sufficient to declare the '
-                                 'rest of the series as mandatory.', Error.WARNING)
+                    self.add_error(nd, 'One mandatory marker (§) sufficient to declare the '
+                                   'rest of the series as mandatory.', Error.WARNING)
             else:
                 filtered_children.append(nd)
         saved_result = node.result

@@ -821,8 +822,8 @@ class EBNFCompiler(Compiler):
         assert len(node.children) == 2
         arg = node.children[-1]
         if arg.parser.name != 'symbol':
-            node.add_error(('Retrieve Operator "%s" requires a symbol, '
-                            'and not a %s.') % (prefix, str(arg.parser)))
+            self.add_error(node, ('Retrieve Operator "%s" requires a symbol, '
+                                  'and not a %s.') % (prefix, str(arg.parser)))
             return str(arg.result)
         if str(arg) in self.directives['filter']:
             custom_args = ['rfilter=%s' % self.directives['filter'][str(arg)]]

@@ -855,14 +856,14 @@ class EBNFCompiler(Compiler):
                     break
                 if (nd.parser.name != "regexp" or nd.content[:1] != '/'
                         or nd.content[-1:] != '/'):
-                    node.add_error("Lookbehind-parser can only be used with plain RegExp-"
-                                   "parsers, not with: " + nd.parser.name + nd.parser.ptype)
+                    self.add_error(node, "Lookbehind-parser can only be used with RegExp-"
+                                   "parsers, not with: " + nd.parser.name + nd.parser.ptype)

             if not result.startswith('RegExp('):
                 self.deferred_tasks.append(lambda: check(node))
             return result
         except KeyError:
-            node.add_error('Unknown prefix "%s".' % prefix)
+            self.add_error(node, 'Unknown prefix "%s".' % prefix)
         return ""

@@ -888,14 +889,15 @@ class EBNFCompiler(Compiler):
         nd = node.children[0]
         for child in nd.children:
             if child.parser.ptype == TOKEN_PTYPE and nd.content == "§":
-                node.add_error("Unordered parser lists cannot contain mandatory (§) items.")
+                self.add_error(node, "Unordered sequences can't contain mandatory (§) items.")
         args = ', '.join(self.compile(child) for child in nd.children)
         if nd.parser.name == "term":
             return "AllOf(" + args + ")"
         elif nd.parser.name == "expression":
             return "SomeOf(" + args + ")"
         else:
-            node.add_error("Unordered sequence or alternative requires at least two elements.")
+            self.add_error(node,
+                           "Unordered sequence or alternative requires at least two elements.")
             return ""

     def on_symbol(self, node: Node) -> str:     # called only for symbols on the right hand side!

@@ -949,7 +951,7 @@ class EBNFCompiler(Compiler):
             trace = str(extract_tb(error.__traceback__)[-1])
             errmsg = "%s (AttributeError: %s; %s)\n%s" \
                      % (EBNFCompiler.AST_ERROR, str(error), trace, node.as_sxpr())
-            node.add_error(errmsg)
+            self.add_error(node, errmsg)
             return '"' + errmsg + '"'
         return parser + ', '.join([arg] + name) + ')'
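
Several of the new calls above pass a code= argument (Error.WARNING, Error.REDEFINED_DIRECTIVE_WARNING, Error.REDECLARED_TOKEN_WARNING, and later Error.MANDATORY_CONTINUATION), and compile.py compares error flags against Error.HIGHEST. A rough sketch of how such numeric codes can be ordered so that one integer per node or tree records the worst severity seen; the concrete values are illustrative assumptions, not DHParser's actual constants:

    class SketchErrorCodes:
        # illustrative values; only the ordering (warnings below errors) matters here
        WARNING = 10
        REDEFINED_DIRECTIVE_WARNING = 11
        REDECLARED_TOKEN_WARNING = 12
        ERROR = 1000
        MANDATORY_CONTINUATION = 1001
        HIGHEST = ERROR

    def is_error(code: int) -> bool:
        # anything at or above ERROR is fatal, anything below is merely a warning,
        # which is why a check like `error_flag < Error.HIGHEST` can stop early
        return code >= SketchErrorCodes.ERROR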

DHParser/error.py

@@ -174,10 +174,10 @@ def line_col(lbreaks: List[int], pos: int) -> Tuple[int, int]:
 #     """
 #     Returns the position within a text as (line, column)-tuple.
 #     """
-#     if pos < 0 or pos > len(text):  # one character behind EOF is still an allowed position!
+#     if pos < 0 or add_pos > len(text):  # one character behind EOF is still an allowed position!
 #         raise ValueError('Position %i outside text of length %s !' % (pos, len(text)))
 #     line = text.count("\n", 0, pos) + 1
-#     column = pos - text.rfind("\n", 0, pos)
+#     column = pos - text.rfind("\n", 0, add_pos)
 #     return line, column
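
The commented-out legacy code above derives a (line, column) pair directly from the text. The arithmetic is easy to check in isolation; the helper below is a stand-alone illustration, not the current lbreaks-based line_col:

    def line_col_from_text(text: str, pos: int):
        # same arithmetic as the commented-out code: count newlines before pos,
        # then measure the distance to the last newline before pos
        line = text.count("\n", 0, pos) + 1
        column = pos - text.rfind("\n", 0, pos)
        return line, column

    # position 4 in "ab\ncde" is the second character of the second line
    assert line_col_from_text("ab\ncde", 4) == (2, 2)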

DHParser/log.py

@@ -283,9 +283,7 @@ class HistoryRecord:
                     for cls, item in zip(tpl._fields, tpl)] + ['</tr>'])

     def err_msg(self) -> str:
-        return self.ERROR + ": " + "; ".join(
-            str(e) for e in (self.node._errors if self.node._errors
-                             else self.node.collect_errors()[:2]))
+        return self.ERROR + ": " + "; ".join(str(e) for e in (self.node.errors))

     @property
     def stack(self) -> str:

@@ -295,7 +293,7 @@ class HistoryRecord:
     @property
     def status(self) -> str:
         return self.FAIL if self.node is None else \
-            ('"%s"' % self.err_msg()) if self.node.error_flag else self.MATCH
+            ('"%s"' % self.err_msg()) if self.node.errors else self.MATCH
            # has_errors(self.node._errors)

     @property

@@ -448,7 +446,7 @@ def log_parsing_history(grammar, log_file_name: str = '', html: bool=True) -> None:
         append_line(full_history, line)
         if record.node and record.node.parser.ptype != WHITESPACE_PTYPE:
             append_line(match_history, line)
-            if record.node.error_flag:
+            if record.node.errors:
                 append_line(errors_only, line)
     write_log(full_history, log_file_name + '_full')
     if len(full_history) > LOG_TAIL_THRESHOLD + 10:

DHParser/parse.py

@@ -156,7 +156,7 @@ def add_parser_guard(parser_func):
             if grammar.history_tracking__:
                 # don't track returning parsers except in case an error has occurred
                 # remaining = len(rest)
-                if grammar.moving_forward__ or (node and node.error_flag):  # node._errors
+                if grammar.moving_forward__ or (node and node._errors):
                     record = HistoryRecord(grammar.call_stack__, node, text)
                     grammar.history__.append(record)
                 # print(record.stack, record.status, rest[:20].replace('\n', '|'))

@@ -165,7 +165,8 @@ def add_parser_guard(parser_func):
         except RecursionError:
             node = Node(None, str(text[:min(10, max(1, text.find("\n")))]) + " ...")
-            grammar.tree__.add_error(location, "maximum recursion depth of parser reached; "
+            node._pos = location
+            grammar.tree__.add_error(node, "maximum recursion depth of parser reached; "
                                      "potentially due to too many errors!")
             rest = EMPTY_STRING_VIEW

@@ -727,7 +728,8 @@ class Grammar:
             result, _ = parser(rest)
             if result is None:
                 result = Node(None, '').init_pos(0)
-                result.add_error(0, 'Parser "%s" did not match empty document.' % str(parser))
+                self.tree__.add_error(result,
+                                      'Parser "%s" did not match empty document.' % str(parser))
         while rest and len(stitches) < MAX_DROPOUTS:
             result, rest = parser(rest)
             if rest:

@@ -747,7 +749,7 @@ class Grammar:
                              if len(stitches) < MAX_DROPOUTS
                              else " too often! Terminating parser.")
                 stitches.append(Node(None, skip).init_pos(tail_pos(stitches)))
-                stitches[-1].add_error(self.document_length__ - 1, error_msg)
+                self.tree__.add_error(stitches[-1], error_msg)
                 if self.history_tracking__:
                     # # some parsers may have matched and left history records with nodes != None.
                     # # Because these are not connected to the stitched root node, their pos-

@@ -773,25 +775,15 @@ class Grammar:
                 # of the error will be the end of the text. Otherwise, the error
                 # message above ("...after end of parsing") would appear illogical.
                 error_node = Node(ZOMBIE_PARSER, '').init_pos(tail_pos(result.children))
-                error_node.add_error(self.document_length__ - 1, error_str)
+                self.tree__.add_error(error_node, error_str)
                 result.result = result.children + (error_node,)
             else:
-                result.add_error(self.document_length__ - 1, error_str)
+                self.tree__.add_error(result, error_str)
         # result.pos = 0  # calculate all positions
         # result.collect_errors(self.document__)
         self.tree__.swallow(result)
         return self.tree__

-    def location(self, remaining: str) -> int:
-        """Returns the location of the `remaining` text within the currently
-        parsed document.
-        """
-        self.document_length__ - len(remaining)
-
-    def add_error(self, location, error_msg, code=Error.ERROR):
-        """Adds an error at the location of `text` within the whole document that is
-        currently being parsed."""
-        self.tree__.add_error(location, error_msg, code)
-
     def push_rollback__(self, location, func):
         """
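
The removed Grammar.location() derived a document offset from the length of the not-yet-parsed rest of the text. A self-contained illustration of that arithmetic, as a plain function rather than a Grammar method, with the assert serving as a worked example:

    def location(document_length: int, remaining: str) -> int:
        """Offset at which `remaining` starts within a document of the given length."""
        return document_length - len(remaining)

    # "three" starts at offset 8 of the 13-character document "one two three"
    assert location(len("one two three"), "three") == 8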

@@ -869,18 +861,20 @@ class PreprocessorToken(Parser):
         if text[0:1] == BEGIN_TOKEN:
             end = text.find(END_TOKEN, 1)
             if end < 0:
-                self.grammar.add_error(self.grammar.location(text),
+                node = Node(self, '')   # type: Node
+                self.grammar.tree__.add_error(node,
                     'END_TOKEN delimiter missing from preprocessor token. '
                     '(Most likely due to a preprocessor bug!)')
-                return Node(self, ''), text[1:]
+                return node, text[1:]
             elif end == 0:
-                self.grammar.add_error(self.grammar.location(text),
+                node = Node(self, '')
+                self.grammar.tree__.add_error(node,
                     'Preprocessor-token cannot have zero length. '
                     '(Most likely due to a preprocessor bug!)')
-                return Node(self, ''), text[2:]
+                return node, text[2:]
             elif text.find(BEGIN_TOKEN, 1, end) >= 0:
                 node = Node(self, text[len(self.name) + 1:end])
-                self.grammar.add_error(self.grammar.location(text),
+                self.grammar.tree__.add_error(node,
                     'Preprocessor-tokens must not be nested or contain '
                     'BEGIN_TOKEN delimiter as part of their argument. '
                     '(Most likely due to a preprocessor bug!)')

@@ -1262,8 +1256,8 @@ class ZeroOrMore(Option):
             if not node:
                 break
             if len(text) == n:
-                self.grammar.add_error(self.grammar.location(text),
-                    dsl_error_msg(self, 'Infinite Loop detected.'))
+                self.grammar.tree__.add_error(node,
+                    dsl_error_msg(self, 'Infinite Loop encountered.'))
             results += (node,)
         return Node(self, results), text

@@ -1307,8 +1301,8 @@ class OneOrMore(UnaryOperator):
             if not node:
                 break
             if len(text_) == n:
-                self.grammar.add_error(self.grammar.location(text),
-                    dsl_error_msg(self, 'Infinite Loop detected.'))
+                self.grammar.tree__.add_error(node,
+                    dsl_error_msg(self, 'Infinite Loop encountered.'))
             results += (node,)
         if results == ():
             return None, text

@@ -1368,9 +1362,10 @@ class Series(NaryOperator):
                     i = max(1, text.index(match.regs[1][0])) if match else 1
                     node = Node(self, text_[:i]).init_pos(self.grammar.document_length__
                                                           - len(text_))
-                    self.grammar.add_error(self.grammar.location(text),
-                        '%s expected; "%s" found!' % (parser.repr,
-                        text_[:10].replace('\n', '\\n ')),
-                        code=Error.MANDATORY_CONTINUATION)
+                    self.grammar.tree__.add_error(node,
+                        '%s expected; "%s" found!' % (parser.repr,
+                        text_[:10].replace('\n', '\\n ')),
+                        code=Error.MANDATORY_CONTINUATION)
                     text_ = text_[i:]
                 results += (node,)
                 # if node.error_flag:  # break on first error

@@ -1637,9 +1632,9 @@ def Required(parser: Parser) -> Parser:
 #             i = max(1, text.index(m.regs[1][0])) if m else 1
 #             node = Node(self, text[:i])
 #             text_ = text[i:]
-#             self.grammar.add_error(self.grammar.location(text),
-#                 '%s expected; "%s" found!' % (str(self.parser),
-#                 text[:10]), code=Error.MANDATORY_CONTINUATION)
+#             self.grammar.tree__.add_error(node,
+#                 '%s expected; "%s" found!' % (str(self.parser),
+#                 text[:10]), code=Error.MANDATORY_CONTINUATION)
 #             return node, text_
 #
 #     def __repr__(self):

@@ -1812,9 +1807,10 @@ class Retrieve(Parser):
             stack = self.grammar.variables__[self.symbol.name]
             value = self.filter(stack)
         except (KeyError, IndexError):
-            self.grammar.add_error(self.grammar.location(text),
-                dsl_error_msg(self, "'%s' undefined or exhausted." % self.symbol.name))
-            return Node(self, ''), text
+            node = Node(self, '')
+            self.grammar.tree__.add_error(node,
+                dsl_error_msg(self, "'%s' undefined or exhausted." % self.symbol.name))
+            return node, text
         if text.startswith(value):
             return Node(self, value), text[len(value):]
         else:

@@ -1835,7 +1831,7 @@ class Pop(Retrieve):
     def __call__(self, text: StringView) -> Tuple[Optional[Node], StringView]:
         node, txt = super().retrieve_and_match(text)
-        if node and not node.error_flag:
+        if node and not node.errors:
             stack = self.grammar.variables__[self.symbol.name]
             value = stack.pop()
             location = self.grammar.document_length__ - len(text)

DHParser/syntaxtree.py

@@ -236,14 +236,14 @@ class Node(collections.abc.Sized):
     S-Expression-output.
     """

-    __slots__ = ['_result', 'children', '_len', '_pos', 'parser', '_errors' '_xml_attr', '_content']
+    __slots__ = ['_result', 'children', '_len', '_pos', 'parser', '_errors', '_xml_attr', '_content']

     def __init__(self, parser, result: ResultType, leafhint: bool = False) -> None:
         """
         Initializes the ``Node``-object with the ``Parser``-Instance
         that generated the node and the parser's result.
         """
         self._errors = []              # type: List[Error]
         self._pos = -1                 # type: int
         # Assignment to self.result initializes the attributes _result, children and _len
         # The following if-clause is merely an optimization, i.e. a fast-path for leaf-Nodes

@@ -432,7 +432,7 @@ class Node(collections.abc.Sized):
         return self._pos

-    def init_pos(self, pos: int, overwrite: bool = False) -> 'Node':
+    def init_pos(self, pos: int, overwrite: bool = True) -> 'Node':
         """
         (Re-)initialize position value. Usually, the parser guard
         (`parsers.add_parser_guard()`) takes care of assigning the

@@ -460,14 +460,15 @@ class Node(collections.abc.Sized):
     def errors(self) -> List[Error]:
         """
         Returns the errors that occurred at this Node, not including any
-        errors from child nodes. Works only, if error propagation has been
-        enabled when calling `swallow` from the root node.
+        errors from child nodes.
         """
         return self._errors

     @property
     def attributes(self):
-        """Returns a dictionary of XML-Attributes attached to the Node."""
+        """
+        Returns a dictionary of XML-Attributes attached to the Node.
+        """
         if not hasattr(self, '_xml_attr'):
             self._xml_attr = OrderedDict()
         return self._xml_attr

@@ -537,11 +538,11 @@ class Node(collections.abc.Sized):
         def opening(node) -> str:
             """Returns the opening string for the representation of `node`."""
             txt = [left_bracket, node.tag_name]
-            # s += " '(pos %i)" % node.pos
+            # s += " '(pos %i)" % node.add_pos
             if hasattr(node, '_xml_attr'):
                 txt.extend(' `(%s "%s")' % (k, v) for k, v in node.attributes.items())
             if src:
-                txt.append(" `(pos %i %i %i)" % (node.pos, *line_col(src, node.pos)))
+                txt.append(" `(pos %i %i %i)" % (node.pos, *line_col(src, node.add_pos)))
             # if node.error_flag:   # just for debugging error collecting
             #     txt += " HAS ERRORS"
             if showerrors and node.errors:

@@ -688,35 +689,34 @@ class RootNode(Node):
     def __init__(self):
         super().__init__(ZOMBIE_PARSER, '')
         self.all_errors = []
+        self.err_nodes = []
         self.error_flag = 0
-        self.error_propagation = False
-
-    def _propagate_errors(self):
-        assert self.children
-        if not self.all_errors or not self.error_propagation:
-            return
-        self.all_errors.sort(key=lambda e: e.pos)
-        i = 0
-        for leaf in self.select(lambda nd: not nd.children, False):
-            leaf.errors = []
-            while i < len(self.all_errors) \
-                    and leaf.pos <= self.all_errors[i].pos < leaf.pos + leaf.len:
-                leaf._errors.append(self.all_errors[i])
-                i += 1
-            if i >= len(self.all_errors):
-                break
-
-    def _propagate_new_error(self, error):
-        if self.error_propagation:
-            for leaf in self.select(lambda nd: not nd.children, False):
-                if leaf.pos <= error.pos < leaf.pos + leaf.len:
-                    leaf._errors.append(error)
-                    break
-            else:
-                assert False, "Error %s at pos %i out of bounds" % (str(error), error.pos)

     def swallow(self, node, error_propagation=False):
         assert not node.errors, "Node has already been swallowed!?"
+    # def _propagate_errors(self):
+    #     if not self.all_errors or not self.error_propagation:
+    #         return
+    #     self.all_errors.sort(key=lambda e: e.pos)
+    #     i = 0
+    #     for leaf in self.select(lambda nd: not nd.children, False):
+    #         leaf.errors = []
+    #         while i < len(self.all_errors) \
+    #                 and leaf.pos <= self.all_errors[i].add_pos < leaf.add_pos + leaf.len:
+    #             leaf._errors.append(self.all_errors[i])
+    #             i += 1
+    #             if i >= len(self.all_errors):
+    #                 break
+    #
+    # def _propagate_new_error(self, error):
+    #     if self.error_propagation:
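
The _propagate_errors logic that this hunk deactivates attaches each centrally collected error to the leaf node whose text span contains the error's position. A condensed, hedged sketch of that interval check, using plain (pos, length) tuples instead of Node objects:

    from typing import List, Tuple

    Leaf = Tuple[int, int]   # (pos, length) of a leaf node
    Err = Tuple[int, str]    # (pos, message)

    def assign_errors(leaves: List[Leaf], errors: List[Err]) -> List[List[Err]]:
        """Per leaf, collect the errors whose position falls inside that leaf's span."""
        errors = sorted(errors)                # mirrors all_errors.sort(key=lambda e: e.pos)
        per_leaf: List[List[Err]] = [[] for _ in leaves]
        i = 0
        for k, (pos, length) in enumerate(leaves):
            while i < len(errors) and pos <= errors[i][0] < pos + length:
                per_leaf[k].append(errors[i])
                i += 1
        return per_leaf

    # leaves covering "abcdef" as "abc" + "def"; an error at offset 4 lands on the second leaf
    assert assign_errors([(0, 3), (3, 3)], [(4, "oops")]) == [[], [(4, "oops")]]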