Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
09196c53
Commit
09196c53
authored
Apr 17, 2018
by
di68kap
Browse files
- error handling completely refactored
parent
ce68124b
Changes
13
Hide whitespace changes
Inline
Side-by-side
DHParser/compile.py
View file @
09196c53
...
@@ -38,7 +38,7 @@ import os
...
@@ -38,7 +38,7 @@ import os
import
re
import
re
from
DHParser.preprocess
import
strip_tokens
,
with_source_mapping
,
PreprocessorFunc
from
DHParser.preprocess
import
strip_tokens
,
with_source_mapping
,
PreprocessorFunc
from
DHParser.syntaxtree
import
Node
from
DHParser.syntaxtree
import
Node
,
RootNode
from
DHParser.transform
import
TransformationFunc
from
DHParser.transform
import
TransformationFunc
from
DHParser.parse
import
Grammar
from
DHParser.parse
import
Grammar
from
DHParser.error
import
adjust_error_locations
,
is_error
,
Error
from
DHParser.error
import
adjust_error_locations
,
is_error
,
Error
...
@@ -94,10 +94,11 @@ class Compiler:
...
@@ -94,10 +94,11 @@ class Compiler:
self
.
set_grammar_name
(
grammar_name
,
grammar_source
)
self
.
set_grammar_name
(
grammar_name
,
grammar_source
)
def
_reset
(
self
):
def
_reset
(
self
):
self
.
tree
=
None
# type: Optional[RootNode]
self
.
context
=
[]
# type: List[Node]
self
.
context
=
[]
# type: List[Node]
self
.
_dirty_flag
=
False
self
.
_dirty_flag
=
False
def
__call__
(
self
,
node
:
Node
)
->
Any
:
def
__call__
(
self
,
root
:
Root
Node
)
->
Any
:
"""
"""
Compiles the abstract syntax tree with the root node `node` and
Compiles the abstract syntax tree with the root node `node` and
returns the compiled code. It is up to subclasses implementing
returns the compiled code. It is up to subclasses implementing
...
@@ -108,8 +109,8 @@ class Compiler:
...
@@ -108,8 +109,8 @@ class Compiler:
if
self
.
_dirty_flag
:
if
self
.
_dirty_flag
:
self
.
_reset
()
self
.
_reset
()
self
.
_dirty_flag
=
True
self
.
_dirty_flag
=
True
result
=
self
.
compile
(
node
)
self
.
tree
=
root
self
.
propagate_error_flags
(
node
,
lazy
=
True
)
result
=
self
.
compile
(
root
)
return
result
return
result
def
set_grammar_name
(
self
,
grammar_name
:
str
=
""
,
grammar_source
:
str
=
""
):
def
set_grammar_name
(
self
,
grammar_name
:
str
=
""
,
grammar_source
:
str
=
""
):
...
@@ -128,18 +129,18 @@ class Compiler:
...
@@ -128,18 +129,18 @@ class Compiler:
self
.
grammar_source
=
load_if_file
(
grammar_source
)
self
.
grammar_source
=
load_if_file
(
grammar_source
)
return
self
return
self
@
staticmethod
#
@staticmethod
def
propagate_error_flags
(
node
:
Node
,
lazy
:
bool
=
True
)
->
None
:
#
def propagate_error_flags(node: Node, lazy: bool = True) -> None:
# See test_parser.TestCompilerClass.test_propagate_error()..
#
# See test_parser.TestCompilerClass.test_propagate_error()..
"""Propagates error flags from children to parent nodes to make sure
#
"""Propagates error flags from children to parent nodes to make sure
that the parent's error flag is always greater or equal the maximum
#
that the parent's error flag is always greater or equal the maximum
of the children's error flags."""
#
of the children's error flags."""
if
not
lazy
or
node
.
error_flag
<
Error
.
HIGHEST
:
#
if not lazy or node.error_flag < Error.HIGHEST:
for
child
in
node
.
children
:
#
for child in node.children:
Compiler
.
propagate_error_flags
(
child
)
#
Compiler.propagate_error_flags(child)
node
.
error_flag
=
max
(
node
.
error_flag
,
child
.
error_flag
)
#
node.error_flag = max(node.error_flag, child.error_flag)
if
lazy
and
node
.
error_flag
>=
Error
.
HIGHEST
:
#
if lazy and node.error_flag >= Error.HIGHEST:
return
#
return
@
staticmethod
@
staticmethod
def
method_name
(
node_name
:
str
)
->
str
:
def
method_name
(
node_name
:
str
)
->
str
:
...
...
DHParser/ebnf.py
View file @
09196c53
...
@@ -581,7 +581,8 @@ class EBNFCompiler(Compiler):
...
@@ -581,7 +581,8 @@ class EBNFCompiler(Compiler):
defined_symbols
=
set
(
self
.
rules
.
keys
())
|
self
.
RESERVED_SYMBOLS
defined_symbols
=
set
(
self
.
rules
.
keys
())
|
self
.
RESERVED_SYMBOLS
for
symbol
in
self
.
symbols
:
for
symbol
in
self
.
symbols
:
if
symbol
not
in
defined_symbols
:
if
symbol
not
in
defined_symbols
:
self
.
symbols
[
symbol
].
add_error
(
"Missing definition for symbol '%s'"
%
symbol
)
self
.
tree
.
add_error
(
self
.
symbols
[
symbol
],
"Missing definition for symbol '%s'"
%
symbol
)
# root_node.error_flag = True
# root_node.error_flag = True
# check for unconnected rules
# check for unconnected rules
...
@@ -598,7 +599,7 @@ class EBNFCompiler(Compiler):
...
@@ -598,7 +599,7 @@ class EBNFCompiler(Compiler):
remove_connections
(
self
.
root_symbol
)
remove_connections
(
self
.
root_symbol
)
for
leftover
in
defined_symbols
:
for
leftover
in
defined_symbols
:
self
.
rules
[
leftover
][
0
]
.
add_error
(
self
.
tree
.
add_error
(
self
.
rules
[
leftover
][
0
]
,
(
'Rule "%s" is not connected to parser root "%s" !'
)
%
(
'Rule "%s" is not connected to parser root "%s" !'
)
%
(
leftover
,
self
.
root_symbol
),
Error
.
WARNING
)
(
leftover
,
self
.
root_symbol
),
Error
.
WARNING
)
...
@@ -628,7 +629,7 @@ class EBNFCompiler(Compiler):
...
@@ -628,7 +629,7 @@ class EBNFCompiler(Compiler):
else
:
else
:
assert
nd
.
parser
.
name
==
"directive"
,
nd
.
as_sxpr
()
assert
nd
.
parser
.
name
==
"directive"
,
nd
.
as_sxpr
()
self
.
compile
(
nd
)
self
.
compile
(
nd
)
node
.
error_flag
=
max
(
node
.
error_flag
,
nd
.
error_flag
)
#
node.error_flag = max(node.error_flag, nd.error_flag)
self
.
definitions
.
update
(
definitions
)
self
.
definitions
.
update
(
definitions
)
return
self
.
assemble_parser
(
definitions
,
node
)
return
self
.
assemble_parser
(
definitions
,
node
)
...
@@ -639,19 +640,19 @@ class EBNFCompiler(Compiler):
...
@@ -639,19 +640,19 @@ class EBNFCompiler(Compiler):
if
rule
in
self
.
rules
:
if
rule
in
self
.
rules
:
first
=
self
.
rules
[
rule
][
0
]
first
=
self
.
rules
[
rule
][
0
]
if
not
first
.
_errors
:
if
not
first
.
_errors
:
first
.
add_error
(
'First definition of rule "%s" '
self
.
tree
.
add_error
(
first
,
'First definition of rule "%s" '
'followed by illegal redefinitions.'
%
rule
)
'followed by illegal redefinitions.'
%
rule
)
nod
e
.
add_error
(
'A rul
e with nam
e "%s" has already been defined earlier.'
%
rule
)
self
.
tre
e
.
add_error
(
node
,
'A rule "%s" has already been defined earlier.'
%
rule
)
elif
rule
in
EBNFCompiler
.
RESERVED_SYMBOLS
:
elif
rule
in
EBNFCompiler
.
RESERVED_SYMBOLS
:
nod
e
.
add_error
(
'Symbol "%s" is a reserved symbol.'
%
rule
)
self
.
tre
e
.
add_error
(
node
,
'Symbol "%s" is a reserved symbol.'
%
rule
)
elif
not
sane_parser_name
(
rule
):
elif
not
sane_parser_name
(
rule
):
nod
e
.
add_error
(
'Illegal symbol "%s". Symbols must not start or '
self
.
tre
e
.
add_error
(
node
,
'Illegal symbol "%s". Symbols must not start or '
' end with a doube underscore "__".'
%
rule
)
' end with a doube underscore "__".'
%
rule
)
elif
rule
in
self
.
directives
[
'tokens'
]:
elif
rule
in
self
.
directives
[
'tokens'
]:
node
.
add_error
(
'Symbol "%s" has already been defined as '
self
.
add_error
(
node
,
'Symbol "%s" has already been defined as '
'a preprocessor token.'
%
rule
)
'a preprocessor token.'
%
rule
)
elif
keyword
.
iskeyword
(
rule
):
elif
keyword
.
iskeyword
(
rule
):
node
.
add_error
(
'Python keyword "%s" may not be used as a symbol. '
self
.
add_error
(
node
,
'Python keyword "%s" may not be used as a symbol. '
%
rule
+
'(This may change in the future.)'
)
%
rule
+
'(This may change in the future.)'
)
try
:
try
:
self
.
current_symbols
=
[
node
]
self
.
current_symbols
=
[
node
]
...
@@ -668,7 +669,7 @@ class EBNFCompiler(Compiler):
...
@@ -668,7 +669,7 @@ class EBNFCompiler(Compiler):
trace
=
str
(
extract_tb
(
error
.
__traceback__
)[
-
1
])
trace
=
str
(
extract_tb
(
error
.
__traceback__
)[
-
1
])
errmsg
=
"%s (TypeError: %s; %s)
\n
%s"
\
errmsg
=
"%s (TypeError: %s; %s)
\n
%s"
\
%
(
EBNFCompiler
.
AST_ERROR
,
str
(
error
),
trace
,
node
.
as_sxpr
())
%
(
EBNFCompiler
.
AST_ERROR
,
str
(
error
),
trace
,
node
.
as_sxpr
())
node
.
add_error
(
errmsg
)
self
.
add_error
(
node
,
errmsg
)
rule
,
defn
=
rule
+
':error'
,
'"'
+
errmsg
+
'"'
rule
,
defn
=
rule
+
':error'
,
'"'
+
errmsg
+
'"'
return
rule
,
defn
return
rule
,
defn
...
@@ -684,7 +685,7 @@ class EBNFCompiler(Compiler):
...
@@ -684,7 +685,7 @@ class EBNFCompiler(Compiler):
try
:
try
:
re
.
compile
(
rx
)
re
.
compile
(
rx
)
except
Exception
as
re_error
:
except
Exception
as
re_error
:
node
.
add_error
(
"malformed regular expression %s: %s"
%
self
.
add_error
(
node
,
"malformed regular expression %s: %s"
%
(
repr
(
rx
),
str
(
re_error
)))
(
repr
(
rx
),
str
(
re_error
)))
return
rx
return
rx
...
@@ -695,8 +696,8 @@ class EBNFCompiler(Compiler):
...
@@ -695,8 +696,8 @@ class EBNFCompiler(Compiler):
if
key
not
in
self
.
REPEATABLE_DIRECTIVES
:
if
key
not
in
self
.
REPEATABLE_DIRECTIVES
:
if
key
in
self
.
defined_directives
:
if
key
in
self
.
defined_directives
:
node
.
add_error
(
'Directive "%s" has already been defined earlier. '
%
key
+
\
self
.
add_error
(
node
,
'Directive "%s" has already been defined earlier. '
'Later definition will be ignored!'
,
%
key
+
'Later definition will be ignored!'
,
code
=
Error
.
REDEFINED_DIRECTIVE_WARNING
)
code
=
Error
.
REDEFINED_DIRECTIVE_WARNING
)
return
""
return
""
self
.
defined_directives
.
add
(
key
)
self
.
defined_directives
.
add
(
key
)
...
@@ -704,26 +705,27 @@ class EBNFCompiler(Compiler):
...
@@ -704,26 +705,27 @@ class EBNFCompiler(Compiler):
if
key
in
{
'comment'
,
'whitespace'
}:
if
key
in
{
'comment'
,
'whitespace'
}:
if
node
.
children
[
1
].
parser
.
name
==
"list_"
:
if
node
.
children
[
1
].
parser
.
name
==
"list_"
:
if
len
(
node
.
children
[
1
].
result
)
!=
1
:
if
len
(
node
.
children
[
1
].
result
)
!=
1
:
node
.
add_error
(
'Directive "%s" must have one, but not %i values.'
%
self
.
add_error
(
node
,
'Directive "%s" must have one, but not %i values.'
(
key
,
len
(
node
.
children
[
1
].
result
)))
%
(
key
,
len
(
node
.
children
[
1
].
result
)))
value
=
self
.
compile
(
node
.
children
[
1
]).
pop
()
value
=
self
.
compile
(
node
.
children
[
1
]).
pop
()
if
key
==
'whitespace'
and
value
in
EBNFCompiler
.
WHITESPACE
:
if
key
==
'whitespace'
and
value
in
EBNFCompiler
.
WHITESPACE
:
value
=
EBNFCompiler
.
WHITESPACE
[
value
]
# replace whitespace-name by regex
value
=
EBNFCompiler
.
WHITESPACE
[
value
]
# replace whitespace-name by regex
else
:
else
:
node
.
add_error
(
'Value "%s" not allowed for directive "%s".'
%
(
value
,
key
))
self
.
add_error
(
node
,
'Value "%s" not allowed for directive "%s".'
%
(
value
,
key
))
else
:
else
:
value
=
node
.
children
[
1
].
content
.
strip
(
"~"
)
# cast(str, node.children[
value
=
node
.
children
[
1
].
content
.
strip
(
"~"
)
# cast(str, node.children[
# 1].result).strip("~")
# 1].result).strip("~")
if
value
!=
node
.
children
[
1
].
content
:
# cast(str, node.children[1].result):
if
value
!=
node
.
children
[
1
].
content
:
# cast(str, node.children[1].result):
node
.
add_error
(
"Whitespace marker '~' not allowed in definition of "
self
.
add_error
(
node
,
"Whitespace marker '~' not allowed in definition of "
"%s regular expression."
%
key
)
"%s regular expression."
%
key
)
if
value
[
0
]
+
value
[
-
1
]
in
{
'""'
,
"''"
}:
if
value
[
0
]
+
value
[
-
1
]
in
{
'""'
,
"''"
}:
value
=
escape_re
(
value
[
1
:
-
1
])
value
=
escape_re
(
value
[
1
:
-
1
])
elif
value
[
0
]
+
value
[
-
1
]
==
'//'
:
elif
value
[
0
]
+
value
[
-
1
]
==
'//'
:
value
=
self
.
_check_rx
(
node
,
value
[
1
:
-
1
])
value
=
self
.
_check_rx
(
node
,
value
[
1
:
-
1
])
if
key
==
'whitespace'
and
not
re
.
match
(
value
,
''
):
if
key
==
'whitespace'
and
not
re
.
match
(
value
,
''
):
node
.
add_error
(
"Implicit whitespace should always
match the empty string,
"
self
.
add_error
(
node
,
"Implicit whitespace should always "
"/%s/ does not."
%
value
)
"
match the empty string,
/%s/ does not."
%
value
)
self
.
directives
[
key
]
=
value
self
.
directives
[
key
]
=
value
elif
key
==
'ignorecase'
:
elif
key
==
'ignorecase'
:
...
@@ -738,9 +740,8 @@ class EBNFCompiler(Compiler):
...
@@ -738,9 +740,8 @@ class EBNFCompiler(Compiler):
value
=
{
item
.
lower
()
for
item
in
self
.
compile
(
node
.
children
[
1
])}
value
=
{
item
.
lower
()
for
item
in
self
.
compile
(
node
.
children
[
1
])}
if
((
value
-
{
'left'
,
'right'
,
'both'
,
'none'
})
if
((
value
-
{
'left'
,
'right'
,
'both'
,
'none'
})
or
(
'none'
in
value
and
len
(
value
)
>
1
)):
or
(
'none'
in
value
and
len
(
value
)
>
1
)):
node
.
add_error
(
'Directive "literalws" allows the values '
self
.
add_error
(
node
,
'Directive "literalws" allows only '
'`left`, `right`, `both` or `none`, '
'`left`, `right`, `both` or `none`, not `%s`'
%
", "
.
join
(
value
))
'but not `%s`'
%
", "
.
join
(
value
))
wsp
=
{
'left'
,
'right'
}
if
'both'
in
value
\
wsp
=
{
'left'
,
'right'
}
if
'both'
in
value
\
else
{}
if
'none'
in
value
else
value
else
{}
if
'none'
in
value
else
value
self
.
directives
[
key
]
=
list
(
wsp
)
self
.
directives
[
key
]
=
list
(
wsp
)
...
@@ -749,7 +750,7 @@ class EBNFCompiler(Compiler):
...
@@ -749,7 +750,7 @@ class EBNFCompiler(Compiler):
tokens
=
self
.
compile
(
node
.
children
[
1
])
tokens
=
self
.
compile
(
node
.
children
[
1
])
redeclared
=
self
.
directives
[
'tokens'
]
&
tokens
redeclared
=
self
.
directives
[
'tokens'
]
&
tokens
if
redeclared
:
if
redeclared
:
node
.
add_error
(
'Tokens %s have already been declared earlier. '
self
.
add_error
(
node
,
'Tokens %s have already been declared earlier. '
%
str
(
redeclared
)
+
'Later declaration will be ignored'
,
%
str
(
redeclared
)
+
'Later declaration will be ignored'
,
code
=
Error
.
REDECLARED_TOKEN_WARNING
)
code
=
Error
.
REDECLARED_TOKEN_WARNING
)
self
.
directives
[
'tokens'
]
|=
tokens
-
redeclared
self
.
directives
[
'tokens'
]
|=
tokens
-
redeclared
...
@@ -757,12 +758,12 @@ class EBNFCompiler(Compiler):
...
@@ -757,12 +758,12 @@ class EBNFCompiler(Compiler):
elif
key
.
endswith
(
'_filter'
):
elif
key
.
endswith
(
'_filter'
):
filter_set
=
self
.
compile
(
node
.
children
[
1
])
filter_set
=
self
.
compile
(
node
.
children
[
1
])
if
not
isinstance
(
filter_set
,
set
)
or
len
(
filter_set
)
!=
1
:
if
not
isinstance
(
filter_set
,
set
)
or
len
(
filter_set
)
!=
1
:
node
.
add_error
(
'Directive "%s" accepts exactly on symbol, not %s'
self
.
add_error
(
node
.
pos
,
'Directive "%s" accepts exactly on symbol, not %s'
%
(
key
,
str
(
filter_set
)))
%
(
key
,
str
(
filter_set
)))
self
.
directives
[
'filter'
][
key
[:
-
7
]]
=
filter_set
.
pop
()
self
.
directives
[
'filter'
][
key
[:
-
7
]]
=
filter_set
.
pop
()
else
:
else
:
node
.
add_error
(
'Unknown directive %s ! (Known ones are %s .)'
%
self
.
add_error
(
node
,
'Unknown directive %s ! (Known ones are %s .)'
%
(
key
,
', '
.
join
(
list
(
self
.
directives
.
keys
()))))
(
key
,
', '
.
join
(
list
(
self
.
directives
.
keys
()))))
return
""
return
""
...
@@ -773,7 +774,7 @@ class EBNFCompiler(Compiler):
...
@@ -773,7 +774,7 @@ class EBNFCompiler(Compiler):
name for the particular non-terminal.
name for the particular non-terminal.
"""
"""
arguments
=
[
self
.
compile
(
r
)
for
r
in
node
.
children
]
+
custom_args
arguments
=
[
self
.
compile
(
r
)
for
r
in
node
.
children
]
+
custom_args
node
.
error_flag
=
max
(
node
.
error_flag
,
max
(
t
.
error_flag
for
t
in
node
.
children
))
#
node.error_flag = max(node.error_flag, max(t.error_flag for t in node.children))
return
parser_class
+
'('
+
', '
.
join
(
arguments
)
+
')'
return
parser_class
+
'('
+
', '
.
join
(
arguments
)
+
')'
...
@@ -793,11 +794,11 @@ class EBNFCompiler(Compiler):
...
@@ -793,11 +794,11 @@ class EBNFCompiler(Compiler):
if
nd
.
parser
.
ptype
==
TOKEN_PTYPE
and
nd
.
content
==
"§"
:
if
nd
.
parser
.
ptype
==
TOKEN_PTYPE
and
nd
.
content
==
"§"
:
mandatory_marker
.
append
(
len
(
filtered_children
))
mandatory_marker
.
append
(
len
(
filtered_children
))
# if len(filtered_children) == 0:
# if len(filtered_children) == 0:
#
nd
.add_error('First item of a series should not be mandatory.',
#
self
.add_error(
nd.pos,
'First item of a series should not be mandatory.',
# Error.WARNING)
#
Error.WARNING)
if
len
(
mandatory_marker
)
>
1
:
if
len
(
mandatory_marker
)
>
1
:
nd
.
add_error
(
'One mandatory marker (§) sufficient to declare the '
self
.
add_error
(
nd
,
'One mandatory marker (§) sufficient to declare the '
'rest of the series as mandatory.'
,
Error
.
WARNING
)
'rest of the series as mandatory.'
,
Error
.
WARNING
)
else
:
else
:
filtered_children
.
append
(
nd
)
filtered_children
.
append
(
nd
)
saved_result
=
node
.
result
saved_result
=
node
.
result
...
@@ -821,8 +822,8 @@ class EBNFCompiler(Compiler):
...
@@ -821,8 +822,8 @@ class EBNFCompiler(Compiler):
assert
len
(
node
.
children
)
==
2
assert
len
(
node
.
children
)
==
2
arg
=
node
.
children
[
-
1
]
arg
=
node
.
children
[
-
1
]
if
arg
.
parser
.
name
!=
'symbol'
:
if
arg
.
parser
.
name
!=
'symbol'
:
node
.
add_error
((
'Retrieve Operator "%s" requires a symbol, '
self
.
add_error
(
node
,
(
'Retrieve Operator "%s" requires a symbol, '
'and not a %s.'
)
%
(
prefix
,
str
(
arg
.
parser
)))
'and not a %s.'
)
%
(
prefix
,
str
(
arg
.
parser
)))
return
str
(
arg
.
result
)
return
str
(
arg
.
result
)
if
str
(
arg
)
in
self
.
directives
[
'filter'
]:
if
str
(
arg
)
in
self
.
directives
[
'filter'
]:
custom_args
=
[
'rfilter=%s'
%
self
.
directives
[
'filter'
][
str
(
arg
)]]
custom_args
=
[
'rfilter=%s'
%
self
.
directives
[
'filter'
][
str
(
arg
)]]
...
@@ -855,14 +856,14 @@ class EBNFCompiler(Compiler):
...
@@ -855,14 +856,14 @@ class EBNFCompiler(Compiler):
break
break
if
(
nd
.
parser
.
name
!=
"regexp"
or
nd
.
content
[:
1
]
!=
'/'
if
(
nd
.
parser
.
name
!=
"regexp"
or
nd
.
content
[:
1
]
!=
'/'
or
nd
.
content
[
-
1
:]
!=
'/'
):
or
nd
.
content
[
-
1
:]
!=
'/'
):
node
.
add_error
(
"Lookbehind-parser can only be used with
plain
RegExp-"
self
.
add_error
(
node
,
"Lookbehind-parser can only be used with RegExp-"
"parsers, not with: "
+
nd
.
parser
.
name
+
nd
.
parser
.
ptype
)
"parsers, not with: "
+
nd
.
parser
.
name
+
nd
.
parser
.
ptype
)
if
not
result
.
startswith
(
'RegExp('
):
if
not
result
.
startswith
(
'RegExp('
):
self
.
deferred_tasks
.
append
(
lambda
:
check
(
node
))
self
.
deferred_tasks
.
append
(
lambda
:
check
(
node
))
return
result
return
result
except
KeyError
:
except
KeyError
:
node
.
add_error
(
'Unknown prefix "%s".'
%
prefix
)
self
.
add_error
(
node
,
'Unknown prefix "%s".'
%
prefix
)
return
""
return
""
...
@@ -888,14 +889,15 @@ class EBNFCompiler(Compiler):
...
@@ -888,14 +889,15 @@ class EBNFCompiler(Compiler):
nd
=
node
.
children
[
0
]
nd
=
node
.
children
[
0
]
for
child
in
nd
.
children
:
for
child
in
nd
.
children
:
if
child
.
parser
.
ptype
==
TOKEN_PTYPE
and
nd
.
content
==
"§"
:
if
child
.
parser
.
ptype
==
TOKEN_PTYPE
and
nd
.
content
==
"§"
:
node
.
add_error
(
"Unordered
parser list
s can
no
t contain mandatory (§) items."
)
self
.
add_error
(
node
,
"Unordered
sequence
s can
'
t contain mandatory (§) items."
)
args
=
', '
.
join
(
self
.
compile
(
child
)
for
child
in
nd
.
children
)
args
=
', '
.
join
(
self
.
compile
(
child
)
for
child
in
nd
.
children
)
if
nd
.
parser
.
name
==
"term"
:
if
nd
.
parser
.
name
==
"term"
:
return
"AllOf("
+
args
+
")"
return
"AllOf("
+
args
+
")"
elif
nd
.
parser
.
name
==
"expression"
:
elif
nd
.
parser
.
name
==
"expression"
:
return
"SomeOf("
+
args
+
")"
return
"SomeOf("
+
args
+
")"
else
:
else
:
node
.
add_error
(
"Unordered sequence or alternative requires at least two elements."
)
self
.
add_error
(
node
,
"Unordered sequence or alternative requires at least two elements."
)
return
""
return
""
def
on_symbol
(
self
,
node
:
Node
)
->
str
:
# called only for symbols on the right hand side!
def
on_symbol
(
self
,
node
:
Node
)
->
str
:
# called only for symbols on the right hand side!
...
@@ -949,7 +951,7 @@ class EBNFCompiler(Compiler):
...
@@ -949,7 +951,7 @@ class EBNFCompiler(Compiler):
trace
=
str
(
extract_tb
(
error
.
__traceback__
)[
-
1
])
trace
=
str
(
extract_tb
(
error
.
__traceback__
)[
-
1
])
errmsg
=
"%s (AttributeError: %s; %s)
\n
%s"
\
errmsg
=
"%s (AttributeError: %s; %s)
\n
%s"
\
%
(
EBNFCompiler
.
AST_ERROR
,
str
(
error
),
trace
,
node
.
as_sxpr
())
%
(
EBNFCompiler
.
AST_ERROR
,
str
(
error
),
trace
,
node
.
as_sxpr
())
node
.
add_error
(
errmsg
)
self
.
add_error
(
node
,
errmsg
)
return
'"'
+
errmsg
+
'"'
return
'"'
+
errmsg
+
'"'
return
parser
+
', '
.
join
([
arg
]
+
name
)
+
')'
return
parser
+
', '
.
join
([
arg
]
+
name
)
+
')'
...
...
DHParser/error.py
View file @
09196c53
...
@@ -174,10 +174,10 @@ def line_col(lbreaks: List[int], pos: int) -> Tuple[int, int]:
...
@@ -174,10 +174,10 @@ def line_col(lbreaks: List[int], pos: int) -> Tuple[int, int]:
# """
# """
# Returns the position within a text as (line, column)-tuple.
# Returns the position within a text as (line, column)-tuple.
# """
# """
# if pos < 0 or pos > len(text): # one character behind EOF is still an allowed position!
# if pos < 0 or
add_
pos > len(text): # one character behind EOF is still an allowed position!
# raise ValueError('Position %i outside text of length %s !' % (pos, len(text)))
# raise ValueError('Position %i outside text of length %s !' % (pos, len(text)))
# line = text.count("\n", 0, pos) + 1
# line = text.count("\n", 0, pos) + 1
# column = pos - text.rfind("\n", 0, pos)
# column = pos - text.rfind("\n", 0,
add_
pos)
# return line, column
# return line, column
...
...
DHParser/log.py
View file @
09196c53
...
@@ -283,9 +283,7 @@ class HistoryRecord:
...
@@ -283,9 +283,7 @@ class HistoryRecord:
for
cls
,
item
in
zip
(
tpl
.
_fields
,
tpl
)]
+
[
'</tr>'
])
for
cls
,
item
in
zip
(
tpl
.
_fields
,
tpl
)]
+
[
'</tr>'
])
def
err_msg
(
self
)
->
str
:
def
err_msg
(
self
)
->
str
:
return
self
.
ERROR
+
": "
+
"; "
.
join
(
return
self
.
ERROR
+
": "
+
"; "
.
join
(
str
(
e
)
for
e
in
(
self
.
node
.
errors
))
str
(
e
)
for
e
in
(
self
.
node
.
_errors
if
self
.
node
.
_errors
else
self
.
node
.
collect_errors
()[:
2
]))
@
property
@
property
def
stack
(
self
)
->
str
:
def
stack
(
self
)
->
str
:
...
@@ -295,7 +293,7 @@ class HistoryRecord:
...
@@ -295,7 +293,7 @@ class HistoryRecord:
@
property
@
property
def
status
(
self
)
->
str
:
def
status
(
self
)
->
str
:
return
self
.
FAIL
if
self
.
node
is
None
else
\
return
self
.
FAIL
if
self
.
node
is
None
else
\
(
'"%s"'
%
self
.
err_msg
())
if
self
.
node
.
error
_flag
else
self
.
MATCH
(
'"%s"'
%
self
.
err_msg
())
if
self
.
node
.
error
s
else
self
.
MATCH
# has_errors(self.node._errors)
# has_errors(self.node._errors)
@
property
@
property
...
@@ -448,7 +446,7 @@ def log_parsing_history(grammar, log_file_name: str = '', html: bool=True) -> No
...
@@ -448,7 +446,7 @@ def log_parsing_history(grammar, log_file_name: str = '', html: bool=True) -> No
append_line
(
full_history
,
line
)
append_line
(
full_history
,
line
)
if
record
.
node
and
record
.
node
.
parser
.
ptype
!=
WHITESPACE_PTYPE
:
if
record
.
node
and
record
.
node
.
parser
.
ptype
!=
WHITESPACE_PTYPE
:
append_line
(
match_history
,
line
)
append_line
(
match_history
,
line
)
if
record
.
node
.
error
_flag
:
if
record
.
node
.
error
s
:
append_line
(
errors_only
,
line
)
append_line
(
errors_only
,
line
)
write_log
(
full_history
,
log_file_name
+
'_full'
)
write_log
(
full_history
,
log_file_name
+
'_full'
)
if
len
(
full_history
)
>
LOG_TAIL_THRESHOLD
+
10
:
if
len
(
full_history
)
>
LOG_TAIL_THRESHOLD
+
10
:
...
...
DHParser/parse.py
View file @
09196c53
...
@@ -156,7 +156,7 @@ def add_parser_guard(parser_func):
...
@@ -156,7 +156,7 @@ def add_parser_guard(parser_func):
if
grammar
.
history_tracking__
:
if
grammar
.
history_tracking__
:
# don't track returning parsers except in case an error has occurred
# don't track returning parsers except in case an error has occurred
# remaining = len(rest)
# remaining = len(rest)
if
grammar
.
moving_forward__
or
(
node
and
node
.
error
_flag
):
# node._errors
if
grammar
.
moving_forward__
or
(
node
and
node
.
_
error
s
):
record
=
HistoryRecord
(
grammar
.
call_stack__
,
node
,
text
)
record
=
HistoryRecord
(
grammar
.
call_stack__
,
node
,
text
)
grammar
.
history__
.
append
(
record
)
grammar
.
history__
.
append
(
record
)
# print(record.stack, record.status, rest[:20].replace('\n', '|'))
# print(record.stack, record.status, rest[:20].replace('\n', '|'))
...
@@ -165,7 +165,8 @@ def add_parser_guard(parser_func):
...
@@ -165,7 +165,8 @@ def add_parser_guard(parser_func):
except
RecursionError
:
except
RecursionError
:
node
=
Node
(
None
,
str
(
text
[:
min
(
10
,
max
(
1
,
text
.
find
(
"
\n
"
)))])
+
" ..."
)
node
=
Node
(
None
,
str
(
text
[:
min
(
10
,
max
(
1
,
text
.
find
(
"
\n
"
)))])
+
" ..."
)
grammar
.
tree__
.
add_error
(
location
,
"maximum recursion depth of parser reached; "
node
.
_pos
=
location
grammar
.
tree__
.
add_error
(
node
,
"maximum recursion depth of parser reached; "
"potentially due to too many errors!"
)
"potentially due to too many errors!"
)
rest
=
EMPTY_STRING_VIEW
rest
=
EMPTY_STRING_VIEW
...
@@ -727,7 +728,8 @@ class Grammar:
...
@@ -727,7 +728,8 @@ class Grammar:
result
,
_
=
parser
(
rest
)
result
,
_
=
parser
(
rest
)
if
result
is
None
:
if
result
is
None
:
result
=
Node
(
None
,
''
).
init_pos
(
0
)
result
=
Node
(
None
,
''
).
init_pos
(
0
)
result
.
add_error
(
0
,
'Parser "%s" did not match empty document.'
%
str
(
parser
))
self
.
tree__
.
add_error
(
result
,
'Parser "%s" did not match empty document.'
%
str
(
parser
))
while
rest
and
len
(
stitches
)
<
MAX_DROPOUTS
:
while
rest
and
len
(
stitches
)
<
MAX_DROPOUTS
:
result
,
rest
=
parser
(
rest
)
result
,
rest
=
parser
(
rest
)
if
rest
:
if
rest
:
...
@@ -747,7 +749,7 @@ class Grammar:
...
@@ -747,7 +749,7 @@ class Grammar:
if
len
(
stitches
)
<
MAX_DROPOUTS
if
len
(
stitches
)
<
MAX_DROPOUTS
else
" too often! Terminating parser."
)
else
" too often! Terminating parser."
)
stitches
.
append
(
Node
(
None
,
skip
).
init_pos
(
tail_pos
(
stitches
)))
stitches
.
append
(
Node
(
None
,
skip
).
init_pos
(
tail_pos
(
stitches
)))
s
titches
[
-
1
]
.
add_error
(
s
elf
.
document_length__
-
1
,
error_msg
)
s
elf
.
tree__
.
add_error
(
s
titches
[
-
1
]
,
error_msg
)
if
self
.
history_tracking__
:
if
self
.
history_tracking__
:
# # some parsers may have matched and left history records with nodes != None.
# # some parsers may have matched and left history records with nodes != None.
# # Because these are not connected to the stitched root node, their pos-
# # Because these are not connected to the stitched root node, their pos-
...
@@ -773,25 +775,15 @@ class Grammar:
...
@@ -773,25 +775,15 @@ class Grammar:
# of the error will be the end of the text. Otherwise, the error
# of the error will be the end of the text. Otherwise, the error
# message above ("...after end of parsing") would appear illogical.
# message above ("...after end of parsing") would appear illogical.
error_node
=
Node
(
ZOMBIE_PARSER
,
''
).
init_pos
(
tail_pos
(
result
.
children
))
error_node
=
Node
(
ZOMBIE_PARSER
,
''
).
init_pos
(
tail_pos
(
result
.
children
))
error_node
.
add_error
(
self
.
document_length__
-
1
,
error_str
)
self
.
tree__
.
add_error
(
error_node
,
error_str
)
result
.
result
=
result
.
children
+
(
error_node
,)
result
.
result
=
result
.
children
+
(
error_node
,)
else
:
else
:
result
.
add_error
(
self
.
document_length__
-
1
,
error_str
)
self
.
tree__
.
add_error
(
result
,
error_str
)
# result.pos = 0 # calculate all positions
# result.pos = 0 # calculate all positions
# result.collect_errors(self.document__)
# result.collect_errors(self.document__)
self
.
tree__
.
swallow
(
result
)
self
.
tree__
.
swallow
(
result
)
return
self
.
tree__
return
self
.
tree__
def
location
(
self
,
remaining
:
str
)
->
int
:
"""Returns the location of the `remaining` text within the currently
parsed document.
"""
self
.
document_length__
-
len
(
remaining
)
def
add_error
(
self
,
location
,
error_msg
,
code
=
Error
.
ERROR
):
"""Adds an error at the location of `text` within the whole document that is
currently being parsed."""
self
.
tree__
.
add_error
(
location
,
error_msg
,
code
)
def
push_rollback__
(
self
,
location
,
func
):
def
push_rollback__
(
self
,
location
,
func
):
"""
"""
...
@@ -869,18 +861,20 @@ class PreprocessorToken(Parser):
...
@@ -869,18 +861,20 @@ class PreprocessorToken(Parser):
if
text
[
0
:
1
]
==
BEGIN_TOKEN
:
if
text
[
0
:
1
]
==
BEGIN_TOKEN
:
end
=
text
.
find
(
END_TOKEN
,
1
)
end
=
text
.
find
(
END_TOKEN
,
1
)
if
end
<
0
:
if
end
<
0
:
self
.
grammar
.
add_error
(
self
.
grammar
.
location
(
text
),
node
=
Node
(
self
,
''
)
self
.
grammar
.
tree__
.
add_error
(
node
,
'END_TOKEN delimiter missing from preprocessor token. '
'END_TOKEN delimiter missing from preprocessor token. '
'(Most likely due to a preprocessor bug!)'
)
# type: Node
'(Most likely due to a preprocessor bug!)'
)
# type: Node
return
N
ode
(
self
,
''
)
,
text
[
1
:]
return
n
ode
,
text
[
1
:]
elif
end
==
0
:
elif
end
==
0
:
self
.
grammar
.
add_error
(
self
.
grammar
.
location
(
text
),
node
=
Node
(
self
,
''
)
self
.
grammar
.
tree__
.
add_error
(
node
,
'Preprocessor-token cannot have zero length. '
'Preprocessor-token cannot have zero length. '