Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
e1da7f7f
Commit
e1da7f7f
authored
Jan 11, 2020
by
eckhart
Browse files
refactoring of history recording
parent
d622d9c7
Changes
6
Hide whitespace changes
Inline
Side-by-side
DHParser/log.py
View file @
e1da7f7f
...
...
@@ -281,6 +281,7 @@ class HistoryRecord:
'td.line, td.column {color:grey}
\n
'
'.text{color:blue}
\n
'
'.failtext {font-weight:normal; color:grey}
\n
'
'.errortext {font-weight:normal; color:darkred}
\n
'
'.unmatched {font-weight:normal; color:lightgrey}
\n
'
'.fail {font-weight:bold; color:darkgrey}
\n
'
'.error {font-weight:bold; color:red}
\n
'
...
...
@@ -361,6 +362,7 @@ class HistoryRecord:
classes
[
idx
[
'text'
]]
=
'failtext'
else
:
# ERROR
stack
+=
'<br/>
\n
"%s"'
%
self
.
err_msg
()
classes
[
idx
[
'text'
]]
=
'errortext'
tpl
=
self
.
Snapshot
(
str
(
self
.
line_col
[
0
]),
str
(
self
.
line_col
[
1
]),
stack
,
status
,
excerpt
)
# type: Tuple[str, str, str, str, str]
return
''
.
join
([
'<tr>'
]
+
[(
'<td class="%s">%s</td>'
%
(
cls
,
item
))
...
...
@@ -387,7 +389,7 @@ class HistoryRecord:
def
status
(
self
)
->
str
:
if
self
.
errors
:
return
self
.
ERROR
+
": "
+
', '
.
join
(
str
(
e
.
code
)
for
e
in
self
.
errors
)
elif
self
.
node
is
None
or
self
.
node
.
tag_name
in
(
ZOMBI
E_TAG
,
NON
E_TAG
):
elif
self
.
node
.
tag_name
in
(
NON
E_TAG
,
ZOMBI
E_TAG
):
return
self
.
FAIL
elif
self
.
node
.
tag_name
==
EMPTY_PTYPE
:
return
self
.
DROP
...
...
@@ -398,7 +400,7 @@ class HistoryRecord:
@
property
def
excerpt
(
self
):
if
self
.
node
:
if
self
.
node
.
tag_name
not
in
(
NONE_TAG
,
ZOMBIE_TAG
)
and
not
self
.
errors
:
excerpt
=
abbreviate_middle
(
str
(
self
.
node
),
40
)
else
:
s
=
self
.
text
...
...
@@ -413,7 +415,7 @@ class HistoryRecord:
@
property
def
remaining
(
self
)
->
int
:
return
len
(
self
.
text
)
-
(
len
(
self
.
node
)
if
self
.
node
else
0
)
return
len
(
self
.
text
)
-
len
(
self
.
node
)
@
staticmethod
def
last_match
(
history
:
List
[
'HistoryRecord'
])
->
Union
[
'HistoryRecord'
,
None
]:
...
...
@@ -497,7 +499,7 @@ def log_parsing_history(grammar, log_file_name: str = '', html: bool = True) ->
otherwise as plain text. (Browsers might take a few seconds or
minutes to display the table for long histories.)
"""
def
write_log
(
history
,
log_name
)
:
def
write_log
(
history
:
List
[
str
],
log_name
:
str
)
->
None
:
htm
=
'.html'
if
html
else
''
path
=
os
.
path
.
join
(
log_dir
()
or
''
,
log_name
+
"_parser.log"
+
htm
)
if
os
.
path
.
exists
(
path
):
...
...
@@ -512,7 +514,7 @@ def log_parsing_history(grammar, log_file_name: str = '', html: bool = True) ->
else
:
f
.
write
(
"
\n
"
.
join
(
history
))
def
append_line
(
log
,
line
)
:
def
append_line
(
log
:
List
[
str
],
line
:
str
)
->
None
:
"""Appends a line to a list of HTML table rows. Starts a new
table every 100 rows to allow browser to speed up rendering.
Does this really work...?"""
...
...
DHParser/parse.py
View file @
e1da7f7f
...
...
@@ -111,6 +111,7 @@ class ParserError(Exception):
or `AllOf`-parser detects a missing mandatory element.
"""
def
__init__
(
self
,
node
:
Node
,
rest
:
StringView
,
error
:
Error
,
first_throw
:
bool
):
assert
node
is
not
None
self
.
node
=
node
# type: Node
self
.
rest
=
rest
# type: StringView
self
.
error
=
error
# type: Error
...
...
@@ -402,8 +403,7 @@ class Parser:
if
i
>=
0
or
self
==
grammar
.
start_parser__
:
assert
pe
.
node
.
children
or
(
not
pe
.
node
.
result
)
# apply reentry-rule or catch error at root-parser
# if i < 0:
# i = 1
if
i
<
0
:
i
=
0
try
:
zombie
=
pe
.
node
[
ZOMBIE_TAG
]
# type: Optional[Node]
except
(
KeyError
,
ValueError
):
...
...
@@ -435,9 +435,6 @@ class Parser:
else
pe
.
node
# type: ResultType
raise
ParserError
(
Node
(
self
.
tag_name
,
result
).
with_pos
(
location
),
text
,
pe
.
error
,
first_throw
=
False
)
# TODO: can this be moved to trace.py, e.g. if pe.first_throw...
# should it be the first thrown error, rather than the last?
grammar
.
most_recent_error__
=
pe
# needed for history tracking
if
left_recursion_depth__
:
self
.
recursion_counter
[
location
]
-=
1
...
...
@@ -1241,8 +1238,9 @@ class Grammar:
error
=
Error
(
error_msg
,
stitch
.
pos
,
error_code
)
self
.
tree__
.
add_error
(
stitch
,
error
)
if
self
.
history_tracking__
:
lc
=
line_col
(
self
.
document_lbreaks__
,
error
.
pos
)
self
.
history__
.
append
(
HistoryRecord
([(
stitch
.
tag_name
,
stitch
.
pos
)],
stitch
,
rest
,
self
.
line_col__
(
rest
)
,
[
error
]))
self
.
document__
[
error
.
pos
:],
lc
,
[
error
]))
else
:
# if complete_match is False, ignore the rest and leave while loop
rest
=
StringView
(
''
)
...
...
@@ -1686,19 +1684,6 @@ class NaryParser(MetaParser):
copy_parser_attrs
(
self
,
duplicate
)
return
duplicate
def
_add_skip_notice
(
self
,
_text
:
StringView
,
err_node
:
Node
)
->
None
:
"""Adds a skip-notice to the parse tree's error-list for a parser
that supports skipping parts of the text when an error occurred
(Series, AllOf)."""
if
not
self
.
_grammar
.
resume_notices__
:
return
notice
=
Error
(
'Skipping within parser {} to point {}'
.
format
(
self
.
pname
or
self
.
ptype
,
repr
(
_text
[:
10
])),
self
.
_grammar
.
document_length__
-
len
(
_text
),
Error
.
RESUME_NOTICE
)
self
.
_grammar
.
tree__
.
add_error
(
err_node
,
notice
)
def
sub_parsers
(
self
)
->
Tuple
[
'Parser'
,
...]:
return
self
.
parsers
...
...
@@ -1850,105 +1835,28 @@ MessagesType = List[Tuple[Union[str, Any], str]]
NO_MANDATORY
=
1000
@
cython
.
locals
(
i
=
cython
.
int
,
location
=
cython
.
int
)
def
mandatory_violation
(
grammar
:
Grammar
,
text_
:
StringView
,
failed_on_lookahead
:
bool
,
expected
:
str
,
err_msgs
:
MessagesType
,
reloc
:
int
)
->
Tuple
[
Error
,
Node
,
StringView
]:
"""
Chooses the right error message in case of a mandatory violation and
returns an error with this message, an error node, to which the error
is attached, and the text segment where parsing is to continue.
This is a helper function that abstracts functionality that is
needed by the AllOf- as well as the Series-parser.
:param grammar: the grammar
:param text_: the point where the mandatory violation occurred. As usual the
string view represents the remaining text from this point.
:param failed_on_lookahead: True if the violating parser was a
Lookahead-Parser.
:param expected: the expected (but not found) text at this point.
:param err_msgs: A list of pairs of regular expressions (or simple
strings for that matter) and error messages that are chosen
if the regular expression matches the text where the error
occurred.
:param reloc: A position value that represents the reentry point for
parsing after the error occurred.
:return: a tuple of an error object, a zombie node at the position
where the mandatory violation occurred and to which the error
object is attached and a string view for continuing the
parsing process
"""
i
=
reloc
if
reloc
>=
0
else
0
location
=
grammar
.
document_length__
-
len
(
text_
)
err_node
=
Node
(
ZOMBIE_TAG
,
text_
[:
i
]).
with_pos
(
location
)
found
=
text_
[:
10
].
replace
(
'
\n
'
,
'
\\
n '
)
+
'...'
for
search
,
message
in
err_msgs
:
rxs
=
not
isinstance
(
search
,
str
)
if
(
rxs
and
text_
.
match
(
search
))
or
(
not
rxs
and
text_
.
startswith
(
search
)):
try
:
msg
=
message
.
format
(
expected
,
found
)
break
except
(
ValueError
,
KeyError
,
IndexError
)
as
e
:
error
=
Error
(
"Malformed error format string »{}« leads to »{}«"
.
format
(
message
,
str
(
e
)),
location
,
Error
.
MALFORMED_ERROR_STRING
)
grammar
.
tree__
.
add_error
(
err_node
,
error
)
else
:
if
grammar
.
history_tracking__
:
pname
=
':root'
for
pname
,
_
in
reversed
(
grammar
.
call_stack__
):
if
not
pname
.
startswith
(
':'
):
break
msg
=
'%s expected by parser %s, »%s« found!'
%
(
expected
,
pname
,
found
)
else
:
msg
=
'%s expected, »%s« found!'
%
(
expected
,
found
)
error
=
Error
(
msg
,
location
,
Error
.
MANDATORY_CONTINUATION_AT_EOF
if
(
failed_on_lookahead
and
not
text_
)
else
Error
.
MANDATORY_CONTINUATION
)
grammar
.
tree__
.
add_error
(
err_node
,
error
)
if
reloc
>=
0
:
grammar
.
most_recent_error__
=
ParserError
(
None
,
text_
,
error
,
first_throw
=
True
)
return
error
,
err_node
,
text_
[
i
:]
class
Series
(
NaryParser
):
class
MandatoryElementsParser
(
NaryParser
):
r
"""
Matches if each of a series of parsers matches exactly in the order of
the series.
Attributes:
mandatory
(int)
: Number of the element starting at which the element
mandatory: Number of the element starting at which the element
and all following elements are considered "mandatory". This
means that rather than returning a non-match an error message
is issued. The default value is NO_MANDATORY, which means that
no elements are mandatory.
errmsg (str): An optional error message that overrides the default
message for mandatory continuation errors. This can be used to
provide more helpful error messages to the user.
Example::
>>> variable_name = RegExp(r'(?!\d)\w') + RE(r'\w*')
>>> Grammar(variable_name)('variable_1').content
'variable_1'
>>> str(Grammar(variable_name)('1_variable'))
' <<< Error on "1_variable" | Parser "/(?!\\d)\\w/ /\\w*/ ~" did not match! >>> '
EBNF-Notation: ``... ...`` (sequence of parsers separated by a blank or new line)
EBNF-Example: ``series = letter letter_or_digit``
err_msgs: A list of pairs of regular expressions (or simple
strings for that matter) and error messages that are chosen
if the regular expression matches the text where the error
occurred.
skip_list: A list of regular expressions. The rest of the text is searched for
each of these. The closest match is the point where parsing will be
resumed.
"""
RX_ARGUMENT
=
re
.
compile
(
r
'\s(\S)'
)
def
__init__
(
self
,
*
parsers
:
Parser
,
mandatory
:
int
=
NO_MANDATORY
,
err_msgs
:
MessagesType
=
[],
skip
:
ResumeList
=
[])
->
None
:
super
(
Series
,
self
).
__init__
(
*
parsers
)
super
(
MandatoryElementsParser
,
self
).
__init__
(
*
parsers
)
length
=
len
(
self
.
parsers
)
if
mandatory
<
0
:
mandatory
+=
length
...
...
@@ -1957,11 +1865,10 @@ class Series(NaryParser):
'Custom error messages require that parameter "mandatory" is set!'
assert
not
(
mandatory
==
NO_MANDATORY
and
skip
),
\
'Search expressions for skipping text require that parameter "mandatory" is set!'
assert
length
>
0
,
\
'
Length of serie
s %i is below minimum length of 1'
%
length
'
Number of element
s %i is below minimum length of 1'
%
length
assert
length
<
NO_MANDATORY
,
\
'
Length
%i of series exceeds maximum length of %i'
%
(
length
,
NO_MANDATORY
)
'
Number of elemnts
%i of series exceeds maximum length of %i'
%
(
length
,
NO_MANDATORY
)
assert
0
<=
mandatory
<
length
or
mandatory
==
NO_MANDATORY
...
...
@@ -1969,6 +1876,108 @@ class Series(NaryParser):
self
.
err_msgs
=
err_msgs
# type: MessagesType
self
.
skip
=
skip
# type: ResumeList
def
get_reentry_point
(
self
,
text_
:
StringView
)
->
int
:
"""Returns a reentry-point determined by the skip-list in `self.skip`.
If no reentry-point was found or the skip-list is empty, -1 is returned.
"""
if
self
.
skip
:
gr
=
self
.
_grammar
return
reentry_point
(
text_
,
self
.
skip
,
gr
.
comment_rx__
,
gr
.
reentry_search_window__
)
return
-
1
@
cython
.
locals
(
i
=
cython
.
int
,
location
=
cython
.
int
)
def
mandatory_violation
(
self
,
text_
:
StringView
,
failed_on_lookahead
:
bool
,
expected
:
str
,
reloc
:
int
)
->
Tuple
[
Error
,
Node
,
StringView
]:
"""
Chooses the right error message in case of a mandatory violation and
returns an error with this message, an error node, to which the error
is attached, and the text segment where parsing is to continue.
This is a helper function that abstracts functionality that is
needed by the AllOf- as well as the Series-parser.
:param parser: the grammar
:param text_: the point where the mandatory violation occurred. As usual the
string view represents the remaining text from this point.
:param failed_on_lookahead: True if the violating parser was a
Lookahead-Parser.
:param expected: the expected (but not found) text at this point.
:param reloc: A position value that represents the reentry point for
parsing after the error occurred.
:return: a tuple of an error object, a zombie node at the position
where the mandatory violation occurred and to which the error
object is attached and a string view for continuing the
parsing process
"""
grammar
=
self
.
_grammar
i
=
reloc
if
reloc
>=
0
else
0
location
=
grammar
.
document_length__
-
len
(
text_
)
err_node
=
Node
(
ZOMBIE_TAG
,
text_
[:
i
]).
with_pos
(
location
)
found
=
text_
[:
10
].
replace
(
'
\n
'
,
'
\\
n '
)
+
'...'
for
search
,
message
in
self
.
err_msgs
:
rxs
=
not
isinstance
(
search
,
str
)
if
(
rxs
and
text_
.
match
(
search
))
or
(
not
rxs
and
text_
.
startswith
(
search
)):
try
:
msg
=
message
.
format
(
expected
,
found
)
break
except
(
ValueError
,
KeyError
,
IndexError
)
as
e
:
error
=
Error
(
"Malformed error format string »{}« leads to »{}«"
.
format
(
message
,
str
(
e
)),
location
,
Error
.
MALFORMED_ERROR_STRING
)
grammar
.
tree__
.
add_error
(
err_node
,
error
)
else
:
if
grammar
.
history_tracking__
:
pname
=
':root'
for
pname
,
_
in
reversed
(
grammar
.
call_stack__
):
if
not
pname
.
startswith
(
':'
):
break
msg
=
'%s expected by parser %s, »%s« found!'
%
(
expected
,
pname
,
found
)
else
:
msg
=
'%s expected, »%s« found!'
%
(
expected
,
found
)
error
=
Error
(
msg
,
location
,
Error
.
MANDATORY_CONTINUATION_AT_EOF
if
(
failed_on_lookahead
and
not
text_
)
else
Error
.
MANDATORY_CONTINUATION
)
grammar
.
tree__
.
add_error
(
err_node
,
error
)
if
reloc
>=
0
:
errors
=
[
error
]
if
grammar
.
resume_notices__
:
target
=
text_
[
reloc
:]
if
len
(
target
)
>=
10
:
target
=
target
[:
7
]
+
'...'
notice
=
Error
(
'Skipping within parser {} to point {}'
.
format
(
self
.
pname
or
self
.
ptype
,
repr
(
target
)),
self
.
_grammar
.
document_length__
-
len
(
text_
),
Error
.
RESUME_NOTICE
)
grammar
.
tree__
.
add_error
(
err_node
,
notice
)
errors
.
append
(
notice
)
if
grammar
.
history_tracking__
:
lc
=
line_col
(
grammar
.
document_lbreaks__
,
location
)
grammar
.
history__
.
append
(
HistoryRecord
(
grammar
.
call_stack__
,
None
,
text_
,
lc
,
errors
))
return
error
,
err_node
,
text_
[
i
:]
class
Series
(
MandatoryElementsParser
):
r
"""
Matches if each of a series of parsers matches exactly in the order of
the series.
Example::
>>> variable_name = RegExp(r'(?!\d)\w') + RE(r'\w*')
>>> Grammar(variable_name)('variable_1').content
'variable_1'
>>> str(Grammar(variable_name)('1_variable'))
' <<< Error on "1_variable" | Parser "/(?!\\d)\\w/ /\\w*/ ~" did not match! >>> '
EBNF-Notation: ``... ...`` (sequence of parsers separated by a blank or new line)
EBNF-Example: ``series = letter letter_or_digit``
"""
RX_ARGUMENT
=
re
.
compile
(
r
'\s(\S)'
)
def
__deepcopy__
(
self
,
memo
):
parsers
=
copy
.
deepcopy
(
self
.
parsers
,
memo
)
duplicate
=
self
.
__class__
(
*
parsers
,
mandatory
=
self
.
mandatory
,
...
...
@@ -1987,12 +1996,9 @@ class Series(NaryParser):
if
pos
<
self
.
mandatory
:
return
None
,
text
else
:
grammar
=
self
.
grammar
reloc
=
reentry_point
(
text_
,
self
.
skip
,
grammar
.
comment_rx__
,
grammar
.
reentry_search_window__
)
if
self
.
skip
else
-
1
error
,
node
,
text_
=
mandatory_violation
(
grammar
,
text_
,
isinstance
(
parser
,
Lookahead
),
parser
.
repr
,
self
.
err_msgs
,
reloc
)
reloc
=
self
.
get_reentry_point
(
text_
)
error
,
node
,
text_
=
self
.
mandatory_violation
(
text_
,
isinstance
(
parser
,
Lookahead
),
parser
.
repr
,
reloc
)
# check if parsing of the series can be resumed somewhere
if
reloc
>=
0
:
rest
=
text_
...
...
@@ -2000,7 +2006,6 @@ class Series(NaryParser):
if
nd
is
not
None
:
results
+=
(
node
,)
node
=
nd
self
.
_add_skip_notice
(
rest
,
node
)
else
:
results
+=
(
node
,)
break
...
...
@@ -2023,7 +2028,7 @@ class Series(NaryParser):
# `RE('\d+') + Optional(RE('\.\d+)` instead of `Series(RE('\d+'), Optional(RE('\.\d+))`
@
staticmethod
def
combined_mandatory
(
left
:
Parser
,
right
:
Parser
):
def
combined_mandatory
(
left
:
'Series'
,
right
:
'Series'
):
"""
Returns the position of the first mandatory element (if any) when
parsers `left` and `right` are joined to a sequence.
...
...
@@ -2126,7 +2131,7 @@ class Alternative(NaryParser):
return
self
class
AllOf
(
Nary
Parser
):
class
AllOf
(
MandatoryElements
Parser
):
"""
Matches if all elements of a list of parsers match. Each parser must
match exactly once. Other than in a sequence, the order in which
...
...
@@ -2169,24 +2174,8 @@ class AllOf(NaryParser):
parsers
=
series
.
parsers
super
(
AllOf
,
self
).
__init__
(
*
parsers
)
super
(
AllOf
,
self
).
__init__
(
*
parsers
,
mandatory
=
mandatory
,
err_msgs
=
err_msgs
,
skip
=
skip
)
self
.
num_parsers
=
len
(
self
.
parsers
)
# type: int
if
mandatory
<
0
:
mandatory
+=
self
.
num_parsers
assert
not
(
mandatory
==
NO_MANDATORY
and
err_msgs
),
\
'Custom error messages require that parameter "mandatory" is set!'
assert
not
(
mandatory
==
NO_MANDATORY
and
skip
),
\
'Search expressions for skipping text require that parameter "mandatory" is set!'
assert
self
.
num_parsers
>
0
,
\
'Number of elements %i is below minimum of 1'
%
self
.
num_parsers
assert
self
.
num_parsers
<
NO_MANDATORY
,
\
'Number of elemnts %i of exceeds maximum of %i'
%
(
self
.
num_parsers
,
NO_MANDATORY
)
assert
0
<=
mandatory
<
self
.
num_parsers
or
mandatory
==
NO_MANDATORY
self
.
mandatory
=
mandatory
# type: int
self
.
err_msgs
=
err_msgs
# type: MessagesType
self
.
skip
=
skip
# type: ResumeList
def
__deepcopy__
(
self
,
memo
):
parsers
=
copy
.
deepcopy
(
self
.
parsers
,
memo
)
...
...
@@ -2215,13 +2204,11 @@ class AllOf(NaryParser):
if
self
.
num_parsers
-
len
(
parsers
)
<
self
.
mandatory
:
return
None
,
text
else
:
grammar
=
self
.
grammar
reloc
=
reentry_point
(
text_
,
self
.
skip
,
grammar
.
comment_rx__
,
grammar
.
reentry_search_window__
)
if
self
.
skip
else
-
1
reloc
=
self
.
get_reentry_point
(
text_
)
expected
=
'< '
+
' '
.
join
([
parser
.
repr
for
parser
in
parsers
])
+
' >'
lookahead
=
any
([
isinstance
(
p
,
Lookahead
)
for
p
in
parsers
])
error
,
err_node
,
text_
=
mandatory_violation
(
grammar
,
text_
,
lookahead
,
expected
,
self
.
err_msgs
,
reloc
)
error
,
err_node
,
text_
=
self
.
mandatory_violation
(
text_
,
lookahead
,
expected
,
reloc
)
results
+=
(
err_node
,)
if
reloc
<
0
:
parsers
=
[]
...
...
DHParser/syntaxtree.py
View file @
e1da7f7f
...
...
@@ -1393,10 +1393,10 @@ class RootNode(Node):
"""
Adds an Error object to the tree, locating it at a specific node.
"""
assert
isinstance
(
node
,
Node
)
if
not
node
:
node
=
Node
(
ZOMBIE_TAG
,
''
).
with_pos
(
error
.
pos
)
else
:
assert
isinstance
(
node
,
Node
)
assert
isinstance
(
node
,
FrozenNode
)
or
node
.
pos
<=
error
.
pos
,
\
"%i <= %i <= %i ?"
%
(
node
.
pos
,
error
.
pos
,
node
.
pos
+
max
(
1
,
len
(
node
)
-
1
))
# assert node.pos == error.pos or isinstance(node, FrozenNode)
...
...
DHParser/trace.py
View file @
e1da7f7f
...
...
@@ -36,24 +36,12 @@ __all__ = ('trace_history', 'all_descendants', 'set_tracer',
'resume_notices_on'
)
def
add_resume_notice
(
parser
,
rest
:
StringView
,
err_node
:
Node
)
->
None
:
"""Adds a resume notice to the error node with information about
the reentry point and the parser."""
if
parser
==
parser
.
_grammar
.
start_parser__
:
return
call_stack
=
parser
.
_grammar
.
call_stack__
if
len
(
call_stack
)
>=
2
:
i
,
N
=
-
2
,
-
len
(
call_stack
)
while
i
>=
N
and
call_stack
[
i
][
0
][
0
:
1
]
in
(
':'
,
'/'
,
'"'
,
"'"
,
"`"
):
i
-=
1
if
i
>=
N
and
i
!=
-
2
:
parent_info
=
"{}->{}"
.
format
(
call_stack
[
i
][
0
],
call_stack
[
-
2
][
0
])
else
:
parent_info
=
call_stack
[
-
2
][
0
]
else
:
parent_info
=
"?"
notice
=
Error
(
'Resuming from parser {} with parser {} at point: {}'
.
format
(
parser
.
pname
or
parser
.
ptype
,
parent_info
,
repr
(
rest
[:
10
])),
notice
=
Error
(
'Resuming from {} with parser {} at point: {}'
.
format
(
err_node
.
tag_name
,
parser
.
tag_name
,
repr
(
rest
[:
10
])),
parser
.
_grammar
.
document_length__
-
len
(
rest
),
Error
.
RESUME_NOTICE
)
parser
.
_grammar
.
tree__
.
add_error
(
err_node
,
notice
)
...
...
@@ -64,42 +52,66 @@ def trace_history(self: Parser, text: StringView) -> Tuple[Optional[Node], Strin
grammar
.
call_stack__
.
append
(
((
self
.
repr
if
self
.
tag_name
in
(
REGEXP_PTYPE
,
TOKEN_PTYPE
)
else
(
self
.
pname
or
self
.
tag_name
)),
location
))
# TODO: Record history on turning points here? i.e. when moving_forward is False
grammar
.
moving_forward__
=
True
if
grammar
.
most_recent_error__
:
save_error
=
grammar
.
most_recent_error__
grammar
.
most_recent_error__
=
None
else
:
save_error
=
None
try
:
node
,
rest
=
self
.
_parse
(
text
)
except
ParserError
as
pe
:
grammar
.
call_stack__
.
pop
()
if
self
==
grammar
.
start_parser__
:
if
pe
.
first_throw
:
grammar
.
most_recent_error__
=
pe
lc
=
line_col
(
grammar
.
document_lbreaks__
,
pe
.
error
.
pos
)
# TODO: get the call stack from when the error occured, her
e
nd
=
pe
.
nod
e
grammar
.
history__
.
append
(
HistoryRecord
(
grammar
.
call_stack__
,
pe
.
node
,
pe
.
rest
,
lc
,
[
pe
.
error
]))
HistoryRecord
(
grammar
.
call_stack__
,
nd
,
pe
.
rest
[
len
(
nd
):],
lc
,
[
pe
.
error
]))
# if self == grammar.start_parser__:
# lc = line_col(grammar.document_lbreaks__, pe.error.pos)
# # TODO: get the call stack from when the error occurred, here
# nd = pe.node
# grammar.history__.append(
# HistoryRecord(grammar.call_stack__, nd, pe.rest[len(nd):], lc, [pe.error]))
raise
pe
# Mind that memoized parser calls will not appear in the history record!
# Don't track returning parsers except in case an error has occurred!
# TODO: Try recording all named parsers on the way back?
delta
=
text
.
_len
-
rest
.
_len
p
arser_error
=
grammar
.
most_recent_error__
if
((
grammar
.
moving_forward__
or
p
arser_error
or
(
node
and
not
self
.
anonymous
))
p
e
=
grammar
.
most_recent_error__
if
((
grammar
.
moving_forward__
or
p
e
or
(
node
and
not
self
.
anonymous
))
and
(
self
.
tag_name
!=
WHITESPACE_PTYPE
)):
# TODO: Make dropping insignificant whitespace from history configurable
errors
=
[
parser_error
.
error
]
if
parser_error
else
[]
# type: List[Error]
line_col
=
grammar
.
line_col__
(
text
)
errors
=
[
pe
.
error
]
if
pe
else
[]
# type: List[Error]
nd
=
Node
(
node
.
tag_name
,
text
[:
delta
]).
with_pos
(
location
)
if
node
else
None
record
=
HistoryRecord
(
grammar
.
call_stack__
,
nd
,
rest
,
line_col
,
errors
)
if
(
not
grammar
.
history__
or
line_col
!=
grammar
.
history__
[
-
1
].
line_col
or
record
.
call_stack
!=
grammar
.
history__
[
-
1
].
call_stack
[:
len
(
record
.
call_stack
)]):
lc
=
line_col
(
grammar
.
document_lbreaks__
,
location
)
record
=
HistoryRecord
(
grammar
.
call_stack__
,
nd
,
pe
.
rest
if
pe
else
rest
,
lc
,
errors
)
cs_len
=
len
(
record
.
call_stack
)
if
(
not
grammar
.
history__
or
lc
!=
grammar
.
history__
[
-
1
].
line_col
or
record
.
call_stack
!=
grammar
.
history__
[
-
1
].
call_stack
[:
cs_len
]):
grammar
.
history__
.
append
(
record
)
if
parser_error
:
if
grammar
.
resume_notices__
:
add_resume_notice
(
self
,
rest
,
parser_error
.
node
)
grammar
.
most_recent_error__
=
None
if
pe
:
grammar
.
most_recent_error__
=
None
if
grammar
.
resume_notices__
:
# add_resume_notice(self, pe.rest[len(pe.node):], pe.node)
text_
=
pe
.
rest
[
len
(
pe
.
node
):]
target
=
text_
if
len
(
target
)
>=
10
:
target
=
target
[:
7
]
+
'...'
notice
=
Error
(
'Resuming from {} with parser {} at point: {}'
.
format
(
pe
.
node
.
tag_name
,
self
.
tag_name
,
repr
(
target
)),
self
.
_grammar
.
document_length__
-
len
(
text_
),
Error
.
RESUME_NOTICE
)
self
.
_grammar
.
tree__
.
add_error
(
pe
.
node
,
notice
)
if
save_error
:
grammar
.
most_recent_error__
=
save_error
grammar
.
moving_forward__
=
False
grammar
.
call_stack__
.
pop
()
return
node
,
rest
...
...
test/test_parse.py
View file @
e1da7f7f
...
...
@@ -531,6 +531,7 @@ class TestErrorRecovery:
resume_notices_on
(
parser
)
st
=
parser
(
'AB_D'
)
assert
len
(
st
.
errors
)
==
2
and
any
(
err
.
code
==
Error
.
RESUME_NOTICE
for
err
in
st
.
errors
)
assert
'Skipping'
in
str
(
st
.
errors_sorted
[
1
])
def
test_AllOf_skip
(
self
):
...
...
@@ -824,7 +825,7 @@ class TestReentryAfterError:
assert
cst
.
content
==
content
# assert cst.pick('alpha').content.startswith('ALPHA')
def
test_severl_reentry_points
(
self
):
def
test_sever
a
l_reentry_points
(
self
):
gr
=
self
.
gr
;
gr
.
resume_rules
=
dict
()
gr
.
resume_rules__
[
'alpha'
]
=
[
re
.
compile
(
r
'(?=BETA)'
),
re
.
compile
(
r
'(?=GAMMA)'
)]
content
=
'ALPHA acb BETA bac GAMMA cab .'
...
...
@@ -847,7 +848,6 @@ class TestReentryAfterError:
assert
len
(
cst
.
errors_sorted
)
==
1
resume_notices_on
(
gr
)
cst
=
gr
(
content
)
# print(cst.errors)
assert
len
(
cst
.
errors
)
==
2
and
any
(
err
.
code
==
Error
.
RESUME_NOTICE
for
err
in
cst
.
errors
)
def
test_several_resume_rules_innermost_rule_matching
(
self
):
...
...
test/test_trace.py
View file @
e1da7f7f
...
...
@@ -28,13 +28,16 @@ sys.path.append(os.path.abspath(os.path.join(scriptpath, '..')))
from
DHParser
import
grammar_provider
,
all_descendants
,
\
set_tracer
,
trace_history
,
log_parsing_history
,
start_logging
,
log_dir
,
\
set_config_value
,
resume_notices_on
set_config_value
,
resume_notices_on
,
Error
def
get_history
(
name
)
->
str
:
history_fname
=
os
.
path
.
join
(
log_dir
()
or
''
,
name
+
"_full_parser.log.html"
)
import
webbrowser
# just for debugging:
import
webbrowser
,
time
webbrowser
.
open
(
history_fname
)
time
.
sleep
(
1
)
# ------------------
with
open
(
history_fname
,
'r'
,
encoding
=
'utf-8'
)
as
f
:
history_file
=
f
.
read
()