Commit 4aa8230b authored by di68kap
Browse files

parse.py, trace.py: refactoring finished: moved history tracking and resume notices to module trace

parent 63dd080c
......@@ -358,27 +358,27 @@ class Parser:
self.visited = dict() # type: Dict[int, Tuple[Optional[Node], StringView]]
self.recursion_counter = defaultdict(int) # type: DefaultDict[int, int]
# TODO: Outsource this to trace.py !!!
def _add_resume_notice(self, rest: StringView, err_node: Node) -> None:
"""Adds a resume notice to the error node with information about
the reentry point and the parser."""
if not self._grammar.resume_notices__ or self == self._grammar.start_parser__:
return
call_stack = self._grammar.call_stack__
if len(call_stack) >= 2:
i, N = -2, -len(call_stack)
while i >= N and call_stack[i][0][0:1] in (':', '/', '"', "'", "`"):
i -= 1
if i >= N and i != -2:
parent_info = "{}->{}".format(call_stack[i][0], call_stack[-2][0])
else:
parent_info = call_stack[-2][0]
else:
parent_info = "?"
notice = Error('Resuming from parser {} with parser {} at point: {}'
.format(self.pname or self.ptype, parent_info, repr(rest[:10])),
self._grammar.document_length__ - len(rest), Error.RESUME_NOTICE)
self._grammar.tree__.add_error(err_node, notice)
# # TODO: Outsource this to trace.py !!!
# def _add_resume_notice(self, rest: StringView, err_node: Node) -> None:
# """Adds a resume notice to the error node with information about
# the reentry point and the parser."""
# if not self._grammar.resume_notices__ or self == self._grammar.start_parser__:
# return
# call_stack = self._grammar.call_stack__
# if len(call_stack) >= 2:
# i, N = -2, -len(call_stack)
# while i >= N and call_stack[i][0][0:1] in (':', '/', '"', "'", "`"):
# i -= 1
# if i >= N and i != -2:
# parent_info = "{}->{}".format(call_stack[i][0], call_stack[-2][0])
# else:
# parent_info = call_stack[-2][0]
# else:
# parent_info = "?"
# notice = Error('Resuming from parser {} with parser {} at point: {}'
# .format(self.pname or self.ptype, parent_info, repr(rest[:10])),
# self._grammar.document_length__ - len(rest), Error.RESUME_NOTICE)
# self._grammar.tree__.add_error(err_node, notice)
@cython.locals(location=cython.int, gap=cython.int, i=cython.int)
def __call__(self: 'Parser', text: StringView) -> Tuple[Optional[Node], StringView]:
......@@ -419,13 +419,13 @@ class Parser:
return None, text
self.recursion_counter[location] += 1
# write current step to call stack, if history tracking is configured
history_tracking__ = grammar.history_tracking__
if history_tracking__:
grammar.call_stack__.append(
((self.repr if self.tag_name in (':RegExp', ':Token', ':DropToken')
else (self.pname or self.tag_name)), location))
grammar.moving_forward__ = True
# # write current step to call stack, if history tracking is configured
# history_tracking__ = grammar.history_tracking__
# if history_tracking__:
# grammar.call_stack__.append(
# ((self.repr if self.tag_name in (':RegExp', ':Token', ':DropToken')
# else (self.pname or self.tag_name)), location))
# grammar.moving_forward__ = True
# finally, the actual parser call!
try:
......@@ -465,17 +465,17 @@ class Parser:
# self._add_resume_notice(rest, node)
elif pe.first_throw:
# TODO: Will this option be needed, if history tracking is deferred to module "trace"?
if history_tracking__: grammar.call_stack__.pop()
# if history_tracking__: grammar.call_stack__.pop()
raise ParserError(pe.node, pe.rest, pe.error, first_throw=False)
elif grammar.tree__.errors[-1].code == Error.MANDATORY_CONTINUATION_AT_EOF:
node = Node(self.tag_name, pe.node).with_pos(location) # try to create tree as faithful as possible
else:
result = (Node(ZOMBIE_TAG, text[:gap]).with_pos(location), pe.node) if gap \
else pe.node # type: ResultType
if history_tracking__: grammar.call_stack__.pop()
# if history_tracking__: grammar.call_stack__.pop()
raise ParserError(Node(self.tag_name, result).with_pos(location),
text, pe.error, first_throw=False)
self._add_resume_notice(rest, node)
# self._add_resume_notice(rest, node)
grammar.most_recent_error__ = pe.error # needed for history tracking
if left_recursion_depth__:
......@@ -515,24 +515,24 @@ class Parser:
# TODO: need a unit-test concerning interference of variable manipulation and left recursion algorithm?
visited[location] = (node, rest)
# Mind that memoized parser calls will not appear in the history record!
# Does this make sense? Or should it be changed?
if history_tracking__:
# don't track returning parsers except in case an error has occurred
if grammar.moving_forward__:
record = HistoryRecord(grammar.call_stack__, node, text,
grammar.line_col__(text))
grammar.history__.append(record)
elif grammar.most_recent_error__:
# error_nid = id(node) # type: int
# if error_nid in grammar.tree__.error_nodes:
record = HistoryRecord(grammar.call_stack__, node, text,
grammar.line_col__(text),
[grammar.most_recent_error__])
grammar.most_recent_error__ = None
grammar.history__.append(record)
grammar.moving_forward__ = False
grammar.call_stack__.pop()
# # Mind that memoized parser calls will not appear in the history record!
# # Does this make sense? Or should it be changed?
# if history_tracking__:
# # don't track returning parsers except in case an error has occurred
# if grammar.moving_forward__:
# record = HistoryRecord(grammar.call_stack__, node, text,
# grammar.line_col__(text))
# grammar.history__.append(record)
# elif grammar.most_recent_error__:
# # error_nid = id(node) # type: int
# # if error_nid in grammar.tree__.error_nodes:
# record = HistoryRecord(grammar.call_stack__, node, text,
# grammar.line_col__(text),
# [grammar.most_recent_error__])
# grammar.most_recent_error__ = None
# grammar.history__.append(record)
# grammar.moving_forward__ = False
# grammar.call_stack__.pop()
except RecursionError:
node = Node(ZOMBIE_TAG, str(text[:min(10, max(1, text.find("\n")))]) + " ...")
......@@ -1163,8 +1163,7 @@ class Grammar:
def __call__(self,
document: str,
start_parser: Union[str, Parser] = "root_parser__",
*, complete_match: bool = True,
track_history: bool = False) -> RootNode:
*, complete_match: bool = True) -> RootNode:
"""
Parses a document with parser-combinators.
......@@ -1175,10 +1174,6 @@ class Grammar:
(i.e. particular parts of the EBNF-Grammar.)
complete_match (bool): If True, an error is generated, if
`start_parser` did not match the entire document.
track_history (bool): If true, the parsing history will be
recorded in self. history__. If self.history_tracking__ is
True, the parsing history will always be recorded,
even if `False` is passed to the `track_history` parameter.
Returns:
Node: The root node to the parse tree.
"""
......@@ -1219,10 +1214,10 @@ class Grammar:
parser.reset()
else:
self._dirty_flag__ = True
save_history_tracking = self.history_tracking__
self.history_tracking__ = track_history or self.history_tracking__ or self.resume_notices__
# save_history_tracking = self.history_tracking__
# self.history_tracking__ = track_history or self.history_tracking__ or self.resume_notices__
# track history contains and retains the current tracking state
track_history = self.history_tracking__
# track_history = self.history_tracking__
self.document__ = StringView(document)
self.document_length__ = len(self.document__)
self._document_lbreaks__ = linebreaks(document) if self.history_tracking__ else []
......@@ -1256,7 +1251,7 @@ class Grammar:
fwd = rest.find("\n") + 1 or len(rest)
skip, rest = rest[:fwd], rest[fwd:]
if result is None:
err_info = '' if not track_history else \
err_info = '' if not self.history_tracking__ else \
'\n Most advanced: %s\n Last match: %s;' % \
(str(HistoryRecord.most_advanced_match(self.history__)),
str(HistoryRecord.last_match(self.history__)))
......@@ -1293,20 +1288,20 @@ class Grammar:
error_code = Error.PARSER_STOPPED_BEFORE_END
stitches.append(Node(ZOMBIE_TAG, skip).with_pos(tail_pos(stitches)))
self.tree__.new_error(stitches[-1], error_msg, error_code)
if self.history_tracking__:
# # some parsers may have matched and left history records with nodes != None.
# # Because these are not connected to the stitched root node, their pos-
# # properties will not be initialized by setting the root node's pos property
# # to zero. Therefore, their pos properties need to be initialized here
# for record in self.history__:
# if record.node and record.node._pos < 0:
# record.node.with_pos(0)
# print(self.call_stack__)
# record = HistoryRecord(self.call_stack__.copy(), stitches[-1], rest,
# self.line_col__(rest))
# self.history__.append(record)
# stop history tracking when parser returned too early
self.history_tracking__ = False
# if self.history_tracking__:
# # # some parsers may have matched and left history records with nodes != None.
# # # Because these are not connected to the stitched root node, their pos-
# # # properties will not be initialized by setting the root node's pos property
# # # to zero. Therefore, their pos properties need to be initialized here
# # for record in self.history__:
# # if record.node and record.node._pos < 0:
# # record.node.with_pos(0)
# # print(self.call_stack__)
# # record = HistoryRecord(self.call_stack__.copy(), stitches[-1], rest,
# # self.line_col__(rest))
# # self.history__.append(record)
# # stop history tracking when parser returned too early
# self.history_tracking__ = False
else:
rest = '' # if complete_match is False, ignore the rest and leave while loop
if stitches:
......@@ -1330,7 +1325,7 @@ class Grammar:
if result:
self.tree__.swallow(result)
self.start_parser__ = None
self.history_tracking__ = save_history_tracking
# self.history_tracking__ = save_history_tracking
return self.tree__
......
......@@ -22,14 +22,9 @@ all or some particular parsers of a grammar and trace the actions
of these parsers, making use of the `call_stack__`, `history__`
and `moving_forward__`, `most_recent_error__`-hooks in the
Grammar-object.
This allows for more flexible and at the same time more focused
tracing of the parsing process than the (older) parsing-history-
tracking-mechanism in the `parse` module, which will eventually
be superseded by tracing.
"""
from typing import Tuple, Optional, List, Collection, Union
from typing import Tuple, Optional, List, Iterable, Union
from DHParser.error import Error
from DHParser.stringview import StringView
......@@ -120,7 +115,7 @@ def with_unnamed_descendants(root: Parser) -> List[Parser]:
return descendants
def set_tracer(parsers: Union[Grammar, Parser, Collection[Parser]], tracer: Optional[ParseFunc]):
def set_tracer(parsers: Union[Grammar, Parser, Iterable[Parser]], tracer: Optional[ParseFunc]):
if isinstance(parsers, Grammar):
if tracer is None:
parsers.history_tracking__ = False
......@@ -129,8 +124,10 @@ def set_tracer(parsers: Union[Grammar, Parser, Collection[Parser]], tracer: Opti
elif isinstance(parsers, Parser):
parsers = [parsers]
if parsers:
assert all(parsers[0].grammar == parser.grammar for parser in parsers)
parsers[0].grammar.history_tracking__ = True
pivot = next(parsers.__iter__())
assert all(pivot.grammar == parser.grammar for parser in parsers)
if tracer is not None:
pivot.grammar.history_tracking__ = True
for parser in parsers:
if parser.ptype != ':Forward':
parser.set_proxy(tracer)
......@@ -139,4 +136,4 @@ def set_tracer(parsers: Union[Grammar, Parser, Collection[Parser]], tracer: Opti
def resume_notices_on(grammar: Grammar):
grammar.history_tracking__ = True
grammar.resume_notices__ = True
set_tracer(grammar, trace_history)
\ No newline at end of file
set_tracer(grammar, trace_history)
......@@ -38,6 +38,7 @@ from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compi
from DHParser.dsl import grammar_provider
from DHParser.syntaxtree import Node, parse_sxpr
from DHParser.stringview import StringView
from DHParser.trace import set_tracer, trace_history, resume_notices_on
......@@ -62,9 +63,10 @@ class TestParserError:
letters = /[A-Za-z]+/
"""
gr = grammar_provider(lang)()
st = gr('hard-time', track_history=True)
set_tracer(gr, trace_history)
st = gr('hard-time')
assert not st.errors
st = gr('hard-', track_history=True)
st = gr('hard-')
assert st.errors and not any(e.code == 1045 for e in st.errors)
......@@ -526,7 +528,7 @@ class TestErrorRecovery:
parser = grammar_provider(lang)()
st = parser('AB_D')
assert len(st.errors) == 1 # no additional "stopped before end"-error!
parser.resume_notices__ = True
resume_notices_on(parser)
st = parser('AB_D')
assert len(st.errors) == 2 and any(err.code == Error.RESUME_NOTICE for err in st.errors)
......@@ -843,8 +845,9 @@ class TestReentryAfterError:
assert cst.pick('alpha').content.startswith('ALPHA')
# because of resuming, there should be only one error message
assert len(cst.errors_sorted) == 1
gr.resume_notices__ = True
cst = gr(content, track_history=True)
resume_notices_on(gr)
cst = gr(content)
# print(cst.errors)
assert len(cst.errors) == 2 and any(err.code == Error.RESUME_NOTICE for err in cst.errors)
def test_several_resume_rules_innermost_rule_matching(self):
......
......@@ -33,6 +33,8 @@ from DHParser.dsl import grammar_provider
from DHParser.error import Error
from DHParser.testing import get_report, grammar_unit, unit_from_file, \
clean_report
from DHParser.trace import set_tracer, trace_history
CFG_FILE_1 = '''
# a comment
......@@ -310,13 +312,14 @@ class TestLookahead:
def test_unit_lookahead(self):
gr = self.grammar_fac()
set_tracer(gr, trace_history)
# Case 1: Lookahead string is part of the test case; parser fails but for the lookahead
result = gr(self.cases['category']['match'][1], 'category', track_history=True)
result = gr(self.cases['category']['match'][1], 'category')
assert any(e.code in (Error.PARSER_LOOKAHEAD_FAILURE_ONLY,
Error.PARSER_LOOKAHEAD_MATCH_ONLY)
for e in result.errors), str(result.errors)
# Case 2: Lookahead string is not part of the test case; parser matches but for the mandatory continuation
result = gr(self.cases['category']['match'][2], 'category', track_history=True)
result = gr(self.cases['category']['match'][2], 'category')
assert any(e.code == Error.MANDATORY_CONTINUATION_AT_EOF for e in result.errors)
errata = grammar_unit(self.cases, self.grammar_fac, self.trans_fac,
'REPORT_TestLookahead')
......
......@@ -20,13 +20,15 @@ limitations under the License.
"""
import os
import re
import sys
scriptpath = os.path.dirname(__file__) or '.'
sys.path.append(os.path.abspath(os.path.join(scriptpath, '..')))
from DHParser import grammar_provider, with_all_descendants, with_unnamed_descendants, \
set_tracer, trace_history, log_parsing_history, start_logging, set_config_value
set_tracer, trace_history, log_parsing_history, start_logging, set_config_value, \
resume_notices_on
class TestTrace:
......@@ -67,6 +69,37 @@ class TestTrace:
log_parsing_history(gr, 'trace_drop')
print(st.serialize())
def test_trace_resume(self):
    """Check resuming after a parsing error, with and without resume notices.

    The document deliberately contains 'acb' where rule `abc` expects
    "a" "b" "c". A resume rule for `alpha` lets the parser continue at
    the next 'BETA'. The test checks three things in turn:

    1. Without resume notices, exactly one error is reported and the
       whole document content is preserved in the tree.
    2. After `resume_notices_on(gr)`, the same parse yields two
       messages (the error plus one resume notice).
    3. After `set_tracer(gr, None)`, history tracking is off again.
    """
    lang = """
document = alpha [beta] gamma "."
alpha = "ALPHA" abc
abc = §"a" "b" "c"
beta = "BETA" (bac | bca)
bac = "b" "a" §"c"
bca = "b" "c" §"a"
gamma = "GAMMA" §(cab | cba)
cab = "c" "a" §"b"
cba = "c" "b" §"a"
"""
    gr = grammar_provider(lang)()
    # Reset the attribute that is actually written to on the next line.
    # Assigning to `resume_rules` (without the trailing double underscore)
    # would only create an unrelated attribute and leave `resume_rules__`
    # pointing at whatever dict it referenced before.
    gr.resume_rules__ = dict()
    gr.resume_rules__['alpha'] = [re.compile(r'(?=BETA)')]
    content = 'ALPHA acb BETA bac GAMMA cab .'

    cst = gr(content)
    assert cst.error_flag
    assert cst.content == content
    assert cst.pick('alpha').content.startswith('ALPHA')
    # because of resuming, there should be only one error message
    assert len(cst.errors_sorted) == 1

    # test resume notice
    resume_notices_on(gr)
    cst = gr(content)
    # there should be one error message and one resume notice
    assert len(cst.errors_sorted) == 2

    # removing the tracer must switch history tracking off again
    set_tracer(gr, None)
    assert not gr.history_tracking__
if __name__ == "__main__":
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment