Commit 781802c9 authored by eckhart

parse.py: use memoize as flag

parent 1215f1f6
...@@ -224,16 +224,6 @@ CONFIG_PRESET['history_tracking'] = False ...@@ -224,16 +224,6 @@ CONFIG_PRESET['history_tracking'] = False
# Default value: False # Default value: False
CONFIG_PRESET['resume_notices'] = False CONFIG_PRESET['resume_notices'] = False
# Turns on memoization. This means that whenever a parser returns, the
# result it yielded at the location at which it was called will be
# stored in the parser's "visited"-cache, so that in case the same
# parser is called again at the same location, the result can be
# retrieved from the cache. This kind of caching is the central
# design feature of pack-rat parsers, which ensures their finishing
# in linear time (https://pdos.csail.mit.edu/~baford/packrat/thesis/)
# Default value: True
CONFIG_PRESET['memoization'] = True
# Turns on the left-recursion-handling algorithm. This allows the use # Turns on the left-recursion-handling algorithm. This allows the use
# of left-recursion in grammars, which otherwise would run a recursive # of left-recursion in grammars, which otherwise would run a recursive
# descent parser into an infinite-loop. # descent parser into an infinite-loop.
......
...@@ -450,6 +450,8 @@ class Parser: ...@@ -450,6 +450,8 @@ class Parser:
recursion_state = grammar.returning_from_recursion__ recursion_state = grammar.returning_from_recursion__
grammar.returning_from_recursion__ = False grammar.returning_from_recursion__ = False
memoization_state = grammar.memoization__
grammar.memoization__ = True
# now, the actual parser call! # now, the actual parser call!
try: try:
...@@ -504,16 +506,14 @@ class Parser: ...@@ -504,16 +506,14 @@ class Parser:
if node is not None: if node is not None:
node._pos = location node._pos = location
if (grammar.memoization__
and not grammar.returning_from_recursion__
# Variable-manipulating parsers will not be entered into the cache,
# because caching would interfere with changes of variable state.
# See `_rollback_location()` for the added compensation term.
and location > grammar.last_rb__loc__ + int(text._len == rest._len)):
visited[location] = (node, rest)
if not grammar.returning_from_recursion__: if not grammar.returning_from_recursion__:
# grammar.memoization__ = location > (grammar.last_rb__loc__
# + int(text._len == rest._len))
if grammar.memoization__:
visited[location] = (node, rest)
grammar.returning_from_recursion__ = recursion_state grammar.returning_from_recursion__ = recursion_state
if grammar.memoization__:
grammar.memoization__ = memoization_state
except RecursionError: except RecursionError:
node = Node(ZOMBIE_TAG, str(text[:min(10, max(1, text.find("\n")))]) + " ...") node = Node(ZOMBIE_TAG, str(text[:min(10, max(1, text.find("\n")))]) + " ...")
...@@ -990,10 +990,10 @@ class Grammar: ...@@ -990,10 +990,10 @@ class Grammar:
(resulting in a maximum recursion depth reached error) when (resulting in a maximum recursion depth reached error) when
the grammar definition contains left recursions. the grammar definition contains left recursions.
memoization__: Turns full memoization on or off. Turning memoization off memoization__: A flag that signals whether return values shall be
results in less memory usage and sometimes reduced parsing time. memoized. This flag is set to `True` when the parser is going
In some situations it may drastically increase parsing time, so forward in the call sequence, but may be set to `False` by
it is safer to leave it on. (Default: on) context-sensitive parsers that are incompatible with memoization.
# mirrored class attributes: # mirrored class attributes:
...@@ -1152,7 +1152,6 @@ class Grammar: ...@@ -1152,7 +1152,6 @@ class Grammar:
or (not self.__class__.COMMENT__ and self.comment_rx__ == RX_NEVER_MATCH)) or (not self.__class__.COMMENT__ and self.comment_rx__ == RX_NEVER_MATCH))
self.start_parser__ = None # type: Optional[Parser] self.start_parser__ = None # type: Optional[Parser]
self._dirty_flag__ = False # type: bool self._dirty_flag__ = False # type: bool
self.memoization__ = get_config_value('memoization') # type: bool
self.left_recursion__ = get_config_value('left_recursion') # type: bool self.left_recursion__ = get_config_value('left_recursion') # type: bool
self.history_tracking__ = get_config_value('history_tracking') # type: bool self.history_tracking__ = get_config_value('history_tracking') # type: bool
self.resume_notices__ = get_config_value('resume_notices') # type: bool self.resume_notices__ = get_config_value('resume_notices') # type: bool
...@@ -1233,6 +1232,7 @@ class Grammar: ...@@ -1233,6 +1232,7 @@ class Grammar:
self.variables__ = defaultdict(lambda: []) # type: DefaultDict[str, List[str]] self.variables__ = defaultdict(lambda: []) # type: DefaultDict[str, List[str]]
self.rollback__ = [] # type: List[Tuple[int, Callable]] self.rollback__ = [] # type: List[Tuple[int, Callable]]
self.last_rb__loc__ = -2 # type: int self.last_rb__loc__ = -2 # type: int
self.memoization__ = True # type: bool
# support for call stack tracing # support for call stack tracing
self.call_stack__ = [] # type: List[CallItem] # tag_name, location self.call_stack__ = [] # type: List[CallItem] # tag_name, location
# snapshots of call stacks # snapshots of call stacks
...@@ -1441,6 +1441,7 @@ class Grammar: ...@@ -1441,6 +1441,7 @@ class Grammar:
""" """
self.rollback__.append((location, func)) self.rollback__.append((location, func))
self.last_rb__loc__ = location self.last_rb__loc__ = location
self.memoization__ = False
@property @property
...@@ -2930,12 +2931,6 @@ class ContextSensitive(UnaryParser): ...@@ -2930,12 +2931,6 @@ class ContextSensitive(UnaryParser):
starts to move forward again. Only those variable changes should be starts to move forward again. Only those variable changes should be
rolled back the locations of which have been passed when backtracking. rolled back the locations of which have been passed when backtracking.
The rollback location is furthermore used to block memoizing. Since
the result returned by a variable changing parser (or a parser
that directly or indirectly calls a variable changing parser), should
never be memoized, memoizing is only triggered, when the location of
a returning parser is greater than the last rollback location.
Usually, the rollback location is exactly the location, where the parser Usually, the rollback location is exactly the location, where the parser
started parsing. However, the rollback-location must lie before the started parsing. However, the rollback-location must lie before the
location where the parser stopped, because otherwise variable changes location where the parser stopped, because otherwise variable changes
...@@ -2943,12 +2938,7 @@ class ContextSensitive(UnaryParser): ...@@ -2943,12 +2938,7 @@ class ContextSensitive(UnaryParser):
zero length data. In order to avoid this, the rollback location is zero length data. In order to avoid this, the rollback location is
artificially reduced by one in case the parser did not capture any text artificially reduced by one in case the parser did not capture any text
(either of the two equivalent criteria len(text) == len(rest) or (either of the two equivalent criteria len(text) == len(rest) or
len(node) == 0) identifies this case). As this in turn could lead len(node) == 0) identifies this case).
to the return values of variable changing parsers being memoized, because
memoizing is triggered if the location of a returning parser is greater
than the last rollback location, this must be compensated again in
`Parser.__call__()` (and, likewise, `Forward.__call__()`) before
memoizing is triggered.
""" """
L = text._len L = text._len
rb_loc = self.grammar.document_length__ - L rb_loc = self.grammar.document_length__ - L
...@@ -3320,6 +3310,7 @@ class Forward(UnaryParser): ...@@ -3320,6 +3310,7 @@ class Forward(UnaryParser):
else: else:
recursion_state = grammar.returning_from_recursion__ recursion_state = grammar.returning_from_recursion__
self.recursion_counter[location] = 0 # fail on the first recursion self.recursion_counter[location] = 0 # fail on the first recursion
grammar.memoization__ = True
result = self.parser(text) result = self.parser(text)
if result[0] is not None: if result[0] is not None:
# keep calling the (potentially left-)recursive parser and increase # keep calling the (potentially left-)recursive parser and increase
...@@ -3330,6 +3321,7 @@ class Forward(UnaryParser): ...@@ -3330,6 +3321,7 @@ class Forward(UnaryParser):
self.recursion_counter[location] = depth self.recursion_counter[location] = depth
grammar.returning_from_recursion__ = False grammar.returning_from_recursion__ = False
rb_stack_size = len(grammar.rollback__) rb_stack_size = len(grammar.rollback__)
grammar.memoization__ = True
next_result = self.parser(text) next_result = self.parser(text)
# discard next_result if it is not the longest match and return # discard next_result if it is not the longest match and return
if len(next_result[1]) >= len(result[1]): # also true, if no match if len(next_result[1]) >= len(result[1]): # also true, if no match
...@@ -3354,8 +3346,9 @@ class Forward(UnaryParser): ...@@ -3354,8 +3346,9 @@ class Forward(UnaryParser):
break break
result = next_result result = next_result
depth += 1 depth += 1
if (grammar.memoization__ # see `_rollback_location()` for added compensation term grammar.memoization__ = location > (grammar.last_rb__loc__
and location > grammar.last_rb__loc__ + int(text._len == result[1]._len)): + int(text._len == result[1]._len))
if grammar.memoization__:
visited[location] = result visited[location] = result
grammar.returning_from_recursion__ = recursion_state grammar.returning_from_recursion__ = recursion_state
return result return result
......
...@@ -988,7 +988,7 @@ VARIABLE ::= /[A-Za-z]/, ~; ...@@ -988,7 +988,7 @@ VARIABLE ::= /[A-Za-z]/, ~;
class TestAlternativeEBNFSyntax: class TestAlternativeEBNFSyntax:
def test_alt_syntax(self): def test_alt_syntax(self):
code, errors, ast = compile_ebnf(ArithmeticEBNF, preserve_AST=True) code, errors, ast = compile_ebnf(ArithmeticEBNF, preserve_AST=True)
assert not ast.error_flag assert not ast.error_flag, str(ast.errors)
arithmetic_grammer = compile_python_object( arithmetic_grammer = compile_python_object(
DHPARSER_IMPORTS.format(dhparser_parentdir=DHPARSER_PARENTDIR) + code) DHPARSER_IMPORTS.format(dhparser_parentdir=DHPARSER_PARENTDIR) + code)
arithmetic_parser = arithmetic_grammer() arithmetic_parser = arithmetic_grammer()
......
...@@ -892,7 +892,7 @@ class TestPopRetrieve: ...@@ -892,7 +892,7 @@ class TestPopRetrieve:
st = gr(case) st = gr(case)
# log_parsing_history(gr, 'test_cache_neutrality_3') # log_parsing_history(gr, 'test_cache_neutrality_3')
# print(st.as_sxpr()) # print(st.as_sxpr())
assert not st.errors assert not st.errors, str(errors)
case = 'AXXX!' case = 'AXXX!'
st = gr(case) st = gr(case)
assert not st.errors assert not st.errors
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment