From 781802c9940e0accf41336e8de9fa3b1d94b4bd7 Mon Sep 17 00:00:00 2001 From: eckhart Date: Sat, 13 Jun 2020 00:39:23 +0200 Subject: [PATCH] parse.py: use memoize as flag --- DHParser/configuration.py | 10 --------- DHParser/parse.py | 47 +++++++++++++++++---------------------- tests/test_ebnf.py | 2 +- tests/test_parse.py | 2 +- 4 files changed, 22 insertions(+), 39 deletions(-) diff --git a/DHParser/configuration.py b/DHParser/configuration.py index fcd1711..b5aad39 100644 --- a/DHParser/configuration.py +++ b/DHParser/configuration.py @@ -224,16 +224,6 @@ CONFIG_PRESET['history_tracking'] = False # Default value: False CONFIG_PRESET['resume_notices'] = False -# Turns on memoization. This means that whenever a parser returns the -# result it yielded at the location at which it was called will be -# stored in the parser's "visited"-cache, so that in case the same -# parser is called again at the same location, the result can be -# retrieved from the cache. This kind of caching is the central -# design feature of pack-rat parsers, which ensures their finishing -# in linear time (https://pdos.csail.mit.edu/~baford/packrat/thesis/) -# Default value: True -CONFIG_PRESET['memoization'] = True - # Turns on the left-recursion-handling algorithm. This allows the use # of left-recursion in grammars, which otherwise would run a recursive # descent parser into an infinite-loop. diff --git a/DHParser/parse.py b/DHParser/parse.py index 28f0e60..d1c8375 100644 --- a/DHParser/parse.py +++ b/DHParser/parse.py @@ -450,6 +450,8 @@ class Parser: recursion_state = grammar.returning_from_recursion__ grammar.returning_from_recursion__ = False + memoization_state = grammar.memoization__ + grammar.memoization__ = True # now, the actual parser call! 
try: @@ -504,16 +506,14 @@ class Parser: if node is not None: node._pos = location - if (grammar.memoization__ - and not grammar.returning_from_recursion__ - # Variable-manipulating parsers will not be entered into the cache, - # because caching would interfere with changes of variable state. - # See `_rollback_location()` for the added compensation term. - and location > grammar.last_rb__loc__ + int(text._len == rest._len)): - visited[location] = (node, rest) - if not grammar.returning_from_recursion__: + # grammar.memoization__ = location > (grammar.last_rb__loc__ + # + int(text._len == rest._len)) + if grammar.memoization__: + visited[location] = (node, rest) grammar.returning_from_recursion__ = recursion_state + if grammar.memoization__: + grammar.memoization__ = memoization_state except RecursionError: node = Node(ZOMBIE_TAG, str(text[:min(10, max(1, text.find("\n")))]) + " ...") @@ -990,10 +990,10 @@ class Grammar: (resulting in a maximum recursion depth reached error) when the grammar definition contains left recursions. - memoization__: Turns full memoization on or off. Turning memoization off - results in less memory usage and sometimes reduced parsing time. - In some situations it may drastically increase parsing time, so - it is safer to leave it on. (Default: on) + memoization__: A flag that signals whether return values shall be + memoized. This flag is set to `True` when the parser is going + forward in the call sequence, but may be set to `False` by + context-sensitive parsers that are incompatible with memoization. 
# mirrored class attributes: @@ -1152,7 +1152,6 @@ class Grammar: or (not self.__class__.COMMENT__ and self.comment_rx__ == RX_NEVER_MATCH)) self.start_parser__ = None # type: Optional[Parser] self._dirty_flag__ = False # type: bool - self.memoization__ = get_config_value('memoization') # type: bool self.left_recursion__ = get_config_value('left_recursion') # type: bool self.history_tracking__ = get_config_value('history_tracking') # type: bool self.resume_notices__ = get_config_value('resume_notices') # type: bool @@ -1233,6 +1232,7 @@ class Grammar: self.variables__ = defaultdict(lambda: []) # type: DefaultDict[str, List[str]] self.rollback__ = [] # type: List[Tuple[int, Callable]] self.last_rb__loc__ = -2 # type: int + self.memoization__ = True # type: bool # support for call stack tracing self.call_stack__ = [] # type: List[CallItem] # tag_name, location # snapshots of call stacks @@ -1441,6 +1441,7 @@ class Grammar: """ self.rollback__.append((location, func)) self.last_rb__loc__ = location + self.memoization__ = False @property @@ -2930,12 +2931,6 @@ class ContextSensitive(UnaryParser): starts to move forward again. Only those variable changes should be rolled back the locations of which have been passed when backtracking. - The rollback location is furthermore used to block memoizing. Since - the result returned by a variable changing parser (or a parser - that directly or indirectly calls a variable changing parser), should - never be memoized, memoizing is only triggered, when the location of - a returning parser is greater than the last rollback location. - Usually, the rollback location is exactly the location, where the parser started parsing. However, the rollback-location must lie before the location where the parser stopped, because otherwise variable changes @@ -2943,12 +2938,7 @@ class ContextSensitive(UnaryParser): zero length data. 
In order to avoid this, the rollback location is artificially reduced by one in case the parser did not capture any text (either of the two equivalent criteria len(text) == len(rest) or - len(node) == 0) identifies this case). As this in turn could lead - to the return values of variable changing parsers being memoized, because - memoizing is triggered if the location of a returning parser is greater - than the last rollback location, this must be compensated again in - `Parser.__call__()` (and, likewise, `Forward.__call__()`) before - memoizing is triggered. + len(node) == 0) identifies this case). """ L = text._len rb_loc = self.grammar.document_length__ - L @@ -3320,6 +3310,7 @@ class Forward(UnaryParser): else: recursion_state = grammar.returning_from_recursion__ self.recursion_counter[location] = 0 # fail on the first recursion + grammar.memoization__ = True result = self.parser(text) if result[0] is not None: # keep calling the (potentially left-)recursive parser and increase @@ -3330,6 +3321,7 @@ class Forward(UnaryParser): self.recursion_counter[location] = depth grammar.returning_from_recursion__ = False rb_stack_size = len(grammar.rollback__) + grammar.memoization__ = True next_result = self.parser(text) # discard next_result if it is not the longest match and return if len(next_result[1]) >= len(result[1]): # also true, if no match @@ -3354,8 +3346,9 @@ class Forward(UnaryParser): break result = next_result depth += 1 - if (grammar.memoization__ # see `_rollback_location()` for added compensation term - and location > grammar.last_rb__loc__ + int(text._len == result[1]._len)): + grammar.memoization__ = location > (grammar.last_rb__loc__ + + int(text._len == result[1]._len)) + if grammar.memoization__: visited[location] = result grammar.returning_from_recursion__ = recursion_state return result diff --git a/tests/test_ebnf.py b/tests/test_ebnf.py index 9ad2fc2..36c4c42 100644 --- a/tests/test_ebnf.py +++ b/tests/test_ebnf.py @@ -988,7 +988,7 @@ VARIABLE 
::= /[A-Za-z]/, ~; class TestAlternativeEBNFSyntax: def test_alt_syntax(self): code, errors, ast = compile_ebnf(ArithmeticEBNF, preserve_AST=True) - assert not ast.error_flag + assert not ast.error_flag, str(ast.errors) arithmetic_grammer = compile_python_object( DHPARSER_IMPORTS.format(dhparser_parentdir=DHPARSER_PARENTDIR) + code) arithmetic_parser = arithmetic_grammer() diff --git a/tests/test_parse.py b/tests/test_parse.py index 229e2b7..6ef2f04 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -892,7 +892,7 @@ class TestPopRetrieve: st = gr(case) # log_parsing_history(gr, 'test_cache_neutrality_3') # print(st.as_sxpr()) - assert not st.errors + assert not st.errors, str(errors) case = 'AXXX!' st = gr(case) assert not st.errors -- GitLab