Commit b75d6237 authored by Eckhart Arnold
Browse files

- parser.py: memoization streamlined and optimized

parent bcd332a8
...@@ -230,6 +230,12 @@ def add_parser_guard(parser_func): ...@@ -230,6 +230,12 @@ def add_parser_guard(parser_func):
(aka "history tracking") of parser calls. Returns the wrapped call. (aka "history tracking") of parser calls. Returns the wrapped call.
""" """
def guarded_call(parser: 'Parser', text: str) -> Tuple[Node, str]: def guarded_call(parser: 'Parser', text: str) -> Tuple[Node, str]:
def memoized(parser, location):
node = parser.visited[location]
rlen = location - (0 if node is None else node.len)
rest = grammar.document__[-rlen:] if rlen else ''
return node, rest
try: try:
location = len(text) # mind that location is always the distance to the end location = len(text) # mind that location is always the distance to the end
grammar = parser.grammar # grammar may be 'None' for unconnected parsers! grammar = parser.grammar # grammar may be 'None' for unconnected parsers!
...@@ -240,10 +246,7 @@ def add_parser_guard(parser_func): ...@@ -240,10 +246,7 @@ def add_parser_guard(parser_func):
# if location has already been visited by the current parser, # if location has already been visited by the current parser,
# return saved result # return saved result
if location in parser.visited: if location in parser.visited:
node = parser.visited[location] return memoized(parser, location)
rlen = location - (0 if node is None else node.len)
rest = grammar.document__[-rlen:] if rlen else ''
return node, rest
# break left recursion at the maximum allowed depth # break left recursion at the maximum allowed depth
if parser.recursion_counter.setdefault(location, 0) > LEFT_RECURSION_DEPTH: if parser.recursion_counter.setdefault(location, 0) > LEFT_RECURSION_DEPTH:
...@@ -261,24 +264,21 @@ def add_parser_guard(parser_func): ...@@ -261,24 +264,21 @@ def add_parser_guard(parser_func):
if node is None: if node is None:
# retrieve an earlier match result (from left recursion) if it exists # retrieve an earlier match result (from left recursion) if it exists
node = parser.visited.get(location, None) if location in grammar.recursion_locations__:
rlen = location - (0 if node is None else node.len) if location in parser.visited:
rest = grammar.document__[-rlen:] if rlen else '' node, rest = memoized(parser, location)
# don't overwrite any positive match (i.e. node not None) in the cache # don't overwrite any positive match (i.e. node not None) in the cache
# and don't add empty entries for parsers returning from left recursive calls! # and don't add empty entries for parsers returning from left recursive calls!
# COMMENT THIS TO TURN FULL MEMOIZATION OFF elif grammar.memoization__:
if node is None and location not in grammar.recursion_locations__:
# otherwise also cache None-results # otherwise also cache None-results
parser.visited[location] = None parser.visited[location] = None
else: elif ((grammar.memoization__ or location in grammar.recursion_locations__)
# variable manipulating parsers will be excluded, though, and grammar.last_rb__loc__ > location):
# because caching would interfere with changes of variable state # - variable manipulating parsers will not be entered into the cache,
if grammar.last_rb__loc__ > location: # because caching would interfere with changes of variable state
# in case of left recursion, the first recursive step that # - in case of left recursion, the first recursive step that
# matches will store its result in the cache # matches will store its result in the cache
# UNCOMMENT THIS TO TURN FULL MEMOIZATION OFF parser.visited[location] = node
# if location in grammar.recursion_locations__:
parser.visited[location] = node
parser.recursion_counter[location] -= 1 parser.recursion_counter[location] -= 1
...@@ -616,6 +616,10 @@ class Grammar: ...@@ -616,6 +616,10 @@ class Grammar:
recursion detection algorithm, but, strictly speaking, superfluous recursion detection algorithm, but, strictly speaking, superfluous
if full memoization is enabled. (See `add_parser_guard` and its if full memoization is enabled. (See `add_parser_guard` and its
local function `guarded_call`) local function `guarded_call`)
memoization__: Turns full memoization on or off. Turning memoization off
results in less memory usage and sometimes reduced parsing time.
In some situations it may drastically increase parsing time, so
it is safer to leave it on.
""" """
root__ = None # type: Union[Parser, None] root__ = None # type: Union[Parser, None]
# root__ must be overwritten with the root-parser by grammar subclass # root__ must be overwritten with the root-parser by grammar subclass
...@@ -672,6 +676,7 @@ class Grammar: ...@@ -672,6 +676,7 @@ class Grammar:
self.all_parsers__ = set() # type: Set[Parser] self.all_parsers__ = set() # type: Set[Parser]
self._dirty_flag__ = False # type: bool self._dirty_flag__ = False # type: bool
self.history_tracking__ = False # type: bool self.history_tracking__ = False # type: bool
self.memoization__ = True # type: bool
self._reset__() self._reset__()
# prepare parsers in the class, first # prepare parsers in the class, first
......
...@@ -208,11 +208,11 @@ def create_project(path, ...@@ -208,11 +208,11 @@ def create_project(path,
print('ready.') print('ready.')
def profile(func): def cpu_profile(func, repetitions=1):
import cProfile, pstats import cProfile, pstats
pr = cProfile.Profile() pr = cProfile.Profile()
pr.enable() pr.enable()
for i in range(1): for i in range(repetitions):
success = func() success = func()
if not success: if not success:
break break
...@@ -220,11 +220,11 @@ def profile(func): ...@@ -220,11 +220,11 @@ def profile(func):
# after your program ends # after your program ends
st = pstats.Stats(pr) st = pstats.Stats(pr)
st.strip_dirs() st.strip_dirs()
st.sort_stats('time').print_stats(10) st.sort_stats('time').print_stats(40)
return success return success
def mem_profile(func): def mem_profile(func, dummy=0):
import tracemalloc import tracemalloc
tracemalloc.start() tracemalloc.start()
success = func() success = func()
...@@ -250,6 +250,6 @@ if __name__ == "__main__": ...@@ -250,6 +250,6 @@ if __name__ == "__main__":
# run self test # run self test
# selftest('EBNF/EBNF.ebnf') # selftest('EBNF/EBNF.ebnf')
with logging(False): with logging(False):
if not mem_profile(selftest): if not cpu_profile(selftest, 1):
sys.exit(1) sys.exit(1)
...@@ -73,7 +73,7 @@ def cpu_profile(func): ...@@ -73,7 +73,7 @@ def cpu_profile(func):
pr.disable() pr.disable()
st = pstats.Stats(pr) st = pstats.Stats(pr)
st.strip_dirs() st.strip_dirs()
st.sort_stats('time').print_stats(20) st.sort_stats('time').print_stats(40)
def mem_profile(func): def mem_profile(func):
...@@ -87,7 +87,7 @@ def mem_profile(func): ...@@ -87,7 +87,7 @@ def mem_profile(func):
print(stat) print(stat)
if __name__ == "__main__": if __name__ == "__main__":
mem_profile(test) cpu_profile(test)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment