2.12.2021, 9:00 - 11:00: Due to updates, GitLab may be unavailable for a few minutes between 09:00 and 11:00.

Commit 0a6602bf authored by di68kap's avatar di68kap
Browse files

- parsers.py: memoizing simplified

parent a471a1e0
...@@ -231,19 +231,17 @@ def add_parser_guard(parser_func): ...@@ -231,19 +231,17 @@ def add_parser_guard(parser_func):
(aka "history tracking") of parser calls. Returns the wrapped call. (aka "history tracking") of parser calls. Returns the wrapped call.
""" """
def guarded_call(parser: 'Parser', text: StringView) -> Tuple[Node, StringView]: def guarded_call(parser: 'Parser', text: StringView) -> Tuple[Node, StringView]:
assert isinstance(text, StringView) # def memoized(parser, location):
# node = parser.visited[location]
def memoized(parser, location): # rlen = location - (0 if node is None else node.len)
node = parser.visited[location] # rest = grammar.document__[-rlen:] if rlen else EMPTY_STRING_VIEW
rlen = location - (0 if node is None else node.len) # return node, rest
rest = grammar.document__[-rlen:] if rlen else EMPTY_STRING_VIEW # # NOTE: An older and simpler implementation of memoization
return node, rest # # relied on `parser.visited[location] == node, rest`. Although,
# NOTE: An older and simpler implementation of memoization # # rest is really just a substring of one and the same document,
# relied on `parser.visited[location] == node, rest`. Although, # # this resulted in an explosion of memory usage. Seems that
# rest is really just a substring of one and the same document, # # `rext = text[i:]` really copies the sub-string. See:
# this resulted in an explosion of memory usage. Seems that # # https://mail.python.org/pipermail/python-dev/2008-May/079699.html
# `rext = text[i:]` really copies the sub-string. See:
# https://mail.python.org/pipermail/python-dev/2008-May/079699.html
try: try:
location = len(text) # mind that location is always the distance to the end location = len(text) # mind that location is always the distance to the end
...@@ -255,7 +253,7 @@ def add_parser_guard(parser_func): ...@@ -255,7 +253,7 @@ def add_parser_guard(parser_func):
# if location has already been visited by the current parser, # if location has already been visited by the current parser,
# return saved result # return saved result
if location in parser.visited: if location in parser.visited:
return memoized(parser, location) return parser.visited[location]
# break left recursion at the maximum allowed depth # break left recursion at the maximum allowed depth
if parser.recursion_counter.setdefault(location, 0) > LEFT_RECURSION_DEPTH: if parser.recursion_counter.setdefault(location, 0) > LEFT_RECURSION_DEPTH:
...@@ -270,25 +268,24 @@ def add_parser_guard(parser_func): ...@@ -270,25 +268,24 @@ def add_parser_guard(parser_func):
# run original __call__ method # run original __call__ method
node, rest = parser_func(parser, text) node, rest = parser_func(parser, text)
assert isinstance(rest, StringView)
if node is None: if node is None:
# retrieve an earlier match result (from left recursion) if it exists # retrieve an earlier match result (from left recursion) if it exists
if location in grammar.recursion_locations__: if location in grammar.recursion_locations__:
if location in parser.visited: if location in parser.visited:
node, rest = memoized(parser, location) node, rest = parser.visited[location]
# don't overwrite any positive match (i.e. node not None) in the cache # don't overwrite any positive match (i.e. node not None) in the cache
# and don't add empty entries for parsers returning from left recursive calls! # and don't add empty entries for parsers returning from left recursive calls!
elif grammar.memoization__: elif grammar.memoization__:
# otherwise also cache None-results # otherwise also cache None-results
parser.visited[location] = None parser.visited[location] = (None, rest)
elif ((grammar.memoization__ or location in grammar.recursion_locations__) elif ((grammar.memoization__ or location in grammar.recursion_locations__)
and grammar.last_rb__loc__ > location): and grammar.last_rb__loc__ > location):
# - variable manipulating parsers will not be entered into the cache, # - variable manipulating parsers will not be entered into the cache,
# because caching would interfere with changes of variable state # because caching would interfere with changes of variable state
# - in case of left recursion, the first recursive step that # - in case of left recursion, the first recursive step that
# matches will store its result in the cache # matches will store its result in the cache
parser.visited[location] = node parser.visited[location] = (node, rest)
parser.recursion_counter[location] -= 1 parser.recursion_counter[location] -= 1
...@@ -369,7 +366,7 @@ class Parser(ParserBase, metaclass=ParserMetaClass): ...@@ -369,7 +366,7 @@ class Parser(ParserBase, metaclass=ParserMetaClass):
Attributes: Attributes:
visited: Mapping of places this parser has already been to visited: Mapping of places this parser has already been to
during the current parsing process onto the node the during the current parsing process onto the results the
parser returned at the respective place. This dictionary parser returned at the respective place. This dictionary
is used to implement memoizing. is used to implement memoizing.
...@@ -408,7 +405,7 @@ class Parser(ParserBase, metaclass=ParserMetaClass): ...@@ -408,7 +405,7 @@ class Parser(ParserBase, metaclass=ParserMetaClass):
"""Initializes or resets any parser variables. If overwritten, """Initializes or resets any parser variables. If overwritten,
the `reset()`-method of the parent class must be called from the the `reset()`-method of the parent class must be called from the
`reset()`-method of the derived class.""" `reset()`-method of the derived class."""
self.visited = dict() # type: Dict[int, Node] self.visited = dict() # type: Dict[int, Tuple[Node, StringView]]
self.recursion_counter = dict() # type: Dict[int, int] self.recursion_counter = dict() # type: Dict[int, int]
self.cycle_detection = set() # type: Set[Callable] self.cycle_detection = set() # type: Set[Callable]
return self return self
......
...@@ -191,9 +191,9 @@ class StringView: ...@@ -191,9 +191,9 @@ class StringView:
return self.text[self.begin:self.end] return self.text[self.begin:self.end]
def __getitem__(self, index): def __getitem__(self, index):
assert isinstance(index, slice), "As of now, StringView only allows slicing." # assert isinstance(index, slice), "As of now, StringView only allows slicing."
assert index.step is None or index.step == 1, \ # assert index.step is None or index.step == 1, \
"Step sizes other than 1 are not yet supported by StringView" # "Step sizes other than 1 are not yet supported by StringView"
start, stop = StringView.real_indices(index.start, index.stop, self.len) start, stop = StringView.real_indices(index.start, index.stop, self.len)
return StringView(self.text, self.begin + start, self.begin + stop) return StringView(self.text, self.begin + start, self.begin + stop)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment