10.12., 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit f2162cfb authored by eckhart

- early position handling finished

parent cc0f248d
...@@ -868,11 +868,10 @@ class Grammar: ...@@ -868,11 +868,10 @@ class Grammar:
Returns: Returns:
Node: The root node ot the parse tree. Node: The root node ot the parse tree.
""" """
def add_pos(node: Node, predecessors: List[Node]) -> int: def tail_pos(predecessors: List[Node]) -> int:
"""Adds the position after the last node in the list of """Adds the position after the last node in the list of
predecessors to the node.""" predecessors to the node."""
node._pos = predecessors[-1]._pos + len(predecessors[-1]) if predecessors else 0 return predecessors[-1].pos + len(predecessors[-1]) if predecessors else 0
return node
# assert isinstance(document, str), type(document) # assert isinstance(document, str), type(document)
if self.root__ is None: if self.root__ is None:
...@@ -897,7 +896,7 @@ class Grammar: ...@@ -897,7 +896,7 @@ class Grammar:
if not rest: if not rest:
result, _ = parser(rest) result, _ = parser(rest)
if result is None: if result is None:
result = Node(None, '') result = Node(None, '').init_pos(0)
result.add_error('Parser "%s" did not match empty document.' % str(parser)) result.add_error('Parser "%s" did not match empty document.' % str(parser))
while rest and len(stitches) < MAX_DROPOUTS: while rest and len(stitches) < MAX_DROPOUTS:
result, rest = parser(rest) result, rest = parser(rest)
...@@ -917,24 +916,24 @@ class Grammar: ...@@ -917,24 +916,24 @@ class Grammar:
if self.history_tracking__ else "...")) if self.history_tracking__ else "..."))
if len(stitches) < MAX_DROPOUTS if len(stitches) < MAX_DROPOUTS
else " too often! Terminating parser.") else " too often! Terminating parser.")
stitches.append(add_pos(Node(None, skip), stitches)) stitches.append(Node(None, skip).init_pos(tail_pos(stitches)))
stitches[-1].add_error(error_msg) stitches[-1].add_error(error_msg)
if self.history_tracking__: if self.history_tracking__:
# some parsers may have matched and left history records with nodes != None. # # some parsers may have matched and left history records with nodes != None.
# Because these are not connected to the stitched root node, their pos- # # Because these are not connected to the stitched root node, their pos-
# properties will not be initialized by setting the root node's pos property # # properties will not be initialized by setting the root node's pos property
# to zero. Therefore, their pos properties need to be initialized here # # to zero. Therefore, their pos properties need to be initialized here
for record in self.history__: # for record in self.history__:
if record.node and record.node._pos < 0: # if record.node and record.node._pos < 0:
record.node.pos = 0 # record.node.init_pos(0)
record = HistoryRecord(self.call_stack__.copy(), stitches[-1], rest) record = HistoryRecord(self.call_stack__.copy(), stitches[-1], rest)
self.history__.append(record) self.history__.append(record)
# stop history tracking when parser returned too early # stop history tracking when parser returned too early
self.history_tracking__ = False self.history_tracking__ = False
if stitches: if stitches:
if rest: if rest:
stitches.append(add_pos(Node(None, rest), stitches)) stitches.append(Node(None, rest).init_pos(tail_pos(stitches)))
result = add_pos(Node(None, tuple(stitches)), []) result = Node(None, tuple(stitches))
if any(self.variables__.values()): if any(self.variables__.values()):
error_str = "Capture-retrieve-stack not empty after end of parsing: " + \ error_str = "Capture-retrieve-stack not empty after end of parsing: " + \
str(self.variables__) str(self.variables__)
...@@ -943,12 +942,12 @@ class Grammar: ...@@ -943,12 +942,12 @@ class Grammar:
# add another child node at the end to ensure that the position # add another child node at the end to ensure that the position
# of the error will be the end of the text. Otherwise, the error # of the error will be the end of the text. Otherwise, the error
# message above ("...after end of parsing") would appear illogical. # message above ("...after end of parsing") would appear illogical.
error_node = Node(ZOMBIE_PARSER, '') error_node = Node(ZOMBIE_PARSER, '').init_pos(tail_pos(result.children))
error_node.add_error(error_str) error_node.add_error(error_str)
result.result = result.children + (add_pos(error_node, result.children),) result.result = result.children + (error_node,)
else: else:
result.add_error(error_str) result.add_error(error_str)
result.pos = 0 # calculate all positions # result.pos = 0 # calculate all positions
# result.collect_errors(self.document__) # result.collect_errors(self.document__)
return result return result
...@@ -1529,7 +1528,7 @@ class Series(NaryOperator): ...@@ -1529,7 +1528,7 @@ class Series(NaryOperator):
# Provide useful error messages # Provide useful error messages
match = text.search(Series.RX_ARGUMENT) match = text.search(Series.RX_ARGUMENT)
i = max(1, text.index(match.regs[1][0])) if match else 1 i = max(1, text.index(match.regs[1][0])) if match else 1
node = Node(self, text_[:i]) node = Node(self, text_[:i]).init_pos(self.grammar.document_length__ - len(text_))
node.add_error('%s expected; "%s"... found!' node.add_error('%s expected; "%s"... found!'
% (str(parser), text_[:10].replace('\n', '\\n ')), % (str(parser), text_[:10].replace('\n', '\\n ')),
code=Error.MANDATORY_CONTINUATION) code=Error.MANDATORY_CONTINUATION)
......
...@@ -89,6 +89,7 @@ class StringView(collections.abc.Sized): ...@@ -89,6 +89,7 @@ class StringView(collections.abc.Sized):
__slots__ = ['text', 'begin', 'end', 'len', 'fullstring_flag'] __slots__ = ['text', 'begin', 'end', 'len', 'fullstring_flag']
def __init__(self, text: str, begin: Optional[int] = 0, end: Optional[int] = None) -> None: def __init__(self, text: str, begin: Optional[int] = 0, end: Optional[int] = None) -> None:
assert isinstance(text, str)
self.text = text # type: str self.text = text # type: str
self.begin, self.end = real_indices(begin, end, len(text)) self.begin, self.end = real_indices(begin, end, len(text))
self.len = max(self.end - self.begin, 0) # type: int self.len = max(self.end - self.begin, 0) # type: int
......
...@@ -229,6 +229,7 @@ class Node(collections.abc.Sized): ...@@ -229,6 +229,7 @@ class Node(collections.abc.Sized):
""" """
self.error_flag = 0 # type: int self.error_flag = 0 # type: int
self._errors = [] # type: List[Error] self._errors = [] # type: List[Error]
self._pos = -1 # type: int
# Assignment to self.result initializes the attributes _result, children and _len # Assignment to self.result initializes the attributes _result, children and _len
# The following if-clause is merely an optimization, i.e. a fast-path for leaf-Nodes # The following if-clause is merely an optimization, i.e. a fast-path for leaf-Nodes
if leafhint: if leafhint:
...@@ -237,8 +238,6 @@ class Node(collections.abc.Sized): ...@@ -237,8 +238,6 @@ class Node(collections.abc.Sized):
self._len = -1 # type: int # lazy evaluation self._len = -1 # type: int # lazy evaluation
else: else:
self.result = result self.result = result
# self.pos: int = 0 # continuous updating of pos values wastes a lot of time
self._pos = -1 # type: int
self.parser = parser or ZOMBIE_PARSER self.parser = parser or ZOMBIE_PARSER
...@@ -321,6 +320,8 @@ class Node(collections.abc.Sized): ...@@ -321,6 +320,8 @@ class Node(collections.abc.Sized):
self.children = (result,) self.children = (result,)
self._result = self.children self._result = self.children
self.error_flag = result.error_flag self.error_flag = result.error_flag
if self._pos < 0:
self._pos = result._pos
else: else:
if isinstance(result, tuple): if isinstance(result, tuple):
self.children = result self.children = result
...@@ -328,6 +329,8 @@ class Node(collections.abc.Sized): ...@@ -328,6 +329,8 @@ class Node(collections.abc.Sized):
if result: if result:
if self.error_flag == 0: if self.error_flag == 0:
self.error_flag = max(child.error_flag for child in self.children) self.error_flag = max(child.error_flag for child in self.children)
if self._pos < 0:
self._pos = result[0]._pos
else: else:
self.children = NoChildren self.children = NoChildren
self._result = str(result) self._result = str(result)
...@@ -368,17 +371,33 @@ class Node(collections.abc.Sized): ...@@ -368,17 +371,33 @@ class Node(collections.abc.Sized):
return self._pos return self._pos
@pos.setter # @pos.setter
def pos(self, pos: int): # def pos(self, pos: int):
assert self._pos == pos, str("%i != %i" % (self._pos, pos)) # assert self._pos == pos, str("%i != %i" % (self._pos, pos))
offset = 0 # offset = 0
# # recursively adjust pos-values of all children
# for child in self.children:
# assert child.pos == pos + offset
# offset += len(child)
# # add pos-values to Error-objects
# for err in self._errors:
# err.pos = pos
def init_pos(self, pos: int, overwrite: bool = False) -> 'Node':
if overwrite or self._pos < 0:
self._pos = pos
for err in self._errors:
err.pos = pos
else:
assert self._pos == pos, str("%i != %i" % (self._pos, pos))
# recursively adjust pos-values of all children # recursively adjust pos-values of all children
offset = self.pos
for child in self.children: for child in self.children:
assert child.pos == pos + offset child.init_pos(offset)
offset += len(child) offset = child.pos + len(child)
# add pos-values to Error-objects return self
for err in self._errors:
err.pos = pos
@property @property
......
...@@ -147,8 +147,8 @@ class TestRegex: ...@@ -147,8 +147,8 @@ class TestRegex:
assert result assert result
assert not messages, str(messages) assert not messages, str(messages)
parser = compile_python_object(DHPARSER_IMPORTS + result, '\w+Grammar$')() parser = compile_python_object(DHPARSER_IMPORTS + result, '\w+Grammar$')()
node, rest = parser.regex(StringView('abc+def')) node = parser('abc+def', parser.regex)
assert rest == '' assert not node.error_flag
assert node.parser.name == "regex" assert node.parser.name == "regex"
assert str(node) == 'abc+def' assert str(node) == 'abc+def'
......
...@@ -120,7 +120,7 @@ class TestNode: ...@@ -120,7 +120,7 @@ class TestNode:
assert len(nd2) == 3, "Expected Node.len == 3, got %i" % len(nd2) assert len(nd2) == 3, "Expected Node.len == 3, got %i" % len(nd2)
nd = Node(None, (nd1, nd2)) nd = Node(None, (nd1, nd2))
assert len(nd) == 6, "Expected Node.len == 6, got %i" % len(nd) assert len(nd) == 6, "Expected Node.len == 6, got %i" % len(nd)
nd.pos = 0 nd.init_pos(0)
assert nd.pos == 0, "Expected Node.pos == 0, got %i" % nd.pos assert nd.pos == 0, "Expected Node.pos == 0, got %i" % nd.pos
assert nd1.pos == 0, "Expected Node.pos == 0, got %i" % nd1.pos assert nd1.pos == 0, "Expected Node.pos == 0, got %i" % nd1.pos
assert nd2.pos == 3, "Expected Node.pos == 3, got %i" % nd2.pos assert nd2.pos == 3, "Expected Node.pos == 3, got %i" % nd2.pos
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment