10.12., 09:00 - 11:00: Due to updates, GitLab may be unavailable for a few minutes between 09:00 and 11:00.

Commit f2162cfb authored by eckhart

- early position handling finished

parent cc0f248d
......@@ -868,11 +868,10 @@ class Grammar:
Returns:
Node: The root node of the parse tree.
"""
def add_pos(node: Node, predecessors: List[Node]) -> int:
def tail_pos(predecessors: List[Node]) -> int:
"""Adds the position after the last node in the list of
predecessors to the node."""
node._pos = predecessors[-1]._pos + len(predecessors[-1]) if predecessors else 0
return node
return predecessors[-1].pos + len(predecessors[-1]) if predecessors else 0
# assert isinstance(document, str), type(document)
if self.root__ is None:
......@@ -897,7 +896,7 @@ class Grammar:
if not rest:
result, _ = parser(rest)
if result is None:
result = Node(None, '')
result = Node(None, '').init_pos(0)
result.add_error('Parser "%s" did not match empty document.' % str(parser))
while rest and len(stitches) < MAX_DROPOUTS:
result, rest = parser(rest)
......@@ -917,24 +916,24 @@ class Grammar:
if self.history_tracking__ else "..."))
if len(stitches) < MAX_DROPOUTS
else " too often! Terminating parser.")
stitches.append(add_pos(Node(None, skip), stitches))
stitches.append(Node(None, skip).init_pos(tail_pos(stitches)))
stitches[-1].add_error(error_msg)
if self.history_tracking__:
# some parsers may have matched and left history records with nodes != None.
# Because these are not connected to the stitched root node, their pos-
# properties will not be initialized by setting the root node's pos property
# to zero. Therefore, their pos properties need to be initialized here
for record in self.history__:
if record.node and record.node._pos < 0:
record.node.pos = 0
# # some parsers may have matched and left history records with nodes != None.
# # Because these are not connected to the stitched root node, their pos-
# # properties will not be initialized by setting the root node's pos property
# # to zero. Therefore, their pos properties need to be initialized here
# for record in self.history__:
# if record.node and record.node._pos < 0:
# record.node.init_pos(0)
record = HistoryRecord(self.call_stack__.copy(), stitches[-1], rest)
self.history__.append(record)
# stop history tracking when parser returned too early
self.history_tracking__ = False
if stitches:
if rest:
stitches.append(add_pos(Node(None, rest), stitches))
result = add_pos(Node(None, tuple(stitches)), [])
stitches.append(Node(None, rest).init_pos(tail_pos(stitches)))
result = Node(None, tuple(stitches))
if any(self.variables__.values()):
error_str = "Capture-retrieve-stack not empty after end of parsing: " + \
str(self.variables__)
......@@ -943,12 +942,12 @@ class Grammar:
# add another child node at the end to ensure that the position
# of the error will be the end of the text. Otherwise, the error
# message above ("...after end of parsing") would appear illogical.
error_node = Node(ZOMBIE_PARSER, '')
error_node = Node(ZOMBIE_PARSER, '').init_pos(tail_pos(result.children))
error_node.add_error(error_str)
result.result = result.children + (add_pos(error_node, result.children),)
result.result = result.children + (error_node,)
else:
result.add_error(error_str)
result.pos = 0 # calculate all positions
# result.pos = 0 # calculate all positions
# result.collect_errors(self.document__)
return result
......@@ -1529,7 +1528,7 @@ class Series(NaryOperator):
# Provide useful error messages
match = text.search(Series.RX_ARGUMENT)
i = max(1, text.index(match.regs[1][0])) if match else 1
node = Node(self, text_[:i])
node = Node(self, text_[:i]).init_pos(self.grammar.document_length__ - len(text_))
node.add_error('%s expected; "%s"... found!'
% (str(parser), text_[:10].replace('\n', '\\n ')),
code=Error.MANDATORY_CONTINUATION)
......
......@@ -89,6 +89,7 @@ class StringView(collections.abc.Sized):
__slots__ = ['text', 'begin', 'end', 'len', 'fullstring_flag']
def __init__(self, text: str, begin: Optional[int] = 0, end: Optional[int] = None) -> None:
assert isinstance(text, str)
self.text = text # type: str
self.begin, self.end = real_indices(begin, end, len(text))
self.len = max(self.end - self.begin, 0) # type: int
......
......@@ -229,6 +229,7 @@ class Node(collections.abc.Sized):
"""
self.error_flag = 0 # type: int
self._errors = [] # type: List[Error]
self._pos = -1 # type: int
# Assignment to self.result initializes the attributes _result, children and _len
# The following if-clause is merely an optimization, i.e. a fast-path for leaf-Nodes
if leafhint:
......@@ -237,8 +238,6 @@ class Node(collections.abc.Sized):
self._len = -1 # type: int # lazy evaluation
else:
self.result = result
# self.pos: int = 0 # continuous updating of pos values wastes a lot of time
self._pos = -1 # type: int
self.parser = parser or ZOMBIE_PARSER
......@@ -321,6 +320,8 @@ class Node(collections.abc.Sized):
self.children = (result,)
self._result = self.children
self.error_flag = result.error_flag
if self._pos < 0:
self._pos = result._pos
else:
if isinstance(result, tuple):
self.children = result
......@@ -328,6 +329,8 @@ class Node(collections.abc.Sized):
if result:
if self.error_flag == 0:
self.error_flag = max(child.error_flag for child in self.children)
if self._pos < 0:
self._pos = result[0]._pos
else:
self.children = NoChildren
self._result = str(result)
......@@ -368,17 +371,33 @@ class Node(collections.abc.Sized):
return self._pos
@pos.setter
def pos(self, pos: int):
assert self._pos == pos, str("%i != %i" % (self._pos, pos))
offset = 0
# @pos.setter
# def pos(self, pos: int):
# assert self._pos == pos, str("%i != %i" % (self._pos, pos))
# offset = 0
# # recursively adjust pos-values of all children
# for child in self.children:
# assert child.pos == pos + offset
# offset += len(child)
# # add pos-values to Error-objects
# for err in self._errors:
# err.pos = pos
def init_pos(self, pos: int, overwrite: bool = False) -> 'Node':
if overwrite or self._pos < 0:
self._pos = pos
for err in self._errors:
err.pos = pos
else:
assert self._pos == pos, str("%i != %i" % (self._pos, pos))
# recursively adjust pos-values of all children
offset = self.pos
for child in self.children:
assert child.pos == pos + offset
offset += len(child)
# add pos-values to Error-objects
for err in self._errors:
err.pos = pos
child.init_pos(offset)
offset = child.pos + len(child)
return self
@property
......
......@@ -147,8 +147,8 @@ class TestRegex:
assert result
assert not messages, str(messages)
parser = compile_python_object(DHPARSER_IMPORTS + result, '\w+Grammar$')()
node, rest = parser.regex(StringView('abc+def'))
assert rest == ''
node = parser('abc+def', parser.regex)
assert not node.error_flag
assert node.parser.name == "regex"
assert str(node) == 'abc+def'
......
......@@ -120,7 +120,7 @@ class TestNode:
assert len(nd2) == 3, "Expected Node.len == 3, got %i" % len(nd2)
nd = Node(None, (nd1, nd2))
assert len(nd) == 6, "Expected Node.len == 6, got %i" % len(nd)
nd.pos = 0
nd.init_pos(0)
assert nd.pos == 0, "Expected Node.pos == 0, got %i" % nd.pos
assert nd1.pos == 0, "Expected Node.pos == 0, got %i" % nd1.pos
assert nd2.pos == 3, "Expected Node.pos == 3, got %i" % nd2.pos
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment