Commit e51e674c authored by Eckhart Arnold
Browse files

- Errors now centrally managed by RootNode object

parent 97d7f3bc
......@@ -225,16 +225,17 @@ def compile_source(source: str,
log_ST(syntax_tree, log_file_name + '.cst')
log_parsing_history(parser, log_file_name)
assert is_error(syntax_tree.error_flag) or str(syntax_tree) == strip_tokens(source_text)
assert is_error(syntax_tree.error_flag) or str(syntax_tree) == strip_tokens(source_text), \
str(syntax_tree)
# only compile if there were no syntax errors, for otherwise it is
# likely that error list gets littered with compile error messages
result = None
# efl = syntax_tree.error_flag
# messages = syntax_tree.collect_errors(clear_errors=True)
# messages = syntax_tree.errors(clear_errors=True)
if not is_error(syntax_tree.error_flag):
transformer(syntax_tree)
# efl = max(efl, syntax_tree.error_flag)
# messages.extend(syntax_tree.collect_errors(clear_errors=True))
# messages.extend(syntax_tree.errors(clear_errors=True))
if is_logging():
log_ST(syntax_tree, log_file_name + '.ast')
if not is_error(syntax_tree.error_flag):
......@@ -242,10 +243,10 @@ def compile_source(source: str,
ast = copy.deepcopy(syntax_tree)
result = compiler(syntax_tree)
# print(syntax_tree.as_sxpr())
# messages.extend(syntax_tree.collect_errors())
# messages.extend(syntax_tree.errors())
# syntax_tree.error_flag = max(syntax_tree.error_flag, efl)
messages = syntax_tree.collect_errors()
messages = syntax_tree.errors()
adjust_error_locations(messages, original_text, source_mapping)
return result, messages, ast
......
......@@ -221,7 +221,7 @@ def adjust_error_locations(errors: List[Error],
Args:
errors: The list of errors as returned by the method
``collect_errors()`` of a Node object
``errors()`` of a Node object
original_text: The source text on which the errors occurred.
(Needed in order to determine the line and column numbers.)
source_mapping: A function that maps error positions to their
......
......@@ -53,6 +53,7 @@ import contextlib
import html
import os
from DHParser.error import Error
from DHParser.stringview import StringView
from DHParser.syntaxtree import Node
from DHParser.toolkit import is_filename, escape_control_characters, GLOBALS, typing
......@@ -192,7 +193,7 @@ class HistoryRecord:
parser call, which is either MATCH, FAIL (i.e. no match)
or ERROR.
"""
__slots__ = ('call_stack', 'node', 'text', 'line_col')
__slots__ = ('call_stack', 'node', 'text', 'line_col', 'errors')
MATCH = "MATCH"
ERROR = "ERROR"
......@@ -221,12 +222,14 @@ class HistoryRecord:
def __init__(self, call_stack: List[str],
             node: Node,
             text: StringView,
             line_col: Tuple[int, int],
             errors: 'Optional[List[Error]]' = None) -> None:
    """
    Initializes the history record.

    Args:
        call_stack: Names of the parsers on the call stack; entries equal
            to ":Forward" are dropped from the stored copy.
        node: The node stored on the record.
        text: The text stored on the record.
        line_col: A (line, column) tuple stored on the record.
        errors: The errors belonging to this record. If omitted, the
            record gets its own new, empty list.
    """
    # copy call stack, dropping uninformative Forward-Parsers
    self.call_stack = [tn for tn in call_stack if tn != ":Forward"]  # type: List[str]
    self.node = node  # type: Node
    self.text = text  # type: StringView
    self.line_col = line_col  # type: Tuple[int, int]
    # A `[]` default would be created only once and then be shared by all
    # records constructed without an explicit error list.
    self.errors = [] if errors is None else errors  # type: List[Error]
def __str__(self):
    """Formats the five fields returned by `as_tuple()` as a single line."""
    template = '%4i, %2i: %s; %s; "%s"'
    fields = self.as_tuple()
    return template % fields
......@@ -278,7 +281,7 @@ class HistoryRecord:
for cls, item in zip(tpl._fields, tpl)] + ['</tr>'])
def err_msg(self) -> str:
    """
    Returns the ERROR marker followed by the string representations of
    the errors stored on this record, separated by "; ".
    """
    # The errors are read from the record itself, not from `self.node`.
    messages = "; ".join(str(e) for e in self.errors)
    return self.ERROR + ": " + messages
@property
def stack(self) -> str:
......@@ -287,7 +290,7 @@ class HistoryRecord:
@property
def status(self) -> str:
    """
    Returns FAIL if the record has no node, the quoted error message if
    errors are stored on the record, and MATCH otherwise.
    """
    if self.node is None:
        return self.FAIL
    # The errors are read from the record itself, not from `self.node`.
    if self.errors:
        return '"%s"' % self.err_msg()
    return self.MATCH
@property
def excerpt(self):
......
......@@ -11,8 +11,6 @@ cdef class Parser:
cdef object recursion_counter
cdef object cycle_detection
cpdef _return_node(self, node)
cpdef _return_node_from_results(self, results)
cpdef _parse(self, text)
cpdef reset(self)
cpdef _apply(self, func, flip)
......@@ -58,55 +56,59 @@ cdef class RegExp(Parser):
cdef class Whitespace(RegExp):
pass
cdef class UnaryOperator(Parser):
cdef class MetaParser(Parser):
cpdef _return_value(self, node)
cpdef _return_values(self, results)
cdef class UnaryParser(MetaParser):
cdef public object parser
cdef class NaryOperator(Parser):
cdef class NaryParser(MetaParser):
cdef public object parsers
cdef class Option(UnaryOperator):
cdef class Option(UnaryParser):
pass
cdef class ZeroOrMore(Option):
pass
cdef class OneOrMore(UnaryOperator):
cdef class OneOrMore(UnaryParser):
pass
cdef class Series(NaryOperator):
cdef class Series(NaryParser):
cdef public int mandatory
cdef public object err_msgs
cdef public object skip
cdef class Alternative(NaryOperator):
cdef class Alternative(NaryParser):
pass
cdef class AllOf(NaryOperator):
cdef class AllOf(NaryParser):
cdef public int num_parsers
cdef public int mandatory
cdef public object err_msgs
cdef public object skip
cdef class SomeOf(NaryOperator):
cdef class SomeOf(NaryParser):
pass
cdef class FlowOperator(UnaryOperator):
cdef class FlowParser(UnaryParser):
pass
cdef class Lookahead(FlowOperator):
cdef class Lookahead(FlowParser):
pass
cdef class NegativeLookahead(Lookahead):
pass
cdef class Lookbehind(FlowOperator):
cdef class Lookbehind(FlowParser):
cdef public object regexp
cdef public str text
cdef class NegativeLookbehind(Lookbehind):
pass
cdef class Capture(UnaryOperator):
cdef class Capture(UnaryParser):
pass
cdef class Retrieve(Parser):
......@@ -116,7 +118,7 @@ cdef class Retrieve(Parser):
cdef class Pop(Retrieve):
cdef public list values
cdef class Synonym(UnaryOperator):
cdef class Synonym(UnaryParser):
pass
cdef class Forward(Parser):
......
......@@ -55,6 +55,7 @@ __all__ = ('Parser',
'Whitespace',
'DropWhitespace',
'mixin_comment',
'MetaParser',
'UnaryParser',
'NaryParser',
'Synonym',
......@@ -352,11 +353,17 @@ class Parser:
if grammar.history_tracking__:
# don't track returning parsers except in case an error has occurred
# remaining = len(rest)
if (grammar.moving_forward__ or (node and node.errors)):
if grammar.moving_forward__:
record = HistoryRecord(grammar.call_stack__, node or EMPTY_NODE, text,
grammar.line_col__(text))
grammar.history__.append(record)
# print(record.stack, record.status, rest[:20].replace('\n', '|'))
elif node:
nid = id(node) # type: int
if nid in grammar.tree__.error_nodes:
record = HistoryRecord(grammar.call_stack__, node or EMPTY_NODE, text,
grammar.line_col__(text),
grammar.tree__.error_nodes[nid])
grammar.history__.append(record)
grammar.moving_forward__ = False
grammar.call_stack__.pop()
......@@ -937,7 +944,7 @@ class Grammar:
else:
self.tree__.new_error(result, error_msg, error_code)
# result.pos = 0 # calculate all positions
# result.collect_errors(self.document__)
# result.errors(self.document__)
if result:
self.tree__.swallow(result)
self.start_parser__ = None
......@@ -1248,12 +1255,12 @@ class MetaParser(Parser):
return Node(self.tag_name, node) if self.pname else node
elif self.pname:
nd1 = Node(self.tag_name, ()) # type: Node
nd1.errors = node.errors
# nd1.errors = node.errors
return nd1
elif node.errors:
nd2 = Node(self.tag_name, ()) # type: Node
nd2.errors = node.errors
return nd2
# elif node.errors:
# nd2 = Node(self.tag_name, ()) # type: Node
# nd2.errors = node.errors
# return nd2
elif self.pname:
return Node(self.tag_name, ()) # type: Node
return EMPTY_NODE # avoid creation of a node object for anonymous empty nodes
......@@ -1585,7 +1592,7 @@ class Series(NaryParser):
else:
results += (node,)
break
if node._result or parser.pname or node.errors: # optimization
if node._result or parser.pname: # optimization
results += (node,)
# assert len(results) <= len(self.parsers) \
# or len(self.parsers) >= len([p for p in results if p.tag_name != ZOMBIE_TAG])
......@@ -1673,7 +1680,7 @@ class Alternative(NaryParser):
node, text_ = parser(text)
if node:
return Node(self.tag_name,
node if node._result or parser.pname or node.errors else ()), text_
node if node._result or parser.pname else ()), text_
return None, text
def __repr__(self):
......@@ -1784,7 +1791,7 @@ class AllOf(NaryParser):
for i, parser in enumerate(parsers):
node, text__ = parser(text_)
if node:
if node._result or parser.pname or node.errors:
if node._result or parser.pname:
results += (node,)
text_ = text__
del parsers[i]
......@@ -1849,7 +1856,7 @@ class SomeOf(NaryParser):
for i, parser in enumerate(parsers):
node, text__ = parser(text_)
if node:
if node._result or parser.pname or node.errors:
if node._result or parser.pname:
results += (node,)
text_ = text__
del parsers[i]
......@@ -2129,7 +2136,7 @@ class Pop(Retrieve):
def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]:
node, txt = self.retrieve_and_match(text)
if node and not node.errors:
if node and not id(node) in self.grammar.tree__.error_nodes:
self.values.append(self.grammar.variables__[self.symbol.pname].pop())
location = self.grammar.document_length__ - len(text)
self.grammar.push_rollback__(location, self._rollback) # lambda: stack.append(value))
......
......@@ -5,7 +5,6 @@
cdef class Node:
cdef public list errors
cdef public int _pos
cdef public object _result
cdef str _content
......@@ -16,6 +15,8 @@ cdef class Node:
cdef class RootNode(Node):
cdef public list all_errors
cdef public object error_nodes
cdef public object error_positions
cdef public int error_flag
cdef public set inline_tags
cdef public set omit_tags
......
......@@ -152,22 +152,19 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
At any rate, it should only be reassigned during the parsing
stage and never during or after the AST-transformation.
errors (list): A list of all errors that occured on this node.
attr (dict): An optional dictionary of XML-attr. This
dictionary is created lazily upon first usage. The attr
will only be shown in the XML-Representation, not in the
S-Expression-output.
"""
__slots__ = '_result', 'children', '_len', '_pos', 'tag_name', 'errors', '_xml_attr', '_content'
__slots__ = '_result', 'children', '_len', '_pos', 'tag_name', '_xml_attr', '_content'
def __init__(self, tag_name: str, result: ResultType, leafhint: bool = False) -> None:
"""
Initializes the ``Node``-object with the ``Parser``-Instance
that generated the node and the parser's result.
"""
self.errors = [] # type: List[Error]
self._pos = -1 # type: int
# Assignment to self.result initializes the attr _result, children and _len
# The following if-clause is merely an optimization, i.e. a fast-path for leaf-Nodes
......@@ -186,7 +183,6 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
duplicate = self.__class__(self.tag_name, copy.deepcopy(self.children), False)
else:
duplicate = self.__class__(self.tag_name, self.result, True)
duplicate.errors = copy.deepcopy(self.errors) if self.errors else []
duplicate._pos = self._pos
duplicate._len = self._len
if self.attr_active():
......@@ -195,19 +191,22 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
return duplicate
def __str__(self):
    """
    Returns the string content of the node. If the node is a RootNode
    for which errors have been reported, the content starting at the
    position of the first error in the list returned by `errors()` is
    wrapped in an error marker that lists all error messages.
    """
    content = self.content
    if isinstance(self, RootNode):
        errors = cast(RootNode, self).errors()
        if errors:
            e_pos = errors[0].pos
            return content[:e_pos] + \
                ' <<< Error on "%s" | %s >>> ' % \
                (content[e_pos - self.pos:], '; '.join(e.message for e in errors))
    return content
def __repr__(self):
    """
    Returns "Node(<tag_name>, <arg>)", where <arg> is the string content
    for a leaf node or the parenthesized, comma-separated representations
    of the children otherwise.
    """
    if self.children:
        # NOTE(review): the explicit `__repr__()` call replaced `repr(child)`
        # in this commit; the reason is not visible here -- confirm before
        # changing it back.
        rarg = "(" + ", ".join(child.__repr__() for child in self.children) + ")"
    else:
        rarg = str(self)
    return "Node(%s, %s)" % (self.tag_name, rarg)
......@@ -226,13 +225,11 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
def __eq__(self, other):
    """
    Equality of nodes: Two nodes are considered as equal, if their tag
    name is the same, if their results are equal and if their attributes
    and attribute values are the same.
    """
    return (self.tag_name == other.tag_name
            and self.result == other.result
            and self.compare_attr(other))
def __hash__(self):
......@@ -339,9 +336,9 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
@property
def content(self) -> str:
"""
Returns content as string, omitting error messages. If the node has
child-nodes, the string content of the child-nodes is recursively read
and then concatenated.
Returns content as string. If the node has child-nodes, the
string content of the child-nodes is recursively read and then
concatenated.
"""
if self._content is None:
if self.children:
......@@ -364,7 +361,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
Return structure (and content) as S-expression on a single line
without any line breaks.
"""
return flatten_sxpr(self.as_sxpr(showerrors=False))
return flatten_sxpr(self.as_sxpr())
@property
......@@ -395,6 +392,19 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
return self
@property
def attr(self):
    """
    Returns the dictionary of XML-attributes attached to the node. An
    empty OrderedDict is created and stored on first access.
    """
    # `_xml_attr` may be unset (AttributeError) or None (cython compatibility)
    if getattr(self, '_xml_attr', None) is None:
        self._xml_attr = OrderedDict()
    return self._xml_attr
def attr_active(self) -> bool:
"""
Returns True, if XML-Attributes of this node have ever been set
......@@ -408,17 +418,20 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
return False
@property
def attr(self):
def compare_attr(self, other: 'Node') -> bool:
    """
    Returns True, if `self` and `other` have the same attributes with the
    same attribute values. A node whose attributes are not active counts
    as equal to a node with an empty attribute dictionary.
    """
    if self.attr_active():
        if other.attr_active():
            return self.attr == other.attr
        # other has no attributes: equal only if self's dictionary is empty
        return len(self.attr) == 0
    if other.attr_active():
        # self has no attributes: equal only if other's dictionary is empty
        return len(other.attr) == 0
    return True  # neither self nor other have any attributes
def _tree_repr(self, tab, open_fn, close_fn, data_fn=lambda i: i,
......@@ -484,17 +497,16 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
def as_sxpr(self, src: str = None,
showerrors: bool = True,
indentation: int = 2,
compact: bool = False) -> str:
"""
Returns content as S-expression, i.e. in lisp-like form.
Returns content as S-expression, i.e. in lisp-like form. If this
method is called on a RootNode-object, the errors recorded for a node
are shown in an `(err ...)` entry on that node.
Args:
src: The source text or `None`. In case the source text is
given the position of the element in the text will be
reported as line and column.
showerrors: If True, error messages will be shown.
indentation: The number of whitespaces for indentation
compact: If True, a compact representation is returned where
brackets are omitted and only the indentation indicates the
......@@ -503,6 +515,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
left_bracket, right_bracket, density = ('', '', 1) if compact else ('(', '\n)', 0)
lbreaks = linebreaks(src) if src else [] # type: List[int]
root = cast(RootNode, self) if isinstance(self, RootNode) else None # type: Optional[Node]
def opening(node) -> str:
"""Returns the opening string for the representation of `node`."""
......@@ -513,8 +526,8 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
if src:
line, col = line_col(lbreaks, node.pos)
txt.append(" `(pos %i %i %i)" % (node.pos, line, col))
if showerrors and node.errors:
txt.append(" `(err `%s)" % ' '.join(str(err) for err in node.errors))
if root and id(node) in root.error_nodes:
txt.append(" `(err `%s)" % ' '.join(str(err) for err in root.get_errors(node)))
return "".join(txt) + '\n'
def closing(node) -> str:
......@@ -531,7 +544,6 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
def as_xml(self, src: str = None,
showerrors: bool = True,
indentation: int = 2,
inline_tags: Set[str] = set(),
omit_tags: Set[str] = set(),
......@@ -543,7 +555,6 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
src: The source text or `None`. In case the source text is
given the position will also be reported as line and
column.
showerrors: If True, error messages will be shown.
indentation: The number of whitespaces for indentation
inline_tags: A set of tag names, the content of which will always be written
on a single line, unless it contains explicit line feeds ('\n').
......@@ -555,6 +566,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
empty_tags: A set of tags which shall be rendered as empty elements, e.g.
"<empty/>" instead of "<empty><empty>".
"""
root = cast(RootNode, self) if isinstance(self, RootNode) else None # type: Optional[Node]
def opening(node) -> str:
"""Returns the opening string for the representation of `node`."""
......@@ -567,9 +579,9 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
txt.extend(' %s="%s"' % (k, v) for k, v in node.attr.items())
if src and not has_reserved_attrs:
txt.append(' line="%i" col="%i"' % line_col(line_breaks, node.pos))
if root and id(node) in root.error_nodes and not has_reserved_attrs:
    # errors are looked up on the root node via `get_errors()`
    txt.append(' err="%s"' % ''.join(str(err).replace('"', r'\"')
                                     for err in root.get_errors(node)))
if node.tag_name in empty_tags:
assert not node.result, ("Node %s with content %s is not an empty element!" %
(node.tag_name, str(node)))
......@@ -712,14 +724,14 @@ class FrozenNode(Node):
def attr(self):
raise AssertionError("Attributes cannot be accessed on a frozen node")
@property
def errors(self) -> List[Error]:
return ()
@errors.setter
def errors(self, errors: List[Error]):
if errors:
raise AssertionError('Cannot assign error list to frozen node')
# @property
# def errors(self) -> List[Error]:
# return ()
#
# @errors.setter
# def errors(self, errors: List[Error]):
# if errors:
# raise AssertionError('Cannot assign error list to frozen node')
def init_pos(self, pos: int) -> 'Node':
pass
......@@ -741,7 +753,9 @@ class RootNode(Node):
def __init__(self, node: Optional[Node] = None):
super().__init__(ZOMBIE_TAG, '')
self.all_errors = [] # type: List[Error]
self.all_errors = [] # type: List[Error]
self.error_nodes = dict() # type: Dict[int, List[Error]] # id(node) -> error list
self.error_positions = dict() # type: Dict[int, Set[int]] # pos -> set of id(node)
self.error_flag = 0
if node is not None:
self.swallow(node)
......@@ -758,13 +772,14 @@ class RootNode(Node):
else:
duplicate.children = NoChildren
duplicate._result = self._result
duplicate.errors = copy.deepcopy(self.errors) if self.errors else []
duplicate._pos = self._pos
duplicate._len = self._len
if self.attr_active():
duplicate.attr.update(copy.deepcopy(self._xml_attr))
# duplicate._xml_attr = copy.deepcopy(self._xml_attr) # this is blocked by cython
duplicate.all_errors = copy.deepcopy(self.all_errors)
duplicate.all_errors = copy.copy(self.all_errors)
duplicate.error_nodes = copy.copy(self.error_nodes)
duplicate.error_positions = copy.deepcopy(self.error_positions)
duplicate.error_flag = self.error_flag
duplicate.inline_tags = self.inline_tags
duplicate.omit_tags = self.omit_tags
......@@ -793,6 +808,8 @@ class RootNode(Node):
if node.attr_active():
self._xml_attr = node._xml_attr
self._content = node._content
if id(node) in self.error_nodes:
self.error_nodes[id(self)] = self.error_nodes[id(node)]
return self
def add_error(self, node: Node, error: Error) -> 'RootNode':
......@@ -802,7 +819,8 @@ class RootNode(Node):
assert not isinstance(node, FrozenNode)
self.all_errors.append(error)
self.error_flag = max(self.error_flag, error.code)
node.errors.append(error)
self.error_nodes.setdefault(id(node), []).append(error)
self.error_positions.setdefault(node.pos, set()).add(id(node))
return self
def new_error(self,
......@@ -820,7 +838,26 @@ class RootNode(Node):
self.add_error(node, error)
return self
def collect_errors(self) -> List[Error]:
def get_errors(self, node: Node) -> List[Error]:
    """
    Returns the list of errors that occurred on the node or on any node
    registered at the same position that cannot be found below `node`
    any more, for example, because it was an anonymous empty child node
    that has been removed from the tree.

    The node's own errors come first. A node without any registered
    errors yields an empty list.
    """
    node_id = id(node)  # type: int
    # Look up the node's own errors by id, so that they are found even if
    # nothing is registered under the node's current position.
    errors = list(self.error_nodes.get(node_id, ()))  # type: List[Error]
    for nid in self.error_positions.get(node.pos, ()):
        if nid == node_id:
            continue
        still_in_tree = any(True for _ in node.select(lambda n: id(n) == nid))
        if not still_in_tree:
            # node is not connected to tree any more => display its errors on its parent
            errors.extend(self.error_nodes.get(nid, ()))
    return errors
def errors(self) -> List[Error]:
"""
Returns the list of errors, ordered by their position.
"""
......