Notice to GitKraken users: A vulnerability has been found in the SSH key generation of GitKraken versions 7.6.0 to 8.0.0 (https://www.gitkraken.com/blog/weak-ssh-key-fix). If you use GitKraken and have generated an SSH key using one of these versions, please remove it both from your local workstation and from your LRZ GitLab profile.

21.10.2021, 09:00 - 11:00: Due to updates, GitLab may be unavailable for a few minutes between 09:00 and 11:00.

Commit e51e674c authored by Eckhart Arnold
Browse files

- Errors now centrally managed by RootNode object

parent 97d7f3bc
...@@ -225,16 +225,17 @@ def compile_source(source: str, ...@@ -225,16 +225,17 @@ def compile_source(source: str,
log_ST(syntax_tree, log_file_name + '.cst') log_ST(syntax_tree, log_file_name + '.cst')
log_parsing_history(parser, log_file_name) log_parsing_history(parser, log_file_name)
assert is_error(syntax_tree.error_flag) or str(syntax_tree) == strip_tokens(source_text) assert is_error(syntax_tree.error_flag) or str(syntax_tree) == strip_tokens(source_text), \
str(syntax_tree)
# only compile if there were no syntax errors, for otherwise it is # only compile if there were no syntax errors, for otherwise it is
# likely that error list gets littered with compile error messages # likely that error list gets littered with compile error messages
result = None result = None
# efl = syntax_tree.error_flag # efl = syntax_tree.error_flag
# messages = syntax_tree.collect_errors(clear_errors=True) # messages = syntax_tree.errors(clear_errors=True)
if not is_error(syntax_tree.error_flag): if not is_error(syntax_tree.error_flag):
transformer(syntax_tree) transformer(syntax_tree)
# efl = max(efl, syntax_tree.error_flag) # efl = max(efl, syntax_tree.error_flag)
# messages.extend(syntax_tree.collect_errors(clear_errors=True)) # messages.extend(syntax_tree.errors(clear_errors=True))
if is_logging(): if is_logging():
log_ST(syntax_tree, log_file_name + '.ast') log_ST(syntax_tree, log_file_name + '.ast')
if not is_error(syntax_tree.error_flag): if not is_error(syntax_tree.error_flag):
...@@ -242,10 +243,10 @@ def compile_source(source: str, ...@@ -242,10 +243,10 @@ def compile_source(source: str,
ast = copy.deepcopy(syntax_tree) ast = copy.deepcopy(syntax_tree)
result = compiler(syntax_tree) result = compiler(syntax_tree)
# print(syntax_tree.as_sxpr()) # print(syntax_tree.as_sxpr())
# messages.extend(syntax_tree.collect_errors()) # messages.extend(syntax_tree.errors())
# syntax_tree.error_flag = max(syntax_tree.error_flag, efl) # syntax_tree.error_flag = max(syntax_tree.error_flag, efl)
messages = syntax_tree.collect_errors() messages = syntax_tree.errors()
adjust_error_locations(messages, original_text, source_mapping) adjust_error_locations(messages, original_text, source_mapping)
return result, messages, ast return result, messages, ast
......
...@@ -221,7 +221,7 @@ def adjust_error_locations(errors: List[Error], ...@@ -221,7 +221,7 @@ def adjust_error_locations(errors: List[Error],
Args: Args:
errors: The list of errors as returned by the method errors: The list of errors as returned by the method
``collect_errors()`` of a Node object ``errors()`` of a Node object
original_text: The source text on which the errors occurred. original_text: The source text on which the errors occurred.
(Needed in order to determine the line and column numbers.) (Needed in order to determine the line and column numbers.)
source_mapping: A function that maps error positions to their source_mapping: A function that maps error positions to their
......
...@@ -53,6 +53,7 @@ import contextlib ...@@ -53,6 +53,7 @@ import contextlib
import html import html
import os import os
from DHParser.error import Error
from DHParser.stringview import StringView from DHParser.stringview import StringView
from DHParser.syntaxtree import Node from DHParser.syntaxtree import Node
from DHParser.toolkit import is_filename, escape_control_characters, GLOBALS, typing from DHParser.toolkit import is_filename, escape_control_characters, GLOBALS, typing
...@@ -192,7 +193,7 @@ class HistoryRecord: ...@@ -192,7 +193,7 @@ class HistoryRecord:
parser call, which is either MATCH, FAIL (i.e. no match) parser call, which is either MATCH, FAIL (i.e. no match)
or ERROR. or ERROR.
""" """
__slots__ = ('call_stack', 'node', 'text', 'line_col') __slots__ = ('call_stack', 'node', 'text', 'line_col', 'errors')
MATCH = "MATCH" MATCH = "MATCH"
ERROR = "ERROR" ERROR = "ERROR"
...@@ -221,12 +222,14 @@ class HistoryRecord: ...@@ -221,12 +222,14 @@ class HistoryRecord:
def __init__(self, call_stack: List[str], def __init__(self, call_stack: List[str],
node: Node, node: Node,
text: StringView, text: StringView,
line_col: Tuple[int, int]) -> None: line_col: Tuple[int, int],
errors: List[Error] = []) -> None:
# copy call stack, dropping uninformative Forward-Parsers # copy call stack, dropping uninformative Forward-Parsers
self.call_stack = [tn for tn in call_stack if tn != ":Forward"] # type: List[str] self.call_stack = [tn for tn in call_stack if tn != ":Forward"] # type: List[str]
self.node = node # type: Node self.node = node # type: Node
self.text = text # type: StringView self.text = text # type: StringView
self.line_col = line_col # type: Tuple[int, int] self.line_col = line_col # type: Tuple[int, int]
self.errors = errors # type: List[Error]
def __str__(self): def __str__(self):
return '%4i, %2i: %s; %s; "%s"' % self.as_tuple() return '%4i, %2i: %s; %s; "%s"' % self.as_tuple()
...@@ -278,7 +281,7 @@ class HistoryRecord: ...@@ -278,7 +281,7 @@ class HistoryRecord:
for cls, item in zip(tpl._fields, tpl)] + ['</tr>']) for cls, item in zip(tpl._fields, tpl)] + ['</tr>'])
def err_msg(self) -> str: def err_msg(self) -> str:
return self.ERROR + ": " + "; ".join(str(e) for e in (self.node.errors)) return self.ERROR + ": " + "; ".join(str(e) for e in (self.errors))
@property @property
def stack(self) -> str: def stack(self) -> str:
...@@ -287,7 +290,7 @@ class HistoryRecord: ...@@ -287,7 +290,7 @@ class HistoryRecord:
@property @property
def status(self) -> str: def status(self) -> str:
return self.FAIL if self.node is None else \ return self.FAIL if self.node is None else \
('"%s"' % self.err_msg()) if self.node.errors else self.MATCH ('"%s"' % self.err_msg()) if self.errors else self.MATCH
@property @property
def excerpt(self): def excerpt(self):
......
...@@ -11,8 +11,6 @@ cdef class Parser: ...@@ -11,8 +11,6 @@ cdef class Parser:
cdef object recursion_counter cdef object recursion_counter
cdef object cycle_detection cdef object cycle_detection
cpdef _return_node(self, node)
cpdef _return_node_from_results(self, results)
cpdef _parse(self, text) cpdef _parse(self, text)
cpdef reset(self) cpdef reset(self)
cpdef _apply(self, func, flip) cpdef _apply(self, func, flip)
...@@ -58,55 +56,59 @@ cdef class RegExp(Parser): ...@@ -58,55 +56,59 @@ cdef class RegExp(Parser):
cdef class Whitespace(RegExp): cdef class Whitespace(RegExp):
pass pass
cdef class UnaryOperator(Parser): cdef class MetaParser(Parser):
cpdef _return_value(self, node)
cpdef _return_values(self, results)
cdef class UnaryParser(MetaParser):
cdef public object parser cdef public object parser
cdef class NaryOperator(Parser): cdef class NaryParser(MetaParser):
cdef public object parsers cdef public object parsers
cdef class Option(UnaryOperator): cdef class Option(UnaryParser):
pass pass
cdef class ZeroOrMore(Option): cdef class ZeroOrMore(Option):
pass pass
cdef class OneOrMore(UnaryOperator): cdef class OneOrMore(UnaryParser):
pass pass
cdef class Series(NaryOperator): cdef class Series(NaryParser):
cdef public int mandatory cdef public int mandatory
cdef public object err_msgs cdef public object err_msgs
cdef public object skip cdef public object skip
cdef class Alternative(NaryOperator): cdef class Alternative(NaryParser):
pass pass
cdef class AllOf(NaryOperator): cdef class AllOf(NaryParser):
cdef public int num_parsers cdef public int num_parsers
cdef public int mandatory cdef public int mandatory
cdef public object err_msgs cdef public object err_msgs
cdef public object skip cdef public object skip
cdef class SomeOf(NaryOperator): cdef class SomeOf(NaryParser):
pass pass
cdef class FlowOperator(UnaryOperator): cdef class FlowParser(UnaryParser):
pass pass
cdef class Lookahead(FlowOperator): cdef class Lookahead(FlowParser):
pass pass
cdef class NegativeLookahead(Lookahead): cdef class NegativeLookahead(Lookahead):
pass pass
cdef class Lookbehind(FlowOperator): cdef class Lookbehind(FlowParser):
cdef public object regexp cdef public object regexp
cdef public str text cdef public str text
cdef class NegativeLookbehind(Lookbehind): cdef class NegativeLookbehind(Lookbehind):
pass pass
cdef class Capture(UnaryOperator): cdef class Capture(UnaryParser):
pass pass
cdef class Retrieve(Parser): cdef class Retrieve(Parser):
...@@ -116,7 +118,7 @@ cdef class Retrieve(Parser): ...@@ -116,7 +118,7 @@ cdef class Retrieve(Parser):
cdef class Pop(Retrieve): cdef class Pop(Retrieve):
cdef public list values cdef public list values
cdef class Synonym(UnaryOperator): cdef class Synonym(UnaryParser):
pass pass
cdef class Forward(Parser): cdef class Forward(Parser):
......
...@@ -55,6 +55,7 @@ __all__ = ('Parser', ...@@ -55,6 +55,7 @@ __all__ = ('Parser',
'Whitespace', 'Whitespace',
'DropWhitespace', 'DropWhitespace',
'mixin_comment', 'mixin_comment',
'MetaParser',
'UnaryParser', 'UnaryParser',
'NaryParser', 'NaryParser',
'Synonym', 'Synonym',
...@@ -352,11 +353,17 @@ class Parser: ...@@ -352,11 +353,17 @@ class Parser:
if grammar.history_tracking__: if grammar.history_tracking__:
# don't track returning parsers except in case an error has occurred # don't track returning parsers except in case an error has occurred
# remaining = len(rest) # remaining = len(rest)
if (grammar.moving_forward__ or (node and node.errors)): if grammar.moving_forward__:
record = HistoryRecord(grammar.call_stack__, node or EMPTY_NODE, text, record = HistoryRecord(grammar.call_stack__, node or EMPTY_NODE, text,
grammar.line_col__(text)) grammar.line_col__(text))
grammar.history__.append(record) grammar.history__.append(record)
# print(record.stack, record.status, rest[:20].replace('\n', '|')) elif node:
nid = id(node) # type: int
if nid in grammar.tree__.error_nodes:
record = HistoryRecord(grammar.call_stack__, node or EMPTY_NODE, text,
grammar.line_col__(text),
grammar.tree__.error_nodes[nid])
grammar.history__.append(record)
grammar.moving_forward__ = False grammar.moving_forward__ = False
grammar.call_stack__.pop() grammar.call_stack__.pop()
...@@ -937,7 +944,7 @@ class Grammar: ...@@ -937,7 +944,7 @@ class Grammar:
else: else:
self.tree__.new_error(result, error_msg, error_code) self.tree__.new_error(result, error_msg, error_code)
# result.pos = 0 # calculate all positions # result.pos = 0 # calculate all positions
# result.collect_errors(self.document__) # result.errors(self.document__)
if result: if result:
self.tree__.swallow(result) self.tree__.swallow(result)
self.start_parser__ = None self.start_parser__ = None
...@@ -1248,12 +1255,12 @@ class MetaParser(Parser): ...@@ -1248,12 +1255,12 @@ class MetaParser(Parser):
return Node(self.tag_name, node) if self.pname else node return Node(self.tag_name, node) if self.pname else node
elif self.pname: elif self.pname:
nd1 = Node(self.tag_name, ()) # type: Node nd1 = Node(self.tag_name, ()) # type: Node
nd1.errors = node.errors # nd1.errors = node.errors
return nd1 return nd1
elif node.errors: # elif node.errors:
nd2 = Node(self.tag_name, ()) # type: Node # nd2 = Node(self.tag_name, ()) # type: Node
nd2.errors = node.errors # nd2.errors = node.errors
return nd2 # return nd2
elif self.pname: elif self.pname:
return Node(self.tag_name, ()) # type: Node return Node(self.tag_name, ()) # type: Node
return EMPTY_NODE # avoid creation of a node object for anonymous empty nodes return EMPTY_NODE # avoid creation of a node object for anonymous empty nodes
...@@ -1585,7 +1592,7 @@ class Series(NaryParser): ...@@ -1585,7 +1592,7 @@ class Series(NaryParser):
else: else:
results += (node,) results += (node,)
break break
if node._result or parser.pname or node.errors: # optimization if node._result or parser.pname: # optimization
results += (node,) results += (node,)
# assert len(results) <= len(self.parsers) \ # assert len(results) <= len(self.parsers) \
# or len(self.parsers) >= len([p for p in results if p.tag_name != ZOMBIE_TAG]) # or len(self.parsers) >= len([p for p in results if p.tag_name != ZOMBIE_TAG])
...@@ -1673,7 +1680,7 @@ class Alternative(NaryParser): ...@@ -1673,7 +1680,7 @@ class Alternative(NaryParser):
node, text_ = parser(text) node, text_ = parser(text)
if node: if node:
return Node(self.tag_name, return Node(self.tag_name,
node if node._result or parser.pname or node.errors else ()), text_ node if node._result or parser.pname else ()), text_
return None, text return None, text
def __repr__(self): def __repr__(self):
...@@ -1784,7 +1791,7 @@ class AllOf(NaryParser): ...@@ -1784,7 +1791,7 @@ class AllOf(NaryParser):
for i, parser in enumerate(parsers): for i, parser in enumerate(parsers):
node, text__ = parser(text_) node, text__ = parser(text_)
if node: if node:
if node._result or parser.pname or node.errors: if node._result or parser.pname:
results += (node,) results += (node,)
text_ = text__ text_ = text__
del parsers[i] del parsers[i]
...@@ -1849,7 +1856,7 @@ class SomeOf(NaryParser): ...@@ -1849,7 +1856,7 @@ class SomeOf(NaryParser):
for i, parser in enumerate(parsers): for i, parser in enumerate(parsers):
node, text__ = parser(text_) node, text__ = parser(text_)
if node: if node:
if node._result or parser.pname or node.errors: if node._result or parser.pname:
results += (node,) results += (node,)
text_ = text__ text_ = text__
del parsers[i] del parsers[i]
...@@ -2129,7 +2136,7 @@ class Pop(Retrieve): ...@@ -2129,7 +2136,7 @@ class Pop(Retrieve):
def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]: def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]:
node, txt = self.retrieve_and_match(text) node, txt = self.retrieve_and_match(text)
if node and not node.errors: if node and not id(node) in self.grammar.tree__.error_nodes:
self.values.append(self.grammar.variables__[self.symbol.pname].pop()) self.values.append(self.grammar.variables__[self.symbol.pname].pop())
location = self.grammar.document_length__ - len(text) location = self.grammar.document_length__ - len(text)
self.grammar.push_rollback__(location, self._rollback) # lambda: stack.append(value)) self.grammar.push_rollback__(location, self._rollback) # lambda: stack.append(value))
......
...@@ -5,7 +5,6 @@ ...@@ -5,7 +5,6 @@
cdef class Node: cdef class Node:
cdef public list errors
cdef public int _pos cdef public int _pos
cdef public object _result cdef public object _result
cdef str _content cdef str _content
...@@ -16,6 +15,8 @@ cdef class Node: ...@@ -16,6 +15,8 @@ cdef class Node:
cdef class RootNode(Node): cdef class RootNode(Node):
cdef public list all_errors cdef public list all_errors
cdef public object error_nodes
cdef public object error_positions
cdef public int error_flag cdef public int error_flag
cdef public set inline_tags cdef public set inline_tags
cdef public set omit_tags cdef public set omit_tags
......
...@@ -152,22 +152,19 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil ...@@ -152,22 +152,19 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
At any rate, it should only be reassigned during the parsing At any rate, it should only be reassigned during the parsing
stage and never during or after the AST-transformation. stage and never during or after the AST-transformation.
errors (list): A list of all errors that occurred on this node.
attr (dict): An optional dictionary of XML-attr. This attr (dict): An optional dictionary of XML-attr. This
dictionary is created lazily upon first usage. The attr dictionary is created lazily upon first usage. The attr
will only be shown in the XML-Representation, not in the will only be shown in the XML-Representation, not in the
S-Expression-output. S-Expression-output.
""" """
__slots__ = '_result', 'children', '_len', '_pos', 'tag_name', 'errors', '_xml_attr', '_content' __slots__ = '_result', 'children', '_len', '_pos', 'tag_name', '_xml_attr', '_content'
def __init__(self, tag_name: str, result: ResultType, leafhint: bool = False) -> None: def __init__(self, tag_name: str, result: ResultType, leafhint: bool = False) -> None:
""" """
Initializes the ``Node``-object with the ``Parser``-Instance Initializes the ``Node``-object with the ``Parser``-Instance
that generated the node and the parser's result. that generated the node and the parser's result.
""" """
self.errors = [] # type: List[Error]
self._pos = -1 # type: int self._pos = -1 # type: int
# Assignment to self.result initializes the attr _result, children and _len # Assignment to self.result initializes the attr _result, children and _len
# The following if-clause is merely an optimization, i.e. a fast-path for leaf-Nodes # The following if-clause is merely an optimization, i.e. a fast-path for leaf-Nodes
...@@ -186,7 +183,6 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil ...@@ -186,7 +183,6 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
duplicate = self.__class__(self.tag_name, copy.deepcopy(self.children), False) duplicate = self.__class__(self.tag_name, copy.deepcopy(self.children), False)
else: else:
duplicate = self.__class__(self.tag_name, self.result, True) duplicate = self.__class__(self.tag_name, self.result, True)
duplicate.errors = copy.deepcopy(self.errors) if self.errors else []
duplicate._pos = self._pos duplicate._pos = self._pos
duplicate._len = self._len duplicate._len = self._len
if self.attr_active(): if self.attr_active():
...@@ -195,19 +191,22 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil ...@@ -195,19 +191,22 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
return duplicate return duplicate
def __str__(self): def __str__(self):
s = "".join(str(child) for child in self.children) if self.children else self.content if isinstance(self, RootNode):
if self.errors: root = cast(RootNode, self)
return ' <<< Error on "%s" | %s >>> ' % \ errors = root.errors()
(s, '; '.join(e.message for e in self.errors)) if errors:
return s e_pos = errors[0].pos
return self.content[:e_pos] + \
' <<< Error on "%s" | %s >>> ' % \
(self.content[e_pos - self.pos:], '; '.join(e.message for e in errors))
return self.content
def __repr__(self): def __repr__(self):
# mpargs = {'name': self.parser.name, 'ptype': self.parser.ptype} # mpargs = {'name': self.parser.name, 'ptype': self.parser.ptype}
# name, ptype = (self._tag_name.split(':') + [''])[:2] # name, ptype = (self._tag_name.split(':') + [''])[:2]
# parg = "MockParser({name}, {ptype})".format(name=name, ptype=ptype) # parg = "MockParser({name}, {ptype})".format(name=name, ptype=ptype)
rarg = str(self) if not self.children else \ rarg = str(self) if not self.children else \
"(" + ", ".join(repr(child) for child in self.children) + ")" "(" + ", ".join(child.__repr__() for child in self.children) + ")"
return "Node(%s, %s)" % (self.tag_name, rarg) return "Node(%s, %s)" % (self.tag_name, rarg)
...@@ -226,13 +225,11 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil ...@@ -226,13 +225,11 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
def __eq__(self, other): def __eq__(self, other):
""" """
Equality of nodes: Two nodes are considered as equal, if their tag Equality of nodes: Two nodes are considered as equal, if their tag
name is the same and if their results are equal. name is the same, if their results are equal and if their attributes
and attribute values are the same.
Note: It is not required that two nodes have the same errors attached.
In case you need to check for error equality as well, compare a
serialization that includes error messages, as_sxpr() will do!
""" """
return self.tag_name == other.tag_name and self.result == other.result return self.tag_name == other.tag_name and self.result == other.result \
and self.compare_attr(other)
def __hash__(self): def __hash__(self):
...@@ -339,9 +336,9 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil ...@@ -339,9 +336,9 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
@property @property
def content(self) -> str: def content(self) -> str:
""" """
Returns content as string, omitting error messages. If the node has Returns content as string. If the node has child-nodes, the
child-nodes, the string content of the child-nodes is recursively read string content of the child-nodes is recursively read and then
and then concatenated. concatenated.
""" """
if self._content is None: if self._content is None:
if self.children: if self.children:
...@@ -364,7 +361,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil ...@@ -364,7 +361,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
Return structure (and content) as S-expression on a single line Return structure (and content) as S-expression on a single line
without any line breaks. without any line breaks.
""" """
return flatten_sxpr(self.as_sxpr(showerrors=False)) return flatten_sxpr(self.as_sxpr())
@property @property
...@@ -395,6 +392,19 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil ...@@ -395,6 +392,19 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
return self return self
@property
def attr(self):
"""
Returns a dictionary of XML-attr attached to the node.
"""
try:
if self._xml_attr is None: # cython compatibility
self._xml_attr = OrderedDict()
except AttributeError:
self._xml_attr = OrderedDict()
return self._xml_attr
def attr_active(self) -> bool: def attr_active(self) -> bool:
""" """
Returns True, if XML-Attributes of this node have ever been set Returns True, if XML-Attributes of this node have ever been set
...@@ -408,17 +418,20 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil ...@@ -408,17 +418,20 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
return False return False
@property def compare_attr(self, other: 'Node') -> bool:
<