Commit b30733c0 authored by eckhart

- bug fixes

parent 885e2ce4
@@ -507,7 +507,7 @@ class EBNFCompiler(Compiler):
if entry not in symbols and not entry.startswith(":"):
messages.append(Error(('Symbol "%s" is not defined in grammar %s but appears in '
'the transformation table!') % (entry, self.grammar_name),
Error.UNDEFINED_SYMBOL_IN_TRANSFORMATION_TABLE))
Error.UNDEFINED_SYMBOL_IN_TRANSFORMATION_TABLE), 0)
return messages
@@ -647,13 +647,13 @@ class EBNFCompiler(Compiler):
self.tree.add_error(node, 'Symbol "%s" is a reserved symbol.' % rule)
elif not sane_parser_name(rule):
self.tree.add_error(node, 'Illegal symbol "%s". Symbols must not start or '
' end with a doube underscore "__".' % rule)
' end with a doube underscore "__".' % rule)
elif rule in self.directives['tokens']:
self.add_error(node, 'Symbol "%s" has already been defined as '
'a preprocessor token.' % rule)
self.tree.add_error(node, 'Symbol "%s" has already been defined as '
'a preprocessor token.' % rule)
elif keyword.iskeyword(rule):
self.add_error(node, 'Python keyword "%s" may not be used as a symbol. '
% rule + '(This may change in the future.)')
self.tree.add_error(node, 'Python keyword "%s" may not be used as a symbol. '
% rule + '(This may change in the future.)')
try:
self.current_symbols = [node]
self.rules[rule] = self.current_symbols
@@ -669,7 +669,7 @@ class EBNFCompiler(Compiler):
trace = str(extract_tb(error.__traceback__)[-1])
errmsg = "%s (TypeError: %s; %s)\n%s" \
% (EBNFCompiler.AST_ERROR, str(error), trace, node.as_sxpr())
self.add_error(node, errmsg)
self.tree.add_error(node, errmsg)
rule, defn = rule + ':error', '"' + errmsg + '"'
return rule, defn
@@ -685,8 +685,8 @@ class EBNFCompiler(Compiler):
try:
re.compile(rx)
except Exception as re_error:
self.add_error(node, "malformed regular expression %s: %s" %
(repr(rx), str(re_error)))
self.tree.add_error(node, "malformed regular expression %s: %s" %
(repr(rx), str(re_error)))
return rx
@@ -696,36 +696,36 @@ class EBNFCompiler(Compiler):
if key not in self.REPEATABLE_DIRECTIVES:
if key in self.defined_directives:
self.add_error(node, 'Directive "%s" has already been defined earlier. '
% key + 'Later definition will be ignored!',
code=Error.REDEFINED_DIRECTIVE_WARNING)
self.tree.add_error(node, 'Directive "%s" has already been defined earlier. '
% key + 'Later definition will be ignored!',
code=Error.REDEFINED_DIRECTIVE_WARNING)
return ""
self.defined_directives.add(key)
if key in {'comment', 'whitespace'}:
if node.children[1].parser.name == "list_":
if len(node.children[1].result) != 1:
self.add_error(node, 'Directive "%s" must have one, but not %i values.'
% (key, len(node.children[1].result)))
self.tree.add_error(node, 'Directive "%s" must have one, but not %i values.'
% (key, len(node.children[1].result)))
value = self.compile(node.children[1]).pop()
if key == 'whitespace' and value in EBNFCompiler.WHITESPACE:
value = EBNFCompiler.WHITESPACE[value] # replace whitespace-name by regex
else:
self.add_error(node,
'Value "%s" not allowed for directive "%s".' % (value, key))
self.tree.add_error(node, 'Value "%s" not allowed for directive "%s".'
% (value, key))
else:
value = node.children[1].content.strip("~") # cast(str, node.children[
# 1].result).strip("~")
if value != node.children[1].content: # cast(str, node.children[1].result):
self.add_error(node, "Whitespace marker '~' not allowed in definition of "
"%s regular expression." % key)
self.tree.add_error(node, "Whitespace marker '~' not allowed in definition "
"of %s regular expression." % key)
if value[0] + value[-1] in {'""', "''"}:
value = escape_re(value[1:-1])
elif value[0] + value[-1] == '//':
value = self._check_rx(node, value[1:-1])
if key == 'whitespace' and not re.match(value, ''):
self.add_error(node, "Implicit whitespace should always "
"match the empty string, /%s/ does not." % value)
self.tree.add_error(node, "Implicit whitespace should always "
"match the empty string, /%s/ does not." % value)
self.directives[key] = value
elif key == 'ignorecase':
@@ -740,8 +740,8 @@ class EBNFCompiler(Compiler):
value = {item.lower() for item in self.compile(node.children[1])}
if ((value - {'left', 'right', 'both', 'none'})
or ('none' in value and len(value) > 1)):
self.add_error(node, 'Directive "literalws" allows only '
'`left`, `right`, `both` or `none`, not `%s`' % ", ".join(value))
self.tree.add_error(node, 'Directive "literalws" allows only `left`, `right`, '
'`both` or `none`, not `%s`' % ", ".join(value))
wsp = {'left', 'right'} if 'both' in value \
else {} if 'none' in value else value
self.directives[key] = list(wsp)
@@ -750,21 +750,21 @@ class EBNFCompiler(Compiler):
tokens = self.compile(node.children[1])
redeclared = self.directives['tokens'] & tokens
if redeclared:
self.add_error(node, 'Tokens %s have already been declared earlier. '
% str(redeclared) + 'Later declaration will be ignored',
code=Error.REDECLARED_TOKEN_WARNING)
self.tree.add_error(node, 'Tokens %s have already been declared earlier. '
% str(redeclared) + 'Later declaration will be ignored',
code=Error.REDECLARED_TOKEN_WARNING)
self.directives['tokens'] |= tokens - redeclared
elif key.endswith('_filter'):
filter_set = self.compile(node.children[1])
if not isinstance(filter_set, set) or len(filter_set) != 1:
self.add_error(node.pos, 'Directive "%s" accepts exactly on symbol, not %s'
% (key, str(filter_set)))
self.tree.add_error(node, 'Directive "%s" accepts exactly on symbol, not %s'
% (key, str(filter_set)))
self.directives['filter'][key[:-7]] = filter_set.pop()
else:
self.add_error(node, 'Unknown directive %s ! (Known ones are %s .)' %
(key, ', '.join(list(self.directives.keys()))))
self.tree.add_error(node, 'Unknown directive %s ! (Known ones are %s .)' %
(key, ', '.join(list(self.directives.keys()))))
return ""
@@ -794,11 +794,11 @@ class EBNFCompiler(Compiler):
if nd.parser.ptype == TOKEN_PTYPE and nd.content == "§":
mandatory_marker.append(len(filtered_children))
# if len(filtered_children) == 0:
# self.add_error(nd.pos, 'First item of a series should not be mandatory.',
# Error.WARNING)
# self.tree.add_error(nd.pos, 'First item of a series should not be mandatory.',
# Error.WARNING)
if len(mandatory_marker) > 1:
self.add_error(nd, 'One mandatory marker (§) sufficient to declare the '
'rest of the series as mandatory.', Error.WARNING)
self.tree.add_error(nd, 'One mandatory marker (§) sufficient to declare '
'the rest of the series as mandatory.', Error.WARNING)
else:
filtered_children.append(nd)
saved_result = node.result
@@ -822,8 +822,8 @@ class EBNFCompiler(Compiler):
assert len(node.children) == 2
arg = node.children[-1]
if arg.parser.name != 'symbol':
self.add_error(node, ('Retrieve Operator "%s" requires a symbol, '
'and not a %s.') % (prefix, str(arg.parser)))
self.tree.add_error(node, ('Retrieve Operator "%s" requires a symbol, '
'and not a %s.') % (prefix, str(arg.parser)))
return str(arg.result)
if str(arg) in self.directives['filter']:
custom_args = ['rfilter=%s' % self.directives['filter'][str(arg)]]
@@ -856,14 +856,14 @@ class EBNFCompiler(Compiler):
break
if (nd.parser.name != "regexp" or nd.content[:1] != '/'
or nd.content[-1:] != '/'):
self.add_error(node, "Lookbehind-parser can only be used with RegExp-"
"parsers, not with: " + nd.parser.name + nd.parser.ptype)
self.tree.add_error(node, "Lookbehind-parser can only be used with RegExp"
"-parsers, not: " + nd.parser.name + nd.parser.ptype)
if not result.startswith('RegExp('):
self.deferred_tasks.append(lambda: check(node))
return result
except KeyError:
self.add_error(node, 'Unknown prefix "%s".' % prefix)
self.tree.add_error(node, 'Unknown prefix "%s".' % prefix)
return ""
@@ -889,15 +889,15 @@ class EBNFCompiler(Compiler):
nd = node.children[0]
for child in nd.children:
if child.parser.ptype == TOKEN_PTYPE and nd.content == "§":
self.add_error(node, "Unordered sequences can't contain mandatory (§) items.")
self.tree.add_error(node, "No mandatory items § allowed in Unordered sequences.")
args = ', '.join(self.compile(child) for child in nd.children)
if nd.parser.name == "term":
return "AllOf(" + args + ")"
elif nd.parser.name == "expression":
return "SomeOf(" + args + ")"
else:
self.add_error(node,
"Unordered sequence or alternative requires at least two elements.")
self.tree.add_error(node, "Unordered sequence or alternative "
"requires at least two elements.")
return ""
def on_symbol(self, node: Node) -> str: # called only for symbols on the right hand side!
@@ -951,7 +951,7 @@ class EBNFCompiler(Compiler):
trace = str(extract_tb(error.__traceback__)[-1])
errmsg = "%s (AttributeError: %s; %s)\n%s" \
% (EBNFCompiler.AST_ERROR, str(error), trace, node.as_sxpr())
self.add_error(node, errmsg)
self.tree.add_error(node, errmsg)
return '"' + errmsg + '"'
return parser + ', '.join([arg] + name) + ')'
......
@@ -41,7 +41,7 @@ import bisect
from DHParser.preprocess import SourceMapFunc
from DHParser.stringview import StringView
from DHParser.toolkit import typing
from typing import Iterable, Iterator, Union, Tuple, List
from typing import Iterable, Iterator, Union, Tuple, List, Optional
__all__ = ('Error',
'is_error',
@@ -54,7 +54,7 @@ __all__ = ('Error',
class Error:
__slots__ = ['message', 'level', 'code', 'pos', 'orig_pos', 'line', 'column']
__slots__ = ['message', 'level', 'code', '_pos', 'orig_pos', 'line', 'column', '_node']
# error levels
@@ -75,14 +75,19 @@ class Error:
MANDATORY_CONTINUATION = 1001
def __init__(self, message: str, code: int = ERROR, pos: int = -1,
orig_pos: int = -1, line: int = -1, column: int = -1) -> None:
orig_pos: int = -1, line: int = -1, column: int = -1,
node: Optional['Node'] = None) -> None:
self.message = message
assert code >= 0
self.code = code
self.pos = pos
self._pos = pos
self.orig_pos = orig_pos
self.line = line
self.column = column
self._node = node
if node is not None:
assert self._pos < 0 or self._pos == node._pos
self._pos = node._pos
def __str__(self):
prefix = ''
@@ -94,6 +99,15 @@ class Error:
return 'Error("%s", %s, %i, %i, %i, %i)' \
% (self.message, repr(self.code), self.pos, self.orig_pos, self.line, self.column)
@property
def pos(self):
if self._pos < 0:
assert self._node and self._node.pos >= 0
self._pos = self._node.pos # lazy evaluation of position
self._node = None # forget node to allow GC to free memory
return self._pos
@property
def severity(self):
"""Returns a string representation of the error level, e.g. "warning"."""
......
@@ -432,7 +432,7 @@ class Node(collections.abc.Sized):
return self._pos
def init_pos(self, pos: int, overwrite: bool = True) -> 'Node':
def init_pos(self, pos: int) -> 'Node':
"""
(Re-)initialize position value. Usually, the parser guard
(`parsers.add_parser_guard()`) takes care of assigning the
@@ -442,16 +442,12 @@ class Node(collections.abc.Sized):
This function recursively reassigns the position values
of the child nodes, too.
"""
if overwrite or self._pos < 0:
self._pos = pos
for err in self.errors:
err.pos = pos
else:
assert self._pos == pos, str("%i != %i" % (self._pos, pos))
assert self._pos < 0 or self.pos == pos, str("pos mismatch %i != %i" % (self._pos, pos))
self._pos = pos
# recursively adjust pos-values of all children
offset = self.pos
for child in self.children:
child.init_pos(offset, overwrite)
child.init_pos(offset)
offset = child.pos + len(child)
return self
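The simplified init_pos above assigns a node's position once (the assertion guards against conflicting reassignment) and then walks the children, giving each child the offset immediately after its predecessor. A self-contained sketch of that recursion over a toy tree; TinyNode is a made-up stand-in, not DHParser's Node:

    # Recursive position initialization over a toy tree (TinyNode is hypothetical).
    class TinyNode:
        def __init__(self, text, children=()):
            self.text = text
            self.children = list(children)
            self._pos = -1

        def __len__(self):
            return sum(len(c) for c in self.children) if self.children else len(self.text)

        @property
        def pos(self):
            return self._pos

        def init_pos(self, pos):
            assert self._pos < 0 or self._pos == pos, "pos mismatch %i != %i" % (self._pos, pos)
            self._pos = pos
            offset = self.pos
            for child in self.children:      # children follow one another contiguously
                child.init_pos(offset)
                offset = child.pos + len(child)
            return self

    root = TinyNode("", [TinyNode("abc"), TinyNode("de")])
    root.init_pos(10)
    assert [child.pos for child in root.children] == [10, 13]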
@@ -534,6 +530,7 @@ class Node(collections.abc.Sized):
"""
left_bracket, right_bracket, density = ('', '', 1) if compact else ('(', '\n)', 0)
lbreaks = linebreaks(src) if src else [] # type: List[int]
def opening(node) -> str:
"""Returns the opening string for the representation of `node`."""
@@ -542,7 +539,7 @@ class Node(collections.abc.Sized):
if hasattr(node, '_xml_attr'):
txt.extend(' `(%s "%s")' % (k, v) for k, v in node.attributes.items())
if src:
txt.append(" `(pos %i %i %i)" % (node.pos, *line_col(src, node.add_pos)))
txt.append(" `(pos %i %i %i)" % (node.pos, *line_col(lbreaks, node.pos)))
# if node.error_flag: # just for debugging error collecting
# txt += " HAS ERRORS"
if showerrors and node.errors:
@@ -686,12 +683,13 @@ class RootNode(Node):
error_flag (int): the highest warning or error level of all errors
that occurred.
"""
def __init__(self):
def __init__(self, node: Optional[Node] = None) -> 'RootNode':
super().__init__(ZOMBIE_PARSER, '')
self.all_errors = []
self.err_nodes = []
self.error_flag = 0
self.error_propagation = False
if node is not None:
self.swallow(node)
# def _propagate_errors(self):
# if not self.all_errors or not self.error_propagation:
@@ -738,7 +736,7 @@ class RootNode(Node):
message(str): A string with the error message.abs
code(int): An error code to identify the kind of error
"""
error = Error(message, code)
error = Error(message, code, node=node)
self.all_errors.append(error)
self.error_flag = max(self.error_flag, code)
node._errors.append(error)
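With this signature an error no longer needs to know its position at creation time: RootNode.add_error hands the node to the Error constructor, and the position is resolved later through the lazy Error.pos property, for instance when the error list is sorted. A rough, self-contained sketch of the collection side; Root, Leaf and Err are invented names, not the DHParser API:

    # Root-level error collection with node-backed positions (hypothetical classes).
    class Err:
        def __init__(self, message, code=1000, node=None):
            self.message, self.code, self._node = message, code, node

        @property
        def pos(self):                       # position comes from the node, as above
            return self._node.pos

    class Leaf:
        def __init__(self, pos):
            self.pos = pos
            self.errors = []

    class Root:
        def __init__(self):
            self.all_errors = []
            self.error_flag = 0

        def add_error(self, node, message, code=1000):
            error = Err(message, code, node=node)    # remember the node, not the position
            self.all_errors.append(error)
            self.error_flag = max(self.error_flag, code)
            node.errors.append(error)
            return self

        def collect_errors(self):
            return sorted(self.all_errors, key=lambda e: e.pos)

    root = Root()
    root.add_error(Leaf(17), "late error").add_error(Leaf(3), "early error")
    assert [e.message for e in root.collect_errors()] == ["early error", "late error"]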
@@ -748,9 +746,9 @@ class RootNode(Node):
def collect_errors(self, clear_errors=False) -> List[Error]:
"""Returns the list of errors, ordered bv their position.
"""
for node in self.err_nodes: # lazy evaluation of positions
for err in node.errors:
err.pos = node.pos
# for node in self.err_nodes: # lazy evaluation of positions
# for err in node.errors: # moved to error.Error.pos
# err.pos = node.pos
self.all_errors.sort(key=lambda e: e.pos)
errors = self.all_errors
if clear_errors:
@@ -817,13 +815,9 @@ def parse_sxpr(sxpr: str) -> Node:
tagname = sxpr[:end]
name, class_name = (tagname.split(':') + [''])[:2]
sxpr = sxpr[end:].strip()
pos = 0
attributes = OrderedDict()
if sxpr[0] == '(':
result = tuple(inner_parser(block) for block in next_block(sxpr))
for node in result:
node._pos = pos
pos += len(node)
else:
lines = []
while sxpr and sxpr[0:1] != ')':
@@ -864,7 +858,6 @@ def parse_sxpr(sxpr: str) -> Node:
node = Node(mock_parsers.setdefault(tagname, MockParser(name, ':' + class_name)), result)
if attributes:
node.attributes.update(attributes)
node._pos = pos
return node
return inner_parser(sxpr)
......
@@ -39,7 +39,7 @@ import sys
from DHParser.error import is_error, adjust_error_locations
from DHParser.log import is_logging, clear_logs, log_ST, log_parsing_history
from DHParser.parse import UnknownParserError
from DHParser.syntaxtree import Node, parse_sxpr, flatten_sxpr, ZOMBIE_PARSER
from DHParser.syntaxtree import Node, RootNode, parse_sxpr, flatten_sxpr, ZOMBIE_PARSER
from DHParser.toolkit import re, typing
from typing import Tuple
@@ -354,7 +354,8 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
try:
cst = parser(test_code, parser_name)
except UnknownParserError as upe:
cst = Node(ZOMBIE_PARSER, "").add_error(str(upe)).init_pos(0)
node = Node(ZOMBIE_PARSER, "").init_pos(0)
cst = RootNode().swallow(node).add_error(node, str(upe))
if not is_error(cst.error_flag):
errata.append('Fail test "%s" for parser "%s" yields match instead of '
'expected failure!' % (test_name, parser_name))
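The test helper now wraps the placeholder node in a stand-alone RootNode and attaches the error there instead of calling add_error on the node itself. In isolation the same pattern would look roughly like this (assuming DHParser at the state of this commit; the failure message is invented):

    # Rough usage sketch of the new error-reporting pattern.
    from DHParser.error import is_error
    from DHParser.syntaxtree import Node, RootNode, ZOMBIE_PARSER

    node = Node(ZOMBIE_PARSER, "").init_pos(0)       # empty placeholder node at position 0
    cst = RootNode().swallow(node).add_error(node, "hypothetical failure message")
    assert is_error(cst.error_flag)                  # the dummy tree now reports an error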
......
@@ -888,8 +888,8 @@ assert_has_children = error_on(lambda nd: nd.children, 'Element "%s" has no chil
def assert_content(context: List[Node], regexp: str):
node = context[-1]
if not has_content(context, regexp):
node.add_error('Element "%s" violates %s on %s' %
(node.parser.name, str(regexp), node.content))
context[0].add_error(node, 'Element "%s" violates %s on %s' %
(node.parser.name, str(regexp), node.content))
@transformation_factory
@@ -897,8 +897,8 @@ def require(context: List[Node], child_tags: AbstractSet[str]):
node = context[-1]
for child in node.children:
if child.tag_name not in child_tags:
node.add_error('Element "%s" is not allowed inside "%s".' %
(child.parser.name, node.parser.name))
context[0].add_error(node, 'Element "%s" is not allowed inside "%s".' %
(child.parser.name, node.parser.name))
@transformation_factory
@@ -906,6 +906,6 @@ def forbid(context: List[Node], child_tags: AbstractSet[str]):
node = context[-1]
for child in node.children:
if child.tag_name in child_tags:
node.add_error('Element "%s" cannot be nested inside "%s".' %
(child.parser.name, node.parser.name))
context[0].add_error(node, 'Element "%s" cannot be nested inside "%s".' %
(child.parser.name, node.parser.name))
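In the three validators above the error is now reported through context[0], i.e. the root node of the traversal context, rather than through the offending node itself. A small sketch of that convention with made-up demo classes (DemoNode, DemoRoot), not DHParser's transform API:

    # Validators report through the root of the context; all names here are invented.
    class DemoNode:
        def __init__(self, name, content=""):
            self.name, self.content = name, content

    class DemoRoot(DemoNode):
        def __init__(self):
            super().__init__("root")
            self.errors = []

        def add_error(self, node, message):
            self.errors.append((node.name, message))

    def assert_alpha_content(context):
        """context lists the nodes from the root (context[0]) down to the current
        node (context[-1]); complaints are attached via the root."""
        node = context[-1]
        if not node.content.isalpha():
            context[0].add_error(node, 'Element "%s" has non-alphabetic content.' % node.name)

    root, word = DemoRoot(), DemoNode("word", "abc123")
    assert_alpha_content([root, word])
    assert root.errors == [("word", 'Element "word" has non-alphabetic content.')]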
@@ -23,7 +23,7 @@ from DHParser import is_filename, load_if_file, \
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture, \
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
last_value, counterpart, accumulate, PreprocessorFunc, \
Node, TransformationDict, TRUE_CONDITION, \
Node, TransformationDict, Whitespace, \
traverse, remove_children_if, merge_children, is_anonymous, \
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \
@@ -106,13 +106,14 @@ class BibTeXGrammar(Grammar):
CONTENT_STRING = { /[^{}%]+/ | /(?=%)/~ }+
"""
text = Forward()
source_hash__ = "9c5e3c56e3d420e2ea5885612c07de46"
source_hash__ = "5ce8838ebbb255548cf3e14cd90bae6d"
parser_initialization__ = "upon instantiation"
COMMENT__ = r'(?i)%.*(?:\n|$)'
WHITESPACE__ = r'\s*'
WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = ''
wspR__ = WSP__
whitespace__ = Whitespace(WSP__)
CONTENT_STRING = OneOrMore(Alternative(RegExp('(?i)[^{}%]+'), RE('(?i)(?=%)')))
COMMA_TERMINATED_STRING = ZeroOrMore(Alternative(RegExp('(?i)[^,%]+'), RE('(?i)(?=%)')))
NO_BLANK_STRING = RE('(?i)[^ \\t\\n,%]+')
......
@@ -105,6 +105,7 @@ class EBNFGrammar(Grammar):
WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = ''
wspR__ = WSP__
whitespace__ = Whitespace(WSP__)
EOF = NegativeLookahead(RegExp('.'))
list_ = Series(RE('\\w+'), ZeroOrMore(Series(Token(","), RE('\\w+'))))
whitespace = RE('~')
......
@ whitespace = horizontal
gedicht = bibliographisches { LEERZEILE }+ [serie] §titel text /\s*/ ENDE
bibliographisches = autor §"," [NZ] werk "," [NZ] ort "," [NZ] jahr "."
......
@@ -23,20 +23,20 @@ limitations under the License.
from DHParser import parse_sxpr, Compiler
class TestCompilerClass:
def test_error_propagations(self):
tree = parse_sxpr('(A (B 1) (C (D (E 2) (F 3))))')
A = tree
B = next(tree.select(lambda node: str(node) == "1"))
D = next(tree.select(lambda node: node.parser.name == "D"))
F = next(tree.select(lambda node: str(node) == "3"))
B.add_error("Error in child node")
F.add_error("Error in child's child node")
Compiler.propagate_error_flags(tree, lazy=True)
assert A.error_flag
assert not D.error_flag
Compiler.propagate_error_flags(tree, lazy=False)
assert D.error_flag
# class TestCompilerClass:
# def test_error_propagations(self):
# tree = parse_sxpr('(A (B 1) (C (D (E 2) (F 3))))')
# A = tree
# B = next(tree.select(lambda node: str(node) == "1"))
# D = next(tree.select(lambda node: node.parser.name == "D"))
# F = next(tree.select(lambda node: str(node) == "3"))
# B.add_error("Error in child node")
# F.add_error("Error in child's child node")
# Compiler.propagate_error_flags(tree, lazy=True)
# assert A.error_flag
# assert not D.error_flag
# Compiler.propagate_error_flags(tree, lazy=False)
# assert D.error_flag
if __name__ == "__main__":
......