The expiration time for new job artifacts in CI/CD pipelines is now 30 days (GitLab default). Previously generated artifacts in already completed jobs will not be affected by the change. The latest artifacts for all jobs in the latest successful pipelines will be kept. More information: https://gitlab.lrz.de/help/user/admin_area/settings/continuous_integration.html#default-artifacts-expiration

Commit 3c8c31d4 authored by Eckhart Arnold
Browse files

- syntaxtree.py: Error tuple replaced by Error class

- syntaxtree/toolkit: faster error collecting
parent 5927d91d
......@@ -22,3 +22,4 @@ build/
dist/
MANIFEST
playground/*
DevScripts/DHParser.py
......@@ -276,7 +276,7 @@ def add_parser_guard(parser_func):
if location in grammar.recursion_locations__:
if location in parser.visited:
node, rest = parser.visited[location]
# TODO: add a warning about occurence of left-recursion here
# TODO: maybe add a warning about occurrence of left-recursion here?
# don't overwrite any positive match (i.e. node not None) in the cache
# and don't add empty entries for parsers returning from left recursive calls!
elif grammar.memoization__:
......@@ -704,7 +704,7 @@ class Grammar:
self._dirty_flag__ = False # type: bool
self.history_tracking__ = False # type: bool
self.memoization__ = True # type: bool
self.left_recursion_handling__ = False # type: bool
self.left_recursion_handling__ = True # type: bool
self._reset__()
# prepare parsers in the class, first
......@@ -866,6 +866,7 @@ class Grammar:
else:
result.add_error(error_str)
result.pos = 0 # calculate all positions
result.finalize_errors(self.document__)
return result
......
......@@ -32,7 +32,7 @@ except ImportError:
from .typing34 import AbstractSet, Any, ByteString, Callable, cast, Container, Dict, \
Iterator, List, NamedTuple, Sequence, Union, Text, Tuple
from DHParser.toolkit import is_logging, log_dir, StringView, line_col, identity
from DHParser.toolkit import is_logging, log_dir, StringView, linebreaks, line_col, identity
__all__ = ('WHITESPACE_PTYPE',
'MockParser',
......@@ -123,11 +123,31 @@ class ZombieParser(MockParser):
ZOMBIE_PARSER = ZombieParser()
# # Python 3.6:
# class Error(NamedTuple):
# pos: int
# msg: str
Error = NamedTuple('Error', [('pos', int), ('msg', str)])
class Error:
    """A single error or warning message produced while processing a source text.

    Attributes:
        message:  the (already formatted) message text
        category: ``Error.ERROR`` or ``Error.WARNING``; defaults to
                  ``Error.ERROR`` if no category is given
        code:     an identifier for the kind of error; ``from_template``
                  stores the unformatted template here
        pos:      position of the error, -1 as long as it has not been set
        line:     line number of the error, -1 as long as it has not been set
        column:   column number of the error, -1 as long as it has not been set
    """
    __slots__ = ['message', 'category', 'code', 'pos', 'line', 'column']

    ERROR = "error"
    WARNING = "warning"

    def __init__(self, message: str, category: str='', code: str=''):
        self.message = message
        self.category = category or Error.ERROR
        self.code = code
        # location data is unknown at creation time and gets filled in later
        self.pos = -1
        self.line = -1
        self.column = -1

    def __str__(self):
        return ("line: %3i, column: %2i" % (self.line, self.column)
                + ", %s: %s" % (self.category, self.message))

    @staticmethod
    def from_template(template: str, category: str='', content: Union[tuple, dict]=()):
        """Creates an Error object from a message template.

        Args:
            template: the message text; may contain %-placeholders (if
                ``content`` is a tuple) or ``{name}``-placeholders (if
                ``content`` is a dict)
            category: the error category, see ``__init__``
            content:  the values to be filled into the template

        Returns:
            An Error object with the formatted message and the unformatted
            template as its ``code``.
        """
        if isinstance(content, tuple):
            # Only apply %-formatting if there is something to fill in.
            # Otherwise a plain message that merely contains a literal '%'
            # (e.g. "100% of ...") would raise a formatting error.
            message = template % content if content else template
        else:
            message = template.format(**content)
        return Error(message, category, template)
ChildrenType = Tuple['Node', ...]
StrictResultType = Union[ChildrenType, StringView, str]
......@@ -198,7 +218,7 @@ class Node(collections.abc.Sized):
# self._result = '' # type: StrictResultType
# self.children = () # type: ChildrenType
# self.error_flag = False # type: bool
self._errors = [] # type: List[str]
self._errors = [] # type: List[Error]
self.result = result
self._len = len(self._result) if not self.children else \
sum(child._len for child in self.children) # type: int
......@@ -276,26 +296,59 @@ class Node(collections.abc.Sized):
@pos.setter
def pos(self, pos: int):
# assert isinstance(pos, int)
self._pos = pos
offset = 0
# recursively adjust pos-values of all children
for child in self.children:
child.pos = pos + offset
offset += len(child)
# add pos-values to Error-objects
for err in self._errors:
err.pos = pos
@property
def errors(self) -> List[Error]:
return [Error(self.pos, err) for err in self._errors]
return self._errors.copy()
# def add_error(self, error_str: str) -> 'Node':
# assert isinstance(error_str, str)
# self._errors.append(error_str)
# self.error_flag = True
# return self
def add_error(self, error_str: str) -> 'Node':
assert isinstance(error_str, str)
self._errors.append(error_str)
def add_error(self: 'Node',
              template: Union[str, Error],
              category: str='',
              content: Union[tuple, dict]=()) -> 'Node':
    """Appends an error to this node's error list and sets the error flag.

    ``template`` is either a ready-made Error object (in which case
    neither ``category`` nor ``content`` may be given) or a message
    template that is turned into an Error object by
    ``Error.from_template(template, category, content)``.

    Returns the node itself.
    """
    if not isinstance(template, Error):
        error = Error.from_template(template, category, content)
    else:
        # a finished Error object must not be combined with template arguments
        assert not (bool(category) or bool(content))
        error = template
    self._errors.append(error)
    self.error_flag = True
    return self
def _finalize_errors(self, lbreaks: List[int]):
"""Adds line and column numbers to the error objects of this node
and calls itself on the child nodes. ``lbreaks`` is the list of
line-break positions that is passed on to ``line_col`` in order to
convert the ``pos`` of each error into a (line, column) pair.
"""
if self.error_flag:
for err in self._errors:
# a position must have been assigned before an error can be finalized
assert err.pos >= 0
err.line, err.column = line_col(lbreaks, err.pos)
for child in self.children:
child._finalize_errors(lbreaks)
def finalize_errors(self, source_text: Union[StringView, str]):
    """Recursively adds line- and column-numbers to all error objects.

    Nothing is done if the error flag of this node is not set. Otherwise
    the line breaks of ``source_text`` are determined once and handed
    over to ``_finalize_errors``.
    """
    if not self.error_flag:
        return
    self._finalize_errors(linebreaks(source_text))
def collect_errors(self, clear_errors=False) -> List[Error]:
"""
Returns all errors of this node or any child node in the form
......
......@@ -30,9 +30,11 @@ the directory exists and raises an error if a file with the same name
already exists.
"""
import bisect
import codecs
import collections
import contextlib
import functools
import hashlib
import os
......@@ -58,6 +60,7 @@ __all__ = ('logging',
# 'supress_warnings',
# 'warnings',
# 'repr_call',
'linebreaks',
'line_col',
'error_messages',
'escape_re',
......@@ -165,7 +168,7 @@ class StringView(collections.abc.Sized):
does not work for unicode strings. Hence, the StringView class.
"""
__slots__ = ['text', 'begin', 'end', 'len']
__slots__ = ['text', 'begin', 'end', 'len', 'fullstring_flag']
def __init__(self, text: str, begin: Optional[int] = 0, end: Optional[int] = None) -> None:
self.text = text # type: str
......@@ -173,6 +176,7 @@ class StringView(collections.abc.Sized):
self.end = 0 # type: int
self.begin, self.end = StringView.real_indices(begin, end, len(text))
self.len = max(self.end - self.begin, 0)
self.fullstring_flag = (self.begin == 0 and self.len == len(self.text))
@staticmethod
def real_indices(begin, end, len):
......@@ -190,6 +194,8 @@ class StringView(collections.abc.Sized):
return self.len
def __str__(self):
if self.fullstring_flag: # optimization: avoid slicing/copying
return self.text
return self.text[self.begin:self.end]
def __getitem__(self, index):
......@@ -202,13 +208,33 @@ class StringView(collections.abc.Sized):
def __eq__(self, other):
return str(self) == str(other) # PERFORMANCE WARNING: This creates copies of the strings
def count(self, sub, start=None, end=None) -> int:
    """Returns the number of occurrences of ``sub`` in the string view.

    ``start`` and ``end`` are relative to the view. The search is
    delegated to ``str.count`` on the underlying text with suitably
    translated bounds, so that no slice of the text needs to be copied.
    """
    if self.fullstring_flag:
        # view covers the whole text: indices need no translation
        lower, upper = start, end
    elif start is None and end is None:
        lower, upper = self.begin, self.end
    else:
        rel_start, rel_end = StringView.real_indices(start, end, self.len)
        lower, upper = self.begin + rel_start, self.begin + rel_end
    return self.text.count(sub, lower, upper)
def find(self, sub, start=None, end=None) -> int:
if start is None and end is None:
if self.fullstring_flag:
return self.text.find(sub, start, end)
elif start is None and end is None:
return self.text.find(sub, self.begin, self.end) - self.begin
else:
start, end = StringView.real_indices(start, end, self.len)
return self.text.find(sub, self.begin + start, self.begin + end) - self.begin
def rfind(self, sub, start=None, end=None) -> int:
    """Returns the highest index (relative to the string view) at which
    ``sub`` is found, or -1 if ``sub`` does not occur.

    ``start`` and ``end`` are relative to the view. The search is
    delegated to ``str.rfind`` on the underlying text, so that no slice
    of the text needs to be copied.
    """
    if self.fullstring_flag:
        # view covers the whole text: indices need no translation
        return self.text.rfind(sub, start, end)
    if start is None and end is None:
        lower, upper = self.begin, self.end
    else:
        start, end = StringView.real_indices(start, end, self.len)
        lower, upper = self.begin + start, self.begin + end
    found = self.text.rfind(sub, lower, upper)
    # Translate only real hits back to view-relative indices. A miss (-1)
    # must stay -1 and not become -1 - self.begin.
    return found - self.begin if found >= 0 else -1
def startswith(self, prefix: str, start:int = 0, end:Optional[int] = None) -> bool:
start += self.begin
end = self.end if end is None else self.begin + end
......@@ -270,15 +296,40 @@ EMPTY_STRING_VIEW = StringView('')
# return "%s(%s)" % (name, ", ".merge_children(repr(item) for item in parameter_list))
def line_col(text: str, pos: int) -> Tuple[int, int]:
def linebreaks(text: Union[StringView, str]):
    """Returns the list of the positions of all line feeds in ``text``,
    with -1 as first and ``len(text)`` as last entry.
    """
    breaks = [-1]
    search_from = 0
    while True:
        hit = text.find('\n', search_from)
        if hit < 0:
            break
        breaks.append(hit)
        search_from = hit + 1
    breaks.append(len(text))
    return breaks
@functools.singledispatch
def line_col(text: Union[StringView, str], pos: int) -> Tuple[int, int]:
"""Returns the position within a text as (line, column)-tuple.
"""
assert pos <= len(text), str(pos) + " > " + str(len(text)) # can point one character after EOF
if pos < 0 or pos > len(text): # one character behind EOF is still an allowed position!
raise ValueError('Position %i outside text of length %s !' % (pos, len(text)))
# assert pos <= len(text), str(pos) + " > " + str(len(text))
line = text.count("\n", 0, pos) + 1
column = pos - text.rfind("\n", 0, pos)
return line, column
@line_col.register(list)
def _line_col(linebreaks: List[int], pos: int) -> Tuple[int, int]:
    """Returns the position within a text as (line, column)-tuple based
    on a list of all line breaks, including -1 and EOF.

    Raises a ValueError if ``pos`` is negative or greater than the last
    entry of ``linebreaks``.
    """
    eof = linebreaks[-1]
    if not 0 <= pos <= eof:  # one character behind EOF is still an allowed position!
        raise ValueError('Position %i outside text of length %s !' % (pos, eof))
    # index of the first line break at or after pos == 1-based line number
    line = bisect.bisect_left(linebreaks, pos)
    return line, pos - linebreaks[line - 1]
def error_messages(source_text, errors) -> List[str]:
"""Returns the sequence or iterator of error objects as an iterator
of error messages with line and column numbers at the beginning.
......@@ -292,8 +343,10 @@ def error_messages(source_text, errors) -> List[str]:
a list that contains all error messages in string form. Each
string starts with "line: [Line-No], column: [Column-No]
"""
return ["line: %3i, column: %2i" % line_col(source_text, err.pos) + ", error: %s" % err.msg
for err in sorted(list(errors))]
for err in errors:
if err.pos >= 0 and err.line < 0:
err.line, err.column = line_col(source_text, err.pos)
return [str(err) for err in sorted(errors, key=lambda err: err.pos)]
def escape_re(s) -> str:
......
......@@ -32,7 +32,7 @@ field = WORD_
content = "{" text "}" | plain_content
plain_content = COMMA_TERMINATED_STRING
text = NESTED_BRACES_STRING
text = { CONTENT_STRING | "{" text "}" }
#######################################################################
......@@ -45,4 +45,4 @@ WORD = /\w+/
WORD_ = /\w+/~
NO_BLANK_STRING = /[^ \t\n,%]+/~
COMMA_TERMINATED_STRING = { /[^,%]+/ | /(?=%)/~ }
NESTED_BRACES_STRING = { /(?:\\\{|\\\}|[^}%])+/~ | /\{/ NESTED_BRACES_STRING /\}/ | /(?=%)/~ }
CONTENT_STRING = { [^{}%]+ | /(?=%)/~ }
......@@ -5,7 +5,7 @@
@ literalws = right # trailing whitespace of literals will be ignored tacitly
syntax = [~//] { definition | directive } §EOF
definition = symbol §"=" expression
definition = symbol §("=" expression)
directive = "@" §symbol §"=" ( regexp | literal | list_ )
expression = term { "|" term }
......
......@@ -358,9 +358,28 @@ class TestFlowControlOperators:
cst = parser(self.t1)
assert not cst.error_flag, cst.as_sxpr()
cst = parser(self.t2)
# this should fail, because 'END' is not preceeded by a line feed
# this should fail, because 'END' is not preceded by a line feed
assert cst.error_flag, cst.as_sxpr()
def test_required_error_reporting(self):
    """Tests whether failures to comply with the required operator '§'
    are correctly reported as such.
    """
    # raw string, because '\w' is not a valid escape sequence in a plain string literal
    lang1 = r"nonsense == /\w+/~ # wrong_equal_sign"
    lang2 = "nonsense = [^{}%]+ # someone forgot the '/'-delimiters for regular expressions"
    for faulty_grammar in (lang1, lang2):
        try:
            grammar_provider(faulty_grammar)
        except CompilationError:
            pass  # expected: the faulty grammar must not compile
        else:
            assert False, "Compilation error expected."
if __name__ == "__main__":
from DHParser.testing import runner
......
......@@ -80,7 +80,7 @@ class TestNode:
tree = parser("20 / 4 * 3")
traverse(tree, att)
compare_tree = mock_syntax_tree("(term (term (factor 20) (:Token /) (factor 4)) (:Token *) (factor 3))")
assert tree == compare_tree
assert tree == compare_tree, tree.as_sxpr()
def test_copy(self):
cpy = copy.deepcopy(self.unique_tree)
......
......@@ -31,7 +31,7 @@ except ImportError:
sys.path.extend(['../', './'])
from DHParser.toolkit import load_if_file, logging, log_dir, is_logging, StringView, \
sv_match, sv_search, EMPTY_STRING_VIEW
sv_match, sv_search, EMPTY_STRING_VIEW, linebreaks, line_col, error_messages
class TestStringView:
......@@ -131,7 +131,55 @@ class TestStringView:
assert len(EMPTY_STRING_VIEW[0:1]) == 0
class TestToolkit:
class TestErrorSupport:
    """Tests for the conversion of text positions to (line, column) pairs."""

    # test texts paired with the number of leading line feeds
    _samples = (("123456789\n123456789", 0),
                ("\n123456789\n123456789", 1),
                ("123456789\n123456789\n", 0),
                ("\n123456789\n123456789\n", 1))

    def mini_suite(self, s, data, offset):
        """Checks ``line_col`` for a number of positions in text ``s``.

        ``data`` is what gets passed to ``line_col`` as first argument;
        ``offset`` is added to the probed positions and to the expected
        line numbers.
        """
        expectations = [
            (0, (1, 1)),
            (0 + offset, (1 + offset, 1)),
            (1 + offset, (1 + offset, 2)),
            (9 + offset, (1 + offset, 10)),
            (10 + offset, (2 + offset, 1)),
            (18 + offset, (2 + offset, 9)),
            (19 + offset, (2 + offset, 10)),
        ]
        for pos, expected in expectations:
            l, c = line_col(data, pos)
            assert (l, c) == expected, str((l, c))

        # positions outside of the text must be rejected
        try:
            line_col(data, -1)
        except ValueError:
            pass
        else:
            assert False, "ValueError expected for negative position."
        try:
            line_col(data, len(s) + 1)
        except ValueError:
            pass
        else:
            assert False, "ValueError expected for postion > pos(EOF)+1."

    def test_line_col(self):
        """Runs the mini suite with the text itself as data."""
        for s, offset in self._samples:
            self.mini_suite(s, s, offset)

    def test_line_col_bisect(self):
        """Runs the mini suite with the list of line breaks as data."""
        for s, offset in self._samples:
            self.mini_suite(s, linebreaks(s), offset)
class TestLoggingAndLoading:
filename = "tmp/test.py" if os.path.isdir('tmp') else "test/tmp/test.py"
code1 = "x = 46"
code2 = "def f():\n return 46"
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment