10.12., 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit 3c8c31d4 authored by Eckhart Arnold

- syntaxtree.py: Error tuple replaced by Error class

- syntaxtree/toolkit: faster error collecting
parent 5927d91d
......@@ -22,3 +22,4 @@ build/
dist/
MANIFEST
playground/*
DevScripts/DHParser.py
......@@ -276,7 +276,7 @@ def add_parser_guard(parser_func):
if location in grammar.recursion_locations__:
if location in parser.visited:
node, rest = parser.visited[location]
# TODO: add a warning about occurence of left-recursion here
# TODO: maybe add a warning about occurrence of left-recursion here?
# don't overwrite any positive match (i.e. node not None) in the cache
# and don't add empty entries for parsers returning from left recursive calls!
elif grammar.memoization__:
......@@ -704,7 +704,7 @@ class Grammar:
self._dirty_flag__ = False # type: bool
self.history_tracking__ = False # type: bool
self.memoization__ = True # type: bool
self.left_recursion_handling__ = False # type: bool
self.left_recursion_handling__ = True # type: bool
self._reset__()
# prepare parsers in the class, first
......@@ -866,6 +866,7 @@ class Grammar:
else:
result.add_error(error_str)
result.pos = 0 # calculate all positions
result.finalize_errors(self.document__)
return result
......
......@@ -32,7 +32,7 @@ except ImportError:
from .typing34 import AbstractSet, Any, ByteString, Callable, cast, Container, Dict, \
Iterator, List, NamedTuple, Sequence, Union, Text, Tuple
from DHParser.toolkit import is_logging, log_dir, StringView, line_col, identity
from DHParser.toolkit import is_logging, log_dir, StringView, linebreaks, line_col, identity
__all__ = ('WHITESPACE_PTYPE',
'MockParser',
......@@ -123,11 +123,31 @@ class ZombieParser(MockParser):
ZOMBIE_PARSER = ZombieParser()
# # Python 3.6:
# class Error(NamedTuple):
# pos: int
# msg: str
Error = NamedTuple('Error', [('pos', int), ('msg', str)])
class Error:
    """A single error or warning message with its location in the source.

    Attributes:
        message:  the readable, fully formatted message text
        category: either ``Error.ERROR`` or ``Error.WARNING``; an empty
                  category passed to the constructor becomes ``Error.ERROR``
        code:     an identifier for the kind of error; ``from_template``
                  stores the unformatted template here
        pos, line, column: location of the error. All three are
                  initialised to -1 and are meant to be assigned from
                  outside once the location is known.
    """
    __slots__ = ['message', 'category', 'code', 'pos', 'line', 'column']

    ERROR = "error"
    WARNING = "warning"

    def __init__(self, message: str, category: str='', code: str=''):
        self.message = message
        self.category = category or Error.ERROR
        self.code = code
        self.pos = -1
        self.line = -1
        self.column = -1

    def __str__(self):
        return ("line: %3i, column: %2i" % (self.line, self.column)
                + ", %s: %s" % (self.category, self.message))

    @staticmethod
    def from_template(template: str, category: str='', content: Union[tuple, dict]=()):
        """Creates an Error object from a message template.

        Args:
            template: the message text. It is interpreted as a %-format
                string if `content` is a non-empty tuple and as a
                ``str.format`` string if `content` is a non-empty dict.
            category: the error category, see ``Error.__init__``.
            content:  the values to be filled into the template.

        Returns:
            An Error object with the formatted message and the raw
            template as its error code.
        """
        if not content:
            # Nothing to substitute: take the text verbatim. Running a plain
            # message through "%" or "format" would fail on any literal
            # '%', '{' or '}' that the message happens to contain.
            return Error(template, category, template)
        if isinstance(content, tuple):
            return Error(template % content, category, template)
        return Error(template.format(**content), category, template)
ChildrenType = Tuple['Node', ...]
StrictResultType = Union[ChildrenType, StringView, str]
......@@ -198,7 +218,7 @@ class Node(collections.abc.Sized):
# self._result = '' # type: StrictResultType
# self.children = () # type: ChildrenType
# self.error_flag = False # type: bool
self._errors = [] # type: List[str]
self._errors = [] # type: List[Error]
self.result = result
self._len = len(self._result) if not self.children else \
sum(child._len for child in self.children) # type: int
......@@ -276,26 +296,59 @@ class Node(collections.abc.Sized):
@pos.setter
def pos(self, pos: int):
# assert isinstance(pos, int)
self._pos = pos
offset = 0
# recursively adjust pos-values of all children
for child in self.children:
child.pos = pos + offset
offset += len(child)
# add pos-values to Error-objects
for err in self._errors:
err.pos = pos
@property
def errors(self) -> List[Error]:
return [Error(self.pos, err) for err in self._errors]
return self._errors.copy()
# def add_error(self, error_str: str) -> 'Node':
# assert isinstance(error_str, str)
# self._errors.append(error_str)
# self.error_flag = True
# return self
def add_error(self, error_str: str) -> 'Node':
assert isinstance(error_str, str)
self._errors.append(error_str)
def add_error(self: 'Node',
template: Union[str, Error],
category: str='',
content: Union[tuple, dict]=()) -> 'Node':
if isinstance(template, Error):
assert not (bool(category) or bool(content))
self._errors.append(template)
else:
self._errors.append(Error.from_template(template, category, content))
self.error_flag = True
return self
def _finalize_errors(self, lbreaks: List[int]):
if self.error_flag:
for err in self._errors:
assert err.pos >= 0
err.line, err.column = line_col(lbreaks, err.pos)
for child in self.children:
child._finalize_errors(lbreaks)
def finalize_errors(self, source_text: Union[StringView, str]):
"""Recursively adds line- and column-numbers to all error objects.
"""
if self.error_flag:
lbreaks = linebreaks(source_text)
self._finalize_errors(lbreaks)
def collect_errors(self, clear_errors=False) -> List[Error]:
"""
Returns all errors of this node or any child node in the form
......
......@@ -30,9 +30,11 @@ the directory exists and raises an error if a file with the same name
already exists.
"""
import bisect
import codecs
import collections
import contextlib
import functools
import hashlib
import os
......@@ -58,6 +60,7 @@ __all__ = ('logging',
# 'supress_warnings',
# 'warnings',
# 'repr_call',
'linebreaks',
'line_col',
'error_messages',
'escape_re',
......@@ -165,7 +168,7 @@ class StringView(collections.abc.Sized):
does not work for unicode strings. Hence, the StringView class.
"""
__slots__ = ['text', 'begin', 'end', 'len']
__slots__ = ['text', 'begin', 'end', 'len', 'fullstring_flag']
def __init__(self, text: str, begin: Optional[int] = 0, end: Optional[int] = None) -> None:
self.text = text # type: str
......@@ -173,6 +176,7 @@ class StringView(collections.abc.Sized):
self.end = 0 # type: int
self.begin, self.end = StringView.real_indices(begin, end, len(text))
self.len = max(self.end - self.begin, 0)
self.fullstring_flag = (self.begin == 0 and self.len == len(self.text))
@staticmethod
def real_indices(begin, end, len):
......@@ -190,6 +194,8 @@ class StringView(collections.abc.Sized):
return self.len
def __str__(self):
if self.fullstring_flag: # optimization: avoid slicing/copying
return self.text
return self.text[self.begin:self.end]
def __getitem__(self, index):
......@@ -202,13 +208,33 @@ class StringView(collections.abc.Sized):
def __eq__(self, other):
return str(self) == str(other) # PERFORMANCE WARNING: This creates copies of the strings
def count(self, sub, start=None, end=None) -> int:
    """Counts the occurrences of `sub` within this view.

    `start` and `end` are relative to the view. If the view covers the
    whole underlying string, they are passed on to ``str.count`` as they
    are; otherwise they are first converted by
    ``StringView.real_indices`` (presumably normalising negative and
    missing indices - confirm against that method).
    """
    if self.fullstring_flag:
        return self.text.count(sub, start, end)
    if start is None and end is None:
        first, last = self.begin, self.end
    else:
        rel_first, rel_last = StringView.real_indices(start, end, self.len)
        first, last = self.begin + rel_first, self.begin + rel_last
    return self.text.count(sub, first, last)
def find(self, sub, start=None, end=None) -> int:
    """Returns the lowest index of `sub` within this view or -1.

    The result as well as `start` and `end` are relative to the
    beginning of the view, not to the underlying string. If `start` or
    `end` is given for a partial view, they are first converted by
    ``StringView.real_indices`` (presumably normalising negative and
    missing indices - confirm against that method).
    """
    if self.fullstring_flag:
        return self.text.find(sub, start, end)
    if start is None and end is None:
        first, last = self.begin, self.end
    else:
        start, end = StringView.real_indices(start, end, self.len)
        first, last = self.begin + start, self.begin + end
    index = self.text.find(sub, first, last)
    # Keep "not found" at -1: subtracting the view's offset from str.find's
    # -1 would yield an arbitrary negative number for views with begin > 0.
    return index - self.begin if index >= 0 else -1
def rfind(self, sub, start=None, end=None) -> int:
    """Returns the highest index of `sub` within this view or -1.

    The result as well as `start` and `end` are relative to the
    beginning of the view, not to the underlying string. If `start` or
    `end` is given for a partial view, they are first converted by
    ``StringView.real_indices`` (presumably normalising negative and
    missing indices - confirm against that method).
    """
    if self.fullstring_flag:
        return self.text.rfind(sub, start, end)
    if start is None and end is None:
        first, last = self.begin, self.end
    else:
        start, end = StringView.real_indices(start, end, self.len)
        first, last = self.begin + start, self.begin + end
    index = self.text.rfind(sub, first, last)
    # Keep "not found" at -1: subtracting the view's offset from str.rfind's
    # -1 would yield an arbitrary negative number for views with begin > 0
    # (and thereby a wrong column in line_col()).
    return index - self.begin if index >= 0 else -1
def startswith(self, prefix: str, start:int = 0, end:Optional[int] = None) -> bool:
start += self.begin
end = self.end if end is None else self.begin + end
......@@ -270,15 +296,40 @@ EMPTY_STRING_VIEW = StringView('')
# return "%s(%s)" % (name, ", ".merge_children(repr(item) for item in parameter_list))
def line_col(text: str, pos: int) -> Tuple[int, int]:
def linebreaks(text: Union[StringView, str]):
    """Returns a list of the positions of all line breaks in `text`.

    The list starts with -1, followed by the index of every newline
    character in ascending order, and ends with ``len(text)``.
    Example: ``linebreaks("a\\nb")`` yields ``[-1, 1, 3]``.
    """
    breaks = [-1]
    search_from = 0
    while True:
        newline = text.find('\n', search_from)
        if newline < 0:
            break
        breaks.append(newline)
        search_from = newline + 1
    breaks.append(len(text))
    return breaks
@functools.singledispatch
def line_col(text: Union['StringView', str], pos: int) -> Tuple[int, int]:
    """Returns the position within a text as (line, column)-tuple.

    Lines and columns are counted from 1. The position directly behind
    the last character (EOF) is still an allowed position.

    Raises:
        ValueError: if `pos` is negative or greater than ``len(text)``.
    """
    # No assert guards this check: an assertion would pre-empt the
    # ValueError with an AssertionError and vanish under "python -O".
    if pos < 0 or pos > len(text):
        raise ValueError('Position %i outside text of length %s !' % (pos, len(text)))
    line = text.count("\n", 0, pos) + 1
    # rfind yields -1 if there is no preceding line break, so the first
    # character of the text ends up in column 1
    column = pos - text.rfind("\n", 0, pos)
    return line, column
@line_col.register(list)
def _line_col(linebreaks: List[int], pos: int) -> Tuple[int, int]:
    """Variant of line_col() that works on a list of line break positions.

    `linebreaks` must be sorted in ascending order, begin with -1 and end
    with the length of the text (i.e. the EOF position). The line number
    is found by binary search; the column is the distance of `pos` from
    the preceding line break.

    Raises:
        ValueError: if `pos` is negative or lies behind EOF.
    """
    eof = linebreaks[-1]
    if not 0 <= pos <= eof:  # one character behind EOF is still an allowed position!
        raise ValueError('Position %i outside text of length %s !' % (pos, eof))
    line_no = bisect.bisect_left(linebreaks, pos)
    return line_no, pos - linebreaks[line_no - 1]
def error_messages(source_text, errors) -> List[str]:
"""Returns the sequence or iterator of error objects as a list
of error messages with line and column numbers at the beginning.
......@@ -292,8 +343,10 @@ def error_messages(source_text, errors) -> List[str]:
a list that contains all error messages in string form. Each
string starts with "line: [Line-No], column: [Column-No]
"""
return ["line: %3i, column: %2i" % line_col(source_text, err.pos) + ", error: %s" % err.msg
for err in sorted(list(errors))]
for err in errors:
if err.pos >= 0 and err.line < 0:
err.line, err.column = line_col(source_text, err.pos)
return [str(err) for err in sorted(errors, key=lambda err: err.pos)]
def escape_re(s) -> str:
......
......@@ -32,7 +32,7 @@ field = WORD_
content = "{" text "}" | plain_content
plain_content = COMMA_TERMINATED_STRING
text = NESTED_BRACES_STRING
text = { CONTENT_STRING | "{" text "}" }
#######################################################################
......@@ -45,4 +45,4 @@ WORD = /\w+/
WORD_ = /\w+/~
NO_BLANK_STRING = /[^ \t\n,%]+/~
COMMA_TERMINATED_STRING = { /[^,%]+/ | /(?=%)/~ }
NESTED_BRACES_STRING = { /(?:\\\{|\\\}|[^}%])+/~ | /\{/ NESTED_BRACES_STRING /\}/ | /(?=%)/~ }
CONTENT_STRING = { [^{}%]+ | /(?=%)/~ }
......@@ -5,7 +5,7 @@
@ literalws = right # trailing whitespace of literals will be ignored tacitly
syntax = [~//] { definition | directive } §EOF
definition = symbol §"=" expression
definition = symbol §("=" expression)
directive = "@" §symbol §"=" ( regexp | literal | list_ )
expression = term { "|" term }
......
......@@ -358,9 +358,28 @@ class TestFlowControlOperators:
cst = parser(self.t1)
assert not cst.error_flag, cst.as_sxpr()
cst = parser(self.t2)
# this should fail, because 'END' is not preceeded by a line feed
# this should fail, because 'END' is not preceded by a line feed
assert cst.error_flag, cst.as_sxpr()
def test_required_error_reporting(self):
    """Tests whether failures to comply with the required operator '§'
    are correctly reported as such.

    Both grammar sources below are faulty, so compiling either of them
    must raise a CompilationError.
    """
    # raw strings: "\w" is not a valid escape sequence in a plain literal
    lang1 = r"nonsense == /\w+/~ # wrong_equal_sign"
    lang2 = r"nonsense = [^{}%]+ # someone forgot the '/'-delimiters for regular expressions"
    for faulty_grammar in (lang1, lang2):
        try:
            grammar_provider(faulty_grammar)
            assert False, "Compilation error expected."
        except CompilationError:
            pass
if __name__ == "__main__":
from DHParser.testing import runner
......
......@@ -80,7 +80,7 @@ class TestNode:
tree = parser("20 / 4 * 3")
traverse(tree, att)
compare_tree = mock_syntax_tree("(term (term (factor 20) (:Token /) (factor 4)) (:Token *) (factor 3))")
assert tree == compare_tree
assert tree == compare_tree, tree.as_sxpr()
def test_copy(self):
cpy = copy.deepcopy(self.unique_tree)
......
......@@ -31,7 +31,7 @@ except ImportError:
sys.path.extend(['../', './'])
from DHParser.toolkit import load_if_file, logging, log_dir, is_logging, StringView, \
sv_match, sv_search, EMPTY_STRING_VIEW
sv_match, sv_search, EMPTY_STRING_VIEW, linebreaks, line_col, error_messages
class TestStringView:
......@@ -131,7 +131,55 @@ class TestStringView:
assert len(EMPTY_STRING_VIEW[0:1]) == 0
class TestToolkit:
class TestErrorSupport:
    """Checks line_col() on plain text and on precomputed line break lists."""

    # test texts paired with the number of leading line feeds in each
    SAMPLES = (("123456789\n123456789", 0),
               ("\n123456789\n123456789", 1),
               ("123456789\n123456789\n", 0),
               ("\n123456789\n123456789\n", 1))

    def mini_suite(self, s, data, offset):
        """Runs all position checks for the text `s`.

        `data` is what gets passed to line_col(): either `s` itself or
        its list of line breaks. `offset` is the number of line feeds in
        front of the first text line; every expected line number and
        every probed position is shifted by it.
        """
        l, c = line_col(data, 0)
        assert (l, c) == (1, 1), str((l, c))
        # (position, expected line, expected column) for offset == 0
        expectations = ((0, 1, 1), (1, 1, 2), (9, 1, 10),
                        (10, 2, 1), (18, 2, 9), (19, 2, 10))
        for position, line, column in expectations:
            l, c = line_col(data, position + offset)
            assert (l, c) == (line + offset, column), str((l, c))
        illegal = ((-1, "ValueError expected for negative position."),
                   (len(s) + 1, "ValueError expected for postion > pos(EOF)+1."))
        for position, complaint in illegal:
            try:
                l, c = line_col(data, position)
                assert False, complaint
            except ValueError:
                pass

    def test_line_col(self):
        for s, offset in self.SAMPLES:
            self.mini_suite(s, s, offset)

    def test_line_col_bisect(self):
        for s, offset in self.SAMPLES:
            self.mini_suite(s, linebreaks(s), offset)
class TestLoggingAndLoading:
filename = "tmp/test.py" if os.path.isdir('tmp') else "test/tmp/test.py"
code1 = "x = 46"
code2 = "def f():\n return 46"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment