Commit 3c8c31d4 authored by Eckhart Arnold
Browse files

- syntaxtree.py: Error tuple replaced by Error class

- syntaxtree/toolkit: faster error collecting
parent 5927d91d
...@@ -22,3 +22,4 @@ build/ ...@@ -22,3 +22,4 @@ build/
dist/ dist/
MANIFEST MANIFEST
playground/* playground/*
DevScripts/DHParser.py
...@@ -276,7 +276,7 @@ def add_parser_guard(parser_func): ...@@ -276,7 +276,7 @@ def add_parser_guard(parser_func):
if location in grammar.recursion_locations__: if location in grammar.recursion_locations__:
if location in parser.visited: if location in parser.visited:
node, rest = parser.visited[location] node, rest = parser.visited[location]
# TODO: add a warning about occurence of left-recursion here # TODO: maybe add a warning about occurrence of left-recursion here?
# don't overwrite any positive match (i.e. node not None) in the cache # don't overwrite any positive match (i.e. node not None) in the cache
# and don't add empty entries for parsers returning from left recursive calls! # and don't add empty entries for parsers returning from left recursive calls!
elif grammar.memoization__: elif grammar.memoization__:
...@@ -704,7 +704,7 @@ class Grammar: ...@@ -704,7 +704,7 @@ class Grammar:
self._dirty_flag__ = False # type: bool self._dirty_flag__ = False # type: bool
self.history_tracking__ = False # type: bool self.history_tracking__ = False # type: bool
self.memoization__ = True # type: bool self.memoization__ = True # type: bool
self.left_recursion_handling__ = False # type: bool self.left_recursion_handling__ = True # type: bool
self._reset__() self._reset__()
# prepare parsers in the class, first # prepare parsers in the class, first
...@@ -866,6 +866,7 @@ class Grammar: ...@@ -866,6 +866,7 @@ class Grammar:
else: else:
result.add_error(error_str) result.add_error(error_str)
result.pos = 0 # calculate all positions result.pos = 0 # calculate all positions
result.finalize_errors(self.document__)
return result return result
......
...@@ -32,7 +32,7 @@ except ImportError: ...@@ -32,7 +32,7 @@ except ImportError:
from .typing34 import AbstractSet, Any, ByteString, Callable, cast, Container, Dict, \ from .typing34 import AbstractSet, Any, ByteString, Callable, cast, Container, Dict, \
Iterator, List, NamedTuple, Sequence, Union, Text, Tuple Iterator, List, NamedTuple, Sequence, Union, Text, Tuple
from DHParser.toolkit import is_logging, log_dir, StringView, line_col, identity from DHParser.toolkit import is_logging, log_dir, StringView, linebreaks, line_col, identity
__all__ = ('WHITESPACE_PTYPE', __all__ = ('WHITESPACE_PTYPE',
'MockParser', 'MockParser',
...@@ -123,11 +123,31 @@ class ZombieParser(MockParser): ...@@ -123,11 +123,31 @@ class ZombieParser(MockParser):
ZOMBIE_PARSER = ZombieParser() ZOMBIE_PARSER = ZombieParser()
# # Python 3.6: class Error:
# class Error(NamedTuple): __slots__ = ['message', 'category', 'code', 'pos', 'line', 'column']
# pos: int
# msg: str ERROR = "error"
Error = NamedTuple('Error', [('pos', int), ('msg', str)]) WARNING = "warning"
def __init__(self, message: str, category: str='', code: str=''):
    """Initializes an error object that has not yet been located.

    Args:
        message: The human-readable error message.
        category: The error category; an empty value falls back
            to `Error.ERROR`.
        code: An identifying code for the error (may be empty).
    """
    self.message = message
    self.category = category if category else Error.ERROR
    self.code = code
    # negative values mark position, line and column as "not yet set"
    self.pos = self.line = self.column = -1
def __str__(self):
    """Renders the error as 'line: L, column: C, <category>: <message>'."""
    location = "line: %3i, column: %2i" % (self.line, self.column)
    description = ", %s: %s" % (self.category, self.message)
    return location + description
@staticmethod
def from_template(template: str, category: str='', content: Union[tuple, dict]=()):
    """Creates an Error object from a message template.

    Args:
        template: The message template. It is filled in with the
            %-operator if `content` is a tuple and with `str.format()`
            otherwise.
        category: The error category, passed through to `Error`.
        content: The values to be inserted into the template.

    Returns:
        An Error whose message is the filled-in template and whose
        code is the unformatted template.

    Raises:
        TypeError, ValueError, KeyError, IndexError: if content was
            given, but does not fit the template.
    """
    try:
        if isinstance(content, tuple):
            message = template % content
        else:
            message = template.format(**content)
    except (TypeError, ValueError, KeyError, IndexError):
        if content:
            raise  # genuine mismatch between template and content
        # No content was supplied, so the "template" is a ready-made
        # message that merely happens to contain '%' or braces (e.g. a
        # quoted source excerpt). Use it literally instead of crashing.
        message = template
    return Error(message, category, template)
ChildrenType = Tuple['Node', ...] ChildrenType = Tuple['Node', ...]
StrictResultType = Union[ChildrenType, StringView, str] StrictResultType = Union[ChildrenType, StringView, str]
...@@ -198,7 +218,7 @@ class Node(collections.abc.Sized): ...@@ -198,7 +218,7 @@ class Node(collections.abc.Sized):
# self._result = '' # type: StrictResultType # self._result = '' # type: StrictResultType
# self.children = () # type: ChildrenType # self.children = () # type: ChildrenType
# self.error_flag = False # type: bool # self.error_flag = False # type: bool
self._errors = [] # type: List[str] self._errors = [] # type: List[Error]
self.result = result self.result = result
self._len = len(self._result) if not self.children else \ self._len = len(self._result) if not self.children else \
sum(child._len for child in self.children) # type: int sum(child._len for child in self.children) # type: int
...@@ -276,26 +296,59 @@ class Node(collections.abc.Sized): ...@@ -276,26 +296,59 @@ class Node(collections.abc.Sized):
@pos.setter @pos.setter
def pos(self, pos: int): def pos(self, pos: int):
# assert isinstance(pos, int)
self._pos = pos self._pos = pos
offset = 0 offset = 0
# recursively adjust pos-values of all children
for child in self.children: for child in self.children:
child.pos = pos + offset child.pos = pos + offset
offset += len(child) offset += len(child)
# add pos-values to Error-objects
for err in self._errors:
err.pos = pos
@property @property
def errors(self) -> List[Error]: def errors(self) -> List[Error]:
return [Error(self.pos, err) for err in self._errors] return self._errors.copy()
# def add_error(self, error_str: str) -> 'Node':
# assert isinstance(error_str, str)
# self._errors.append(error_str)
# self.error_flag = True
# return self
def add_error(self, error_str: str) -> 'Node':
assert isinstance(error_str, str) def add_error(self: 'Node',
self._errors.append(error_str) template: Union[str, Error],
category: str='',
content: Union[tuple, dict]=()) -> 'Node':
if isinstance(template, Error):
assert not (bool(category) or bool(content))
self._errors.append(template)
else:
self._errors.append(Error.from_template(template, category, content))
self.error_flag = True self.error_flag = True
return self return self
# Recursive worker behind `finalize_errors`: fills in the `line` and
# `column` fields of the error objects of this node and descends into
# the children.
# `lbreaks` is the list of line break positions that is handed to
# `line_col()` together with each error's `pos`.
# The assertion requires that every error already carries a
# non-negative `pos` by the time this method runs.
# NOTE(review): indentation is lost in this view, so it cannot be told
# whether the loop over the children is nested inside the
# `if self.error_flag` guard or runs unconditionally -- confirm against
# the actual file.
def _finalize_errors(self, lbreaks: List[int]):
if self.error_flag:
for err in self._errors:
assert err.pos >= 0
err.line, err.column = line_col(lbreaks, err.pos)
for child in self.children:
child._finalize_errors(lbreaks)
def finalize_errors(self, source_text: Union[StringView, str]):
    """Adds line and column numbers to the error objects of the tree.

    Nothing is done (and the line breaks of `source_text` are not even
    computed) unless the error flag of this node is set.

    Args:
        source_text: The text from which the line break positions
            are determined.
    """
    if not self.error_flag:
        return
    self._finalize_errors(linebreaks(source_text))
def collect_errors(self, clear_errors=False) -> List[Error]: def collect_errors(self, clear_errors=False) -> List[Error]:
""" """
Returns all errors of this node or any child node in the form Returns all errors of this node or any child node in the form
......
...@@ -30,9 +30,11 @@ the directory exists and raises an error if a file with the same name ...@@ -30,9 +30,11 @@ the directory exists and raises an error if a file with the same name
already exists. already exists.
""" """
import bisect
import codecs import codecs
import collections import collections
import contextlib import contextlib
import functools
import hashlib import hashlib
import os import os
...@@ -58,6 +60,7 @@ __all__ = ('logging', ...@@ -58,6 +60,7 @@ __all__ = ('logging',
# 'supress_warnings', # 'supress_warnings',
# 'warnings', # 'warnings',
# 'repr_call', # 'repr_call',
'linebreaks',
'line_col', 'line_col',
'error_messages', 'error_messages',
'escape_re', 'escape_re',
...@@ -165,7 +168,7 @@ class StringView(collections.abc.Sized): ...@@ -165,7 +168,7 @@ class StringView(collections.abc.Sized):
does not work for unicode strings. Hence, the StringView class. does not work for unicode strings. Hence, the StringView class.
""" """
__slots__ = ['text', 'begin', 'end', 'len'] __slots__ = ['text', 'begin', 'end', 'len', 'fullstring_flag']
def __init__(self, text: str, begin: Optional[int] = 0, end: Optional[int] = None) -> None: def __init__(self, text: str, begin: Optional[int] = 0, end: Optional[int] = None) -> None:
self.text = text # type: str self.text = text # type: str
...@@ -173,6 +176,7 @@ class StringView(collections.abc.Sized): ...@@ -173,6 +176,7 @@ class StringView(collections.abc.Sized):
self.end = 0 # type: int self.end = 0 # type: int
self.begin, self.end = StringView.real_indices(begin, end, len(text)) self.begin, self.end = StringView.real_indices(begin, end, len(text))
self.len = max(self.end - self.begin, 0) self.len = max(self.end - self.begin, 0)
self.fullstring_flag = (self.begin == 0 and self.len == len(self.text))
@staticmethod @staticmethod
def real_indices(begin, end, len): def real_indices(begin, end, len):
...@@ -190,6 +194,8 @@ class StringView(collections.abc.Sized): ...@@ -190,6 +194,8 @@ class StringView(collections.abc.Sized):
return self.len return self.len
def __str__(self): def __str__(self):
if self.fullstring_flag: # optimization: avoid slicing/copying
return self.text
return self.text[self.begin:self.end] return self.text[self.begin:self.end]
def __getitem__(self, index): def __getitem__(self, index):
...@@ -202,13 +208,33 @@ class StringView(collections.abc.Sized): ...@@ -202,13 +208,33 @@ class StringView(collections.abc.Sized):
def __eq__(self, other): def __eq__(self, other):
return str(self) == str(other) # PERFORMANCE WARNING: This creates copies of the strings return str(self) == str(other) # PERFORMANCE WARNING: This creates copies of the strings
def count(self, sub, start=None, end=None) -> int:
    """Counts the occurrences of `sub` inside the view.

    `start` and `end` are interpreted relative to the view, like the
    slice arguments of `str.count()`.
    """
    haystack = self.text
    if self.fullstring_flag:
        # the view spans the whole text: no index translation needed
        return haystack.count(sub, start, end)
    if start is None and end is None:
        lower, upper = self.begin, self.end
    else:
        start, end = StringView.real_indices(start, end, self.len)
        lower, upper = self.begin + start, self.begin + end
    return haystack.count(sub, lower, upper)
def find(self, sub, start=None, end=None) -> int: def find(self, sub, start=None, end=None) -> int:
if start is None and end is None: if self.fullstring_flag:
return self.text.find(sub, start, end)
elif start is None and end is None:
return self.text.find(sub, self.begin, self.end) - self.begin return self.text.find(sub, self.begin, self.end) - self.begin
else: else:
start, end = StringView.real_indices(start, end, self.len) start, end = StringView.real_indices(start, end, self.len)
return self.text.find(sub, self.begin + start, self.begin + end) - self.begin return self.text.find(sub, self.begin + start, self.begin + end) - self.begin
def rfind(self, sub, start=None, end=None) -> int:
    """Returns the highest index inside the view where `sub` is found.

    `start` and `end` are interpreted relative to the view, like the
    slice arguments of `str.rfind()`. The returned index is relative
    to the beginning of the view as well.

    Returns:
        The index of the last occurrence of `sub` or -1 if `sub` does
        not occur within the examined range.
    """
    if self.fullstring_flag:
        # the view spans the whole text: no index translation needed
        return self.text.rfind(sub, start, end)
    if start is None and end is None:
        lower, upper = self.begin, self.end
    else:
        start, end = StringView.real_indices(start, end, self.len)
        lower, upper = self.begin + start, self.begin + end
    found = self.text.rfind(sub, lower, upper)
    # Only a real hit may be shifted into view coordinates. Shifting the
    # "not found" marker as well would yield -1 - self.begin instead of
    # -1 and thereby falsify computations like `pos - text.rfind(...)`.
    return found - self.begin if found >= 0 else -1
def startswith(self, prefix: str, start:int = 0, end:Optional[int] = None) -> bool: def startswith(self, prefix: str, start:int = 0, end:Optional[int] = None) -> bool:
start += self.begin start += self.begin
end = self.end if end is None else self.begin + end end = self.end if end is None else self.begin + end
...@@ -270,15 +296,40 @@ EMPTY_STRING_VIEW = StringView('') ...@@ -270,15 +296,40 @@ EMPTY_STRING_VIEW = StringView('')
# return "%s(%s)" % (name, ", ".merge_children(repr(item) for item in parameter_list)) # return "%s(%s)" % (name, ", ".merge_children(repr(item) for item in parameter_list))
def line_col(text: str, pos: int) -> Tuple[int, int]: def linebreaks(text: Union[StringView, str]):
lb = [-1]
i = text.find('\n', 0)
while i >= 0:
lb.append(i)
i = text.find('\n', i+1)
lb.append(len(text))
return lb
@functools.singledispatch
def line_col(text: Union[StringView, str], pos: int) -> Tuple[int, int]:
"""Returns the position within a text as (line, column)-tuple. """Returns the position within a text as (line, column)-tuple.
""" """
assert pos <= len(text), str(pos) + " > " + str(len(text)) # can point one character after EOF if pos < 0 or pos > len(text): # one character behind EOF is still an allowed position!
raise ValueError('Position %i outside text of length %s !' % (pos, len(text)))
# assert pos <= len(text), str(pos) + " > " + str(len(text))
line = text.count("\n", 0, pos) + 1 line = text.count("\n", 0, pos) + 1
column = pos - text.rfind("\n", 0, pos) column = pos - text.rfind("\n", 0, pos)
return line, column return line, column
@line_col.register(list)
def _line_col(linebreaks: List[int], pos: int) -> Tuple[int, int]:
    """Returns the (line, column)-tuple for position `pos`, looked up in
    a list of all line break positions that starts with -1 and ends
    with the position of EOF.

    Raises:
        ValueError: if `pos` is negative or lies beyond EOF.
    """
    eof = linebreaks[-1]
    if not 0 <= pos <= eof:  # one character behind EOF is still an allowed position!
        raise ValueError('Position %i outside text of length %s !' % (pos, eof))
    # index of the first line break at or after pos; this is the line
    # number, and the preceding line break marks the start of that line
    line = bisect.bisect_left(linebreaks, pos)
    return line, pos - linebreaks[line - 1]
def error_messages(source_text, errors) -> List[str]: def error_messages(source_text, errors) -> List[str]:
"""Returns the sequence or iterator of error objects as an iterator """Returns the sequence or iterator of error objects as an iterator
of error messages with line and column numbers at the beginning. of error messages with line and column numbers at the beginning.
...@@ -292,8 +343,10 @@ def error_messages(source_text, errors) -> List[str]: ...@@ -292,8 +343,10 @@ def error_messages(source_text, errors) -> List[str]:
a list that contains all error messages in string form. Each a list that contains all error messages in string form. Each
string starts with "line: [Line-No], column: [Column-No] string starts with "line: [Line-No], column: [Column-No]
""" """
return ["line: %3i, column: %2i" % line_col(source_text, err.pos) + ", error: %s" % err.msg for err in errors:
for err in sorted(list(errors))] if err.pos >= 0 and err.line < 0:
err.line, err.column = line_col(source_text, err.pos)
return [str(err) for err in sorted(errors, key=lambda err: err.pos)]
def escape_re(s) -> str: def escape_re(s) -> str:
......
...@@ -32,7 +32,7 @@ field = WORD_ ...@@ -32,7 +32,7 @@ field = WORD_
content = "{" text "}" | plain_content content = "{" text "}" | plain_content
plain_content = COMMA_TERMINATED_STRING plain_content = COMMA_TERMINATED_STRING
text = NESTED_BRACES_STRING text = { CONTENT_STRING | "{" text "}" }
####################################################################### #######################################################################
...@@ -45,4 +45,4 @@ WORD = /\w+/ ...@@ -45,4 +45,4 @@ WORD = /\w+/
WORD_ = /\w+/~ WORD_ = /\w+/~
NO_BLANK_STRING = /[^ \t\n,%]+/~ NO_BLANK_STRING = /[^ \t\n,%]+/~
COMMA_TERMINATED_STRING = { /[^,%]+/ | /(?=%)/~ } COMMA_TERMINATED_STRING = { /[^,%]+/ | /(?=%)/~ }
NESTED_BRACES_STRING = { /(?:\\\{|\\\}|[^}%])+/~ | /\{/ NESTED_BRACES_STRING /\}/ | /(?=%)/~ } CONTENT_STRING = { [^{}%]+ | /(?=%)/~ }
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
@ literalws = right # trailing whitespace of literals will be ignored tacitly @ literalws = right # trailing whitespace of literals will be ignored tacitly
syntax = [~//] { definition | directive } §EOF syntax = [~//] { definition | directive } §EOF
definition = symbol §"=" expression definition = symbol §("=" expression)
directive = "@" §symbol §"=" ( regexp | literal | list_ ) directive = "@" §symbol §"=" ( regexp | literal | list_ )
expression = term { "|" term } expression = term { "|" term }
......
...@@ -358,9 +358,28 @@ class TestFlowControlOperators: ...@@ -358,9 +358,28 @@ class TestFlowControlOperators:
cst = parser(self.t1) cst = parser(self.t1)
assert not cst.error_flag, cst.as_sxpr() assert not cst.error_flag, cst.as_sxpr()
cst = parser(self.t2) cst = parser(self.t2)
# this should fail, because 'END' is not preceeded by a line feed # this should fail, because 'END' is not preceded by a line feed
assert cst.error_flag, cst.as_sxpr() assert cst.error_flag, cst.as_sxpr()
def test_required_error_reporting(self):
    """Tests whether failures to comply with the required operator '§'
    are reported: compiling either of the two faulty grammars below
    must raise a CompilationError.
    """
    # raw string, because '\w' is not a valid escape sequence in an
    # ordinary string literal
    lang1 = r"nonsense == /\w+/~ # wrong_equal_sign"
    lang2 = "nonsense = [^{}%]+ # someone forgot the '/'-delimiters for regular expressions"
    for faulty_grammar in (lang1, lang2):
        try:
            grammar_provider(faulty_grammar)
        except CompilationError:
            continue  # expected outcome: the faulty grammar was rejected
        assert False, "Compilation error expected."
if __name__ == "__main__": if __name__ == "__main__":
from DHParser.testing import runner from DHParser.testing import runner
......
...@@ -80,7 +80,7 @@ class TestNode: ...@@ -80,7 +80,7 @@ class TestNode:
tree = parser("20 / 4 * 3") tree = parser("20 / 4 * 3")
traverse(tree, att) traverse(tree, att)
compare_tree = mock_syntax_tree("(term (term (factor 20) (:Token /) (factor 4)) (:Token *) (factor 3))") compare_tree = mock_syntax_tree("(term (term (factor 20) (:Token /) (factor 4)) (:Token *) (factor 3))")
assert tree == compare_tree assert tree == compare_tree, tree.as_sxpr()
def test_copy(self): def test_copy(self):
cpy = copy.deepcopy(self.unique_tree) cpy = copy.deepcopy(self.unique_tree)
......
...@@ -31,7 +31,7 @@ except ImportError: ...@@ -31,7 +31,7 @@ except ImportError:
sys.path.extend(['../', './']) sys.path.extend(['../', './'])
from DHParser.toolkit import load_if_file, logging, log_dir, is_logging, StringView, \ from DHParser.toolkit import load_if_file, logging, log_dir, is_logging, StringView, \
sv_match, sv_search, EMPTY_STRING_VIEW sv_match, sv_search, EMPTY_STRING_VIEW, linebreaks, line_col, error_messages
class TestStringView: class TestStringView:
...@@ -131,7 +131,55 @@ class TestStringView: ...@@ -131,7 +131,55 @@ class TestStringView:
assert len(EMPTY_STRING_VIEW[0:1]) == 0 assert len(EMPTY_STRING_VIEW[0:1]) == 0
class TestToolkit: class TestErrorSupport:
def mini_suite(self, s, data, offset):
    """Checks `line_col(data, ...)` at a fixed set of positions.

    Args:
        s: The text under test; only its length is used here.
        data: The first argument handed to `line_col()`.
        offset: The number by which positions and expected line
            numbers are shifted (except for the very first check).
    """
    expectations = (
        (0, (1, 1)),
        (0 + offset, (1 + offset, 1)),
        (1 + offset, (1 + offset, 2)),
        (9 + offset, (1 + offset, 10)),
        (10 + offset, (2 + offset, 1)),
        (18 + offset, (2 + offset, 9)),
        (19 + offset, (2 + offset, 10)),
    )
    for position, expected in expectations:
        line, column = line_col(data, position)
        assert (line, column) == expected, str((line, column))

    # positions outside of the text must be rejected
    try:
        line, column = line_col(data, -1)
    except ValueError:
        pass
    else:
        assert False, "ValueError expected for negative position."
    try:
        line, column = line_col(data, len(s) + 1)
    except ValueError:
        pass
    else:
        assert False, "ValueError expected for postion > pos(EOF)+1."
def test_line_col(self):
    """Runs the mini suite with the text itself as the data for
    `line_col()`, with and without leading and trailing line feeds.
    """
    samples = (
        ("123456789\n123456789", 0),
        ("\n123456789\n123456789", 1),
        ("123456789\n123456789\n", 0),
        ("\n123456789\n123456789\n", 1),
    )
    for text, shift in samples:
        self.mini_suite(text, text, shift)
def test_line_col_bisect(self):
    """Runs the mini suite with the list of line breaks of the text as
    the data for `line_col()`, with and without leading and trailing
    line feeds.
    """
    samples = (
        ("123456789\n123456789", 0),
        ("\n123456789\n123456789", 1),
        ("123456789\n123456789\n", 0),
        ("\n123456789\n123456789\n", 1),
    )
    for text, shift in samples:
        self.mini_suite(text, linebreaks(text), shift)
class TestLoggingAndLoading:
filename = "tmp/test.py" if os.path.isdir('tmp') else "test/tmp/test.py" filename = "tmp/test.py" if os.path.isdir('tmp') else "test/tmp/test.py"
code1 = "x = 46" code1 = "x = 46"
code2 = "def f():\n return 46" code2 = "def f():\n return 46"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment