Commit 5da469de authored by eckhart's avatar eckhart
Browse files

- correct error locations when compiling with preprocessor

parent 3dab649e
...@@ -17,9 +17,9 @@ permissions and limitations under the License. ...@@ -17,9 +17,9 @@ permissions and limitations under the License.
""" """
import bisect import bisect
import functools
from typing import Iterable, Iterator, Union, Tuple, List from typing import Iterable, Iterator, Union, Tuple, List
from DHParser.preprocess import SourceMapFunc
from DHParser.stringview import StringView from DHParser.stringview import StringView
__all__ = ('Error', __all__ = ('Error',
...@@ -28,11 +28,12 @@ __all__ = ('Error', ...@@ -28,11 +28,12 @@ __all__ = ('Error',
'has_errors', 'has_errors',
'only_errors', 'only_errors',
'linebreaks', 'linebreaks',
'line_col') 'line_col',
'remap_error_locations')
class Error: class Error:
__slots__ = ['message', 'level', 'code', 'pos', 'line', 'column'] __slots__ = ['message', 'level', 'code', 'pos', 'orig_pos', 'line', 'column']
# error levels # error levels
...@@ -49,12 +50,13 @@ class Error: ...@@ -49,12 +50,13 @@ class Error:
MANDATORY_CONTINUATION = 1001 MANDATORY_CONTINUATION = 1001
def __init__(self, message: str, code: int = ERROR, def __init__(self, message: str, code: int = ERROR, pos: int = -1,
pos: int = -1, line: int = -1, column: int = -1) -> None: orig_pos: int = -1, line: int = -1, column: int = -1) -> None:
self.message = message self.message = message
assert code >= 0 assert code >= 0
self.code = code self.code = code
self.pos = pos self.pos = pos
self.orig_pos = orig_pos
self.line = line self.line = line
self.column = column self.column = column
...@@ -65,8 +67,8 @@ class Error: ...@@ -65,8 +67,8 @@ class Error:
return prefix + "%s: %s" % (self.level_str, self.message) return prefix + "%s: %s" % (self.level_str, self.message)
def __repr__(self): def __repr__(self):
return 'Error("%s", %s, %i, %i, %i)' \ return 'Error("%s", %s, %i, %i, %i, %i)' \
% (self.message, repr(self.code), self.pos, self.line, self.column) % (self.message, repr(self.code), self.pos, self.orig_pos, self.line, self.column)
@property @property
def level_str(self): def level_str(self):
...@@ -110,6 +112,13 @@ def only_errors(messages: Iterable[Error], level: int = Error.ERROR) -> Iterator ...@@ -110,6 +112,13 @@ def only_errors(messages: Iterable[Error], level: int = Error.ERROR) -> Iterator
return (err for err in messages if err.code >= level) return (err for err in messages if err.code >= level)
#######################################################################
#
# Setting of line, column and position properties of error messages.
#
#######################################################################
def linebreaks(text: Union[StringView, str]) -> List[int]: def linebreaks(text: Union[StringView, str]) -> List[int]:
""" """
Returns a list of indices of all line breaks in the text. Returns a list of indices of all line breaks in the text.
...@@ -123,24 +132,13 @@ def linebreaks(text: Union[StringView, str]) -> List[int]: ...@@ -123,24 +132,13 @@ def linebreaks(text: Union[StringView, str]) -> List[int]:
return lbr return lbr
@functools.singledispatch def line_col(lbreaks: List[int], pos: int) -> Tuple[int, int]:
def line_col(text: Union[StringView, str], pos: int) -> Tuple[int, int]:
"""
Returns the position within a text as (line, column)-tuple.
"""
if pos < 0 or pos > len(text): # one character behind EOF is still an allowed position!
raise ValueError('Position %i outside text of length %s !' % (pos, len(text)))
line = text.count("\n", 0, pos) + 1
column = pos - text.rfind("\n", 0, pos)
return line, column
@line_col.register(list)
def _line_col(lbreaks: List[int], pos: int) -> Tuple[int, int]:
""" """
Returns the position within a text as (line, column)-tuple based Returns the position within a text as (line, column)-tuple based
on a list of all line breaks, including -1 and EOF. on a list of all line breaks, including -1 and EOF.
""" """
if not lbreaks and pos >= 0:
return 0, pos
if pos < 0 or pos > lbreaks[-1]: # one character behind EOF is still an allowed position! if pos < 0 or pos > lbreaks[-1]: # one character behind EOF is still an allowed position!
raise ValueError('Position %i outside text of length %s !' % (pos, lbreaks[-1])) raise ValueError('Position %i outside text of length %s !' % (pos, lbreaks[-1]))
line = bisect.bisect_left(lbreaks, pos) line = bisect.bisect_left(lbreaks, pos)
...@@ -148,20 +146,37 @@ def _line_col(lbreaks: List[int], pos: int) -> Tuple[int, int]: ...@@ -148,20 +146,37 @@ def _line_col(lbreaks: List[int], pos: int) -> Tuple[int, int]:
return line, column return line, column
# def error_messages(source_text:str, errors: List[Error]) -> List[str]: # def line_col(text: Union[StringView, str], pos: int) -> Tuple[int, int]:
# """Adds line, column information for error messages, if the position # """
# is given. # Returns the position within a text as (line, column)-tuple.
#
# Args:
# source_text (str): The source text on which the errors occurred.
# (Needed in order to determine the line and column numbers.)
# errors (list): The list of errors as returned by the method
# ``collect_errors()`` of a Node object
# Returns:
# The same list of error messages, which now contain line and
# column numbers.
# """ # """
# for err in errors: # if pos < 0 or pos > len(text): # one character behind EOF is still an allowed position!
# if err.pos >= 0 and err.line <= 0: # raise ValueError('Position %i outside text of length %s !' % (pos, len(text)))
# err.line, err.column = line_col(source_text, err.pos) # line = text.count("\n", 0, pos) + 1
# return errors # column = pos - text.rfind("\n", 0, pos)
# return line, column
def remap_error_locations(errors: List[Error],
                          original_text: Union[StringView, str],
                          source_mapping: SourceMapFunc=lambda i: i) -> List[Error]:
    """Fill in the original-source location of every error, in place.

    For each error the position stored in ``pos`` is passed through
    ``source_mapping``. The mapped position is stored in ``orig_pos`` and
    then converted into a (line, column) pair by means of the line breaks
    of ``original_text``.

    Args:
        errors:  The list of errors as returned by the method
            ``collect_errors()`` of a Node object. Every error must
            carry a non-negative ``pos`` value.
        original_text:  The source text on which the errors occurred.
            Its line breaks determine the line and column numbers.
        source_mapping:  A function that maps error positions to their
            positions in the original source file. The default maps
            every position onto itself.

    Returns:
        The very same list that was passed as ``errors``. The return
        value is a mere convenience: the ``orig_pos``, ``line`` and
        ``column`` attributes of the contained errors have been changed
        in place.
    """
    # Compute the line break table once; it is reused for every error.
    breaks = linebreaks(original_text)
    for error in errors:
        assert error.pos >= 0
        mapped_pos = source_mapping(error.pos)
        error.orig_pos = mapped_pos
        error.line, error.column = line_col(breaks, mapped_pos)
    return errors
...@@ -60,7 +60,7 @@ import copy ...@@ -60,7 +60,7 @@ import copy
import html import html
import os import os
from DHParser.error import Error, is_error, has_errors, linebreaks, line_col from DHParser.error import Error, is_error, has_errors, linebreaks, line_col, remap_error_locations
from DHParser.stringview import StringView, EMPTY_STRING_VIEW from DHParser.stringview import StringView, EMPTY_STRING_VIEW
from DHParser.syntaxtree import Node, TransformationFunc, ParserBase, WHITESPACE_PTYPE, \ from DHParser.syntaxtree import Node, TransformationFunc, ParserBase, WHITESPACE_PTYPE, \
TOKEN_PTYPE, ZOMBIE_PARSER TOKEN_PTYPE, ZOMBIE_PARSER
...@@ -332,6 +332,7 @@ def add_parser_guard(parser_func): ...@@ -332,6 +332,7 @@ def add_parser_guard(parser_func):
# otherwise also cache None-results # otherwise also cache None-results
parser.visited[location] = (None, rest) parser.visited[location] = (None, rest)
else: else:
assert node._pos < 0
node._pos = grammar.document_length__ - location node._pos = grammar.document_length__ - location
assert node._pos >= 0, str("%i != %i" % (grammar.document_length__, location)) assert node._pos >= 0, str("%i != %i" % (grammar.document_length__, location))
if (grammar.last_rb__loc__ > location if (grammar.last_rb__loc__ > location
...@@ -431,13 +432,13 @@ class Parser(ParserBase): ...@@ -431,13 +432,13 @@ class Parser(ParserBase):
# add "aspect oriented" wrapper around parser calls # add "aspect oriented" wrapper around parser calls
# for memoizing, left recursion and tracing # for memoizing, left recursion and tracing
if not isinstance(self, Forward): # should Forward-Parser not be guarded? Not sure...
guarded_parser_call = add_parser_guard(self.__class__.__call__) guarded_parser_call = add_parser_guard(self.__class__.__call__)
# The following check is necessary for classes that don't override # The following check is necessary for classes that don't override
# the __call__() method, because in these cases the non-overridden # the __call__() method, because in these cases the non-overridden
# __call__()-method would be substituted a second time! # __call__()-method would be substituted a second time!
if self.__class__.__call__.__code__ != guarded_parser_call.__code__: if self.__class__.__call__.__code__ != guarded_parser_call.__code__:
self.__class__.__call__ = guarded_parser_call self.__class__.__call__ = guarded_parser_call
def __deepcopy__(self, memo): def __deepcopy__(self, memo):
"""Deepcopy method of the parser. Upon instantiation of a Grammar- """Deepcopy method of the parser. Upon instantiation of a Grammar-
...@@ -2251,6 +2252,7 @@ def compile_source(source: str, ...@@ -2251,6 +2252,7 @@ def compile_source(source: str,
log_file_name = logfile_basename(source, compiler) log_file_name = logfile_basename(source, compiler)
if preprocessor is None: if preprocessor is None:
source_text = original_text source_text = original_text
source_mapping = lambda i: i
else: else:
source_text, source_mapping = with_source_mapping(preprocessor(original_text)) source_text, source_mapping = with_source_mapping(preprocessor(original_text))
syntax_tree = parser(source_text) syntax_tree = parser(source_text)
...@@ -2263,17 +2265,18 @@ def compile_source(source: str, ...@@ -2263,17 +2265,18 @@ def compile_source(source: str,
# likely that error list gets littered with compile error messages # likely that error list gets littered with compile error messages
result = None result = None
efl = syntax_tree.error_flag efl = syntax_tree.error_flag
messages = syntax_tree.collect_errors(source_text, clear_errors=True) messages = syntax_tree.collect_errors(clear_errors=True)
if not is_error(efl): if not is_error(efl):
transformer(syntax_tree) transformer(syntax_tree)
efl = max(efl, syntax_tree.error_flag) efl = max(efl, syntax_tree.error_flag)
messages.extend(syntax_tree.collect_errors(source_text, clear_errors=True)) messages.extend(syntax_tree.collect_errors(clear_errors=True))
if is_logging(): if is_logging():
syntax_tree.log(log_file_name + '.ast') syntax_tree.log(log_file_name + '.ast')
if not is_error(syntax_tree.error_flag): if not is_error(syntax_tree.error_flag):
result = compiler(syntax_tree) result = compiler(syntax_tree)
# print(syntax_tree.as_sxpr()) # print(syntax_tree.as_sxpr())
messages.extend(syntax_tree.collect_errors(source_text)) messages.extend(syntax_tree.collect_errors())
syntax_tree.error_flag = max(syntax_tree.error_flag, efl) syntax_tree.error_flag = max(syntax_tree.error_flag, efl)
remap_error_locations(messages, original_text, source_mapping)
return result, messages, syntax_tree return result, messages, syntax_tree
...@@ -23,7 +23,7 @@ from functools import partial ...@@ -23,7 +23,7 @@ from functools import partial
from DHParser.error import Error, linebreaks, line_col from DHParser.error import Error, linebreaks, line_col
from DHParser.stringview import StringView from DHParser.stringview import StringView
from DHParser.toolkit import is_logging, log_dir, identity, re, typing from DHParser.toolkit import is_logging, log_dir, re, typing
from typing import Any, Callable, cast, Iterator, List, Union, Tuple, Hashable, Optional from typing import Any, Callable, cast, Iterator, List, Union, Tuple, Hashable, Optional
...@@ -320,8 +320,8 @@ class Node(collections.abc.Sized): ...@@ -320,8 +320,8 @@ class Node(collections.abc.Sized):
self.children = (result,) self.children = (result,)
self._result = self.children self._result = self.children
self.error_flag = result.error_flag self.error_flag = result.error_flag
if self._pos < 0: # if self._pos < 0:
self._pos = result._pos # self._pos = result._pos
else: else:
if isinstance(result, tuple): if isinstance(result, tuple):
self.children = result self.children = result
...@@ -329,8 +329,8 @@ class Node(collections.abc.Sized): ...@@ -329,8 +329,8 @@ class Node(collections.abc.Sized):
if result: if result:
if self.error_flag == 0: if self.error_flag == 0:
self.error_flag = max(child.error_flag for child in self.children) self.error_flag = max(child.error_flag for child in self.children)
if self._pos < 0: # if self._pos < 0:
self._pos = result[0]._pos # self._pos = result[0]._pos
else: else:
self.children = NoChildren self.children = NoChildren
self._result = str(result) self._result = str(result)
...@@ -367,23 +367,10 @@ class Node(collections.abc.Sized): ...@@ -367,23 +367,10 @@ class Node(collections.abc.Sized):
def pos(self) -> int: def pos(self) -> int:
"""Returns the position of the Node's content in the source text.""" """Returns the position of the Node's content in the source text."""
if self._pos < 0: if self._pos < 0:
raise AssertionError("position value not initialized!") raise AssertionError("Position value not initialized!")
return self._pos return self._pos
# @pos.setter
# def pos(self, pos: int):
# assert self._pos == pos, str("%i != %i" % (self._pos, pos))
# offset = 0
# # recursively adjust pos-values of all children
# for child in self.children:
# assert child.pos == pos + offset
# offset += len(child)
# # add pos-values to Error-objects
# for err in self._errors:
# err.pos = pos
def init_pos(self, pos: int, overwrite: bool = False) -> 'Node': def init_pos(self, pos: int, overwrite: bool = False) -> 'Node':
""" """
(Re-)initialize position value. Usually, the parser guard (Re-)initialize position value. Usually, the parser guard
...@@ -431,29 +418,19 @@ class Node(collections.abc.Sized): ...@@ -431,29 +418,19 @@ class Node(collections.abc.Sized):
return self return self
def collect_errors(self, document: Union[StringView, str] = '', clear_errors=False) \ def collect_errors(self, clear_errors=False) -> List[Error]:
-> List[Error]:
""" """
Recursively adds line- and column-numbers to all error objects. Recursively adds line- and column-numbers to all error objects.
Returns all errors of this node or any child node in the form Returns all errors of this node or any child node in the form
of a set of tuples (position, error_message), where position of a set of tuples (position, error_message), where position
is always relative to this node. is always relative to this node.
""" """
if self.error_flag:
lbreaks = linebreaks(document) if document else []
return self._collect_errors(lbreaks, clear_errors)
else:
return []
def _collect_errors(self, lbreaks: List[int] = [], clear_errors=False) -> List[Error]:
errors = self.errors errors = self.errors
if errors and lbreaks: for err in errors:
for err in errors: err.pos = self.pos
err.pos = self.pos
err.line, err.column = line_col(lbreaks, err.pos)
if self.children: if self.children:
for child in self.children: for child in self.children:
errors.extend(child._collect_errors(lbreaks, clear_errors)) errors.extend(child.collect_errors(clear_errors))
if clear_errors: if clear_errors:
self._errors = [] self._errors = []
self.error_flag = 0 self.error_flag = 0
...@@ -467,7 +444,7 @@ class Node(collections.abc.Sized): ...@@ -467,7 +444,7 @@ class Node(collections.abc.Sized):
def _tree_repr(self, tab, open_fn, close_fn, data_fn=identity, density=0) -> str: def _tree_repr(self, tab, open_fn, close_fn, data_fn=lambda i: i, density=0) -> str:
""" """
Generates a tree representation of this node and its children Generates a tree representation of this node and its children
in string form. in string form.
...@@ -569,7 +546,7 @@ class Node(collections.abc.Sized): ...@@ -569,7 +546,7 @@ class Node(collections.abc.Sized):
txt = '<' + node.tag_name txt = '<' + node.tag_name
# s += ' pos="%i"' % node.pos # s += ' pos="%i"' % node.pos
if src: if src:
txt += ' line="%i" col="%i"' % line_col(src, node.pos) txt += ' line="%i" col="%i"' % line_col(line_breaks, node.pos)
if showerrors and node.errors: if showerrors and node.errors:
txt += ' err="%s"' % ''.join(str(err).replace('"', r'\"') for err in node.errors) txt += ' err="%s"' % ''.join(str(err).replace('"', r'\"') for err in node.errors)
return txt + ">\n" return txt + ">\n"
...@@ -578,6 +555,7 @@ class Node(collections.abc.Sized): ...@@ -578,6 +555,7 @@ class Node(collections.abc.Sized):
"""Returns the closing string for the representation of `node`.""" """Returns the closing string for the representation of `node`."""
return '\n</' + node.tag_name + '>' return '\n</' + node.tag_name + '>'
line_breaks = linebreaks(src) if src else []
return self._tree_repr(' ', opening, closing, density=1) return self._tree_repr(' ', opening, closing, density=1)
...@@ -663,6 +641,10 @@ def mock_syntax_tree(sxpr): ...@@ -663,6 +641,10 @@ def mock_syntax_tree(sxpr):
sxpr = sxpr[match.end():].strip() sxpr = sxpr[match.end():].strip()
if sxpr[0] == '(': if sxpr[0] == '(':
result = tuple(mock_syntax_tree(block) for block in next_block(sxpr)) result = tuple(mock_syntax_tree(block) for block in next_block(sxpr))
pos = 0
for node in result:
node._pos = pos
pos += len(node)
else: else:
lines = [] lines = []
while sxpr and sxpr[0] != ')': while sxpr and sxpr[0] != ')':
...@@ -678,7 +660,9 @@ def mock_syntax_tree(sxpr): ...@@ -678,7 +660,9 @@ def mock_syntax_tree(sxpr):
lines.append(sxpr[:match.end()]) lines.append(sxpr[:match.end()])
sxpr = sxpr[match.end():] sxpr = sxpr[match.end():]
result = "\n".join(lines) result = "\n".join(lines)
return Node(MockParser(name, ':' + class_name), result) node = Node(MockParser(name, ':' + class_name), result)
node._pos = 0
return node
TransformationFunc = Union[Callable[[Node], Any], partial] TransformationFunc = Union[Callable[[Node], Any], partial]
......
...@@ -25,7 +25,7 @@ import os ...@@ -25,7 +25,7 @@ import os
from DHParser.toolkit import is_logging, clear_logs, re from DHParser.toolkit import is_logging, clear_logs, re
from DHParser.syntaxtree import mock_syntax_tree, flatten_sxpr from DHParser.syntaxtree import mock_syntax_tree, flatten_sxpr
from DHParser.error import is_error from DHParser.error import is_error, remap_error_locations
__all__ = ('unit_from_configfile', __all__ = ('unit_from_configfile',
'unit_from_json', 'unit_from_json',
...@@ -181,10 +181,10 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve ...@@ -181,10 +181,10 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
tests.setdefault('__ast__', {})[test_name] = ast tests.setdefault('__ast__', {})[test_name] = ast
ast.log("match_%s_%s.ast" % (parser_name, test_name)) ast.log("match_%s_%s.ast" % (parser_name, test_name))
if is_error(cst.error_flag): if is_error(cst.error_flag):
errors = remap_error_locations(cst.collect_errors(), test_code)
errata.append('Match test "%s" for parser "%s" failed:\n\tExpr.: %s\n\n\t%s\n\n' % errata.append('Match test "%s" for parser "%s" failed:\n\tExpr.: %s\n\n\t%s\n\n' %
(test_name, parser_name, '\n\t'.join(test_code.split('\n')), (test_name, parser_name, '\n\t'.join(test_code.split('\n')),
'\n\t'.join(str(m).replace('\n', '\n\t\t') for m in '\n\t'.join(str(m).replace('\n', '\n\t\t') for m in errors)))
cst.collect_errors(test_code))))
tests.setdefault('__err__', {})[test_name] = errata[-1] tests.setdefault('__err__', {})[test_name] = errata[-1]
# write parsing-history log only in case of failure! # write parsing-history log only in case of failure!
if is_logging(): if is_logging():
......
...@@ -66,8 +66,7 @@ __all__ = ('logging', ...@@ -66,8 +66,7 @@ __all__ = ('logging',
'expand_table', 'expand_table',
'compile_python_object', 'compile_python_object',
'smart_list', 'smart_list',
'sane_parser_name', 'sane_parser_name')
'identity')
####################################################################### #######################################################################
...@@ -408,11 +407,6 @@ def sane_parser_name(name) -> bool: ...@@ -408,11 +407,6 @@ def sane_parser_name(name) -> bool:
return name and name[:2] != '__' and name[-2:] != '__' return name and name[:2] != '__' and name[-2:] != '__'
def identity(anything: Any) -> Any:
    """Return the given argument unchanged.

    Identity function for code written in a functional programming style.
    """
    return anything
####################################################################### #######################################################################
# #
# initialization # initialization
......
...@@ -2,6 +2,13 @@ General TODO-List ...@@ -2,6 +2,13 @@ General TODO-List
================= =================
Readability of Code
-------------------
**direct vs inverse location counting in `parse.py:guarded_call`**:
count location values from the beginning of the text rather than from
its end
Optimizations Optimizations
------------- -------------
......
...@@ -29,43 +29,33 @@ from DHParser.error import linebreaks, line_col ...@@ -29,43 +29,33 @@ from DHParser.error import linebreaks, line_col
class TestErrorSupport: class TestErrorSupport:
def mini_suite(self, s, data, offset): def mini_suite(self, s, lbreaks, offset):
l, c = line_col(data, 0) l, c = line_col(lbreaks, 0)
assert (l, c) == (1, 1), str((l, c)) assert (l, c) == (1, 1), str((l, c))
l, c = line_col(data, 0 + offset) l, c = line_col(lbreaks, 0 + offset)
assert (l, c) == (1 + offset, 1), str((l, c)) assert (l, c) == (1 + offset, 1), str((l, c))
l, c = line_col(data, 1 + offset) l, c = line_col(lbreaks, 1 + offset)
assert (l, c) == (1 + offset, 2), str((l, c)) assert (l, c) == (1 + offset, 2), str((l, c))
l, c = line_col(data, 9 + offset) l, c = line_col(lbreaks, 9 + offset)
assert (l, c) == (1 + offset, 10), str((l, c)) assert (l, c) == (1 + offset, 10), str((l, c))
l, c = line_col(data, 10 + offset) l, c = line_col(lbreaks, 10 + offset)
assert (l, c) == (2 + offset, 1), str((l, c)) assert (l, c) == (2 + offset, 1), str((l, c))
l, c = line_col(data, 18 + offset) l, c = line_col(lbreaks, 18 + offset)
assert (l, c) == (2 + offset, 9), str((l, c)) assert (l, c) == (2 + offset, 9), str((l, c))
l, c = line_col(data, 19 + offset) l, c = line_col(lbreaks, 19 + offset)
assert (l, c) == (2 + offset, 10), str((l, c)) assert (l, c) == (2 + offset, 10), str((l, c))
try: try:
l, c = line_col(data, -1) l, c = line_col(lbreaks, -1)
assert False, "ValueError expected for negative position." assert False, "ValueError expected for negative position."
except ValueError: except ValueError:
pass pass
try: try:
l, c = line_col(data, len(s) + 1) l, c = line_col(lbreaks, len(s) + 1)
assert False, "ValueError expected for postion > pos(EOF)+1." assert False, "ValueError expected for postion > pos(EOF)+1."
except ValueError: except ValueError:
pass pass