Commit 5da469de authored by eckhart's avatar eckhart

- correct error locations when compiling with preprocessor

parent 3dab649e
......@@ -17,9 +17,9 @@ permissions and limitations under the License.
"""
import bisect
import functools
from typing import Iterable, Iterator, Union, Tuple, List
from DHParser.preprocess import SourceMapFunc
from DHParser.stringview import StringView
__all__ = ('Error',
......@@ -28,11 +28,12 @@ __all__ = ('Error',
'has_errors',
'only_errors',
'linebreaks',
'line_col')
'line_col',
'remap_error_locations')
class Error:
__slots__ = ['message', 'level', 'code', 'pos', 'line', 'column']
__slots__ = ['message', 'level', 'code', 'pos', 'orig_pos', 'line', 'column']
# error levels
......@@ -49,12 +50,13 @@ class Error:
MANDATORY_CONTINUATION = 1001
def __init__(self, message: str, code: int = ERROR,
pos: int = -1, line: int = -1, column: int = -1) -> None:
def __init__(self, message: str, code: int = ERROR, pos: int = -1,
orig_pos: int = -1, line: int = -1, column: int = -1) -> None:
self.message = message
assert code >= 0
self.code = code
self.pos = pos
self.orig_pos = orig_pos
self.line = line
self.column = column
......@@ -65,8 +67,8 @@ class Error:
return prefix + "%s: %s" % (self.level_str, self.message)
def __repr__(self):
return 'Error("%s", %s, %i, %i, %i)' \
% (self.message, repr(self.code), self.pos, self.line, self.column)
return 'Error("%s", %s, %i, %i, %i, %i)' \
% (self.message, repr(self.code), self.pos, self.orig_pos, self.line, self.column)
@property
def level_str(self):
......@@ -110,6 +112,13 @@ def only_errors(messages: Iterable[Error], level: int = Error.ERROR) -> Iterator
return (err for err in messages if err.code >= level)
#######################################################################
#
# Setting of line, column and position properties of error messages.
#
#######################################################################
def linebreaks(text: Union[StringView, str]) -> List[int]:
"""
Returns a list of the indices of all line breaks in the text.
......@@ -123,24 +132,13 @@ def linebreaks(text: Union[StringView, str]) -> List[int]:
return lbr
@functools.singledispatch
def line_col(text: Union[StringView, str], pos: int) -> Tuple[int, int]:
"""
Returns the position within a text as (line, column)-tuple.
"""
if pos < 0 or pos > len(text): # one character behind EOF is still an allowed position!
raise ValueError('Position %i outside text of length %s !' % (pos, len(text)))
line = text.count("\n", 0, pos) + 1
column = pos - text.rfind("\n", 0, pos)
return line, column
@line_col.register(list)
def _line_col(lbreaks: List[int], pos: int) -> Tuple[int, int]:
def line_col(lbreaks: List[int], pos: int) -> Tuple[int, int]:
"""
Returns the position within a text as (line, column)-tuple based
on a list of all line breaks, including -1 and EOF.
"""
if not lbreaks and pos >= 0:
return 0, pos
if pos < 0 or pos > lbreaks[-1]: # one character behind EOF is still an allowed position!
raise ValueError('Position %i outside text of length %s !' % (pos, lbreaks[-1]))
line = bisect.bisect_left(lbreaks, pos)
......@@ -148,20 +146,37 @@ def _line_col(lbreaks: List[int], pos: int) -> Tuple[int, int]:
return line, column
# def error_messages(source_text:str, errors: List[Error]) -> List[str]:
# """Adds line, column information for error messages, if the position
# is given.
#
# Args:
# source_text (str): The source text on which the errors occurred.
# (Needed in order to determine the line and column numbers.)
# errors (list): The list of errors as returned by the method
# ``collect_errors()`` of a Node object
# Returns:
# The same list of error messages, which now contain line and
# column numbers.
# def line_col(text: Union[StringView, str], pos: int) -> Tuple[int, int]:
# """
# Returns the position within a text as (line, column)-tuple.
# """
# for err in errors:
# if err.pos >= 0 and err.line <= 0:
# err.line, err.column = line_col(source_text, err.pos)
# return errors
# if pos < 0 or pos > len(text): # one character behind EOF is still an allowed position!
# raise ValueError('Position %i outside text of length %s !' % (pos, len(text)))
# line = text.count("\n", 0, pos) + 1
# column = pos - text.rfind("\n", 0, pos)
# return line, column
def remap_error_locations(errors: List[Error],
                          original_text: Union[StringView, str],
                          source_mapping: SourceMapFunc = lambda i: i) -> List[Error]:
    """Fill in original position, line and column of every error, in place.

    For each error, its ``pos`` value is passed through ``source_mapping``
    and the result is stored in ``orig_pos``. Line and column are then
    looked up for that mapped position in the line-break table of
    ``original_text``.

    Args:
        errors: The error objects to update. Every error must carry a
            non-negative ``pos``; this is checked with an assertion.
        original_text: The text whose line breaks are used to compute
            the line and column numbers.
        source_mapping: A function that takes an error position and returns
            the corresponding position in ``original_text``. Defaults to
            the identity mapping.

    Returns:
        The very same list that was passed in as ``errors``. It is returned
        for convenience only; the ``orig_pos``, ``line`` and ``column``
        attributes of its items have been modified in place.
    """
    # The line-break table is computed once and shared by all look-ups.
    break_table = linebreaks(original_text)
    for error in errors:
        assert error.pos >= 0
        mapped_pos = source_mapping(error.pos)
        error.orig_pos = mapped_pos
        error.line, error.column = line_col(break_table, mapped_pos)
    return errors
......@@ -60,7 +60,7 @@ import copy
import html
import os
from DHParser.error import Error, is_error, has_errors, linebreaks, line_col
from DHParser.error import Error, is_error, has_errors, linebreaks, line_col, remap_error_locations
from DHParser.stringview import StringView, EMPTY_STRING_VIEW
from DHParser.syntaxtree import Node, TransformationFunc, ParserBase, WHITESPACE_PTYPE, \
TOKEN_PTYPE, ZOMBIE_PARSER
......@@ -332,6 +332,7 @@ def add_parser_guard(parser_func):
# otherwise also cache None-results
parser.visited[location] = (None, rest)
else:
assert node._pos < 0
node._pos = grammar.document_length__ - location
assert node._pos >= 0, str("%i != %i" % (grammar.document_length__, location))
if (grammar.last_rb__loc__ > location
......@@ -431,13 +432,13 @@ class Parser(ParserBase):
# add "aspect oriented" wrapper around parser calls
# for memoizing, left recursion and tracing
guarded_parser_call = add_parser_guard(self.__class__.__call__)
# The following check is necessary for classes that don't override
# the __call__() method, because in these cases the non-overridden
# __call__()-method would be substituted a second time!
if self.__class__.__call__.__code__ != guarded_parser_call.__code__:
self.__class__.__call__ = guarded_parser_call
if not isinstance(self, Forward): # should Forward-Parser no be guarded? Not sure...
guarded_parser_call = add_parser_guard(self.__class__.__call__)
# The following check is necessary for classes that don't override
# the __call__() method, because in these cases the non-overridden
# __call__()-method would be substituted a second time!
if self.__class__.__call__.__code__ != guarded_parser_call.__code__:
self.__class__.__call__ = guarded_parser_call
def __deepcopy__(self, memo):
"""Deepcopy method of the parser. Upon instantiation of a Grammar-
......@@ -2251,6 +2252,7 @@ def compile_source(source: str,
log_file_name = logfile_basename(source, compiler)
if preprocessor is None:
source_text = original_text
source_mapping = lambda i: i
else:
source_text, source_mapping = with_source_mapping(preprocessor(original_text))
syntax_tree = parser(source_text)
......@@ -2263,17 +2265,18 @@ def compile_source(source: str,
# likely that error list gets littered with compile error messages
result = None
efl = syntax_tree.error_flag
messages = syntax_tree.collect_errors(source_text, clear_errors=True)
messages = syntax_tree.collect_errors(clear_errors=True)
if not is_error(efl):
transformer(syntax_tree)
efl = max(efl, syntax_tree.error_flag)
messages.extend(syntax_tree.collect_errors(source_text, clear_errors=True))
messages.extend(syntax_tree.collect_errors(clear_errors=True))
if is_logging():
syntax_tree.log(log_file_name + '.ast')
if not is_error(syntax_tree.error_flag):
result = compiler(syntax_tree)
# print(syntax_tree.as_sxpr())
messages.extend(syntax_tree.collect_errors(source_text))
messages.extend(syntax_tree.collect_errors())
syntax_tree.error_flag = max(syntax_tree.error_flag, efl)
remap_error_locations(messages, original_text, source_mapping)
return result, messages, syntax_tree
......@@ -23,7 +23,7 @@ from functools import partial
from DHParser.error import Error, linebreaks, line_col
from DHParser.stringview import StringView
from DHParser.toolkit import is_logging, log_dir, identity, re, typing
from DHParser.toolkit import is_logging, log_dir, re, typing
from typing import Any, Callable, cast, Iterator, List, Union, Tuple, Hashable, Optional
......@@ -320,8 +320,8 @@ class Node(collections.abc.Sized):
self.children = (result,)
self._result = self.children
self.error_flag = result.error_flag
if self._pos < 0:
self._pos = result._pos
# if self._pos < 0:
# self._pos = result._pos
else:
if isinstance(result, tuple):
self.children = result
......@@ -329,8 +329,8 @@ class Node(collections.abc.Sized):
if result:
if self.error_flag == 0:
self.error_flag = max(child.error_flag for child in self.children)
if self._pos < 0:
self._pos = result[0]._pos
# if self._pos < 0:
# self._pos = result[0]._pos
else:
self.children = NoChildren
self._result = str(result)
......@@ -367,23 +367,10 @@ class Node(collections.abc.Sized):
def pos(self) -> int:
"""Returns the position of the Node's content in the source text."""
if self._pos < 0:
raise AssertionError("position value not initialized!")
raise AssertionError("Position value not initialized!")
return self._pos
# @pos.setter
# def pos(self, pos: int):
# assert self._pos == pos, str("%i != %i" % (self._pos, pos))
# offset = 0
# # recursively adjust pos-values of all children
# for child in self.children:
# assert child.pos == pos + offset
# offset += len(child)
# # add pos-values to Error-objects
# for err in self._errors:
# err.pos = pos
def init_pos(self, pos: int, overwrite: bool = False) -> 'Node':
"""
(Re-)initialize position value. Usually, the parser guard
......@@ -431,29 +418,19 @@ class Node(collections.abc.Sized):
return self
def collect_errors(self, document: Union[StringView, str] = '', clear_errors=False) \
-> List[Error]:
def collect_errors(self, clear_errors=False) -> List[Error]:
"""
Recursively adds line- and column-numbers to all error objects.
Returns all errors of this node or any child node in the form
of a set of tuples (position, error_message), where position
is always relative to this node.
"""
if self.error_flag:
lbreaks = linebreaks(document) if document else []
return self._collect_errors(lbreaks, clear_errors)
else:
return []
def _collect_errors(self, lbreaks: List[int] = [], clear_errors=False) -> List[Error]:
errors = self.errors
if errors and lbreaks:
for err in errors:
err.pos = self.pos
err.line, err.column = line_col(lbreaks, err.pos)
for err in errors:
err.pos = self.pos
if self.children:
for child in self.children:
errors.extend(child._collect_errors(lbreaks, clear_errors))
errors.extend(child.collect_errors(clear_errors))
if clear_errors:
self._errors = []
self.error_flag = 0
......@@ -467,7 +444,7 @@ class Node(collections.abc.Sized):
def _tree_repr(self, tab, open_fn, close_fn, data_fn=identity, density=0) -> str:
def _tree_repr(self, tab, open_fn, close_fn, data_fn=lambda i: i, density=0) -> str:
"""
Generates a tree representation of this node and its children
in string from.
......@@ -569,7 +546,7 @@ class Node(collections.abc.Sized):
txt = '<' + node.tag_name
# s += ' pos="%i"' % node.pos
if src:
txt += ' line="%i" col="%i"' % line_col(src, node.pos)
txt += ' line="%i" col="%i"' % line_col(line_breaks, node.pos)
if showerrors and node.errors:
txt += ' err="%s"' % ''.join(str(err).replace('"', r'\"') for err in node.errors)
return txt + ">\n"
......@@ -578,6 +555,7 @@ class Node(collections.abc.Sized):
"""Returns the closing string for the representation of `node`."""
return '\n</' + node.tag_name + '>'
line_breaks = linebreaks(src) if src else []
return self._tree_repr(' ', opening, closing, density=1)
......@@ -663,6 +641,10 @@ def mock_syntax_tree(sxpr):
sxpr = sxpr[match.end():].strip()
if sxpr[0] == '(':
result = tuple(mock_syntax_tree(block) for block in next_block(sxpr))
pos = 0
for node in result:
node._pos = pos
pos += len(node)
else:
lines = []
while sxpr and sxpr[0] != ')':
......@@ -678,7 +660,9 @@ def mock_syntax_tree(sxpr):
lines.append(sxpr[:match.end()])
sxpr = sxpr[match.end():]
result = "\n".join(lines)
return Node(MockParser(name, ':' + class_name), result)
node = Node(MockParser(name, ':' + class_name), result)
node._pos = 0
return node
TransformationFunc = Union[Callable[[Node], Any], partial]
......
......@@ -25,7 +25,7 @@ import os
from DHParser.toolkit import is_logging, clear_logs, re
from DHParser.syntaxtree import mock_syntax_tree, flatten_sxpr
from DHParser.error import is_error
from DHParser.error import is_error, remap_error_locations
__all__ = ('unit_from_configfile',
'unit_from_json',
......@@ -181,10 +181,10 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
tests.setdefault('__ast__', {})[test_name] = ast
ast.log("match_%s_%s.ast" % (parser_name, test_name))
if is_error(cst.error_flag):
errors = remap_error_locations(cst.collect_errors(), test_code)
errata.append('Match test "%s" for parser "%s" failed:\n\tExpr.: %s\n\n\t%s\n\n' %
(test_name, parser_name, '\n\t'.join(test_code.split('\n')),
'\n\t'.join(str(m).replace('\n', '\n\t\t') for m in
cst.collect_errors(test_code))))
'\n\t'.join(str(m).replace('\n', '\n\t\t') for m in errors)))
tests.setdefault('__err__', {})[test_name] = errata[-1]
# write parsing-history log only in case of failure!
if is_logging():
......
......@@ -66,8 +66,7 @@ __all__ = ('logging',
'expand_table',
'compile_python_object',
'smart_list',
'sane_parser_name',
'identity')
'sane_parser_name')
#######################################################################
......@@ -408,11 +407,6 @@ def sane_parser_name(name) -> bool:
return name and name[:2] != '__' and name[-2:] != '__'
def identity(anything: Any) -> Any:
    """Return the argument unchanged.

    Useful as a neutral default wherever a one-argument callable is
    expected (functional programming style).
    """
    return anything
#######################################################################
#
# initialization
......
......@@ -2,6 +2,13 @@ General TODO-List
=================
Readability of Code
-------------------
**direct vs inverse location counting in `parse.py:guarded_call`**:
use location value counting from the beginning rather than the end of
the text
Optimizations
-------------
......
......@@ -29,43 +29,33 @@ from DHParser.error import linebreaks, line_col
class TestErrorSupport:
def mini_suite(self, s, data, offset):
l, c = line_col(data, 0)
def mini_suite(self, s, lbreaks, offset):
l, c = line_col(lbreaks, 0)
assert (l, c) == (1, 1), str((l, c))
l, c = line_col(data, 0 + offset)
l, c = line_col(lbreaks, 0 + offset)
assert (l, c) == (1 + offset, 1), str((l, c))
l, c = line_col(data, 1 + offset)
l, c = line_col(lbreaks, 1 + offset)
assert (l, c) == (1 + offset, 2), str((l, c))
l, c = line_col(data, 9 + offset)
l, c = line_col(lbreaks, 9 + offset)
assert (l, c) == (1 + offset, 10), str((l, c))
l, c = line_col(data, 10 + offset)
l, c = line_col(lbreaks, 10 + offset)
assert (l, c) == (2 + offset, 1), str((l, c))
l, c = line_col(data, 18 + offset)
l, c = line_col(lbreaks, 18 + offset)
assert (l, c) == (2 + offset, 9), str((l, c))
l, c = line_col(data, 19 + offset)
l, c = line_col(lbreaks, 19 + offset)
assert (l, c) == (2 + offset, 10), str((l, c))
try:
l, c = line_col(data, -1)
l, c = line_col(lbreaks, -1)
assert False, "ValueError expected for negative position."
except ValueError:
pass
try:
l, c = line_col(data, len(s) + 1)
l, c = line_col(lbreaks, len(s) + 1)
assert False, "ValueError expected for postion > pos(EOF)+1."
except ValueError:
pass
def test_line_col(self):
s = "123456789\n123456789"
self.mini_suite(s, s, 0)
s = "\n123456789\n123456789"
self.mini_suite(s, s, 1)
s = "123456789\n123456789\n"
self.mini_suite(s, s, 0)
s = "\n123456789\n123456789\n"
self.mini_suite(s, s, 1)
def test_line_col_bisect(self):
s = "123456789\n123456789"
self.mini_suite(s, linebreaks(s), 0)
s = "\n123456789\n123456789"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment