Commit 4160b2ae authored by eckhart

- bug fixes for source mapping

parent 5da469de
@@ -29,7 +29,7 @@ __all__ = ('Error',
            'only_errors',
            'linebreaks',
            'line_col',
-           'remap_error_locations')
+           'adjust_error_locations')


 class Error:
@@ -63,7 +63,9 @@ class Error:
     def __str__(self):
         prefix = ''
         if self.line > 0:
-            prefix = "line: %3i, column: %2i, " % (self.line, self.column)
+            prefix = "line: %s, column: %s, " % \
+                     ("%4i" % self.line if self.line >= 0 else ' ???',
+                      "%3i" % self.column if self.column >= 0 else '???')
         return prefix + "%s: %s" % (self.level_str, self.message)

     def __repr__(self):
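
The new formatting pads known positions and prints '???' for unknown ones. A standalone sketch of just the changed expression, assuming a negative line or column signals an unknown position:

    line, column = -1, -1
    prefix = "line: %s, column: %s, " % \
             ("%4i" % line if line >= 0 else ' ???',
              "%3i" % column if column >= 0 else '???')
    assert prefix == "line:  ???, column: ???, "
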
@@ -157,9 +159,9 @@ def line_col(lbreaks: List[int], pos: int) -> Tuple[int, int]:
 #     return line, column


-def remap_error_locations(errors: List[Error],
-                          original_text: Union[StringView, str],
-                          source_mapping: SourceMapFunc=lambda i: i) -> List[Error]:
+def adjust_error_locations(errors: List[Error],
+                           original_text: Union[StringView, str],
+                           source_mapping: SourceMapFunc=lambda i: i) -> List[Error]:
     """Adds (or adjusts) line and column numbers of error messages in place.

     Args:
......
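
A hedged usage sketch of the renamed function, mirroring how compile_source calls it further down (errors, original_text, and source_mapping stand in for real values):

    errors = syntax_tree.collect_errors()
    adjust_error_locations(errors, original_text, source_mapping)
    for error in errors:
        print(error)  # line and column now refer to positions in original_text
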
@@ -60,12 +60,12 @@ import copy
 import html
 import os

-from DHParser.error import Error, is_error, has_errors, linebreaks, line_col, remap_error_locations
+from DHParser.error import Error, is_error, has_errors, linebreaks, line_col, adjust_error_locations
 from DHParser.stringview import StringView, EMPTY_STRING_VIEW
 from DHParser.syntaxtree import Node, TransformationFunc, ParserBase, WHITESPACE_PTYPE, \
     TOKEN_PTYPE, ZOMBIE_PARSER
 from DHParser.preprocess import BEGIN_TOKEN, END_TOKEN, RX_TOKEN_NAME, \
-    PreprocessorFunc, with_source_mapping
+    PreprocessorFunc, with_source_mapping, strip_tokens
 from DHParser.toolkit import is_logging, log_dir, logfile_basename, escape_re, sane_parser_name, \
     escape_control_characters, load_if_file, re, typing
 from typing import Any, Callable, cast, Dict, List, Set, Tuple, Union, Optional
@@ -2221,7 +2221,7 @@ def compile_source(source: str,
                    compiler: Compiler) -> Tuple[Any, List[Error], Node]:  # Node (AST) -> Any
     """
     Compiles a source in four stages:
-    1. Scanning (if needed)
+    1. Preprocessing (if needed)
     2. Parsing
     3. AST-transformation
     4. Compiling.
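
A hedged usage sketch of the whole pipeline, modeled on the call in test_error_position further below (all five arguments are placeholders):

    result, messages, syntax_tree = compile_source(
        source, preprocessor, grammar, transformer, compiler)
    for message in messages:
        print(message)  # error locations already adjusted to the original source
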
@@ -2260,7 +2260,7 @@ def compile_source(source: str,
             syntax_tree.log(log_file_name + '.cst')
             parser.log_parsing_history__(log_file_name)

-    assert is_error(syntax_tree.error_flag) or str(syntax_tree) == source_text, str(syntax_tree)
+    assert is_error(syntax_tree.error_flag) or str(syntax_tree) == strip_tokens(source_text)

     # only compile if there were no syntax errors, for otherwise it is
     # likely that error list gets littered with compile error messages
     result = None
@@ -2278,5 +2278,5 @@ def compile_source(source: str,
         messages.extend(syntax_tree.collect_errors())
         syntax_tree.error_flag = max(syntax_tree.error_flag, efl)
-    remap_error_locations(messages, original_text, source_mapping)
+    adjust_error_locations(messages, original_text, source_mapping)
     return result, messages, syntax_tree
@@ -31,6 +31,7 @@ __all__ = ('RX_TOKEN_NAME',
            'PreprocessorFunc',
            'PreprocessorResult',
            'make_token',
+           'strip_tokens',
            'nil_preprocessor',
            'chain_preprocessors',
            'prettyprint_tokenized',
@@ -127,6 +128,22 @@ def prettyprint_tokenized(tokenized: str) -> str:
     return tokenized.replace('\x1b', '<').replace('\x1c', '|').replace('\x1d', '>')


+def strip_tokens(tokenized: str) -> str:
+    """Replaces all tokens with the token's arguments."""
+    result = []
+    pos = 0
+    match = RX_TOKEN.search(tokenized, pos)
+    while match:
+        start, end = match.span()
+        result.append(tokenized[pos:start])
+        result.append(match.groupdict()['argument'])
+        pos = end
+        match = RX_TOKEN.search(tokenized, pos)
+    result.append(tokenized[pos:])
+    return ''.join(result)
+
+
 #######################################################################
 #
 # Source Maps - mapping source code positions between different
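
A hedged round-trip sketch for strip_tokens, assuming make_token(name, argument='') concatenates the token name and argument between the module's delimiter characters (which is what test_strip_tokens below relies on):

    assert strip_tokens(make_token('NAME', 'value') + ' rest') == 'value rest'
    # tokens inserted with an empty argument disappear entirely:
    tokenized = make_token('BEGIN_INDENT') + '    print(x)'
    assert strip_tokens(tokenized) == '    print(x)'
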
@@ -160,7 +177,7 @@ def tokenized_to_original_mapping(tokenized_source: str) -> SourceMap:
         d = tokenized_source.find(TOKEN_DELIMITER, i)
         e = tokenized_source.find(END_TOKEN, i)
         assert 0 <= d < e
-        o -= (d - i + 3)
+        o -= (d - i + 2)
         positions.extend([d + 1, e + 1])
         offsets.extend([o + 1, o])
         i = tokenized_source.find(BEGIN_TOKEN, e + 1)
......
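
The off-by-one fix corrects the cumulative offset by which positions behind a token are shifted back to the original text. A hedged usage sketch, combining it with source_map as the tests below do:

    srcmap = tokenized_to_original_mapping(tokenized_source)
    original_pos = source_map(pos_in_tokenized_source, srcmap)
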
@@ -25,7 +25,7 @@ import os

 from DHParser.toolkit import is_logging, clear_logs, re
 from DHParser.syntaxtree import mock_syntax_tree, flatten_sxpr
-from DHParser.error import is_error, remap_error_locations
+from DHParser.error import is_error, adjust_error_locations

 __all__ = ('unit_from_configfile',
            'unit_from_json',
@@ -181,7 +181,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
                 tests.setdefault('__ast__', {})[test_name] = ast
                 ast.log("match_%s_%s.ast" % (parser_name, test_name))
             if is_error(cst.error_flag):
-                errors = remap_error_locations(cst.collect_errors(), test_code)
+                errors = adjust_error_locations(cst.collect_errors(), test_code)
                 errata.append('Match test "%s" for parser "%s" failed:\n\tExpr.: %s\n\n\t%s\n\n' %
                               (test_name, parser_name, '\n\t'.join(test_code.split('\n')),
                                '\n\t'.join(str(m).replace('\n', '\n\t\t') for m in errors)))
......
@@ -25,8 +25,10 @@ limitations under the License.
 from functools import partial

 from DHParser.dsl import grammar_provider
 from DHParser.parse import compile_source
 from DHParser.preprocess import make_token, tokenized_to_original_mapping, source_map, \
-    BEGIN_TOKEN, END_TOKEN, TOKEN_DELIMITER, SourceMapFunc, SourceMap, chain_preprocessors
+    BEGIN_TOKEN, END_TOKEN, TOKEN_DELIMITER, SourceMapFunc, SourceMap, chain_preprocessors, \
+    strip_tokens
 from DHParser.toolkit import lstrip_docstring, typing
 from typing import Tuple
@@ -78,17 +80,17 @@ class TestTokenParsing:
             if indent > indent_level * 4:
                 assert indent == (indent_level + 1) * 4, str(indent)  # indent must be 4 spaces
                 indent_level += 1
-                transformed.append(make_token('BEGIN_INDENT'))
+                line = make_token('BEGIN_INDENT') + line
             elif indent <= (indent_level - 1) * 4:
                 while indent <= (indent_level - 1) * 4:
-                    transformed.append(make_token('END_INDENT'))
+                    line = make_token('END_INDENT') + line
                     indent_level -= 1
                 assert indent == (indent_level + 1) * 4  # indent must be 4 spaces
             else:
                 assert indent == indent_level * 4
             transformed.append(line)
         while indent_level > 0:
-            transformed.append(make_token('END_INDENT'))
+            transformed[-1] += make_token('END_INDENT')
             indent_level -= 1
         tokenized = '\n'.join(transformed)
         # print(prettyprint_tokenized(tokenized))
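
The fix attaches the indentation tokens to the line they annotate instead of appending them as lines of their own, so the tokenized text keeps the original line count. In prettyprint_tokenized notation, a hypothetical before/after:

    # before: the token occupied a line of its own, shifting all following lines
    #     def f():
    #     <BEGIN_INDENT|>
    #         return 0
    # after: the token prefixes the indented line, line numbers stay stable
    #     def f():
    #     <BEGIN_INDENT|>    return 0
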
@@ -101,11 +103,11 @@ class TestTokenParsing:
         for i, line in enumerate(lines):
             comment_pos = line.find('#')
             if comment_pos >= 0:
-                lines[i] = line[:comment_pos]
-                positions.append(pos + comment_pos)
-                offsets.append(len(line) - comment_pos)
-                pos += comment_pos
-            pos += len(line)
+                lines[i] = line[:comment_pos]
+                positions.append(pos - offsets[-1])
+                offsets.append(offsets[-1] + len(line) - comment_pos)
+            pos += len(lines[i])
         positions.append(pos)
         offsets.append(offsets[-1])
         return '\n'.join(lines), partial(source_map, srcmap=SourceMap(positions, offsets))
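
A worked trace of the corrected bookkeeping on a hypothetical two-line input, assuming source_map adds offsets[k] to any processed position at or beyond positions[k] (the same convention as tokenized_to_original_mapping above):

    # original : "x = 1  # one\ny = 2  # two"   (comments start at 7 and 20)
    # stripped : "x = 1  \ny = 2  "
    # first comment removes 5 chars  -> positions[1] = 7,  offsets[1] = 5
    # second comment removes 5 chars -> positions[2] = 15, offsets[2] = 10
    # check: stripped position 8 ('y') + offset 5 == 13, the 'y' in the original
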
@@ -121,9 +123,9 @@ class TestTokenParsing:
         self.grammar = grammar_provider(self.ebnf)()
         self.code = lstrip_docstring("""
             def func(x, y):
-                if x > 0:
+                if x > 0: # a comment
                     if y > 0:
-                        print(x) # a comment
+                        print(x) # another comment
                 print(y)
             """)
@@ -138,6 +140,9 @@ class TestTokenParsing:
                '"%s" (%i) wrongly mapped onto "%s" (%i)' % \
                (teststr, mapped_pos, orig_text[original_pos:original_pos + len(teststr)], original_pos)

+    def test_strip_tokens(self):
+        assert self.code == strip_tokens(self.tokenized)
+
     def test_parse_tokenized(self):
         cst = self.grammar(self.tokenized)
         # for e in cst.collect_errors(self.tokenized):
@@ -179,6 +184,17 @@ class TestTokenParsing:
         self.verify_mapping("print(x)", self.code, tokenized, mapping)
         self.verify_mapping("print(y)", self.code, tokenized, mapping)

+    def test_error_position(self):
+        orig_src = self.code.replace('#', '\x1b')
+        prepr = chain_preprocessors(self.preprocess_comments, self.preprocess_indentation)
+        result, messages, syntaxtree = compile_source(orig_src, prepr, self.grammar,
+                                                      lambda i: i, lambda i: i)
+        for err in messages:
+            if self.code[err.orig_pos] == "#":
+                break
+        else:
+            assert False, "wrong error positions"
+

 if __name__ == "__main__":
     # tp = TestTokenParsing()
......