
Commit 4160b2ae authored by eckhart

- bug fixes source mapping

parent 5da469de
@@ -29,7 +29,7 @@ __all__ = ('Error',
            'only_errors',
            'linebreaks',
            'line_col',
-           'remap_error_locations')
+           'adjust_error_locations')
 class Error:
@@ -63,7 +63,9 @@ class Error:
     def __str__(self):
         prefix = ''
         if self.line > 0:
-            prefix = "line: %3i, column: %2i, " % (self.line, self.column)
+            prefix = "line: %s, column: %s, " % \
+                     ("%4i" % self.line if self.line >= 0 else ' ???',
+                      "%3i" % self.column if self.column >= 0 else '???')
         return prefix + "%s: %s" % (self.level_str, self.message)
     def __repr__(self):
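Note on the change above: the new format string degrades gracefully when a position is unknown. A minimal standalone sketch of the two cases (plain Python, just the expression itself, outside the Error class):

    line, column = 12, 5
    prefix = "line: %s, column: %s, " % (
        "%4i" % line if line >= 0 else ' ???',
        "%3i" % column if column >= 0 else '???')
    print(prefix)   # -> "line:   12, column:   5, "

    line, column = -1, -1
    prefix = "line: %s, column: %s, " % (
        "%4i" % line if line >= 0 else ' ???',
        "%3i" % column if column >= 0 else '???')
    print(prefix)   # -> "line:  ???, column: ???, "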
@@ -157,7 +159,7 @@ def line_col(lbreaks: List[int], pos: int) -> Tuple[int, int]:
 #    return line, column
-def remap_error_locations(errors: List[Error],
+def adjust_error_locations(errors: List[Error],
                            original_text: Union[StringView, str],
                            source_mapping: SourceMapFunc=lambda i: i) -> List[Error]:
     """Adds (or adjusts) line and column numbers of error messages in place.
......
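Note: adjust_error_locations (renamed from remap_error_locations above) translates error positions from the preprocessed text back into the original text via a SourceMapFunc before line and column numbers are computed. A toy illustration of what such a mapping does; the mapping function here is invented for the example and is not DHParser code:

    # A preprocessor removed the comment "  # hi", so positions in the processed
    # text must be shifted back before they are reported against the original.
    original  = "x = 1  # hi\ny = error\n"
    processed = "x = 1\ny = error\n"

    def mapping(pos: int) -> int:           # hypothetical SourceMapFunc for this example
        return pos if pos <= 5 else pos + 6  # 6 characters were cut out after position 5

    err_pos_processed = processed.find("error")      # position in the processed text
    err_pos_original = mapping(err_pos_processed)    # position in the original text
    assert original[err_pos_original:err_pos_original + 5] == "error"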
@@ -60,12 +60,12 @@ import copy
 import html
 import os
-from DHParser.error import Error, is_error, has_errors, linebreaks, line_col, remap_error_locations
+from DHParser.error import Error, is_error, has_errors, linebreaks, line_col, adjust_error_locations
 from DHParser.stringview import StringView, EMPTY_STRING_VIEW
 from DHParser.syntaxtree import Node, TransformationFunc, ParserBase, WHITESPACE_PTYPE, \
     TOKEN_PTYPE, ZOMBIE_PARSER
 from DHParser.preprocess import BEGIN_TOKEN, END_TOKEN, RX_TOKEN_NAME, \
-    PreprocessorFunc, with_source_mapping
+    PreprocessorFunc, with_source_mapping, strip_tokens
 from DHParser.toolkit import is_logging, log_dir, logfile_basename, escape_re, sane_parser_name, \
     escape_control_characters, load_if_file, re, typing
 from typing import Any, Callable, cast, Dict, List, Set, Tuple, Union, Optional
@@ -2221,7 +2221,7 @@ def compile_source(source: str,
                    compiler: Compiler) -> Tuple[Any, List[Error], Node]:  # Node (AST) -> Any
     """
     Compiles a source in four stages:
-    1. Scanning (if needed)
+    1. Preprocessing (if needed)
     2. Parsing
     3. AST-transformation
     4. Compiling.
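The positional arguments of compile_source line up with these stages plus the source itself; the call shape below mirrors the call in test_error_position added further down in this commit (the import path DHParser.parse is taken from that test module, the wrapper name run_pipeline is purely illustrative):

    from DHParser.parse import compile_source

    def run_pipeline(source, preprocessor, grammar, transformer, compiler):
        # stage 1: preprocessor, stage 2: grammar (parsing),
        # stage 3: transformer (AST transformation), stage 4: compiler
        result, messages, syntax_tree = compile_source(source, preprocessor, grammar,
                                                       transformer, compiler)
        return result, messages, syntax_tree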
@@ -2260,7 +2260,7 @@ def compile_source(source: str,
         syntax_tree.log(log_file_name + '.cst')
         parser.log_parsing_history__(log_file_name)
-    assert is_error(syntax_tree.error_flag) or str(syntax_tree) == source_text, str(syntax_tree)
+    assert is_error(syntax_tree.error_flag) or str(syntax_tree) == strip_tokens(source_text)
     # only compile if there were no syntax errors, for otherwise it is
     # likely that error list gets littered with compile error messages
     result = None
@@ -2278,5 +2278,5 @@ def compile_source(source: str,
             messages.extend(syntax_tree.collect_errors())
             syntax_tree.error_flag = max(syntax_tree.error_flag, efl)
-    remap_error_locations(messages, original_text, source_mapping)
+    adjust_error_locations(messages, original_text, source_mapping)
     return result, messages, syntax_tree
@@ -31,6 +31,7 @@ __all__ = ('RX_TOKEN_NAME',
            'PreprocessorFunc',
            'PreprocessorResult',
            'make_token',
+           'strip_tokens',
            'nil_preprocessor',
            'chain_preprocessors',
            'prettyprint_tokenized',
@@ -127,6 +128,22 @@ def prettyprint_tokenized(tokenized: str) -> str:
     return tokenized.replace('\x1b', '<').replace('\x1c', '|').replace('\x1d', '>')
+def strip_tokens(tokenized: str) -> str:
+    """Replaces all tokens with the token's arguments."""
+    result = []
+    pos = 0
+    match = RX_TOKEN.search(tokenized, pos)
+    while match:
+        start, end = match.span()
+        result.append(tokenized[pos:start])
+        result.append(match.groupdict()['argument'])
+        pos = end
+        match = RX_TOKEN.search(tokenized, pos)
+    result.append(tokenized[pos:])
+    return ''.join(result)
 #######################################################################
 #
 # Source Maps - mapping source code positions between different
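The new strip_tokens undoes make_token: every token is replaced by its argument, so tokens without an argument vanish entirely. A short sketch of the intended behaviour, assuming make_token's optional second parameter is the token's argument (the token name 'INDENT' below is made up for the example):

    from DHParser.preprocess import make_token, strip_tokens   # strip_tokens is new in this commit

    line = "    if x > 0:"
    assert strip_tokens(make_token('BEGIN_INDENT') + line) == line            # argument-less token vanishes
    assert strip_tokens(make_token('INDENT', '    ') + "pass") == "    pass"  # the argument survives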
@@ -160,7 +177,7 @@ def tokenized_to_original_mapping(tokenized_source: str) -> SourceMap:
         d = tokenized_source.find(TOKEN_DELIMITER, i)
         e = tokenized_source.find(END_TOKEN, i)
         assert 0 <= d < e
-        o -= (d - i + 3)
+        o -= (d - i + 2)
         positions.extend([d + 1, e + 1])
         offsets.extend([o + 1, o])
         i = tokenized_source.find(BEGIN_TOKEN, e + 1)
......
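The corrected offset o -= (d - i + 2) is exactly the number of characters a token adds beyond its argument, assuming tokens have the shape BEGIN_TOKEN + name + TOKEN_DELIMITER + argument + END_TOKEN with the \x1b/\x1c/\x1d delimiters suggested by prettyprint_tokenized above. A small self-contained check of that count:

    # i, d, e are the indices of BEGIN_TOKEN, TOKEN_DELIMITER and END_TOKEN,
    # exactly as located by str.find() in the loop above.
    name, argument = "BEGIN_INDENT", ""
    tok = "\x1b" + name + "\x1c" + argument + "\x1d"
    i, d, e = 0, tok.find("\x1c"), tok.find("\x1d")
    assert 0 <= d < e
    # Everything except the argument disappears in the original text:
    assert len(tok) - len(argument) == d - i + 2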
@@ -25,7 +25,7 @@ import os
 from DHParser.toolkit import is_logging, clear_logs, re
 from DHParser.syntaxtree import mock_syntax_tree, flatten_sxpr
-from DHParser.error import is_error, remap_error_locations
+from DHParser.error import is_error, adjust_error_locations
 __all__ = ('unit_from_configfile',
            'unit_from_json',
@@ -181,7 +181,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
                 tests.setdefault('__ast__', {})[test_name] = ast
                 ast.log("match_%s_%s.ast" % (parser_name, test_name))
             if is_error(cst.error_flag):
-                errors = remap_error_locations(cst.collect_errors(), test_code)
+                errors = adjust_error_locations(cst.collect_errors(), test_code)
                 errata.append('Match test "%s" for parser "%s" failed:\n\tExpr.: %s\n\n\t%s\n\n' %
                               (test_name, parser_name, '\n\t'.join(test_code.split('\n')),
                                '\n\t'.join(str(m).replace('\n', '\n\t\t') for m in errors)))
......
@@ -25,8 +25,10 @@ limitations under the License.
 from functools import partial
 from DHParser.dsl import grammar_provider
 from DHParser.parse import compile_source
 from DHParser.preprocess import make_token, tokenized_to_original_mapping, source_map, \
-    BEGIN_TOKEN, END_TOKEN, TOKEN_DELIMITER, SourceMapFunc, SourceMap, chain_preprocessors
+    BEGIN_TOKEN, END_TOKEN, TOKEN_DELIMITER, SourceMapFunc, SourceMap, chain_preprocessors, \
+    strip_tokens
 from DHParser.toolkit import lstrip_docstring, typing
 from typing import Tuple
@@ -78,17 +80,17 @@ class TestTokenParsing:
             if indent > indent_level * 4:
                 assert indent == (indent_level + 1) * 4, str(indent)  # indent must be 4 spaces
                 indent_level += 1
-                transformed.append(make_token('BEGIN_INDENT'))
+                line = make_token('BEGIN_INDENT') + line
             elif indent <= (indent_level - 1) * 4:
                 while indent <= (indent_level - 1) * 4:
-                    transformed.append(make_token('END_INDENT'))
+                    line = make_token('END_INDENT') + line
                     indent_level -= 1
                 assert indent == (indent_level + 1) * 4  # indent must be 4 spaces
             else:
                 assert indent == indent_level * 4
             transformed.append(line)
         while indent_level > 0:
-            transformed.append(make_token('END_INDENT'))
+            transformed[-1] += make_token('END_INDENT')
             indent_level -= 1
         tokenized = '\n'.join(transformed)
         # print(prettyprint_tokenized(tokenized))
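The point of prepending the BEGIN_INDENT/END_INDENT tokens to existing lines (instead of appending them to `transformed` as separate entries, as before) is presumably that the tokenized text keeps exactly as many lines as the original source, so line numbers still correspond 1:1 and strip_tokens can reproduce the input. A quick check of the line-count property (assuming make_token produces no newline characters):

    from DHParser.preprocess import make_token

    line = "    if x > 0:"
    assert (make_token('BEGIN_INDENT') + line).count('\n') == line.count('\n')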
@@ -101,11 +103,11 @@ class TestTokenParsing:
         for i, line in enumerate(lines):
             comment_pos = line.find('#')
             if comment_pos >= 0:
-                lines[i] = line[:comment_pos]
-                positions.append(pos + comment_pos)
-                offsets.append(len(line) - comment_pos)
-                pos += comment_pos
-            pos += len(line)
+                lines[i] = line[:comment_pos]
+                positions.append(pos - offsets[-1])
+                offsets.append(offsets[-1] + len(line) - comment_pos)
+            pos += len(lines[i])
         positions.append(pos)
         offsets.append(offsets[-1])
         return '\n'.join(lines), partial(source_map, srcmap=SourceMap(positions, offsets))
@@ -121,9 +123,9 @@ class TestTokenParsing:
         self.grammar = grammar_provider(self.ebnf)()
         self.code = lstrip_docstring("""
             def func(x, y):
-                if x > 0:
+                if x > 0:          # a comment
                     if y > 0:
-                        print(x)   # a comment
+                        print(x)   # another comment
                     print(y)
             """)
         self.tokenized = self.preprocess_indentation(self.code)
@@ -138,6 +140,9 @@ class TestTokenParsing:
                '"%s" (%i) wrongly mapped onto "%s" (%i)' % \
                (teststr, mapped_pos, orig_text[original_pos:original_pos + len(teststr)], original_pos)
+    def test_strip_tokens(self):
+        assert self.code == strip_tokens(self.tokenized)
     def test_parse_tokenized(self):
         cst = self.grammar(self.tokenized)
         # for e in cst.collect_errors(self.tokenized):
@@ -179,6 +184,17 @@ class TestTokenParsing:
         self.verify_mapping("print(x)", self.code, tokenized, mapping)
         self.verify_mapping("print(y)", self.code, tokenized, mapping)
+    def test_error_position(self):
+        orig_src = self.code.replace('#', '\x1b')
+        prepr = chain_preprocessors(self.preprocess_comments, self.preprocess_indentation)
+        result, messages, syntaxtree = compile_source(orig_src, prepr, self.grammar,
+                                                      lambda i: i, lambda i: i)
+        for err in messages:
+            if self.code[err.orig_pos] == "#":
+                break
+        else:
+            assert False, "wrong error positions"
 if __name__ == "__main__":
     # tp = TestTokenParsing()
......