
Commit 4160b2ae authored by eckhart

- bug fixes source mapping

parent 5da469de
@@ -29,7 +29,7 @@ __all__ = ('Error',
            'only_errors',
            'linebreaks',
            'line_col',
-           'remap_error_locations')
+           'adjust_error_locations')


 class Error:
@@ -63,7 +63,9 @@ class Error:
     def __str__(self):
         prefix = ''
         if self.line > 0:
-            prefix = "line: %3i, column: %2i, " % (self.line, self.column)
+            prefix = "line: %s, column: %s, " % \
+                ("%4i" % self.line if self.line >= 0 else ' ???',
+                 "%3i" % self.column if self.column >= 0 else '???')
         return prefix + "%s: %s" % (self.level_str, self.message)

     def __repr__(self):
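Editor's note, for illustration only (not part of the commit): with the reworked format string, an error with known position renders roughly as follows, assuming `line` and `column` are ints and negative values mean "unknown", as the conditionals suggest.

```python
# Illustration only: rendering of the new prefix (negative values assumed to mean "unknown").
line, column = 12, 4
prefix = "line: %s, column: %s, " % (
    "%4i" % line if line >= 0 else ' ???',
    "%3i" % column if column >= 0 else '???')
print(prefix + "%s: %s" % ("Error", "unexpected token"))
# -> line:   12, column:   4, Error: unexpected token
```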
@@ -157,9 +159,9 @@ def line_col(lbreaks: List[int], pos: int) -> Tuple[int, int]:
     # return line, column


-def remap_error_locations(errors: List[Error],
+def adjust_error_locations(errors: List[Error],
                            original_text: Union[StringView, str],
                            source_mapping: SourceMapFunc=lambda i: i) -> List[Error]:
     """Adds (or adjusts) line and column numbers of error messages in place.

     Args:
@@ -60,12 +60,12 @@ import copy
 import html
 import os

-from DHParser.error import Error, is_error, has_errors, linebreaks, line_col, remap_error_locations
+from DHParser.error import Error, is_error, has_errors, linebreaks, line_col, adjust_error_locations
 from DHParser.stringview import StringView, EMPTY_STRING_VIEW
 from DHParser.syntaxtree import Node, TransformationFunc, ParserBase, WHITESPACE_PTYPE, \
     TOKEN_PTYPE, ZOMBIE_PARSER
 from DHParser.preprocess import BEGIN_TOKEN, END_TOKEN, RX_TOKEN_NAME, \
-    PreprocessorFunc, with_source_mapping
+    PreprocessorFunc, with_source_mapping, strip_tokens
 from DHParser.toolkit import is_logging, log_dir, logfile_basename, escape_re, sane_parser_name, \
     escape_control_characters, load_if_file, re, typing
 from typing import Any, Callable, cast, Dict, List, Set, Tuple, Union, Optional
@@ -2221,7 +2221,7 @@ def compile_source(source: str,
                    compiler: Compiler) -> Tuple[Any, List[Error], Node]: # Node (AST) -> Any
     """
     Compiles a source in four stages:
-    1. Scanning (if needed)
+    1. Preprocessing (if needed)
     2. Parsing
     3. AST-transformation
     4. Compiling.
@@ -2260,7 +2260,7 @@ def compile_source(source: str,
         syntax_tree.log(log_file_name + '.cst')
         parser.log_parsing_history__(log_file_name)
-    assert is_error(syntax_tree.error_flag) or str(syntax_tree) == source_text, str(syntax_tree)
+    assert is_error(syntax_tree.error_flag) or str(syntax_tree) == strip_tokens(source_text)
     # only compile if there were no syntax errors, for otherwise it is
     # likely that error list gets littered with compile error messages
     result = None
@@ -2278,5 +2278,5 @@ def compile_source(source: str,
     messages.extend(syntax_tree.collect_errors())
     syntax_tree.error_flag = max(syntax_tree.error_flag, efl)
-    remap_error_locations(messages, original_text, source_mapping)
+    adjust_error_locations(messages, original_text, source_mapping)
     return result, messages, syntax_tree
@@ -31,6 +31,7 @@ __all__ = ('RX_TOKEN_NAME',
            'PreprocessorFunc',
            'PreprocessorResult',
            'make_token',
+           'strip_tokens',
            'nil_preprocessor',
            'chain_preprocessors',
            'prettyprint_tokenized',
@@ -127,6 +128,22 @@ def prettyprint_tokenized(tokenized: str) -> str:
     return tokenized.replace('\x1b', '<').replace('\x1c', '|').replace('\x1d', '>')


+def strip_tokens(tokenized: str) -> str:
+    """Replaces all tokens with the token's arguments."""
+    result = []
+    pos = 0
+    match = RX_TOKEN.search(tokenized, pos)
+    while match:
+        start, end = match.span()
+        result.append(tokenized[pos:start])
+        result.append(match.groupdict()['argument'])
+        pos = end
+        match = RX_TOKEN.search(tokenized, pos)
+    result.append(tokenized[pos:])
+    return ''.join(result)
+
+
 #######################################################################
 #
 # Source Maps - mapping source code positions between different
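Editor's note, a brief usage sketch of the new `strip_tokens` (this assumes that `make_token` wraps its argument as BEGIN_TOKEN + name + TOKEN_DELIMITER + argument + END_TOKEN, which is what `prettyprint_tokenized` above implies; the exact rendering is not quoted from the commit):

```python
# Usage sketch only; make_token's output format is an assumption, not quoted from the source.
from DHParser.preprocess import make_token, prettyprint_tokenized, strip_tokens

tokenized = make_token('BEGIN_INDENT') + "    print(x)"
print(prettyprint_tokenized(tokenized))   # e.g. <BEGIN_INDENT|>    print(x)
assert strip_tokens(tokenized) == "    print(x)"   # token dropped, its (empty) argument kept
```

This is exactly the property the new `test_strip_tokens` below relies on: stripping the tokens from the tokenized text recovers the untokenized source.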
@@ -160,7 +177,7 @@ def tokenized_to_original_mapping(tokenized_source: str) -> SourceMap:
         d = tokenized_source.find(TOKEN_DELIMITER, i)
         e = tokenized_source.find(END_TOKEN, i)
         assert 0 <= d < e
-        o -= (d - i + 3)
+        o -= (d - i + 2)
         positions.extend([d + 1, e + 1])
         offsets.extend([o + 1, o])
         i = tokenized_source.find(BEGIN_TOKEN, e + 1)
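Editor's note on the corrected offset: judging from the surrounding find() calls, `i` marks the BEGIN_TOKEN, `d` the TOKEN_DELIMITER and `e` the END_TOKEN of a token. Everything except the token's argument is surplus relative to the original text: BEGIN_TOKEN (1 character), the token name (d - i - 1 characters), TOKEN_DELIMITER (1) and END_TOKEN (1), which is (d - i - 1) + 3 = d - i + 2 characters. The previous `d - i + 3` therefore shifted every subsequent offset by one character too many, consistent with the commit message "bug fixes source mapping".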
@@ -25,7 +25,7 @@ import os
 from DHParser.toolkit import is_logging, clear_logs, re
 from DHParser.syntaxtree import mock_syntax_tree, flatten_sxpr
-from DHParser.error import is_error, remap_error_locations
+from DHParser.error import is_error, adjust_error_locations

 __all__ = ('unit_from_configfile',
            'unit_from_json',
@@ -181,7 +181,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
                 tests.setdefault('__ast__', {})[test_name] = ast
                 ast.log("match_%s_%s.ast" % (parser_name, test_name))
             if is_error(cst.error_flag):
-                errors = remap_error_locations(cst.collect_errors(), test_code)
+                errors = adjust_error_locations(cst.collect_errors(), test_code)
                 errata.append('Match test "%s" for parser "%s" failed:\n\tExpr.: %s\n\n\t%s\n\n' %
                               (test_name, parser_name, '\n\t'.join(test_code.split('\n')),
                                '\n\t'.join(str(m).replace('\n', '\n\t\t') for m in errors)))
@@ -25,8 +25,10 @@ limitations under the License.
 from functools import partial

 from DHParser.dsl import grammar_provider
+from DHParser.parse import compile_source
 from DHParser.preprocess import make_token, tokenized_to_original_mapping, source_map, \
-    BEGIN_TOKEN, END_TOKEN, TOKEN_DELIMITER, SourceMapFunc, SourceMap, chain_preprocessors
+    BEGIN_TOKEN, END_TOKEN, TOKEN_DELIMITER, SourceMapFunc, SourceMap, chain_preprocessors, \
+    strip_tokens
 from DHParser.toolkit import lstrip_docstring, typing
 from typing import Tuple
@@ -78,17 +80,17 @@ class TestTokenParsing:
             if indent > indent_level * 4:
                 assert indent == (indent_level + 1) * 4, str(indent)  # indent must be 4 spaces
                 indent_level += 1
-                transformed.append(make_token('BEGIN_INDENT'))
+                line = make_token('BEGIN_INDENT') + line
             elif indent <= (indent_level - 1) * 4:
                 while indent <= (indent_level - 1) * 4:
-                    transformed.append(make_token('END_INDENT'))
+                    line = make_token('END_INDENT') + line
                     indent_level -= 1
                 assert indent == (indent_level + 1) * 4  # indent must be 4 spaces
             else:
                 assert indent == indent_level * 4
             transformed.append(line)
         while indent_level > 0:
-            transformed.append(make_token('END_INDENT'))
+            transformed[-1] += make_token('END_INDENT')
             indent_level -= 1
         tokenized = '\n'.join(transformed)
         # print(prettyprint_tokenized(tokenized))
@@ -101,11 +103,11 @@ class TestTokenParsing:
         for i, line in enumerate(lines):
             comment_pos = line.find('#')
             if comment_pos >= 0:
-                lines[i] = line[:comment_pos]
-                positions.append(pos + comment_pos)
-                offsets.append(len(line) - comment_pos)
                 pos += comment_pos
-            pos += len(line)
+                lines[i] = line[:comment_pos]
+                positions.append(pos - offsets[-1])
+                offsets.append(offsets[-1] + len(line) - comment_pos)
+            pos += len(lines[i])
         positions.append(pos)
         offsets.append(offsets[-1])
         return '\n'.join(lines), partial(source_map, srcmap=SourceMap(positions, offsets))
@@ -121,9 +123,9 @@ class TestTokenParsing:
         self.grammar = grammar_provider(self.ebnf)()
         self.code = lstrip_docstring("""
             def func(x, y):
-                if x > 0:
+                if x > 0:                 # a comment
                     if y > 0:
-                        print(x)          # a comment
+                        print(x)          # another comment
                     print(y)
             """)
         self.tokenized = self.preprocess_indentation(self.code)
@@ -138,6 +140,9 @@ class TestTokenParsing:
             '"%s" (%i) wrongly mapped onto "%s" (%i)' % \
             (teststr, mapped_pos, orig_text[original_pos:original_pos + len(teststr)], original_pos)

+    def test_strip_tokens(self):
+        assert self.code == strip_tokens(self.tokenized)
+
     def test_parse_tokenized(self):
         cst = self.grammar(self.tokenized)
         # for e in cst.collect_errors(self.tokenized):
@@ -179,6 +184,17 @@ class TestTokenParsing:
         self.verify_mapping("print(x)", self.code, tokenized, mapping)
         self.verify_mapping("print(y)", self.code, tokenized, mapping)

+    def test_error_position(self):
+        orig_src = self.code.replace('#', '\x1b')
+        prepr = chain_preprocessors(self.preprocess_comments, self.preprocess_indentation)
+        result, messages, syntaxtree = compile_source(orig_src, prepr, self.grammar,
+                                                      lambda i: i, lambda i: i)
+        for err in messages:
+            if self.code[err.orig_pos] == "#":
+                break
+        else:
+            assert False, "wrong error positions"
+

 if __name__ == "__main__":
     # tp = TestTokenParsing()