
Commit 309c7376 authored by Eckhart Arnold

- proper source mapping of destination characters that are mapped onto the same source

parent 663e5268
@@ -1127,8 +1127,8 @@ class RegExp(Parser):
         if match:
             capture = match.group(0)
             end = text.index(match.end())
-            # regular expresseion must never match preprocessor-tokens!
-            # TODO: Find a better solution here, e.g. static checking/re-mangling at compile time
+            # regular expression must never match preprocessor-tokens!
+            # TODO: Find a better solution here? e.g. static checking/re-mangling at compile time
             i = capture.find(BEGIN_TOKEN)
             if i >= 0:
                 capture = capture[:i]
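
For context on the hunk above: the tokenized source interleaves preprocessor tokens, set off by a sentinel character (BEGIN_TOKEN), with ordinary text, and a regular expression match must never run into such a token, which is why the capture is cut off at the first BEGIN_TOKEN. A minimal sketch of that truncation, assuming BEGIN_TOKEN is a single sentinel character; the helper cut_at_token is purely illustrative and not part of the project's API:

import re

BEGIN_TOKEN = '\x1b'   # assumed sentinel value; the real constant is defined by the preprocessing module

def cut_at_token(capture: str) -> str:
    """Truncate a regex capture so that it never swallows a preprocessor token."""
    i = capture.find(BEGIN_TOKEN)
    return capture[:i] if i >= 0 else capture

# A greedy regex would otherwise run straight over the token boundary:
text = "abc def" + BEGIN_TOKEN + "INDENT"
capture = re.match(r'[^\n]*', text).group(0)   # matches past the sentinel
assert cut_at_token(capture) == "abc def"
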
@@ -106,14 +106,17 @@ def tokenized_to_original_mapping(tokenized_source: str) -> SourceMap:
         positions.extend([d + 1, e + 1])
         offsets.extend([o + 1, o])
         i = tokenized_source.find(BEGIN_TOKEN, e + 1)
+    if e + 1 < len(tokenized_source):
+        positions.append(len(tokenized_source))
+        offsets.append(offsets[-1])
     # post conditions
     assert len(positions) == len(offsets), '\n' + str(positions) + '\n' + str(offsets)
     assert positions[0] == 0
     assert all(positions[i] < positions[i + 1] for i in range(len(positions) - 1))
-    assert all(offsets[i] > offsets[i + 1] for i in range(len(offsets) - 1))
-    return SourceMap(positions, offsets)
+    assert all(offsets[i] >= offsets[i + 1] for i in range(len(offsets) - 1))
+    return SourceMap(positions, offsets, len(positions))

 def source_map(position: int, srcmap: SourceMap) -> int:
@@ -128,7 +131,10 @@ def source_map(position: int, srcmap: SourceMap) -> int:
     Returns:
         the mapped position
     """
-    i = bisect.bisect_right(srcmap[0], position)
+    i = bisect.bisect_right(srcmap.positions, position)
     if i:
-        return position + srcmap[1][i - 1]
+        return min(position + srcmap.offsets[i - 1], srcmap.positions[i] + srcmap.offsets[i])
     raise ValueError
+
+# TODO: allow preprocessors to return their own source map (really a map or a function (easier)?)
+# TODO: apply source maps in sequence.
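
To see what the changed lookup above does, here is a self-contained sketch of the mapping scheme. It assumes, based on the diff, that SourceMap is a named tuple of ascending positions in the tokenized text, non-increasing offsets, and the entry count added as a third field by this commit; the toy map at the end is hand-built for illustration and is not the actual output of tokenized_to_original_mapping. The min() clamp is what maps every destination character inside an inserted token onto the same source character, which is what the commit message describes:

import bisect
from typing import List, NamedTuple

class SourceMap(NamedTuple):
    positions: List[int]   # ascending positions in the tokenized (destination) text
    offsets: List[int]     # non-increasing offsets; source position = tokenized position + offset
    length: int            # number of entries (the field added by this commit)

def source_map(position: int, srcmap: SourceMap) -> int:
    """Map a position in the tokenized text back to the original source."""
    i = bisect.bisect_right(srcmap.positions, position)
    if i:
        # Clamp to the source position of the next segment, so that all destination
        # characters inside an inserted token map onto the same source character.
        return min(position + srcmap.offsets[i - 1],
                   srcmap.positions[i] + srcmap.offsets[i])
    raise ValueError

# Toy example: original = "abcdef", tokenized = "abc<TOK>def"
# (5 token characters inserted at source position 3, so len(tokenized) == 11).
srcmap = SourceMap(positions=[0, 8, 11], offsets=[0, -5, -5], length=3)
assert [source_map(p, srcmap) for p in range(11)] == [0, 1, 2, 3, 3, 3, 3, 3, 3, 4, 5]

Without the clamp, positions 4 through 7 of the toy example would map to 4 through 7, i.e. past the source character the token stands for. The final entry that the commit appends to positions and offsets plays the same role as positions[-1] == 11 here: it guarantees that positions[i] exists for every valid lookup.
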
@@ -62,12 +62,12 @@ def tst_func():
             doc = f.read()
         print('\n\nParsing document: "%s"\n' % file)
         result = parser(doc)
-        with open('REPORT/' + file[:-4]+'.cst', 'w', encoding='utf-8') as f:
-            f.write(result.as_sxpr(compact=False))
-        transformer(result)
-        with open('REPORT/' + file[:-4]+'.ast', 'w', encoding='utf-8') as f:
-            f.write(result.as_sxpr(compact=False))
         if toolkit.is_logging():
+            with open('REPORT/' + file[:-4] + '.cst', 'w', encoding='utf-8') as f:
+                f.write(result.as_sxpr(compact=False))
+            transformer(result)
+            with open('REPORT/' + file[:-4] + '.ast', 'w', encoding='utf-8') as f:
+                f.write(result.as_sxpr(compact=False))
             parser.log_parsing_history__()
         fail_on_error(doc, result)
         transformer(result)
@@ -60,7 +60,7 @@ class TestSourceMapping:
         assert len(positions) == len(offsets)
         assert positions[0] == 0
         assert all(positions[i] < positions[i + 1] for i in range(len(positions) - 1))
-        assert all(offsets[i] > offsets[i + 1] for i in range(len(offsets) - 1))
+        assert all(offsets[i] >= offsets[i + 1] for i in range(len(offsets) - 1))
         assert self.tokenized.find('AND') == self.code.find('AND') + len('CONJUNCTION') + 2
@@ -124,13 +124,22 @@ class TestTokenParsing:
         # print()
         assert not cst.error_flag

-    def test_source_mapping(self):
+    def test_source_mapping_1(self):
         self.verify_mapping("def func", self.code, self.tokenized)
         self.verify_mapping("x > 0:", self.code, self.tokenized)
         self.verify_mapping("if y > 0:", self.code, self.tokenized)
         self.verify_mapping("print(x)", self.code, self.tokenized)
         self.verify_mapping("print(y)", self.code, self.tokenized)
+
+    def test_source_mapping_2(self):
+        previous_index = 0
+        L = len(self.code)
+        for mapped_index in range(len(self.tokenized)):
+            index = source_map(mapped_index, self.srcmap)
+            assert previous_index <= index <= L, \
+                "%i <= %i <= %i violated" % (previous_index, index, L)
+            previous_index = index

 if __name__ == "__main__":
     # tp = TestTokenParsing()
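
The new test_source_mapping_2 checks exactly the invariant that the clamped lookup guarantees: mapped indices never decrease as the tokenized position grows, and they never exceed the length of the original source. The same check, run in isolation against the toy map from the sketch further up (the lengths 11 and 6 are specific to that example):

previous = 0
for p in range(11):              # every position in the toy tokenized text
    q = source_map(p, srcmap)
    assert previous <= q <= 6    # 6 == len("abcdef"), the toy original
    previous = q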