Commit 309c7376 authored by Eckhart Arnold

- proper source mapping of destination characters that are mapped onto the same source

parent 663e5268
@@ -1127,8 +1127,8 @@ class RegExp(Parser):
         if match:
             capture = match.group(0)
             end = text.index(match.end())
-            # regular expresseion must never match preprocessor-tokens!
-            # TODO: Find a better solution here, e.g. static checking/re-mangling at compile time
+            # regular expression must never match preprocessor-tokens!
+            # TODO: Find a better solution here? e.g. static checking/re-mangling at compile time
             i = capture.find(BEGIN_TOKEN)
             if i >= 0:
                 capture = capture[:i]
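The point of the truncation in this hunk is that a regular expression must never consume the sentinel that marks a preprocessor token. A minimal, self-contained sketch of that guard, assuming a hypothetical sentinel value and helper name (guarded_match) that are not DHParser's actual definitions:

import re

BEGIN_TOKEN = '\x1b'  # hypothetical sentinel; DHParser defines its own value

def guarded_match(regex: str, text: str) -> str:
    """Match regex at the start of text, cutting the capture short at the
    first preprocessor token so the sentinel is never consumed."""
    match = re.match(regex, text)
    if not match:
        return ''
    capture = match.group(0)
    i = capture.find(BEGIN_TOKEN)
    return capture[:i] if i >= 0 else capture

# Without the guard, a greedy regex would swallow the sentinel:
print(repr(guarded_match(r'.+', 'abc\x1bINDENT\x1cdef')))  # 'abc'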
@@ -106,14 +106,17 @@ def tokenized_to_original_mapping(tokenized_source: str) -> SourceMap:
         positions.extend([d + 1, e + 1])
         offsets.extend([o + 1, o])
         i = tokenized_source.find(BEGIN_TOKEN, e + 1)
+    if e + 1 < len(tokenized_source):
+        positions.append(len(tokenized_source))
+        offsets.append(offsets[-1])
     # post conditions
     assert len(positions) == len(offsets), '\n' + str(positions) + '\n' + str(offsets)
     assert positions[0] == 0
     assert all(positions[i] < positions[i + 1] for i in range(len(positions) - 1))
-    assert all(offsets[i] > offsets[i + 1] for i in range(len(offsets) - 1))
-    return SourceMap(positions, offsets)
+    assert all(offsets[i] >= offsets[i + 1] for i in range(len(offsets) - 1))
+    return SourceMap(positions, offsets, len(positions))
@@ -128,7 +131,10 @@ def source_map(position: int, srcmap: SourceMap) -> int:
     Returns:
         the mapped position
     """
-    i = bisect.bisect_right(srcmap[0], position)
+    i = bisect.bisect_right(srcmap.positions, position)
     if i:
-        return position + srcmap[1][i - 1]
+        return min(position + srcmap.offsets[i - 1], srcmap.positions[i] + srcmap.offsets[i])
     raise ValueError
+
+# TODO: allow preprocessors to return their own source map (really a map or a function (easier)?)
+# TODO: apply source maps in sequence.
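Taken together, the two hunks above change the mapping in two ways: tokenized_to_original_mapping now appends a final entry (len(tokenized_source) with the last offset repeated), which is why the strict monotonicity assertion on offsets is relaxed from > to >=, and source_map clamps the looked-up position against the start of the next interval. A self-contained sketch of the lookup with made-up interval values; the field name `length` for the third SourceMap component is an assumption:

import bisect
from collections import namedtuple

SourceMap = namedtuple('SourceMap', ['positions', 'offsets', 'length'])

def source_map(position: int, srcmap: SourceMap) -> int:
    i = bisect.bisect_right(srcmap.positions, position)
    if i:
        # The min() clamp keeps positions inside an inserted token from
        # overshooting: they all map onto the same source character.
        return min(position + srcmap.offsets[i - 1],
                   srcmap.positions[i] + srcmap.offsets[i])
    raise ValueError

# Tokenized text is longer than the source: positions 0-3 map 1:1, while
# positions 4-9 (inside the inserted token) collapse onto source position 4.
srcmap = SourceMap(positions=[0, 4, 10], offsets=[0, 0, -6], length=3)
print([source_map(p, srcmap) for p in range(10)])
# -> [0, 1, 2, 3, 4, 4, 4, 4, 4, 4]

Destination characters 4 through 9 all map onto source position 4 instead of running past it, which is exactly the behaviour named in the commit message.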
@@ -62,12 +62,12 @@ def tst_func():
         doc = f.read()
         print('\n\nParsing document: "%s"\n' % file)
         result = parser(doc)
-        with open('REPORT/' + file[:-4]+'.cst', 'w', encoding='utf-8') as f:
-            f.write(result.as_sxpr(compact=False))
-        transformer(result)
-        with open('REPORT/' + file[:-4]+'.ast', 'w', encoding='utf-8') as f:
-            f.write(result.as_sxpr(compact=False))
+        if toolkit.is_logging():
+            with open('REPORT/' + file[:-4] + '.cst', 'w', encoding='utf-8') as f:
+                f.write(result.as_sxpr(compact=False))
+            transformer(result)
+            with open('REPORT/' + file[:-4] + '.ast', 'w', encoding='utf-8') as f:
+                f.write(result.as_sxpr(compact=False))
+            parser.log_parsing_history__()
         fail_on_error(doc, result)
         transformer(result)
@@ -60,7 +60,7 @@ class TestSourceMapping:
         assert len(positions) == len(offsets)
         assert positions[0] == 0
         assert all(positions[i] < positions[i + 1] for i in range(len(positions) - 1))
-        assert all(offsets[i] > offsets[i + 1] for i in range(len(offsets) - 1))
+        assert all(offsets[i] >= offsets[i + 1] for i in range(len(offsets) - 1))
         assert self.tokenized.find('AND') == self.code.find('AND') + len('CONJUNCTION') + 2
@@ -124,13 +124,22 @@ class TestTokenParsing:
         # print()
         assert not cst.error_flag

-    def test_source_mapping(self):
+    def test_source_mapping_1(self):
         self.verify_mapping("def func", self.code, self.tokenized)
         self.verify_mapping("x > 0:", self.code, self.tokenized)
         self.verify_mapping("if y > 0:", self.code, self.tokenized)
         self.verify_mapping("print(x)", self.code, self.tokenized)
         self.verify_mapping("print(y)", self.code, self.tokenized)

+    def test_source_mapping_2(self):
+        previous_index = 0
+        L = len(self.code)
+        for mapped_index in range(len(self.tokenized)):
+            index = source_map(mapped_index, self.srcmap)
+            assert previous_index <= index <= L, \
+                "%i <= %i <= %i violated" % (previous_index, index, L)
+            previous_index = index
+

 if __name__ == "__main__":
     # tp = TestTokenParsing()