Commit 05e4860e authored by Eckhart Arnold

preprocess.py: includes: work in progress

parent e867a83d
@@ -30,7 +30,7 @@ cannot completely be described entirely with context-free grammars.
 import bisect
 import functools
-from typing import Union, Optional, Callable, Tuple, NamedTuple, List
+from typing import Union, Optional, Callable, Tuple, NamedTuple, List, Any
 from DHParser.toolkit import re, dataclasses
...@@ -126,7 +126,7 @@ def nil_preprocessor(source_text: str, source_name: str) -> Preprocessed: ...@@ -126,7 +126,7 @@ def nil_preprocessor(source_text: str, source_name: str) -> Preprocessed:
""" """
A preprocessor that does nothing, i.e. just returns the input. A preprocessor that does nothing, i.e. just returns the input.
""" """
return source_text, lambda i: SourceLocation(source_name, i) return Preprocessed(source_text, lambda i: SourceLocation(source_name, i))
def _apply_mappings(position: int, mappings: List[SourceMapFunc]) -> SourceLocation: def _apply_mappings(position: int, mappings: List[SourceMapFunc]) -> SourceLocation:
...@@ -136,6 +136,7 @@ def _apply_mappings(position: int, mappings: List[SourceMapFunc]) -> SourceLocat ...@@ -136,6 +136,7 @@ def _apply_mappings(position: int, mappings: List[SourceMapFunc]) -> SourceLocat
position within a preprocessed source text and mappings should therefore position within a preprocessed source text and mappings should therefore
be a list of reverse-mappings in reversed order. be a list of reverse-mappings in reversed order.
""" """
filename = ''
for mapping in mappings: for mapping in mappings:
filename, position = mapping(position) filename, position = mapping(position)
return SourceLocation(filename, position) return SourceLocation(filename, position)
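As a quick illustration of the reverse-mapping chain (not part of the commit; _apply_mappings is module-internal and the file name 'dummy.txt' is made up): each mapping undoes one preprocessing stage, so the list must begin with the mapping of the last stage. Assuming two stages that each inserted two characters:

undo = [lambda pos: SourceLocation('dummy.txt', pos - 2),   # undo stage 2 (applied last)
        lambda pos: SourceLocation('dummy.txt', pos - 2)]   # undo stage 1 (applied first)
assert _apply_mappings(10, undo) == SourceLocation('dummy.txt', 6)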
@@ -156,7 +157,7 @@ def _apply_preprocessors(source_text: str, source_name: str,
         processed, mapping_func = with_source_mapping(prep(processed, source_name))
         mapping_chain.append(mapping_func)
     mapping_chain.reverse()
-    return processed, functools.partial(_apply_mappings, mappings=mapping_chain)
+    return Preprocessed(processed, functools.partial(_apply_mappings, mappings=mapping_chain))


 def chain_preprocessors(*preprocessors) -> PreprocessorFunc:
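For orientation (not part of the commit): chaining preprocessors yields a single PreprocessorFunc whose Preprocessed result bundles the processed text with one combined reverse mapping. A minimal sketch using the trivial nil_preprocessor from above, assuming the (text, name) call convention shown in _apply_preprocessors and that with_source_mapping passes a ready-made Preprocessed result through unchanged; 'example.txt' is a made-up name:

prep = chain_preprocessors(nil_preprocessor, nil_preprocessor)
text, mapping = prep('source text', 'example.txt')
assert text == 'source text'                              # nothing was changed
assert mapping(5) == SourceLocation('example.txt', 5)     # so positions map to themselves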
@@ -315,6 +316,22 @@ def with_source_mapping(result: PreprocessorResult) -> Preprocessed:
 #######################################################################

+def generate_find_include_func(rx: Union[str, Any]) -> FindIncludeFunc:
+    if isinstance(rx, str):  rx = re.compile(rx)
+
+    def find_include(text: str, begin: int) -> IncludeInfo:
+        nonlocal rx
+        iterator = rx.finditer(text, begin)
+        try:
+            m = next(iterator)
+            begin = m.start()
+            return IncludeInfo(begin, m.end() - begin, m.group('name'))
+        except StopIteration:
+            return IncludeInfo(-1, 0, '')
+
+    return find_include
+
+
 def generate_include_map(source_name: str,
                          source_text: str,
                          find_next_include: FindIncludeFunc) -> Tuple[IncludeMap, str]:
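Mirroring the new TestHelpers.test_generate_find_include_func further down, a small usage sketch of generate_find_include_func; the include(...) syntax is just the one used by the tests, not something the function prescribes:

find = generate_find_include_func(r'include\((?P<name>[^)\n]*)\)')
assert find('321include(sub.txt)xyz', 0) == IncludeInfo(3, 16, 'sub.txt')   # match at 3, length 16
assert find('no include directive here', 0) == IncludeInfo(-1, 0, '')       # no match found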
@@ -324,49 +341,58 @@ def generate_include_map(source_name: str,
         nonlocal file_names
         map: IncludeMap = IncludeMap(source_name, [0], [0], [source_name])
         text_chunks: List[str] = []
+        source_offset: int = 0
         if source_name in file_names:
             raise ValueError(f'Circular include of {source_name} detected!')
         file_names.add(source_name)
         last_begin = -1
         last_end = 0
+        lengths = 0
         begin, length, include_name = find_next(source_text, 0)
         while begin >= 0:
             assert begin > last_begin
             with open(include_name, 'r', encoding='utf-8') as f:
                 include_text = f.read()
             inner_map, inner_text = generate_map(include_name, include_text, find_next)
-            inner_map.positions = [pos + begin for pos in inner_map.positions]
-            inner_map.offsets = [offset - begin for offset in inner_map.offsets]
-            if begin == map.positions[-1]:
-                map.file_names = map.file_names[:-1] + inner_map.file_names
-                map.positions = map.positions[:-1] + inner_map.positions
-                map.offsets = map.offsets[:-1] + inner_map.offsets
+            inner_map.positions = [pos + begin - lengths + source_offset for pos in inner_map.positions]
+            inner_map.offsets = [offset - (source_offset + begin - lengths) for offset in inner_map.offsets]
+            if begin == map.positions[-1]:  # FEHLER!
+                map.file_names = map.file_names[:-1] + inner_map.file_names[:-1]
+                map.positions = map.positions[:-1] + inner_map.positions[:-1]
+                map.offsets = map.offsets[:-1] + inner_map.offsets[:-1]
                 text_chunks.append(inner_text)
             else:
                 text_chunks.append(source_text[last_end:begin])
-                map.file_names.append(include_name)
-                map.positions += inner_map.positions
-                map.offsets += inner_map.offsets
+                source_offset += begin - last_end
+                map.file_names += inner_map.file_names[:-1]
+                map.positions += inner_map.positions[:-1]
+                map.offsets += inner_map.offsets[:-1]
                 text_chunks.append(inner_text)
+            lengths += length
             map.file_names.append(source_name)
-            map.positions.append(begin + inner_map.positions[-1])
-            map.offsets.append(map.offsets[-1] - inner_map.positions[-1] + length)
+            map.positions.append(inner_map.positions[-1])
+            map.offsets.append(source_offset + lengths - inner_map.positions[-1])
             last_end = begin + length
             last_begin = begin
             begin, length, include_name = find_next(source_text, last_end)
-        text_chunks.append(source_text[last_end:])
+        rest = source_text[last_end:]
+        if rest:
+            text_chunks.append(rest)
+            map.positions.append(map.positions[-1] + len(rest))
+            map.offsets.append(map.offsets[-1])
+            map.file_names.append(source_name)
+        file_names.remove(source_name)
         return map, ''.join(text_chunks)

     return generate_map(source_name, source_text, find_next_include)


-def includes_map(position: int, inclmap: IncludeMap) -> SourceLocation:
+def srcmap_includes(position: int, inclmap: IncludeMap) -> SourceLocation:
     i = bisect.bisect_right(inclmap.positions, position)
     if i:
         return SourceLocation(
-            inclmap.file_names[i],
-            # min(position + inclmap.offsets[i - 1], inclmap.positions[i] + inclmap.offsets[i])
-            inclmap.positions[i] + inclmap.offsets[i])
+            inclmap.file_names[i - 1],
+            position + inclmap.offsets[i - 1])
     raise ValueError
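The arithmetic in srcmap_includes is easiest to see on a concrete map (illustrative values only, not the exact arrays that the generate_include_map above produces; its trailing "# FEHLER!" comment, German for "error!", marks the map construction as still buggy work in progress). If main.txt = 'include(sub.txt)xyz' includes sub.txt = 'abc', the merged text is 'abcxyz', and one consistent map is:

import bisect

positions = [0, 3]                    # segment starts in the merged text
offsets = [0, 13]                     # merged position + offset = position in the original file
file_names = ['sub.txt', 'main.txt']

def lookup(position):
    i = bisect.bisect_right(positions, position)       # same lookup as srcmap_includes
    return file_names[i - 1], position + offsets[i - 1]

assert lookup(1) == ('sub.txt', 1)     # 'b' stems from sub.txt
assert lookup(4) == ('main.txt', 17)   # 'y' follows the 16-character include directive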
@@ -377,5 +403,7 @@ def preprocess_includes(source_name: str,
     with open(source_name, 'r', encoding='utf-8') as f:
         source_text = f.read()
     include_map, result = generate_include_map(source_name, source_text, find_next_include)
-    mapping_func = functools.partial(includes_map, inclmap=include_map)
+    mapping_func = functools.partial(srcmap_includes, inclmap=include_map)
     return Preprocessed(result, mapping_func)
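Putting the pieces together, a sketch that only mirrors the call pattern of the new TestIncludes.test_simple_include below (the second argument is passed as None there, and 'main.txt' is the test's file name):

find_func = generate_find_include_func(r'include\((?P<name>[^)\n]*)\)')
text, mapping = preprocess_includes('main.txt', None, find_func)   # reads main.txt from disk
original_name, original_pos = mapping(0)                           # map merged position 0 back to its source file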
@@ -20,7 +20,11 @@ limitations under the License.
 """

 import os
+import platform
+import shutil
+import subprocess
 import sys
+import time

 scriptpath = os.path.dirname(__file__) or '.'
 sys.path.append(os.path.abspath(os.path.join(scriptpath, '..')))
@@ -32,9 +36,10 @@ from DHParser.dsl import grammar_provider
 from DHParser import compile_source
 from DHParser.preprocess import make_token, tokenized_to_original_mapping, source_map, \
     BEGIN_TOKEN, END_TOKEN, TOKEN_DELIMITER, SourceMapFunc, SourceMap, chain_preprocessors, \
-    strip_tokens
-from DHParser.toolkit import lstrip_docstring, typing
-from typing import Tuple
+    strip_tokens, generate_find_include_func, preprocess_includes, IncludeInfo
+from DHParser.toolkit import lstrip_docstring, typing, re
+from DHParser.testing import TFFN
+from typing import Tuple, Dict


 class TestMakeToken:
@@ -211,6 +216,72 @@ class TestTokenParsing:
         assert False, "wrong error positions"


+class TestHelpers:
+    def test_generate_find_include_func(self):
+        rx = re.compile(r'include\((?P<name>[^)\n]*)\)')
+        find = generate_find_include_func(rx)
+        info = find('''321include(sub.txt)xyz''', 0)
+        assert info == IncludeInfo(3, 16, 'sub.txt')
+
+
+def system(s: str) -> int:
+    # return os.system(s)
+    return subprocess.call(s, shell=True)
+
+
+class TestIncludes:
+    def setup(self):
+        self.cwd = os.getcwd()
+        os.chdir(scriptpath)
+        # avoid race-condition
+        counter = 10
+        while counter > 0:
+            try:
+                self.dirname = TFFN('test_preprocess_data')
+                os.mkdir(TFFN('test_preprocess_data'))
+                counter = 0
+            except FileExistsError:
+                time.sleep(1)
+                counter -= 1
+        os.chdir(os.path.join(scriptpath, self.dirname))
+
+    def teardown(self):
+        os.chdir(scriptpath)
+        if os.path.exists(self.dirname) and os.path.isdir(self.dirname):
+            shutil.rmtree(self.dirname)
+        if os.path.exists(self.dirname) and not os.listdir(self.dirname):
+            os.rmdir(self.dirname)
+        os.chdir(self.cwd)
+
+    def create_files(self, files: Dict[str, str]):
+        for name, content in files.items():
+            with open(name, 'w', encoding='utf-8') as f:
+                f.write(content)
+
+    def test_simple_include(self):
+        def perform(main, sub):
+            self.create_files({'main.txt': main, 'sub.txt': sub})
+            find_func = generate_find_include_func(r'include\((?P<name>[^)\n]*)\)')
+            text, mapping = preprocess_includes('main.txt', None, find_func)
+            print(mapping)
+            assert text == main.replace('include(sub.txt)', 'abc'), text
+            for i in range(len(text)):
+                name, k = mapping(i)
+                print(i, k, name)
+                txt = main if name == 'main.txt' else sub
+                assert text[i] == txt[k], f'{i}: {text[i]} != {txt[k]} in {name}'
+
+        perform('include(sub.txt)xyz', 'abc')
+        perform('012include(sub.txt)xyz', 'abc')
+        perform('012xyzinclude(sub.txt)', 'abc')
+        perform('01include(sub.txt)2xyz', 'abc')
+
+        perform('012include(sub.txt)xyzinclude(sub.txt)hij', 'abc')
+        perform('012include(sub.txt)include(sub.txt)hij', 'abc')
+        perform('include(sub.txt)include(sub.txt)hijinclude(sub.txt)', 'abc')
+
+
 if __name__ == "__main__":
     # tp = TestTokenParsing()
     # tp.setup()
...