Commit e867a83d authored by Eckhart Arnold

preprocess.py: preprocessor for includes added (not yet tested!)

parent 16a4c0de
# cython.* namespace for pure mode.
# copied from: https://cython.org
# by Stefan Behnel
# License: Apache 2
from __future__ import absolute_import
......
#cython: infer_types=True
#cython: language_level=3
#cython: c_string_type=unicode
#cython: c_string_encoding=utf-8
@@ -30,15 +30,16 @@ cannot completely be described entirely with context-free grammars.
import bisect
import functools
from typing import Union, Callable, Tuple, NamedTuple, List
from typing import Union, Optional, Callable, Tuple, NamedTuple, List
from DHParser.toolkit import re
from DHParser.toolkit import re, dataclasses
__all__ = ('RX_TOKEN_NAME',
           'BEGIN_TOKEN',
           'TOKEN_DELIMITER',
           'END_TOKEN',
           'SourceMap',
           'SourceMapFunc',
           'PreprocessorFunc',
           'PreprocessorResult',
@@ -47,12 +48,18 @@ __all__ = ('RX_TOKEN_NAME',
           'nil_preprocessor',
           'chain_preprocessors',
           'prettyprint_tokenized',
           'SourceMap',
           'neutral_mapping',
           'tokenized_to_original_mapping',
           'source_map',
           'with_source_mapping')
#######################################################################
#
# Types and constants
#
#######################################################################
BEGIN_TOKEN = '\x1b'
TOKEN_DELIMITER = '\x1c'
END_TOKEN = '\x1d'
@@ -63,23 +70,59 @@ RX_TOKEN_ARGUMENT = re.compile(r'[^\x1b\x1c\x1d]*')
RX_TOKEN = re.compile(r'\x1b(?P<name>\w+)\x1c(?P<argument>[^\x1b\x1c\x1d]*)\x1d')
class SourceMap(NamedTuple):
    source_name: str      # name or path or uri of the original source file
    positions: List[int]  # a list of locations
    offsets: List[int]    # the corresponding offsets to be added from these locations onward


@dataclasses.dataclass
class SourceMap:
    source_name: str      # name or path or uri of the original source file
    positions: List[int]  # a list of locations
    offsets: List[int]    # the corresponding offsets to be added from these locations onward
class SourceLocation(NamedTuple):
    name: str         # the file name (or path or uri) of the source code
    source_name: str  # the file name (or path or uri) of the source code
    pos: int          # a position within this file
SourceMapFunc = Union[Callable[[int], SourceLocation], functools.partial]
PreprocessorResult = Union[str, Tuple[str, SourceMapFunc]]
PreprocessorFunc = Union[Callable[[str, str], PreprocessorResult], functools.partial]

SourceMapFunc = Union[Callable[[int], SourceLocation],
                      functools.partial]


class Preprocessed(NamedTuple):
    preprocessed_text: str
    back_mapping: SourceMapFunc
@dataclasses.dataclass
class IncludeMap(SourceMap):
    file_names: List[str]  # list of file_names to which the source locations relate

    def has_includes(self) -> bool:
        L = len(self.file_names)
        return L > 1 or (L == 1 and self.file_names[0] != self.source_name)
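To illustrate the intended semantics (an assumption drawn from the field comments above, not code from this commit): the three lists run in parallel, and from positions[i] onward offsets[i] is added to a location in the preprocessed text in order to recover the corresponding location in file_names[i].

# Illustrative sketch only (assumed semantics, hypothetical file names):
# a main file in which a 19-character include directive starting at
# position 7 has been replaced by 13 characters of included text.
example_map = IncludeMap(
    source_name='main.txt',
    positions=[0, 7, 20],                             # breakpoints in the *preprocessed* text
    offsets=[0, -7, 6],                               # added to positions from each breakpoint on
    file_names=['main.txt', 'part.txt', 'main.txt'])  # file each segment stems from
assert example_map.has_includes()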
class IncludeInfo(NamedTuple):
    begin: int
    length: int
    file_name: str


PreprocessorResult = Union[str, Preprocessed]

def nil_preprocessor(source_text: str, source_name: str) -> Tuple[str, SourceMapFunc]:

FindIncludeFunc = Union[Callable[[str, int], IncludeInfo],  # (document: str, start: int)
                        functools.partial]
PreprocessorFunc = Union[Callable[[str, str], PreprocessorResult],
                         functools.partial]
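As an illustration of the FindIncludeFunc contract (a hypothetical helper, not part of this commit): such a function scans the document from a given start position, returns an IncludeInfo describing the next include directive, and signals "no further include" with a negative begin value, which is what the `while begin >= 0` loop in generate_include_map below appears to rely on. The directive syntax used here is an arbitrary assumption.

# Hypothetical example of a FindIncludeFunc (directive syntax is an assumption):
RX_INCLUDE_EXAMPLE = re.compile(r'#include\s+"(?P<name>[^"\n]+)"')

def find_next_include_example(text: str, start: int) -> IncludeInfo:
    m = RX_INCLUDE_EXAMPLE.search(text, start)
    if m:
        # begin of the directive, its length, and the name of the file to include
        return IncludeInfo(m.start(), m.end() - m.start(), m.group('name'))
    return IncludeInfo(-1, 0, '')   # negative begin: no (further) include found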
#######################################################################
#
# Chaining of preprocessors
#
#######################################################################
def nil_preprocessor(source_text: str, source_name: str) -> Preprocessed:
    """
    A preprocessor that does nothing, i.e. just returns the input.
    """
@@ -100,7 +143,7 @@ def _apply_mappings(position: int, mappings: List[SourceMapFunc]) -> SourceLocat
def _apply_preprocessors(source_text: str, source_name: str,
                         preprocessors: Tuple[PreprocessorFunc, ...]) \
        -> Tuple[str, SourceMapFunc]:
        -> Preprocessed:
    """
    Applies several preprocessing functions sequentially to a source text
    and returns the preprocessed text as well as a function that maps text-
@@ -240,7 +283,7 @@ def source_map(position: int, srcmap: SourceMap) -> SourceLocation:
    raise ValueError


def with_source_mapping(result: PreprocessorResult) -> Tuple[str, SourceMapFunc]:
def with_source_mapping(result: PreprocessorResult) -> Preprocessed:
    """
    Normalizes preprocessor results by adding a mapping if a preprocessor
    only returns the transformed source code and no mapping by itself. It is
@@ -259,7 +302,7 @@ def with_source_mapping(result: PreprocessorResult) -> Tuple[str, SourceMapFunc]
    if isinstance(result, str):
        srcmap = tokenized_to_original_mapping(result)
        mapping_func = functools.partial(source_map, srcmap=srcmap)
        return result, mapping_func
        return Preprocessed(result, mapping_func)
    return result
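A usage sketch (illustrative only; the preprocessor below is hypothetical): a preprocessing function that returns just a string is wrapped into a Preprocessed tuple with an automatically derived back-mapping. The example deliberately keeps character positions unchanged by overwriting comments with spaces, so that the derived mapping stays valid.

# Illustration only: a hypothetical preprocessor that returns a plain string.
def blank_out_comments(source_text: str, source_name: str) -> str:
    # replace '//'-comments by spaces of equal length so positions are preserved
    return re.sub(r'//[^\n]*', lambda m: ' ' * len(m.group()), source_text)

result = with_source_mapping(blank_out_comments('x = 1  // remark\n', 'demo.src'))
text, mapping = result.preprocessed_text, result.back_mapping
# mapping(i) yields a SourceLocation for position i of the preprocessed text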
@@ -267,5 +310,72 @@ def with_source_mapping(result: PreprocessorResult) -> Tuple[str, SourceMapFunc]
#
# Includes - support for chaining source texts
#
# NOT YET TESTED!!!
#
#######################################################################
def generate_include_map(source_name: str,
                         source_text: str,
                         find_next_include: FindIncludeFunc) -> Tuple[IncludeMap, str]:
    file_names: set = set()

    def generate_map(source_name, source_text, find_next) -> Tuple[IncludeMap, str]:
        nonlocal file_names
        map: IncludeMap = IncludeMap(source_name, [0], [0], [source_name])
        text_chunks: List[str] = []
        if source_name in file_names:
            raise ValueError(f'Circular include of {source_name} detected!')
        file_names.add(source_name)
        last_begin = -1
        last_end = 0
        begin, length, include_name = find_next(source_text, 0)
        while begin >= 0:
            assert begin > last_begin
            with open(include_name, 'r', encoding='utf-8') as f:
                include_text = f.read()
            inner_map, inner_text = generate_map(include_name, include_text, find_next)
            inner_map.positions = [pos + begin for pos in inner_map.positions]
            inner_map.offsets = [offset - begin for offset in inner_map.offsets]
            if begin == map.positions[-1]:
                map.file_names = map.file_names[:-1] + inner_map.file_names
                map.positions = map.positions[:-1] + inner_map.positions
                map.offsets = map.offsets[:-1] + inner_map.offsets
                text_chunks.append(inner_text)
            else:
                text_chunks.append(source_text[last_end:begin])
                map.file_names.append(include_name)
                map.positions += inner_map.positions
                map.offsets += inner_map.offsets
                text_chunks.append(inner_text)
            map.file_names.append(source_name)
            map.positions.append(begin + inner_map.positions[-1])
            map.offsets.append(map.offsets[-1] - inner_map.positions[-1] + length)
            last_end = begin + length
            last_begin = begin
            begin, length, include_name = find_next(source_text, last_end)
        text_chunks.append(source_text[last_end:])
        return map, ''.join(text_chunks)

    return generate_map(source_name, source_text, find_next_include)
def includes_map(position: int, inclmap: IncludeMap) -> SourceLocation:
    i = bisect.bisect_right(inclmap.positions, position)
    if i:
        return SourceLocation(
            inclmap.file_names[i],
            # min(position + inclmap.offsets[i - 1], inclmap.positions[i] + inclmap.offsets[i])
            inclmap.positions[i] + inclmap.offsets[i])
    raise ValueError
def preprocess_includes(source_name: str,
                        source_text: Optional[str],
                        find_next_include: FindIncludeFunc) -> Preprocessed:
    if not source_text:
        with open(source_name, 'r', encoding='utf-8') as f:
            source_text = f.read()
    include_map, result = generate_include_map(source_name, source_text, find_next_include)
    mapping_func = functools.partial(includes_map, inclmap=include_map)
    return Preprocessed(result, mapping_func)
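An end-to-end sketch of how the new include preprocessor might be driven (illustrative only; the commit itself flags this code as not yet tested, and find_next_include_example is the hypothetical finder sketched above). Two small files are written to a temporary directory so that generate_include_map can open the included file under the name that appears in the directive.

# Illustration only (untested code path, hypothetical finder from above):
import os, tempfile

workdir = tempfile.mkdtemp()
os.chdir(workdir)                    # includes are opened relative to the cwd
with open('part.txt', 'w', encoding='utf-8') as f:
    f.write('included text')
with open('main.txt', 'w', encoding='utf-8') as f:
    f.write('before #include "part.txt" after')

text, mapping = preprocess_includes('main.txt', None, find_next_include_example)
# `text` should read 'before included text after'; mapping(i) should report
# the file (main.txt or part.txt) and position that position i stems from.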
@@ -40,7 +40,7 @@ try:
except ImportError:
    # import DHParser.Shadow as cython
    cython_optimized = False
    import DHParser.shadow_cython as cython
    import DHParser.externallibs.shadow_cython as cython
__all__ = ('StringView', 'real_indices', 'EMPTY_STRING_VIEW', 'TextBuffer')
......
@@ -43,6 +43,11 @@ try:
except ImportError:
import re
try:
    import dataclasses
except ImportError:
    from DHParser.externallibs import dataclasses36 as dataclasses

import typing
from typing import Any, Iterable, Sequence, Set, AbstractSet, Union, Dict, List, Tuple, \
    Optional, Type
@@ -56,16 +61,18 @@ try:
    import cython
    cython_optimized = cython.compiled  # type: bool
    if cython_optimized:  # not ?
        import DHParser.shadow_cython as cython
        import DHParser.externallibs.shadow_cython as cython
except ImportError:
    cython_optimized = False
    import DHParser.shadow_cython as cython
    import DHParser.externallibs.shadow_cython as cython
from DHParser.configuration import access_thread_locals, get_config_value, NEVER_MATCH_PATTERN
from DHParser.stringview import StringView
__all__ = ('typing',
           're',
           'dataclasses',
           'Protocol',
           'cython',
           'cython_optimized',
......
@@ -13,7 +13,6 @@ from DHParser.versionnumber import __version__
cythonize_modules = [
    'DHParser/stringview.py',
    'DHParser/toolkit.py',
    'DHParser/preprocess.py',
    'DHParser/error.py',
    'DHParser/syntaxtree.py',
    'DHParser/log.py',
......