
Commit afcac5fd authored by di68kap

preprocessor.py: preprocessors can return errors

parent eed91a47
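The commit extends the preprocessor protocol: a Tokenizer now returns the modified source text together with a list of Errors, and PreprocessorResult carries that list as a fourth field. A minimal sketch of the new calling convention (the tokenizer name, sample text, and file name are illustrative and not part of the commit):

```python
# Sketch only: my_tokenizer and the sample inputs are made up for illustration.
from typing import List, Tuple

from DHParser.error import Error
from DHParser.preprocess import make_preprocessor

def my_tokenizer(original_text: str) -> Tuple[str, List[Error]]:
    # insert preprocessor tokens here; report problems via the error list
    return original_text, []

preprocessor = make_preprocessor(my_tokenizer)
# PreprocessorResult is now a 4-tuple: (original, preprocessed, mapping, errors)
original, preprocessed, mapping, errors = preprocessor("some source text", "example.txt")
```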
......@@ -355,12 +355,15 @@ def compile_source(source: str,
# preprocessing
errors = []
if preprocessor is None:
source_text = original_text # type: str
source_mapping = gen_neutral_srcmap_func(source_text, source_name)
# lambda i: SourceLocation(source_name, 0, i) # type: SourceMapFunc
else:
_, source_text, source_mapping = preprocessor(original_text, source_name)
_, source_text, source_mapping, errors = preprocessor(original_text, source_name)
# TODO: process preprocessor errors.
# parsing
......
......@@ -1881,10 +1881,10 @@ RE_INCLUDE = NEVER_MATCH_PATTERN
# by a pattern with group "name" here, e.g. r'\\input{{(?P<name>.*)}}'
def {NAME}Tokenizer(original_text) -> str:
def {NAME}Tokenizer(original_text) -> Tuple[str, List[Error]]:
# Here, a function body can be filled in that adds preprocessor tokens
# to the source code and returns the modified source.
return original_text
return original_text, []
def preprocessor_factory() -> PreprocessorFunc:
......
......@@ -46,16 +46,18 @@ However, in order to report errors, usually at least a line and
column-number
"""
import functools
import os
from typing import Iterable, Iterator, Union, List, Optional, Sequence, Tuple
from typing import Iterable, Iterator, Union, List, Sequence, NamedTuple, Dict, Callable
from DHParser.preprocess import SourceMapFunc, SourceLocation
from DHParser.stringview import StringView
from DHParser.toolkit import linebreaks, line_col, is_filename
__all__ = ('ErrorCode',
__all__ = ('SourceMap',
'SourceLocation',
'SourceMapFunc',
'ErrorCode',
'Error',
'is_fatal',
'is_error',
......@@ -109,6 +111,37 @@ __all__ = ('ErrorCode',
'RECURSION_DEPTH_LIMIT_HIT')
#######################################################################
#
# source mapping
#
#######################################################################
class SourceMap(NamedTuple):
original_name: str # name or path or uri of the original source file
positions: List[int] # a list of locations
offsets: List[int] # the corresponding offsets to be added from these locations onward
file_names: List[str] # list of file_names to which the source locations relate
originals_dict: Dict[str, Union[str, StringView]] # File names => (included) source texts
class SourceLocation(NamedTuple):
original_name: str # the file name (or path or uri) of the source code
original_text: Union[str, StringView] # the source code itself
pos: int # a position within the code
SourceMapFunc = Union[Callable[[int], SourceLocation],
functools.partial]
#######################################################################
#
# error codes
#
#######################################################################
class ErrorCode(int):
pass
......@@ -177,6 +210,13 @@ AST_TRANSFORM_CRASH = ErrorCode(10300)
RECURSION_DEPTH_LIMIT_HIT = ErrorCode(10400)
#######################################################################
#
# class Error
#
#######################################################################
class Error:
"""The Error class encapsulates the all information for a single
error.
......@@ -394,7 +434,8 @@ def add_source_locations(errors: List[Error], source_mapping: SourceMapFunc):
if err.pos < 0:
raise ValueError(f'Illegal error position: {err.pos} Must be >= 0!')
if err.orig_pos >= 0:
raise ValueError('Source location must not be assigned more than once!')
raise ValueError('Source location must not be assigned more than once! '
'This can be circumvented by re-assigning "error.pos" first!')
err.orig_doc, orig_text, err.orig_pos = source_mapping(err.pos)
lbreaks = lb_dict.setdefault(orig_text, linebreaks(orig_text))
err.line, err.column = line_col(lbreaks, err.orig_pos)
......@@ -423,3 +464,4 @@ def canonical_error_strings(errors: List[Error]) -> List[str]:
else:
error_strings = []
return error_strings
......@@ -43,9 +43,9 @@ from DHParser.error import Error, ErrorCode, MANDATORY_CONTINUATION, \
CAPTURE_WITHOUT_PARSERNAME, CAPTURE_DROPPED_CONTENT_WARNING, LOOKAHEAD_WITH_OPTIONAL_PARSER, \
BADLY_NESTED_OPTIONAL_PARSER, BAD_ORDER_OF_ALTERNATIVES, BAD_MANDATORY_SETUP, \
OPTIONAL_REDUNDANTLY_NESTED_WARNING, CAPTURE_STACK_NOT_EMPTY, BAD_REPETITION_COUNT, \
AUTORETRIEVED_SYMBOL_NOT_CLEARED, RECURSION_DEPTH_LIMIT_HIT
AUTORETRIEVED_SYMBOL_NOT_CLEARED, RECURSION_DEPTH_LIMIT_HIT, SourceMapFunc
from DHParser.log import CallItem, HistoryRecord
from DHParser.preprocess import BEGIN_TOKEN, END_TOKEN, RX_TOKEN_NAME, SourceMapFunc
from DHParser.preprocess import BEGIN_TOKEN, END_TOKEN, RX_TOKEN_NAME
from DHParser.stringview import StringView, EMPTY_STRING_VIEW
from DHParser.syntaxtree import ChildrenType, Node, RootNode, WHITESPACE_PTYPE, \
TOKEN_PTYPE, ZOMBIE_TAG, EMPTY_NODE, ResultType
......
......@@ -31,8 +31,9 @@ cannot completely be described entirely with context-free grammars.
import bisect
import functools
import os
from typing import Union, Optional, Callable, Tuple, NamedTuple, List, Dict, Any
from typing import Union, Optional, Callable, Tuple, NamedTuple, List, Any
from DHParser.error import Error, SourceMap, SourceLocation, SourceMapFunc
from DHParser.stringview import StringView
from DHParser.toolkit import re
......@@ -41,8 +42,6 @@ __all__ = ('RX_TOKEN_NAME',
'BEGIN_TOKEN',
'TOKEN_DELIMITER',
'END_TOKEN',
'SourceMap',
'SourceMapFunc',
'PreprocessorFunc',
'PreprocessorResult',
'Tokenizer',
......@@ -80,39 +79,22 @@ class IncludeInfo(NamedTuple):
file_name: str
class SourceMap(NamedTuple):
original_name: str # name or path or uri of the original source file
positions: List[int] # a list of locations
offsets: List[int] # the corresponding offsets to be added from these locations onward
file_names: List[str] # list of file_names to which the source locations relate
originals_dict: Dict[str, Union[str, StringView]] # File names => (included) source texts
def has_includes(sm: SourceMap) -> bool:
return any(fname != sm.original_name for fname in sm.file_names)
class SourceLocation(NamedTuple):
original_name: str # the file name (or path or uri) of the source code
original_text: Union[str, StringView] # the source code itself
pos: int # a position within the code
SourceMapFunc = Union[Callable[[int], SourceLocation],
functools.partial]
class PreprocessorResult(NamedTuple):
original_text: Union[str, StringView]
preprocessed_text: Union[str, StringView]
back_mapping: SourceMapFunc
errors: List[Error]
FindIncludeFunc = Union[Callable[[str, int], IncludeInfo], # (document: str, start: int)
functools.partial]
PreprocessorFunc = Union[Callable[[str, str], PreprocessorResult], # text: str, filename: str
functools.partial]
Tokenizer = Union[Callable[[str], str], functools.partial]
Tokenizer = Union[Callable[[str], Tuple[str, List[Error]]], functools.partial]
# a function that merely adds preprocessor tokens to a source text
......@@ -129,7 +111,8 @@ def nil_preprocessor(original_text: str, original_name: str) -> PreprocessorResu
"""
return PreprocessorResult(original_text,
original_text,
lambda i: SourceLocation(original_name, original_text, i))
lambda i: SourceLocation(original_name, original_text, i),
[])
def _apply_mappings(position: int, mappings: List[SourceMapFunc]) -> SourceLocation:
......@@ -157,12 +140,13 @@ def _apply_preprocessors(original_text: str, original_name: str,
processed = original_text
mapping_chain = []
for prep in preprocessors:
_, processed, mapping_func = prep(processed, original_name)
_, processed, mapping_func, _ = prep(processed, original_name)
mapping_chain.append(mapping_func)
mapping_chain.reverse()
return PreprocessorResult(original_text,
processed,
functools.partial(_apply_mappings, mappings=mapping_chain))
functools.partial(_apply_mappings, mappings=mapping_chain),
[])
def chain_preprocessors(*preprocessors) -> PreprocessorFunc:
......@@ -305,11 +289,12 @@ def make_preprocessor(tokenizer: Tokenizer) -> PreprocessorFunc:
a function that merely adds preprocessor tokens to a source text and
returns the modified source.
"""
def preprocessor(original_text: str, original_name: str, *args) -> PreprocessorResult:
tokenized_text = tokenizer(original_text)
def preprocessor(original_text: str, original_name: str, *args) \
-> PreprocessorResult:
tokenized_text, errors = tokenizer(original_text)
srcmap = tokenized_to_original_mapping(tokenized_text, original_text, original_name)
mapping = functools.partial(source_map, srcmap=srcmap)
return PreprocessorResult(original_text, tokenized_text, mapping)
return PreprocessorResult(original_text, tokenized_text, mapping, errors)
return preprocessor
......@@ -439,6 +424,6 @@ def preprocess_includes(original_text: Optional[str],
original_text = f.read()
include_map, result = generate_include_map(original_name, original_text, find_next_include)
mapping_func = functools.partial(srcmap_includes, inclmap=include_map)
return PreprocessorResult(original_text, result, mapping_func)
return PreprocessorResult(original_text, result, mapping_func, [])
......@@ -591,8 +591,8 @@ from typing import Callable, cast, Iterator, Sequence, List, Set, Union, \
from DHParser.configuration import get_config_value, ALLOWED_PRESET_VALUES
from DHParser.error import Error, ErrorCode, ERROR, PARSER_STOPPED_BEFORE_END, \
add_source_locations
from DHParser.preprocess import SourceMapFunc, SourceLocation, gen_neutral_srcmap_func
add_source_locations, SourceLocation, SourceMapFunc
from DHParser.preprocess import gen_neutral_srcmap_func
from DHParser.stringview import StringView # , real_indices
from DHParser.toolkit import re, cython, linebreaks, line_col, JSONnull, \
validate_XML_attribute_value, fix_XML_attribute_value, lxml_XML_attribute_value, \
......
......@@ -63,8 +63,8 @@ from DHParser import start_logging, suspend_logging, resume_logging, is_filename
RE_INCLUDE = r'\\input{(?P<name>.*)}'
def LaTeXTokenizer(original_text) -> str:
return original_text
def LaTeXTokenizer(original_text) -> Tuple[str, List[Error]]:
return original_text, []
def preprocessor_factory() -> PreprocessorFunc:
......
......@@ -35,11 +35,12 @@ from DHParser.configuration import set_config_value
from DHParser.dsl import grammar_provider
from DHParser import compile_source
from DHParser.preprocess import make_token, tokenized_to_original_mapping, source_map, \
BEGIN_TOKEN, END_TOKEN, TOKEN_DELIMITER, PreprocessorResult, SourceMap, chain_preprocessors, \
BEGIN_TOKEN, END_TOKEN, TOKEN_DELIMITER, PreprocessorResult, chain_preprocessors, \
strip_tokens, gen_find_include_func, preprocess_includes, IncludeInfo, make_preprocessor
from DHParser.error import SourceMap, Error
from DHParser.toolkit import lstrip_docstring, typing, re
from DHParser.testing import unique_name
from typing import Tuple, Dict
from typing import Tuple, Dict, List
class TestMakeToken:
......@@ -89,7 +90,7 @@ class TestSourceMapping:
pos = source_map(0, srcmap)
def tokenize_indentation(src: str) -> str:
def tokenize_indentation(src: str) -> Tuple[str, List[Error]]:
transformed = []
indent_level = 0
for line in src.split('\n'):
......@@ -112,7 +113,7 @@ def tokenize_indentation(src: str) -> str:
indent_level -= 1
tokenized = '\n'.join(transformed)
# print(prettyprint_tokenized(tokenized))
return tokenized
return tokenized, []
preprocess_indentation = make_preprocessor(tokenize_indentation)
......@@ -137,7 +138,8 @@ def preprocess_comments(src: str, src_name: str) -> PreprocessorResult:
positions,
offsets,
[src_name] * len(positions),
{src_name: src})))
{src_name: src})),
[])
class TestTokenParsing:
......@@ -157,7 +159,7 @@ class TestTokenParsing:
print(x) # another comment
print(y)
""")
tokenized = tokenize_indentation(code)
tokenized, _ = tokenize_indentation(code)
srcmap = tokenized_to_original_mapping(tokenized, code)
def verify_mapping(self, teststr, orig_text, preprocessed_text, mapping):
......@@ -194,7 +196,7 @@ class TestTokenParsing:
previous_index = index
def test_non_token_preprocessor(self):
_, tokenized, mapping = preprocess_comments(self.code, 'no_uri')
_, tokenized, mapping, _ = preprocess_comments(self.code, 'no_uri')
self.verify_mapping("def func", self.code, tokenized, mapping)
self.verify_mapping("x > 0:", self.code, tokenized, mapping)
self.verify_mapping("if y > 0:", self.code, tokenized, mapping)
......@@ -203,7 +205,7 @@ class TestTokenParsing:
def test_chained_preprocessors(self):
pchain = chain_preprocessors(preprocess_comments, preprocess_indentation)
_, tokenized, mapping = pchain(self.code, 'no_uri')
_, tokenized, mapping, _ = pchain(self.code, 'no_uri')
self.verify_mapping("def func", self.code, tokenized, mapping)
self.verify_mapping("x > 0:", self.code, tokenized, mapping)
self.verify_mapping("if y > 0:", self.code, tokenized, mapping)
......@@ -283,7 +285,7 @@ class TestIncludes:
def perform(main, sub):
self.create_files({'main.txt': main, 'sub.txt': sub})
find_func = gen_find_include_func(r'include\((?P<name>[^)\n]*)\)')
_, text, mapping = preprocess_includes(None, 'main.txt', find_func)
_, text, mapping, _ = preprocess_includes(None, 'main.txt', find_func)
# print(mapping)
assert text == main.replace('include(sub.txt)', 'abc'), text
for i in range(len(text)):
......@@ -306,7 +308,7 @@ class TestIncludes:
def perform(**ensemble):
self.create_files(ensemble)
find_func = gen_find_include_func(r'#include\((?P<name>[^)\n]*)\)')
_, text, mapping = preprocess_includes(None, 'main', find_func)
_, text, mapping, _ = preprocess_includes(None, 'main', find_func)
substrings = {}
for k, v in reversed(list(ensemble.items())):
for name, content in substrings.items():
......