Commit 19cfca77 authored by Eckhart Arnold
Browse files

preprocess.py: refactoring

parent 04c03c7c
......@@ -46,10 +46,10 @@ from DHParser.preprocess import with_source_mapping, PreprocessorFunc, SourceMap
from DHParser.syntaxtree import Node, RootNode, EMPTY_PTYPE, TreeContext
from DHParser.transform import TransformationFunc
from DHParser.parse import Grammar
from DHParser.error import adjust_error_locations, is_error, is_fatal, Error, \
from DHParser.error import is_error, is_fatal, Error, \
TREE_PROCESSING_CRASH, COMPILER_CRASH, AST_TRANSFORM_CRASH
from DHParser.log import log_parsing_history, log_ST, is_logging
from DHParser.toolkit import load_if_file, is_filename, identity
from DHParser.toolkit import load_if_file, is_filename
__all__ = ('CompilerError',
......
......@@ -377,9 +377,7 @@ def only_errors(messages: Iterable[Error], level: int = ERROR) -> Iterator[Error
#######################################################################
def adjust_error_locations(errors: List[Error],
original_text: Union[StringView, str],
source_mapping: Optional[SourceMapFunc] = None):
def adjust_error_locations(errors: List[Error], source_mapping: SourceMapFunc):
"""Adds (or adjusts) line and column numbers of error messages inplace.
Args:
......@@ -390,21 +388,6 @@ def adjust_error_locations(errors: List[Error],
source_mapping: A function that maps error positions to their
positions in the original source file.
"""
def relative_lc(lbreaks: List[int], pos: int, offset: int) -> Tuple[int, int]:
if offset == 0:
return line_col(lbreaks, pos)
else:
# assert pos >= offset, f"Precondition pos: {pos} >= offset: {offset} violated!"
base_l, base_c = line_col(lbreaks, offset)
l, c = line_col(lbreaks, offset + pos)
if l > base_l:
return l - base_l + 1, c
else:
return 1, c - base_c + 1
line_breaks = linebreaks(original_text)
if not source_mapping:
source_mapping = lambda pos: SourceLocation('', line_breaks, pos)
for err in errors:
assert err.pos >= 0
err.orig_doc, lbreaks, err.orig_pos = source_mapping(err.pos)
......
......@@ -33,6 +33,7 @@ import functools
import os
from typing import Union, Optional, Callable, Tuple, NamedTuple, List, Dict, Any
from DHParser.stringview import StringView
from DHParser.toolkit import re, linebreaks
......@@ -50,8 +51,9 @@ __all__ = ('RX_TOKEN_NAME',
'nil_preprocessor',
'chain_preprocessors',
'prettyprint_tokenized',
'gen_neutral_srcmap_func',
'tokenized_to_original_mapping',
'source_map',
# 'source_map',
'with_source_mapping',
'gen_find_include_func',
'preprocess_includes')
......@@ -223,6 +225,13 @@ def strip_tokens(tokenized: str) -> str:
#######################################################################
def gen_neutral_srcmap_func(source_text: Union[StringView, str], source_name: str='') -> SourceMapFunc:
    """Generates a source map function that maps every position onto itself.

    The line breaks of the source text are determined once, when the
    mapping function is generated, and are then shared by every
    location the mapping function returns.

    Args:
        source_text: The text the positions refer to. An empty text is
            substituted by a single blank before its line breaks are
            computed.
        source_name: The name to be reported as origin of the text. If
            empty, 'UNKNOWN_FILE' is used instead.

    Returns:
        A function that takes a position and returns a SourceLocation
        made up of the source name, the line breaks of the source text
        and the very same, unchanged position.
    """
    breaks = linebreaks(source_text or ' ')
    name = source_name if source_name else 'UNKNOWN_FILE'

    def neutral_mapping(pos):
        # identity mapping: the position is passed through untouched
        return SourceLocation(name, breaks, pos)

    return neutral_mapping
def tokenized_to_original_mapping(tokenized_text: str, source_name: str='UNKNOWN_FILE') -> SourceMap:
"""
Generates a source map for mapping positions in a text that has
......
......@@ -592,7 +592,7 @@ from typing import Callable, cast, Iterator, Sequence, List, Set, Union, \
from DHParser.configuration import get_config_value, ALLOWED_PRESET_VALUES
from DHParser.error import Error, ErrorCode, ERROR, PARSER_STOPPED_BEFORE_END, \
adjust_error_locations
from DHParser.preprocess import SourceMapFunc, SourceLocation
from DHParser.preprocess import SourceMapFunc, SourceLocation, gen_neutral_srcmap_func
from DHParser.stringview import StringView # , real_indices
from DHParser.toolkit import re, cython, linebreaks, line_col, JSONnull, \
validate_XML_attribute_value, fix_XML_attribute_value, lxml_XML_attribute_value, \
......@@ -2697,8 +2697,7 @@ class RootNode(Node):
# info on source code (to be carried along all stages of tree-processing)
self.source = source # type: str
if source_mapping is None:
line_breaks = linebreaks(source)
self.source_mapping = lambda pos: SourceLocation('', line_breaks, pos)
self.source_mapping = gen_neutral_srcmap_func(source)
else:
self.source_mapping = source_mapping # type: SourceMapFunc
self.lbreaks = linebreaks(source) # List[int]
......@@ -2775,8 +2774,7 @@ class RootNode(Node):
self.source = source
self.lbreaks = linebreaks(source)
if source_mapping is None:
line_breaks = linebreaks(source)
self.source_mapping = lambda pos: SourceLocation('', line_breaks, pos)
self.source_mapping = gen_neutral_srcmap_func(source)
else:
self.source_mapping = source_mapping # type: SourceMapFunc
if self.tag_name != '__not_yet_ready__':
......@@ -2796,7 +2794,7 @@ class RootNode(Node):
if id(node) in self.error_nodes:
self.error_nodes[id(self)] = self.error_nodes[id(node)]
if self.source:
adjust_error_locations(self.errors, self.source, self.source_mapping)
adjust_error_locations(self.errors, self.source_mapping)
return self
def add_error(self, node: Optional[Node], error: Error) -> 'RootNode':
......@@ -2834,7 +2832,7 @@ class RootNode(Node):
if node.pos == error.pos:
self.error_positions.setdefault(error.pos, set()).add(id(node))
if self.source:
adjust_error_locations([error], self.source, self.source_mapping)
adjust_error_locations([error], self.source_mapping)
self.errors.append(error)
self.error_flag = max(self.error_flag, error.code)
return self
......
......@@ -44,6 +44,7 @@ from DHParser.error import Error, is_error, adjust_error_locations, PARSER_LOOKA
PARSER_LOOKAHEAD_FAILURE_ONLY, MANDATORY_CONTINUATION_AT_EOF, AUTORETRIEVED_SYMBOL_NOT_CLEARED
from DHParser.log import is_logging, clear_logs, local_log_dir, log_parsing_history
from DHParser.parse import Lookahead
from DHParser.preprocess import gen_neutral_srcmap_func
from DHParser.server import RX_CONTENT_LENGTH, RE_DATA_START, JSONRPC_HEADER_BYTES
from DHParser.syntaxtree import Node, RootNode, parse_tree, flatten_sxpr, ZOMBIE_TAG
from DHParser.trace import set_tracer, all_descendants, trace_history
......@@ -435,10 +436,11 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report='REPORT'
cst = cst.new_error(Node(ZOMBIE_TAG, "").with_pos(0), str(upe))
clean_test_name = str(test_name).replace('*', '')
tests.setdefault('__cst__', {})[test_name] = cst
source_mapper = gen_neutral_srcmap_func(test_code)
errors = [] # type: List[Error]
if is_error(cst.error_flag) and not lookahead_artifact(cst):
errors = [e for e in cst.errors_sorted if e.code not in POSSIBLE_ARTIFACTS]
adjust_error_locations(errors, test_code)
adjust_error_locations(errors, source_mapper)
errata.append('Match test "%s" for parser "%s" failed:'
'\nExpr.: %s\n\n%s\n\n' %
(test_name, parser_name, md_codeblock(test_code),
......@@ -457,7 +459,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report='REPORT'
ast_errors = [e for e in ast.errors if e not in old_errors]
ast_errors.sort(key=lambda e: e.pos)
if is_error(max(e.code for e in ast_errors) if ast_errors else 0):
adjust_error_locations(ast_errors, test_code)
adjust_error_locations(ast_errors, source_mapper)
if ast_errors:
if errata: errata[-1] = errata[-1].rstrip('\n')
ast_errors.append('\n')
......@@ -538,7 +540,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report='REPORT'
with local_log_dir('./LOGS'):
log_parsing_history(parser, "fail_%s_%s.log" % (parser_name, test_name))
if cst.error_flag:
adjust_error_locations(cst.errors, test_code)
adjust_error_locations(cst.errors, source_mapper)
tests.setdefault('__msg__', {})[test_name] = \
"\n".join(str(e) for e in cst.errors_sorted)
if verbose:
......
......@@ -32,6 +32,7 @@ except ImportError:
import re
from DHParser.error import Error, ERROR, adjust_error_locations
from DHParser.preprocess import gen_neutral_srcmap_func
from DHParser.toolkit import linebreaks, line_col
......@@ -74,11 +75,12 @@ class TestErrorSupport:
def test_boundary_cases(self):
err = Error('Error-Test', 0, ERROR)
adjust_error_locations([err], '')
source_mapping = gen_neutral_srcmap_func(' ')
adjust_error_locations([err], source_mapping)
err = Error('Error-Test', 1, ERROR)
try:
adjust_error_locations([err], '')
adjust_error_locations([err], source_mapping)
assert False, "Error-location outside text. ValueError was expected but not raised"
except ValueError:
pass
......
......@@ -41,7 +41,8 @@ from DHParser.parse import ParserError, Parser, Grammar, Forward, TKN, ZeroOrMor
from DHParser import compile_source
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler, \
parse_ebnf, DHPARSER_IMPORTS, compile_ebnf
from DHParser.dsl import grammar_provider, create_parser, raw_compileEBNF
from DHParser.dsl import grammar_provider, create_parser
from DHParser.preprocess import gen_neutral_srcmap_func
from DHParser.syntaxtree import Node, parse_sxpr
from DHParser.stringview import StringView
from DHParser.trace import set_tracer, trace_history, resume_notices_on
......@@ -1374,7 +1375,7 @@ def next_valid_letter(text, start, end):
gr = copy.deepcopy(get_ebnf_grammar())
resume_notices_on(gr)
cst = gr(EBNF_with_Errors)
adjust_error_locations(cst.errors, EBNF_with_Errors)
adjust_error_locations(cst.errors, gen_neutral_srcmap_func(EBNF_with_Errors))
locations = []
for error in cst.errors_sorted:
locations.append((error.line, error.column))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment