Commit db24cec1 authored by Eckhart Arnold

- refactoring: Scanner now named Preprocessor

parent e2d7ea45
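For orientation, a minimal sketch of the renamed names as they appear in the hunks below (old name on the left, new name on the right):

# nil_scanner             -> nil_preprocessor
# ScannerFunc             -> PreprocessorFunc
# ScannerToken            -> PreprocessorToken
# get_ebnf_scanner        -> get_ebnf_preprocessor
# ScannerFactoryFunc      -> PreprocessorFactoryFunc
# BEGIN_SCANNER_TOKEN     -> BEGIN_TOKEN
# END_SCANNER_TOKEN       -> END_TOKEN
# RX_SCANNER_TOKEN        -> RX_PREPROCESSOR_TOKEN
from DHParser.parsers import nil_preprocessor, PreprocessorFunc, PreprocessorToken
from DHParser.ebnf import get_ebnf_preprocessor, PreprocessorFactoryFunc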
@@ -30,10 +30,10 @@ except ImportError:
from .typing34 import Any, cast, Tuple, Union
from DHParser.ebnf import EBNFTransformer, EBNFCompiler, grammar_changed, \
get_ebnf_scanner, get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler, \
ScannerFactoryFunc, ParserFactoryFunc, TransformerFactoryFunc, CompilerFactoryFunc
get_ebnf_preprocessor, get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler, \
PreprocessorFactoryFunc, ParserFactoryFunc, TransformerFactoryFunc, CompilerFactoryFunc
from DHParser.toolkit import logging, load_if_file, is_python_code, compile_python_object
from DHParser.parsers import Grammar, Compiler, compile_source, nil_scanner, ScannerFunc
from DHParser.parsers import Grammar, Compiler, compile_source, nil_preprocessor, PreprocessorFunc
from DHParser.syntaxtree import Node, TransformationFunc
@@ -59,7 +59,7 @@ RX_SECTION_MARKER = re.compile(SECTION_MARKER.format(marker=r'.*?SECTION.*?'))
RX_WHITESPACE = re.compile('\s*')
SYMBOLS_SECTION = "SYMBOLS SECTION - Can be edited. Changes will be preserved."
SCANNER_SECTION = "SCANNER SECTION - Can be edited. Changes will be preserved."
PREPROCESSOR_SECTION = "PREPROCESSOR SECTION - Can be edited. Changes will be preserved."
PARSER_SECTION = "PARSER SECTION - Don't edit! CHANGES WILL BE OVERWRITTEN!"
AST_SECTION = "AST SECTION - Can be edited. Changes will be preserved."
COMPILER_SECTION = "COMPILER SECTION - Can be edited. Changes will be preserved."
@@ -75,11 +75,11 @@ try:
except ImportError:
import re
from DHParser.toolkit import logging, is_filename, load_if_file
from DHParser.parsers import Grammar, Compiler, nil_scanner, \\
from DHParser.parsers import Grammar, Compiler, nil_preprocessor, \\
Lookbehind, Lookahead, Alternative, Pop, Required, Token, Synonym, \\
Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture, \\
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \\
last_value, counterpart, accumulate, ScannerFunc
last_value, counterpart, accumulate, PreprocessorFunc
from DHParser.syntaxtree import Node, traverse, remove_children_if, \\
reduce_single_child, replace_by_single_child, remove_whitespace, \\
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \\
@@ -98,7 +98,7 @@ def compile_src(source):
cname = compiler.__class__.__name__
log_file_name = os.path.basename(os.path.splitext(source)[0]) \\
if is_filename(source) else cname[:cname.find('.')] + '_out'
result = compile_source(source, get_scanner(),
result = compile_source(source, get_preprocessor(),
get_grammar(),
get_transformer(), compiler)
return result
@@ -176,7 +176,7 @@ def grammar_instance(grammar_representation) -> Tuple[Grammar, str]:
def compileDSL(text_or_file: str,
scanner: ScannerFunc,
preprocessor: PreprocessorFunc,
dsl_grammar: Union[str, Grammar],
ast_transformation: TransformationFunc,
compiler: Compiler) -> Any:
@@ -192,7 +192,7 @@ def compileDSL(text_or_file: str,
assert isinstance(compiler, Compiler)
parser, grammar_src = grammar_instance(dsl_grammar)
result, errors, AST = compile_source(text_or_file, scanner, parser,
result, errors, AST = compile_source(text_or_file, preprocessor, parser,
ast_transformation, compiler)
if errors:
src = load_if_file(text_or_file)
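A usage sketch of the renamed signature; the grammar file name is hypothetical, the factory functions are the ones touched elsewhere in this diff:

from DHParser.dsl import compileDSL
from DHParser.parsers import nil_preprocessor
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler

# nil_preprocessor passes the source through unchanged
result = compileDSL("arithmetic.ebnf", nil_preprocessor, get_ebnf_grammar(),
                    get_ebnf_transformer(), get_ebnf_compiler("Arithmetic"))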
@@ -204,7 +204,7 @@ def raw_compileEBNF(ebnf_src: str, branding="DSL") -> EBNFCompiler:
"""
Compiles an EBNF grammar file and returns the compiler object
that was used and which can now be queried for the result as well
as skeleton code for scanner, transformer and compiler objects.
as skeleton code for preprocessor, transformer and compiler objects.
Args:
ebnf_src(str): Either the file name of an EBNF grammar or
@@ -218,14 +218,14 @@ def raw_compileEBNF(ebnf_src: str, branding="DSL") -> EBNFCompiler:
"""
grammar = get_ebnf_grammar()
compiler = get_ebnf_compiler(branding, ebnf_src)
compileDSL(ebnf_src, nil_scanner, grammar, EBNFTransformer, compiler)
compileDSL(ebnf_src, nil_preprocessor, grammar, EBNFTransformer, compiler)
return compiler
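A sketch of querying the returned compiler object, using the renamed skeleton generator defined further below (grammar file name hypothetical):

compiler = raw_compileEBNF("arithmetic.ebnf", branding="Arithmetic")
parser_src = compiler.result                              # generated parser code
preprocessor_src = compiler.gen_preprocessor_skeleton()   # renamed skeleton generator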
def compileEBNF(ebnf_src: str, branding="DSL") -> str:
"""
Compiles an EBNF source file and returns the source code of a
compiler suite with skeletons for scanner, transformer and
compiler suite with skeletons for preprocessor, transformer and
compiler.
Args:
@@ -241,7 +241,7 @@ def compileEBNF(ebnf_src: str, branding="DSL") -> str:
compiler = raw_compileEBNF(ebnf_src, branding)
src = ["#/usr/bin/python\n",
SECTION_MARKER.format(marker=SYMBOLS_SECTION), DHPARSER_IMPORTS,
SECTION_MARKER.format(marker=SCANNER_SECTION), compiler.gen_scanner_skeleton(),
SECTION_MARKER.format(marker=PREPROCESSOR_SECTION), compiler.gen_preprocessor_skeleton(),
SECTION_MARKER.format(marker=PARSER_SECTION), compiler.result,
SECTION_MARKER.format(marker=AST_SECTION), compiler.gen_transformer_skeleton(),
SECTION_MARKER.format(marker=COMPILER_SECTION), compiler.gen_compiler_skeleton(),
@@ -264,32 +264,32 @@ def parser_factory(ebnf_src: str, branding="DSL") -> Grammar:
A factory function for a grammar-parser for texts in the
language defined by ``ebnf_src``.
"""
grammar_src = compileDSL(ebnf_src, nil_scanner, get_ebnf_grammar(),
grammar_src = compileDSL(ebnf_src, nil_preprocessor, get_ebnf_grammar(),
get_ebnf_transformer(), get_ebnf_compiler(branding))
return compile_python_object(DHPARSER_IMPORTS + grammar_src, 'get_(?:\w+_)?grammar$')
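parser_factory returns the get_grammar factory compiled from the generated source; a hedged sketch (the EBNF snippet is purely illustrative):

get_grammar = parser_factory('expression = term { ("+"|"-") term }',
                             branding="Arithmetic")
syntax_tree = get_grammar()("1 + 2 - 3")   # instantiate the Grammar, then parse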
def load_compiler_suite(compiler_suite: str) -> \
Tuple[ScannerFactoryFunc, ParserFactoryFunc, TransformerFactoryFunc, CompilerFactoryFunc]:
Tuple[PreprocessorFactoryFunc, ParserFactoryFunc, TransformerFactoryFunc, CompilerFactoryFunc]:
"""
Extracts a compiler suite from file or string ``compiler_suite``
and returns it as a tuple (scanner, parser, ast, compiler).
and returns it as a tuple (preprocessor, parser, ast, compiler).
Returns:
4-tuple (scanner function, parser class, ast transformer function, compiler class)
4-tuple (preprocessor function, parser class, ast transformer function, compiler class)
"""
global RX_SECTION_MARKER
assert isinstance(compiler_suite, str)
source = load_if_file(compiler_suite)
if is_python_code(compiler_suite):
try:
intro, imports, scanner_py, parser_py, ast_py, compiler_py, outro = \
intro, imports, preprocessor_py, parser_py, ast_py, compiler_py, outro = \
RX_SECTION_MARKER.split(source)
except ValueError as error:
raise AssertionError('File "' + compiler_suite + '" seems to be corrupted. '
'Please delete or repair file manually.')
# TODO: Compile in one step and pick parts from namespace later ?
scanner = compile_python_object(imports + scanner_py, 'get_(?:\w+_)?scanner$')
preprocessor = compile_python_object(imports + preprocessor_py, 'get_(?:\w+_)?preprocessor$')
parser = compile_python_object(imports + parser_py, 'get_(?:\w+_)?grammar$')
ast = compile_python_object(imports + ast_py, 'get_(?:\w+_)?transformer$')
else:
@@ -299,12 +299,12 @@ def load_compiler_suite(compiler_suite: str) -> \
get_ebnf_grammar(), get_ebnf_transformer(), get_ebnf_compiler())
if errors:
raise GrammarError('\n\n'.join(errors), source)
scanner = get_ebnf_scanner
preprocessor = get_ebnf_preprocessor
parser = get_ebnf_grammar
ast = get_ebnf_transformer
compiler = compile_python_object(imports + compiler_py, 'get_(?:\w+_)?compiler$')
return scanner, parser, ast, compiler
return preprocessor, parser, ast, compiler
def is_outdated(compiler_suite: str, grammar_source: str) -> bool:
@@ -327,7 +327,7 @@ def is_outdated(compiler_suite: str, grammar_source: str) -> bool:
True, if ``compiler_suite`` seems to be out of date.
"""
try:
scanner, grammar, ast, compiler = load_compiler_suite(compiler_suite)
preprocessor, grammar, ast, compiler = load_compiler_suite(compiler_suite)
return grammar_changed(grammar(), grammar_source)
except ValueError:
return True
@@ -352,8 +352,8 @@ def run_compiler(text_or_file: str, compiler_suite: str) -> Any:
Raises:
CompilerError
"""
scanner, parser, ast, compiler = load_compiler_suite(compiler_suite)
return compileDSL(text_or_file, scanner(), parser(), ast(), compiler())
preprocessor, parser, ast, compiler = load_compiler_suite(compiler_suite)
return compileDSL(text_or_file, preprocessor(), parser(), ast(), compiler())
def compile_on_disk(source_file: str, compiler_suite="", extension=".xml"):
@@ -364,7 +364,7 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml"):
If no ``compiler_suite`` is given it is assumed that the source
file is an EBNF grammar. In this case the result will be a Python
script containing a parser for that grammar as well as the
skeletons for a scanner, AST transformation table, and compiler.
skeletons for a preprocessor, AST transformation table, and compiler.
If the Python script already exists, only the parser name in the
script will be updated. (For this to work, the different names
need to be delimited by section marker blocks.) `compile_on_disk()`
@@ -396,7 +396,7 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml"):
if compiler_suite:
sfactory, pfactory, tfactory, cfactory = load_compiler_suite(compiler_suite)
else:
sfactory = get_ebnf_scanner
sfactory = get_ebnf_preprocessor
pfactory = get_ebnf_grammar
tfactory = get_ebnf_transformer
cfactory = get_ebnf_compiler
@@ -408,7 +408,7 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml"):
elif cfactory == get_ebnf_compiler: # trans == get_ebnf_transformer or trans == EBNFTransformer: # either an EBNF- or no compiler suite given
ebnf_compiler = cast(EBNFCompiler, compiler1)
global SECTION_MARKER, RX_SECTION_MARKER, SCANNER_SECTION, PARSER_SECTION, \
global SECTION_MARKER, RX_SECTION_MARKER, PREPROCESSOR_SECTION, PARSER_SECTION, \
AST_SECTION, COMPILER_SECTION, END_SECTIONS_MARKER, RX_WHITESPACE, \
DHPARSER_MAIN, DHPARSER_IMPORTS
f = None
@@ -416,9 +416,9 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml"):
f = open(rootname + 'Compiler.py', 'r', encoding="utf-8")
source = f.read()
sections = RX_SECTION_MARKER.split(source)
intro, imports, scanner, parser, ast, compiler, outro = sections
intro, imports, preprocessor, parser, ast, compiler, outro = sections
except (PermissionError, FileNotFoundError, IOError) as error:
intro, imports, scanner, parser, ast, compiler, outro = '', '', '', '', '', '', ''
intro, imports, preprocessor, parser, ast, compiler, outro = '', '', '', '', '', '', ''
except ValueError as error:
name = '"' + rootname + 'Compiler.py"'
raise ValueError('Could not identify all required sections in ' + name +
@@ -434,8 +434,8 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml"):
outro = DHPARSER_MAIN.format(NAME=compiler_name)
if RX_WHITESPACE.fullmatch(imports):
imports = DHPARSER_IMPORTS
if RX_WHITESPACE.fullmatch(scanner):
scanner = ebnf_compiler.gen_scanner_skeleton()
if RX_WHITESPACE.fullmatch(preprocessor):
preprocessor = ebnf_compiler.gen_preprocessor_skeleton()
if RX_WHITESPACE.fullmatch(ast):
ast = ebnf_compiler.gen_transformer_skeleton()
if RX_WHITESPACE.fullmatch(compiler):
@@ -446,8 +446,8 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml"):
f.write(intro)
f.write(SECTION_MARKER.format(marker=SYMBOLS_SECTION))
f.write(imports)
f.write(SECTION_MARKER.format(marker=SCANNER_SECTION))
f.write(scanner)
f.write(SECTION_MARKER.format(marker=PREPROCESSOR_SECTION))
f.write(preprocessor)
f.write(SECTION_MARKER.format(marker=PARSER_SECTION))
f.write(result)
f.write(SECTION_MARKER.format(marker=AST_SECTION))
@@ -29,17 +29,16 @@ except ImportError:
from .typing34 import Callable, Dict, List, Set, Tuple
from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name
from DHParser.parsers import Grammar, mixin_comment, nil_scanner, Forward, RE, NegativeLookahead, \
from DHParser.parsers import Grammar, mixin_comment, nil_preprocessor, Forward, RE, NegativeLookahead, \
Alternative, Series, Optional, Required, OneOrMore, ZeroOrMore, Token, Compiler, \
ScannerFunc
PreprocessorFunc
from DHParser.syntaxtree import Node, traverse, remove_brackets, \
reduce_single_child, replace_by_single_child, TOKEN_PTYPE, remove_expendables, \
remove_tokens, flatten, forbid, assert_content, WHITESPACE_PTYPE, key_tag_name, \
TransformationFunc
from DHParser.versionnumber import __version__
__all__ = ['get_ebnf_scanner',
__all__ = ['get_ebnf_preprocessor',
'get_ebnf_grammar',
'get_ebnf_transformer',
'get_ebnf_compiler',
@@ -48,7 +47,7 @@ __all__ = ['get_ebnf_scanner',
'EBNFCompilerError',
'EBNFCompiler',
'grammar_changed',
'ScannerFactoryFunc',
'PreprocessorFactoryFunc',
'ParserFactoryFunc',
'TransformerFactoryFunc',
'CompilerFactoryFunc']
@@ -61,8 +60,8 @@ __all__ = ['get_ebnf_scanner',
########################################################################
def get_ebnf_scanner() -> ScannerFunc:
return nil_scanner
def get_ebnf_preprocessor() -> PreprocessorFunc:
return nil_preprocessor
########################################################################
@@ -247,15 +246,14 @@ def get_ebnf_transformer() -> TransformationFunc:
########################################################################
ScannerFactoryFunc = Callable[[], ScannerFunc]
PreprocessorFactoryFunc = Callable[[], PreprocessorFunc]
ParserFactoryFunc = Callable[[], Grammar]
TransformerFactoryFunc = Callable[[], TransformationFunc]
CompilerFactoryFunc = Callable[[], Compiler]
SCANNER_FACTORY = '''
def get_scanner() -> ScannerFunc:
return {NAME}Scanner
PREPROCESSOR_FACTORY = '''
def get_preprocessor() -> PreprocessorFunc:
return {NAME}Preprocessor
'''
@@ -335,21 +333,20 @@ class EBNFCompiler(Compiler):
self.directives = {'whitespace': self.WHITESPACE['horizontal'],
'comment': '',
'literalws': ['right'],
'tokens': set(), # alt. 'scanner_tokens'
'filter': dict(), # alt. 'filter'
'testing': False }
'tokens': set(), # alt. 'preprocessor_tokens'
'filter': dict(), # alt. 'filter'
'testing': False}
@property
def result(self) -> str:
return self._result
# methods for generating skeleton code for preprocessor, transformer, and compiler
# methods for generating skeleton code for scanner, transformer, and compiler
def gen_scanner_skeleton(self) -> str:
name = self.grammar_name + "Scanner"
def gen_preprocessor_skeleton(self) -> str:
name = self.grammar_name + "Preprocessor"
return "def %s(text):\n return text\n" % name \
+ SCANNER_FACTORY.format(NAME=self.grammar_name)
+ PREPROCESSOR_FACTORY.format(NAME=self.grammar_name)
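For a grammar named 'Arithmetic', the skeleton generated by this method would read roughly as follows (whitespace normalized):

def ArithmeticPreprocessor(text):
    return text

def get_preprocessor() -> PreprocessorFunc:
    return ArithmeticPreprocessor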
def gen_transformer_skeleton(self) -> str:
@@ -515,7 +512,7 @@ class EBNFCompiler(Compiler):
' end with a double underscore "__".' % rule)
elif rule in self.directives['tokens']:
node.add_error('Symbol "%s" has already been defined as '
'a scanner token.' % rule)
'a preprocessor token.' % rule)
elif keyword.iskeyword(rule):
node.add_error('Python keyword "%s" may not be used as a symbol. '
% rule + '(This may change in the future.)')
@@ -595,7 +592,7 @@ class EBNFCompiler(Compiler):
else {} if 'none' in value else value
self.directives[key] = list(ws)
elif key in {'tokens', 'scanner_tokens'}:
elif key in {'tokens', 'preprocessor_tokens'}:
self.directives['tokens'] |= self.compile(node.children[1])
elif key.endswith('_filter'):
@@ -687,7 +684,7 @@ class EBNFCompiler(Compiler):
def on_symbol(self, node: Node) -> str: # called only for symbols on the right hand side!
symbol = str(node) # ; assert result == cast(str, node.result)
if symbol in self.directives['tokens']:
return 'ScannerToken("' + symbol + '")'
return 'PreprocessorToken("' + symbol + '")'
else:
self.current_symbols.append(node)
if symbol not in self.symbols:
@@ -73,17 +73,16 @@ from DHParser.syntaxtree import WHITESPACE_PTYPE, TOKEN_PTYPE, ZOMBIE_PARSER, Pa
Node, TransformationFunc
from DHParser.toolkit import load_if_file, error_messages
__all__ = ['ScannerFunc',
__all__ = ['PreprocessorFunc',
'HistoryRecord',
'Parser',
'Grammar',
'RX_SCANNER_TOKEN',
'BEGIN_SCANNER_TOKEN',
'END_SCANNER_TOKEN',
'RX_PREPROCESSOR_TOKEN',
'BEGIN_TOKEN',
'END_TOKEN',
'make_token',
'nil_scanner',
'ScannerToken',
'nil_preprocessor',
'PreprocessorToken',
'RegExp',
'RE',
'Token',
@@ -121,7 +120,7 @@ __all__ = ['ScannerFunc',
########################################################################
ScannerFunc = Union[Callable[[str], str], partial]
PreprocessorFunc = Union[Callable[[str], str], partial]
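The alias admits plain callables as well as functools.partial objects; a sketch of a parameterized preprocessor built with partial (names illustrative):

from functools import partial

def substitute(text: str, old: str, new: str) -> str:
    return text.replace(old, new)

# leaving only the text argument open satisfies PreprocessorFunc
tab_preprocessor = partial(substitute, old='\t', new='    ')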
LEFT_RECURSION_DEPTH = 20 if platform.python_implementation() == "PyPy" \
@@ -610,66 +609,65 @@ def dsl_error_msg(parser: Parser, error_str: str) -> str:
########################################################################
RX_SCANNER_TOKEN = re.compile('\w+')
BEGIN_SCANNER_TOKEN = '\x1b'
END_SCANNER_TOKEN = '\x1c'
RX_PREPROCESSOR_TOKEN = re.compile('\w+')
BEGIN_TOKEN = '\x1b'
END_TOKEN = '\x1c'
def make_token(token: str, argument: str = '') -> str:
"""
Turns the ``token`` and ``argument`` into a special token that
will be caught by the `ScannerToken`-parser.
will be caught by the `PreprocessorToken`-parser.
This function is a support function that should be used by scanners
to inject scanner tokens into the source text.
This function is a support function that should be used by
preprocessors to inject preprocessor tokens into the source text.
"""
assert RX_SCANNER_TOKEN.match(token)
assert argument.find(BEGIN_SCANNER_TOKEN) < 0
assert argument.find(END_SCANNER_TOKEN) < 0
assert RX_PREPROCESSOR_TOKEN.match(token)
assert argument.find(BEGIN_TOKEN) < 0
assert argument.find(END_TOKEN) < 0
return BEGIN_SCANNER_TOKEN + token + argument + END_SCANNER_TOKEN
return BEGIN_TOKEN + token + argument + END_TOKEN
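A quick check of the new framing, given BEGIN_TOKEN = '\x1b' and END_TOKEN = '\x1c' as defined above:

>>> make_token('INDENT', '4')
'\x1bINDENT4\x1c'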
def nil_scanner(text: str) -> str:
def nil_preprocessor(text: str) -> str:
return text
class ScannerToken(Parser):
class PreprocessorToken(Parser):
"""
Parses tokens that have been inserted by a Scanner.
Parses tokens that have been inserted by a preprocessor.
Scanners can generate Tokens with the ``make_token``-function.
Preprocessors can generate Tokens with the ``make_token``-function.
These tokens start and end with magic characters that can only be
matched by the ScannerToken Parser. Scanner tokens can be used to
insert BEGIN - END delimiters at the beginning or ending of an
indented block. Otherwise indented blocks are difficult to handle
with parsing expression grammars.
matched by the PreprocessorToken Parser. Such tokens can be used to
insert BEGIN - END delimiters at the beginning or ending of a
quoted block, for example.
"""
def __init__(self, scanner_token: str) -> None:
assert scanner_token and scanner_token.isupper()
assert RX_SCANNER_TOKEN.match(scanner_token)
super(ScannerToken, self).__init__(scanner_token)
def __init__(self, token: str) -> None:
assert token and token.isupper()
assert RX_PREPROCESSOR_TOKEN.match(token)
super(PreprocessorToken, self).__init__(token)
def __call__(self, text: str) -> Tuple[Node, str]:
if text[0:1] == BEGIN_SCANNER_TOKEN:
end = text.find(END_SCANNER_TOKEN, 1)
if text[0:1] == BEGIN_TOKEN:
end = text.find(END_TOKEN, 1)
if end < 0:
node = Node(self, '').add_error(
'END_SCANNER_TOKEN delimiter missing from scanner token. '
'(Most likely due to a scanner bug!)') # type: Node
'END_TOKEN delimiter missing from preprocessor token. '
'(Most likely due to a preprocessor bug!)') # type: Node
return node, text[1:]
elif end == 0:
node = Node(self, '').add_error(
'Scanner token cannot have zero length. '
'(Most likely due to a scanner bug!)')
'Preprocessor-token cannot have zero length. '
'(Most likely due to a preprocessor bug!)')
return node, text[2:]
elif text.find(BEGIN_SCANNER_TOKEN, 1, end) >= 0:
elif text.find(BEGIN_TOKEN, 1, end) >= 0:
node = Node(self, text[len(self.name) + 1:end])
node.add_error(
'Scanner tokens must not be nested or contain '
'BEGIN_SCANNER_TOKEN delimiter as part of their argument. '
'(Most likely due to a scanner bug!)')
'Preprocessor-tokens must not be nested or contain '
'BEGIN_TOKEN delimiter as part of their argument. '
'(Most likely due to a preprocessor bug!)')
return node, text[end:]
if text[1:len(self.name) + 1] == self.name:
return Node(self, text[len(self.name) + 1:end]), \
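A hedged sketch of how a preprocessor and the renamed parser cooperate; the token name and replacement rule are purely illustrative:

def include_preprocessor(text: str) -> str:
    # frame every '#include' pragma as a preprocessor token
    return text.replace('#include', make_token('INCLUDE'))

include = PreprocessorToken('INCLUDE')   # matches '\x1bINCLUDE...\x1c' in the text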
@@ -700,7 +698,7 @@ class RegExp(Parser):
return RegExp(regexp, self.name)
def __call__(self, text: str) -> Tuple[Node, str]:
match = text[0:1] != BEGIN_SCANNER_TOKEN and self.regexp.match(text) # ESC starts a scanner token.
match = text[0:1] != BEGIN_TOKEN and self.regexp.match(text) # ESC starts a preprocessor token.
if match:
end = match.end()
return Node(self, text[:end]), text[end:]
@@ -1400,7 +1398,7 @@ class Compiler:
def compile_source(source: str,
scanner: ScannerFunc, # str -> str
preprocessor: PreprocessorFunc, # str -> str
parser: Grammar, # str -> Node (concrete syntax tree (CST))
transformer: TransformationFunc, # Node -> Node (abstract syntax tree (AST))
compiler: Compiler): # Node (AST) -> Any
@@ -1416,8 +1414,8 @@ def compile_source(source: str,
Args:
source (str): The input text for compilation or the name of a
file containing the input text.
scanner (function): text -> text. A scanner function or None,
if no scanner is needed.
preprocessor (function): text -> text. A preprocessor function
or None, if no preprocessor is needed.
parser (function): A parsing function or grammar class
transformer (function): A transformation function that takes
the root-node of the concrete syntax tree as an argument and
@@ -1435,8 +1433,8 @@ def compile_source(source: str,
"""
source_text = load_if_file(source)
log_file_name = logfile_basename(source, compiler)
if scanner is not None:
source_text = scanner(source_text)
if preprocessor is not None:
source_text = preprocessor(source_text)
syntax_tree = parser(source_text)
if is_logging():
syntax_tree.log(log_file_name + '.cst')
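The renamed four-stage pipeline end to end, sketched with the EBNF suite standing in for any compiler suite (source file name hypothetical):

result, errors, ast = compile_source(
    "arithmetic.ebnf",
    get_ebnf_preprocessor(),   # str -> str
    get_ebnf_grammar(),        # str -> concrete syntax tree (CST)
    get_ebnf_transformer(),    # CST -> abstract syntax tree (AST)
    get_ebnf_compiler())       # AST -> Any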
@@ -98,6 +98,7 @@ class ParserBase:
def repr(self) -> str:
return self.name if self.name else repr(self)
class MockParser(ParserBase):
"""
MockParser objects can be used to reconstruct syntax trees from a
@@ -583,7 +584,7 @@ def traverse(root_node, processing_table, key_func=key_tag_name) -> None:
"""Traverses the snytax tree starting with the given ``node`` depth
first and applies the sequences of callback-functions registered
in the ``calltable``-dictionary.
The most important use case is the transformation of a concrete
syntax tree into an abstract tree (AST). But it is also imaginable
to employ tree-traversal for the semantic analysis of the AST.
@@ -598,16 +599,16 @@ def traverse(root_node, processing_table, key_func=key_tag_name) -> None:
'~': always called (after any other processing function)
Args:
root_node (Node): The root-node of the syntax tree to be traversed
root_node (Node): The root-node of the syntax tree to be traversed
processing_table (dict): node key -> sequence of functions that
will be applied to matching nodes in order. This dictionary
is interpreted as a ``compact_table``. See
is interpreted as a ``compact_table``. See
``toolkit.expand_table`` or ``EBNFCompiler.EBNFTransTable``
key_func (function): A mapping key_func(node) -> keystr. The default
key_func yields node.parser.name.
Example:
table = { "term": [replace_by_single_child, flatten],
table = { "term": [replace_by_single_child, flatten],
"factor, flowmarker, retrieveop": replace_by_single_child }
traverse(node, table)
"""
@@ -656,19 +657,6 @@ def traverse(root_node, processing_table, key_func=key_tag_name) -> None:
# ------------------------------------------------
@transformation_factory
def replace_parser(node, name: str):
"""Replaces the parser of a Node with a mock parser with the given
name.
Parameters:
name(str): "NAME:PTYPE" of the surogate. The ptype is optional
node(Node): The node where the parser shall be replaced
"""
name, ptype = (name.split(':') + [''])[:2]
node.parser = MockParser(name, ptype)
def replace_by_single_child(node):
"""Remove single branch node, replacing it by its immediate descendant.
(In case the descendant's name is empty (i.e. anonymous) the
@@ -691,6 +679,19 @@ def reduce_single_child(node):
node.result = node.result[0].result
@transformation_factory
def replace_parser(node, name: str):
"""Replaces the parser of a Node with a mock parser with the given
name.
Parameters:
name(str): "NAME:PTYPE" of the surogate. The ptype is optional
node(Node): The node where the parser shall be replaced
"""
name, ptype = (name.split(':') + [''])[:2]
node.parser = MockParser(name, ptype)
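Because of the @transformation_factory decorator, replace_parser can be entered into a processing table with just the name argument; a sketch with illustrative table keys:

table = { "expression": replace_parser("expr"),
          "term": [replace_by_single_child, flatten] }
traverse(root_node, table)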
@transformation_factory(Callable)
def flatten(node, condition=lambda node: not node.parser.name, recursive=True):
"""Flattens all children, that fulfil the given `condition`
@@ -30,6 +30,7 @@ the directory exists and raises an error if a file with the same name
already exists.
"""
import codecs
import collections
import contextlib
import hashlib
@@ -38,6 +39,7 @@ try:
import regex as re
except ImportError:
import re
import sys
try:
from typing import Any, List, Tuple
except ImportError:
@@ -389,3 +391,13 @@ def compile_python_object(python_src, catch_obj_regex=""):
return namespace[matches[0]] if matches else None
else:
return namespace
try:
if sys.stdout.encoding.upper() != "UTF-8":
# make sure that `print()` does not raise an error on
# non-ASCII characters:
sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach())
except AttributeError:
# somebody has already taken care of this !?
pass
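A sketch of what the new guard buys: once the module has been imported on a console that is not UTF-8-capable, printing non-ASCII text no longer raises UnicodeEncodeError:

import DHParser.toolkit   # installs the UTF-8 writer on import if needed
print('Grüße')            # would otherwise fail on an ASCII-only stdout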
@@ -18,7 +18,7 @@ implied. See the License for the specific language governing
permissions and limitations under the License.
"""
# TODO: This is still a stub...
# TODO: This is still a stub...
import os
import sys
@@ -26,7 +26,7 @@ from functools import partial
from DHParser.dsl import compileDSL, compile_on_disk
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
from DHParser.parsers import compile_source, nil_scanner
from DHParser.parsers import compile_source, nil_preprocessor
from DHParser.toolkit import logging
@@ -53,7 +53,7 @@ def selftest(file_name):
# compile the grammar again using the result of the previous
# compilation as parser
for i in range(1):
result = compileDSL(grammar, nil_scanner, result, transformer, compiler)
result = compileDSL(grammar, nil_preprocessor, result, transformer, compiler)
print(result)
return result
@@ -193,7 +193,7 @@ code = compile(parser_py, '<string>', 'exec')
module_vars = globals()
name_space = {k: module_vars[k] for k in {'RegExp', 'RE', 'Token', 'Required', 'Optional', 'mixin_comment',
'ZeroOrMore', 'OneOrMore', 'Sequence', 'Alternative', 'Forward',