11.3.2021, 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit db24cec1 authored by Eckhart Arnold's avatar Eckhart Arnold

- refactoring: Scanner now named Preprocessor

parent e2d7ea45
...@@ -30,10 +30,10 @@ except ImportError: ...@@ -30,10 +30,10 @@ except ImportError:
from .typing34 import Any, cast, Tuple, Union from .typing34 import Any, cast, Tuple, Union
from DHParser.ebnf import EBNFTransformer, EBNFCompiler, grammar_changed, \ from DHParser.ebnf import EBNFTransformer, EBNFCompiler, grammar_changed, \
get_ebnf_scanner, get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler, \ get_ebnf_preprocessor, get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler, \
ScannerFactoryFunc, ParserFactoryFunc, TransformerFactoryFunc, CompilerFactoryFunc PreprocessorFactoryFunc, ParserFactoryFunc, TransformerFactoryFunc, CompilerFactoryFunc
from DHParser.toolkit import logging, load_if_file, is_python_code, compile_python_object from DHParser.toolkit import logging, load_if_file, is_python_code, compile_python_object
from DHParser.parsers import Grammar, Compiler, compile_source, nil_scanner, ScannerFunc from DHParser.parsers import Grammar, Compiler, compile_source, nil_preprocessor, PreprocessorFunc
from DHParser.syntaxtree import Node, TransformationFunc from DHParser.syntaxtree import Node, TransformationFunc
...@@ -59,7 +59,7 @@ RX_SECTION_MARKER = re.compile(SECTION_MARKER.format(marker=r'.*?SECTION.*?')) ...@@ -59,7 +59,7 @@ RX_SECTION_MARKER = re.compile(SECTION_MARKER.format(marker=r'.*?SECTION.*?'))
RX_WHITESPACE = re.compile('\s*') RX_WHITESPACE = re.compile('\s*')
SYMBOLS_SECTION = "SYMBOLS SECTION - Can be edited. Changes will be preserved." SYMBOLS_SECTION = "SYMBOLS SECTION - Can be edited. Changes will be preserved."
SCANNER_SECTION = "SCANNER SECTION - Can be edited. Changes will be preserved." PREPROCESSOR_SECTION = "PREPROCESSOR SECTION - Can be edited. Changes will be preserved."
PARSER_SECTION = "PARSER SECTION - Don't edit! CHANGES WILL BE OVERWRITTEN!" PARSER_SECTION = "PARSER SECTION - Don't edit! CHANGES WILL BE OVERWRITTEN!"
AST_SECTION = "AST SECTION - Can be edited. Changes will be preserved." AST_SECTION = "AST SECTION - Can be edited. Changes will be preserved."
COMPILER_SECTION = "COMPILER SECTION - Can be edited. Changes will be preserved." COMPILER_SECTION = "COMPILER SECTION - Can be edited. Changes will be preserved."
...@@ -75,11 +75,11 @@ try: ...@@ -75,11 +75,11 @@ try:
except ImportError: except ImportError:
import re import re
from DHParser.toolkit import logging, is_filename, load_if_file from DHParser.toolkit import logging, is_filename, load_if_file
from DHParser.parsers import Grammar, Compiler, nil_scanner, \\ from DHParser.parsers import Grammar, Compiler, nil_preprocessor, \\
Lookbehind, Lookahead, Alternative, Pop, Required, Token, Synonym, \\ Lookbehind, Lookahead, Alternative, Pop, Required, Token, Synonym, \\
Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture, \\ Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture, \\
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \\ ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \\
last_value, counterpart, accumulate, ScannerFunc last_value, counterpart, accumulate, PreprocessorFunc
from DHParser.syntaxtree import Node, traverse, remove_children_if, \\ from DHParser.syntaxtree import Node, traverse, remove_children_if, \\
reduce_single_child, replace_by_single_child, remove_whitespace, \\ reduce_single_child, replace_by_single_child, remove_whitespace, \\
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \\ remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \\
...@@ -98,7 +98,7 @@ def compile_src(source): ...@@ -98,7 +98,7 @@ def compile_src(source):
cname = compiler.__class__.__name__ cname = compiler.__class__.__name__
log_file_name = os.path.basename(os.path.splitext(source)[0]) \\ log_file_name = os.path.basename(os.path.splitext(source)[0]) \\
if is_filename(source) < 0 else cname[:cname.find('.')] + '_out' if is_filename(source) < 0 else cname[:cname.find('.')] + '_out'
result = compile_source(source, get_scanner(), result = compile_source(source, get_preprocessor(),
get_grammar(), get_grammar(),
get_transformer(), compiler) get_transformer(), compiler)
return result return result
...@@ -176,7 +176,7 @@ def grammar_instance(grammar_representation) -> Tuple[Grammar, str]: ...@@ -176,7 +176,7 @@ def grammar_instance(grammar_representation) -> Tuple[Grammar, str]:
def compileDSL(text_or_file: str, def compileDSL(text_or_file: str,
scanner: ScannerFunc, preprocessor: PreprocessorFunc,
dsl_grammar: Union[str, Grammar], dsl_grammar: Union[str, Grammar],
ast_transformation: TransformationFunc, ast_transformation: TransformationFunc,
compiler: Compiler) -> Any: compiler: Compiler) -> Any:
...@@ -192,7 +192,7 @@ def compileDSL(text_or_file: str, ...@@ -192,7 +192,7 @@ def compileDSL(text_or_file: str,
assert isinstance(compiler, Compiler) assert isinstance(compiler, Compiler)
parser, grammar_src = grammar_instance(dsl_grammar) parser, grammar_src = grammar_instance(dsl_grammar)
result, errors, AST = compile_source(text_or_file, scanner, parser, result, errors, AST = compile_source(text_or_file, preprocessor, parser,
ast_transformation, compiler) ast_transformation, compiler)
if errors: if errors:
src = load_if_file(text_or_file) src = load_if_file(text_or_file)
...@@ -204,7 +204,7 @@ def raw_compileEBNF(ebnf_src: str, branding="DSL") -> EBNFCompiler: ...@@ -204,7 +204,7 @@ def raw_compileEBNF(ebnf_src: str, branding="DSL") -> EBNFCompiler:
""" """
Compiles an EBNF grammar file and returns the compiler object Compiles an EBNF grammar file and returns the compiler object
that was used and which can now be queried for the result as well that was used and which can now be queried for the result as well
as skeleton code for scanner, transformer and compiler objects. as skeleton code for preprocessor, transformer and compiler objects.
Args: Args:
ebnf_src(str): Either the file name of an EBNF grammar or ebnf_src(str): Either the file name of an EBNF grammar or
...@@ -218,14 +218,14 @@ def raw_compileEBNF(ebnf_src: str, branding="DSL") -> EBNFCompiler: ...@@ -218,14 +218,14 @@ def raw_compileEBNF(ebnf_src: str, branding="DSL") -> EBNFCompiler:
""" """
grammar = get_ebnf_grammar() grammar = get_ebnf_grammar()
compiler = get_ebnf_compiler(branding , ebnf_src) compiler = get_ebnf_compiler(branding , ebnf_src)
compileDSL(ebnf_src, nil_scanner, grammar, EBNFTransformer, compiler) compileDSL(ebnf_src, nil_preprocessor, grammar, EBNFTransformer, compiler)
return compiler return compiler
def compileEBNF(ebnf_src: str, branding="DSL") -> str: def compileEBNF(ebnf_src: str, branding="DSL") -> str:
""" """
Compiles an EBNF source file and returns the source code of a Compiles an EBNF source file and returns the source code of a
compiler suite with skeletons for scanner, transformer and compiler suite with skeletons for preprocessor, transformer and
compiler. compiler.
Args: Args:
...@@ -241,7 +241,7 @@ def compileEBNF(ebnf_src: str, branding="DSL") -> str: ...@@ -241,7 +241,7 @@ def compileEBNF(ebnf_src: str, branding="DSL") -> str:
compiler = raw_compileEBNF(ebnf_src, branding) compiler = raw_compileEBNF(ebnf_src, branding)
src = ["#/usr/bin/python\n", src = ["#/usr/bin/python\n",
SECTION_MARKER.format(marker=SYMBOLS_SECTION), DHPARSER_IMPORTS, SECTION_MARKER.format(marker=SYMBOLS_SECTION), DHPARSER_IMPORTS,
SECTION_MARKER.format(marker=SCANNER_SECTION), compiler.gen_scanner_skeleton(), SECTION_MARKER.format(marker=PREPROCESSOR_SECTION), compiler.gen_preprocessor_skeleton(),
SECTION_MARKER.format(marker=PARSER_SECTION), compiler.result, SECTION_MARKER.format(marker=PARSER_SECTION), compiler.result,
SECTION_MARKER.format(marker=AST_SECTION), compiler.gen_transformer_skeleton(), SECTION_MARKER.format(marker=AST_SECTION), compiler.gen_transformer_skeleton(),
SECTION_MARKER.format(marker=COMPILER_SECTION), compiler.gen_compiler_skeleton(), SECTION_MARKER.format(marker=COMPILER_SECTION), compiler.gen_compiler_skeleton(),
...@@ -264,32 +264,32 @@ def parser_factory(ebnf_src: str, branding="DSL") -> Grammar: ...@@ -264,32 +264,32 @@ def parser_factory(ebnf_src: str, branding="DSL") -> Grammar:
A factory function for a grammar-parser for texts in the A factory function for a grammar-parser for texts in the
language defined by ``ebnf_src``. language defined by ``ebnf_src``.
""" """
grammar_src = compileDSL(ebnf_src, nil_scanner, get_ebnf_grammar(), grammar_src = compileDSL(ebnf_src, nil_preprocessor, get_ebnf_grammar(),
get_ebnf_transformer(), get_ebnf_compiler(branding)) get_ebnf_transformer(), get_ebnf_compiler(branding))
return compile_python_object(DHPARSER_IMPORTS + grammar_src, 'get_(?:\w+_)?grammar$') return compile_python_object(DHPARSER_IMPORTS + grammar_src, 'get_(?:\w+_)?grammar$')
def load_compiler_suite(compiler_suite: str) -> \ def load_compiler_suite(compiler_suite: str) -> \
Tuple[ScannerFactoryFunc, ParserFactoryFunc, TransformerFactoryFunc, CompilerFactoryFunc]: Tuple[PreprocessorFactoryFunc, ParserFactoryFunc, TransformerFactoryFunc, CompilerFactoryFunc]:
""" """
Extracts a compiler suite from file or string ``compiler suite`` Extracts a compiler suite from file or string ``compiler suite``
and returns it as a tuple (scanner, parser, ast, compiler). and returns it as a tuple (preprocessor, parser, ast, compiler).
Returns: Returns:
4-tuple (scanner function, parser class, ast transformer function, compiler class) 4-tuple (preprocessor function, parser class, ast transformer function, compiler class)
""" """
global RX_SECTION_MARKER global RX_SECTION_MARKER
assert isinstance(compiler_suite, str) assert isinstance(compiler_suite, str)
source = load_if_file(compiler_suite) source = load_if_file(compiler_suite)
if is_python_code(compiler_suite): if is_python_code(compiler_suite):
try: try:
intro, imports, scanner_py, parser_py, ast_py, compiler_py, outro = \ intro, imports, preprocessor_py, parser_py, ast_py, compiler_py, outro = \
RX_SECTION_MARKER.split(source) RX_SECTION_MARKER.split(source)
except ValueError as error: except ValueError as error:
raise AssertionError('File "' + compiler_suite + '" seems to be corrupted. ' raise AssertionError('File "' + compiler_suite + '" seems to be corrupted. '
'Please delete or repair file manually.') 'Please delete or repair file manually.')
# TODO: Compile in one step and pick parts from namespace later ? # TODO: Compile in one step and pick parts from namespace later ?
scanner = compile_python_object(imports + scanner_py, 'get_(?:\w+_)?scanner$') preprocessor = compile_python_object(imports + preprocessor_py, 'get_(?:\w+_)?preprocessor$')
parser = compile_python_object(imports + parser_py, 'get_(?:\w+_)?grammar$') parser = compile_python_object(imports + parser_py, 'get_(?:\w+_)?grammar$')
ast = compile_python_object(imports + ast_py, 'get_(?:\w+_)?transformer$') ast = compile_python_object(imports + ast_py, 'get_(?:\w+_)?transformer$')
else: else:
...@@ -299,12 +299,12 @@ def load_compiler_suite(compiler_suite: str) -> \ ...@@ -299,12 +299,12 @@ def load_compiler_suite(compiler_suite: str) -> \
get_ebnf_grammar(), get_ebnf_transformer(), get_ebnf_compiler()) get_ebnf_grammar(), get_ebnf_transformer(), get_ebnf_compiler())
if errors: if errors:
raise GrammarError('\n\n'.join(errors), source) raise GrammarError('\n\n'.join(errors), source)
scanner = get_ebnf_scanner preprocessor = get_ebnf_preprocessor
parser = get_ebnf_grammar parser = get_ebnf_grammar
ast = get_ebnf_transformer ast = get_ebnf_transformer
compiler = compile_python_object(imports + compiler_py, 'get_(?:\w+_)?compiler$') compiler = compile_python_object(imports + compiler_py, 'get_(?:\w+_)?compiler$')
return scanner, parser, ast, compiler return preprocessor, parser, ast, compiler
def is_outdated(compiler_suite: str, grammar_source: str) -> bool: def is_outdated(compiler_suite: str, grammar_source: str) -> bool:
...@@ -327,7 +327,7 @@ def is_outdated(compiler_suite: str, grammar_source: str) -> bool: ...@@ -327,7 +327,7 @@ def is_outdated(compiler_suite: str, grammar_source: str) -> bool:
True, if ``compiler_suite`` seems to be out of date. True, if ``compiler_suite`` seems to be out of date.
""" """
try: try:
scanner, grammar, ast, compiler = load_compiler_suite(compiler_suite) preprocessor, grammar, ast, compiler = load_compiler_suite(compiler_suite)
return grammar_changed(grammar(), grammar_source) return grammar_changed(grammar(), grammar_source)
except ValueError: except ValueError:
return True return True
...@@ -352,8 +352,8 @@ def run_compiler(text_or_file: str, compiler_suite: str) -> Any: ...@@ -352,8 +352,8 @@ def run_compiler(text_or_file: str, compiler_suite: str) -> Any:
Raises: Raises:
CompilerError CompilerError
""" """
scanner, parser, ast, compiler = load_compiler_suite(compiler_suite) preprocessor, parser, ast, compiler = load_compiler_suite(compiler_suite)
return compileDSL(text_or_file, scanner(), parser(), ast(), compiler()) return compileDSL(text_or_file, preprocessor(), parser(), ast(), compiler())
def compile_on_disk(source_file: str, compiler_suite="", extension=".xml"): def compile_on_disk(source_file: str, compiler_suite="", extension=".xml"):
...@@ -364,7 +364,7 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml"): ...@@ -364,7 +364,7 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml"):
If no ``compiler_suite`` is given it is assumed that the source If no ``compiler_suite`` is given it is assumed that the source
file is an EBNF grammar. In this case the result will be a Python file is an EBNF grammar. In this case the result will be a Python
script containing a parser for that grammar as well as the script containing a parser for that grammar as well as the
skeletons for a scanner, AST transformation table, and compiler. skeletons for a preprocessor, AST transformation table, and compiler.
If the Python script already exists only the parser name in the If the Python script already exists only the parser name in the
script will be updated. (For this to work, the different names script will be updated. (For this to work, the different names
need to be delimited section marker blocks.). `compile_on_disk()` need to be delimited section marker blocks.). `compile_on_disk()`
...@@ -396,7 +396,7 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml"): ...@@ -396,7 +396,7 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml"):
if compiler_suite: if compiler_suite:
sfactory, pfactory, tfactory, cfactory = load_compiler_suite(compiler_suite) sfactory, pfactory, tfactory, cfactory = load_compiler_suite(compiler_suite)
else: else:
sfactory = get_ebnf_scanner sfactory = get_ebnf_preprocessor
pfactory = get_ebnf_grammar pfactory = get_ebnf_grammar
tfactory = get_ebnf_transformer tfactory = get_ebnf_transformer
cfactory = get_ebnf_compiler cfactory = get_ebnf_compiler
...@@ -408,7 +408,7 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml"): ...@@ -408,7 +408,7 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml"):
elif cfactory == get_ebnf_compiler: # trans == get_ebnf_transformer or trans == EBNFTransformer: # either an EBNF- or no compiler suite given elif cfactory == get_ebnf_compiler: # trans == get_ebnf_transformer or trans == EBNFTransformer: # either an EBNF- or no compiler suite given
ebnf_compiler = cast(EBNFCompiler, compiler1) ebnf_compiler = cast(EBNFCompiler, compiler1)
global SECTION_MARKER, RX_SECTION_MARKER, SCANNER_SECTION, PARSER_SECTION, \ global SECTION_MARKER, RX_SECTION_MARKER, PREPROCESSOR_SECTION, PARSER_SECTION, \
AST_SECTION, COMPILER_SECTION, END_SECTIONS_MARKER, RX_WHITESPACE, \ AST_SECTION, COMPILER_SECTION, END_SECTIONS_MARKER, RX_WHITESPACE, \
DHPARSER_MAIN, DHPARSER_IMPORTS DHPARSER_MAIN, DHPARSER_IMPORTS
f = None f = None
...@@ -416,9 +416,9 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml"): ...@@ -416,9 +416,9 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml"):
f = open(rootname + 'Compiler.py', 'r', encoding="utf-8") f = open(rootname + 'Compiler.py', 'r', encoding="utf-8")
source = f.read() source = f.read()
sections = RX_SECTION_MARKER.split(source) sections = RX_SECTION_MARKER.split(source)
intro, imports, scanner, parser, ast, compiler, outro = sections intro, imports, preprocessor, parser, ast, compiler, outro = sections
except (PermissionError, FileNotFoundError, IOError) as error: except (PermissionError, FileNotFoundError, IOError) as error:
intro, imports, scanner, parser, ast, compiler, outro = '', '', '', '', '', '', '' intro, imports, preprocessor, parser, ast, compiler, outro = '', '', '', '', '', '', ''
except ValueError as error: except ValueError as error:
name = '"' + rootname + 'Compiler.py"' name = '"' + rootname + 'Compiler.py"'
raise ValueError('Could not identify all required sections in ' + name + raise ValueError('Could not identify all required sections in ' + name +
...@@ -434,8 +434,8 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml"): ...@@ -434,8 +434,8 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml"):
outro = DHPARSER_MAIN.format(NAME=compiler_name) outro = DHPARSER_MAIN.format(NAME=compiler_name)
if RX_WHITESPACE.fullmatch(imports): if RX_WHITESPACE.fullmatch(imports):
imports = DHPARSER_IMPORTS imports = DHPARSER_IMPORTS
if RX_WHITESPACE.fullmatch(scanner): if RX_WHITESPACE.fullmatch(preprocessor):
scanner = ebnf_compiler.gen_scanner_skeleton() preprocessor = ebnf_compiler.gen_preprocessor_skeleton()
if RX_WHITESPACE.fullmatch(ast): if RX_WHITESPACE.fullmatch(ast):
ast = ebnf_compiler.gen_transformer_skeleton() ast = ebnf_compiler.gen_transformer_skeleton()
if RX_WHITESPACE.fullmatch(compiler): if RX_WHITESPACE.fullmatch(compiler):
...@@ -446,8 +446,8 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml"): ...@@ -446,8 +446,8 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml"):
f.write(intro) f.write(intro)
f.write(SECTION_MARKER.format(marker=SYMBOLS_SECTION)) f.write(SECTION_MARKER.format(marker=SYMBOLS_SECTION))
f.write(imports) f.write(imports)
f.write(SECTION_MARKER.format(marker=SCANNER_SECTION)) f.write(SECTION_MARKER.format(marker=PREPROCESSOR_SECTION))
f.write(scanner) f.write(preprocessor)
f.write(SECTION_MARKER.format(marker=PARSER_SECTION)) f.write(SECTION_MARKER.format(marker=PARSER_SECTION))
f.write(result) f.write(result)
f.write(SECTION_MARKER.format(marker=AST_SECTION)) f.write(SECTION_MARKER.format(marker=AST_SECTION))
......
...@@ -29,17 +29,16 @@ except ImportError: ...@@ -29,17 +29,16 @@ except ImportError:
from .typing34 import Callable, Dict, List, Set, Tuple from .typing34 import Callable, Dict, List, Set, Tuple
from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name
from DHParser.parsers import Grammar, mixin_comment, nil_scanner, Forward, RE, NegativeLookahead, \ from DHParser.parsers import Grammar, mixin_comment, nil_preprocessor, Forward, RE, NegativeLookahead, \
Alternative, Series, Optional, Required, OneOrMore, ZeroOrMore, Token, Compiler, \ Alternative, Series, Optional, Required, OneOrMore, ZeroOrMore, Token, Compiler, \
ScannerFunc PreprocessorFunc
from DHParser.syntaxtree import Node, traverse, remove_brackets, \ from DHParser.syntaxtree import Node, traverse, remove_brackets, \
reduce_single_child, replace_by_single_child, TOKEN_PTYPE, remove_expendables, \ reduce_single_child, replace_by_single_child, TOKEN_PTYPE, remove_expendables, \
remove_tokens, flatten, forbid, assert_content, WHITESPACE_PTYPE, key_tag_name, \ remove_tokens, flatten, forbid, assert_content, WHITESPACE_PTYPE, key_tag_name, \
TransformationFunc TransformationFunc
from DHParser.versionnumber import __version__ from DHParser.versionnumber import __version__
__all__ = ['get_ebnf_preprocessor',
__all__ = ['get_ebnf_scanner',
'get_ebnf_grammar', 'get_ebnf_grammar',
'get_ebnf_transformer', 'get_ebnf_transformer',
'get_ebnf_compiler', 'get_ebnf_compiler',
...@@ -48,7 +47,7 @@ __all__ = ['get_ebnf_scanner', ...@@ -48,7 +47,7 @@ __all__ = ['get_ebnf_scanner',
'EBNFCompilerError', 'EBNFCompilerError',
'EBNFCompiler', 'EBNFCompiler',
'grammar_changed', 'grammar_changed',
'ScannerFactoryFunc', 'PreprocessorFactoryFunc',
'ParserFactoryFunc', 'ParserFactoryFunc',
'TransformerFactoryFunc', 'TransformerFactoryFunc',
'CompilerFactoryFunc'] 'CompilerFactoryFunc']
...@@ -61,8 +60,8 @@ __all__ = ['get_ebnf_scanner', ...@@ -61,8 +60,8 @@ __all__ = ['get_ebnf_scanner',
######################################################################## ########################################################################
def get_ebnf_scanner() -> ScannerFunc: def get_ebnf_preprocessor() -> PreprocessorFunc:
return nil_scanner return nil_preprocessor
######################################################################## ########################################################################
...@@ -247,15 +246,14 @@ def get_ebnf_transformer() -> TransformationFunc: ...@@ -247,15 +246,14 @@ def get_ebnf_transformer() -> TransformationFunc:
######################################################################## ########################################################################
ScannerFactoryFunc = Callable[[], ScannerFunc] PreprocessorFactoryFunc = Callable[[], PreprocessorFunc]
ParserFactoryFunc = Callable[[], Grammar] ParserFactoryFunc = Callable[[], Grammar]
TransformerFactoryFunc = Callable[[], TransformationFunc] TransformerFactoryFunc = Callable[[], TransformationFunc]
CompilerFactoryFunc = Callable[[], Compiler] CompilerFactoryFunc = Callable[[], Compiler]
PREPROCESSOR_FACTORY = '''
SCANNER_FACTORY = ''' def get_preprocessor() -> PreprocessorFunc:
def get_scanner() -> ScannerFunc: return {NAME}Preprocessor
return {NAME}Scanner
''' '''
...@@ -335,21 +333,20 @@ class EBNFCompiler(Compiler): ...@@ -335,21 +333,20 @@ class EBNFCompiler(Compiler):
self.directives = {'whitespace': self.WHITESPACE['horizontal'], self.directives = {'whitespace': self.WHITESPACE['horizontal'],
'comment': '', 'comment': '',
'literalws': ['right'], 'literalws': ['right'],
'tokens': set(), # alt. 'scanner_tokens' 'tokens': set(), # alt. 'preprocessor_tokens'
'filter': dict(), # alt. 'filter' 'filter': dict(), # alt. 'filter'
'testing': False } 'testing': False}
@property @property
def result(self) -> str: def result(self) -> str:
return self._result return self._result
# methods for generating skeleton code for preprocessor, transformer, and compiler
# methods for generating skeleton code for scanner, transformer, and compiler def gen_preprocessor_skeleton(self) -> str:
name = self.grammar_name + "Preprocessor"
def gen_scanner_skeleton(self) -> str:
name = self.grammar_name + "Scanner"
return "def %s(text):\n return text\n" % name \ return "def %s(text):\n return text\n" % name \
+ SCANNER_FACTORY.format(NAME=self.grammar_name) + PREPROCESSOR_FACTORY.format(NAME=self.grammar_name)
def gen_transformer_skeleton(self) -> str: def gen_transformer_skeleton(self) -> str:
...@@ -515,7 +512,7 @@ class EBNFCompiler(Compiler): ...@@ -515,7 +512,7 @@ class EBNFCompiler(Compiler):
' end with a doube underscore "__".' % rule) ' end with a doube underscore "__".' % rule)
elif rule in self.directives['tokens']: elif rule in self.directives['tokens']:
node.add_error('Symbol "%s" has already been defined as ' node.add_error('Symbol "%s" has already been defined as '
'a scanner token.' % rule) 'a preprocessor token.' % rule)
elif keyword.iskeyword(rule): elif keyword.iskeyword(rule):
node.add_error('Python keyword "%s" may not be used as a symbol. ' node.add_error('Python keyword "%s" may not be used as a symbol. '
% rule + '(This may change in the future.)') % rule + '(This may change in the future.)')
...@@ -595,7 +592,7 @@ class EBNFCompiler(Compiler): ...@@ -595,7 +592,7 @@ class EBNFCompiler(Compiler):
else {} if 'none' in value else value else {} if 'none' in value else value
self.directives[key] = list(ws) self.directives[key] = list(ws)
elif key in {'tokens', 'scanner_tokens'}: elif key in {'tokens', 'preprocessor_tokens'}:
self.directives['tokens'] |= self.compile(node.children[1]) self.directives['tokens'] |= self.compile(node.children[1])
elif key.endswith('_filter'): elif key.endswith('_filter'):
...@@ -687,7 +684,7 @@ class EBNFCompiler(Compiler): ...@@ -687,7 +684,7 @@ class EBNFCompiler(Compiler):
def on_symbol(self, node: Node) -> str: # called only for symbols on the right hand side! def on_symbol(self, node: Node) -> str: # called only for symbols on the right hand side!
symbol = str(node) # ; assert result == cast(str, node.result) symbol = str(node) # ; assert result == cast(str, node.result)
if symbol in self.directives['tokens']: if symbol in self.directives['tokens']:
return 'ScannerToken("' + symbol + '")' return 'PreprocessorToken("' + symbol + '")'
else: else:
self.current_symbols.append(node) self.current_symbols.append(node)
if symbol not in self.symbols: if symbol not in self.symbols:
......
...@@ -73,17 +73,16 @@ from DHParser.syntaxtree import WHITESPACE_PTYPE, TOKEN_PTYPE, ZOMBIE_PARSER, Pa ...@@ -73,17 +73,16 @@ from DHParser.syntaxtree import WHITESPACE_PTYPE, TOKEN_PTYPE, ZOMBIE_PARSER, Pa
Node, TransformationFunc Node, TransformationFunc
from DHParser.toolkit import load_if_file, error_messages from DHParser.toolkit import load_if_file, error_messages
__all__ = ['PreprocessorFunc',
__all__ = ['ScannerFunc',
'HistoryRecord', 'HistoryRecord',
'Parser', 'Parser',
'Grammar', 'Grammar',
'RX_SCANNER_TOKEN', 'RX_PREPROCESSOR_TOKEN',
'BEGIN_SCANNER_TOKEN', 'BEGIN_TOKEN',
'END_SCANNER_TOKEN', 'END_TOKEN',
'make_token', 'make_token',
'nil_scanner', 'nil_preprocessor',
'ScannerToken', 'PreprocessorToken',
'RegExp', 'RegExp',
'RE', 'RE',
'Token', 'Token',
...@@ -121,7 +120,7 @@ __all__ = ['ScannerFunc', ...@@ -121,7 +120,7 @@ __all__ = ['ScannerFunc',
######################################################################## ########################################################################
ScannerFunc = Union[Callable[[str], str], partial] PreprocessorFunc = Union[Callable[[str], str], partial]
LEFT_RECURSION_DEPTH = 20 if platform.python_implementation() == "PyPy" \ LEFT_RECURSION_DEPTH = 20 if platform.python_implementation() == "PyPy" \
...@@ -610,66 +609,65 @@ def dsl_error_msg(parser: Parser, error_str: str) -> str: ...@@ -610,66 +609,65 @@ def dsl_error_msg(parser: Parser, error_str: str) -> str:
######################################################################## ########################################################################
RX_SCANNER_TOKEN = re.compile('\w+') RX_PREPROCESSOR_TOKEN = re.compile('\w+')
BEGIN_SCANNER_TOKEN = '\x1b' BEGIN_TOKEN = '\x1b'
END_SCANNER_TOKEN = '\x1c' END_TOKEN = '\x1c'
def make_token(token: str, argument: str = '') -> str: def make_token(token: str, argument: str = '') -> str:
""" """
Turns the ``token`` and ``argument`` into a special token that Turns the ``token`` and ``argument`` into a special token that
will be caught by the `ScannerToken`-parser. will be caught by the `PreprocessorToken`-parser.
This function is a support function that should be used by scanners This function is a support function that should be used by
to inject scanner tokens into the source text. preprocessors to inject preprocessor tokens into the source text.
""" """
assert RX_SCANNER_TOKEN.match(token) assert RX_PREPROCESSOR_TOKEN.match(token)
assert argument.find(BEGIN_SCANNER_TOKEN) < 0 assert argument.find(BEGIN_TOKEN) < 0
assert argument.find(END_SCANNER_TOKEN) < 0 assert argument.find(END_TOKEN) < 0
return BEGIN_SCANNER_TOKEN + token + argument + END_SCANNER_TOKEN return BEGIN_TOKEN + token + argument + END_TOKEN
def nil_scanner(text: str) -> str: def nil_preprocessor(text: str) -> str:
return text return text
class ScannerToken(Parser): class PreprocessorToken(Parser):
""" """
Parses tokens that have been inserted by a Scanner. Parses tokens that have been inserted by a preprocessor.
Scanners can generate Tokens with the ``make_token``-function. Preprocessors can generate Tokens with the ``make_token``-function.
These tokens start and end with magic characters that can only be These tokens start and end with magic characters that can only be
matched by the ScannerToken Parser. Scanner tokens can be used to matched by the PreprocessorToken Parser. Such tokens can be used to
insert BEGIN - END delimiters at the beginning or ending of an insert BEGIN - END delimiters at the beginning or ending of a
indented block. Otherwise indented block are difficult to handle quoted block, for example.
with parsing expression grammars.
"""