10.12., 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit db24cec1 authored by Eckhart Arnold's avatar Eckhart Arnold

- refactoring: Scanner now named Preprocessor

parent e2d7ea45
This diff is collapsed.
......@@ -29,17 +29,16 @@ except ImportError:
from .typing34 import Callable, Dict, List, Set, Tuple
from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name
from DHParser.parsers import Grammar, mixin_comment, nil_scanner, Forward, RE, NegativeLookahead, \
from DHParser.parsers import Grammar, mixin_comment, nil_preprocessor, Forward, RE, NegativeLookahead, \
Alternative, Series, Optional, Required, OneOrMore, ZeroOrMore, Token, Compiler, \
ScannerFunc
PreprocessorFunc
from DHParser.syntaxtree import Node, traverse, remove_brackets, \
reduce_single_child, replace_by_single_child, TOKEN_PTYPE, remove_expendables, \
remove_tokens, flatten, forbid, assert_content, WHITESPACE_PTYPE, key_tag_name, \
TransformationFunc
from DHParser.versionnumber import __version__
__all__ = ['get_ebnf_scanner',
__all__ = ['get_ebnf_preprocessor',
'get_ebnf_grammar',
'get_ebnf_transformer',
'get_ebnf_compiler',
......@@ -48,7 +47,7 @@ __all__ = ['get_ebnf_scanner',
'EBNFCompilerError',
'EBNFCompiler',
'grammar_changed',
'ScannerFactoryFunc',
'PreprocessorFactoryFunc',
'ParserFactoryFunc',
'TransformerFactoryFunc',
'CompilerFactoryFunc']
......@@ -61,8 +60,8 @@ __all__ = ['get_ebnf_scanner',
########################################################################
def get_ebnf_scanner() -> ScannerFunc:
return nil_scanner
def get_ebnf_preprocessor() -> PreprocessorFunc:
return nil_preprocessor
########################################################################
......@@ -247,15 +246,14 @@ def get_ebnf_transformer() -> TransformationFunc:
########################################################################
ScannerFactoryFunc = Callable[[], ScannerFunc]
PreprocessorFactoryFunc = Callable[[], PreprocessorFunc]
ParserFactoryFunc = Callable[[], Grammar]
TransformerFactoryFunc = Callable[[], TransformationFunc]
CompilerFactoryFunc = Callable[[], Compiler]
SCANNER_FACTORY = '''
def get_scanner() -> ScannerFunc:
return {NAME}Scanner
PREPROCESSOR_FACTORY = '''
def get_preprocessor() -> PreprocessorFunc:
return {NAME}Preprocessor
'''
......@@ -335,21 +333,20 @@ class EBNFCompiler(Compiler):
self.directives = {'whitespace': self.WHITESPACE['horizontal'],
'comment': '',
'literalws': ['right'],
'tokens': set(), # alt. 'scanner_tokens'
'filter': dict(), # alt. 'filter'
'testing': False }
'tokens': set(), # alt. 'preprocessor_tokens'
'filter': dict(), # alt. 'filter'
'testing': False}
@property
def result(self) -> str:
return self._result
# methods for generating skeleton code for preprocessor, transformer, and compiler
# methods for generating skeleton code for scanner, transformer, and compiler
def gen_scanner_skeleton(self) -> str:
name = self.grammar_name + "Scanner"
def gen_preprocessor_skeleton(self) -> str:
name = self.grammar_name + "Preprocessor"
return "def %s(text):\n return text\n" % name \
+ SCANNER_FACTORY.format(NAME=self.grammar_name)
+ PREPROCESSOR_FACTORY.format(NAME=self.grammar_name)
def gen_transformer_skeleton(self) -> str:
......@@ -515,7 +512,7 @@ class EBNFCompiler(Compiler):
' end with a doube underscore "__".' % rule)
elif rule in self.directives['tokens']:
node.add_error('Symbol "%s" has already been defined as '
'a scanner token.' % rule)
'a preprocessor token.' % rule)
elif keyword.iskeyword(rule):
node.add_error('Python keyword "%s" may not be used as a symbol. '
% rule + '(This may change in the future.)')
......@@ -595,7 +592,7 @@ class EBNFCompiler(Compiler):
else {} if 'none' in value else value
self.directives[key] = list(ws)
elif key in {'tokens', 'scanner_tokens'}:
elif key in {'tokens', 'preprocessor_tokens'}:
self.directives['tokens'] |= self.compile(node.children[1])
elif key.endswith('_filter'):
......@@ -687,7 +684,7 @@ class EBNFCompiler(Compiler):
def on_symbol(self, node: Node) -> str: # called only for symbols on the right hand side!
symbol = str(node) # ; assert result == cast(str, node.result)
if symbol in self.directives['tokens']:
return 'ScannerToken("' + symbol + '")'
return 'PreprocessorToken("' + symbol + '")'
else:
self.current_symbols.append(node)
if symbol not in self.symbols:
......
......@@ -73,17 +73,16 @@ from DHParser.syntaxtree import WHITESPACE_PTYPE, TOKEN_PTYPE, ZOMBIE_PARSER, Pa
Node, TransformationFunc
from DHParser.toolkit import load_if_file, error_messages
__all__ = ['ScannerFunc',
__all__ = ['PreprocessorFunc',
'HistoryRecord',
'Parser',
'Grammar',
'RX_SCANNER_TOKEN',
'BEGIN_SCANNER_TOKEN',
'END_SCANNER_TOKEN',
'RX_PREPROCESSOR_TOKEN',
'BEGIN_TOKEN',
'END_TOKEN',
'make_token',
'nil_scanner',
'ScannerToken',
'nil_preprocessor',
'PreprocessorToken',
'RegExp',
'RE',
'Token',
......@@ -121,7 +120,7 @@ __all__ = ['ScannerFunc',
########################################################################
ScannerFunc = Union[Callable[[str], str], partial]
PreprocessorFunc = Union[Callable[[str], str], partial]
LEFT_RECURSION_DEPTH = 20 if platform.python_implementation() == "PyPy" \
......@@ -610,66 +609,65 @@ def dsl_error_msg(parser: Parser, error_str: str) -> str:
########################################################################
RX_SCANNER_TOKEN = re.compile('\w+')
BEGIN_SCANNER_TOKEN = '\x1b'
END_SCANNER_TOKEN = '\x1c'
RX_PREPROCESSOR_TOKEN = re.compile('\w+')
BEGIN_TOKEN = '\x1b'
END_TOKEN = '\x1c'
def make_token(token: str, argument: str = '') -> str:
"""
Turns the ``token`` and ``argument`` into a special token that
will be caught by the `ScannerToken`-parser.
will be caught by the `PreprocessorToken`-parser.
This function is a support function that should be used by scanners
to inject scanner tokens into the source text.
This function is a support function that should be used by
preprocessors to inject preprocessor tokens into the source text.
"""
assert RX_SCANNER_TOKEN.match(token)
assert argument.find(BEGIN_SCANNER_TOKEN) < 0
assert argument.find(END_SCANNER_TOKEN) < 0
assert RX_PREPROCESSOR_TOKEN.match(token)
assert argument.find(BEGIN_TOKEN) < 0
assert argument.find(END_TOKEN) < 0
return BEGIN_SCANNER_TOKEN + token + argument + END_SCANNER_TOKEN
return BEGIN_TOKEN + token + argument + END_TOKEN
def nil_scanner(text: str) -> str:
def nil_preprocessor(text: str) -> str:
return text
class ScannerToken(Parser):
class PreprocessorToken(Parser):
"""
Parses tokens that have been inserted by a Scanner.
Parses tokens that have been inserted by a preprocessor.
Scanners can generate Tokens with the ``make_token``-function.
Preprocessors can generate Tokens with the ``make_token``-function.
These tokens start and end with magic characters that can only be
matched by the ScannerToken Parser. Scanner tokens can be used to
insert BEGIN - END delimiters at the beginning or ending of an
indented block. Otherwise indented block are difficult to handle
with parsing expression grammars.
matched by the PreprocessorToken Parser. Such tokens can be used to
insert BEGIN - END delimiters at the beginning or ending of a
quoted block, for example.
"""
def __init__(self, scanner_token: str) -> None:
assert scanner_token and scanner_token.isupper()
assert RX_SCANNER_TOKEN.match(scanner_token)
super(ScannerToken, self).__init__(scanner_token)
def __init__(self, token: str) -> None:
assert token and token.isupper()
assert RX_PREPROCESSOR_TOKEN.match(token)
super(PreprocessorToken, self).__init__(token)
def __call__(self, text: str) -> Tuple[Node, str]:
if text[0:1] == BEGIN_SCANNER_TOKEN:
end = text.find(END_SCANNER_TOKEN, 1)
if text[0:1] == BEGIN_TOKEN:
end = text.find(END_TOKEN, 1)
if end < 0:
node = Node(self, '').add_error(
'END_SCANNER_TOKEN delimiter missing from scanner token. '
'(Most likely due to a scanner bug!)') # type: Node
'END_TOKEN delimiter missing from preprocessor token. '
'(Most likely due to a preprocessor bug!)') # type: Node
return node, text[1:]
elif end == 0:
node = Node(self, '').add_error(
'Scanner token cannot have zero length. '
'(Most likely due to a scanner bug!)')
'Preprocessor-token cannot have zero length. '
'(Most likely due to a preprocessor bug!)')
return node, text[2:]
elif text.find(BEGIN_SCANNER_TOKEN, 1, end) >= 0:
elif text.find(BEGIN_TOKEN, 1, end) >= 0:
node = Node(self, text[len(self.name) + 1:end])
node.add_error(
'Scanner tokens must not be nested or contain '
'BEGIN_SCANNER_TOKEN delimiter as part of their argument. '
'(Most likely due to a scanner bug!)')
'Preprocessor-tokens must not be nested or contain '
'BEGIN_TOKEN delimiter as part of their argument. '
'(Most likely due to a preprocessor bug!)')
return node, text[end:]
if text[1:len(self.name) + 1] == self.name:
return Node(self, text[len(self.name) + 1:end]), \
......@@ -700,7 +698,7 @@ class RegExp(Parser):
return RegExp(regexp, self.name)
def __call__(self, text: str) -> Tuple[Node, str]:
match = text[0:1] != BEGIN_SCANNER_TOKEN and self.regexp.match(text) # ESC starts a scanner token.
match = text[0:1] != BEGIN_TOKEN and self.regexp.match(text) # ESC starts a preprocessor token.
if match:
end = match.end()
return Node(self, text[:end]), text[end:]
......@@ -1400,7 +1398,7 @@ class Compiler:
def compile_source(source: str,
scanner: ScannerFunc, # str -> str
preprocessor: PreprocessorFunc, # str -> str
parser: Grammar, # str -> Node (concrete syntax tree (CST))
transformer: TransformationFunc, # Node -> Node (abstract syntax tree (AST))
compiler: Compiler): # Node (AST) -> Any
......@@ -1416,8 +1414,8 @@ def compile_source(source: str,
Args:
source (str): The input text for compilation or the name of a
file containing the input text.
scanner (function): text -> text. A scanner function or None,
if no scanner is needed.
preprocessor (function): text -> text. A preprocessor function
or None, if no preprocessor is needed.
parser (function): A parsing function or grammar class
transformer (function): A transformation function that takes
the root-node of the concrete syntax tree as an argument and
......@@ -1435,8 +1433,8 @@ def compile_source(source: str,
"""
source_text = load_if_file(source)
log_file_name = logfile_basename(source, compiler)
if scanner is not None:
source_text = scanner(source_text)
if preprocessor is not None:
source_text = preprocessor(source_text)
syntax_tree = parser(source_text)
if is_logging():
syntax_tree.log(log_file_name + '.cst')
......
......@@ -98,6 +98,7 @@ class ParserBase:
def repr(self) -> str:
return self.name if self.name else repr(self)
class MockParser(ParserBase):
"""
MockParser objects can be used to reconstruct syntax trees from a
......@@ -583,7 +584,7 @@ def traverse(root_node, processing_table, key_func=key_tag_name) -> None:
"""Traverses the syntax tree starting with the given ``node`` depth
first and applies the sequences of callback-functions registered
in the ``calltable``-dictionary.
The most important use case is the transformation of a concrete
syntax tree into an abstract tree (AST). But it is also imaginable
to employ tree-traversal for the semantic analysis of the AST.
......@@ -598,16 +599,16 @@ def traverse(root_node, processing_table, key_func=key_tag_name) -> None:
'~': always called (after any other processing function)
Args:
root_node (Node): The root-node of the syntax tree to be traversed
root_node (Node): The root-node of the syntax tree to be traversed
processing_table (dict): node key -> sequence of functions that
will be applied to matching nodes in order. This dictionary
is interpreted as a ``compact_table``. See
is interpreted as a ``compact_table``. See
``toolkit.expand_table`` or ``EBNFCompiler.EBNFTransTable``
key_func (function): A mapping key_func(node) -> keystr. The default
key_func yields node.parser.name.
Example:
table = { "term": [replace_by_single_child, flatten],
table = { "term": [replace_by_single_child, flatten],
"factor, flowmarker, retrieveop": replace_by_single_child }
traverse(node, table)
"""
......@@ -656,19 +657,6 @@ def traverse(root_node, processing_table, key_func=key_tag_name) -> None:
# ------------------------------------------------
@transformation_factory
def replace_parser(node, name: str):
"""Replaces the parser of a Node with a mock parser with the given
name.
Parameters:
name(str): "NAME:PTYPE" of the surrogate. The ptype is optional
node(Node): The node where the parser shall be replaced
"""
name, ptype = (name.split(':') + [''])[:2]
node.parser = MockParser(name, ptype)
def replace_by_single_child(node):
"""Remove single branch node, replacing it by its immediate descendant.
(In case the descendant's name is empty (i.e. anonymous) the
......@@ -691,6 +679,19 @@ def reduce_single_child(node):
node.result = node.result[0].result
@transformation_factory
def replace_parser(node, name: str):
"""Replaces the parser of a Node with a mock parser with the given
name.
Parameters:
name(str): "NAME:PTYPE" of the surrogate. The ptype is optional
node(Node): The node where the parser shall be replaced
"""
name, ptype = (name.split(':') + [''])[:2]
node.parser = MockParser(name, ptype)
@transformation_factory(Callable)
def flatten(node, condition=lambda node: not node.parser.name, recursive=True):
"""Flattens all children, that fulfil the given `condition`
......
......@@ -30,6 +30,7 @@ the directory exists and raises an error if a file with the same name
already exists.
"""
import codecs
import collections
import contextlib
import hashlib
......@@ -38,6 +39,7 @@ try:
import regex as re
except ImportError:
import re
import sys
try:
from typing import Any, List, Tuple
except ImportError:
......@@ -389,3 +391,13 @@ def compile_python_object(python_src, catch_obj_regex=""):
return namespace[matches[0]] if matches else None
else:
return namespace
try:
if sys.stdout.encoding.upper() != "UTF-8":
# make sure that `print()` does not raise an error on
# non-ASCII characters:
sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach())
except AttributeError:
# somebody has already taken care of this !?
pass
This diff is collapsed.
......@@ -18,7 +18,7 @@ implied. See the License for the specific language governing
permissions and limitations under the License.
"""
# TODO: This is still a stub...
# TODO: This is still a stub...
import os
import sys
......@@ -26,7 +26,7 @@ from functools import partial
from DHParser.dsl import compileDSL, compile_on_disk
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
from DHParser.parsers import compile_source, nil_scanner
from DHParser.parsers import compile_source, nil_preprocessor
from DHParser.toolkit import logging
......@@ -53,7 +53,7 @@ def selftest(file_name):
# compile the grammar again using the result of the previous
# compilation as parser
for i in range(1):
result = compileDSL(grammar, nil_scanner, result, transformer, compiler)
result = compileDSL(grammar, nil_preprocessor, result, transformer, compiler)
print(result)
return result
......
......@@ -193,7 +193,7 @@ code = compile(parser_py, '<string>', 'exec')
module_vars = globals()
name_space = {k: module_vars[k] for k in {'RegExp', 'RE', 'Token', 'Required', 'Optional', 'mixin_comment',
'ZeroOrMore', 'OneOrMore', 'Sequence', 'Alternative', 'Forward',
'NegativeLookahead', 'PositiveLookahead', 'ScannerToken', 'Grammar'}}
'NegativeLookahead', 'PositiveLookahead', 'PreprocessorToken', 'Grammar'}}
exec(code, name_space)
parser = name_space['Grammar']()
......
......@@ -19,7 +19,7 @@ from DHParser.toolkit import logging, is_filename
from DHParser.parsers import Grammar, Compiler, Alternative, Pop, Required, Token, Synonym, \
Optional, OneOrMore, Series, RE, Capture, \
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
ScannerFunc
PreprocessorFunc
from DHParser.syntaxtree import traverse, remove_brackets, reduce_single_child, replace_by_single_child, \
remove_expendables, flatten, join, \
collapse, replace_content, TransformationFunc, \
......@@ -35,7 +35,8 @@ from DHParser.syntaxtree import traverse, remove_brackets, reduce_single_child,
def LaTeXScanner(text):
return text
def get_scanner() -> ScannerFunc:
def get_scanner() -> PreprocessorFunc:
return LaTeXScanner
......
......@@ -7,9 +7,9 @@
#######################################################################
from functools import partial
import os
import sys
from functools import partial
sys.path.append('../../')
......@@ -17,12 +17,12 @@ try:
import regex as re
except ImportError:
import re
from DHParser.toolkit import logging, is_filename, load_if_file
from DHParser.parsers import Grammar, Compiler, nil_scanner, \
from DHParser.toolkit import logging, is_filename, load_if_file
from DHParser.parsers import Grammar, Compiler, nil_preprocessor, \
Lookbehind, Lookahead, Alternative, Pop, Required, Token, \
Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture, \
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
ScannerFunc, Synonym
PreprocessorFunc, Synonym
from DHParser.syntaxtree import Node, traverse, remove_first, remove_last, \
remove_children_if, reduce_single_child, replace_by_single_child, remove_whitespace, \
remove_expendables, remove_tokens, flatten, is_whitespace, is_expendable, \
......@@ -39,7 +39,8 @@ from DHParser.syntaxtree import Node, traverse, remove_first, remove_last, \
def LyrikScanner(text):
return text
def get_scanner() -> ScannerFunc:
def get_scanner() -> PreprocessorFunc:
return LyrikScanner
......
......@@ -31,7 +31,7 @@ from multiprocessing import Pool
sys.path.extend(['../', './'])
from DHParser.toolkit import is_logging, compile_python_object
from DHParser.parsers import compile_source, Retrieve, WHITESPACE_PTYPE, nil_scanner
from DHParser.parsers import compile_source, Retrieve, WHITESPACE_PTYPE, nil_preprocessor
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, EBNFTransformer, get_ebnf_compiler
from DHParser.dsl import CompilationError, compileDSL, DHPARSER_IMPORTS, parser_factory
......@@ -223,7 +223,7 @@ class TestSelfHosting:
assert not errors, str(errors)
# compile the grammar again using the result of the previous
# compilation as parser
compileDSL(self.grammar, nil_scanner, result, get_ebnf_transformer(), compiler)
compileDSL(self.grammar, nil_preprocessor, result, get_ebnf_transformer(), compiler)
def multiprocessing_task(self):
compiler_name = "EBNF"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment