Commit c11ae730 authored by Eckhart Arnold

- syntaxtree.py: transformation_factory decorator for processing table functions; further type annotations
parent 8e8792ad
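In short, the change replaces explicit partial applications in the processing tables by factory calls. A minimal before/after sketch (both forms appear verbatim in the hunks below; ``remove_tokens`` is DHParser's own transformation):

    from functools import partial
    from DHParser.syntaxtree import remove_tokens

    # before: an explicit partial in the processing table
    table_old = {"directive, definition": partial(remove_tokens, tokens={'@', '='})}

    # after: thanks to @transformation_factory, the parameterized call
    # itself returns the required one-argument transformation function
    table_new = {"directive, definition": remove_tokens('@', '=')}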
......@@ -31,7 +31,7 @@ from DHParser.ebnf import EBNFTransformer, EBNFCompiler, grammar_changed, \
ScannerFactoryFunc, ParserFactoryFunc, TransformerFactoryFunc, CompilerFactoryFunc
from DHParser.toolkit import logging, load_if_file, is_python_code, compile_python_object
from DHParser.parsers import Grammar, Compiler, compile_source, nil_scanner, ScannerFunc
from DHParser.syntaxtree import Node, TransformerFunc
from DHParser.syntaxtree import Node, TransformationFunc
__all__ = ['GrammarError',
......@@ -79,8 +79,8 @@ from DHParser.parsers import Grammar, Compiler, nil_scanner, \\
nop_filter, counterpart_filter, accumulating_filter, ScannerFunc
from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, \\
remove_children_if, reduce_single_child, replace_by_single_child, remove_whitespace, \\
no_operation, remove_expendables, remove_tokens, flatten, is_whitespace, is_expendable, \\
collapse, map_content, WHITESPACE_PTYPE, TOKEN_PTYPE, TransformerFunc
no_transformation, remove_expendables, remove_tokens, flatten, is_whitespace, is_expendable, \\
collapse, map_content, WHITESPACE_PTYPE, TOKEN_PTYPE, TransformationFunc
'''
......@@ -171,7 +171,7 @@ def grammar_instance(grammar_representation) -> Tuple[Grammar, str]:
def compileDSL(text_or_file: str,
scanner: ScannerFunc,
dsl_grammar: Union[str, Grammar],
ast_transformation: TransformerFunc,
ast_transformation: TransformationFunc,
compiler: Compiler) -> Any:
"""Compiles a text in a domain specific language (DSL) with an
EBNF-specified grammar. Returns the compiled text or raises a
......
......@@ -17,7 +17,7 @@ permissions and limitations under the License.
"""
import keyword
from functools import partial
try:
import regex as re
except ImportError:
......@@ -30,7 +30,7 @@ from DHParser.parsers import Grammar, mixin_comment, nil_scanner, Forward, RE, N
ScannerFunc
from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, reduce_single_child, \
replace_by_single_child, TOKEN_PTYPE, remove_expendables, remove_tokens, flatten, \
forbid, assert_content, WHITESPACE_PTYPE, key_tag_name, TransformerFunc
forbid, assert_content, WHITESPACE_PTYPE, key_tag_name, TransformationFunc
from DHParser.versionnumber import __version__
......@@ -197,10 +197,9 @@ EBNF_transformation_table = {
"syntax":
remove_expendables,
"directive, definition":
partial(remove_tokens, tokens={'@', '='}),
remove_tokens('@', '='),
"expression":
[replace_by_single_child, flatten,
partial(remove_tokens, tokens={'|'})],
[replace_by_single_child, flatten, remove_tokens('|')],
"term":
[replace_by_single_child, flatten], # supports both idioms: "{ factor }+" and "factor { factor }"
"factor, flowmarker, retrieveop":
......@@ -214,17 +213,17 @@ EBNF_transformation_table = {
(TOKEN_PTYPE, WHITESPACE_PTYPE):
[remove_expendables, reduce_single_child],
"list_":
[flatten, partial(remove_tokens, tokens={','})],
[flatten, remove_tokens(',')],
"*":
[remove_expendables, replace_by_single_child]
}
EBNF_validation_table = {
# Semantic validation on the AST
# Semantic validation on the AST. EXPERIMENTAL!
"repetition, option, oneormore":
[partial(forbid, child_tags=['repetition', 'option', 'oneormore']),
partial(assert_content, regex=r'(?!§)')],
[forbid('repetition', 'option', 'oneormore'),
assert_content(r'(?!§)')]
}
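Both tables map node (tag) names to a transformation or a list of transformations and are applied with ``traverse`` (cf. the ``EBNFTransformer`` hunk below). A minimal usage sketch, assuming ``tree`` is the root Node of a parsed EBNF source:

    traverse(tree, EBNF_transformation_table, key_tag_name)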
......@@ -234,7 +233,7 @@ def EBNFTransformer(syntax_tree: Node):
traverse(syntax_tree, processing_table, key_func)
def get_ebnf_transformer() -> TransformerFunc:
def get_ebnf_transformer() -> TransformationFunc:
return EBNFTransformer
......@@ -247,7 +246,7 @@ def get_ebnf_transformer() -> TransformerFunc:
ScannerFactoryFunc = Callable[[], ScannerFunc]
ParserFactoryFunc = Callable[[], Grammar]
TransformerFactoryFunc = Callable[[], TransformerFunc]
TransformerFactoryFunc = Callable[[], TransformationFunc]
CompilerFactoryFunc = Callable[[], Compiler]
......@@ -270,7 +269,7 @@ def get_grammar() -> {NAME}Grammar:
TRANSFORMER_FACTORY = '''
def get_transformer() -> TransformerFunc:
def get_transformer() -> TransformationFunc:
return {NAME}Transform
'''
......@@ -349,8 +348,8 @@ class EBNFCompiler(Compiler):
' # AST Transformations for the ' +
self.grammar_name + '-grammar']
for name in self.definition_names:
transtable.append(' "' + name + '": no_operation,')
transtable += [' "*": no_operation', '}', '', tf_name +
transtable.append(' "' + name + '": no_transformation,')
transtable += [' "*": no_transformation', '}', '', tf_name +
' = partial(traverse, processing_table=%s)' % tt_name, '']
transtable += [TRANSFORMER_FACTORY.format(NAME=self.grammar_name)]
return '\n'.join(transtable)
......
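For illustration, the skeleton emitted by this method looks roughly like the following (the grammar name ``Arithmetic`` and the definition names are hypothetical; ``tt_name`` and ``tf_name`` are not shown in the hunk above):

    Arithmetic_AST_transformation_table = {
        # AST Transformations for the Arithmetic-grammar
        "expression": no_transformation,
        "term": no_transformation,
        "*": no_transformation
    }

    ArithmeticTransform = partial(traverse, processing_table=Arithmetic_AST_transformation_table)

    def get_transformer() -> TransformationFunc:
        return ArithmeticTransform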
......@@ -50,8 +50,9 @@ https://bitbucket.org/apalala/grako
import copy
from functools import partial
import os
from functools import partial
try:
import regex as re
except ImportError:
......@@ -60,7 +61,7 @@ from typing import Any, Callable, Dict, Iterator, List, Set, Tuple, Union
from DHParser.toolkit import is_logging, log_dir, logfile_basename, escape_re, sane_parser_name
from DHParser.syntaxtree import WHITESPACE_PTYPE, TOKEN_PTYPE, ZOMBIE_PARSER, Node, \
TransformerFunc
TransformationFunc
from DHParser.toolkit import load_if_file, error_messages
__all__ = ['ScannerFunc',
......@@ -224,8 +225,8 @@ class ParserMetaClass(type):
class Parser(metaclass=ParserMetaClass):
ApplyFunc = Callable[['Parser'], None]
def __init__(self, name=''):
assert isinstance(name, str), str(name)
def __init__(self, name: str = '') -> None:
# assert isinstance(name, str), str(name)
self.name = name # type: str
self._grammar = None # type: 'Grammar'
self.reset()
......@@ -367,7 +368,7 @@ class Grammar:
self.all_parsers.add(parser)
parser.grammar = self
def __call__(self, document, start_parser="root__"):
def __call__(self, document: str, start_parser="root__"):
"""Parses a document with with parser-combinators.
Args:
......@@ -378,7 +379,7 @@ class Grammar:
Returns:
Node: The root node of the parse tree.
"""
assert isinstance(document, str), type(document)
# assert isinstance(document, str), type(document)
if self.root__ is None:
raise NotImplementedError()
if self.dirty_flag:
......@@ -390,7 +391,7 @@ class Grammar:
self.history_tracking = is_logging()
self.document = document
parser = self[start_parser]
stitches = []
stitches = [] # type: List[Node]
rest = document
if not rest:
result, ignore = parser(rest)
......@@ -495,7 +496,7 @@ BEGIN_SCANNER_TOKEN = '\x1b'
END_SCANNER_TOKEN = '\x1c'
def make_token(token, argument='') -> str:
def make_token(token: str, argument: str = '') -> str:
"""Turns the ``token`` and ``argument`` into a special token that
will be caught by the `ScannerToken`-parser.
......@@ -509,7 +510,7 @@ def make_token(token, argument='') -> str:
return BEGIN_SCANNER_TOKEN + token + argument + END_SCANNER_TOKEN
def nil_scanner(text) -> str:
def nil_scanner(text: str) -> str:
return text
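A usage sketch for the two helpers above (the token name and argument are arbitrary):

    token = make_token('INDENT', '    ')
    assert token == '\x1bINDENT    \x1c'       # BEGIN_SCANNER_TOKEN + token + argument + END_SCANNER_TOKEN
    assert nil_scanner('source') == 'source'   # nil_scanner passes the text through unchanged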
......@@ -524,9 +525,9 @@ class ScannerToken(Parser):
indented block. Otherwise indented blocks are difficult to handle
with parsing expression grammars.
"""
def __init__(self, scanner_token):
assert isinstance(scanner_token, str) and scanner_token and \
scanner_token.isupper()
def __init__(self, scanner_token: str) -> None:
assert scanner_token and scanner_token.isupper()
assert RX_SCANNER_TOKEN.match(scanner_token)
super(ScannerToken, self).__init__(scanner_token)
......@@ -565,7 +566,8 @@ class RegExp(Parser):
other parsers delegate part of the parsing job to other parsers,
but do not match text directly.
"""
def __init__(self, regexp, name=''):
def __init__(self, regexp, name: str = '') -> None:
super(RegExp, self).__init__(name)
self.regexp = re.compile(regexp) if isinstance(regexp, str) else regexp
......@@ -673,7 +675,7 @@ class RE(Parser):
class Token(RE):
assert TOKEN_PTYPE == ":Token"
def __init__(self, token, wL=None, wR=None, name=''):
def __init__(self, token: str, wL=None, wR=None, name: str = '') -> None:
self.token = token
super(Token, self).__init__(escape_re(token), wL, wR, name)
......@@ -681,7 +683,7 @@ class Token(RE):
return self.__class__(self.token, self.wL, self.wR, self.name)
def mixin_comment(whitespace, comment):
def mixin_comment(whitespace: str, comment: str) -> str:
"""Returns a regular expression that merges comment and whitespace
regexps. Thus comments can occur wherever whitespace is allowed
and will be skipped just like implicit whitespace.
......@@ -702,9 +704,9 @@ def mixin_comment(whitespace, comment):
class UnaryOperator(Parser):
def __init__(self, parser, name=''):
def __init__(self, parser: Parser, name: str = '') -> None:
super(UnaryOperator, self).__init__(name)
assert isinstance(parser, Parser)
# assert isinstance(parser, Parser)
self.parser = parser # type: Parser
def __deepcopy__(self, memo):
......@@ -717,10 +719,10 @@ class UnaryOperator(Parser):
class NaryOperator(Parser):
def __init__(self, *parsers, name=''):
def __init__(self, *parsers: Parser, name: str = '') -> None:
super(NaryOperator, self).__init__(name)
assert all([isinstance(parser, Parser) for parser in parsers]), str(parsers)
self.parsers = parsers # type: List[Parser]
# assert all([isinstance(parser, Parser) for parser in parsers]), str(parsers)
self.parsers = parsers # type: Collection ## [Parser]
def __deepcopy__(self, memo):
parsers = copy.deepcopy(self.parsers, memo)
......@@ -733,9 +735,9 @@ class NaryOperator(Parser):
class Optional(UnaryOperator):
def __init__(self, parser, name=''):
def __init__(self, parser: Parser, name: str = '') -> None:
super(Optional, self).__init__(parser, name)
assert isinstance(parser, Parser)
# assert isinstance(parser, Parser)
assert not isinstance(parser, Optional), \
"Nesting options would be redundant: %s(%s)" % \
(str(name), str(parser.name))
......@@ -766,7 +768,7 @@ class ZeroOrMore(Optional):
class OneOrMore(UnaryOperator):
def __init__(self, parser, name=''):
def __init__(self, parser: Parser, name: str = '') -> None:
super(OneOrMore, self).__init__(parser, name)
assert not isinstance(parser, Optional), \
"Use ZeroOrMore instead of nesting OneOrMore and Optional: " \
......@@ -790,7 +792,7 @@ class OneOrMore(UnaryOperator):
class Sequence(NaryOperator):
def __init__(self, *parsers, name=''):
def __init__(self, *parsers: Parser, name: str = '') -> None:
super(Sequence, self).__init__(*parsers, name=name)
assert len(self.parsers) >= 1
......@@ -808,10 +810,10 @@ class Sequence(NaryOperator):
assert len(results) <= len(self.parsers)
return Node(self, results), text_
def __add__(self, other):
def __add__(self, other: 'Sequence') -> 'Sequence':
return Sequence(*self.parsers, other)
def __radd__(self, other):
def __radd__(self, other: 'Sequence') -> 'Sequence':
return Sequence(other, *self.parsers)
# def __iadd__(self, other):
......@@ -835,7 +837,8 @@ class Alternative(NaryOperator):
# the most selective expression should be put first:
"""
def __init__(self, *parsers, name=''):
def __init__(self, *parsers: Parser, name: str = '') -> None:
super(Alternative, self).__init__(*parsers, name=name)
assert len(self.parsers) >= 1
assert all(not isinstance(p, Optional) for p in self.parsers)
......@@ -869,7 +872,7 @@ class Alternative(NaryOperator):
class FlowOperator(UnaryOperator):
def __init__(self, parser, name=''):
def __init__(self, parser: Parser, name: str = '') -> None:
super(FlowOperator, self).__init__(parser, name)
......@@ -889,7 +892,7 @@ class Required(FlowOperator):
class Lookahead(FlowOperator):
def __init__(self, parser, name=''):
def __init__(self, parser: Parser, name: str = '') -> None:
super(Lookahead, self).__init__(parser, name)
def __call__(self, text: str) -> Tuple[Node, str]:
......@@ -921,7 +924,7 @@ def iter_right_branch(node) -> Iterator[Node]:
class Lookbehind(FlowOperator):
def __init__(self, parser, name=''):
def __init__(self, parser: Parser, name: str = '') -> None:
super(Lookbehind, self).__init__(parser, name)
print("WARNING: Lookbehind Operator is experimental!")
......@@ -961,7 +964,7 @@ class NegativeLookbehind(Lookbehind):
class Capture(UnaryOperator):
def __init__(self, parser, name=''):
def __init__(self, parser: Parser, name: str = '') -> None:
super(Capture, self).__init__(parser, name)
def __call__(self, text: str) -> Tuple[Node, str]:
......@@ -987,8 +990,11 @@ def accumulating_filter(stack):
return "".join(stack)
RetrFilter = Callable[[List[str]], str]
class Retrieve(Parser):
def __init__(self, symbol, retrieve_filter=None, name=''):
def __init__(self, symbol: Parser, retrieve_filter: RetrFilter = None, name: str = '') -> None:
if not name:
name = symbol.name
super(Retrieve, self).__init__(name)
......@@ -1135,7 +1141,7 @@ class Compiler:
def compile_source(source: str,
scanner: ScannerFunc, # str -> str
parser: Grammar, # str -> Node (concrete syntax tree (CST))
transformer: TransformerFunc, # Node -> Node (abstract syntax tree (AST))
transformer: TransformationFunc, # Node -> Node (abstract syntax tree (AST))
compiler: Compiler): # Node (AST) -> Any
"""Compiles a source in four stages:
1. Scanning (if needed)
......
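A sketch of the resulting four-stage pipeline, using the EBNF components from this diff (``get_ebnf_grammar`` and ``get_ebnf_compiler`` are assumed factory names analogous to ``get_ebnf_transformer``; the return value is left unspecified here):

    result = compile_source(
        ebnf_source,
        nil_scanner,             # 1. scanning (here: a no-op)
        get_ebnf_grammar(),      # 2. parsing: str -> concrete syntax tree
        get_ebnf_transformer(),  # 3. AST transformation: Node -> Node
        get_ebnf_compiler())     # 4. compiling: Node -> Any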
......@@ -18,14 +18,16 @@ permissions and limitations under the License.
"""
import copy
import inspect
import itertools
import os
from functools import partial
from functools import partial, singledispatch
try:
import regex as re
except ImportError:
import re
from typing import Any, Callable, cast, Iterator, NamedTuple, Union, Tuple, List
from typing import AbstractSet, Any, ByteString, Callable, cast, Container, Iterator, List, \
NamedTuple, Sequence, Union, Text, Tuple
from DHParser.toolkit import log_dir, expand_table, line_col, smart_list
......@@ -35,11 +37,11 @@ __all__ = ['WHITESPACE_PTYPE',
'ZOMBIE_PARSER',
'Error',
'Node',
'TransformerFunc',
'TransformationFunc',
'key_parser_name',
'key_tag_name',
'traverse',
'no_operation',
'no_transformation',
'replace_by_single_child',
'reduce_single_child',
'replace_parser',
......@@ -445,7 +447,74 @@ class Node:
########################################################################
TransformerFunc = Union[Callable[[Node], Any], partial]
TransformationFunc = Union[Callable[[Node], Any], partial]
def transformation_factory(t=None):
"""Creates factory functions transformer-functions with more than
one parameter like ``remove_tokens(node, tokens)``. Decorating this
function with ``transformation_factory`` creates a function factory with
the same name, but without the ``node`` paramter, e.g.
``remove_tokens(tokens)`` which returns a transformerfunction with
only one parameter (i.e. ``node``), which can be used in processing
dictionaries, thus avoiding explicit lamba- or partial-functions
in the table.
Additionally it converts a list of parameters into a
collection, if the decorated function has exaclty two arguments and
the second argument is of type Collection.
Main benefit is reability of processing tables.
Example:
trans_table = { 'expression': remove_tokens('+', '-') }
rather than:
trans_table = { 'expression': partial(remove_tokens, tokens={'+', '-'}) }
Usage:
@transformation_factory(AbstractSet[str])
def remove_tokens(node, tokens):
...
or, alternatively:
@transformation_factory
def remove_tokens(node, tokens: AbstractSet[str]):
...
"""
def decorator(f):
sig = inspect.signature(f)
params = list(sig.parameters.values())[1:]
if len(params) == 0:
return f  # '@transformation_factory' not needed w/o free parameters
assert t or params[0].annotation != params[0].empty, \
"No type information on second parameter found! Please use a type " \
"annotation or pass the type to the transformation_factory-decorator."
p1type = t or params[0].annotation
f = singledispatch(f)
if len(params) == 1 and issubclass(p1type, Container) and not issubclass(p1type, Text) \
and not issubclass(p1type, ByteString):
def gen_special(*args):
c = set(args) if issubclass(p1type, AbstractSet) else \
list(args) if issubclass(p1type, Sequence) else args
d = {params[0].name: c}
return partial(f, **d)
f.register(p1type.__args__[0], gen_special)
def gen_partial(*args, **kwargs):
d = {p.name: arg for p, arg in zip(params, args)}
d.update(kwargs)
return partial(f, **d)
f.register(p1type, gen_partial)
return f
if isinstance(t, type(lambda: 1)):
# assume transformation_factory has been used as decorator w/o parameters;
# in that case ``t`` is the decorated function itself
func = t
t = None
return decorator(func)
else:
return decorator
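The observable effect, restating the docstring's example (``node`` stands for any Node instance):

    @transformation_factory
    def remove_tokens(node, tokens: AbstractSet[str]):
        ...

    strip = remove_tokens('+', '-')  # a str argument dispatches to gen_special and
                                     # yields partial(remove_tokens, tokens={'+', '-'})
    strip(node)                      # a Node argument runs the decorated function itself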
WHITESPACE_PTYPE = ':Whitespace'
......@@ -518,15 +587,7 @@ def traverse(root_node, processing_table, key_func=key_tag_name) -> None:
traverse_recursive(root_node)
# Note on processing functions: If processing functions receive more
# than one parameter, the ``node``-parameter should always be the
# last parameter to ease partial function application, e.g.:
# def replace_parser(name, node):
# ...
# processing_func = partial(replace_parser, "special")
def no_operation(node):
def no_transformation(node):
pass
......@@ -563,7 +624,8 @@ def reduce_single_child(node):
node.result = node.result[0].result
def replace_parser(node, name):
@transformation_factory
def replace_parser(node, name: str):
"""Replaces the parser of a Node with a mock parser with the given
name.
......@@ -632,10 +694,11 @@ def is_expendable(node):
return is_empty(node) or is_whitespace(node)
def is_token(node, tokens):
def is_token(node, tokens: AbstractSet[str] = frozenset()) -> bool:
return node.parser.ptype == TOKEN_PTYPE and (not tokens or node.result in tokens)
@transformation_factory(Callable) # @singledispatch
def remove_children_if(node, condition):
"""Removes all nodes from the result field if the function
``condition(child_node)`` evaluates to ``True``."""
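Given the ``Callable`` argument to the factory above, the condition-based shorthands could now also be written as factory calls; a sketch (the lines below still use ``partial``):

    remove_whitespace = remove_children_if(is_whitespace)
    # roughly equivalent to: partial(remove_children_if, condition=is_whitespace)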
......@@ -644,11 +707,14 @@ def remove_children_if(node, condition):
remove_whitespace = partial(remove_children_if, condition=is_whitespace)
# remove_scanner_tokens = partial(remove_children_if, condition=is_scanner_token)
remove_expendables = partial(remove_children_if, condition=is_expendable)
def remove_tokens(node, tokens):
# remove_scanner_tokens = partial(remove_children_if, condition=is_scanner_token)
@transformation_factory
def remove_tokens(node, tokens: AbstractSet[str] = frozenset()):
"""Reomoves any among a particular set of tokens from the immediate
descendants of a node. If ``tokens`` is the empty set, all tokens
are removed.
......@@ -680,22 +746,26 @@ def map_content(node, func):
########################################################################
def require(node, child_tags):
@transformation_factory
def require(node, child_tags: AbstractSet[str]):
for child in node.children:
if child.tag_name not in child_tags:
node.add_error('Element "%s" is not allowed inside "%s".' %
(child.parser.name, node.parser.name))
def forbid(node, child_tags):
@transformation_factory
def forbid(node, child_tags: AbstractSet[str]):
for child in node.children:
if child.tag_name in child_tags:
node.add_error('Element "%s" cannot be nested inside "%s".' %
(child.parser.name, node.parser.name))
def assert_content(node, regex):
@transformation_factory
def assert_content(node, regex: str):
content = str(node)
if not re.match(regex, content):
node.add_error('Element "%s" violates %s on %s' %
(node.parser.name, str(regex), content))
......@@ -582,7 +582,7 @@ def remove_brackets(node):
AST_SYMBOLS = {'replace_by_single_child', 'reduce_single_child',
'no_operation', 'remove_children_if',
'no_transformation', 'remove_children_if',
'is_whitespace', 'is_expendable', 'remove_whitespace',
# 'remove_scanner_tokens', 'is_scanner_token',
'remove_expendables', 'flatten', 'remove_tokens',
......@@ -1608,8 +1608,8 @@ class EBNFCompiler(CompilerBase):
' # AST Transformations for the ' +
self.grammar_name + '-grammar']
for name in self.definition_names:
transtable.append(' "' + name + '": no_operation,')
transtable += [' "": no_operation', '}', '']
transtable.append(' "' + name + '": no_transformation,')
transtable += [' "": no_transformation', '}', '']
return '\n'.join(transtable)
def gen_compiler_skeleton(self):
......
......@@ -16,7 +16,7 @@ except ImportError:
import re
from DHParser.parsers import Grammar, Compiler, Alternative, Required, Token, \
Optional, OneOrMore, Sequence, RE, ZeroOrMore, NegativeLookahead, mixin_comment, compile_source
from DHParser.syntaxtree import traverse, reduce_single_child, replace_by_single_child, no_operation, \
from DHParser.syntaxtree import traverse, reduce_single_child, replace_by_single_child, no_transformation, \
remove_expendables, remove_tokens, flatten, \
WHITESPACE_KEYWORD, TOKEN_KEYWORD
......@@ -220,10 +220,10 @@ def join_strings(node, delimiter='\n'):
MLW_AST_transformation_table = {
# AST Transformations for the MLW-grammar
"Artikel": no_operation,
"Artikel": no_transformation,
"LemmaPosition":
[partial(remove_tokens, tokens={'LEMMA'})],
"Lemma": no_operation,
"Lemma": no_transformation,
"_tll, _wortart, _genus":
[remove_expendables, reduce_single_child],
"LemmaVarianten":
......@@ -245,32 +245,32 @@ MLW_AST_transformation_table = {
[remove_expendables, reduce_single_child],
"Zusatz":
[remove_expendables, remove_tokens, reduce_single_child],
"ArtikelKopf": no_operation,
"ArtikelKopf": no_transformation,
"SchreibweisenPosition":
[partial(remove_tokens, tokens={'SCHREIBWEISE', ':'}),
flatten, partial(remove_tokens, tokens={','})],
"SWTyp": no_operation,
"SWTyp": no_transformation,
"BedeutungsPosition":
[flatten, partial(remove_tokens, tokens={'BEDEUTUNG'})],
"Bedeutung": no_operation,
"Bedeutungskategorie": no_operation,
"Interpretamente": no_operation,
"Bedeutung": no_transformation,
"Bedeutungskategorie": no_transformation,
"Interpretamente": no_transformation,
"LateinischeBedeutung, DeutscheBedeutung":
[remove_expendables, remove_tokens, reduce_single_child],
"Belege":
[flatten, remove_tokens],
"EinBeleg":
[flatten, remove_expendables, join_strings, reduce_single_child],
"Beleg": no_operation,
"VerweisZiel": no_operation,
"Beleg": no_transformation,
"VerweisZiel": no_transformation,
"Autorinfo":
[partial(remove_tokens, tokens={'AUTORIN', 'AUTOR'})],
"WORT, WORT_KLEIN, WORT_GROSS, GROSSSCHRIFT":
# test,
[remove_expendables, reduce_single_child],
"LEER": no_operation,
"DATEI_ENDE": no_operation,
"NIEMALS": no_operation,
"LEER": no_transformation,
"DATEI_ENDE": no_transformation,
"NIEMALS": no_transformation,
(TOKEN_KEYWORD, WHITESPACE_KEYWORD):
[remove_expendables, reduce_single_child],
"*":
......
......@@ -7,23 +7,19 @@
#######################################################################