Commit 60c71076 authored by Eckhart Arnold's avatar Eckhart Arnold
Browse files

- additional AST transformations

parent db0a8568
...@@ -80,11 +80,12 @@ from DHParser.parsers import Grammar, Compiler, nil_scanner, \\ ...@@ -80,11 +80,12 @@ from DHParser.parsers import Grammar, Compiler, nil_scanner, \\
Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture, \\ Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture, \\
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \\ ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \\
last_value, counterpart, accumulate, ScannerFunc last_value, counterpart, accumulate, ScannerFunc
from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, \\ from DHParser.syntaxtree import Node, traverse, remove_children_if, \\
remove_children_if, reduce_single_child, replace_by_single_child, remove_whitespace, \\ reduce_single_child, reduce_children, replace_by_single_child, remove_whitespace, \\
no_transformation, remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \\ remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \\
is_empty, is_expendable, collapse, map_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \\ is_empty, is_expendable, collapse, map_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \\
TransformationFunc TransformationFunc, remove_children, remove_content, remove_first, remove_last, \\
has_name, has_content
''' '''
......
...@@ -32,7 +32,7 @@ from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name ...@@ -32,7 +32,7 @@ from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name
from DHParser.parsers import Grammar, mixin_comment, nil_scanner, Forward, RE, NegativeLookahead, \ from DHParser.parsers import Grammar, mixin_comment, nil_scanner, Forward, RE, NegativeLookahead, \
Alternative, Series, Optional, Required, OneOrMore, ZeroOrMore, Token, Compiler, \ Alternative, Series, Optional, Required, OneOrMore, ZeroOrMore, Token, Compiler, \
ScannerFunc ScannerFunc
from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, reduce_single_child, \ from DHParser.syntaxtree import Node, traverse, remove_first, remove_last, reduce_single_child, \
replace_by_single_child, TOKEN_PTYPE, remove_expendables, remove_tokens, flatten, \ replace_by_single_child, TOKEN_PTYPE, remove_expendables, remove_tokens, flatten, \
forbid, assert_content, WHITESPACE_PTYPE, key_tag_name, TransformationFunc forbid, assert_content, WHITESPACE_PTYPE, key_tag_name, TransformationFunc
from DHParser.versionnumber import __version__ from DHParser.versionnumber import __version__
...@@ -212,9 +212,9 @@ EBNF_transformation_table = { ...@@ -212,9 +212,9 @@ EBNF_transformation_table = {
"factor, flowmarker, retrieveop": "factor, flowmarker, retrieveop":
replace_by_single_child, replace_by_single_child,
"group": "group":
[remove_enclosing_delimiters, replace_by_single_child], [remove_tokens('(', ')'), replace_by_single_child],
"oneormore, repetition, option, regexchain": "oneormore, repetition, option":
[reduce_single_child, remove_enclosing_delimiters], [reduce_single_child, remove_first, remove_last],
"symbol, literal, regexp": "symbol, literal, regexp":
reduce_single_child, reduce_single_child,
(TOKEN_PTYPE, WHITESPACE_PTYPE): (TOKEN_PTYPE, WHITESPACE_PTYPE):
...@@ -358,7 +358,8 @@ class EBNFCompiler(Compiler): ...@@ -358,7 +358,8 @@ class EBNFCompiler(Compiler):
self.grammar_name + '-grammar'] self.grammar_name + '-grammar']
transtable.append(' "+": remove_empty,') transtable.append(' "+": remove_empty,')
for name in self.rules: for name in self.rules:
transtable.append(' "' + name + '": no_transformation,') transtable.append(' "' + name + '": [],')
transtable.append(' ":Token, :RE": reduce_single_child,')
transtable += [' "*": replace_by_single_child', '}', '', tf_name + transtable += [' "*": replace_by_single_child', '}', '', tf_name +
' = partial(traverse, processing_table=%s)' % tt_name, ''] ' = partial(traverse, processing_table=%s)' % tt_name, '']
transtable += [TRANSFORMER_FACTORY.format(NAME=self.grammar_name)] transtable += [TRANSFORMER_FACTORY.format(NAME=self.grammar_name)]
......
...@@ -17,10 +17,8 @@ implied. See the License for the specific language governing ...@@ -17,10 +17,8 @@ implied. See the License for the specific language governing
permissions and limitations under the License. permissions and limitations under the License.
""" """
import abc
import copy import copy
import inspect import inspect
import itertools
import os import os
from functools import partial, singledispatch from functools import partial, singledispatch
try: try:
...@@ -48,21 +46,26 @@ __all__ = ['WHITESPACE_PTYPE', ...@@ -48,21 +46,26 @@ __all__ = ['WHITESPACE_PTYPE',
'key_parser_name', 'key_parser_name',
'key_tag_name', 'key_tag_name',
'traverse', 'traverse',
'no_transformation',
'replace_by_single_child', 'replace_by_single_child',
'reduce_single_child', 'reduce_single_child',
'reduce_children',
'replace_parser', 'replace_parser',
'is_whitespace', 'is_whitespace',
'is_empty', 'is_empty',
'is_expendable', 'is_expendable',
'is_token', 'is_token',
'has_name',
'has_content',
'remove_children_if', 'remove_children_if',
'remove_children',
'remove_content',
'remove_first',
'remove_last',
'remove_whitespace', 'remove_whitespace',
'remove_empty', 'remove_empty',
'remove_expendables', 'remove_expendables',
'remove_tokens', 'remove_tokens',
'flatten', 'flatten',
'remove_enclosing_delimiters',
'forbid', 'forbid',
'require', 'require',
'assert_content'] 'assert_content']
...@@ -636,10 +639,6 @@ def traverse(root_node, processing_table, key_func=key_tag_name) -> None: ...@@ -636,10 +639,6 @@ def traverse(root_node, processing_table, key_func=key_tag_name) -> None:
traverse_recursive(root_node) traverse_recursive(root_node)
def no_transformation(node):
pass
# ------------------------------------------------ # ------------------------------------------------
# #
# rearranging transformations: # rearranging transformations:
...@@ -651,6 +650,19 @@ def no_transformation(node): ...@@ -651,6 +650,19 @@ def no_transformation(node):
# ------------------------------------------------ # ------------------------------------------------
@transformation_factory
def replace_parser(node, name: str):
    """Replaces the parser of a Node with a mock parser with the given
    name.

    Parameters:
        name(str): "NAME:PTYPE" of the surrogate parser. The ptype part
            is optional; if missing, an empty ptype is used.
        node(Node): The node whose parser shall be replaced.
    """
    # Split "NAME:PTYPE" into its two parts; padding with '' makes a
    # missing ptype default to the empty string.
    name, ptype = (name.split(':') + [''])[:2]
    node.parser = MockParser(name, ptype)
def replace_by_single_child(node): def replace_by_single_child(node):
"""Remove single branch node, replacing it by its immediate descendant. """Remove single branch node, replacing it by its immediate descendant.
(In case the descendant's name is empty (i.e. anonymous) the (In case the descendant's name is empty (i.e. anonymous) the
...@@ -673,17 +685,21 @@ def reduce_single_child(node): ...@@ -673,17 +685,21 @@ def reduce_single_child(node):
node.result = node.result[0].result node.result = node.result[0].result
@transformation_factory @transformation_factory(Callable)
def replace_parser(node, name: str): def reduce_children(node, condition=lambda node: not node.name):
"""Replaces the parser of a Node with a mock parser with the given """Replaces those children of node that have children themselves
name. and fulfil the given condition (default unnamed nodes).
In contrast to ``flatten`` (see below) this transformation does not
Parameters: operate recursively.
name(str): "NAME:PTYPE" of the surrogate. The ptype is optional
node(Node): The node where the parser shall be replaced
""" """
name, ptype = (name.split(':') + [''])[:2] if node.children:
node.parser = MockParser(name, ptype) new_result = []
for child in node.children:
if child.children and condition(child):
new_result.extend(child.children)
else:
new_result.append(child)
node.result = tuple(new_result)
def flatten(node): def flatten(node):
...@@ -703,9 +719,8 @@ def flatten(node): ...@@ -703,9 +719,8 @@ def flatten(node):
new_result = [] new_result = []
for child in node.children: for child in node.children:
if not child.parser.name and child.children: if not child.parser.name and child.children:
assert child.children, node.as_sexpr()
flatten(child) flatten(child)
new_result.extend(child.result) new_result.extend(child.children)
else: else:
new_result.append(child) new_result.append(child)
node.result = tuple(new_result) node.result = tuple(new_result)
...@@ -734,19 +749,24 @@ def is_whitespace(node): ...@@ -734,19 +749,24 @@ def is_whitespace(node):
``@comment``-directive.""" ``@comment``-directive."""
return node.parser.ptype == WHITESPACE_PTYPE return node.parser.ptype == WHITESPACE_PTYPE
def is_empty(node): def is_empty(node):
return not node.result return not node.result
def is_expendable(node): def is_expendable(node):
return is_empty(node) or is_whitespace(node) return is_empty(node) or is_whitespace(node)
def is_token(node, tokens: AbstractSet[str] = frozenset()) -> bool: def is_token(node, tokens: AbstractSet[str] = frozenset()) -> bool:
return node.parser.ptype == TOKEN_PTYPE and (not tokens or node.result in tokens) return node.parser.ptype == TOKEN_PTYPE and (not tokens or node.result in tokens)
def has_name(node, tag_names: AbstractSet[str]) -> bool:
    """Checks whether the tag name of ``node`` occurs in ``tag_names``."""
    tag = node.tag_name
    return tag in tag_names
def has_content(node, contents: AbstractSet[str]) -> bool:
    """Checks whether the string content of ``node`` occurs in ``contents``."""
    content = str(node)
    return content in contents
@transformation_factory(Callable) # @singledispatch @transformation_factory(Callable) # @singledispatch
def remove_children_if(node, condition): def remove_children_if(node, condition):
"""Removes all nodes from the result field if the function """Removes all nodes from the result field if the function
...@@ -760,22 +780,42 @@ remove_empty = remove_children_if(is_empty) ...@@ -760,22 +780,42 @@ remove_empty = remove_children_if(is_empty)
remove_expendables = remove_children_if(is_expendable) # partial(remove_children_if, condition=is_expendable) remove_expendables = remove_children_if(is_expendable) # partial(remove_children_if, condition=is_expendable)
@transformation_factory(Callable)
def remove_first(node, condition=lambda node: True):
    """Removes the first child if the condition is met.
    Otherwise does nothing."""
    children = node.children
    if children and condition(children[0]):
        # drop the leading child from the (tuple-valued) result
        node.result = node.result[1:]
@transformation_factory(Callable)
def remove_last(node, condition=lambda node: True):
    """Removes the last child if the condition is met.
    Otherwise does nothing."""
    children = node.children
    if children and condition(children[-1]):
        # drop the trailing child from the (tuple-valued) result
        node.result = node.result[:-1]
@transformation_factory @transformation_factory
def remove_tokens(node, tokens: AbstractSet[str] = frozenset()): def remove_tokens(node, tokens: AbstractSet[str] = frozenset()):
"""Reomoves any among a particular set of tokens from the immediate """Reomoves any among a particular set of tokens from the immediate
descendants of a node. If ``tokens`` is the empty set, all tokens descendants of a node. If ``tokens`` is the empty set, all tokens
are removed. are removed."""
"""
remove_children_if(node, partial(is_token, tokens=tokens)) remove_children_if(node, partial(is_token, tokens=tokens))
def remove_enclosing_delimiters(node): @transformation_factory
"""Removes any enclosing delimiters from a structure (e.g. quotation marks def remove_children(node, tag_names: AbstractSet[str]) -> bool:
from a literal or braces from a group). """Removes children by 'tag name'."""
""" remove_children_if(node, partial(has_name, tag_names=tag_names))
if len(node.children) >= 3:
assert not node.children[0].children and not node.children[-1].children, node.as_sexpr()
node.result = node.result[1:-1] @transformation_factory
def remove_content(node, contents: AbstractSet[str]):
"""Removes children depending on their string value."""
remove_children_if(node, partial(has_content, contents=contents))
@transformation_factory @transformation_factory
......
#######################################################################
#
# AST SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
Lyrik_AST_transformation_table = {
    # AST Transformations for the Lyrik-grammar
    # Keys are node tag names (comma-separated keys cover several tags at
    # once); values are one transformation or a list applied in order.
    # NOTE(review): "+" presumably runs on every node before the
    # tag-specific entry and "*" is the fallback for tags without an
    # entry of their own — confirm against traverse()'s key handling.
    "+": remove_empty,
    "bibliographisches":
        [remove_children('NZ'), remove_tokens],
    "autor": [],             # empty list == leave node unchanged
    "werk": [],
    "untertitel": [],
    "ort": [],
    "jahr":
        [reduce_single_child],
    "wortfolge":
        # NOTE(review): tag_names='WORT' passes a plain string, so
        # has_name would test substring membership unless the
        # transformation_factory wraps single strings in a set — verify.
        [reduce_children(partial(has_name, tag_names='WORT')), remove_last(is_whitespace), collapse],
    "namenfolge":
        [reduce_children(partial(has_name, tag_names='NAME')), remove_last(is_whitespace),
         collapse],
    "verknüpfung":
        [remove_tokens('<', '>'), reduce_single_child],
    "ziel":
        reduce_single_child,
    "gedicht, strophe, text":
        [flatten, remove_children('LEERZEILE'), remove_children('NZ')],
    "titel, serie":
        [flatten, remove_children('LEERZEILE'), remove_children('NZ'), collapse],
    "zeile": [],
    "vers":
        collapse,
    "WORT": [],
    "NAME": [],
    "ZEICHENFOLGE":
        reduce_single_child,
    "NZ":
        reduce_single_child,
    "LEERZEILE": [],
    "JAHRESZAHL":
        [reduce_single_child],
    "ENDE": [],
    ":Whitespace":
        map_content(lambda node : " "),   # collapse any whitespace run to a single blank
    ":Token, :RE":
        reduce_single_child,
    "*": replace_by_single_child
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment