Commit 60c71076 authored by Eckhart Arnold's avatar Eckhart Arnold

- additional AST transformations

parent db0a8568
......@@ -80,11 +80,12 @@ from DHParser.parsers import Grammar, Compiler, nil_scanner, \\
Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture, \\
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \\
last_value, counterpart, accumulate, ScannerFunc
from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, \\
remove_children_if, reduce_single_child, replace_by_single_child, remove_whitespace, \\
no_transformation, remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \\
from DHParser.syntaxtree import Node, traverse, remove_children_if, \\
reduce_single_child, reduce_children, replace_by_single_child, remove_whitespace, \\
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \\
is_empty, is_expendable, collapse, map_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \\
TransformationFunc
TransformationFunc, remove_children, remove_content, remove_first, remove_last, \\
has_name, has_content
'''
......
......@@ -32,7 +32,7 @@ from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name
from DHParser.parsers import Grammar, mixin_comment, nil_scanner, Forward, RE, NegativeLookahead, \
Alternative, Series, Optional, Required, OneOrMore, ZeroOrMore, Token, Compiler, \
ScannerFunc
from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, reduce_single_child, \
from DHParser.syntaxtree import Node, traverse, remove_first, remove_last, reduce_single_child, \
replace_by_single_child, TOKEN_PTYPE, remove_expendables, remove_tokens, flatten, \
forbid, assert_content, WHITESPACE_PTYPE, key_tag_name, TransformationFunc
from DHParser.versionnumber import __version__
......@@ -212,9 +212,9 @@ EBNF_transformation_table = {
"factor, flowmarker, retrieveop":
replace_by_single_child,
"group":
[remove_enclosing_delimiters, replace_by_single_child],
"oneormore, repetition, option, regexchain":
[reduce_single_child, remove_enclosing_delimiters],
[remove_tokens('(', ')'), replace_by_single_child],
"oneormore, repetition, option":
[reduce_single_child, remove_first, remove_last],
"symbol, literal, regexp":
reduce_single_child,
(TOKEN_PTYPE, WHITESPACE_PTYPE):
......@@ -358,7 +358,8 @@ class EBNFCompiler(Compiler):
self.grammar_name + '-grammar']
transtable.append(' "+": remove_empty,')
for name in self.rules:
transtable.append(' "' + name + '": no_transformation,')
transtable.append(' "' + name + '": [],')
transtable.append(' ":Token, :RE": reduce_single_child,')
transtable += [' "*": replace_by_single_child', '}', '', tf_name +
' = partial(traverse, processing_table=%s)' % tt_name, '']
transtable += [TRANSFORMER_FACTORY.format(NAME=self.grammar_name)]
......
......@@ -17,10 +17,8 @@ implied. See the License for the specific language governing
permissions and limitations under the License.
"""
import abc
import copy
import inspect
import itertools
import os
from functools import partial, singledispatch
try:
......@@ -48,21 +46,26 @@ __all__ = ['WHITESPACE_PTYPE',
'key_parser_name',
'key_tag_name',
'traverse',
'no_transformation',
'replace_by_single_child',
'reduce_single_child',
'reduce_children',
'replace_parser',
'is_whitespace',
'is_empty',
'is_expendable',
'is_token',
'has_name',
'has_content',
'remove_children_if',
'remove_children',
'remove_content',
'remove_first',
'remove_last',
'remove_whitespace',
'remove_empty',
'remove_expendables',
'remove_tokens',
'flatten',
'remove_enclosing_delimiters',
'forbid',
'require',
'assert_content']
......@@ -636,10 +639,6 @@ def traverse(root_node, processing_table, key_func=key_tag_name) -> None:
traverse_recursive(root_node)
def no_transformation(node):
    """Explicit no-op transformation: ``node`` is neither read nor
    modified and nothing is returned.
    """
    return None
# ------------------------------------------------
#
# rearranging transformations:
......@@ -651,6 +650,19 @@ def no_transformation(node):
# ------------------------------------------------
@transformation_factory
def replace_parser(node, name: str):
    """Assigns a freshly created ``MockParser`` to ``node.parser``.

    Parameters:
        name(str): "NAME:PTYPE" of the surrogate. The ":PTYPE" part is
            optional; if it is missing, an empty ptype is used.
        node(Node): The node where the parser shall be replaced
    """
    parts = name.split(':')
    # Pad with an empty ptype so that a bare "NAME" still yields two fields.
    parts.append('')
    parser_name, ptype = parts[:2]
    node.parser = MockParser(parser_name, ptype)
def replace_by_single_child(node):
"""Remove single branch node, replacing it by its immediate descendant.
(In case the descendant's name is empty (i.e. anonymous) the
......@@ -673,17 +685,21 @@ def reduce_single_child(node):
node.result = node.result[0].result
@transformation_factory
def replace_parser(node, name: str):
"""Replaces the parser of a Node with a mock parser with the given
name.
Parameters:
name(str): "NAME:PTYPE" of the surrogate. The ptype is optional
node(Node): The node where the parser shall be replaced
@transformation_factory(Callable)
def reduce_children(node, condition=lambda node: not node.name):
"""Replaces those children of node that have children themselves
and fulfil the given condition (default: unnamed nodes) by their
own children.
In contrast to ``flatten`` (see below) this transformation does not
operate recursively.
"""
name, ptype = (name.split(':') + [''])[:2]
node.parser = MockParser(name, ptype)
if node.children:
new_result = []
for child in node.children:
if child.children and condition(child):
new_result.extend(child.children)
else:
new_result.append(child)
node.result = tuple(new_result)
def flatten(node):
......@@ -703,9 +719,8 @@ def flatten(node):
new_result = []
for child in node.children:
if not child.parser.name and child.children:
assert child.children, node.as_sexpr()
flatten(child)
new_result.extend(child.result)
new_result.extend(child.children)
else:
new_result.append(child)
node.result = tuple(new_result)
......@@ -734,19 +749,24 @@ def is_whitespace(node):
``@comment``-directive."""
return node.parser.ptype == WHITESPACE_PTYPE
def is_empty(node):
    """Returns True if ``node.result`` is falsy (e.g. an empty string or
    an empty tuple), False otherwise.
    """
    if node.result:
        return False
    return True
def is_expendable(node):
    """Returns a true value if the node is empty (``is_empty``) or is
    whitespace (``is_whitespace``). The whitespace check is only made for
    non-empty nodes.
    """
    if is_empty(node):
        return True
    return is_whitespace(node)
def is_token(node, tokens: AbstractSet[str] = frozenset()) -> bool:
    """Checks whether ``node`` was produced by a token parser.

    The node qualifies if its parser's ``ptype`` equals ``TOKEN_PTYPE``
    and, in case ``tokens`` is not empty, its result is one of ``tokens``.
    An empty ``tokens`` set accepts every token.
    """
    if node.parser.ptype == TOKEN_PTYPE:
        return not tokens or node.result in tokens
    return False
def has_name(node, tag_names: AbstractSet[str]) -> bool:
    """Checks whether the tag name of ``node`` is one of ``tag_names``.

    Parameters:
        node: The node to test; only its ``tag_name`` attribute is read.
        tag_names: The set of accepted tag names. A single name passed
            as a plain string is accepted as well and is treated as a
            one-element set.

    Returns:
        True if ``node.tag_name`` is among ``tag_names``, else False.
    """
    if isinstance(tag_names, str):
        # ``x in "WORT"`` would be a substring test ("OR" and "" would
        # match), so wrap a bare string before the membership check.
        tag_names = frozenset((tag_names,))
    return node.tag_name in tag_names
def has_content(node, contents: AbstractSet[str]) -> bool:
    """Checks whether the string value of ``node`` (``str(node)``) is
    one of ``contents``.
    """
    text = str(node)
    return text in contents
@transformation_factory(Callable) # @singledispatch
def remove_children_if(node, condition):
"""Removes all nodes from the result field if the function
......@@ -760,22 +780,42 @@ remove_empty = remove_children_if(is_empty)
remove_expendables = remove_children_if(is_expendable) # partial(remove_children_if, condition=is_expendable)
@transformation_factory(Callable)
def remove_first(node, condition=lambda node: True):
    """Drops the first entry of ``node.result`` if ``condition`` holds
    for the first child. Nodes without children are left untouched. The
    default condition accepts every child.
    """
    children = node.children
    if children and condition(children[0]):
        node.result = node.result[1:]
@transformation_factory(Callable)
def remove_last(node, condition=lambda node: True):
    """Drops the last entry of ``node.result`` if ``condition`` holds
    for the last child. Nodes without children are left untouched. The
    default condition accepts every child.
    """
    children = node.children
    if children and condition(children[-1]):
        node.result = node.result[:-1]
@transformation_factory
def remove_tokens(node, tokens: AbstractSet[str] = frozenset()):
"""Removes any among a particular set of tokens from the immediate
descendants of a node. If ``tokens`` is the empty set, all tokens
are removed.
"""
are removed."""
remove_children_if(node, partial(is_token, tokens=tokens))
def remove_enclosing_delimiters(node):
    """Strips the first and the last entry from ``node.result``, e.g. the
    quotation marks of a literal or the braces of a group.

    Nothing happens if the node has fewer than three children. Otherwise
    the first and last child are asserted to be leaves (no children of
    their own) before they are cut off.
    """
    children = node.children
    if len(children) < 3:
        return
    first, last = children[0], children[-1]
    assert not (first.children or last.children), node.as_sexpr()
    node.result = node.result[1:-1]
@transformation_factory
def remove_children(node, tag_names: AbstractSet[str]) -> None:
    """Removes children by 'tag name'.

    Delegates to ``remove_children_if`` with ``has_name`` (bound to
    ``tag_names``) as the removal condition. Nothing is returned, hence
    the ``None`` return annotation.

    Parameters:
        node: The node whose children are filtered.
        tag_names: Tag names of the children that shall be removed.
    """
    remove_children_if(node, partial(has_name, tag_names=tag_names))
@transformation_factory
def remove_content(node, contents: AbstractSet[str]):
    """Removes those children whose string value is one of ``contents``.

    Delegates to ``remove_children_if`` with ``has_content`` (bound to
    ``contents``) as the removal condition.
    """
    matches_content = partial(has_content, contents=contents)
    remove_children_if(node, matches_content)
@transformation_factory
......
#######################################################################
#
# AST SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
Lyrik_AST_transformation_table = {
    # AST Transformations for the Lyrik-grammar.
    # Each key is a tag name, or a comma separated list of tag names; the
    # value is a single transformation or a list of transformations.
    "+": remove_empty,
    "bibliographisches":
        [remove_children('NZ'), remove_tokens],
    "autor": [],
    "werk": [],
    "untertitel": [],
    "ort": [],
    "jahr":
        [reduce_single_child],
    "wortfolge":
        # tag_names is a one-element set rather than the bare string,
        # because ``in`` on a string would be a substring test.
        [reduce_children(partial(has_name, tag_names={'WORT'})),
         remove_last(is_whitespace), collapse],
    "namenfolge":
        [reduce_children(partial(has_name, tag_names={'NAME'})),
         remove_last(is_whitespace), collapse],
    "verknüpfung":
        [remove_tokens('<', '>'), reduce_single_child],
    "ziel":
        reduce_single_child,
    "gedicht, strophe, text":
        [flatten, remove_children('LEERZEILE'), remove_children('NZ')],
    "titel, serie":
        [flatten, remove_children('LEERZEILE'), remove_children('NZ'), collapse],
    "zeile": [],
    "vers":
        collapse,
    "WORT": [],
    "NAME": [],
    "ZEICHENFOLGE":
        reduce_single_child,
    "NZ":
        reduce_single_child,
    "LEERZEILE": [],
    "JAHRESZAHL":
        [reduce_single_child],
    "ENDE": [],
    ":Whitespace":
        # every whitespace node's content becomes a single blank
        map_content(lambda node: " "),
    ":Token, :RE":
        reduce_single_child,
    "*": replace_by_single_child
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment