Commit 48920684 authored by eckhart's avatar eckhart

- transformation.py: keep replace_by_single_child and content_from_single_child as an optimization

parent 6cba61bd
......@@ -77,7 +77,7 @@ from DHParser import logging, is_filename, load_if_file, \\
last_value, counterpart, accumulate, PreprocessorFunc, \\
Node, TransformationFunc, TransformationDict, \\
traverse, remove_children_if, merge_children, is_anonymous, \\
content_from_child, replace_by_child, replace_or_reduce, remove_whitespace, \\
content_from_sinlge_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \\
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \\
is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \\
remove_nodes, remove_content, remove_brackets, replace_parser, \\
......
......@@ -27,7 +27,7 @@ from DHParser.parsers import Grammar, mixin_comment, nil_preprocessor, Forward,
from DHParser.syntaxtree import Node, TransformationFunc, WHITESPACE_PTYPE, TOKEN_PTYPE
from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name, re, typing
from DHParser.transform import traverse, remove_brackets, \
content_from_child, replace_by_child, remove_expendables, \
content_from_sinlge_child, replace_by_single_child, remove_expendables, \
remove_tokens, flatten, forbid, assert_content, remove_infix_operator
from DHParser.versionnumber import __version__
from typing import Callable, Dict, List, Set, Tuple
......@@ -198,30 +198,30 @@ EBNF_AST_transformation_table = {
"+":
remove_expendables,
"syntax":
[], # otherwise '"*": replace_by_child' would be applied
[], # otherwise '"*": replace_by_single_child' would be applied
"directive, definition":
remove_tokens('@', '='),
"expression":
[replace_by_child, flatten, remove_tokens('|')], # remove_infix_operator],
[replace_by_single_child, flatten, remove_tokens('|')], # remove_infix_operator],
"term":
[replace_by_child, flatten], # supports both idioms: "{ factor }+" and "factor { factor }"
[replace_by_single_child, flatten], # supports both idioms: "{ factor }+" and "factor { factor }"
"factor, flowmarker, retrieveop":
replace_by_child,
replace_by_single_child,
"group":
[remove_brackets, replace_by_child],
[remove_brackets, replace_by_single_child],
"unordered":
remove_brackets,
"oneormore, repetition, option":
[content_from_child, remove_brackets,
[content_from_sinlge_child, remove_brackets,
forbid('repetition', 'option', 'oneormore'), assert_content(r'(?!§)')],
"symbol, literal, regexp":
content_from_child,
content_from_sinlge_child,
(TOKEN_PTYPE, WHITESPACE_PTYPE):
content_from_child,
content_from_sinlge_child,
"list_":
[flatten, remove_infix_operator],
"*":
replace_by_child
replace_by_single_child
}
......@@ -438,10 +438,10 @@ class EBNFCompiler(Compiler):
if rule.startswith('Alternative'):
transformations = '[replace_or_reduce]'
elif rule.startswith('Synonym'):
transformations = '[content_from_child]'
transformations = '[content_from_sinlge_child]'
transtable.append(' "' + name + '": %s,' % transformations)
transtable.append(' ":Token, :RE": content_from_child,')
transtable += [' "*": replace_by_child', '}', '']
transtable.append(' ":Token, :RE": content_from_sinlge_child,')
transtable += [' "*": replace_by_single_child', '}', '']
transtable += [TRANSFORMER_FACTORY.format(NAME=self.grammar_name)]
return '\n'.join(transtable)
......
......@@ -36,8 +36,8 @@ __all__ = ('TransformationDict',
'key_tag_name',
'traverse',
'is_named',
'replace_by_child',
'content_from_child',
'replace_by_single_child',
'content_from_sinlge_child',
'replace_or_reduce',
'replace_parser',
'collapse',
......@@ -202,8 +202,8 @@ def traverse(root_node: Node,
key_func yields node.parser.name.
Example:
table = { "term": [replace_by_child, flatten],
"factor, flowmarker, retrieveop": replace_by_child }
table = { "term": [replace_by_single_child, flatten],
"factor, flowmarker, retrieveop": replace_by_single_child }
traverse(node, table)
"""
# Is this optimization really needed?
......@@ -317,6 +317,8 @@ def replace_by_child(context: List[Node], criteria: CriteriaType=single_child):
a boolean-valued function on the context of the child.
If no child matching the criteria is found, the node will
not be replaced.
With the default value for `criteria` the semantics is the
same as that of `replace_by_single_child`.
"""
child = pick_child(context, criteria)
if child:
......@@ -333,6 +335,8 @@ def content_from_child(context: List[None], criteria: CriteriaType=single_child)
name or a boolean-valued function on the context of the child.
If no child matching the criteria is found, the node will
not be replaced.
With the default value for `criteria` this has the same semantics
as `content_from_single_child`.
"""
child = pick_child(context, criteria)
if child:
......@@ -340,26 +344,26 @@ def content_from_child(context: List[None], criteria: CriteriaType=single_child)
# def replace_by_child(context: List[Node]):
# """
# Remove single branch node, replacing it by its immediate descendant
# if and only if the condition on the descendant is true.
# """
# node = context[-1]
# if len(node.children) == 1:
# replace_by(node, node.children[0])
#
#
# def content_from_child(context: List[Node]):
# """
# Reduce a single branch node, by transferring the result of its
# immediate descendant to this node, but keeping this node's parser entry.
# If the condition evaluates to false on the descendant, it will not
# be reduced.
# """
# node = context[-1]
# if len(node.children) == 1:
# reduce_child(node, node.children[0])
def replace_by_single_child(context: List[Node]):
    """
    Replace the last node of `context` by its only child.

    The replacement is carried out by `replace_by`. It takes place only
    if the node has exactly one child; a node with no children or with
    more than one child is left untouched.
    """
    current = context[-1]
    # Guard clause: anything other than exactly one child means "do nothing".
    if len(current.children) != 1:
        return
    replace_by(current, current.children[0])
def content_from_sinlge_child(context: List[Node]):
    """
    Reduce a single-branch node by transferring the result of its
    immediate descendant to this node, while keeping this node's own
    parser entry.

    The reduction is carried out by `reduce_child`. It takes place only
    if the last node in `context` has exactly one child; otherwise the
    node is left untouched.
    """
    current = context[-1]
    # Guard clause: anything other than exactly one child means "do nothing".
    if len(current.children) != 1:
        return
    reduce_child(current, current.children[0])
def is_named(context: List[Node]) -> bool:
......
......@@ -368,20 +368,20 @@ scroll down to the AST section, you'll see something like this:
"+": remove_empty,
"bibliographisches": [remove_nodes('NZ'), remove_tokens],
"autor, werk, untertitel, ort": [],
"jahr": [content_from_child],
"jahr": [content_from_sinlge_child],
"wortfolge": [flatten(is_one_of('WORT'), recursive=False), remove_last(is_whitespace), collapse],
"namenfolge": [flatten(is_one_of('NAME'), recursive=False), remove_last(is_whitespace), collapse],
"verknüpfung": [remove_tokens('<', '>'), content_from_child],
"ziel": content_from_child,
"verknüpfung": [remove_tokens('<', '>'), content_from_sinlge_child],
"ziel": content_from_sinlge_child,
"gedicht, strophe, text": [flatten, remove_nodes('LEERZEILE'), remove_nodes('NZ')],
"titel, serie": [flatten, remove_nodes('LEERZEILE'), remove_nodes('NZ'), collapse],
"vers": collapse,
"zeile": [],
"ZEICHENFOLGE, NZ, JAHRESZAHL": content_from_child,
"ZEICHENFOLGE, NZ, JAHRESZAHL": content_from_sinlge_child,
"WORT, NAME, LEERZEILE, ENDE": [],
":Whitespace": replace_content(lambda node : " "),
":Token, :RE": content_from_child,
"*": replace_by_child
":Token, :RE": content_from_sinlge_child,
"*": replace_by_single_child
}
As you can see, AST-transformations are specified declaratively (with the
......
......@@ -25,7 +25,7 @@ from DHParser import logging, is_filename, load_if_file, \
last_value, counterpart, accumulate, PreprocessorFunc, \
Node, TransformationFunc, TransformationDict, TRUE_CONDITION, \
traverse, remove_children_if, merge_children, is_anonymous, \
content_from_child, replace_by_child, replace_or_reduce, remove_whitespace, \
content_from_sinlge_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \
is_empty, is_expendable, collapse, replace_content, remove_nodes, remove_content, remove_brackets, replace_parser, \
keep_children, is_one_of, has_content, apply_if, remove_first, remove_last, \
......@@ -159,8 +159,8 @@ BibTeX_AST_transformation_table = {
"content": [replace_or_reduce],
"plain_content": [],
"text": [],
":Token, :RE": content_from_child,
"*": replace_by_child
":Token, :RE": content_from_sinlge_child,
"*": replace_by_single_child
}
......
......@@ -22,7 +22,7 @@ from DHParser import logging, is_filename, load_if_file, \
last_value, counterpart, accumulate, PreprocessorFunc, \
Node, TransformationFunc, TransformationDict, TRUE_CONDITION, \
traverse, remove_children_if, merge_children, is_anonymous, \
content_from_child, replace_by_child, replace_or_reduce, remove_whitespace, \
content_from_sinlge_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \
is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \
remove_nodes, remove_content, remove_brackets, replace_parser, \
......@@ -156,8 +156,8 @@ EBNF_AST_transformation_table = {
"regexp": [],
"list_": [],
"EOF": [],
":Token, :RE": content_from_child,
"*": replace_by_child
":Token, :RE": content_from_sinlge_child,
"*": replace_by_single_child
}
......
......@@ -21,7 +21,7 @@ from DHParser import logging, is_filename, Grammar, Compiler, Lookbehind, Altern
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
PreprocessorFunc, TransformationDict, \
Node, TransformationFunc, traverse, remove_children_if, is_anonymous, \
content_from_child, replace_by_child, remove_whitespace, \
content_from_sinlge_child, replace_by_single_child, remove_whitespace, \
flatten, is_empty, collapse, replace_content, remove_brackets, is_one_of, remove_first, \
remove_tokens, remove_nodes, TOKEN_PTYPE
......@@ -389,52 +389,52 @@ LaTeX_AST_transformation_table = {
"latexdoc": [],
"preamble": [],
"document": [flatten_structure],
"frontpages": content_from_child,
"frontpages": content_from_sinlge_child,
"Chapters, Sections, SubSections, SubSubSections, Paragraphs, SubParagraphs": [],
"Chapter, Section, SubSection, SubSubSection, Paragraph, SubParagraph": [],
"heading": content_from_child,
"heading": content_from_sinlge_child,
"Bibliography": [],
"Index": [],
"block_environment": replace_by_child,
"known_environment": replace_by_child,
"block_environment": replace_by_single_child,
"known_environment": replace_by_single_child,
"generic_block": [],
"begin_generic_block, end_generic_block": [remove_nodes('NEW_LINE'), replace_by_child],
"begin_generic_block, end_generic_block": [remove_nodes('NEW_LINE'), replace_by_single_child],
"itemize, enumerate": [remove_brackets, flatten],
"item": [],
"figure": [],
"quotation": [content_from_child, remove_brackets],
"quotation": [content_from_sinlge_child, remove_brackets],
"verbatim": [],
"tabular": [],
"tabular_config, block_of_paragraphs": [remove_brackets, content_from_child],
"tabular_config, block_of_paragraphs": [remove_brackets, content_from_sinlge_child],
"tabular_row": [flatten, remove_tokens('&', '\\')],
"tabular_cell": [flatten, remove_whitespace],
"multicolumn": [remove_tokens('{', '}')],
"hline": [remove_whitespace, content_from_child],
"hline": [remove_whitespace, content_from_sinlge_child],
"sequence": [flatten],
"paragraph": [flatten],
"text_element": replace_by_child,
"line_element": replace_by_child,
"inline_environment": replace_by_child,
"known_inline_env": replace_by_child,
"text_element": replace_by_single_child,
"line_element": replace_by_single_child,
"inline_environment": replace_by_single_child,
"known_inline_env": replace_by_single_child,
"generic_inline_env": [],
"begin_inline_env, end_inline_env": [replace_by_child],
"begin_environment, end_environment": [remove_brackets, content_from_child],
"inline_math": [remove_brackets, content_from_child],
"command": replace_by_child,
"known_command": replace_by_child,
"begin_inline_env, end_inline_env": [replace_by_single_child],
"begin_environment, end_environment": [remove_brackets, content_from_sinlge_child],
"inline_math": [remove_brackets, content_from_sinlge_child],
"command": replace_by_single_child,
"known_command": replace_by_single_child,
"text_command": [],
"generic_command": [flatten],
"footnote": [],
"includegraphics": [],
"caption": [],
"config": [remove_brackets, content_from_child],
"block": [remove_brackets, flatten, replace_by_child],
"config": [remove_brackets, content_from_sinlge_child],
"block": [remove_brackets, flatten, replace_by_single_child],
"text": collapse,
"no_command, blockcmd": [],
"structural": [],
"CMDNAME": [remove_whitespace, content_from_child],
"TXTCOMMAND": [remove_whitespace, content_from_child],
"NAME": [content_from_child, remove_whitespace, content_from_child],
"CMDNAME": [remove_whitespace, content_from_sinlge_child],
"TXTCOMMAND": [remove_whitespace, content_from_sinlge_child],
"NAME": [content_from_sinlge_child, remove_whitespace, content_from_sinlge_child],
"ESCAPED": [replace_content(lambda node: str(node)[1:])],
"BRACKETS": [],
"TEXTCHUNK": [],
......@@ -445,10 +445,10 @@ LaTeX_AST_transformation_table = {
"BACKSLASH": [],
"EOF": [],
":Token":
[remove_whitespace, content_from_child],
":RE": replace_by_child,
[remove_whitespace, content_from_sinlge_child],
":RE": replace_by_single_child,
":Whitespace": streamline_whitespace,
"*": replace_by_child
"*": replace_by_single_child
}
......
......@@ -22,7 +22,7 @@ from DHParser import logging, is_filename, load_if_file, \
last_value, counterpart, accumulate, PreprocessorFunc, \
Node, TransformationFunc, TransformationDict, \
traverse, remove_children_if, merge_children, is_anonymous, \
content_from_child, replace_by_child, replace_or_reduce, remove_whitespace, \
content_from_sinlge_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \
is_empty, is_expendable, collapse, replace_content, remove_nodes, remove_content, remove_brackets, replace_parser, \
keep_children, is_one_of, has_content, apply_if, remove_first, remove_last
......@@ -471,7 +471,7 @@ def get_grammar() -> MLWGrammar:
MLW_AST_transformation_table = {
# AST Transformations for the MLW-grammar
"+": remove_empty,
"Autor": [content_from_child],
"Autor": [content_from_sinlge_child],
"Artikel": [],
"LemmaPosition": [],
"Lemma": [],
......@@ -490,11 +490,11 @@ MLW_AST_transformation_table = {
"EtymologiePosition": [],
"EtymologieVarianten": [],
"EtymologieVariante": [],
"ArtikelKopf": [replace_by_child],
"ArtikelKopf": [replace_by_single_child],
"SchreibweisenPosition": [],
"SWTyp": [replace_or_reduce],
"SWVariante": [],
"Schreibweise": [replace_by_child],
"Schreibweise": [replace_by_single_child],
"BedeutungsPosition": [],
"Bedeutung": [],
"Bedeutungskategorie": [],
......@@ -506,15 +506,15 @@ MLW_AST_transformation_table = {
"Zusatz": [],
"ArtikelVerfasser": [],
"Name": [],
"Stelle": [content_from_child],
"Stelle": [content_from_sinlge_child],
"SW_LAT": [replace_or_reduce],
"SW_DEU": [replace_or_reduce],
"SW_GRIECH": [replace_or_reduce],
"Beleg": [replace_by_child],
"Beleg": [replace_by_single_child],
"Verweis": [],
"VerweisZiel": [],
"Werk": [content_from_child],
"ZielName": [replace_by_child],
"Werk": [content_from_sinlge_child],
"ZielName": [replace_by_single_child],
"NAMENS_ABKÜRZUNG": [],
"NAME": [],
"DEU_WORT": [],
......@@ -539,8 +539,8 @@ MLW_AST_transformation_table = {
"KOMMENTARZEILEN": [],
"DATEI_ENDE": [],
"NIEMALS": [],
":Token, :RE": content_from_child,
"*": replace_by_child
":Token, :RE": content_from_sinlge_child,
"*": replace_by_single_child
}
......
......@@ -22,7 +22,7 @@ from DHParser import logging, is_filename, load_if_file, \
last_value, counterpart, accumulate, PreprocessorFunc, \
Node, TransformationFunc, TransformationDict, TRUE_CONDITION, \
traverse, remove_children_if, merge_children, is_anonymous, \
content_from_child, replace_by_child, replace_or_reduce, remove_whitespace, \
content_from_sinlge_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \
is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \
remove_nodes, remove_content, remove_brackets, replace_parser, \
......@@ -312,11 +312,11 @@ MLW_AST_transformation_table = {
"EtymologiePosition": [],
"EtymologieVarianten": [],
"EtymologieVariante": [],
"ArtikelKopf": [replace_by_child],
"ArtikelKopf": [replace_by_single_child],
"SchreibweisenPosition": [],
"SWTyp": [replace_or_reduce],
"SWVariante": [],
"Schreibweise": [replace_by_child],
"Schreibweise": [replace_by_single_child],
"BedeutungsPosition": [],
"Bedeutung": [],
"Bedeutungskategorie": [],
......@@ -331,10 +331,10 @@ MLW_AST_transformation_table = {
"SW_LAT": [replace_or_reduce],
"SW_DEU": [replace_or_reduce],
"SW_GRIECH": [replace_or_reduce],
"Beleg": [replace_by_child],
"Beleg": [replace_by_single_child],
"Verweis": [],
"VerweisZiel": [],
"ZielName": [replace_by_child],
"ZielName": [replace_by_single_child],
"NAMENS_ABKÜRZUNG": [],
"NAME": [],
"DEU_WORT": [],
......@@ -352,8 +352,8 @@ MLW_AST_transformation_table = {
"LZ": [],
"DATEI_ENDE": [],
"NIEMALS": [],
":Token, :RE": content_from_child,
"*": replace_by_child
":Token, :RE": content_from_sinlge_child,
"*": replace_by_single_child
}
......
......@@ -19,11 +19,11 @@ MLW_AST_transformation_table = {
"EtymologiePosition": [],
"EtymologieVarianten": [],
"EtymologieVariante": [],
"ArtikelKopf": [replace_by_child],
"ArtikelKopf": [replace_by_single_child],
"SchreibweisenPosition": [],
"SWTyp": [replace_or_reduce],
"SWVariante": [],
"Schreibweise": [replace_by_child],
"Schreibweise": [replace_by_single_child],
"BedeutungsPosition": [],
"Bedeutung": [],
"Bedeutungskategorie": [],
......@@ -38,10 +38,10 @@ MLW_AST_transformation_table = {
"SW_LAT": [replace_or_reduce],
"SW_DEU": [replace_or_reduce],
"SW_GRIECH": [replace_or_reduce],
"Beleg": [replace_by_child],
"Beleg": [replace_by_single_child],
"Verweis": [],
"VerweisZiel": [],
"ZielName": [replace_by_child],
"ZielName": [replace_by_single_child],
"NAMENS_ABKÜRZUNG": [],
"NAME": [],
"DEU_WORT": [],
......@@ -59,6 +59,6 @@ MLW_AST_transformation_table = {
"LZ": [],
"DATEI_ENDE": [],
"NIEMALS": [],
":Token, :RE": content_from_child,
"*": replace_by_child
":Token, :RE": content_from_sinlge_child,
"*": replace_by_single_child
}
......@@ -14,8 +14,8 @@ arithmetik_ebnf = """
ASTTable = {
"+": remove_expendables,
"*": replace_by_child,
"factor": [content_from_child, remove_brackets]
"*": replace_by_single_child,
"factor": [content_from_sinlge_child, remove_brackets]
}
parser = grammar_provider(arithmetik_ebnf)()
......
......@@ -25,8 +25,8 @@ sys.path.extend(['../', './'])
from DHParser.error import Error
from DHParser.syntaxtree import Node, mock_syntax_tree, TOKEN_PTYPE
from DHParser.transform import traverse, content_from_child, \
replace_by_child, flatten, remove_expendables
from DHParser.transform import traverse, content_from_sinlge_child, \
replace_by_single_child, flatten, remove_expendables
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
from DHParser.dsl import grammar_provider
......@@ -73,10 +73,10 @@ class TestNode:
def test_equality2(self):
ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
att = {"term": [replace_by_child, flatten],
"factor": [remove_expendables, content_from_child],
(TOKEN_PTYPE): [remove_expendables, content_from_child],
"?": [remove_expendables, replace_by_child]}
att = {"term": [replace_by_single_child, flatten],
"factor": [remove_expendables, content_from_sinlge_child],
(TOKEN_PTYPE): [remove_expendables, content_from_sinlge_child],
"?": [remove_expendables, replace_by_single_child]}
parser = grammar_provider(ebnf)()
tree = parser("20 / 4 * 3")
traverse(tree, att)
......
......@@ -28,7 +28,7 @@ sys.path.extend(['../', './'])
from DHParser.syntaxtree import mock_syntax_tree, flatten_sxpr, TOKEN_PTYPE
from DHParser.transform import traverse, remove_expendables, \
replace_by_child, content_from_child, flatten
replace_by_single_child, content_from_sinlge_child, flatten
from DHParser.dsl import grammar_provider
from DHParser.testing import grammar_unit
......@@ -45,10 +45,10 @@ ARITHMETIC_EBNF = """
ARITHMETIC_EBNF_transformation_table = {
# AST Transformations for the DSL-grammar
"formula": [remove_expendables],
"term, expr": [replace_by_child, flatten],
"factor": [remove_expendables, content_from_child],
(TOKEN_PTYPE): [remove_expendables, content_from_child],
"*": [remove_expendables, replace_by_child]
"term, expr": [replace_by_single_child, flatten],
"factor": [remove_expendables, content_from_sinlge_child],
(TOKEN_PTYPE): [remove_expendables, content_from_sinlge_child],
"*": [remove_expendables, replace_by_single_child]
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment