Commit a052b256 authored by di68kap

- INCOMPATIBLE CHANGE: '<' and '>' now replace '+' and '~' in the AST-transformation table!

parent c4a8107c
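
In practice the incompatibility boils down to renaming the two joker keys in every AST-transformation table ('+' becomes '<', '~' becomes '>'). A schematic before/after sketch; the table name and rule name are made up, and a stub stands in for DHParser's remove_empty:

    def remove_empty(context): pass    # stand-in for DHParser's remove_empty

    # before this commit:
    my_AST_transformation_table = {
        "+": remove_empty,   # applied to every node before anything else
        "word": [],
        "~": [],             # applied to every node after everything else
    }

    # after this commit the same table reads:
    my_AST_transformation_table = {
        "<": remove_empty,
        "word": [],
        ">": [],
    }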
......@@ -213,7 +213,7 @@ def get_ebnf_grammar() -> EBNFGrammar:
EBNF_AST_transformation_table = {
# AST Transformations for EBNF-grammar
"+":
"<":
remove_expendables,
"syntax":
[], # otherwise '"*": replace_by_single_child' would be applied
......@@ -451,7 +451,7 @@ class EBNFCompiler(Compiler):
tt_name = self.grammar_name + '_AST_transformation_table'
transtable = [tt_name + ' = {',
' # AST Transformations for the ' + self.grammar_name + '-grammar']
transtable.append(' "+": remove_empty,')
transtable.append(' "<": remove_empty,')
for name in self.rules:
transformations = '[]'
rule = self.definitions[name]
......@@ -505,7 +505,7 @@ class EBNFCompiler(Compiler):
table.
"""
assert self._dirty_flag
table_entries = set(expand_table(transtable).keys()) - {'*', '+', '~'}
table_entries = set(expand_table(transtable).keys()) - {'*', '<', '>', '~'}
symbols = self.rules.keys()
messages = []
for entry in table_entries:
......
......@@ -256,10 +256,10 @@ def traverse(root_node: Node,
tag names, but any other key function is possible. There exist
three special keys:
- '+': always called (before any other processing function)
- '<': always called (before any other processing function)
- '*': called for those nodes for which no (other) processing
function appears in the table
- '~': always called (after any other processing function)
- '>': always called (after any other processing function)
Args:
root_node (Node): The root-node of the syntax tree to be traversed
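
For illustration, here is a self-contained sketch (plain Python with dummy callables, not DHParser's real machinery) of how the three special keys described above resolve for a given tag name:

    def pre(node): pass        # stands for a '<' transformation
    def post(node): pass       # stands for a '>' transformation
    def on_word(node): pass    # entry for a specific tag
    def fallback(node): pass   # '*' entry

    table = {"<": [pre], "WORD": [on_word], "*": [fallback], ">": [post]}

    def sequence_for(tag):
        # '<' first, then the tag's own entry (or '*' if there is none), then '>'
        return table.get("<", []) + table.get(tag, table.get("*", [])) + table.get(">", [])

    print([f.__name__ for f in sequence_for("WORD")])         # ['pre', 'on_word', 'post']
    print([f.__name__ for f in sequence_for(":ZeroOrMore")])  # ['pre', 'fallback', 'post']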
......@@ -289,6 +289,16 @@ def traverse(root_node: Node,
table = {name: cast(Sequence[Callable], smart_list(call))
for name, call in list(processing_table.items())}
table = expand_table(table)
# substitute key for insignificant whitespace
if '~' in table:
if ':Whitespace' in table:
raise AssertionError('"~" is a synonym for ":Whitespace" in the '
'processing table. To avoid confusion, choose either of the two, '
'but do not use both at the same time!')
whitespace_transformation = table['~']
del table['~']
table[':Whitespace'] = whitespace_transformation
# cache expanded table
cache = cast(TransformationDict,
table.setdefault('__cache__', cast(TransformationDict, dict())))
# change processing table in place, so it's already expanded and cache filled next time
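
Note that the block above also changes the meaning of '~': it is no longer the post-pass joker but a synonym for ':Whitespace', and specifying both keys at once raises an AssertionError. A self-contained sketch of that substitution (the transformation function is a stub):

    def remove_whitespace(node): pass   # stand-in for DHParser's function of the same name

    table = {"~": [remove_whitespace]}  # now equivalent to {":Whitespace": [remove_whitespace]}
    if '~' in table:
        if ':Whitespace' in table:
            raise AssertionError('use either "~" or ":Whitespace", not both')
        table[':Whitespace'] = table.pop('~')   # fold '~' into ':Whitespace'

    print(table)   # {':Whitespace': [<function remove_whitespace at 0x...>]}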
......@@ -309,13 +319,9 @@ def traverse(root_node: Node,
try:
sequence = cache[key]
except KeyError:
sequence = table.get('+', []) \
sequence = table.get('<', []) \
+ table.get(key, table.get('*', [])) \
+ table.get('~', [])
# '+' always called (before any other processing function)
# '*' called for those nodes for which no (other) processing function
# appears in the table
# '~' always called (after any other processing function)
+ table.get('>', [])
cache[key] = sequence
for call in sequence:
......
......@@ -631,7 +631,7 @@ rich set of predefined operators. Should these not suffice, you
can easily write your own. What does this look like? ::
poetry_AST_transformation_table = {
"+": remove_empty,
"<": remove_empty,
"document": [],
"sentence": [],
"part": [],
......@@ -654,11 +654,12 @@ As you can see, the transformation-table contains an entry for every known
parser, i.e. "document", "sentence", "part", "WORD", "EOF". (If any of these are
missing in the table of your ``poetryCompiler.py``, add them now!) In the
template you'll also find transformations for the anonymous parser
":Token" as well as some curious entries such as "*" and "+". The
":Token" as well as some curious entries such as "*" and "<". The
latter are considered to be "jokers". The transformations related to the
"+"-sign will be applied on any node, before any other transformation is
"<"-sign will be applied on any node, before any other transformation is
applied. In this case, all empty nodes will be removed first (transformation:
``remove_empty``). The "*"-joker contains a list of transformations that will be
``remove_empty``). Similarly, the ">" can be used for transformations that are to be applied
after any other transformation. The "*"-joker contains a list of transformations that will be
applied to all those tags that have not been entered explicitly into the
transformation table. For example, if the transformation reaches a node with the
tag-name ":ZeroOrMore" (i.e. an anonymous node that has been generated by the
......
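
To put such a table to work, the generated xxxCompiler.py modules wrap it with ``partial``, as the XML example further down shows. A sketch for the poetry grammar; the import line is an assumption about what the DHParser package re-exports:

    from functools import partial
    # assumption: these names are re-exported by the DHParser package, as the
    # generated xxxCompiler.py scripts import them
    from DHParser import traverse, remove_empty, replace_by_single_child

    poetry_AST_transformation_table = {
        "<": remove_empty,              # pre-pass on every node
        "document": [],
        "sentence": [],
        "*": replace_by_single_child,   # fallback for tags without their own entry
        ">": [],                        # post-pass on every node (none needed here)
    }

    def poetryTransform():
        # same pattern as the XMLTransform() factory shown further down
        return partial(traverse, processing_table=poetry_AST_transformation_table.copy())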
......@@ -99,7 +99,7 @@ def get_grammar() -> ArithmeticGrammar:
Arithmetic_AST_transformation_table = {
# AST Transformations for the Arithmetic-grammar
"+": remove_empty,
"<": remove_empty,
"expression": [],
"term": [],
"factor": [replace_or_reduce],
......
......@@ -150,7 +150,7 @@ def get_grammar() -> BibTeXGrammar:
BibTeX_AST_transformation_table = {
# AST Transformations for the BibTeX-grammar
"+": remove_empty,
"<": remove_empty,
"bibliography": [],
"preamble": [],
"pre_code": [],
......
......@@ -148,7 +148,7 @@ def get_grammar() -> EBNFGrammar:
EBNF_AST_transformation_table = {
# AST Transformations for EBNF-grammar
"+":
"<":
remove_expendables,
"syntax":
[], # otherwise '"*": replace_by_single_child' would be applied
......
......@@ -396,9 +396,9 @@ drop_expendables = remove_children_if(lambda context: is_empty(context) or
LaTeX_AST_transformation_table = {
# AST Transformations for the LaTeX-grammar
"+": [drop_expendables, flatten_structure],
"<": [drop_expendables, flatten_structure],
"latexdoc": [],
"preamble": [traverse_locally({'+': remove_whitespace, 'block': replace_by_single_child})],
"preamble": [traverse_locally({'<': remove_whitespace, 'block': replace_by_single_child})],
"document": [flatten_structure],
"pdfinfo": [],
"frontpages": reduce_single_child,
......
......@@ -136,7 +136,7 @@ def halt(node):
Lyrik_AST_transformation_table = {
# AST Transformations for the Lyrik-grammar
"+": remove_empty,
"<": remove_empty,
"bibliographisches":
[flatten, remove_nodes('NZ'), remove_whitespace, remove_tokens],
"autor": [],
......
......@@ -66,7 +66,7 @@ class XMLGrammar(Grammar):
#
#######################################################################
@ whitespace = /\s*/ # implicit whitespace, signified by ~
@ whitespace = /\s*/ # insignificant whitespace, signified by ~
@ literalws = none # literals have no implicit whitespace
@ comment = // # no implicit comments
@ ignorecase = False # literals and regular expressions are case-sensitive
......@@ -277,7 +277,7 @@ class XMLGrammar(Grammar):
extSubsetDecl = Forward()
ignoreSectContents = Forward()
markupdecl = Forward()
source_hash__ = "0d1304f359f001aae4a17e5d9e801f0e"
source_hash__ = "52808225879f254ab3099942adde3b59"
parser_initialization__ = "upon instantiation"
COMMENT__ = r''
WHITESPACE__ = r'\s*'
......@@ -397,134 +397,10 @@ def get_grammar() -> XMLGrammar:
#
#######################################################################
XML_AST_transformation_table = {
# AST Transformations for the XML-grammar
"+": [remove_empty, remove_anonymous_tokens, remove_whitespace, remove_nodes("S")],
"document": [],
"prolog": [],
"XMLDecl": [],
"VersionInfo": [reduce_single_child],
"VersionNum": [],
"EncodingDecl": [reduce_single_child],
"EncName": [],
"SDDecl": [],
"Yes": [],
"No": [],
"doctypedecl": [],
"intSubset": [],
"DeclSep": [replace_or_reduce],
"markupdecl": [replace_or_reduce],
"extSubset": [],
"extSubsetDecl": [],
"conditionalSect": [replace_or_reduce],
"includeSect": [],
"ignoreSect": [],
"ignoreSectContents": [],
"extParsedEnt": [],
"TextDecl": [],
"elementdecl": [],
"contentspec": [replace_or_reduce],
"EMPTY": [],
"ANY": [],
"Mixed": [replace_or_reduce],
"children": [],
"choice": [],
"cp": [],
"seq": [],
"AttlistDecl": [],
"AttDef": [],
"AttType": [replace_or_reduce],
"StringType": [],
"TokenizedType": [replace_or_reduce],
"ID": [],
"IDREF": [],
"IDREFS": [],
"ENTITY": [],
"ENTITIES": [],
"NMTOKEN": [],
"NMTOKENS": [],
"EnumeratedType": [replace_or_reduce],
"NotationType": [],
"Enumeration": [],
"DefaultDecl": [replace_or_reduce],
"REQUIRED": [],
"IMPLIED": [],
"FIXED": [],
"EntityDecl": [replace_or_reduce],
"GEDecl": [],
"PEDecl": [],
"EntityDef": [replace_or_reduce],
"PEDef": [replace_or_reduce],
"NotationDecl": [],
"ExternalID": [],
"PublicID": [],
"NDataDecl": [],
"element": [replace_or_reduce],
"STag": [],
"ETag": [reduce_single_child],
"EmptyElemTag": [],
"TagName": [replace_by_single_child],
"Attribute": [],
"content": [flatten],
"EntityValue": [replace_or_reduce],
"AttValue": [replace_or_reduce],
"SystemLiteral": [replace_or_reduce],
"PubidLiteral": [replace_or_reduce],
"Reference": [replace_or_reduce],
"EntityRef": [],
"PEReference": [],
"Nmtokens": [],
"Nmtoken": [reduce_single_child],
"Names": [],
"Name": [collapse],
"NameStartChar": [],
"NameChars": [],
"Misc": [],
"Comment": [],
"PI": [],
"PITarget": [reduce_single_child],
"CDSect": [],
"PubidCharsSingleQuoted": [],
"PubidChars": [],
"CharData": [],
"CData": [],
"IgnoreChars": [],
"PIChars": [],
"CommentChars": [],
"CharRef": [replace_or_reduce],
"Chars": [],
"Char": [],
"S": [],
"EOF": [],
":_Token, :_RE": reduce_single_child,
"*": replace_by_single_child
}
def XMLTransform() -> TransformationDict:
return partial(traverse, processing_table=XML_AST_transformation_table.copy())
def get_transformer() -> TransformationFunc:
global thread_local_XML_transformer_singleton
try:
transformer = thread_local_XML_transformer_singleton
except NameError:
thread_local_XML_transformer_singleton = XMLTransform()
transformer = thread_local_XML_transformer_singleton
return transformer
#######################################################################
#
# Tag conversion
#
#######################################################################
XML_AST_transformation_table = {
# AST Transformations for the XML-grammar
"+": [remove_empty, remove_anonymous_tokens, remove_whitespace, remove_nodes("S")],
"<": [remove_empty, remove_anonymous_tokens, remove_whitespace, remove_nodes("S")],
"document": [flatten(lambda context: context[-1].tag_name == 'prolog', recursive=False)],
"prolog": [],
"XMLDecl": [],
......