In January 2021 we will introduce a 10 GB quota for project repositories. Higher limits for individual projects will be available on request. Please see https://doku.lrz.de/display/PUBLIC/GitLab for more information.

Commit 3978f82c authored by Eckhart Arnold's avatar Eckhart Arnold

- improvements to AST transformations and LaTeX-example

parent a100f00d
......@@ -80,13 +80,13 @@ from DHParser import logging, is_filename, load_if_file, \\
Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture, \\
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \\
last_value, counterpart, accumulate, PreprocessorFunc, \\
Node, TransformationFunc, \\
traverse, remove_children_if, join_children, \\
Node, TransformationFunc, TRUE_CONDITION, \\
traverse, remove_children_if, merge_children, is_anonymous, \\
reduce_single_child, replace_by_single_child, remove_whitespace, \\
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \\
is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \\
remove_parser, remove_content, remove_brackets, \\
keep_children, has_name, has_content, apply_if, remove_first, remove_last
remove_parser, remove_content, remove_brackets, replace_parser, \\
keep_children, is_one_of, has_content, apply_if, remove_first, remove_last
'''
......
......@@ -35,7 +35,7 @@ from DHParser.parser import Grammar, mixin_comment, nil_preprocessor, Forward, R
from DHParser.syntaxtree import WHITESPACE_PTYPE, TOKEN_PTYPE, Node, TransformationFunc
from DHParser.transform import traverse, remove_brackets, \
reduce_single_child, replace_by_single_child, remove_expendables, \
remove_tokens, flatten, forbid, assert_content, key_tag_name
remove_tokens, flatten, forbid, assert_content, key_tag_name, remove_infix_operator
from DHParser.versionnumber import __version__
__all__ = ('get_ebnf_preprocessor',
......@@ -200,7 +200,7 @@ EBNF_transformation_table = {
"directive, definition":
remove_tokens('@', '='),
"expression":
[replace_by_single_child, flatten, remove_tokens('|')],
[replace_by_single_child, flatten, remove_tokens('|')], # remove_infix_operator],
"term":
[replace_by_single_child, flatten], # supports both idioms: "{ factor }+" and "factor { factor }"
"factor, flowmarker, retrieveop":
......@@ -215,7 +215,7 @@ EBNF_transformation_table = {
(TOKEN_PTYPE, WHITESPACE_PTYPE):
reduce_single_child,
"list_":
[flatten, remove_tokens(',')],
[flatten, remove_infix_operator],
"*":
replace_by_single_child
}
......@@ -353,7 +353,7 @@ class EBNFCompiler(Compiler):
for name in self.rules:
transtable.append(' "' + name + '": [],')
transtable.append(' ":Token, :RE": reduce_single_child,')
transtable += [' "*": replace_by_single_child', '}', '', tf_name +
transtable += [' # "*": replace_by_single_child', '}', '', tf_name +
' = partial(traverse, processing_table=%s)' % tt_name, '']
transtable += [TRANSFORMER_FACTORY.format(NAME=self.grammar_name)]
return '\n'.join(transtable)
......@@ -719,7 +719,7 @@ class EBNFCompiler(Compiler):
def on_literal(self, node) -> str:
return 'Token(' + str(node).replace('\\', r'\\') + ')' # return 'Token(' + ', '.join_children([node.result]) + ')' ?
return 'Token(' + str(node).replace('\\', r'\\') + ')' # return 'Token(' + ', '.merge_children([node.result]) + ')' ?
def on_regexp(self, node: Node) -> str:
......
......@@ -34,6 +34,7 @@ except ImportError:
from DHParser.toolkit import is_logging, log_dir, line_col, identity
__all__ = ('WHITESPACE_PTYPE',
'MockParser',
'TOKEN_PTYPE',
'ZOMBIE_PARSER',
'ParserBase',
......
......@@ -151,7 +151,7 @@ def is_logging() -> bool:
# if i < 0:
# parameter_list = parameter_list[:i]
# name = f.__self__.__class__.__name__ if f.__name__ == '__init__' else f.__name__
# return "%s(%s)" % (name, ", ".join_children(repr(item) for item in parameter_list))
# return "%s(%s)" % (name, ", ".merge_children(repr(item) for item in parameter_list))
def line_col(text: str, pos: int) -> Tuple[int, int]:
......
......@@ -43,14 +43,15 @@ __all__ = ('transformation_factory',
'reduce_single_child',
'replace_parser',
'collapse',
'join_children',
'merge_children',
'replace_content',
'apply_if',
'is_anonymous',
'is_whitespace',
'is_empty',
'is_expendable',
'is_token',
'has_name',
'is_one_of',
'has_content',
'remove_children_if',
'remove_parser',
......@@ -61,12 +62,17 @@ __all__ = ('transformation_factory',
'remove_empty',
'remove_expendables',
'remove_brackets',
'remove_infix_operator',
'remove_single_child',
'remove_tokens',
'keep_children',
'flatten',
'forbid',
'require',
'assert_content')
'assert_content',
'assert_condition',
'assert_has_children',
'TRUE_CONDITION')
def transformation_factory(t=None):
......@@ -225,12 +231,18 @@ def traverse(root_node, processing_table, key_func=key_tag_name) -> None:
# ------------------------------------------------
def replace_by_single_child(node):
"""Remove single branch node, replacing it by its immediate descendant.
def TRUE_CONDITION(node):
    """Default predicate for conditional transformations: accepts
    every node unconditionally."""
    return True
@transformation_factory(Callable)
def replace_by_single_child(node, condition=TRUE_CONDITION):
"""Remove single branch node, replacing it by its immediate descendant
if and only if the condition on the descendant is true.
(In case the descendant's name is empty (i.e. anonymous) the
name of this node's parser is kept.)
"""
if node.children and len(node.result) == 1:
if node.children and len(node.result) == 1 and condition(node.children[0]):
if not node.result[0].parser.name:
node.result[0].parser.name = node.parser.name
node.parser = node.result[0].parser
......@@ -238,11 +250,14 @@ def replace_by_single_child(node):
node.result = node.result[0].result
@transformation_factory(Callable)
def reduce_single_child(node, condition=TRUE_CONDITION):
    """Reduce a single branch node by transferring the result of its
    immediate descendant to this node, but keeping this node's parser
    entry.

    If ``condition`` evaluates to False on the descendant, the node is
    left unchanged.

    NOTE(review): the diff residue interleaved the superseded signature
    (`def reduce_single_child(node):`) and the old ``if`` line with the
    new revision; only the new revision is kept here.
    """
    if node.children and len(node.result) == 1 and condition(node.children[0]):
        # propagate errors collected on the child before discarding it
        node._errors.extend(node.result[0]._errors)
        node.result = node.result[0].result
......@@ -288,19 +303,20 @@ def flatten(node, condition=lambda node: not node.parser.name, recursive=True):
def collapse(node):
    """Collapses all sub-nodes of a node by replacing them with the
    string representation of the node.

    NOTE(review): the diff residue kept both the superseded and the new
    docstring wording; only the new wording is kept here.  The node's
    children are discarded in favor of ``str(node)``.
    """
    node.result = str(node)
@transformation_factory
def join_children(node, tag_names: List[str]):
def merge_children(node, tag_names: List[str]):
"""Joins all children next to each other and with particular tag-
names into a single child node with mock parser 'parser_name'.
names into a single child node with mock parser with the name of
the first tag name in the list.
"""
result = []
name, ptype = (tag_names[0].split(':') + [''])[:2]
name, ptype = ('', tag_names[0]) if tag_names[0][:1] == ':' else (tag_names[0], '')
if node.children:
i = 0
L = len(node.children)
......@@ -356,9 +372,14 @@ def is_token(node, tokens: AbstractSet[str] = frozenset()) -> bool:
return node.parser.ptype == TOKEN_PTYPE and (not tokens or node.result in tokens)
def has_name(node, regexp: str) -> bool:
    """Checks whether the node's tag name matches the regular
    expression ``regexp`` (anchored at the start, as with re.match)."""
    return re.match(regexp, node.tag_name) is not None
def is_anonymous(node):
    """Returns True if the node's parser carries no name, i.e. the
    node stems from an anonymous parser."""
    return not node.parser.name
def is_one_of(node, tag_name_set: AbstractSet[str]) -> bool:
    """Returns True if the node's tag_name is one of the given
    tag names."""
    return node.tag_name in tag_name_set
def has_content(node, regexp: str) -> bool:
......@@ -395,9 +416,11 @@ def remove_children_if(node, condition: Callable, section: slice = slice(None)):
remove_whitespace = remove_children_if(is_whitespace) # partial(remove_children_if, condition=is_whitespace)
remove_empty = remove_children_if(is_empty)
remove_expendables = remove_children_if(is_expendable) # partial(remove_children_if, condition=is_expendable)
remove_first = keep_children(slice(1, None))
remove_last = keep_children(slice(None, -1))
remove_brackets = keep_children(slice(1, -1))
remove_first = apply_if(keep_children(slice(1, None)), lambda nd: len(nd.children) > 1)
remove_last = apply_if(keep_children(slice(None, -1)), lambda nd: len(nd.children) > 1)
remove_brackets = apply_if(keep_children(slice(1, -1)), lambda nd: len(nd.children) >= 2)
remove_infix_operator = keep_children(slice(0, None, 2))
remove_single_child = apply_if(keep_children(slice(0)), lambda nd: len(nd.children) == 1)
@transformation_factory
......@@ -411,7 +434,7 @@ def remove_tokens(node, tokens: AbstractSet[str] = frozenset()):
@transformation_factory
def remove_parser(node, regexp: str):
    """Removes all children whose tag name equals ``regexp``.

    BUG FIX: the replacement of ``has_name`` by ``is_one_of`` kept the
    keyword ``regexp=regexp``, but ``is_one_of`` has no ``regexp``
    parameter (it takes ``tag_name_set``) — the partial would raise a
    TypeError when invoked.  Wrap the single name in a set and pass it
    under the correct keyword.  Note that ``is_one_of`` performs exact
    name matching, not regular-expression matching, which is the
    behavior the revision intended.
    """
    remove_children_if(node, partial(is_one_of, tag_name_set={regexp}))
@transformation_factory
......@@ -451,7 +474,7 @@ def assert_content(node, regexp: str):
#
# @transformation_factory
# def assert_name(node, regexp: str):
# if not has_name(node, regexp):
# if not is_one_of(node, regexp):
# node.add_error('Element name "%s" does not match %s' % (node.tag_name), str(regexp))
#
#
......
......@@ -112,7 +112,7 @@ def selftest() -> bool:
if errors:
print("Selftest FAILED :-(")
print("\n\n".join_children(errors))
print("\n\n".merge_children(errors))
return False
print(generated_ebnf_parser)
print("\n\nSTAGE 2: Selfhosting-test: Trying to compile EBNF-Grammar with generated parser...\n")
......
......@@ -369,8 +369,8 @@ scrool down to the AST section, you'll see something like this:
"bibliographisches": [remove_parser('NZ'), remove_tokens],
"autor, werk, untertitel, ort": [],
"jahr": [reduce_single_child],
"wortfolge": [flatten(has_name('WORT'), recursive=False), remove_last(is_whitespace), collapse],
"namenfolge": [flatten(has_name('NAME'), recursive=False), remove_last(is_whitespace), collapse],
"wortfolge": [flatten(is_one_of('WORT'), recursive=False), remove_last(is_whitespace), collapse],
"namenfolge": [flatten(is_one_of('NAME'), recursive=False), remove_last(is_whitespace), collapse],
"verknüpfung": [remove_tokens('<', '>'), reduce_single_child],
"ziel": reduce_single_child,
"gedicht, strophe, text": [flatten, remove_parser('LEERZEILE'), remove_parser('NZ')],
......
......@@ -47,7 +47,7 @@ from DHParser.toolkit import logging
# transformer, compiler)
# print(result)
# if errors:
# print('\n\n'.join_children(errors))
# print('\n\n'.merge_children(errors))
# sys.exit(1)
# else:
# # compile the grammar again using the result of the previous
......
......@@ -77,17 +77,18 @@ table_config = "{" /[lcr|]+/~ "}"
block_of_paragraphs = /{/ sequence §/}/
sequence = { (paragraph | block_environment ) [PARSEP] }+
paragraph = { !blockcmd text_elements //~ }+
text_elements = command | text | block | inline_environment
paragraph = { !blockcmd text_element //~ }+
text_element = command | text | block | inline_environment
#### inline environments ####
inline_environment = known_inline_env | generic_inline_env
known_inline_env = inline_math
generic_inline_env = (begin_inline_env { text_elements }+ §end_environment)
generic_inline_env = (begin_inline_env { text_element }+ §end_inline_env)
begin_inline_env = (-!LB begin_environment) | (begin_environment -!LB)
# end_inline_env = (-!LB end_environment) | (end_environment -!LB) # ambiguity with genric_block when EOF
end_inline_env = end_environment
# (-!LB end_environment) | (end_environment -!LB) # ambiguity with genric_block when EOF
begin_environment = "\begin{" §NAME §"}"
end_environment = "\end{" §::NAME §"}"
......@@ -113,7 +114,7 @@ caption = "\caption" block
config = "[" cfgtext §"]"
block = /{/ { text_elements } §/}/
block = /{/ { text_element } §/}/
text = { cfgtext | (BRACKETS //~) }+
cfgtext = { word_sequence | (ESCAPED //~) }+
......@@ -143,7 +144,6 @@ ESCAPED = /\\[%$&_\/]/
BRACKETS = /[\[\]]/ # left or right square bracket: [ ]
TEXTCHUNK = /[^\\%$&\{\}\[\]\s\n]+/ # some piece of text excluding whitespace,
# linefeed and special characters
WSPC = /[ \t]+/ # (horizontal) whitespace
LF = !PARSEP /[ \t]*\n[ \t]*/ # linefeed but not an empty line
PARSEP = /[ \t]*(?:\n[ \t]*)+\n[ \t]*/ # at least one empty line, i.e.
# [whitespace] linefeed [whitespace] linefeed
......
This diff is collapsed.
This diff is collapsed.
......@@ -80,6 +80,9 @@
\end{itemize}
\end{itemize}
5 : \begin{itemize}
\item Item-lists may consist of just one item.
\end{itemize}
[fail:itemize]
1 : \begin{itemize}
......
......@@ -21,7 +21,7 @@
6 : Paragraphs may also contain {\xy unknown blocks }.
7 : Paragraphs may contain \xy[xycgf]{unbknown} commands.
7 : Paragraphs may contain \xy[xycgf]{some {\em unbknown}} commands.
8 : Unknwon \xy commands within paragraphs may be simple
or \xy{complex}.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment