In January 2021 we will introduce a 10 GB quota for project repositories. Higher limits for individual projects will be available on request. Please see https://doku.lrz.de/display/PUBLIC/GitLab for more information.

Commit e8c626df authored by Eckhart Arnold

- changes and additions to AST transformation primitives

parent c8bde767
......@@ -84,8 +84,8 @@ from DHParser.syntaxtree import Node, traverse, remove_children_if, \\
reduce_single_child, replace_by_single_child, remove_whitespace, \\
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \\
is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \\
TransformationFunc, remove_children, remove_content, remove_first, remove_last, \\
has_name, has_content
TransformationFunc, remove_parser, remove_content, remove_brackets, \\
keep_children, has_name, has_content
'''
......
......@@ -32,9 +32,10 @@ from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name
from DHParser.parsers import Grammar, mixin_comment, nil_scanner, Forward, RE, NegativeLookahead, \
Alternative, Series, Optional, Required, OneOrMore, ZeroOrMore, Token, Compiler, \
ScannerFunc
from DHParser.syntaxtree import Node, traverse, remove_first, remove_last, reduce_single_child, \
replace_by_single_child, TOKEN_PTYPE, remove_expendables, remove_tokens, flatten, \
forbid, assert_content, WHITESPACE_PTYPE, key_tag_name, TransformationFunc
from DHParser.syntaxtree import Node, traverse, remove_brackets, \
reduce_single_child, replace_by_single_child, TOKEN_PTYPE, remove_expendables, \
remove_tokens, flatten, forbid, assert_content, WHITESPACE_PTYPE, key_tag_name, \
TransformationFunc
from DHParser.versionnumber import __version__
......@@ -212,7 +213,7 @@ EBNF_transformation_table = {
"group":
[remove_tokens('(', ')'), replace_by_single_child],
"oneormore, repetition, option":
[reduce_single_child, remove_first, remove_last],
[reduce_single_child, remove_brackets],
"symbol, literal, regexp":
reduce_single_child,
(TOKEN_PTYPE, WHITESPACE_PTYPE):
......
......@@ -209,6 +209,7 @@ def add_parser_guard(parser_func):
# if parser did not match but a saved result exists, assume
# left recursion and use the saved result
node, rest = parser.visited[location]
# Note: For this to work None-results must not be cached!
parser.recursion_counter[location] -= 1
......
......@@ -20,7 +20,7 @@ permissions and limitations under the License.
import copy
import inspect
import os
from functools import partial, singledispatch
from functools import partial, reduce, singledispatch
try:
import regex as re
except ImportError:
......@@ -50,6 +50,7 @@ __all__ = ['WHITESPACE_PTYPE',
'reduce_single_child',
'replace_parser',
'collapse',
'join',
'replace_content',
'is_whitespace',
'is_empty',
......@@ -57,14 +58,14 @@ __all__ = ['WHITESPACE_PTYPE',
'is_token',
'has_name',
'has_content',
'remove_children_if',
'remove_children',
'remove_parser',
'remove_content',
'remove_first',
'remove_last',
'keep_children',
'remove_children_if',
'remove_whitespace',
'remove_empty',
'remove_expendables',
'remove_brackets',
'remove_tokens',
'flatten',
'forbid',
......@@ -422,7 +423,7 @@ class Node:
with open(os.path.join(log_dir(), st_file_name), "w", encoding="utf-8") as f:
f.write(self.as_sxpr())
def find(self, match_function) -> Iterator['Node']:
def find(self, match_function: Callable) -> Iterator['Node']:
"""Finds nodes in the tree that match a specific criterion.
``find`` is a generator that yields all nodes for which the
......@@ -722,6 +723,30 @@ def collapse(node):
node.result = str(node)
@transformation_factory
def join(node, tag_names: List[str]):
"""Joins all children next to each other and with particular tag-
names into a single child node with mock parser 'parser_name'.
"""
result = []
name, ptype = (tag_names[0].split(':') + [''])[:2]
if node.children:
i = 0; L = len(node.children)
while i < L:
while i < L and not node.children[i].tag_name in tag_names:
result.append(node.children[i])
i += 1
k = i + 1
while (k < L and node.children[k].tag_name in tag_names
and bool(node.children[i].children) == bool(node.children[k].children)):
k += 1
if i < L:
result.append(Node(MockParser(name, ptype),
reduce(lambda a, b: a + b, (node.result for node in node.children[i:k]))))
i = k
node.result = tuple(result)
# ------------------------------------------------
#
# destructive transformations:
......@@ -762,9 +787,18 @@ def has_content(node, contents: AbstractSet[str]) -> bool:
return str(node) in contents
@transformation_factory(Callable) # @singledispatch
@transformation_factory
def keep_children(node, section: slice=slice(None, None, None), condition=lambda node: True):
"""Keeps only the nodes which fall into a slice of the result field
and for which the function `condition(child_node)` evaluates to
`True`."""
if node.children:
node.result = tuple(c for c in node.children[section] if condition(c))
@transformation_factory(Callable)
def remove_children_if(node, condition):
"""Removes all nodes from the result field if the function
"""Removes all nodes from a slice of the result field if the function
``condition(child_node)`` evaluates to ``True``."""
if node.children:
node.result = tuple(c for c in node.children if not condition(c))
......@@ -773,24 +807,24 @@ def remove_children_if(node, condition):
remove_whitespace = remove_children_if(is_whitespace) # partial(remove_children_if, condition=is_whitespace)
remove_empty = remove_children_if(is_empty)
remove_expendables = remove_children_if(is_expendable) # partial(remove_children_if, condition=is_expendable)
@transformation_factory(Callable)
def remove_first(node, condition=lambda node: True):
"""Removes the first child if the condition is met.
Otherwise does nothing."""
if node.children:
if condition(node.children[0]):
node.result = node.result[1:]
@transformation_factory(Callable)
def remove_last(node, condition=lambda node: True):
"""Removes the last child if the condition is met.
Otherwise does nothing."""
if node.children:
if condition(node.children[-1]):
node.result = node.result[:-1]
remove_brackets = keep_children(slice(1,-1))
# @transformation_factory(Callable)
# def remove_first(node, condition=lambda node: True):
# """Removes the first child if the condition is met.
# Otherwise does nothing."""
# if node.children:
# if condition(node.children[0]):
# node.result = node.result[1:]
#
#
# @transformation_factory(Callable)
# def remove_last(node, condition=lambda node: True):
# """Removes the last child if the condition is met.
# Otherwise does nothing."""
# if node.children:
# if condition(node.children[-1]):
# node.result = node.result[:-1]
@transformation_factory
......@@ -802,7 +836,7 @@ def remove_tokens(node, tokens: AbstractSet[str] = frozenset()):
@transformation_factory
def remove_children(node, tag_names: AbstractSet[str]):
def remove_parser(node, tag_names: AbstractSet[str]):
"""Removes children by 'tag name'."""
remove_children_if(node, partial(has_name, tag_names=tag_names))
......
......@@ -268,8 +268,8 @@ def load_if_file(text_or_file) -> str:
return content
except FileNotFoundError as error:
if re.fullmatch(r'[\w/:. \\]+', text_or_file):
raise FileNotFoundError('Not a valid file: ' + text_or_file + '\nAdd "\\n" '
'to distinguish source data from a file name!')
raise FileNotFoundError('Not a valid file: ' + text_or_file + '!\n(Add "\\n" '
'to distinguish source data from a file name.)')
else:
return text_or_file
else:
......
......@@ -14,15 +14,15 @@ blockenv = beginenv sequence §endenv
parblock = "{" sequence §"}"
sequence = { paragraph [PARSEP] }+
paragraph = { !blockcmd (command | block | text) }+
paragraph = { !blockcmd (command | block | text) //~ }+
inlineenv = beginenv { command | block | text }+ endenv
beginenv = "\begin{" §NAME §"}"
endenv = "\end{" §::NAME §"}"
command = CMDNAME [[ config ] block]
command = CMDNAME [[ //~ config ] //~ block ]
config = "[" cfgtext §"]"
block = "{" { text | block } §"}"
block = /{/ { command | text | block } §/}/
text = { cfgtext | (BRACKETS //~) }+
cfgtext = { word_sequence | (ESCAPED //~) }+
......@@ -32,10 +32,10 @@ blockcmd = "\subsection" | "\section" | "\chapter" | "\subsubsection"
| "\paragraph" | "\subparagraph" | "\begin{enumerate}"
| "\begin{itemize}" | "\item" | "\begin{figure}"
CMDNAME = /\\\w+/~
CMDNAME = /\\(?:(?!_)\w)+/~
NAME = /\w+/~
ESCAPED = /\\[%$&]/
ESCAPED = /\\[%$&_\/]/
BRACKETS = /[\[\]]/ # left or right square bracket: [ ]
TEXTCHUNK = /[^\\%$&\{\}\[\]\s\n]+/ # some piece of text excluding whitespace,
# linefeed and special characters
......
......@@ -3,18 +3,28 @@
Professoren, Philister und Vieh; welche vier Stände doch nichts weniger
als streng geschieden sind. Der Viehstand ist der bedeutendste.
2: Paragraphs may contain {\em inline blocks} as well as \emph{inline commands}
and also special \& characters.
3: Paragraphs are separated only by at least one blank line.
Therefore,
this line still belongs to the same paragraph.
[fail:paragraph]
1 : \begin{enumerate}
2 : \item
3 : und Vieh; \paragraph
[match:sequence]
1 : Im allgemeinen werden die Bewohner Göttingens eingeteilt in Studenten,
Professoren, Philister und Vieh; welche vier Stände doch nichts weniger
als streng geschieden sind. Der Viehstand ist der bedeutendste.
1 : Paragraphs are separated by gaps.
Like this one.
Im allgemeinen werden die Bewohner Göttingens eingeteilt in Studenten,
Professoren, Philister und Vieh; welche vier Stände doch nichts weniger
als streng geschieden sind. Der Viehstand ist der bedeutendste.
2 : The second paragraph follows after a long gap.
The parser should accept this, too.
......@@ -23,6 +23,8 @@ import sys
sys.path.extend(['../../', '../', './'])
from DHParser import testing
testing.recompile_grammar('LaTeX.ebnf') # recompiles Grammar only if it has changed
from DHParser import toolkit
from LaTeXCompiler import get_grammar, get_transformer
......
......@@ -27,7 +27,7 @@ from DHParser.syntaxtree import Node, traverse, remove_last, remove_first, \
remove_children_if, reduce_single_child, replace_by_single_child, remove_whitespace, \
remove_expendables, remove_tokens, flatten, is_whitespace, is_expendable, \
collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, TransformationFunc, \
remove_children, remove_empty, has_content, has_name
remove_parser, remove_empty, has_content, has_name
#######################################################################
......@@ -141,7 +141,7 @@ Lyrik_AST_transformation_table = {
# AST Transformations for the Lyrik-grammar
"+": remove_empty,
"bibliographisches":
[remove_children('NZ'), remove_tokens],
[remove_parser('NZ'), remove_tokens],
"autor": [],
"werk": [],
"untertitel": [],
......@@ -157,9 +157,9 @@ Lyrik_AST_transformation_table = {
"ziel":
reduce_single_child,
"gedicht, strophe, text":
[flatten, remove_children('LEERZEILE'), remove_children('NZ')],
[flatten, remove_parser('LEERZEILE'), remove_parser('NZ')],
"titel, serie":
[flatten, remove_children('LEERZEILE'), remove_children('NZ'), collapse],
[flatten, remove_parser('LEERZEILE'), remove_parser('NZ'), collapse],
"zeile": [],
"vers":
collapse,
......
......@@ -31,7 +31,7 @@ from DHParser.dsl import parser_factory, DHPARSER_IMPORTS
class TestInfiLoopsAndRecursion:
def test_direct_left_recursion(self):
def test_direct_left_recursion1(self):
minilang ="""
@ whitespace = linefeed
formula = [ //~ ] expr
......@@ -50,7 +50,7 @@ class TestInfiLoopsAndRecursion:
syntax_tree.log("test_LeftRecursion_direct.cst")
# self.minilang_parser1.log_parsing_history__("test_LeftRecursion_direct")
def test_indirect_left_recursion1(self):
def test_direct_left_recursion2(self):
minilang = """
@ whitespace = linefeed
formula = [ //~ ] expr
......@@ -64,12 +64,10 @@ class TestInfiLoopsAndRecursion:
parser = parser_factory(minilang)()
assert parser
syntax_tree = parser(snippet)
assert not syntax_tree.collect_errors()
assert not syntax_tree.error_flag, syntax_tree.collect_errors()
assert snippet == str(syntax_tree)
if is_logging():
syntax_tree.log("test_LeftRecursion_indirect1.cst")
def test_indirect_left_recursion2(self):
def test_indirect_left_recursion1(self):
minilang = """
Expr = //~ (Product | Sum | Value)
Product = Expr { ('*' | '/') Expr }+
......@@ -80,18 +78,40 @@ class TestInfiLoopsAndRecursion:
assert parser
snippet = "8 * 4"
syntax_tree = parser(snippet)
assert not syntax_tree.error_flag
assert not syntax_tree.error_flag, syntax_tree.collect_errors()
snippet = "7 + 8 * 4"
syntax_tree = parser(snippet)
assert not syntax_tree.error_flag
print(syntax_tree.as_sxpr())
assert not syntax_tree.error_flag, syntax_tree.collect_errors()
snippet = "9 + 8 * (4 + 3)"
syntax_tree = parser(snippet)
assert not syntax_tree.error_flag, syntax_tree.collect_errors()
assert snippet == str(syntax_tree)
if is_logging():
syntax_tree.log("test_LeftRecursion_indirect2.cst")
# def test_indirect_left_recursion2(self):
# """This will always fail, because of the precedence rule of the
# "|"-operator. (Note: This is a difference between PEG and
# classical EBNF). DHParser is a PEG-Parser although it uses the
# syntax of classical EBNF."""
# minilang = """
# Expr = //~ (Product | Sum | Value)
# Product = Expr { ('*' | '/') Expr }
# Sum = Expr { ('+' | '-') Expr }
# Value = /[0-9.]+/~ | '(' Expr ')'
# """
# parser = parser_factory(minilang)()
# assert parser
# snippet = "8 * 4"
# syntax_tree = parser(snippet)
# assert not syntax_tree.error_flag, syntax_tree.collect_errors()
# snippet = "7 + 8 * 4"
# syntax_tree = parser(snippet)
# print(syntax_tree.as_sxpr())
# assert not syntax_tree.error_flag, syntax_tree.collect_errors()
# snippet = "9 + 8 * (4 + 3)"
# syntax_tree = parser(snippet)
# assert not syntax_tree.error_flag, syntax_tree.collect_errors()
# assert snippet == str(syntax_tree)
def test_inifinite_loops(self):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment