Currently, job artifacts in CI/CD pipelines on LRZ GitLab never expire. Starting on Wednesday, 26.1.2022, the default expiration time will be 30 days (the GitLab default). Artifacts of already completed jobs will not be affected by this change, and the latest artifacts of all jobs in the latest successful pipelines will be kept. More information: https://gitlab.lrz.de/help/user/admin_area/settings/continuous_integration.html#default-artifacts-expiration

Commit e8c626df authored by Eckhart Arnold
Browse files

- changes and additions to AST transformation primitives

parent c8bde767
......@@ -84,8 +84,8 @@ from DHParser.syntaxtree import Node, traverse, remove_children_if, \\
reduce_single_child, replace_by_single_child, remove_whitespace, \\
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \\
is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \\
TransformationFunc, remove_children, remove_content, remove_first, remove_last, \\
has_name, has_content
TransformationFunc, remove_parser, remove_content, remove_brackets, \\
keep_children, has_name, has_content
'''
......
......@@ -32,9 +32,10 @@ from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name
from DHParser.parsers import Grammar, mixin_comment, nil_scanner, Forward, RE, NegativeLookahead, \
Alternative, Series, Optional, Required, OneOrMore, ZeroOrMore, Token, Compiler, \
ScannerFunc
from DHParser.syntaxtree import Node, traverse, remove_first, remove_last, reduce_single_child, \
replace_by_single_child, TOKEN_PTYPE, remove_expendables, remove_tokens, flatten, \
forbid, assert_content, WHITESPACE_PTYPE, key_tag_name, TransformationFunc
from DHParser.syntaxtree import Node, traverse, remove_brackets, \
reduce_single_child, replace_by_single_child, TOKEN_PTYPE, remove_expendables, \
remove_tokens, flatten, forbid, assert_content, WHITESPACE_PTYPE, key_tag_name, \
TransformationFunc
from DHParser.versionnumber import __version__
......@@ -212,7 +213,7 @@ EBNF_transformation_table = {
"group":
[remove_tokens('(', ')'), replace_by_single_child],
"oneormore, repetition, option":
[reduce_single_child, remove_first, remove_last],
[reduce_single_child, remove_brackets],
"symbol, literal, regexp":
reduce_single_child,
(TOKEN_PTYPE, WHITESPACE_PTYPE):
......
......@@ -209,6 +209,7 @@ def add_parser_guard(parser_func):
                # if the parser did not match but a saved result exists, assume
                # left recursion and use the saved result
node, rest = parser.visited[location]
# Note: For this to work None-results must not be cached!
parser.recursion_counter[location] -= 1
......
......@@ -20,7 +20,7 @@ permissions and limitations under the License.
import copy
import inspect
import os
from functools import partial, singledispatch
from functools import partial, reduce, singledispatch
try:
import regex as re
except ImportError:
......@@ -50,6 +50,7 @@ __all__ = ['WHITESPACE_PTYPE',
'reduce_single_child',
'replace_parser',
'collapse',
'join',
'replace_content',
'is_whitespace',
'is_empty',
......@@ -57,14 +58,14 @@ __all__ = ['WHITESPACE_PTYPE',
'is_token',
'has_name',
'has_content',
'remove_children_if',
'remove_children',
'remove_parser',
'remove_content',
'remove_first',
'remove_last',
'keep_children',
'remove_children_if',
'remove_whitespace',
'remove_empty',
'remove_expendables',
'remove_brackets',
'remove_tokens',
'flatten',
'forbid',
......@@ -422,7 +423,7 @@ class Node:
with open(os.path.join(log_dir(), st_file_name), "w", encoding="utf-8") as f:
f.write(self.as_sxpr())
def find(self, match_function) -> Iterator['Node']:
def find(self, match_function: Callable) -> Iterator['Node']:
"""Finds nodes in the tree that match a specific criterion.
``find`` is a generator that yields all nodes for which the
......@@ -722,6 +723,30 @@ def collapse(node):
node.result = str(node)
@transformation_factory
def join(node, tag_names: List[str]):
    """Joins all children that stand next to each other and whose tag-name
    is contained in `tag_names` into a single child node.

    The merged node receives a mock parser whose name is taken from the
    first entry of `tag_names`; an optional ':ptype' suffix on that entry
    selects the parser type.
    """
    if not node.children:
        # nothing to join; also avoids touching `tag_names` needlessly
        return
    # split 'name:ptype' into (name, ptype); missing ':ptype' -> empty ptype
    name, ptype = (tag_names[0].split(':') + [''])[:2]
    result = []
    i, n = 0, len(node.children)
    while i < n:
        # copy over children that are not subject to joining
        while i < n and node.children[i].tag_name not in tag_names:
            result.append(node.children[i])
            i += 1
        # extend the run [i:k) of adjacent joinable children; only children
        # that agree on their leaf/branch status may be merged, because
        # their result fields must be concatenable (all str or all tuple)
        k = i + 1
        while (k < n and node.children[k].tag_name in tag_names
               and bool(node.children[i].children) == bool(node.children[k].children)):
            k += 1
        if i < n:
            result.append(Node(MockParser(name, ptype),
                               reduce(lambda a, b: a + b,
                                      (child.result for child in node.children[i:k]))))
        i = k
    node.result = tuple(result)
# ------------------------------------------------
#
# destructive transformations:
......@@ -762,9 +787,18 @@ def has_content(node, contents: AbstractSet[str]) -> bool:
return str(node) in contents
@transformation_factory(Callable) # @singledispatch
@transformation_factory
def keep_children(node, section: slice=slice(None, None, None), condition=lambda node: True):
    """Retains only those child nodes that lie within the slice `section`
    of the result field and for which `condition(child_node)` evaluates
    to `True`; all other children are discarded."""
    if node.children:
        selected = node.children[section]
        node.result = tuple(filter(condition, selected))
@transformation_factory(Callable)
def remove_children_if(node, condition):
    """Removes all nodes from the result field if the function
    ``condition(child_node)`` evaluates to ``True``."""
    if node.children:
        node.result = tuple(c for c in node.children if not condition(c))
......@@ -773,24 +807,24 @@ def remove_children_if(node, condition):
# Ready-made specializations of `remove_children_if`: drop children that
# are whitespace, empty, or otherwise expendable from a node's result field.
remove_whitespace = remove_children_if(is_whitespace) # partial(remove_children_if, condition=is_whitespace)
remove_empty = remove_children_if(is_empty)
remove_expendables = remove_children_if(is_expendable) # partial(remove_children_if, condition=is_expendable)
@transformation_factory(Callable)
def remove_first(node, condition=lambda node: True):
    """Drops the leading child node when `condition` holds for it;
    leaves the node untouched otherwise."""
    children = node.children
    if children and condition(children[0]):
        node.result = node.result[1:]
@transformation_factory(Callable)
def remove_last(node, condition=lambda node: True):
    """Drops the trailing child node when `condition` holds for it;
    leaves the node untouched otherwise."""
    children = node.children
    if children and condition(children[-1]):
        node.result = node.result[:-1]
# Drops the first and the last child (e.g. enclosing bracket tokens),
# keeping only the slice [1:-1] of the result field.
remove_brackets = keep_children(slice(1,-1))
# @transformation_factory(Callable)
# def remove_first(node, condition=lambda node: True):
# """Removes the first child if the condition is met.
# Otherwise does nothing."""
# if node.children:
# if condition(node.children[0]):
# node.result = node.result[1:]
#
#
# @transformation_factory(Callable)
# def remove_last(node, condition=lambda node: True):
# """Removes the last child if the condition is met.
# Otherwise does nothing."""
# if node.children:
# if condition(node.children[-1]):
# node.result = node.result[:-1]
@transformation_factory
......@@ -802,7 +836,7 @@ def remove_tokens(node, tokens: AbstractSet[str] = frozenset()):
@transformation_factory
def remove_children(node, tag_names: AbstractSet[str]):
def remove_parser(node, tag_names: AbstractSet[str]):
"""Removes children by 'tag name'."""
remove_children_if(node, partial(has_name, tag_names=tag_names))
......
......@@ -268,8 +268,8 @@ def load_if_file(text_or_file) -> str:
return content
except FileNotFoundError as error:
if re.fullmatch(r'[\w/:. \\]+', text_or_file):
raise FileNotFoundError('Not a valid file: ' + text_or_file + '\nAdd "\\n" '
'to distinguish source data from a file name!')
raise FileNotFoundError('Not a valid file: ' + text_or_file + '!\n(Add "\\n" '
'to distinguish source data from a file name.)')
else:
return text_or_file
else:
......
......@@ -14,15 +14,15 @@ blockenv = beginenv sequence §endenv
parblock = "{" sequence §"}"
sequence = { paragraph [PARSEP] }+
paragraph = { !blockcmd (command | block | text) }+
paragraph = { !blockcmd (command | block | text) //~ }+
inlineenv = beginenv { command | block | text }+ endenv
beginenv = "\begin{" §NAME §"}"
endenv = "\end{" §::NAME §"}"
command = CMDNAME [[ config ] block]
command = CMDNAME [[ //~ config ] //~ block ]
config = "[" cfgtext §"]"
block = "{" { text | block } §"}"
block = /{/ { command | text | block } §/}/
text = { cfgtext | (BRACKETS //~) }+
cfgtext = { word_sequence | (ESCAPED //~) }+
......@@ -32,10 +32,10 @@ blockcmd = "\subsection" | "\section" | "\chapter" | "\subsubsection"
| "\paragraph" | "\subparagraph" | "\begin{enumerate}"
| "\begin{itemize}" | "\item" | "\begin{figure}"
CMDNAME = /\\\w+/~
CMDNAME = /\\(?:(?!_)\w)+/~
NAME = /\w+/~
ESCAPED = /\\[%$&]/
ESCAPED = /\\[%$&_\/]/
BRACKETS = /[\[\]]/ # left or right square bracket: [ ]
TEXTCHUNK = /[^\\%$&\{\}\[\]\s\n]+/ # some piece of text excluding whitespace,
# linefeed and special characters
......
......@@ -3,18 +3,28 @@
Professoren, Philister und Vieh; welche vier Stände doch nichts weniger
als streng geschieden sind. Der Viehstand ist der bedeutendste.
2: Paragraphs may contain {\em inline blocks} as well as \emph{inline commands}
and also special \& characters.
3: Paragraphs are separated only by at least one blank line.
Therefore,
this line still belongs to the same paragraph.
[fail:paragraph]
1 : \begin{enumerate}
2 : \item
3 : und Vieh; \paragraph
[match:sequence]
1 : Im allgemeinen werden die Bewohner Göttingens eingeteilt in Studenten,
Professoren, Philister und Vieh; welche vier Stände doch nichts weniger
als streng geschieden sind. Der Viehstand ist der bedeutendste.
1 : Paragraphs are separated by gaps.
Like this one.
Im allgemeinen werden die Bewohner Göttingens eingeteilt in Studenten,
Professoren, Philister und Vieh; welche vier Stände doch nichts weniger
als streng geschieden sind. Der Viehstand ist der bedeutendste.
2 : The second paragraph follows after a long gap.
The parser should accept this, too.
......@@ -23,6 +23,8 @@ import sys
sys.path.extend(['../../', '../', './'])
from DHParser import testing
testing.recompile_grammar('LaTeX.ebnf') # recompiles Grammar only if it has changed
from DHParser import toolkit
from LaTeXCompiler import get_grammar, get_transformer
......
......@@ -27,7 +27,7 @@ from DHParser.syntaxtree import Node, traverse, remove_last, remove_first, \
remove_children_if, reduce_single_child, replace_by_single_child, remove_whitespace, \
remove_expendables, remove_tokens, flatten, is_whitespace, is_expendable, \
collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, TransformationFunc, \
remove_children, remove_empty, has_content, has_name
remove_parser, remove_empty, has_content, has_name
#######################################################################
......@@ -141,7 +141,7 @@ Lyrik_AST_transformation_table = {
# AST Transformations for the Lyrik-grammar
"+": remove_empty,
"bibliographisches":
[remove_children('NZ'), remove_tokens],
[remove_parser('NZ'), remove_tokens],
"autor": [],
"werk": [],
"untertitel": [],
......@@ -157,9 +157,9 @@ Lyrik_AST_transformation_table = {
"ziel":
reduce_single_child,
"gedicht, strophe, text":
[flatten, remove_children('LEERZEILE'), remove_children('NZ')],
[flatten, remove_parser('LEERZEILE'), remove_parser('NZ')],
"titel, serie":
[flatten, remove_children('LEERZEILE'), remove_children('NZ'), collapse],
[flatten, remove_parser('LEERZEILE'), remove_parser('NZ'), collapse],
"zeile": [],
"vers":
collapse,
......
......@@ -31,7 +31,7 @@ from DHParser.dsl import parser_factory, DHPARSER_IMPORTS
class TestInfiLoopsAndRecursion:
def test_direct_left_recursion(self):
def test_direct_left_recursion1(self):
minilang ="""
@ whitespace = linefeed
formula = [ //~ ] expr
......@@ -50,7 +50,7 @@ class TestInfiLoopsAndRecursion:
syntax_tree.log("test_LeftRecursion_direct.cst")
# self.minilang_parser1.log_parsing_history__("test_LeftRecursion_direct")
def test_indirect_left_recursion1(self):
def test_direct_left_recursion2(self):
minilang = """
@ whitespace = linefeed
formula = [ //~ ] expr
......@@ -64,12 +64,10 @@ class TestInfiLoopsAndRecursion:
parser = parser_factory(minilang)()
assert parser
syntax_tree = parser(snippet)
assert not syntax_tree.collect_errors()
assert not syntax_tree.error_flag, syntax_tree.collect_errors()
assert snippet == str(syntax_tree)
if is_logging():
syntax_tree.log("test_LeftRecursion_indirect1.cst")
def test_indirect_left_recursion2(self):
def test_indirect_left_recursion1(self):
minilang = """
Expr = //~ (Product | Sum | Value)
Product = Expr { ('*' | '/') Expr }+
......@@ -80,18 +78,40 @@ class TestInfiLoopsAndRecursion:
assert parser
snippet = "8 * 4"
syntax_tree = parser(snippet)
assert not syntax_tree.error_flag
assert not syntax_tree.error_flag, syntax_tree.collect_errors()
snippet = "7 + 8 * 4"
syntax_tree = parser(snippet)
assert not syntax_tree.error_flag
print(syntax_tree.as_sxpr())
assert not syntax_tree.error_flag, syntax_tree.collect_errors()
snippet = "9 + 8 * (4 + 3)"
syntax_tree = parser(snippet)
assert not syntax_tree.error_flag, syntax_tree.collect_errors()
assert snippet == str(syntax_tree)
if is_logging():
syntax_tree.log("test_LeftRecursion_indirect2.cst")
# def test_indirect_left_recursion2(self):
# """This will always fail, because of the precedence rule of the
# "|"-operator. (Note: This is a difference between PEG and
# classical EBNF). DHParser is a PEG-Parser although it uses the
# syntax of classical EBNF."""
# minilang = """
# Expr = //~ (Product | Sum | Value)
# Product = Expr { ('*' | '/') Expr }
# Sum = Expr { ('+' | '-') Expr }
# Value = /[0-9.]+/~ | '(' Expr ')'
# """
# parser = parser_factory(minilang)()
# assert parser
# snippet = "8 * 4"
# syntax_tree = parser(snippet)
# assert not syntax_tree.error_flag, syntax_tree.collect_errors()
# snippet = "7 + 8 * 4"
# syntax_tree = parser(snippet)
# print(syntax_tree.as_sxpr())
# assert not syntax_tree.error_flag, syntax_tree.collect_errors()
# snippet = "9 + 8 * (4 + 3)"
# syntax_tree = parser(snippet)
# assert not syntax_tree.error_flag, syntax_tree.collect_errors()
# assert snippet == str(syntax_tree)
def test_inifinite_loops(self):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment