Commit e8c626df authored by Eckhart Arnold's avatar Eckhart Arnold
Browse files

- changes and additions to AST transformation primitives

parent c8bde767
...@@ -84,8 +84,8 @@ from DHParser.syntaxtree import Node, traverse, remove_children_if, \\ ...@@ -84,8 +84,8 @@ from DHParser.syntaxtree import Node, traverse, remove_children_if, \\
reduce_single_child, replace_by_single_child, remove_whitespace, \\ reduce_single_child, replace_by_single_child, remove_whitespace, \\
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \\ remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \\
is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \\ is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \\
TransformationFunc, remove_children, remove_content, remove_first, remove_last, \\ TransformationFunc, remove_parser, remove_content, remove_brackets, \\
has_name, has_content keep_children, has_name, has_content
''' '''
......
...@@ -32,9 +32,10 @@ from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name ...@@ -32,9 +32,10 @@ from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name
from DHParser.parsers import Grammar, mixin_comment, nil_scanner, Forward, RE, NegativeLookahead, \ from DHParser.parsers import Grammar, mixin_comment, nil_scanner, Forward, RE, NegativeLookahead, \
Alternative, Series, Optional, Required, OneOrMore, ZeroOrMore, Token, Compiler, \ Alternative, Series, Optional, Required, OneOrMore, ZeroOrMore, Token, Compiler, \
ScannerFunc ScannerFunc
from DHParser.syntaxtree import Node, traverse, remove_first, remove_last, reduce_single_child, \ from DHParser.syntaxtree import Node, traverse, remove_brackets, \
replace_by_single_child, TOKEN_PTYPE, remove_expendables, remove_tokens, flatten, \ reduce_single_child, replace_by_single_child, TOKEN_PTYPE, remove_expendables, \
forbid, assert_content, WHITESPACE_PTYPE, key_tag_name, TransformationFunc remove_tokens, flatten, forbid, assert_content, WHITESPACE_PTYPE, key_tag_name, \
TransformationFunc
from DHParser.versionnumber import __version__ from DHParser.versionnumber import __version__
...@@ -212,7 +213,7 @@ EBNF_transformation_table = { ...@@ -212,7 +213,7 @@ EBNF_transformation_table = {
"group": "group":
[remove_tokens('(', ')'), replace_by_single_child], [remove_tokens('(', ')'), replace_by_single_child],
"oneormore, repetition, option": "oneormore, repetition, option":
[reduce_single_child, remove_first, remove_last], [reduce_single_child, remove_brackets],
"symbol, literal, regexp": "symbol, literal, regexp":
reduce_single_child, reduce_single_child,
(TOKEN_PTYPE, WHITESPACE_PTYPE): (TOKEN_PTYPE, WHITESPACE_PTYPE):
......
...@@ -209,6 +209,7 @@ def add_parser_guard(parser_func): ...@@ -209,6 +209,7 @@ def add_parser_guard(parser_func):
# if parser did not match but a saved result exists, assume # left recursion and use the saved result
# left recursion and use the saved result # left recursion and use the saved result
node, rest = parser.visited[location] node, rest = parser.visited[location]
# Note: For this to work None-results must not be cached!
parser.recursion_counter[location] -= 1 parser.recursion_counter[location] -= 1
......
...@@ -20,7 +20,7 @@ permissions and limitations under the License. ...@@ -20,7 +20,7 @@ permissions and limitations under the License.
import copy import copy
import inspect import inspect
import os import os
from functools import partial, singledispatch from functools import partial, reduce, singledispatch
try: try:
import regex as re import regex as re
except ImportError: except ImportError:
...@@ -50,6 +50,7 @@ __all__ = ['WHITESPACE_PTYPE', ...@@ -50,6 +50,7 @@ __all__ = ['WHITESPACE_PTYPE',
'reduce_single_child', 'reduce_single_child',
'replace_parser', 'replace_parser',
'collapse', 'collapse',
'join',
'replace_content', 'replace_content',
'is_whitespace', 'is_whitespace',
'is_empty', 'is_empty',
...@@ -57,14 +58,14 @@ __all__ = ['WHITESPACE_PTYPE', ...@@ -57,14 +58,14 @@ __all__ = ['WHITESPACE_PTYPE',
'is_token', 'is_token',
'has_name', 'has_name',
'has_content', 'has_content',
'remove_children_if', 'remove_parser',
'remove_children',
'remove_content', 'remove_content',
'remove_first', 'keep_children',
'remove_last', 'remove_children_if',
'remove_whitespace', 'remove_whitespace',
'remove_empty', 'remove_empty',
'remove_expendables', 'remove_expendables',
'remove_brackets',
'remove_tokens', 'remove_tokens',
'flatten', 'flatten',
'forbid', 'forbid',
...@@ -422,7 +423,7 @@ class Node: ...@@ -422,7 +423,7 @@ class Node:
with open(os.path.join(log_dir(), st_file_name), "w", encoding="utf-8") as f: with open(os.path.join(log_dir(), st_file_name), "w", encoding="utf-8") as f:
f.write(self.as_sxpr()) f.write(self.as_sxpr())
def find(self, match_function) -> Iterator['Node']: def find(self, match_function: Callable) -> Iterator['Node']:
"""Finds nodes in the tree that match a specific criterion. """Finds nodes in the tree that match a specific criterion.
``find`` is a generator that yields all nodes for which the ``find`` is a generator that yields all nodes for which the
...@@ -722,6 +723,30 @@ def collapse(node): ...@@ -722,6 +723,30 @@ def collapse(node):
node.result = str(node) node.result = str(node)
@transformation_factory
def join(node, tag_names: List[str]):
    """Joins runs of adjacent children whose tag name is in ``tag_names``
    into a single child node.

    The merged node is created with a mock parser whose name (and,
    optionally, ptype) is taken from the first entry of ``tag_names``,
    which may be given as ``"name"`` or ``"name:ptype"``.

    NOTE(review): adjacent children are only merged while they agree on
    having (or not having) children of their own, presumably because a
    leaf's string result cannot be concatenated with a branch's tuple
    result — confirm against Node.result semantics.
    """
    merged = []
    # the first entry doubles as the tag of the merged node: "name[:ptype]"
    name, ptype = (tag_names[0].split(':') + [''])[:2]
    children = node.children
    if children:
        total = len(children)
        i = 0
        while i < total:
            # copy children that are not subject to joining
            while i < total and children[i].tag_name not in tag_names:
                merged.append(children[i])
                i += 1
            # extend the run [i:k] of adjacent joinable children; stop when
            # leaf-ness changes, since str and tuple results cannot be added
            k = i + 1
            while (k < total and children[k].tag_name in tag_names
                   and bool(children[i].children) == bool(children[k].children)):
                k += 1
            if i < total:
                # concatenate the run's results (str + str or tuple + tuple)
                joined = reduce(lambda a, b: a + b,
                                (child.result for child in children[i:k]))
                merged.append(Node(MockParser(name, ptype), joined))
            i = k
        node.result = tuple(merged)
# ------------------------------------------------ # ------------------------------------------------
# #
# destructive transformations: # destructive transformations:
...@@ -762,9 +787,18 @@ def has_content(node, contents: AbstractSet[str]) -> bool: ...@@ -762,9 +787,18 @@ def has_content(node, contents: AbstractSet[str]) -> bool:
return str(node) in contents return str(node) in contents
@transformation_factory
def keep_children(node, section: slice = slice(None, None, None),
                  condition=lambda node: True):
    """Keeps only those children that lie within the slice ``section``
    of the result field and for which ``condition(child_node)``
    evaluates to ``True``. Nodes without children are left untouched."""
    if node.children:
        node.result = tuple(child for child in node.children[section]
                            if condition(child))
@transformation_factory(Callable)
def remove_children_if(node, condition):
    """Removes all children for which the function
    ``condition(child_node)`` evaluates to ``True``."""
    if node.children:
        node.result = tuple(child for child in node.children
                            if not condition(child))
...@@ -773,24 +807,24 @@ def remove_children_if(node, condition): ...@@ -773,24 +807,24 @@ def remove_children_if(node, condition):
remove_whitespace = remove_children_if(is_whitespace) # partial(remove_children_if, condition=is_whitespace) remove_whitespace = remove_children_if(is_whitespace) # partial(remove_children_if, condition=is_whitespace)
remove_empty = remove_children_if(is_empty) remove_empty = remove_children_if(is_empty)
remove_expendables = remove_children_if(is_expendable) # partial(remove_children_if, condition=is_expendable) remove_expendables = remove_children_if(is_expendable) # partial(remove_children_if, condition=is_expendable)
remove_brackets = keep_children(slice(1,-1))
@transformation_factory(Callable) # @transformation_factory(Callable)
def remove_first(node, condition=lambda node: True): # def remove_first(node, condition=lambda node: True):
"""Removes the first child if the condition is met. # """Removes the first child if the condition is met.
Otherwise does nothing.""" # Otherwise does nothing."""
if node.children: # if node.children:
if condition(node.children[0]): # if condition(node.children[0]):
node.result = node.result[1:] # node.result = node.result[1:]
#
#
@transformation_factory(Callable) # @transformation_factory(Callable)
def remove_last(node, condition=lambda node: True): # def remove_last(node, condition=lambda node: True):
"""Removes the last child if the condition is met. # """Removes the last child if the condition is met.
Otherwise does nothing.""" # Otherwise does nothing."""
if node.children: # if node.children:
if condition(node.children[-1]): # if condition(node.children[-1]):
node.result = node.result[:-1] # node.result = node.result[:-1]
@transformation_factory @transformation_factory
...@@ -802,7 +836,7 @@ def remove_tokens(node, tokens: AbstractSet[str] = frozenset()): ...@@ -802,7 +836,7 @@ def remove_tokens(node, tokens: AbstractSet[str] = frozenset()):
@transformation_factory @transformation_factory
def remove_children(node, tag_names: AbstractSet[str]): def remove_parser(node, tag_names: AbstractSet[str]):
"""Removes children by 'tag name'.""" """Removes children by 'tag name'."""
remove_children_if(node, partial(has_name, tag_names=tag_names)) remove_children_if(node, partial(has_name, tag_names=tag_names))
......
...@@ -268,8 +268,8 @@ def load_if_file(text_or_file) -> str: ...@@ -268,8 +268,8 @@ def load_if_file(text_or_file) -> str:
return content return content
except FileNotFoundError as error: except FileNotFoundError as error:
if re.fullmatch(r'[\w/:. \\]+', text_or_file): if re.fullmatch(r'[\w/:. \\]+', text_or_file):
raise FileNotFoundError('Not a valid file: ' + text_or_file + '\nAdd "\\n" ' raise FileNotFoundError('Not a valid file: ' + text_or_file + '!\n(Add "\\n" '
'to distinguish source data from a file name!') 'to distinguish source data from a file name.)')
else: else:
return text_or_file return text_or_file
else: else:
......
...@@ -14,15 +14,15 @@ blockenv = beginenv sequence §endenv ...@@ -14,15 +14,15 @@ blockenv = beginenv sequence §endenv
parblock = "{" sequence §"}" parblock = "{" sequence §"}"
sequence = { paragraph [PARSEP] }+ sequence = { paragraph [PARSEP] }+
paragraph = { !blockcmd (command | block | text) }+ paragraph = { !blockcmd (command | block | text) //~ }+
inlineenv = beginenv { command | block | text }+ endenv inlineenv = beginenv { command | block | text }+ endenv
beginenv = "\begin{" §NAME §"}" beginenv = "\begin{" §NAME §"}"
endenv = "\end{" §::NAME §"}" endenv = "\end{" §::NAME §"}"
command = CMDNAME [[ config ] block] command = CMDNAME [[ //~ config ] //~ block ]
config = "[" cfgtext §"]" config = "[" cfgtext §"]"
block = "{" { text | block } §"}" block = /{/ { command | text | block } §/}/
text = { cfgtext | (BRACKETS //~) }+ text = { cfgtext | (BRACKETS //~) }+
cfgtext = { word_sequence | (ESCAPED //~) }+ cfgtext = { word_sequence | (ESCAPED //~) }+
...@@ -32,10 +32,10 @@ blockcmd = "\subsection" | "\section" | "\chapter" | "\subsubsection" ...@@ -32,10 +32,10 @@ blockcmd = "\subsection" | "\section" | "\chapter" | "\subsubsection"
| "\paragraph" | "\subparagraph" | "\begin{enumerate}" | "\paragraph" | "\subparagraph" | "\begin{enumerate}"
| "\begin{itemize}" | "\item" | "\begin{figure}" | "\begin{itemize}" | "\item" | "\begin{figure}"
CMDNAME = /\\\w+/~ CMDNAME = /\\(?:(?!_)\w)+/~
NAME = /\w+/~ NAME = /\w+/~
ESCAPED = /\\[%$&]/ ESCAPED = /\\[%$&_\/]/
BRACKETS = /[\[\]]/ # left or right square bracket: [ ] BRACKETS = /[\[\]]/ # left or right square bracket: [ ]
TEXTCHUNK = /[^\\%$&\{\}\[\]\s\n]+/ # some piece of text excluding whitespace, TEXTCHUNK = /[^\\%$&\{\}\[\]\s\n]+/ # some piece of text excluding whitespace,
# linefeed and special characters # linefeed and special characters
......
...@@ -3,18 +3,28 @@ ...@@ -3,18 +3,28 @@
Professoren, Philister und Vieh; welche vier Stände doch nichts weniger Professoren, Philister und Vieh; welche vier Stände doch nichts weniger
als streng geschieden sind. Der Viehstand ist der bedeutendste. als streng geschieden sind. Der Viehstand ist der bedeutendste.
2: Paragraphs may contain {\em inline blocks} as well as \emph{inline commands}
and also special \& characters.
3: Paragraphs are separated only by at least one blank line.
Therefore,
this line still belongs to the same paragraph.
[fail:paragraph] [fail:paragraph]
1 : \begin{enumerate} 1 : \begin{enumerate}
2 : \item 2 : \item
3 : und Vieh; \paragraph 3 : und Vieh; \paragraph
[match:sequence] [match:sequence]
1 : Im allgemeinen werden die Bewohner Göttingens eingeteilt in Studenten, 1 : Paragraphs are separated by gaps.
Professoren, Philister und Vieh; welche vier Stände doch nichts weniger
als streng geschieden sind. Der Viehstand ist der bedeutendste.
Like this one.
Im allgemeinen werden die Bewohner Göttingens eingeteilt in Studenten,
Professoren, Philister und Vieh; welche vier Stände doch nichts weniger 2 : The second paragraph follows after a long gap.
als streng geschieden sind. Der Viehstand ist der bedeutendste.
The parser should accept this, too.
...@@ -23,6 +23,8 @@ import sys ...@@ -23,6 +23,8 @@ import sys
sys.path.extend(['../../', '../', './']) sys.path.extend(['../../', '../', './'])
from DHParser import testing from DHParser import testing
testing.recompile_grammar('LaTeX.ebnf') # recompiles Grammar only if it has changed
from DHParser import toolkit from DHParser import toolkit
from LaTeXCompiler import get_grammar, get_transformer from LaTeXCompiler import get_grammar, get_transformer
......
...@@ -27,7 +27,7 @@ from DHParser.syntaxtree import Node, traverse, remove_last, remove_first, \ ...@@ -27,7 +27,7 @@ from DHParser.syntaxtree import Node, traverse, remove_last, remove_first, \
remove_children_if, reduce_single_child, replace_by_single_child, remove_whitespace, \ remove_children_if, reduce_single_child, replace_by_single_child, remove_whitespace, \
remove_expendables, remove_tokens, flatten, is_whitespace, is_expendable, \ remove_expendables, remove_tokens, flatten, is_whitespace, is_expendable, \
collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, TransformationFunc, \ collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, TransformationFunc, \
remove_children, remove_empty, has_content, has_name remove_parser, remove_empty, has_content, has_name
####################################################################### #######################################################################
...@@ -141,7 +141,7 @@ Lyrik_AST_transformation_table = { ...@@ -141,7 +141,7 @@ Lyrik_AST_transformation_table = {
# AST Transformations for the Lyrik-grammar # AST Transformations for the Lyrik-grammar
"+": remove_empty, "+": remove_empty,
"bibliographisches": "bibliographisches":
[remove_children('NZ'), remove_tokens], [remove_parser('NZ'), remove_tokens],
"autor": [], "autor": [],
"werk": [], "werk": [],
"untertitel": [], "untertitel": [],
...@@ -157,9 +157,9 @@ Lyrik_AST_transformation_table = { ...@@ -157,9 +157,9 @@ Lyrik_AST_transformation_table = {
"ziel": "ziel":
reduce_single_child, reduce_single_child,
"gedicht, strophe, text": "gedicht, strophe, text":
[flatten, remove_children('LEERZEILE'), remove_children('NZ')], [flatten, remove_parser('LEERZEILE'), remove_parser('NZ')],
"titel, serie": "titel, serie":
[flatten, remove_children('LEERZEILE'), remove_children('NZ'), collapse], [flatten, remove_parser('LEERZEILE'), remove_parser('NZ'), collapse],
"zeile": [], "zeile": [],
"vers": "vers":
collapse, collapse,
......
...@@ -31,7 +31,7 @@ from DHParser.dsl import parser_factory, DHPARSER_IMPORTS ...@@ -31,7 +31,7 @@ from DHParser.dsl import parser_factory, DHPARSER_IMPORTS
class TestInfiLoopsAndRecursion: class TestInfiLoopsAndRecursion:
def test_direct_left_recursion(self): def test_direct_left_recursion1(self):
minilang =""" minilang ="""
@ whitespace = linefeed @ whitespace = linefeed
formula = [ //~ ] expr formula = [ //~ ] expr
...@@ -50,7 +50,7 @@ class TestInfiLoopsAndRecursion: ...@@ -50,7 +50,7 @@ class TestInfiLoopsAndRecursion:
syntax_tree.log("test_LeftRecursion_direct.cst") syntax_tree.log("test_LeftRecursion_direct.cst")
# self.minilang_parser1.log_parsing_history__("test_LeftRecursion_direct") # self.minilang_parser1.log_parsing_history__("test_LeftRecursion_direct")
def test_indirect_left_recursion1(self): def test_direct_left_recursion2(self):
minilang = """ minilang = """
@ whitespace = linefeed @ whitespace = linefeed
formula = [ //~ ] expr formula = [ //~ ] expr
...@@ -64,12 +64,10 @@ class TestInfiLoopsAndRecursion: ...@@ -64,12 +64,10 @@ class TestInfiLoopsAndRecursion:
parser = parser_factory(minilang)() parser = parser_factory(minilang)()
assert parser assert parser
syntax_tree = parser(snippet) syntax_tree = parser(snippet)
assert not syntax_tree.collect_errors() assert not syntax_tree.error_flag, syntax_tree.collect_errors()
assert snippet == str(syntax_tree) assert snippet == str(syntax_tree)
if is_logging():
syntax_tree.log("test_LeftRecursion_indirect1.cst")
def test_indirect_left_recursion2(self): def test_indirect_left_recursion1(self):
minilang = """ minilang = """
Expr = //~ (Product | Sum | Value) Expr = //~ (Product | Sum | Value)
Product = Expr { ('*' | '/') Expr }+ Product = Expr { ('*' | '/') Expr }+
...@@ -80,18 +78,40 @@ class TestInfiLoopsAndRecursion: ...@@ -80,18 +78,40 @@ class TestInfiLoopsAndRecursion:
assert parser assert parser
snippet = "8 * 4" snippet = "8 * 4"
syntax_tree = parser(snippet) syntax_tree = parser(snippet)
assert not syntax_tree.error_flag assert not syntax_tree.error_flag, syntax_tree.collect_errors()
snippet = "7 + 8 * 4" snippet = "7 + 8 * 4"
syntax_tree = parser(snippet) syntax_tree = parser(snippet)
assert not syntax_tree.error_flag print(syntax_tree.as_sxpr())
assert not syntax_tree.error_flag, syntax_tree.collect_errors()
snippet = "9 + 8 * (4 + 3)" snippet = "9 + 8 * (4 + 3)"
syntax_tree = parser(snippet) syntax_tree = parser(snippet)
assert not syntax_tree.error_flag, syntax_tree.collect_errors() assert not syntax_tree.error_flag, syntax_tree.collect_errors()
assert snippet == str(syntax_tree) assert snippet == str(syntax_tree)
if is_logging():
syntax_tree.log("test_LeftRecursion_indirect2.cst") # def test_indirect_left_recursion2(self):
# """This will always fail, because of the precedence rule of the
# "|"-operator. (Note: This is a difference between PEG and
# classical EBNF). DHParser is a PEG-Parser although it uses the
# syntax of classical EBNF."""
# minilang = """
# Expr = //~ (Product | Sum | Value)
# Product = Expr { ('*' | '/') Expr }
# Sum = Expr { ('+' | '-') Expr }
# Value = /[0-9.]+/~ | '(' Expr ')'
# """
# parser = parser_factory(minilang)()
# assert parser
# snippet = "8 * 4"
# syntax_tree = parser(snippet)
# assert not syntax_tree.error_flag, syntax_tree.collect_errors()
# snippet = "7 + 8 * 4"
# syntax_tree = parser(snippet)
# print(syntax_tree.as_sxpr())
# assert not syntax_tree.error_flag, syntax_tree.collect_errors()
# snippet = "9 + 8 * (4 + 3)"
# syntax_tree = parser(snippet)
# assert not syntax_tree.error_flag, syntax_tree.collect_errors()
# assert snippet == str(syntax_tree)
def test_inifinite_loops(self): def test_inifinite_loops(self):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment