Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
9.2.2023: Due to updates GitLab will be unavailable for some minutes between 9:00 and 11:00.
Open sidebar
badw-it
DHParser
Commits
e8c626df
Commit
e8c626df
authored
Jul 08, 2017
by
Eckhart Arnold
Browse files
- changes and additions to AST transformation primitives
parent
c8bde767
Changes
11
Hide whitespace changes
Inline
Side-by-side
DHParser/dsl.py
View file @
e8c626df
...
...
@@ -84,8 +84,8 @@ from DHParser.syntaxtree import Node, traverse, remove_children_if, \\
reduce_single_child, replace_by_single_child, remove_whitespace,
\\
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace,
\\
is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE,
\\
TransformationFunc, remove_
children
, remove_content, remove_
first, remove_last
,
\\
has_name, has_content
TransformationFunc, remove_
parser
, remove_content, remove_
brackets
,
\\
keep_children,
has_name, has_content
'''
...
...
DHParser/ebnf.py
View file @
e8c626df
...
...
@@ -32,9 +32,10 @@ from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name
from
DHParser.parsers
import
Grammar
,
mixin_comment
,
nil_scanner
,
Forward
,
RE
,
NegativeLookahead
,
\
Alternative
,
Series
,
Optional
,
Required
,
OneOrMore
,
ZeroOrMore
,
Token
,
Compiler
,
\
ScannerFunc
from
DHParser.syntaxtree
import
Node
,
traverse
,
remove_first
,
remove_last
,
reduce_single_child
,
\
replace_by_single_child
,
TOKEN_PTYPE
,
remove_expendables
,
remove_tokens
,
flatten
,
\
forbid
,
assert_content
,
WHITESPACE_PTYPE
,
key_tag_name
,
TransformationFunc
from
DHParser.syntaxtree
import
Node
,
traverse
,
remove_brackets
,
\
reduce_single_child
,
replace_by_single_child
,
TOKEN_PTYPE
,
remove_expendables
,
\
remove_tokens
,
flatten
,
forbid
,
assert_content
,
WHITESPACE_PTYPE
,
key_tag_name
,
\
TransformationFunc
from
DHParser.versionnumber
import
__version__
...
...
@@ -212,7 +213,7 @@ EBNF_transformation_table = {
"group"
:
[
remove_tokens
(
'('
,
')'
),
replace_by_single_child
],
"oneormore, repetition, option"
:
[
reduce_single_child
,
remove_
first
,
remove_last
],
[
reduce_single_child
,
remove_
brackets
],
"symbol, literal, regexp"
:
reduce_single_child
,
(
TOKEN_PTYPE
,
WHITESPACE_PTYPE
):
...
...
DHParser/parsers.py
View file @
e8c626df
...
...
@@ -209,6 +209,7 @@ def add_parser_guard(parser_func):
# if parser did not match but a saved result exists, assume
# left recursion and use the saved result
node
,
rest
=
parser
.
visited
[
location
]
# Note: For this to work None-results must not be cached!
parser
.
recursion_counter
[
location
]
-=
1
...
...
DHParser/syntaxtree.py
View file @
e8c626df
...
...
@@ -20,7 +20,7 @@ permissions and limitations under the License.
import
copy
import
inspect
import
os
from
functools
import
partial
,
singledispatch
from
functools
import
partial
,
reduce
,
singledispatch
try
:
import
regex
as
re
except
ImportError
:
...
...
@@ -50,6 +50,7 @@ __all__ = ['WHITESPACE_PTYPE',
'reduce_single_child'
,
'replace_parser'
,
'collapse'
,
'join'
,
'replace_content'
,
'is_whitespace'
,
'is_empty'
,
...
...
@@ -57,14 +58,14 @@ __all__ = ['WHITESPACE_PTYPE',
'is_token'
,
'has_name'
,
'has_content'
,
'remove_children_if'
,
'remove_children'
,
'remove_parser'
,
'remove_content'
,
'
remove_first
'
,
'remove_
last
'
,
'
keep_children
'
,
'remove_
children_if
'
,
'remove_whitespace'
,
'remove_empty'
,
'remove_expendables'
,
'remove_brackets'
,
'remove_tokens'
,
'flatten'
,
'forbid'
,
...
...
@@ -422,7 +423,7 @@ class Node:
with
open
(
os
.
path
.
join
(
log_dir
(),
st_file_name
),
"w"
,
encoding
=
"utf-8"
)
as
f
:
f
.
write
(
self
.
as_sxpr
())
def
find
(
self
,
match_function
)
->
Iterator
[
'Node'
]:
def
find
(
self
,
match_function
:
Callable
)
->
Iterator
[
'Node'
]:
"""Finds nodes in the tree that match a specific criterion.
``find`` is a generator that yields all nodes for which the
...
...
@@ -722,6 +723,30 @@ def collapse(node):
node
.
result
=
str
(
node
)
@
transformation_factory
def
join
(
node
,
tag_names
:
List
[
str
]):
"""Joins all children next to each other and with particular tag-
names into a single child node with mock parser 'parser_name'.
"""
result
=
[]
name
,
ptype
=
(
tag_names
[
0
].
split
(
':'
)
+
[
''
])[:
2
]
if
node
.
children
:
i
=
0
;
L
=
len
(
node
.
children
)
while
i
<
L
:
while
i
<
L
and
not
node
.
children
[
i
].
tag_name
in
tag_names
:
result
.
append
(
node
.
children
[
i
])
i
+=
1
k
=
i
+
1
while
(
k
<
L
and
node
.
children
[
k
].
tag_name
in
tag_names
and
bool
(
node
.
children
[
i
].
children
)
==
bool
(
node
.
children
[
k
].
children
)):
k
+=
1
if
i
<
L
:
result
.
append
(
Node
(
MockParser
(
name
,
ptype
),
reduce
(
lambda
a
,
b
:
a
+
b
,
(
node
.
result
for
node
in
node
.
children
[
i
:
k
]))))
i
=
k
node
.
result
=
tuple
(
result
)
# ------------------------------------------------
#
# destructive transformations:
...
...
@@ -762,9 +787,18 @@ def has_content(node, contents: AbstractSet[str]) -> bool:
return
str
(
node
)
in
contents
@
transformation_factory
(
Callable
)
# @singledispatch
@
transformation_factory
def
keep_children
(
node
,
section
:
slice
=
slice
(
None
,
None
,
None
),
condition
=
lambda
node
:
True
):
"""Keeps only the nodes which fall into a slice of the result field
and for which the function `condition(child_node)` evaluates to
`True`."""
if
node
.
children
:
node
.
result
=
tuple
(
c
for
c
in
node
.
children
[
section
]
if
condition
(
c
))
@
transformation_factory
(
Callable
)
def
remove_children_if
(
node
,
condition
):
"""Removes all nodes from the result field if the function
"""Removes all nodes from
a slice of
the result field if the function
``condition(child_node)`` evaluates to ``True``."""
if
node
.
children
:
node
.
result
=
tuple
(
c
for
c
in
node
.
children
if
not
condition
(
c
))
...
...
@@ -773,24 +807,24 @@ def remove_children_if(node, condition):
remove_whitespace
=
remove_children_if
(
is_whitespace
)
# partial(remove_children_if, condition=is_whitespace)
remove_empty
=
remove_children_if
(
is_empty
)
remove_expendables
=
remove_children_if
(
is_expendable
)
# partial(remove_children_if, condition=is_expendable)
@
transformation_factory
(
Callable
)
def
remove_first
(
node
,
condition
=
lambda
node
:
True
):
"""Removes the first child if the condition is met.
Otherwise does nothing."""
if
node
.
children
:
if
condition
(
node
.
children
[
0
]):
node
.
result
=
node
.
result
[
1
:]
@
transformation_factory
(
Callable
)
def
remove_last
(
node
,
condition
=
lambda
node
:
True
):
"""Removes the last child if the condition is met.
Otherwise does nothing."""
if
node
.
children
:
if
condition
(
node
.
children
[
-
1
]):
node
.
result
=
node
.
result
[:
-
1
]
remove_brackets
=
keep_children
(
slice
(
1
,
-
1
))
#
@transformation_factory(Callable)
#
def remove_first(node, condition=lambda node: True):
#
"""Removes the first child if the condition is met.
#
Otherwise does nothing."""
#
if node.children:
#
if condition(node.children[0]):
#
node.result = node.result[1:]
#
#
#
@transformation_factory(Callable)
#
def remove_last(node, condition=lambda node: True):
#
"""Removes the last child if the condition is met.
#
Otherwise does nothing."""
#
if node.children:
#
if condition(node.children[-1]):
#
node.result = node.result[:-1]
@
transformation_factory
...
...
@@ -802,7 +836,7 @@ def remove_tokens(node, tokens: AbstractSet[str] = frozenset()):
@
transformation_factory
def
remove_
children
(
node
,
tag_names
:
AbstractSet
[
str
]):
def
remove_
parser
(
node
,
tag_names
:
AbstractSet
[
str
]):
"""Removes children by 'tag name'."""
remove_children_if
(
node
,
partial
(
has_name
,
tag_names
=
tag_names
))
...
...
DHParser/toolkit.py
View file @
e8c626df
...
...
@@ -268,8 +268,8 @@ def load_if_file(text_or_file) -> str:
return
content
except
FileNotFoundError
as
error
:
if
re
.
fullmatch
(
r
'[\w/:. \\]+'
,
text_or_file
):
raise
FileNotFoundError
(
'Not a valid file: '
+
text_or_file
+
'
\n
Add "
\\
n" '
'to distinguish source data from a file name
!
'
)
raise
FileNotFoundError
(
'Not a valid file: '
+
text_or_file
+
'
!
\n
(
Add "
\\
n" '
'to distinguish source data from a file name
.)
'
)
else
:
return
text_or_file
else
:
...
...
examples/LaTeX/LaTeX.ebnf
View file @
e8c626df
...
...
@@ -14,15 +14,15 @@ blockenv = beginenv sequence §endenv
parblock = "{" sequence §"}"
sequence = { paragraph [PARSEP] }+
paragraph = { !blockcmd (command | block | text) }+
paragraph = { !blockcmd (command | block | text)
//~
}+
inlineenv = beginenv { command | block | text }+ endenv
beginenv = "\begin{" §NAME §"}"
endenv = "\end{" §::NAME §"}"
command = CMDNAME [[ config ] block]
command = CMDNAME [[
//~
config ]
//~
block
]
config = "[" cfgtext §"]"
block =
"{" {
text | block } §
"}"
block =
/{/ { command |
text | block } §
/}/
text = { cfgtext | (BRACKETS //~) }+
cfgtext = { word_sequence | (ESCAPED //~) }+
...
...
@@ -32,10 +32,10 @@ blockcmd = "\subsection" | "\section" | "\chapter" | "\subsubsection"
| "\paragraph" | "\subparagraph" | "\begin{enumerate}"
| "\begin{itemize}" | "\item" | "\begin{figure}"
CMDNAME = /\\\w+/~
CMDNAME = /\\
(?:(?!_)
\w
)
+/~
NAME = /\w+/~
ESCAPED = /\\[%$&]/
ESCAPED = /\\[%$&
_\/
]/
BRACKETS = /[\[\]]/ # left or right square bracket: [ ]
TEXTCHUNK = /[^\\%$&\{\}\[\]\s\n]+/ # some piece of text excluding whitespace,
# linefeed and special characters
...
...
examples/LaTeX/grammar_tests/test_paragraph.ini
View file @
e8c626df
...
...
@@ -3,18 +3,28 @@
Professoren,
Philister
und
Vieh
; welche vier Stände doch nichts weniger
als
streng
geschieden
sind.
Der
Viehstand
ist
der
bedeutendste.
2:
Paragraphs
may
contain
{\em
inline
blocks}
as
well
as
\emph{inline
commands}
and
also
special
\&
characters.
3:
Paragraphs
are
separated
only
by
at
least
one
blank
line.
Therefore,
this
line
still
belongs
to
the
same
paragraph.
[fail:paragraph]
1
:
\begin{enumerate}
2
:
\item
3
:
und
Vieh
; \paragraph
[match:sequence]
1
:
Im
allgemeinen
werden
die
Bewohner
Göttingens
eingeteilt
in
Studenten,
Professoren,
Philister
und
Vieh
; welche vier Stände doch nichts weniger
als
streng
geschieden
sind.
Der
Viehstand
ist
der
bedeutendste.
1
:
Paragraphs
are
separated
by
gaps.
Like
this
one.
Im
allgemeinen
werden
die
Bewohner
Göttingens
eingeteilt
in
Studenten,
Professoren,
Philister
und
Vieh
; welche vier Stände doch nichts weniger
als
streng
geschieden
sind.
Der
Viehstand
ist
der
bedeutendste.
2
:
The
second
paragraph
follows
after
a
long
gap.
The
parser
should
accept
this,
too.
examples/LaTeX/recompile_grammar.py
→
examples/LaTeX/recompile_grammar
_obsolote
.py
View file @
e8c626df
File moved
examples/LaTeX/tst_grammar.py
View file @
e8c626df
...
...
@@ -23,6 +23,8 @@ import sys
sys
.
path
.
extend
([
'../../'
,
'../'
,
'./'
])
from
DHParser
import
testing
testing
.
recompile_grammar
(
'LaTeX.ebnf'
)
# recompiles Grammar only if it has changed
from
DHParser
import
toolkit
from
LaTeXCompiler
import
get_grammar
,
get_transformer
...
...
examples/Tutorial/LyrikCompiler_example.py
View file @
e8c626df
...
...
@@ -27,7 +27,7 @@ from DHParser.syntaxtree import Node, traverse, remove_last, remove_first, \
remove_children_if
,
reduce_single_child
,
replace_by_single_child
,
remove_whitespace
,
\
remove_expendables
,
remove_tokens
,
flatten
,
is_whitespace
,
is_expendable
,
\
collapse
,
replace_content
,
WHITESPACE_PTYPE
,
TOKEN_PTYPE
,
TransformationFunc
,
\
remove_
children
,
remove_empty
,
has_content
,
has_name
remove_
parser
,
remove_empty
,
has_content
,
has_name
#######################################################################
...
...
@@ -141,7 +141,7 @@ Lyrik_AST_transformation_table = {
# AST Transformations for the Lyrik-grammar
"+"
:
remove_empty
,
"bibliographisches"
:
[
remove_
children
(
'NZ'
),
remove_tokens
],
[
remove_
parser
(
'NZ'
),
remove_tokens
],
"autor"
:
[],
"werk"
:
[],
"untertitel"
:
[],
...
...
@@ -157,9 +157,9 @@ Lyrik_AST_transformation_table = {
"ziel"
:
reduce_single_child
,
"gedicht, strophe, text"
:
[
flatten
,
remove_
children
(
'LEERZEILE'
),
remove_
children
(
'NZ'
)],
[
flatten
,
remove_
parser
(
'LEERZEILE'
),
remove_
parser
(
'NZ'
)],
"titel, serie"
:
[
flatten
,
remove_
children
(
'LEERZEILE'
),
remove_
children
(
'NZ'
),
collapse
],
[
flatten
,
remove_
parser
(
'LEERZEILE'
),
remove_
parser
(
'NZ'
),
collapse
],
"zeile"
:
[],
"vers"
:
collapse
,
...
...
test/test_parsers.py
View file @
e8c626df
...
...
@@ -31,7 +31,7 @@ from DHParser.dsl import parser_factory, DHPARSER_IMPORTS
class
TestInfiLoopsAndRecursion
:
def
test_direct_left_recursion
(
self
):
def
test_direct_left_recursion
1
(
self
):
minilang
=
"""
@ whitespace = linefeed
formula = [ //~ ] expr
...
...
@@ -50,7 +50,7 @@ class TestInfiLoopsAndRecursion:
syntax_tree
.
log
(
"test_LeftRecursion_direct.cst"
)
# self.minilang_parser1.log_parsing_history__("test_LeftRecursion_direct")
def
test_
in
direct_left_recursion
1
(
self
):
def
test_direct_left_recursion
2
(
self
):
minilang
=
"""
@ whitespace = linefeed
formula = [ //~ ] expr
...
...
@@ -64,12 +64,10 @@ class TestInfiLoopsAndRecursion:
parser
=
parser_factory
(
minilang
)()
assert
parser
syntax_tree
=
parser
(
snippet
)
assert
not
syntax_tree
.
collect_errors
()
assert
not
syntax_tree
.
error_flag
,
syntax_tree
.
collect_errors
()
assert
snippet
==
str
(
syntax_tree
)
if
is_logging
():
syntax_tree
.
log
(
"test_LeftRecursion_indirect1.cst"
)
def
test_indirect_left_recursion
2
(
self
):
def
test_indirect_left_recursion
1
(
self
):
minilang
=
"""
Expr = //~ (Product | Sum | Value)
Product = Expr { ('*' | '/') Expr }+
...
...
@@ -80,18 +78,40 @@ class TestInfiLoopsAndRecursion:
assert
parser
snippet
=
"8 * 4"
syntax_tree
=
parser
(
snippet
)
assert
not
syntax_tree
.
error_flag
assert
not
syntax_tree
.
error_flag
,
syntax_tree
.
collect_errors
()
snippet
=
"7 + 8 * 4"
syntax_tree
=
parser
(
snippet
)
assert
not
syntax_tree
.
error_flag
print
(
syntax_tree
.
as_sxpr
())
assert
not
syntax_tree
.
error_flag
,
syntax_tree
.
collect_errors
()
snippet
=
"9 + 8 * (4 + 3)"
syntax_tree
=
parser
(
snippet
)
assert
not
syntax_tree
.
error_flag
,
syntax_tree
.
collect_errors
()
assert
snippet
==
str
(
syntax_tree
)
if
is_logging
():
syntax_tree
.
log
(
"test_LeftRecursion_indirect2.cst"
)
# def test_indirect_left_recursion2(self):
# """This will always fail, because of the precedence rule of the
# "|"-operator. (Note: This is a difference between PEG and
# classical EBNF). DHParser is a PEG-Parser although it uses the
# syntax of classical EBNF."""
# minilang = """
# Expr = //~ (Product | Sum | Value)
# Product = Expr { ('*' | '/') Expr }
# Sum = Expr { ('+' | '-') Expr }
# Value = /[0-9.]+/~ | '(' Expr ')'
# """
# parser = parser_factory(minilang)()
# assert parser
# snippet = "8 * 4"
# syntax_tree = parser(snippet)
# assert not syntax_tree.error_flag, syntax_tree.collect_errors()
# snippet = "7 + 8 * 4"
# syntax_tree = parser(snippet)
# print(syntax_tree.as_sxpr())
# assert not syntax_tree.error_flag, syntax_tree.collect_errors()
# snippet = "9 + 8 * (4 + 3)"
# syntax_tree = parser(snippet)
# assert not syntax_tree.error_flag, syntax_tree.collect_errors()
# assert snippet == str(syntax_tree)
def
test_inifinite_loops
(
self
):
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment