Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
cd4aab6b
Commit
cd4aab6b
authored
Apr 09, 2019
by
di68kap
Browse files
- transform.py: move_adjacent can now be configured not to merge nodes after moving
parent
2afa6b3b
Changes
3
Hide whitespace changes
Inline
Side-by-side
DHParser/transform.py
View file @
cd4aab6b
...
...
@@ -58,6 +58,7 @@ __all__ = ('TransformationDict',
'replace_content'
,
'replace_content_by'
,
'normalize_whitespace'
,
'merge_adjacent'
,
'move_adjacent'
,
'left_associative'
,
'lean_left'
,
...
...
@@ -411,7 +412,7 @@ def is_insignificant_whitespace(context: List[Node]) -> bool:
return
context
[
-
1
].
tag_name
==
WHITESPACE_PTYPE
RX_WHITESPACE
=
re
.
compile
(
r
'\s
+
'
)
RX_WHITESPACE
=
re
.
compile
(
r
'\s
*$
'
)
def
contains_only_whitespace
(
context
:
List
[
Node
])
->
bool
:
...
...
@@ -419,8 +420,7 @@ def contains_only_whitespace(context: List[Node]) -> bool:
of the tag_name, i.e. nodes the content of which matches the regular
expression /\s*/, including empty nodes. Note, that this is not true
for anonymous whitespace nodes that contain comments."""
content
=
context
[
-
1
].
content
return
bool
(
not
content
or
RX_WHITESPACE
.
match
(
content
))
return
bool
(
RX_WHITESPACE
.
match
(
context
[
-
1
].
content
))
def
is_any_kind_of_whitespace
(
context
:
List
[
Node
])
->
bool
:
...
...
@@ -820,9 +820,11 @@ def normalize_whitespace(context):
node
.
result
=
re
.
sub
(
r
'\s+'
,
' '
,
node
.
result
)
def
merge_
whitesp
ace
(
context
):
def
merge_
adj
ace
nt
(
context
,
condition
:
Callable
):
"""
Merges adjacent whitespace. UNTESTED!
Merges adjacent nodes that fulfill the given `condition`. It is
is assumed that `condition` is never true for leaf-nodes and non-leaf-nodes
alike. Otherwise a type-error might ensue.
"""
node
=
context
[
-
1
]
children
=
node
.
children
...
...
@@ -830,26 +832,42 @@ def merge_whitespace(context):
i
=
0
L
=
len
(
children
)
while
i
<
L
:
if
children
[
i
].
tag_name
==
WHITESPACE_PTYPE
:
if
condition
([
children
[
i
]]):
initial
=
()
if
children
[
i
].
children
else
''
k
=
i
while
i
<
L
and
c
hildren
[
k
].
tag_name
==
WHITESPACE_PTYPE
:
while
i
<
L
and
c
ondition
([
children
[
k
]])
:
i
+=
1
if
i
>
k
:
children
[
k
].
result
=
sum
(
children
[
n
].
result
for
n
in
range
(
k
,
i
+
1
))
children
[
k
].
result
=
sum
(
(
children
[
n
].
result
for
n
in
range
(
k
,
i
+
1
))
,
initial
)
new_result
.
append
(
children
[
k
])
i
+=
1
node
.
result
=
tuple
(
new_result
)
@
transformation_factory
(
collections
.
abc
.
Callable
)
def
move_adjacent
(
context
:
List
[
Node
],
condition
:
Callable
=
is_insignificant_whitespac
e
):
def
move_adjacent
(
context
:
List
[
Node
],
condition
:
Callable
,
merge
:
bool
=
Tru
e
):
"""
Moves adjacent nodes that fulfill the given condition to the parent node.
If the `merge`-flag is set, a moved node will be merged with its
predecessor (or successor, respectively) in the parent node in case it
also fulfill the given `condition`.
"""
def
join_results
(
a
:
Node
,
b
:
Node
,
c
:
Node
)
->
bool
:
"""Joins the results of node `a` and `b` and write them to the result
of `c` type-safely, if possible. Return True, if join was possible
and done, False otherwise."""
def
merge_results
(
a
:
Node
,
b
:
Node
,
c
:
Node
)
->
bool
:
"""
Merges the results of node `a` and `b` and writes them to the result
of `c` type-safely, if b and c are either both leaf-nodes (in which case
their result-strings are concatenated) or both non-leaf-nodes (in which
case the tuples of children are concatenated).
Returns `True` in case of a successful merge, `False` if only one node
was a leaf node and the merge could thus not be done.
Example:
>>> head, tail = Node('head', '123'), Node('tail', '456')
>>> merge_results(head, tail, head) # merge head and tail (in that order) into head
True
>>> str(head)
'123456'
"""
if
a
.
children
and
b
.
children
:
c
.
result
=
cast
(
Tuple
[
Node
,
...],
a
.
result
)
+
cast
(
Tuple
[
Node
,
...],
b
.
result
)
return
True
...
...
@@ -858,7 +876,6 @@ def move_adjacent(context: List[Node], condition: Callable = is_insignificant_wh
return
True
return
False
node
=
context
[
-
1
]
if
len
(
context
)
<=
1
or
not
node
.
children
:
return
...
...
@@ -881,19 +898,20 @@ def move_adjacent(context: List[Node], condition: Callable = is_insignificant_wh
if
id
(
child
)
==
id
(
node
):
break
# merge adjacent whitespace
prevN
=
parent
.
children
[
i
-
1
]
if
i
>
0
else
None
nextN
=
parent
.
children
[
i
+
1
]
if
i
<
len
(
parent
.
children
)
-
1
else
None
if
before
and
prevN
and
condition
([
prevN
]):
# prevN.result = prevN.result + before[0].result
# before = ()
if
join_results
(
prevN
,
before
[
0
],
prevN
):
before
=
()
if
after
and
nextN
and
condition
([
nextN
]):
# nextN.result = after[0].result + nextN.result
# after = ()
if
join_results
(
after
[
0
],
nextN
,
nextN
):
after
=
()
# merge adjacent nodes that fulfil the condition
if
merge
:
prevN
=
parent
.
children
[
i
-
1
]
if
i
>
0
else
None
nextN
=
parent
.
children
[
i
+
1
]
if
i
<
len
(
parent
.
children
)
-
1
else
None
if
before
and
prevN
and
condition
([
prevN
]):
# prevN.result = prevN.result + before[0].result
# before = ()
if
merge_results
(
prevN
,
before
[
0
],
prevN
):
before
=
()
if
after
and
nextN
and
condition
([
nextN
]):
# nextN.result = after[0].result + nextN.result
# after = ()
if
merge_results
(
after
[
0
],
nextN
,
nextN
):
after
=
()
parent
.
result
=
parent
.
children
[:
i
]
+
before
+
(
node
,)
+
after
+
parent
.
children
[
i
+
1
:]
...
...
test/test_dhparser.py
View file @
cd4aab6b
...
...
@@ -46,7 +46,7 @@ class TestDHParserCommandLineTool:
os
.
chdir
(
self
.
cwd
)
def
test_dhparser
(
self
):
os
.
system
(
self
.
python
+
'../scripts/dhparser.py testdata/neu '
+
self
.
nulldevice
)
os
.
system
(
self
.
python
+
'../
DHParser/
scripts/dhparser.py testdata/neu '
+
self
.
nulldevice
)
os
.
system
(
self
.
python
+
'testdata/neu/tst_neu_grammar.py '
+
self
.
nulldevice
)
os
.
system
(
self
.
python
+
'testdata/neu/neuCompiler.py testdata/neu/example.dsl '
'>testdata/neu/example.xml'
)
...
...
test/test_transform.py
View file @
cd4aab6b
...
...
@@ -28,7 +28,7 @@ from DHParser.syntaxtree import Node, parse_sxpr, flatten_sxpr, parse_xml, PLACE
tree_sanity_check
,
TOKEN_PTYPE
from
DHParser.transform
import
traverse
,
reduce_single_child
,
remove_whitespace
,
move_adjacent
,
\
traverse_locally
,
collapse
,
collapse_if
,
lstrip
,
rstrip
,
remove_content
,
remove_tokens
,
\
transformation_factory
,
has_parent
,
contains_only_whitespace
transformation_factory
,
has_parent
,
contains_only_whitespace
,
is_insignificant_whitespace
from
DHParser.toolkit
import
typing
from
typing
import
AbstractSet
,
List
,
Sequence
,
Tuple
...
...
@@ -41,7 +41,7 @@ class TestRemoval:
assert
contains_only_whitespace
([
Node
(
'test'
,
''
)])
assert
contains_only_whitespace
([
Node
(
'test'
,
'
\n
'
)])
assert
not
contains_only_whitespace
([
Node
(
'test'
,
'Katze'
)])
assert
contains_only_whitespace
([
Node
(
'test'
,
' tag '
)])
assert
not
contains_only_whitespace
([
Node
(
'test'
,
' tag '
)])
def
test_lstrip
(
self
):
cst
=
parse_sxpr
(
'(_Token (:Whitespace " ") (:Re test))'
)
...
...
@@ -255,7 +255,7 @@ class TestWhitespaceTransformations:
'(WORD (LETTERS "not") (:Whitespace " ")) '
'(WORD (LETTERS "to") (:Whitespace " "))'
'(WORD (LETTERS "be") (:Whitespace " ")))'
)
transformations
=
{
'WORD'
:
move_adjacent
}
transformations
=
{
'WORD'
:
move_adjacent
(
is_insignificant_whitespace
)
}
traverse
(
sentence
,
transformations
)
assert
tree_sanity_check
(
sentence
)
assert
all
(
i
%
2
==
0
or
node
.
tag_name
==
':Whitespace'
for
i
,
node
in
enumerate
(
sentence
))
...
...
@@ -267,7 +267,7 @@ class TestWhitespaceTransformations:
'(WORD (:Whitespace " ") (LETTERS "not") (:Whitespace " ")) '
'(WORD (:Whitespace " ") (LETTERS "to") (:Whitespace " "))'
'(WORD (:Whitespace " ") (LETTERS "be") (:Whitespace " ")))'
)
transformations
=
{
'WORD'
:
move_adjacent
}
transformations
=
{
'WORD'
:
move_adjacent
(
is_insignificant_whitespace
)
}
traverse
(
sentence
,
transformations
)
assert
tree_sanity_check
(
sentence
)
assert
all
(
i
%
2
==
0
or
node
.
tag_name
==
':Whitespace'
for
i
,
node
in
enumerate
(
sentence
))
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment