Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
The container registry cleanup task is now completed and the registry can be used normally.
Open sidebar
badw-it
DHParser
Commits
adbbd2e6
Commit
adbbd2e6
authored
Mar 04, 2020
by
di68kap
Browse files
transform.py: has_attr() and attr_equals() added
parent
f9c6395f
Changes
7
Hide whitespace changes
Inline
Side-by-side
DHParser/parse.py
View file @
adbbd2e6
...
...
@@ -407,7 +407,7 @@ class Parser:
# apply reentry-rule or catch error at root-parser
if
i
<
0
:
i
=
0
try
:
zombie
=
pe
.
node
[
ZOMBIE_TAG
]
# type: Optional[Node]
zombie
=
pe
.
node
.
pick_child
(
ZOMBIE_TAG
)
# type: Optional[Node]
except
(
KeyError
,
ValueError
):
zombie
=
None
if
zombie
and
not
zombie
.
result
:
...
...
DHParser/syntaxtree.py
View file @
adbbd2e6
...
...
@@ -612,9 +612,9 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
# tree traversal and node selection #######################################
def
__getitem__
(
self
,
key
:
Union
[
CriteriaType
,
int
])
->
Union
[
'Node'
,
List
[
'Node'
]]:
def
__getitem__
(
self
,
key
:
Union
[
CriteriaType
,
int
])
->
Union
[
'Node'
,
Sequence
[
'Node'
]]:
"""
Returns the child node with the given index if ``
index_or_tagname
`` is
Returns the child node with the given index if ``
key
`` is
an integer or all child-nodes with the given tag name. Examples::
>>> tree = parse_sxpr('(a (b "X") (X (c "d")) (e (X "F")))')
...
...
@@ -627,7 +627,10 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
key(str): A criterion (tag name(s), match function, node) or
an index of the child that shall be returned.
Returns:
Node: All nodes which have a given tag name.
Node: The node with the given index (always type Node),
all nodes which have a given tag name (type Node if there
exists only one or type Tuple[Node] if there are more than
one).
Raises:
KeyError: if no matching child was found.
IndexError: if key was an integer index that did not exist
...
...
@@ -637,9 +640,9 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
return
self
.
children
[
key
]
else
:
mf
=
create_match_function
(
key
)
for
child
in
self
.
children
:
if
mf
(
child
)
:
return
child
items
=
tuple
(
child
for
child
in
self
.
children
if
mf
(
child
))
if
items
:
return
items
if
len
(
items
)
>=
2
else
items
[
0
]
raise
IndexError
(
'index out of range'
)
if
isinstance
(
key
,
int
)
\
else
KeyError
(
str
(
key
))
...
...
@@ -658,16 +661,23 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
mf
=
create_match_function
(
key
)
self
.
result
=
tuple
(
child
for
child
in
self
.
children
if
not
mf
(
child
))
def get(self, key: Union[CriteriaType, int],
        surrogate: Union['Node', Sequence['Node']]) -> Union['Node', Sequence['Node']]:
    """Returns the child node with the given index if ``key`` is an
    integer, or the child node(s) matching ``key`` otherwise. If no child
    with the given index or matching the given criterion exists, the
    ``surrogate`` is returned instead. This mimics the behaviour of the
    get-method of Python's dictionaries.

    The type of the return value is always the same type as that of the
    surrogate: If the surrogate is a sequence, a sequence is returned (a
    single match is wrapped in a one-element tuple). If the surrogate is
    a Node, but there are several items matching key, then the first of
    these will be returned.

    Args:
        key: A criterion (tag name(s), match function, node) or an index
            of the child that shall be returned.
        surrogate: The value returned if the lookup fails. Its type
            (single item or sequence) also determines the shape of the
            result of a successful lookup.
    Returns:
        The matching child or children, shaped like ``surrogate``, or
        ``surrogate`` itself if nothing was found.
    """
    try:
        items = self[key]
    except (KeyError, IndexError):
        # A missing integer index surfaces as IndexError, a failed
        # criterion as KeyError; both mean "no such child".
        return surrogate
    if isinstance(surrogate, Sequence):
        return items if isinstance(items, Sequence) else (items,)
    return items[0] if isinstance(items, Sequence) else items
...
...
DHParser/testing.py
View file @
adbbd2e6
...
...
@@ -361,7 +361,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report='REPORT'
for
parent
in
syntax_tree
.
select_if
(
lambda
node
:
any
(
child
.
tag_name
==
ZOMBIE_TAG
for
child
in
node
.
children
),
include_root
=
True
,
reverse
=
True
):
zombie
=
parent
[
ZOMBIE_TAG
]
zombie
=
parent
.
pick_child
(
ZOMBIE_TAG
)
zombie
.
tag_name
=
'__TESTING_ARTIFACT__'
zombie
.
result
=
'Artifact can be ignored. Be aware, though, that also the '
\
'tree structure may not be the same as in a non-testing '
\
...
...
DHParser/toolkit.py
View file @
adbbd2e6
...
...
@@ -71,6 +71,9 @@ __all__ = ('typing',
'abbreviate_middle'
,
'escape_formatstr'
,
'as_identifier'
,
'as_list'
,
'first'
,
'last'
,
'linebreaks'
,
'line_col'
,
'text_pos'
,
...
...
@@ -307,6 +310,30 @@ def as_identifier(s: str, replacement: str = "_") -> str:
return
''
.
join
(
ident
)
def as_list(item_or_sequence) -> List[Any]:
    """Wraps the argument into a list.

    An iterable argument is converted element by element into a new list;
    any other argument becomes the sole element of a one-item list.
    (Assuming ``Iterable`` is the standard abstract base class, strings
    count as iterables and are therefore split into their characters.)
    """
    if not isinstance(item_or_sequence, Iterable):
        return [item_or_sequence]
    return list(item_or_sequence)
def first(item_or_sequence: Union[Sequence, Any]) -> Any:
    """Returns the first item if the argument is a sequence; any other
    argument is returned unchanged."""
    if not isinstance(item_or_sequence, Sequence):
        return item_or_sequence
    return item_or_sequence[0]
def last(item_or_sequence: Union[Sequence, Any]) -> Any:
    """Returns an item or the last item of a sequence of items.

    If the argument is a sequence, its last element is returned; any
    other argument is returned unchanged.
    """
    if isinstance(item_or_sequence, Sequence):
        return item_or_sequence[-1]
    return item_or_sequence
#######################################################################
#
# type system support
...
...
DHParser/transform.py
View file @
adbbd2e6
...
...
@@ -575,17 +575,17 @@ def has_content(context: List[Node], regexp: str) -> bool:
return
bool
(
re
.
match
(
regexp
,
context
[
-
1
].
content
))
# TODO: rename has_ancestor
@transformation_factory(collections.abc.Set)
def has_parent(context: List[Node], tag_name_set: AbstractSet[str], ancestry: int = 1) -> bool:
    """
    Checks whether a node with one of the given tag names appears somewhere
    in the context before the last node in the context.

    :param context: the list of nodes leading to the node under inspection;
        the last item is the node itself, the item before it its parent.
    :param tag_name_set: the tag names to look for among the ancestors.
    :param ancestry: determines how deep `has_parent` should dive into
        the ancestry. "1" means only the immediate parents will be
        considered, "2" means also the grandparents, and so on.
    :returns: True, if one of the ancestors within the given depth has a
        tag name contained in `tag_name_set`, False otherwise.
    """
    assert ancestry > 0
    # context[-1] is the node itself, so ancestors start at offset 2. The
    # upper bound is the smaller of the requested depth and the number of
    # ancestors actually present; never index beyond the context.
    upper_bound = min(ancestry + 2, len(context) + 1)
    return any(context[-i].tag_name in tag_name_set for i in range(2, upper_bound))
...
...
test/test_syntaxtree.py
View file @
adbbd2e6
...
...
@@ -249,7 +249,7 @@ class TestNode:
tree
=
parse_sxpr
(
'(A (B 1) (C 1) (B 2))'
)
assert
'B'
in
tree
assert
'X'
not
in
tree
assert
tree
[
'B'
]
.
equals
(
Node
(
'B'
,
'1'
))
assert
tree
.
pick_child
(
'B'
)
.
equals
(
Node
(
'B'
,
'1'
))
item_w_value_2
=
lambda
nd
:
nd
.
content
==
'2'
assert
item_w_value_2
in
tree
item_w_value_4
=
lambda
nd
:
nd
.
content
==
'4'
...
...
@@ -534,7 +534,7 @@ class TestSerialization:
all_tags
=
{
'XML'
,
'T'
,
'L'
}
assert
tree
.
as_xml
(
inline_tags
=
all_tags
,
omit_tags
=
all_tags
)
==
"Hallo Welt!"
# tags with attributes will never be omitted
tree
[
'T'
]
.
attr
[
'class'
]
=
"kursiv"
tree
.
pick_child
(
'T'
)
.
attr
[
'class'
]
=
"kursiv"
assert
tree
.
as_xml
(
inline_tags
=
all_tags
,
omit_tags
=
all_tags
)
==
\
'<T class="kursiv">Hallo</T> Welt!'
...
...
test/test_transform.py
View file @
adbbd2e6
...
...
@@ -171,7 +171,7 @@ class TestConditionalTransformations:
context
=
[
Node
(
'A'
,
'alpha'
),
Node
(
'B'
,
'beta'
),
Node
(
'C'
,
'gamma'
)]
assert
has_parent
(
context
,
{
'A'
})
assert
has_parent
(
context
,
{
'A'
}
,
2
)
assert
has_parent
(
context
,
{
'B'
})
assert
not
has_parent
(
context
,
{
'C'
})
...
...
@@ -292,7 +292,7 @@ class TestWhitespaceTransformations:
transformations
=
{
'SENTENCE'
:
merge_adjacent
(
is_one_of
(
'TEXT'
,
'L'
),
'TEXT'
)}
traverse
(
sentence
,
transformations
)
assert
tree_sanity_check
(
sentence
)
assert
sentence
[
'TEXT'
]
.
result
==
"Guten Tag"
assert
sentence
.
pick_child
(
'TEXT'
)
.
result
==
"Guten Tag"
assert
sentence
[
2
].
result
==
"Hallo Welt"
assert
sentence
[
-
1
].
tag_name
==
'L'
assert
'T'
in
sentence
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment