Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
9.2.2023: Due to updates GitLab will be unavailable for some minutes between 9:00 and 11:00.
Open sidebar
badw-it
DHParser
Commits
73c9aa65
Commit
73c9aa65
authored
Jun 23, 2018
by
eckhart
Browse files
- XML-Parser verbessert
parent
7db02298
Changes
4
Hide whitespace changes
Inline
Side-by-side
DHParser/syntaxtree.py
View file @
73c9aa65
...
...
@@ -523,7 +523,8 @@ class Node(collections.abc.Sized):
tail
=
tail
.
lstrip
()
usetab
,
sep
=
''
,
''
else
:
usetab
,
sep
=
tab
,
'
\n
'
usetab
=
tab
if
head
else
''
# no indentation if tag is already omitted
sep
=
'
\n
'
if
self
.
children
:
content
=
[]
...
...
@@ -537,6 +538,7 @@ class Node(collections.abc.Sized):
res
=
cast
(
str
,
self
.
result
)
# safe, because if there are no children, result is a string
if
not
inline
and
not
head
:
# strip whitespace for omitted non inline node, e.g. CharData in mixed elements
res
=
res
.
strip
()
if
density
&
1
and
res
.
find
(
'
\n
'
)
<
0
:
# and head[0] == "<":
# except for XML, add a gap between opening statement and content
...
...
@@ -635,7 +637,7 @@ class Node(collections.abc.Sized):
if
node
.
tag_name
in
empty_tags
:
assert
not
node
.
result
,
(
"Node %s with content %s is not an empty element!"
%
(
node
.
tag_name
,
str
(
node
)))
ending
=
"/>
\n
"
ending
=
"/>
\n
"
if
not
node
.
tag_name
[
0
]
==
'?'
else
"?>
\n
"
else
:
ending
=
">
\n
"
return
""
.
join
(
txt
+
[
ending
])
...
...
examples/XML/XMLCompiler.py
View file @
73c9aa65
...
...
@@ -527,7 +527,7 @@ def get_transformer() -> TransformationFunc:
XML_AST_transformation_table
=
{
# AST Transformations for the XML-grammar
"+"
:
[
remove_empty
,
remove_anonymous_tokens
,
remove_whitespace
,
remove_nodes
(
"S"
)],
"document"
:
[],
"document"
:
[
flatten
(
lambda
context
:
context
[
-
1
].
tag_name
==
'prolog'
,
recursive
=
False
)
],
"prolog"
:
[],
"XMLDecl"
:
[],
"VersionInfo"
:
[
reduce_single_child
],
...
...
@@ -675,16 +675,12 @@ class XMLCompiler(Compiler):
def __init__(self, grammar_name="XML", grammar_source=""):
    """Initialize the compiler for the grammar called `grammar_name`.

    Both arguments are forwarded unchanged to the base-class initializer.
    `grammar_name` must consist of word characters only; this is checked
    with an assertion after the base class has been initialized.
    """
    super(XMLCompiler, self).__init__(grammar_name, grammar_source)
    # Raw string literal: `\w` and `\Z` are regex escapes, not string
    # escapes. Without the `r` prefix newer Python versions warn about
    # invalid escape sequences; the pattern itself is unchanged.
    assert re.match(r'\w+\Z', grammar_name)
    self.cleanup_whitespace = True  # remove empty CharData from mixed elements
def _reset(self):
    """Reset the inherited state, then start with an empty mock-parser cache."""
    super()._reset()
    # Filled lazily, one entry per tag name, by the mock-parser lookup.
    self.mock_parsers = {}
def
on_document
(
self
,
node
):
self
.
tree
.
omit_tags
.
add
(
'CharData'
)
self
.
tree
.
inline_tags
.
update
({
'to'
,
'from'
,
'heading'
,
'body'
,
'remark'
})
return
self
.
fallback_compiler
(
node
)
def
extract_attributes
(
self
,
node_sequence
):
attributes
=
OrderedDict
()
for
node
in
node_sequence
:
...
...
@@ -698,11 +694,42 @@ class XMLCompiler(Compiler):
"""Returns a mock parser with the given tag_name as parser name."""
return
self
.
mock_parsers
.
setdefault
(
tag_name
,
MockParser
(
tag_name
))
def validity_constraint(self, node, condition, err_msg):
    """Add `err_msg` as an error for `node` unless `condition` is truthy."""
    if condition:
        return
    self.tree.add_error(node, err_msg)
def value_constraint(self, node, value, allowed):
    """If `value` is not in `allowed`, an error is issued for `node`.

    The check is delegated to `validity_constraint`. Note that the error
    message is worded for the "standalone" declaration regardless of which
    value is being checked.
    """
    # Fixed: this previously called `self.constraint`, which does not match
    # the helper's actual name `validity_constraint`.
    self.validity_constraint(
        node, value in allowed,
        'Invalid value "%s" for "standalone"! Must be one of %s.' % (value, str(allowed)))
def on_document(self, node):
    """Register serialization hints on the tree, then compile `node` generically.

    Adds 'CharData' and 'document' to the tree's `omit_tags` and a fixed
    set of tag names to its `inline_tags` before handing `node` to
    `fallback_compiler`, whose result is returned.
    """
    tree = self.tree
    omitted = {'CharData', 'document'}
    tree.omit_tags.update(omitted)
    # TODO: Remove the following lines. They are specific for testing with example.xml!
    inlined = {'to', 'from', 'heading', 'body', 'remark'}
    tree.inline_tags.update(inlined)
    compiled = self.fallback_compiler(node)
    return compiled
# def on_prolog(self, node):
# return node
# def on_XMLDecl(self, node):
# return node
def on_XMLDecl(self, node):
    """Turn an XML declaration node into an empty '?xml' node with attributes.

    The contents of the `VersionInfo`, `EncodingDecl` and `SDDecl` children
    become the attributes 'version', 'encoding' and 'standalone'. The
    'standalone' value is additionally checked against 'yes'/'no' via
    `value_constraint`. Afterwards the node's result is emptied, '?xml' is
    registered as an empty tag on the tree, and the node is given the
    '?xml' mock parser. Returns the same `node`.
    """
    attribute_for_tag = {
        "VersionInfo": 'version',
        "EncodingDecl": 'encoding',
        "SDDecl": 'standalone',
    }
    collected = {}
    for child in node.children:
        text = child.content
        key = attribute_for_tag.get(child.tag_name)
        if key is None:
            continue  # children other than the three declarations are ignored
        collected[key] = text
        if key == 'standalone':
            self.value_constraint(node, text, {'yes', 'no'})
    if collected:
        node.attributes.update(collected)
    node.result = ''
    self.tree.empty_tags.add('?xml')
    node.parser = self.get_parser('?xml')
    return node
# def on_VersionInfo(self, node):
# return node
...
...
@@ -874,11 +901,23 @@ class XMLCompiler(Compiler):
def
on_element
(
self
,
node
):
stag
=
node
[
'STag'
]
tag_name
=
stag
[
'Name'
].
content
attributes
=
self
.
extract_attributes
(
stag
.
children
)
preserve_whitespace
=
tag_name
in
self
.
tree
.
inline_tags
if
attributes
:
node
.
attributes
.
update
(
attributes
)
node
.
parser
=
self
.
get_parser
(
stag
[
'Name'
].
content
)
node
.
result
=
self
.
compile_children
(
node
.
get
(
'content'
,
ZOMBIE_NODE
))
preserve_whitespace
|=
attributes
.
get
(
'xml:space'
,
''
)
==
'preserve'
node
.
parser
=
self
.
get_parser
(
tag_name
)
content
=
self
.
compile_children
(
node
.
get
(
'content'
,
ZOMBIE_NODE
))
if
len
(
content
)
==
1
:
if
content
[
0
].
tag_name
==
"CharData"
:
# reduce single CharData children
content
=
content
[
0
].
content
elif
self
.
cleanup_whitespace
and
not
preserve_whitespace
:
# remove CharData that consists only of whitespace from mixed elements
content
=
tuple
(
child
for
child
in
content
if
child
.
tag_name
!=
"CharData"
or
child
.
content
.
strip
()
!=
''
)
node
.
result
=
content
return
node
# def on_STag(self, node):
...
...
@@ -1050,6 +1089,7 @@ if __name__ == "__main__":
print
(
rel_path
+
':'
+
str
(
error
))
sys
.
exit
(
1
)
else
:
print
(
result
.
as_sxpr
(
compact
=
True
))
print
(
result
.
customized_XML
()
if
isinstance
(
result
,
Node
)
else
result
)
else
:
print
(
"Usage: XMLCompiler.py [FILENAME]"
)
examples/XML/example.xml
View file @
73c9aa65
...
...
@@ -3,7 +3,7 @@
<to>
Tove
</to>
<from>
Jani
</from>
<heading>
Reminder
</heading>
<body>
Don't forget me this weekend!
</body>
<body>
Don't forget me this weekend!
</body>
<priority
level=
"high"
/>
<remark></remark>
</note>
\ No newline at end of file
test/test_syntaxtree.py
View file @
73c9aa65
...
...
@@ -23,7 +23,6 @@ import copy
import
sys
sys
.
path
.
extend
([
'../'
,
'./'
])
from
DHParser.error
import
Error
from
DHParser.syntaxtree
import
Node
,
RootNode
,
parse_sxpr
,
parse_xml
,
flatten_sxpr
,
flatten_xml
,
TOKEN_PTYPE
from
DHParser.transform
import
traverse
,
reduce_single_child
,
\
replace_by_single_child
,
flatten
,
remove_expendables
...
...
@@ -166,37 +165,6 @@ class TestRootNode:
assert
error_str
.
find
(
"A"
)
<
error_str
.
find
(
"B"
)
# class TestErrorHandling:
# def test_error_flag_propagation(self):
# tree = parse_sxpr('(a (b c) (d (e (f (g h)))))')
#
# def find_h(context):
# node = context[-1]
# if node.result == "h":
# node.new_error("an error deep inside the syntax tree")
#
# assert not tree.error_flag
# traverse(tree, {"*": find_h})
# assert tree.error_flag, tree.as_sxpr()
#
# def test_collect_errors(self):
# tree = parse_sxpr('(A (B 1) (C (D (E 2) (F 3))))')
# A = tree
# B = next(tree.select(lambda node: str(node) == "1"))
# D = next(tree.select(lambda node: node.parser.name == "D"))
# F = next(tree.select(lambda node: str(node) == "3"))
# B.new_error("Error in child node")
# F.new_error("Error in child's child node")
# tree.error_flag = Error.ERROR
# errors = tree.collect_errors()
# assert len(errors) == 2, str(errors)
# assert A.error_flag
# assert D.error_flag
# errors = tree.collect_errors(clear_errors=True)
# assert len(errors) == 2
# assert not D.error_flag
class
TestNodeFind
():
"""Test the select-functions of class Node.
"""
...
...
@@ -268,6 +236,14 @@ class TestSerialization:
assert
s
==
'(A
\n
(B
\n
(C
\n
"D"
\n
"X"
\n
)'
\
'
\n
(E
\n
"F"
\n
)
\n
)
\n
(G
\n
" H "
\n
" Y "
\n
)
\n
)'
,
s
def
test_compact_representation
(
self
):
tree
=
parse_sxpr
(
'(A (B (C "D") (E "F")) (G "H"))'
)
compact
=
tree
.
as_sxpr
(
compact
=
True
)
assert
compact
==
'A
\n
B
\n
C "D"
\n
E "F"
\n
G "H"'
,
compact
tree
=
parse_sxpr
(
'(A (B (C "D
\n
X") (E "F")) (G " H
\n
Y "))'
)
compact
=
tree
.
as_sxpr
(
compact
=
True
)
assert
compact
==
'A
\n
B
\n
C
\n
"D"
\n
"X"
\n
E "F"'
\
'
\n
G
\n
" H "
\n
" Y "'
,
compact
def
test_xml_inlining
(
self
):
tree
=
parse_sxpr
(
'(A (B "C") (D "E"))'
)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment