Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
9f1872d8
Commit
9f1872d8
authored
Apr 22, 2018
by
eckhart
Browse files
- syntaxtree: added flatten_xml(); parse_xml still buggy!
parent
1c42afde
Changes
2
Hide whitespace changes
Inline
Side-by-side
DHParser/syntaxtree.py
View file @
9f1872d8
...
...
@@ -44,7 +44,9 @@ __all__ = ('ParserBase',
'Node'
,
'RootNode'
,
'parse_sxpr'
,
'flatten_sxpr'
)
'parse_xml'
,
'flatten_sxpr'
,
'flatten_xml'
)
#######################################################################
...
...
@@ -182,6 +184,13 @@ def flatten_sxpr(sxpr: str) -> str:
return
re
.
sub
(
r
'\s(?=\))'
,
''
,
re
.
sub
(
r
'\s+'
,
' '
,
sxpr
)).
strip
()
def flatten_xml(xml: str) -> str:
    """Return an XML tree as a one-liner without unnecessary whitespace.

    Two kinds of whitespace are removed:

    * whitespace that directly follows a closing tag (``</name>``), and
    * whitespace that directly precedes the start of an opening tag
      (``<`` followed by a word character).

    All other whitespace, in particular whitespace inside the text of
    leaf nodes, is left untouched.

    Args:
        xml: The (possibly pretty-printed) XML source.

    Returns:
        The XML source with the whitespace described above stripped.
    """
    # The closing tag is captured and re-inserted rather than matched with a
    # look-behind: the standard-library ``re`` module only accepts
    # fixed-width look-behind patterns, so ``(?<=</\w+>)`` fails to compile.
    without_trailing_ws = re.sub(r'(</\w+>)\s+', r'\1', xml)
    # A look-ahead is fine here; it keeps the "<" of the following tag.
    return re.sub(r'\s+(?=<\w)', '', without_trailing_ws)
class
Node
(
collections
.
abc
.
Sized
):
"""
Represents a node in the concrete or abstract syntax tree.
...
...
@@ -869,7 +878,7 @@ def parse_xml(xml: str) -> Node:
Generates a tree of nodes from a (Pseudo-)XML-source.
"""
xml
=
StringView
(
xml
)
PlainText
=
MockParser
(
''
,
'PlainText'
)
PlainText
=
MockParser
(
''
,
'
:
PlainText'
)
mock_parsers
=
{
':PlainText'
:
PlainText
}
def
parse_attributes
(
s
:
StringView
)
->
Tuple
[
StringView
,
OrderedDict
]:
...
...
@@ -894,10 +903,10 @@ def parse_xml(xml: str) -> Node:
s
,
attributes
=
parse_attributes
(
s
[
match
.
end
()
-
s
.
begin
:])
i
=
s
.
find
(
'>'
)
assert
i
>=
0
return
s
[
i
+
1
,
],
tagname
,
attributes
,
s
[
i
-
1
]
==
"/"
return
s
[
i
+
1
:
],
tagname
,
attributes
,
s
[
i
-
1
]
==
"/"
def
parse_closing_tag
(
s
:
StringView
)
->
Tuple
[
StringView
,
str
]:
"""Parses a closing tag returns the string segment, just after
"""Parses a closing tag
and
returns the string segment, just after
the closing tag."""
match
=
s
.
match
(
re
.
compile
(
r
'</\s*(?P<tagname>[\w:]+)>'
))
assert
match
...
...
@@ -935,7 +944,9 @@ def parse_xml(xml: str) -> Node:
result
=
tuple
(
result
)
return
Node
(
mock_parsers
.
setdefault
(
tagname
,
MockParser
(
name
,
":"
+
class_name
)),
result
)
return
parse_full_content
(
xml
[
xml
.
search
(
re
.
compile
(
r
'<(?!\?)'
)):])
match_header
=
xml
.
search
(
re
.
compile
(
r
'<(?!\?)'
))
start
=
match_header
.
start
()
if
match_header
else
0
return
parse_full_content
(
xml
[
start
:])
# if __name__ == "__main__":
# st = parse_sxpr("(alpha (beta (gamma i\nj\nk) (delta y)) (epsilon z))")
...
...
test/test_syntaxtree.py
View file @
9f1872d8
...
...
@@ -24,15 +24,15 @@ import sys
sys
.
path
.
extend
([
'../'
,
'./'
])
from
DHParser.error
import
Error
from
DHParser.syntaxtree
import
Node
,
RootNode
,
parse_sxpr
,
flatten_sxpr
,
TOKEN_PTYPE
from
DHParser.syntaxtree
import
Node
,
RootNode
,
parse_sxpr
,
parse_xml
,
flatten_sxpr
,
flatten_xml
,
TOKEN_PTYPE
from
DHParser.transform
import
traverse
,
reduce_single_child
,
\
replace_by_single_child
,
flatten
,
remove_expendables
from
DHParser.ebnf
import
get_ebnf_grammar
,
get_ebnf_transformer
,
get_ebnf_compiler
from
DHParser.dsl
import
grammar_provider
class
Test
MockSyntaxTree
:
def
test_
mock_syntax_tree
(
self
):
class
Test
ParseSxpression
:
def
test_
parse_s_expression
(
self
):
tree
=
parse_sxpr
(
'(a (b c))'
)
assert
flatten_sxpr
(
tree
.
as_sxpr
())
==
'(a (b "c"))'
,
flatten_sxpr
(
tree
.
as_sxpr
())
tree
=
parse_sxpr
(
'(a i
\n
j
\n
k)'
)
...
...
@@ -44,6 +44,14 @@ class TestMockSyntaxTree:
except
ValueError
:
pass
class TestParseXML:
    """Tests for the XML serialization and parsing helpers."""

    def test_roundtrip(self):
        """Serialize an S-expression tree to XML, flatten it and parse it back.

        Only the flattened XML is asserted; the re-parsed tree is merely
        printed as an S-expression.
        """
        source_tree = parse_sxpr('(a (b c) (d (e f) (h i)))')
        pretty_xml = source_tree.as_xml()
        flat_xml = flatten_xml(pretty_xml)
        assert flat_xml == '<a><b>c</b><d><e>f</e><h>i</h></d></a>'
        reparsed_tree = parse_xml(flat_xml)
        print(reparsed_tree.as_sxpr())
class
TestNode
:
"""
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment