Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Open sidebar
badw-it
DHParser
Commits
d8cc42fb
Commit
d8cc42fb
authored
Apr 24, 2018
by
di68kap
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
- syntaxtree.py: fixed "parse_xml()"
parent
9f1872d8
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
27 additions
and
9 deletions
+27
-9
DHParser/parse.py
DHParser/parse.py
+2
-1
DHParser/stringview.py
DHParser/stringview.py
+1
-0
DHParser/syntaxtree.py
DHParser/syntaxtree.py
+15
-7
test/test_syntaxtree.py
test/test_syntaxtree.py
+9
-1
No files found.
DHParser/parse.py
View file @
d8cc42fb
...
...
@@ -38,7 +38,7 @@ from DHParser.log import is_logging, HistoryRecord
from
DHParser.preprocess
import
BEGIN_TOKEN
,
END_TOKEN
,
RX_TOKEN_NAME
from
DHParser.stringview
import
StringView
,
EMPTY_STRING_VIEW
from
DHParser.syntaxtree
import
Node
,
RootNode
,
ParserBase
,
WHITESPACE_PTYPE
,
\
TOKEN_PTYPE
,
ZOMBIE_PARSER
PLAINTEXT_PTYPE
,
TOKEN_PTYPE
,
ZOMBIE_PARSER
from
DHParser.toolkit
import
sane_parser_name
,
escape_control_characters
,
re
,
typing
from
typing
import
Callable
,
cast
,
Dict
,
DefaultDict
,
List
,
Set
,
Tuple
,
Union
,
Optional
...
...
@@ -894,6 +894,7 @@ class PlainText(Parser):
>>> Grammar(while_token)("while").content
'while'
"""
assert
PLAINTEXT_PTYPE
==
":PlainText"
def
__init__
(
self
,
text
:
str
,
name
:
str
=
''
)
->
None
:
super
().
__init__
(
name
)
...
...
DHParser/stringview.py
View file @
d8cc42fb
...
...
@@ -118,6 +118,7 @@ class StringView(collections.abc.Sized):
return
self
.
fullstring
# since the slice is being copied now, anyway, the copy might
# as well be stored in the string view
# return self.text[self.begin:self.end] # use this for debugging!
self
.
fullstring
=
self
.
text
[
self
.
begin
:
self
.
end
]
return
self
.
fullstring
...
...
DHParser/syntaxtree.py
View file @
d8cc42fb
...
...
@@ -36,6 +36,7 @@ from typing import Callable, cast, Iterator, List, AbstractSet, Set, Union, Tupl
__all__
=
(
'ParserBase'
,
'WHITESPACE_PTYPE'
,
'PLAINTEXT_PTYPE'
,
'TOKEN_PTYPE'
,
'MockParser'
,
'ZombieParser'
,
...
...
@@ -109,6 +110,7 @@ class ParserBase:
WHITESPACE_PTYPE
=
':Whitespace'
PLAINTEXT_PTYPE
=
':PlainText'
TOKEN_PTYPE
=
':Token'
...
...
@@ -873,13 +875,16 @@ def parse_sxpr(sxpr: str) -> Node:
return
inner_parser
(
sxpr
)
RX_WHITESPACE_TAIL
=
re
.
compile
(
r
'\s*$'
)
def
parse_xml
(
xml
:
str
)
->
Node
:
"""
Generates a tree of nodes from a (Pseudo-)XML-source.
"""
xml
=
StringView
(
xml
)
PlainText
=
MockParser
(
''
,
':PlainText'
)
mock_parsers
=
{
':PlainText'
:
PlainText
}
PlainText
=
MockParser
(
''
,
PLAINTEXT_PTYPE
)
mock_parsers
=
{
PLAINTEXT_PTYPE
:
PlainText
}
def
parse_attributes
(
s
:
StringView
)
->
Tuple
[
StringView
,
OrderedDict
]:
"""Parses a sequence of XML-Attributes. Returns the string-slice
...
...
@@ -900,7 +905,8 @@ def parse_xml(xml: str) -> Node:
match
=
s
.
match
(
re
.
compile
(
r
'<\s*(?P<tagname>[\w:]+)\s*'
))
assert
match
tagname
=
match
.
groupdict
()[
'tagname'
]
s
,
attributes
=
parse_attributes
(
s
[
match
.
end
()
-
s
.
begin
:])
section
=
s
[
match
.
end
()
-
s
.
begin
:]
s
,
attributes
=
parse_attributes
(
section
)
i
=
s
.
find
(
'>'
)
assert
i
>=
0
return
s
[
i
+
1
:],
tagname
,
attributes
,
s
[
i
-
1
]
==
"/"
...
...
@@ -931,22 +937,24 @@ def parse_xml(xml: str) -> Node:
if
not
solitary
:
while
s
and
not
s
[:
2
]
==
"</"
:
s
,
leaf
=
parse_leaf_content
(
s
)
if
not
s
.
match
(
re
.
compile
(
"\s*$"
)
):
if
not
leaf
.
match
(
RX_WHITESPACE_TAIL
):
result
.
append
(
Node
(
PlainText
,
leaf
))
if
s
[:
1
]
==
"<"
and
s
[:
2
]
!=
"</"
:
s
,
child
=
parse_full_content
(
s
)
result
.
append
(
child
)
s
,
closing_tagname
=
parse_closing_tag
(
s
)
assert
tagname
==
closing_tagname
if
len
(
result
)
==
1
and
isinstance
(
result
[
0
].
parser
==
PlainText
)
:
if
len
(
result
)
==
1
and
result
[
0
].
parser
.
ptype
==
PLAINTEXT_PTYPE
:
result
=
result
[
0
].
result
else
:
result
=
tuple
(
result
)
return
Node
(
mock_parsers
.
setdefault
(
tagname
,
MockParser
(
name
,
":"
+
class_name
)),
result
)
return
s
,
Node
(
mock_parsers
.
setdefault
(
tagname
,
MockParser
(
name
,
":"
+
class_name
)),
result
)
match_header
=
xml
.
search
(
re
.
compile
(
r
'<(?!\?)'
))
start
=
match_header
.
start
()
if
match_header
else
0
return
parse_full_content
(
xml
[
start
:])
_
,
tree
=
parse_full_content
(
xml
[
start
:])
assert
_
.
match
(
RX_WHITESPACE_TAIL
)
return
tree
# if __name__ == "__main__":
# st = parse_sxpr("(alpha (beta (gamma i\nj\nk) (delta y)) (epsilon z))")
...
...
test/test_syntaxtree.py
View file @
d8cc42fb
...
...
@@ -51,7 +51,15 @@ class TestParseXML:
fxml
=
flatten_xml
(
xml
)
assert
fxml
==
'<a><b>c</b><d><e>f</e><h>i</h></d></a>'
tree2
=
parse_xml
(
fxml
)
print
(
tree2
.
as_sxpr
())
assert
fxml
==
flatten_xml
(
tree2
.
as_xml
())
def test_plaintext_handling(self):
    """Check the serialized output of parse_xml() for mixed text content.

    Two inputs are parsed: an element with text on both sides of a child
    element, and an element padded with whitespace around and between tags.
    Each result is serialized, flattened and compared with a fixed string.
    """
    # Text on either side of <b> is expected to show up as ':PlainText'
    # entries in the flattened S-expression.
    mixed = parse_xml('<a>alpha <b>beta</b> gamma</a>')
    expected_sxpr = '(a (:PlainText "alpha ") (b "beta") (:PlainText " gamma"))'
    assert flatten_sxpr(mixed.as_sxpr()) == expected_sxpr

    # Whitespace-padded input is expected to flatten to the bare tags.
    padded = parse_xml(' <a> <b>beta</b> </a> ')
    expected_xml = '<a><b>beta</b></a>'
    assert flatten_xml(padded.as_xml()) == expected_xml
class
TestNode
:
"""
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment