Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
9.2.2023: Due to updates GitLab will be unavailable for some minutes between 9:00 and 11:00.
Open sidebar
badw-it
DHParser
Commits
bf89fbf2
Commit
bf89fbf2
authored
Jun 15, 2018
by
Eckhart Arnold
Browse files
- error handling simplified; XML parser continued
parent
43cb9807
Changes
7
Hide whitespace changes
Inline
Side-by-side
DHParser/compile.py
View file @
bf89fbf2
...
...
@@ -38,7 +38,7 @@ import os
import
re
from
DHParser.preprocess
import
strip_tokens
,
with_source_mapping
,
PreprocessorFunc
from
DHParser.syntaxtree
import
Node
,
RootNode
from
DHParser.syntaxtree
import
Node
,
RootNode
,
StrictResultType
from
DHParser.transform
import
TransformationFunc
from
DHParser.parse
import
Grammar
from
DHParser.error
import
adjust_error_locations
,
is_error
,
Error
...
...
@@ -152,6 +152,16 @@ class Compiler:
"""
return
'on_'
+
node_name
def
compile_children
(
self
,
node
:
Node
)
->
StrictResultType
:
"""Compiles all children of the given node and returns the tuple
of the compiled children or the node's (potentially empty) result
in case the node does not have any children.
"""
if
node
.
children
:
return
tuple
(
self
.
compile
(
child
)
for
child
in
node
.
children
)
else
:
return
node
.
result
def
fallback_compiler
(
self
,
node
:
Node
)
->
Any
:
"""This is a generic compiler function which will be called on
all those node types for which no compiler method `on_XXX` has
...
...
DHParser/syntaxtree.py
View file @
bf89fbf2
...
...
@@ -368,6 +368,20 @@ class Node(collections.abc.Sized):
# return False
def
get
(
self
,
index_or_tagname
:
Union
[
int
,
str
],
surrogate
:
Union
[
'Node'
,
Iterator
[
'Node'
]])
->
Union
[
'Node'
,
Iterator
[
'Node'
]]:
"""Returns the child node with the given index if ``index_or_tagname``
is an integer, or the first child node with that tag name if it is a string. If no
child with the given index or tag_name exists, the ``surrogate`` is
returned instead. This mimics the behaviour of the ``get`` method of
Python dictionaries.
"""
try
:
return
self
[
index_or_tagname
]
except
KeyError
:
return
surrogate
@
property
# this needs to be a (dynamic) property, in case self.parser gets updated
def
tag_name
(
self
)
->
str
:
"""
...
...
@@ -525,7 +539,9 @@ class Node(collections.abc.Sized):
return
head
+
'
\n
'
.
join
([
tab
+
data_fn
(
s
)
for
s
in
res
.
split
(
'
\n
'
)])
+
tail
def
as_sxpr
(
self
,
src
:
str
=
None
,
showerrors
:
bool
=
True
,
indentation
:
int
=
2
,
def
as_sxpr
(
self
,
src
:
str
=
None
,
showerrors
:
bool
=
True
,
indentation
:
int
=
2
,
compact
:
bool
=
False
)
->
str
:
"""
Returns content as S-expression, i.e. in lisp-like form.
...
...
@@ -534,7 +550,9 @@ class Node(collections.abc.Sized):
src: The source text or `None`. In case the source text is
given the position of the element in the text will be
reported as line and column.
compact: If True a compact representation is returned where
showerrors: If True, error messages will be shown.
indentation: The number of whitespaces for indentation
compact: If True, a compact representation is returned where
brackets are omitted and only the indentation indicates the
tree structure.
"""
...
...
@@ -567,8 +585,12 @@ class Node(collections.abc.Sized):
return
self
.
_tree_repr
(
' '
*
indentation
,
opening
,
closing
,
pretty
,
density
=
density
)
def
as_xml
(
self
,
src
:
str
=
None
,
showerrors
:
bool
=
True
,
indentation
:
int
=
2
,
inline_tags
:
Set
[
str
]
=
set
(),
omit_tags
:
Set
[
str
]
=
set
())
->
str
:
def
as_xml
(
self
,
src
:
str
=
None
,
showerrors
:
bool
=
True
,
indentation
:
int
=
2
,
inline_tags
:
Set
[
str
]
=
set
(),
omit_tags
:
Set
[
str
]
=
set
(),
empty_tags
:
Set
[
str
]
=
set
())
->
str
:
"""
Returns content as XML-tree.
...
...
@@ -576,6 +598,8 @@ class Node(collections.abc.Sized):
src: The source text or `None`. In case the source text is
given the position will also be reported as line and
column.
showerrors: If True, error messages will be shown.
indentation: The number of whitespaces for indentation
inline_tags: A set of tag names, the content of which will always be written
on a single line, unless it contains explicit line feeds ('
\n
').
omit_tags: A set of tags from which only the content will be printed, but
...
...
@@ -583,6 +607,8 @@ class Node(collections.abc.Sized):
allows producing a mix of plain text and child tags in the output,
which otherwise is not supported by the Node object, because it
requires its content to be either a tuple of children or string content.
empty_tags: A set of tags which shall be rendered as empty elements, e.g.
"<empty/>" instead of "<empty></empty>".
"""
def
opening
(
node
)
->
str
:
...
...
@@ -599,11 +625,17 @@ class Node(collections.abc.Sized):
if
showerrors
and
node
.
errors
and
not
has_reserved_attrs
:
txt
.
append
(
' err="%s"'
%
''
.
join
(
str
(
err
).
replace
(
'"'
,
r
'\"'
)
for
err
in
node
.
errors
))
return
""
.
join
(
txt
+
[
">
\n
"
])
if
node
.
tag_name
in
empty_tags
:
assert
not
node
.
result
,
(
"Node %s with content %s is not an empty element!"
%
(
node
.
tag_name
,
str
(
node
)))
ending
=
"/>
\n
"
else
:
ending
=
">
\n
"
return
""
.
join
(
txt
+
[
ending
])
def
closing
(
node
):
"""Returns the closing string for the representation of `node`."""
if
node
.
tag_name
in
omit_tags
:
if
node
.
tag_name
in
omit_tags
or
node
.
tag_name
in
empty_tags
:
return
''
return
(
'
\n
</'
)
+
node
.
tag_name
+
'>'
...
...
@@ -716,6 +748,10 @@ class RootNode(Node):
self
.
error_flag
=
0
if
node
is
not
None
:
self
.
swallow
(
node
)
# customization for XML-Representation
self
.
inline_tags
=
set
()
self
.
omit_tags
=
set
()
self
.
empty_tags
=
set
()
def
swallow
(
self
,
node
:
Node
)
->
'RootNode'
:
"""Put `self` in the place of `node` by copying all its data.
...
...
@@ -766,6 +802,14 @@ class RootNode(Node):
self
.
all_errors
.
sort
(
key
=
lambda
e
:
e
.
pos
)
return
self
.
all_errors
def
customized_XML
(
self
):
"""Returns a customized XML representation of the tree.
See the docstring of `Node.as_xml()` for an explanation of the
customizations."""
return
self
.
as_xml
(
inline_tags
=
self
.
inline_tags
,
omit_tags
=
self
.
omit_tags
,
empty_tags
=
self
.
empty_tags
)
ZOMBIE_NODE
=
Node
(
ZOMBIE_PARSER
,
''
)
...
...
dhparser.py
View file @
bf89fbf2
...
...
@@ -207,7 +207,7 @@ def create_project(path: str):
create_file
(
'README.md'
,
README_TEMPLATE
.
format
(
name
=
name
))
create_file
(
'tst_%s_grammar.py'
%
name
,
GRAMMAR_TEST_TEMPLATE
.
format
(
name
=
name
,
dhparserdir
=
dhparserdir
))
create_file
(
'example.
ds
l'
,
'Life is but a walking shadow
\n
'
)
create_file
(
'example.
xm
l'
,
'Life is but a walking shadow
\n
'
)
os
.
chmod
(
'tst_%s_grammar.py'
%
name
,
0o755
)
# The following is left to the user as an exercise
# print('Creating file "%s".' % (name + 'Compiler.py'))
...
...
examples/XML/XMLCompiler.py
View file @
bf89fbf2
...
...
@@ -32,7 +32,8 @@ from DHParser import logging, is_filename, load_if_file, \
is_empty
,
is_expendable
,
collapse
,
replace_content
,
WHITESPACE_PTYPE
,
TOKEN_PTYPE
,
\
remove_nodes
,
remove_content
,
remove_brackets
,
replace_parser
,
remove_anonymous_tokens
,
\
keep_children
,
is_one_of
,
has_content
,
apply_if
,
remove_first
,
remove_last
,
\
remove_anonymous_empty
,
keep_nodes
,
traverse_locally
,
strip
,
lstrip
,
rstrip
,
MockParser
remove_anonymous_empty
,
keep_nodes
,
traverse_locally
,
strip
,
lstrip
,
rstrip
,
MockParser
,
\
ZOMBIE_NODE
#######################################################################
...
...
@@ -680,14 +681,15 @@ class XMLCompiler(Compiler):
self
.
mock_parsers
=
dict
()
def
on_document
(
self
,
node
):
self
.
tree
.
omit_tags
.
add
(
'CharData'
)
return
self
.
fallback_compiler
(
node
)
def
extract_attributes
(
self
,
node_sequence
):
attributes
=
OrderedDict
()
for
node
in
node_sequence
:
if
node
.
tag_name
==
"Attribute"
:
assert
node
[
0
].
tag_name
==
"Name"
assert
node
[
1
].
tag_name
==
"A
TT
Value"
assert
node
[
0
].
tag_name
==
"Name"
,
node
.
as_sexpr
()
assert
node
[
1
].
tag_name
==
"A
tt
Value"
,
node
.
as_sxpr
()
attributes
[
node
[
0
].
content
]
=
node
[
1
].
content
return
attributes
...
...
@@ -875,7 +877,7 @@ class XMLCompiler(Compiler):
if
attributes
:
node
.
attributes
.
update
(
attributes
)
node
.
parser
=
self
.
get_parser
(
stag
[
'Name'
].
content
)
node
.
result
=
node
[
'content'
].
result
node
.
result
=
self
.
compile_children
(
node
.
get
(
'content'
,
ZOMBIE_NODE
))
return
node
# def on_STag(self, node):
...
...
@@ -890,6 +892,7 @@ class XMLCompiler(Compiler):
node
.
attributes
.
update
(
attributes
)
node
.
parser
=
self
.
get_parser
(
node
[
'Name'
].
content
)
node
.
result
=
''
self
.
tree
.
empty_tags
.
add
(
node
.
tag_name
)
return
node
# def on_TagName(self, node):
...
...
@@ -1046,6 +1049,6 @@ if __name__ == "__main__":
print
(
rel_path
+
':'
+
str
(
error
))
sys
.
exit
(
1
)
else
:
print
(
result
.
as_xml
()
if
isinstance
(
result
,
Node
)
else
result
)
print
(
result
.
customized_XML
()
if
isinstance
(
result
,
Node
)
else
result
)
else
:
print
(
"Usage: XMLCompiler.py [FILENAME]"
)
examples/XML/example.dsl
deleted
100644 → 0
View file @
43cb9807
Life is but a walking shadow
examples/XML/example.xml
0 → 100644
View file @
bf89fbf2
<?xml version="1.0" encoding="UTF-8"?>
<note
date=
"2018-06-14"
>
<to>
Tove
</to>
<from>
Jani
</from>
<heading>
Reminder
</heading>
<body>
Don't forget me this weekend!
</body>
<priority
level=
"high"
/>
<remark></remark>
</note>
\ No newline at end of file
test/test_dhparser.py
View file @
bf89fbf2
...
...
@@ -45,7 +45,7 @@ class TestDHParserCommandLineTool:
def
test_dhparser
(
self
):
os
.
system
(
'python ../dhparser.py testdata/neu >/dev/null'
)
os
.
system
(
'python testdata/neu/tst_neu_grammar.py >/dev/null'
)
os
.
system
(
'python testdata/neu/neuCompiler.py testdata/neu/example.
ds
l >testdata/neu/example.xml'
)
os
.
system
(
'python testdata/neu/neuCompiler.py testdata/neu/example.
xm
l >testdata/neu/example.xml'
)
with
open
(
'testdata/neu/example.xml'
,
'r'
,
encoding
=
'utf-8'
)
as
f
:
xml
=
f
.
read
()
assert
xml
.
find
(
'<document>'
)
>=
0
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment