Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
8c7f14fb
Commit
8c7f14fb
authored
May 10, 2021
by
Eckhart Arnold
Browse files
lxml compatibility of attributes can be ensured via configuration value
parent
06bda349
Changes
3
Hide whitespace changes
Inline
Side-by-side
DHParser/configuration.py
View file @
8c7f14fb
...
...
@@ -336,12 +336,18 @@ CONFIG_PRESET['compact_sxpr_threshold'] = 20
# e.g. attr="<". Possible values are:
# 'ignore' - faulty attribute values will be serialized nonetheless
# 'fix' - attribute values will be corrected, e.g. "<" will be
# replaced by the respective enity and the like
# 'fail' - an error will be raised. Observe that this error will be
# raised when serializing as XML, not when setting the value
# replaced by the respective entity and the like.
# 'lxml' - attribute values will be corrected and any non-ASCII
# character will be replaced by a question mark to ensure
# compatibility with the lxml library.
# 'fail' - an error will be raised when an illegal attribute value
# is encountered while serializing a tree as XML. Illegal
# attribute values can still be set, though, since they
# concern only the XML-serialization and not the
# S-expression or JSON serialization.
# Default value = "fail"
CONFIG_PRESET
[
'xml_attribute_error_handling'
]
=
'fail'
ALLOWED_PRESET_VALUES
[
'xml_attribute_error_handling'
]
=
frozenset
({
'ignore'
,
'fix'
,
'fail'
})
ALLOWED_PRESET_VALUES
[
'xml_attribute_error_handling'
]
=
frozenset
({
'ignore'
,
'fix'
,
'lxml'
,
'fail'
})
########################################################################
#
...
...
DHParser/syntaxtree.py
View file @
8c7f14fb
...
...
@@ -595,7 +595,8 @@ from DHParser.error import Error, ErrorCode, ERROR, PARSER_STOPPED_BEFORE_END, \
from
DHParser.preprocess
import
SourceMapFunc
from
DHParser.stringview
import
StringView
# , real_indices
from
DHParser.toolkit
import
re
,
cython
,
linebreaks
,
line_col
,
JSONnull
,
\
validate_XML_attribute_value
,
fix_XML_attribute_value
,
identity
,
Protocol
validate_XML_attribute_value
,
fix_XML_attribute_value
,
lxml_XML_attribute_value
,
\
identity
,
Protocol
__all__
=
(
'WHITESPACE_PTYPE'
,
...
...
@@ -1990,6 +1991,8 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
attr_filter
=
validate_XML_attribute_value
elif
attr_err_handling
==
'fix'
:
attr_filter
=
fix_XML_attribute_value
elif
attr_err_handling
==
'lxml'
:
attr_filter
=
lxml_XML_attribute_value
else
:
assert
attr_err_handling
==
'ignore'
,
'Illegal value for configuration '
+
\
'variable "xml_attribute_error_handling": '
+
attr_err_handling
...
...
tests/test_syntaxtree.py
View file @
8c7f14fb
...
...
@@ -208,6 +208,22 @@ class TestParseJSON:
assert
tree
.
pos
==
46
assert
not
'pos'
in
tree
.
attr
def
test_attr_error_reporting_and_fixing
(
self
):
n
=
Node
(
'tag'
,
'content'
).
with_attr
(
faulty
=
'<&"'
)
set_config_value
(
'xml_attribute_error_handling'
,
'fail'
)
try
:
s
=
n
.
as_xml
()
assert
False
,
"ValueError expected"
except
ValueError
:
pass
set_config_value
(
'xml_attribute_error_handling'
,
'fix'
)
assert
n
.
as_xml
()
==
'''<tag faulty='<&"'>content</tag>'''
,
n
.
as_xml
()
set_config_value
(
'xml_attribute_error_handling'
,
'ignore'
)
assert
n
.
as_xml
()
==
'''<tag faulty='<&"'>content</tag>'''
n
.
attr
[
'nonascii'
]
=
'ἱεραρχικωτάτου'
set_config_value
(
'xml_attribute_error_handling'
,
'lxml'
)
assert
n
.
as_xml
()
==
'''<tag faulty='<&"' nonascii="??????????????">content</tag>'''
class
TestNode
:
"""
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment