Commit 8c7f14fb authored by Eckhart Arnold's avatar Eckhart Arnold
Browse files

lxml compatibility of attributes can be ensured via configuration value

parent 06bda349
......@@ -336,12 +336,18 @@ CONFIG_PRESET['compact_sxpr_threshold'] = 20
# e.g. attr="<". Possible values are:
# 'ignore' - faulty attribute values will be serialized nonetheless
# 'fix' - attribute values will be corrected, e.g. "<" will be
# replaced by the respective enity and the like
# 'fail' - an error will be raised. Observe that this error will be
# raised when serializing as XML, not when setting the value
# replaced by the respective entity and the like.
# 'lxml' - attribute values will be corrected and any non-ASCII
# character will be replaced by a question mark to ensure
# compatibility with the lxml library.
# 'fail' - an error will be raised when an illegal attribute value
# is encountered while serializing a tree as XML. Illegal
# attribute values can still be set, though, since they
# concern only the XML serialization and not the
# S-expression or JSON serialization.
# Default value = "fail"
CONFIG_PRESET['xml_attribute_error_handling'] = 'fail'
ALLOWED_PRESET_VALUES['xml_attribute_error_handling'] = frozenset({'ignore', 'fix', 'fail'})
ALLOWED_PRESET_VALUES['xml_attribute_error_handling'] = frozenset({'ignore', 'fix', 'lxml', 'fail'})
########################################################################
#
......
......@@ -595,7 +595,8 @@ from DHParser.error import Error, ErrorCode, ERROR, PARSER_STOPPED_BEFORE_END, \
from DHParser.preprocess import SourceMapFunc
from DHParser.stringview import StringView # , real_indices
from DHParser.toolkit import re, cython, linebreaks, line_col, JSONnull, \
validate_XML_attribute_value, fix_XML_attribute_value, identity, Protocol
validate_XML_attribute_value, fix_XML_attribute_value, lxml_XML_attribute_value, \
identity, Protocol
__all__ = ('WHITESPACE_PTYPE',
......@@ -1990,6 +1991,8 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
attr_filter = validate_XML_attribute_value
elif attr_err_handling == 'fix':
attr_filter = fix_XML_attribute_value
elif attr_err_handling == 'lxml':
attr_filter = lxml_XML_attribute_value
else:
assert attr_err_handling == 'ignore', 'Illegal value for configuration ' +\
'variable "xml_attribute_error_handling": ' + attr_err_handling
......
......@@ -208,6 +208,22 @@ class TestParseJSON:
assert tree.pos == 46
assert not 'pos' in tree.attr
def test_attr_error_reporting_and_fixing(self):
    """Exercise every 'xml_attribute_error_handling' mode on a node whose
    attribute value contains characters that are illegal in XML."""
    node = Node('tag', 'content').with_attr(faulty='<&"')

    # 'fail': serializing as XML must raise a ValueError.
    set_config_value('xml_attribute_error_handling', 'fail')
    try:
        node.as_xml()
    except ValueError:
        pass
    else:
        assert False, "ValueError expected"

    # 'fix': '<' and '&' are replaced by their entities.
    set_config_value('xml_attribute_error_handling', 'fix')
    fixed_xml = node.as_xml()
    assert fixed_xml == '''<tag faulty='&lt;&amp;"'>content</tag>''', fixed_xml

    # 'ignore': the faulty value is written out unchanged.
    set_config_value('xml_attribute_error_handling', 'ignore')
    assert node.as_xml() == '''<tag faulty='<&"'>content</tag>'''

    # 'lxml': values are fixed as above and, in addition, every non-ASCII
    # character is replaced by a question mark.
    node.attr['nonascii'] = 'ἱεραρχικωτάτου'
    set_config_value('xml_attribute_error_handling', 'lxml')
    assert node.as_xml() == '''<tag faulty='&lt;&amp;"' nonascii="??????????????">content</tag>'''
class TestNode:
"""
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment