Commit 920e07d4 authored by di68kap

- syntaxtree: parse_XXX-functions: bug fixes when parsing attributes and...

- syntaxtree: parse_XXX-functions: bug fixes when parsing attributes and parse_json_syntaxtree added (test case added to test/test_syntaxtree.py)
parent 6022f5b8
......@@ -10,9 +10,9 @@ import cython
# critical code paths of stringview.py.
# see https://cython.readthedocs.io/en/latest/src/tutorial/pure.html
cdef int first_char(str text, int begin, int end)
cdef int first_char(str text, int begin, int end, str chars)
cdef int last_char(str text, int begin, int end)
cdef int last_char(str text, int begin, int end, str chars)
cdef int pack_index(int index, int length)
......
......@@ -47,20 +47,20 @@ except ImportError:
__all__ = ('StringView', 'EMPTY_STRING_VIEW', 'cython_optimized')
# NOTE(review): diff rendering -- where a line appears twice, the first is the
# pre-commit version and the second (the one using `chars`) replaces it.
def first_char(text, begin: int, end: int) -> int:
def first_char(text, begin: int, end: int, chars: str) -> int:
"""Returns the index of the first character of `text` within the
half-open range [begin, end) that is not contained in `chars`.
Returns `end` if every character in that range is contained in `chars`.
"""
while begin < end and text[begin] in ' \n\t':
while begin < end and text[begin] in chars:
begin += 1
return begin
# NOTE(review): diff rendering -- where a line appears twice, the first is the
# pre-commit version and the second (the one using `chars`) replaces it.
def last_char(text, begin: int, end: int) -> int:
def last_char(text, begin: int, end: int, chars: str) -> int:
"""Returns the index just past the last character of `text` within the
half-open range [begin, end) that is not contained in `chars`, i.e. the
exclusive end index of the right-stripped range.
Returns `begin` if every character in that range is contained in `chars`.
"""
while end > begin and text[end - 1] in ' \n\t':
while end > begin and text[end - 1] in chars:
end -= 1
return end
......@@ -289,24 +289,24 @@ class StringView: # collections.abc.Sized
return regex.finditer(self._text, pos=self._begin, endpos=self._end)
# NOTE(review): diff rendering -- where a line appears twice, the first is the
# pre-commit version and the second (the one using `chars`) replaces it.
@cython.locals(begin=cython.int, end=cython.int)
def strip(self):
def strip(self, chars = ' \n\t'):
"""Returns the StringView `self` with all leading and trailing
characters that are contained in `chars` removed. By default, `chars`
is space, newline and tab. If there is nothing to strip, `self` itself
is returned, otherwise a slice of `self`.
"""
begin = first_char(self._text, self._begin, self._end) - self._begin
end = last_char(self._text, self._begin, self._end) - self._begin
begin = first_char(self._text, self._begin, self._end, chars) - self._begin
end = last_char(self._text, self._begin, self._end, chars) - self._begin
return self if begin == 0 and end == self._len else self[begin:end]
# NOTE(review): diff rendering -- where a line appears twice, the first is the
# pre-commit version and the second (the one using `chars`) replaces it.
@cython.locals(begin=cython.int)
def lstrip(self):
def lstrip(self, chars = ' \n\t'):
"""Returns `self` with all leading characters that are contained in
`chars` (default: space, newline, tab) removed. If there is nothing to
strip, `self` itself is returned, otherwise a slice of `self`.
"""
begin = first_char(self._text, self._begin, self._end) - self._begin
begin = first_char(self._text, self._begin, self._end, chars) - self._begin
return self if begin == 0 else self[begin:]
# NOTE(review): diff rendering -- where a line appears twice, the first is the
# pre-commit version and the second (the one using `chars`) replaces it.
@cython.locals(end=cython.int)
def rstrip(self):
def rstrip(self, chars = ' \n\t'):
"""Returns `self` with all trailing characters that are contained in
`chars` (default: space, newline, tab) removed. If there is nothing to
strip, `self` itself is returned, otherwise a slice of `self`.
"""
end = last_char(self._text, self._begin, self._end) - self._begin
end = last_char(self._text, self._begin, self._end, chars) - self._begin
return self if end == self._len else self[:end]
@cython.locals(length=cython.int, i=cython.int, k=cython.int)
......
This diff is collapsed.
......@@ -133,6 +133,8 @@ class TestStringView:
s = StringView('(a (b c))')
assert s.strip() == '(a (b c))'
assert s[1:].strip() == 'a (b c))'
s = StringView('"22"')
assert s.strip('"') == '22'
def text_split(self):
s = StringView(' 1,2,3,4,5 ', 1, -1)
......
......@@ -25,7 +25,7 @@ import sys
sys.path.extend(['../', './'])
from DHParser.syntaxtree import Node, RootNode, parse_sxpr, parse_xml, flatten_sxpr, \
flatten_xml, ZOMBIE_TAG
flatten_xml, parse_json_syntaxtree, ZOMBIE_TAG
from DHParser.transform import traverse, reduce_single_child, \
replace_by_single_child, flatten, remove_empty, remove_whitespace
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
......@@ -91,6 +91,36 @@ class TestParseJSON:
s = self.tree.as_json(indent=2, ensure_ascii=False)
tree_copy = Node.from_json_obj(json.loads(s))
def test_attr_serialization_and_parsing(self):
    """Serializes a node with attributes and a position as JSON, XML and
    S-expression and checks how the position is handled when the XML and
    S-expression forms are parsed back.
    """
    n = Node('employee', 'James Bond').with_pos(46)
    n.attr['branch'] = 'Secret Service'
    n.attr['id'] = '007'

    # JSON
    # The local is deliberately not called `json`, so that it does not
    # shadow the `json` module name used by other tests in this file.
    json_str = n.as_json()
    tree = parse_json_syntaxtree(json_str)
    # NOTE(review): the tree parsed from JSON is not checked at all; this
    # part only verifies that parsing does not raise. Consider asserting
    # the attributes and content once the expected result is settled.

    # XML
    xml = n.as_xml()
    assert xml.find('_pos') < 0       # default call: no position in the output
    xml = n.as_xml('')
    assert xml.find('_pos') >= 0      # as_xml(''): position is serialized
    tree = parse_xml(xml)
    assert tree.pos == 46             # position is restored ...
    assert '_pos' not in tree.attr    # ... and not kept as an attribute
    tree = parse_xml(xml, ignore_pos=True)
    assert '_pos' in tree.attr        # with ignore_pos, it stays an attribute
    assert tree._pos < 0              # and the node's own position is unset

    # S-Expression
    sxpr = n.as_sxpr()
    assert sxpr.find('pos') < 0
    sxpr = n.as_sxpr('')
    assert sxpr.find('pos') >= 0
    tree = parse_sxpr(sxpr)
    assert tree.pos == 46
    assert 'pos' not in tree.attr
class TestNode:
"""
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment