In January 2021 we will introduce a 10 GB quota for project repositories. Higher limits for individual projects will be available on request. Please see https://doku.lrz.de/display/PUBLIC/GitLab for more information.

Commit 920e07d4 authored by di68kap

- syntaxtree: parse_XXX-functions: bug fixes when parsing attributes and...

- syntaxtree: parse_XXX-functions: bug fixes when parsing attributes and parse_json_syntaxtree added (test case added to test/test_syntaxtree.py)
parent 6022f5b8
...@@ -10,9 +10,9 @@ import cython ...@@ -10,9 +10,9 @@ import cython
# critical code paths of stringview.py. # critical code paths of stringview.py.
# see https://cython.readthedocs.io/en/latest/src/tutorial/pure.html # see https://cython.readthedocs.io/en/latest/src/tutorial/pure.html
cdef int first_char(str text, int begin, int end) cdef int first_char(str text, int begin, int end, str chars)
cdef int last_char(str text, int begin, int end) cdef int last_char(str text, int begin, int end, str chars)
cdef int pack_index(int index, int length) cdef int pack_index(int index, int length)
......
...@@ -47,20 +47,20 @@ except ImportError: ...@@ -47,20 +47,20 @@ except ImportError:
__all__ = ('StringView', 'EMPTY_STRING_VIEW', 'cython_optimized') __all__ = ('StringView', 'EMPTY_STRING_VIEW', 'cython_optimized')
def first_char(text, begin: int, end: int, chars: str) -> int:
    """Returns the index of the first character of `text` within the
    half-open range [begin, end) that is NOT contained in `chars`.

    :param text: the string that is scanned
    :param begin: index where scanning starts (inclusive)
    :param end: index where scanning stops (exclusive)
    :param chars: the characters that are to be skipped
    :returns: the index of the first character not in `chars`; `end`, if
        all characters of a non-empty range are contained in `chars`;
        `begin` unchanged, if the range is empty (begin >= end).
    """
    # advance past every leading character that belongs to `chars`
    while begin < end and text[begin] in chars:
        begin += 1
    return begin
def last_char(text, begin: int, end: int, chars: str) -> int:
    """Returns the index just past the last character of `text` within the
    half-open range [begin, end) that is NOT contained in `chars`, i.e.
    the (exclusive) end index after all trailing `chars` have been skipped.

    :param text: the string that is scanned
    :param begin: lower bound of the range (inclusive)
    :param end: upper bound of the range (exclusive); scanning runs
        backwards from here
    :param chars: the characters that are to be skipped
    :returns: the new exclusive end index; `begin`, if all characters of
        a non-empty range are contained in `chars`; `end` unchanged, if
        the range is empty (end <= begin).
    """
    # `end` is exclusive, therefore the character to inspect is text[end - 1]
    while end > begin and text[end - 1] in chars:
        end -= 1
    return end
...@@ -289,24 +289,24 @@ class StringView: # collections.abc.Sized ...@@ -289,24 +289,24 @@ class StringView: # collections.abc.Sized
return regex.finditer(self._text, pos=self._begin, endpos=self._end) return regex.finditer(self._text, pos=self._begin, endpos=self._end)
@cython.locals(begin=cython.int, end=cython.int)
def strip(self, chars: str = ' \n\t'):
    """Returns a copy of the StringView `self` with all leading and
    trailing characters that are contained in `chars` removed. By
    default, blanks, line feeds and tabs are stripped.

    If there is nothing to strip, `self` itself is returned.
    """
    # first_char() and last_char() yield indices into self._text;
    # subtracting self._begin makes them relative to this view.
    begin = first_char(self._text, self._begin, self._end, chars) - self._begin
    end = last_char(self._text, self._begin, self._end, chars) - self._begin
    return self if begin == 0 and end == self._len else self[begin:end]
@cython.locals(begin=cython.int)
def lstrip(self, chars: str = ' \n\t'):
    """Returns a copy of `self` with all leading characters that are
    contained in `chars` removed. By default, blanks, line feeds and
    tabs are stripped.

    If there is nothing to strip, `self` itself is returned.
    """
    # first_char() yields an index into self._text; subtracting
    # self._begin makes it relative to this view.
    begin = first_char(self._text, self._begin, self._end, chars) - self._begin
    return self if begin == 0 else self[begin:]
@cython.locals(end=cython.int)
def rstrip(self, chars: str = ' \n\t'):
    """Returns a copy of `self` with all trailing characters that are
    contained in `chars` removed. By default, blanks, line feeds and
    tabs are stripped.

    If there is nothing to strip, `self` itself is returned.
    """
    # last_char() yields an (exclusive) index into self._text;
    # subtracting self._begin makes it relative to this view.
    end = last_char(self._text, self._begin, self._end, chars) - self._begin
    return self if end == self._len else self[:end]
@cython.locals(length=cython.int, i=cython.int, k=cython.int) @cython.locals(length=cython.int, i=cython.int, k=cython.int)
......
This diff is collapsed.
...@@ -133,6 +133,8 @@ class TestStringView: ...@@ -133,6 +133,8 @@ class TestStringView:
s = StringView('(a (b c))') s = StringView('(a (b c))')
assert s.strip() == '(a (b c))' assert s.strip() == '(a (b c))'
assert s[1:].strip() == 'a (b c))' assert s[1:].strip() == 'a (b c))'
s = StringView('"22"')
assert s.strip('"') == '22'
def text_split(self): def text_split(self):
s = StringView(' 1,2,3,4,5 ', 1, -1) s = StringView(' 1,2,3,4,5 ', 1, -1)
......
...@@ -25,7 +25,7 @@ import sys ...@@ -25,7 +25,7 @@ import sys
sys.path.extend(['../', './']) sys.path.extend(['../', './'])
from DHParser.syntaxtree import Node, RootNode, parse_sxpr, parse_xml, flatten_sxpr, \ from DHParser.syntaxtree import Node, RootNode, parse_sxpr, parse_xml, flatten_sxpr, \
flatten_xml, ZOMBIE_TAG flatten_xml, parse_json_syntaxtree, ZOMBIE_TAG
from DHParser.transform import traverse, reduce_single_child, \ from DHParser.transform import traverse, reduce_single_child, \
replace_by_single_child, flatten, remove_empty, remove_whitespace replace_by_single_child, flatten, remove_empty, remove_whitespace
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
...@@ -91,6 +91,36 @@ class TestParseJSON: ...@@ -91,6 +91,36 @@ class TestParseJSON:
s = self.tree.as_json(indent=2, ensure_ascii=False) s = self.tree.as_json(indent=2, ensure_ascii=False)
tree_copy = Node.from_json_obj(json.loads(s)) tree_copy = Node.from_json_obj(json.loads(s))
def test_attr_serialization_and_parsing(self):
    """Serializes a node that carries attributes and a position as JSON,
    XML and S-expression and checks the trees parsed back from these
    serializations.
    """
    n = Node('employee', 'James Bond').with_pos(46)
    n.attr['branch'] = 'Secret Service'
    n.attr['id'] = '007'

    # JSON
    # (the local is named `json_str` so that it does not shadow the
    # `json` module that other tests in this file rely on)
    json_str = n.as_json()
    tree = parse_json_syntaxtree(json_str)
    # NOTE(review): these assertions assume that as_json() serializes the
    # attributes of a node -- confirm against Node.as_json()
    assert tree.attr['branch'] == 'Secret Service'
    assert tree.attr['id'] == '007'

    # XML
    xml = n.as_xml()
    assert '_pos' not in xml
    xml = n.as_xml('')
    assert '_pos' in xml
    tree = parse_xml(xml)
    assert tree.pos == 46
    assert '_pos' not in tree.attr
    tree = parse_xml(xml, ignore_pos=True)
    assert '_pos' in tree.attr
    assert tree._pos < 0

    # S-Expression
    sxpr = n.as_sxpr()
    assert 'pos' not in sxpr
    sxpr = n.as_sxpr('')
    assert 'pos' in sxpr
    tree = parse_sxpr(sxpr)
    assert tree.pos == 46
    assert 'pos' not in tree.attr
class TestNode: class TestNode:
""" """
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment