Commit a471a1e0 authored by di68kap's avatar di68kap

- parsers.py: string slicing replaced by toolkit.StringView

parent 481891e3
This diff is collapsed.
......@@ -31,7 +31,7 @@ except ImportError:
from .typing34 import AbstractSet, Any, ByteString, Callable, cast, Container, Dict, \
Iterator, List, NamedTuple, Sequence, Union, Text, Tuple
from DHParser.toolkit import is_logging, log_dir, TextView, line_col, identity
from DHParser.toolkit import is_logging, log_dir, StringView, line_col, identity
__all__ = ('WHITESPACE_PTYPE',
'MockParser',
......@@ -129,8 +129,8 @@ ZOMBIE_PARSER = ZombieParser()
Error = NamedTuple('Error', [('pos', int), ('msg', str)])
ChildrenType = Tuple['Node', ...]
StrictResultType = Union[ChildrenType, TextView, str]
ResultType = Union[ChildrenType, 'Node', TextView, str, None]
StrictResultType = Union[ChildrenType, StringView, str]
ResultType = Union[ChildrenType, 'Node', StringView, str, None]
def flatten_sxpr(sxpr: str) -> str:
......@@ -189,6 +189,7 @@ class Node:
__slots__ = ['_result', 'children', '_errors', '_len', '_pos', 'parser', 'error_flag']
def __init__(self, parser, result: ResultType) -> None:
"""Initializes the ``Node``-object with the ``Parser``-Instance
that generated the node and the parser's result.
......@@ -251,7 +252,7 @@ class Node:
# or isinstance(result, Node)
# or isinstance(result, str)), str(result)
self._result = (result,) if isinstance(result, Node) else str(result) \
if isinstance(result, TextView) else result or '' # type: StrictResultType
if isinstance(result, StringView) else result or '' # type: StrictResultType
self.children = cast(ChildrenType, self._result) \
if isinstance(self._result, tuple) else cast(ChildrenType, ()) # type: ChildrenType
self.error_flag = any(r.error_flag for r in self.children) # type: bool
......
......@@ -43,14 +43,18 @@ except ImportError:
import sys
try:
from typing import Any, List, Tuple, Optional
from typing import Any, List, Tuple, Collection, Union, Optional
except ImportError:
from .typing34 import Any, List, Tuple, Optional
from .typing34 import Any, List, Tuple, Collection, Union, Optional
__all__ = ('logging',
'is_logging',
'log_dir',
'logfile_basename',
'StringView',
'sv_match',
'sv_index',
'sv_search',
# 'supress_warnings',
# 'warnings',
# 'repr_call',
......@@ -150,22 +154,93 @@ def clear_logs(logfile_types={'.cst', '.ast', '.log'}):
os.rmdir(log_dirname)
class TextView:
__slots__ = ['text', 'begin', 'end']
class StringView:
    """A rudimentary string view, just enough for the use cases of the parser
    module.

    Slicing Python strings always yields copies of a segment of the original
    string. See: https://mail.python.org/pipermail/python-dev/2008-May/079699.html
    However, this becomes costly (in terms of space and, as a consequence, also
    time) when parsing longer documents. Unfortunately, Python's ``memoryview``
    does not work for unicode strings. Hence, the StringView class.

    A view stores a reference to the complete text together with the absolute
    ``begin`` and ``end`` offsets of the window it represents. Slicing a view
    yields another view on the same text; only ``str()`` (and therefore
    ``==`` and ``hash()``) materialise a copy of the window.

    Attributes:
        text:   the complete underlying string
        begin:  absolute index of the first character of the view
        end:    absolute index one past the last character of the view
        len:    number of characters in the view (never negative)
    """
    __slots__ = ['text', 'begin', 'end', 'len']

    def __init__(self, text: str, begin: Optional[int] = 0, end: Optional[int] = None) -> None:
        """Creates a view on ``text[begin:end]``.

        ``begin`` and ``end`` follow the conventions of Python slices:
        ``None`` means "from the start" resp. "up to the end", negative
        values count from the end of ``text`` and out-of-range values are
        clamped.
        """
        self.text = text  # type: str
        self.begin, self.end = StringView.real_indices(begin, end, len(text))
        # ``end`` may lie before ``begin`` (e.g. ``sv[7:4]``); such a view is empty
        self.len = max(self.end - self.begin, 0)

    @staticmethod
    def real_indices(begin, end, len):
        """Normalises the slice bounds ``begin`` and ``end`` for a sequence of
        length ``len`` and returns them as a tuple ``(begin, end)``.

        ``None`` is replaced by ``0`` resp. ``len``, negative values are
        counted from the end, and the results are clamped to ``0 <= i <= len``.
        The returned ``end`` may be smaller than the returned ``begin``.
        """
        def pack(index, length):
            if index < 0:
                index += length
            return min(max(index, 0), length)

        if begin is None:
            begin = 0
        if end is None:
            end = len
        return pack(begin, len), pack(end, len)

    def __bool__(self):
        return bool(self.text) and self.end > self.begin

    def __len__(self):
        return self.len

    def __str__(self):
        return self.text[self.begin:self.end]

    def __getitem__(self, index):
        """Returns a new view for the slice ``index``, which is interpreted
        relative to this view. Only slices with a step size of 1 are supported.
        """
        assert isinstance(index, slice), "As of now, StringView only allows slicing."
        assert index.step is None or index.step == 1, \
            "Step sizes other than 1 are not yet supported by StringView"
        start, stop = StringView.real_indices(index.start, index.stop, self.len)
        return StringView(self.text, self.begin + start, self.begin + stop)

    def __eq__(self, other):
        return str(self) == str(other)  # PERFORMANCE WARNING: This creates copies of the strings

    def __hash__(self):
        # Defining __eq__ alone would make instances unhashable. The hash is
        # that of the viewed string, so that it agrees with __eq__.
        return hash(str(self))

    def find(self, sub, start=None, end=None) -> int:
        """Returns the lowest index, relative to the view, at which ``sub``
        occurs within ``self[start:end]``, or ``-1`` if it does not occur.
        """
        if start is None and end is None:
            lo, hi = self.begin, self.end
        else:
            rel_start, rel_end = StringView.real_indices(start, end, self.len)
            lo, hi = self.begin + rel_start, self.begin + rel_end
        pos = self.text.find(sub, lo, hi)
        # translate only real hits; a miss must stay -1 rather than -1 - begin
        return pos - self.begin if pos >= 0 else -1

    def startswith(self, prefix: str, start: int = 0, end: Optional[int] = None) -> bool:
        """Returns True if ``self[start:end]`` starts with ``prefix``.

        ``start`` and ``end`` are relative to the view and are clamped to it,
        so that text beyond the bounds of the view is never taken into account.
        """
        rel_start, rel_end = StringView.real_indices(start, end, self.len)
        return self.text.startswith(prefix, self.begin + rel_start, self.begin + rel_end)
def sv_match(regex, sv: StringView):
    """Applies ``regex.match`` to the window of text covered by ``sv``.

    The pattern is run on the underlying text with ``pos`` and ``endpos``
    set to the bounds of the view, so no substring has to be copied.
    Positions reported by the result refer to the underlying text, not
    to the view.
    """
    text, first, last = sv.text, sv.begin, sv.end
    return regex.match(text, pos=first, endpos=last)
def sv_index(absolute_index: Union[int, Collection], sv: StringView) -> Union[int, tuple]:
    """
    Converts an index into the string watched by a StringView object
    to an index relative to the string view object, e.g.:
    >>> sv = StringView('xxIxx')[2:3]
    >>> match = sv_match(re.compile('I'), sv)
    >>> match.end()
    3
    >>> sv_index(match.end(), sv)
    1

    If ``absolute_index`` is a collection of indices rather than a single
    index, a tuple with the converted indices is returned.
    """
    offset = sv.begin
    try:
        relative = absolute_index - offset
    except TypeError:
        # not a number: treat the argument as a collection of indices
        relative = tuple(position - offset for position in absolute_index)
    return relative
def sv_search(regex, sv: StringView):
    """Applies ``regex.search`` to the window of text covered by ``sv``.

    The pattern is run on the underlying text with ``pos`` and ``endpos``
    set to the bounds of the view, so no substring has to be copied.
    Positions reported by the result refer to the underlying text, not
    to the view.
    """
    text, first, last = sv.text, sv.begin, sv.end
    return regex.search(text, pos=first, endpos=last)
EMPTY_STRING_VIEW = StringView('')
# def repr_call(f, parameter_list) -> str:
......
......@@ -49,7 +49,7 @@ def fail_on_error(src, result):
sys.exit(1)
def test():
def tst_func():
with toolkit.logging(False):
files = os.listdir('testdata')
files.sort()
......@@ -87,7 +87,8 @@ def mem_profile(func):
print(stat)
if __name__ == "__main__":
cpu_profile(test)
cpu_profile(tst_func)
......@@ -26,4 +26,5 @@ sys.path.extend(['../', './'])
if __name__ == "__main__":
from DHParser.testing import runner
runner("", globals())
\ No newline at end of file
runner("", globals())
......@@ -24,7 +24,7 @@ from functools import partial
sys.path.extend(['../', './'])
from DHParser.toolkit import is_logging, logging, compile_python_object
from DHParser.toolkit import is_logging, logging, StringView, compile_python_object
from DHParser.parser import compile_source, Retrieve, Grammar, Forward, Token, ZeroOrMore, RE, \
RegExp, Lookbehind, NegativeLookahead, OneOrMore, Series
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
......@@ -152,7 +152,7 @@ class TestRegex:
assert result
assert not messages, str(messages)
parser = compile_python_object(DHPARSER_IMPORTS + result, '\w+Grammar$')()
node, rest = parser.regex('abc+def')
node, rest = parser.regex(StringView('abc+def'))
assert rest == ''
assert node.parser.name == "regex"
assert str(node) == 'abc+def'
......
......@@ -97,6 +97,7 @@ class TestNode:
transform = get_ebnf_transformer()
compiler = get_ebnf_compiler()
tree = parser(ebnf)
print(tree.as_sxpr())
tree_copy = copy.deepcopy(tree)
transform(tree_copy)
res1 = compiler(tree_copy)
......
......@@ -23,9 +23,112 @@ limitations under the License.
import concurrent.futures
import os
import sys
try:
import regex as re
except ImportError:
import re
sys.path.extend(['../', './'])
from DHParser.toolkit import load_if_file, logging, log_dir, is_logging
from DHParser.toolkit import load_if_file, logging, log_dir, is_logging, StringView, \
sv_match, sv_search, EMPTY_STRING_VIEW
class TestStringView:
    """Unit tests for ``StringView`` and the ``sv_*`` helper functions."""

    def _digits_view(self):
        """Returns a view on the ten digits of a string that is padded
        with one blank on either side."""
        return StringView(" 0123456789 ", 1, -1)

    def test_real_indices(self):
        real_indices = StringView.real_indices
        assert real_indices(3, 5, 10) == (3, 5)
        assert real_indices(None, None, 10) == (0, 10)
        assert real_indices(-2, -1, 10) == (8, 9)
        assert real_indices(-3, 11, 10) == (7, 10)
        assert real_indices(-5, -12, 10) == (5, 0)
        assert real_indices(-12, -5, 10) == (0, 5)
        assert real_indices(7, 6, 10) == (7, 6)
        assert real_indices(None, 0, 10) == (0, 0)

    def test_creation(self):
        digits = "0123456789"
        assert str(StringView(digits)) == digits
        assert str(StringView(digits, 3, 4)) == '3'
        assert str(StringView(digits, -4)) == '6789'

    def test_equality(self):
        digits = "0123456789"
        assert StringView(digits) == digits
        assert StringView(digits, 3, 4) == '3'
        assert StringView(digits, -4) == '6789'

    def test_slicing(self):
        view = self._digits_view()
        assert view == '0123456789'
        assert view[3:4] == '3'
        assert view[-3:-1] == '78'
        assert view[4:3] == ''
        assert view[:4] == '0123'
        assert view[4:] == '456789'
        assert view[-2:] == '89'
        assert view[:-5] == '01234'
        assert isinstance(view[3:5], StringView)

    def test_len(self):
        view = self._digits_view()
        assert len(view) == 10
        assert view.len == 10
        assert len(view[5:5]) == 0
        assert len(view[7:4]) == 0
        assert len(view[-12:-2]) == 8
        assert len(view[-12:12]) == 10

    def test_bool(self):
        assert not StringView('')
        assert StringView('x')
        view = self._digits_view()
        assert not view[5:4]
        assert view[4:5], str(view[4:5])
        assert not view[3:3]
        assert not view[12:13]
        assert view[0:20]

    def test_sv_match(self):
        view = self._digits_view()
        assert sv_match(re.compile(r'\d'), view)
        assert sv_match(re.compile(r'\d+'), view)
        assert not sv_match(re.compile(r' '), view)
        assert sv_match(re.compile(r'45'), view[4:])

    def test_sv_search(self):
        view = self._digits_view()
        assert sv_search(re.compile(r'5'), view)
        assert not sv_search(re.compile(r' '), view)
        assert sv_search(re.compile(r'5'), view[5:])
        assert not sv_search(re.compile(r'9'), view[:9])

    def test_find(self):
        view = self._digits_view()
        assert view.find('5') == 5
        assert view.find(' ') < 0
        assert view.find('0', 1) < 0
        assert view.find('9', 0, 8) < 0
        assert view.find('45', 1, 8) == 4

    def test_startswith(self):
        view = self._digits_view()
        assert view.startswith('012')
        assert view.startswith('123', 1)
        assert not view.startswith('123', 1, 3)

    def test_EMPTY_STRING_VIEW(self):
        assert len(EMPTY_STRING_VIEW) == 0
        assert EMPTY_STRING_VIEW.find('x') < 0
        assert not sv_match(re.compile(r'x'), EMPTY_STRING_VIEW)
        assert sv_match(re.compile(r'.*'), EMPTY_STRING_VIEW)
        assert len(EMPTY_STRING_VIEW[0:1]) == 0
class TestToolkit:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment