16.12.2021, 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit ca252120 authored by Eckhart Arnold
Browse files

- small adjustments

parent f27dba11
...@@ -23,3 +23,5 @@ dist/ ...@@ -23,3 +23,5 @@ dist/
MANIFEST MANIFEST
playground/* playground/*
DevScripts/DHParser.py DevScripts/DHParser.py
DHParser/cstringview.c
*.so
...@@ -34,10 +34,10 @@ cdef inline int pack_index(int index, int len): ...@@ -34,10 +34,10 @@ cdef inline int pack_index(int index, int len):
return 0 if index < 0 else len if index > len else index return 0 if index < 0 else len if index > len else index
cdef real_indices(begin, end, len): cdef real_indices(begin, end, int len):
if begin is None: begin = 0 cdef int begin_i = 0 if begin is None else begin
if end is None: end = len cdef int end_i = len if end is None else end
return pack_index(begin, len), pack_index(end, len) return pack_index(begin_i, len), pack_index(end_i, len)
class StringView(collections.abc.Sized): class StringView(collections.abc.Sized):
...@@ -52,9 +52,7 @@ class StringView(collections.abc.Sized): ...@@ -52,9 +52,7 @@ class StringView(collections.abc.Sized):
__slots__ = ['text', 'begin', 'end', 'len', 'fullstring_flag'] __slots__ = ['text', 'begin', 'end', 'len', 'fullstring_flag']
def __init__(self, text: str, begin: Optional[int] = 0, end: Optional[int] = None) -> None: def __init__(self, text: str, begin: Optional[int] = 0, end: Optional[int] = None) -> None:
self.text = text # type: str self.text = text
self.begin = 0 # type: int
self.end = 0 # type: int
self.begin, self.end = real_indices(begin, end, len(text)) self.begin, self.end = real_indices(begin, end, len(text))
self.len = max(self.end - self.begin, 0) self.len = max(self.end - self.begin, 0)
self.fullstring_flag = (self.begin == 0 and self.len == len(self.text)) self.fullstring_flag = (self.begin == 0 and self.len == len(self.text))
...@@ -160,6 +158,7 @@ class StringView(collections.abc.Sized): ...@@ -160,6 +158,7 @@ class StringView(collections.abc.Sized):
return regex.search(self.text, pos=self.begin, endpos=self.end) return regex.search(self.text, pos=self.begin, endpos=self.end)
def strip(self): def strip(self):
cdef int begin, end
if self.fullstring_flag: if self.fullstring_flag:
return self.text.strip() return self.text.strip()
else: else:
...@@ -173,6 +172,7 @@ class StringView(collections.abc.Sized): ...@@ -173,6 +172,7 @@ class StringView(collections.abc.Sized):
# return str(self).strip() # PERFORMANCE WARNING: This creates a copy of the string # return str(self).strip() # PERFORMANCE WARNING: This creates a copy of the string
def split(self, sep=None): def split(self, sep=None):
cdef int i, k, l
if self.fullstring_flag: if self.fullstring_flag:
return self.text.split(sep) return self.text.split(sep)
else: else:
......
...@@ -41,7 +41,7 @@ def real_indices(begin, end, len): ...@@ -41,7 +41,7 @@ def real_indices(begin, end, len):
class StringView(collections.abc.Sized): class StringView(collections.abc.Sized):
"""" """
A rudimentary StringView class, just enough for the use cases A rudimentary StringView class, just enough for the use cases
in parser.py. The difference between a StringView and the python in parser.py. The difference between a StringView and the python
builtin strings is that StringView-objects do slicing without builtin strings is that StringView-objects do slicing without
...@@ -53,11 +53,9 @@ class StringView(collections.abc.Sized): ...@@ -53,11 +53,9 @@ class StringView(collections.abc.Sized):
def __init__(self, text: str, begin: Optional[int] = 0, end: Optional[int] = None) -> None: def __init__(self, text: str, begin: Optional[int] = 0, end: Optional[int] = None) -> None:
self.text = text # type: str self.text = text # type: str
self.begin = 0 # type: int
self.end = 0 # type: int
self.begin, self.end = real_indices(begin, end, len(text)) self.begin, self.end = real_indices(begin, end, len(text))
self.len = max(self.end - self.begin, 0) self.len = max(self.end - self.begin, 0) # type: int
self.fullstring_flag = (self.begin == 0 and self.len == len(self.text)) self.fullstring_flag = (self.begin == 0 and self.len == len(self.text)) # type: bool
def __bool__(self): def __bool__(self):
return self.end > self.begin # and bool(self.text) return self.end > self.begin # and bool(self.text)
......
...@@ -25,13 +25,11 @@ However, this becomes costly (in terms of space and as a consequence also ...@@ -25,13 +25,11 @@ However, this becomes costly (in terms of space and as a consequence also
time) when parsing longer documents. Unfortunately, Python's `memoryview` time) when parsing longer documents. Unfortunately, Python's `memoryview`
does not work for unicode strings. Hence, the StringView class. does not work for unicode strings. Hence, the StringView class.
""" """
import collections
from typing import Optional, Iterable, Tuple
__all__ = ('StringView', 'EMPTY_STRING_VIEW') __all__ = ('StringView', 'EMPTY_STRING_VIEW')
try: try:
import pyximport; pyximport.install() # import pyximport; pyximport.install() # only for development
from DHParser.cstringview import StringView, EMPTY_STRING_VIEW from DHParser.cstringview import StringView, EMPTY_STRING_VIEW
except ImportError: except ImportError:
from DHParser.pstringview import StringView, EMPTY_STRING_VIEW from DHParser.pstringview import StringView, EMPTY_STRING_VIEW
......
#!/bin/sh #!/bin/sh
python3 setup.py sdist bdist_wheel python3 setup.py sdist bdist
...@@ -15,7 +15,7 @@ setup( ...@@ -15,7 +15,7 @@ setup(
name='DHParser', name='DHParser',
version=__version__, version=__version__,
packages=['DHParser'], packages=['DHParser'],
ext_modules = cythonize('DHParser/cstringview.pyx') ext_modules=cythonize('DHParser/cstringview.pyx'),
url='https://gitlab.lrz.de/badw-it/DHParser', url='https://gitlab.lrz.de/badw-it/DHParser',
license='MIT License (https://opensource.org/licenses/MIT)', license='MIT License (https://opensource.org/licenses/MIT)',
author='Eckhart Arnold', author='Eckhart Arnold',
......
...@@ -20,13 +20,10 @@ limitations under the License. ...@@ -20,13 +20,10 @@ limitations under the License.
""" """
import sys import sys
try:
import regex as re
except ImportError:
import re
sys.path.extend(['../', './']) sys.path.extend(['../', './'])
from DHParser.toolkit import re
import pyximport; pyximport.install() import pyximport; pyximport.install()
from DHParser.cstringview import StringView, EMPTY_STRING_VIEW from DHParser.cstringview import StringView, EMPTY_STRING_VIEW
......
...@@ -20,13 +20,10 @@ limitations under the License. ...@@ -20,13 +20,10 @@ limitations under the License.
""" """
import sys import sys
try:
import regex as re
except ImportError:
import re
sys.path.extend(['../', './']) sys.path.extend(['../', './'])
from DHParser.toolkit import re
from DHParser.pstringview import StringView, EMPTY_STRING_VIEW, real_indices from DHParser.pstringview import StringView, EMPTY_STRING_VIEW, real_indices
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment