Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
D
DHParser
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Locked Files
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Iterations
Merge Requests
0
Merge Requests
0
Requirements
Requirements
List
Security & Compliance
Security & Compliance
Dependency List
License Compliance
Operations
Operations
Incidents
Analytics
Analytics
Code Review
Insights
Issue
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
badw-it
DHParser
Commits
f27dba11
Commit
f27dba11
authored
Sep 24, 2017
by
Eckhart Arnold
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
- cstringview finished (some (enough?) optimizations)
parent
802fcc4a
Changes
16
Hide whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
476 additions
and
216 deletions
+476
-216
DHParser/VERALTET/cstringview.pyx
DHParser/VERALTET/cstringview.pyx
+212
-0
DHParser/cstringview.pyx
DHParser/cstringview.pyx
+4
-4
DHParser/dsl.py
DHParser/dsl.py
+4
-10
DHParser/ebnf.py
DHParser/ebnf.py
+3
-10
DHParser/parser.py
DHParser/parser.py
+4
-20
DHParser/pstringview.py
DHParser/pstringview.py
+192
-0
DHParser/stringview.py
DHParser/stringview.py
+8
-110
DHParser/syntaxtree.py
DHParser/syntaxtree.py
+5
-17
DHParser/testing.py
DHParser/testing.py
+1
-5
DHParser/toolkit.py
DHParser/toolkit.py
+4
-2
DHParser/transform.py
DHParser/transform.py
+4
-12
test/test_cstringview.py
test/test_cstringview.py
+20
-10
test/test_ebnf.py
test/test_ebnf.py
+1
-5
test/test_parser.py
test/test_parser.py
+1
-5
test/test_pstringview.py
test/test_pstringview.py
+12
-1
test/test_toolkit.py
test/test_toolkit.py
+1
-5
No files found.
DHParser/VERALTET/cstringview.pyx
0 → 100644
View file @
f27dba11
"""cstringview.pyx - a cython-version of the stringview class for speedup
slicing strings without copying
Copyright 2016 by Eckhart Arnold (arnold@badw.de)
Bavarian Academy of Sciences and Humanities (badw.de)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied. See the License for the specific language governing
permissions and limitations under the License.
StringView provides string-slicing without copying.
Slicing Python-strings always yields copies of a segment of the original
string. See: https://mail.python.org/pipermail/python-dev/2008-May/079699.html
However, this becomes costly (in terms of space and as a consequence also
time) when parsing longer documents. Unfortunately, Python's `memoryview`
does not work for unicode strings. Hence, the StringView class.
"""
import
collections
from
typing
import
Optional
,
Iterable
,
Tuple
__all__
=
(
'StringView'
,
'EMPTY_STRING_VIEW'
)
# C struct holding a pair of indices. real_indices() returns its result
# in this form: both fields are filled with values from pack_index().
cdef struct Range:
    int begin  # normalized start index
    int end    # normalized stop index
cdef inline int pack_index(int index, int len):
    # Normalizes a single index the way string slicing does: a negative
    # index counts from the right, and the result is clamped to the
    # range 0 <= index <= len.
    if index < 0:
        index += len
    if index < 0:
        return 0
    if index > len:
        return len
    return index
cdef Range real_indices(begin, end, int len):
    # Resolves the (possibly None or negative) bounds of a slice over a
    # sequence of length `len`. A missing `begin` stands for 0, a missing
    # `end` for `len`; both are then normalized with pack_index().
    cdef Range bounds
    cdef int first = 0 if begin is None else begin
    cdef int last = len if end is None else end
    bounds.begin = pack_index(first, len)
    bounds.end = pack_index(last, len)
    return bounds
class StringView(collections.abc.Sized):
    """
    A rudimentary StringView class, just enough for the use cases
    in parser.py. The difference between a StringView and the python
    builtin strings is that StringView-objects do slicing without
    copying, i.e. slices are just a view on a section of the sliced
    string.

    Instance attributes (declared in ``__slots__``):
        text:   the string that the view looks at
        begin:  offset in ``text`` of the first character of the view
        end:    offset in ``text`` just behind the last character of the view
        len:    number of characters covered by the view
        fullstring_flag:  True, if the view covers all of ``text``; methods
                use this to delegate to ``text`` without index arithmetic
    """
    __slots__ = ['text', 'begin', 'end', 'len', 'fullstring_flag']

    def __init__(self, text: str, begin: Optional[int] = 0, end: Optional[int] = None) -> None:
        """Creates a view on ``text[begin:end]``. ``begin`` and ``end``
        may be None or negative; they are normalized by real_indices()."""
        cdef Range r = real_indices(begin, end, len(text))
        self.text = text  # type: str
        self.begin = r.begin  # type: int
        self.end = r.end  # type: int
        self.len = max(self.end - self.begin, 0)
        self.fullstring_flag = (self.begin == 0 and self.len == len(self.text))

    def __bool__(self):
        return self.end > self.begin  # and bool(self.text)

    def __len__(self):
        return self.len

    def __str__(self):
        """Returns the viewed section as a string. NOTE: unless the view
        already covers the whole text, the copied section replaces
        ``self.text`` and the offsets are reset accordingly."""
        if self.fullstring_flag:  # optimization: avoid slicing/copying
            return self.text
        # since the slice is being copied now, anyway, the copy might
        # as well be stored in the string view
        self.text = self.text[self.begin:self.end]
        self.begin = 0
        self.len = len(self.text)
        self.end = self.len
        self.fullstring_flag = True
        return self.text

    def __eq__(self, other):
        # PERFORMANCE WARNING: This creates copies of the strings
        try:
            same_length = len(other) == len(self)
        except TypeError:
            # objects without a length (None, numbers, ...) are never equal
            return False
        return same_length and str(self) == str(other)

    def __hash__(self):
        # PERFORMANCE WARNING: This creates a copy of the string-slice
        return hash(str(self))

    def __add__(self, other):
        if isinstance(other, str):
            return (str(self) + other)
        else:
            return StringView(str(self) + str(other))

    def __radd__(self, other):
        if isinstance(other, str):
            return (other + str(self))
        else:
            return StringView(str(other) + str(self))

    def __getitem__(self, index):
        """Returns a new view on a sub-section, without copying. As of
        now only slice objects are supported; the slice's step is not
        evaluated."""
        cdef Range r = real_indices(index.start, index.stop, self.len)
        return StringView(self.text, self.begin + r.begin, self.begin + r.end)

    def count(self, sub, start=None, end=None) -> int:
        """Like ``str.count``, with ``start`` and ``end`` relative to the view."""
        cdef Range r
        if self.fullstring_flag:
            return self.text.count(sub, start, end)
        elif start is None and end is None:
            return self.text.count(sub, self.begin, self.end)
        else:
            r = real_indices(start, end, self.len)
            return self.text.count(sub, self.begin + r.begin, self.begin + r.end)

    def find(self, sub, start=None, end=None) -> int:
        """Like ``str.find``: returns the lowest index of ``sub`` relative
        to the view, or -1 if ``sub`` does not occur."""
        cdef Range r
        if self.fullstring_flag:
            return self.text.find(sub, start, end)
        if start is None and end is None:
            pos = self.text.find(sub, self.begin, self.end)
        else:
            r = real_indices(start, end, self.len)
            pos = self.text.find(sub, self.begin + r.begin, self.begin + r.end)
        # only a real hit is shifted into view coordinates; "not found"
        # must remain -1, whatever the offset of the view
        return pos - self.begin if pos >= 0 else -1

    def rfind(self, sub, start=None, end=None) -> int:
        """Like ``str.rfind``: returns the highest index of ``sub`` relative
        to the view, or -1 if ``sub`` does not occur."""
        cdef Range r
        if self.fullstring_flag:
            return self.text.rfind(sub, start, end)
        if start is None and end is None:
            pos = self.text.rfind(sub, self.begin, self.end)
        else:
            r = real_indices(start, end, self.len)
            pos = self.text.rfind(sub, self.begin + r.begin, self.begin + r.end)
        return pos - self.begin if pos >= 0 else -1

    def startswith(self, prefix: str, start: int = 0, end: Optional[int] = None) -> bool:
        """Like ``str.startswith``, with ``start`` and ``end`` relative to
        the view. The test never looks beyond the end of the view."""
        if start < 0:
            start = max(start + self.len, 0)
        if end is None or end > self.len:
            end = self.len
        elif end < 0:
            end = max(end + self.len, 0)
        return self.text.startswith(prefix, self.begin + start, self.begin + end)

    def match(self, regex):
        """Matches a compiled regular expression at the beginning of the
        view. The positions of the returned match object refer to the
        underlying text; see ``index()`` for converting them."""
        return regex.match(self.text, pos=self.begin, endpos=self.end)

    def index(self, absolute_index: int) -> int:
        """
        Converts an index for a string watched by a StringView object
        to an index relative to the string view object, e.g.:
        >>> sv = StringView('xxIxx')[2:3]
        >>> match = sv.match(re.compile('I'))
        >>> match.end()
        3
        >>> sv.index(match.end())
        1
        """
        return absolute_index - self.begin

    def indices(self, absolute_indices: Iterable[int]) -> Tuple[int, ...]:
        """Converts indices for a string watched by a StringView object
        to indices relative to the string view object. See also: `index()`
        """
        return tuple(index - self.begin for index in absolute_indices)

    def search(self, regex):
        """Searches the view for a compiled regular expression. The
        positions of the returned match object refer to the underlying
        text; see ``index()`` for converting them."""
        return regex.search(self.text, pos=self.begin, endpos=self.end)

    def strip(self):
        """Returns a copy of the viewed section as a string, with leading
        and trailing whitespace removed."""
        if self.fullstring_flag:
            return self.text.strip()
        begin = self.begin
        end = self.end
        # isspace() is used so that the result is the same as that of
        # str.strip() in the full-string branch above
        while begin < end and self.text[begin].isspace():
            begin += 1
        # `end` is exclusive: the last character of the view is text[end - 1]
        while end > begin and self.text[end - 1].isspace():
            end -= 1
        return self.text[begin:end]
        # return str(self).strip()  # PERFORMANCE WARNING: This creates a copy of the string

    def split(self, sep=None):
        """Like ``str.split(sep)``: returns a list of strings.

        Raises:
            ValueError: if ``sep`` is the empty string.
        """
        if self.fullstring_flag:
            return self.text.split(sep)
        if sep is None:
            # whitespace splitting follows special rules (runs of whitespace
            # count as one separator), which are left to str.split()
            return self.text[self.begin:self.end].split()
        if not sep:
            # same error as str.split(''); the loop below would never end
            raise ValueError('empty separator')
        pieces = []
        l = len(sep)
        k = 0
        i = self.find(sep, k)
        while i >= 0:
            pieces.append(self.text[self.begin + k:self.begin + i])
            k = i + l
            i = self.find(sep, k)
        pieces.append(self.text[self.begin + k:self.end])
        return pieces
        # return str(self).split(sep, maxsplit)  # PERFORMANCE WARNING: This creates a copy of the string
# module-level view on the empty string, exported through __all__
EMPTY_STRING_VIEW = StringView('')
DHParser/cstringview.pyx
View file @
f27dba11
...
...
@@ -29,12 +29,12 @@ from typing import Optional, Iterable, Tuple
__all__
=
(
'StringView'
,
'EMPTY_STRING_VIEW'
)
def
pack_index
(
index
,
len
):
cdef
inline
int
pack_index
(
int
index
,
int
len
):
index
=
index
if
index
>=
0
else
index
+
len
return
0
if
index
<
0
else
len
if
index
>
len
else
index
def
real_indices
(
begin
,
end
,
len
):
c
def
real_indices
(
begin
,
end
,
len
):
if
begin
is
None
:
begin
=
0
if
end
is
None
:
end
=
len
return
pack_index
(
begin
,
len
),
pack_index
(
end
,
len
)
...
...
@@ -78,10 +78,10 @@ class StringView(collections.abc.Sized):
return
self
.
text
def
__eq__
(
self
,
other
):
return
str
(
self
)
==
str
(
other
)
# PERFORMANCE WARNING: This creates copies of the strings
return
len
(
other
)
==
len
(
self
)
and
str
(
self
)
==
str
(
other
)
# PERFORMANCE WARNING: This creates copies of the strings
def
__hash__
(
self
):
return
hash
(
str
(
self
))
# PERFORMANCE WARNING: This creates a copy of the string
return
hash
(
str
(
self
))
# PERFORMANCE WARNING: This creates a copy of the string
-slice
def
__add__
(
self
,
other
):
if
isinstance
(
other
,
str
):
...
...
DHParser/dsl.py
View file @
f27dba11
...
...
@@ -21,23 +21,17 @@ compilation of domain specific languages based on an EBNF-grammar.
import
os
try
:
import
regex
as
re
except
ImportError
:
import
re
try
:
from
typing
import
Any
,
cast
,
Tuple
,
Union
,
Iterator
,
Iterable
except
ImportError
:
from
.typing34
import
Any
,
cast
,
Tuple
,
Union
,
Iterator
,
Iterable
from
DHParser.ebnf
import
EBNFCompiler
,
grammar_changed
,
\
get_ebnf_preprocessor
,
get_ebnf_grammar
,
get_ebnf_transformer
,
get_ebnf_compiler
,
\
PreprocessorFactoryFunc
,
ParserFactoryFunc
,
TransformerFactoryFunc
,
CompilerFactoryFunc
from
DHParser.toolkit
import
logging
,
load_if_file
,
is_python_code
,
compile_python_object
from
DHParser.toolkit
import
logging
,
load_if_file
,
is_python_code
,
compile_python_object
,
\
re
,
typing
from
DHParser.parser
import
Grammar
,
Compiler
,
compile_source
,
nil_preprocessor
,
PreprocessorFunc
from
DHParser.syntaxtree
import
Node
,
TransformationFunc
from
DHParser.error
import
Error
,
is_error
,
has_errors
,
only_errors
from
typing
import
Any
,
cast
,
Tuple
,
Union
,
Iterator
,
Iterable
__all__
=
(
'GrammarError'
,
'CompilationError'
,
'load_compiler_suite'
,
...
...
DHParser/ebnf.py
View file @
f27dba11
...
...
@@ -20,16 +20,7 @@ import keyword
from
collections
import
OrderedDict
from
functools
import
partial
try
:
import
regex
as
re
except
ImportError
:
import
re
try
:
from
typing
import
Callable
,
Dict
,
List
,
Set
,
Tuple
,
Union
except
ImportError
:
from
.typing34
import
Callable
,
Dict
,
List
,
Set
,
Tuple
,
Union
from
DHParser.toolkit
import
load_if_file
,
escape_re
,
md5
,
sane_parser_name
from
DHParser.toolkit
import
load_if_file
,
escape_re
,
md5
,
sane_parser_name
,
re
,
typing
from
DHParser.parser
import
Grammar
,
mixin_comment
,
nil_preprocessor
,
Forward
,
RegExp
,
RE
,
\
NegativeLookahead
,
Alternative
,
Series
,
Option
,
OneOrMore
,
ZeroOrMore
,
Token
,
\
Required
,
Compiler
,
PreprocessorFunc
...
...
@@ -40,6 +31,8 @@ from DHParser.transform import traverse, remove_brackets, \
remove_tokens
,
flatten
,
forbid
,
assert_content
,
remove_infix_operator
from
DHParser.versionnumber
import
__version__
from
typing
import
Callable
,
Dict
,
List
,
Set
,
Tuple
,
Union
__all__
=
(
'get_ebnf_preprocessor'
,
'get_ebnf_grammar'
,
'get_ebnf_transformer'
,
...
...
DHParser/parser.py
View file @
f27dba11
...
...
@@ -61,29 +61,13 @@ import copy
import
os
from
functools
import
partial
try
:
import
regex
as
re
except
ImportError
:
import
re
try
:
from
typing
import
Any
,
Callable
,
cast
,
Dict
,
Iterator
,
List
,
Set
,
Tuple
,
Union
,
Optional
# try:
# from typing import Collection
# except ImportError:
# pass
except
ImportError
:
from
.typing34
import
Any
,
Callable
,
cast
,
Dict
,
Iterator
,
List
,
Set
,
Tuple
,
Union
,
Optional
from
DHParser.toolkit
import
is_logging
,
log_dir
,
logfile_basename
,
escape_re
,
sane_parser_name
from
DHParser.toolkit
import
is_logging
,
log_dir
,
logfile_basename
,
escape_re
,
sane_parser_name
,
load_if_file
,
\
re
,
typing
from
DHParser.stringview
import
StringView
,
EMPTY_STRING_VIEW
from
DHParser.syntaxtree
import
Node
,
TransformationFunc
,
ParserBase
,
WHITESPACE_PTYPE
,
TOKEN_PTYPE
,
\
ZOMBIE_PARSER
from
DHParser.error
import
Error
,
is_error
,
has_errors
,
linebreaks
,
line_col
from
DHParser.toolkit
import
load_if_file
try
:
import
pyximport
;
pyximport
.
install
()
from
DHParser.cstringview
import
StringView
,
EMPTY_STRING_VIEW
except
ImportError
:
from
DHParser.stringview
import
StringView
,
EMPTY_STRING_VIEW
from
typing
import
Any
,
Callable
,
cast
,
Dict
,
Iterator
,
List
,
Set
,
Tuple
,
Union
,
Optional
__all__
=
(
'PreprocessorFunc'
,
'HistoryRecord'
,
...
...
DHParser/pstringview.py
0 → 100644
View file @
f27dba11
"""pstringview.py - a pure Python version of the stringview class:
slicing strings without copying
Copyright 2016 by Eckhart Arnold (arnold@badw.de)
Bavarian Academy of Sciences and Humanities (badw.de)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied. See the License for the specific language governing
permissions and limitations under the License.
StringView provides string-slicing without copying.
Slicing Python-strings always yields copies of a segment of the original
string. See: https://mail.python.org/pipermail/python-dev/2008-May/079699.html
However, this becomes costly (in terms of space and as a consequence also
time) when parsing longer documents. Unfortunately, Python's `memoryview`
does not work for unicode strings. Hence, the StringView class.
"""
import
collections
from
typing
import
Optional
,
Iterable
,
Tuple
__all__
=
(
'StringView'
,
'EMPTY_STRING_VIEW'
)
def pack_index(index, len):
    """Normalizes a single index like string slicing does: a negative
    index counts from the right, and the result is clamped to the range
    0 <= index <= len."""
    if index < 0:
        index += len
    if index < 0:
        return 0
    if index > len:
        return len
    return index
def real_indices(begin, end, len):
    """Resolves the bounds of a slice over a sequence of length `len`.

    A `begin` of None stands for 0, an `end` of None for `len`. Negative
    values count from the right. Returns a tuple (begin, end), where both
    values lie in the range from 0 to `len` (inclusive).
    """
    def clamp(position):
        if position < 0:
            position += len
        return min(max(position, 0), len)

    first = clamp(0 if begin is None else begin)
    last = clamp(len if end is None else end)
    return first, last
class StringView(collections.abc.Sized):
    """
    A rudimentary StringView class, just enough for the use cases
    in parser.py. The difference between a StringView and the python
    builtin strings is that StringView-objects do slicing without
    copying, i.e. slices are just a view on a section of the sliced
    string.

    Instance attributes (declared in ``__slots__``):
        text:   the string that the view looks at
        begin:  offset in ``text`` of the first character of the view
        end:    offset in ``text`` just behind the last character of the view
        len:    number of characters covered by the view
        fullstring_flag:  True, if the view covers all of ``text``; methods
                use this to delegate to ``text`` without index arithmetic
    """
    __slots__ = ['text', 'begin', 'end', 'len', 'fullstring_flag']

    def __init__(self, text: str, begin: Optional[int] = 0, end: Optional[int] = None) -> None:
        """Creates a view on ``text[begin:end]``. ``begin`` and ``end``
        may be None or negative, just like the bounds of a slice."""
        self.text = text  # type: str
        # slice.indices() resolves None and negative bounds and clamps
        # both values to the range 0 <= index <= len(text)
        self.begin, self.end, _ = slice(begin, end).indices(len(text))
        self.len = max(self.end - self.begin, 0)
        self.fullstring_flag = (self.begin == 0 and self.len == len(self.text))

    def __bool__(self):
        return self.end > self.begin  # and bool(self.text)

    def __len__(self):
        return self.len

    def __str__(self):
        """Returns the viewed section as a string. NOTE: unless the view
        already covers the whole text, the copied section replaces
        ``self.text`` and the offsets are reset accordingly."""
        if self.fullstring_flag:  # optimization: avoid slicing/copying
            return self.text
        # since the slice is being copied now, anyway, the copy might
        # as well be stored in the string view
        self.text = self.text[self.begin:self.end]
        self.begin = 0
        self.len = len(self.text)
        self.end = self.len
        self.fullstring_flag = True
        return self.text

    def __eq__(self, other):
        # PERFORMANCE WARNING: This creates copies of the strings
        try:
            same_length = len(other) == len(self)
        except TypeError:
            # objects without a length (None, numbers, ...) are never equal
            return False
        return same_length and str(self) == str(other)

    def __hash__(self):
        # PERFORMANCE WARNING: This creates a copy of the string-slice
        return hash(str(self))

    def __add__(self, other):
        if isinstance(other, str):
            return (str(self) + other)
        else:
            return StringView(str(self) + str(other))

    def __radd__(self, other):
        if isinstance(other, str):
            return (other + str(self))
        else:
            return StringView(str(other) + str(self))

    def __getitem__(self, index):
        """Returns a new view on a sub-section, without copying. As of
        now only slice objects are supported; the slice's step is not
        evaluated."""
        start, stop, _ = slice(index.start, index.stop).indices(self.len)
        return StringView(self.text, self.begin + start, self.begin + stop)

    def count(self, sub, start=None, end=None) -> int:
        """Like ``str.count``, with ``start`` and ``end`` relative to the view."""
        if self.fullstring_flag:
            return self.text.count(sub, start, end)
        elif start is None and end is None:
            return self.text.count(sub, self.begin, self.end)
        else:
            start, end, _ = slice(start, end).indices(self.len)
            return self.text.count(sub, self.begin + start, self.begin + end)

    def find(self, sub, start=None, end=None) -> int:
        """Like ``str.find``: returns the lowest index of ``sub`` relative
        to the view, or -1 if ``sub`` does not occur."""
        if self.fullstring_flag:
            return self.text.find(sub, start, end)
        if start is None and end is None:
            pos = self.text.find(sub, self.begin, self.end)
        else:
            start, end, _ = slice(start, end).indices(self.len)
            pos = self.text.find(sub, self.begin + start, self.begin + end)
        # only a real hit is shifted into view coordinates; "not found"
        # must remain -1, whatever the offset of the view
        return pos - self.begin if pos >= 0 else -1

    def rfind(self, sub, start=None, end=None) -> int:
        """Like ``str.rfind``: returns the highest index of ``sub`` relative
        to the view, or -1 if ``sub`` does not occur."""
        if self.fullstring_flag:
            return self.text.rfind(sub, start, end)
        if start is None and end is None:
            pos = self.text.rfind(sub, self.begin, self.end)
        else:
            start, end, _ = slice(start, end).indices(self.len)
            pos = self.text.rfind(sub, self.begin + start, self.begin + end)
        return pos - self.begin if pos >= 0 else -1

    def startswith(self, prefix: str, start: int = 0, end: Optional[int] = None) -> bool:
        """Like ``str.startswith``, with ``start`` and ``end`` relative to
        the view. The test never looks beyond the end of the view."""
        if start < 0:
            start = max(start + self.len, 0)
        if end is None or end > self.len:
            end = self.len
        elif end < 0:
            end = max(end + self.len, 0)
        return self.text.startswith(prefix, self.begin + start, self.begin + end)

    def match(self, regex):
        """Matches a compiled regular expression at the beginning of the
        view. The positions of the returned match object refer to the
        underlying text; see ``index()`` for converting them."""
        return regex.match(self.text, pos=self.begin, endpos=self.end)

    def index(self, absolute_index: int) -> int:
        """
        Converts an index for a string watched by a StringView object
        to an index relative to the string view object, e.g.:
        >>> sv = StringView('xxIxx')[2:3]
        >>> match = sv.match(re.compile('I'))
        >>> match.end()
        3
        >>> sv.index(match.end())
        1
        """
        return absolute_index - self.begin

    def indices(self, absolute_indices: Iterable[int]) -> Tuple[int, ...]:
        """Converts indices for a string watched by a StringView object
        to indices relative to the string view object. See also: `index()`
        """
        return tuple(index - self.begin for index in absolute_indices)

    def search(self, regex):
        """Searches the view for a compiled regular expression. The
        positions of the returned match object refer to the underlying
        text; see ``index()`` for converting them."""
        return regex.search(self.text, pos=self.begin, endpos=self.end)

    def strip(self):
        """Returns a copy of the viewed section as a string, with leading
        and trailing whitespace removed."""
        if self.fullstring_flag:
            return self.text.strip()
        begin = self.begin
        end = self.end
        # isspace() is used so that the result is the same as that of
        # str.strip() in the full-string branch above
        while begin < end and self.text[begin].isspace():
            begin += 1
        # `end` is exclusive: the last character of the view is text[end - 1]
        while end > begin and self.text[end - 1].isspace():
            end -= 1
        return self.text[begin:end]
        # return str(self).strip()  # PERFORMANCE WARNING: This creates a copy of the string

    def split(self, sep=None):
        """Like ``str.split(sep)``: returns a list of strings.

        Raises:
            ValueError: if ``sep`` is the empty string.
        """
        if self.fullstring_flag:
            return self.text.split(sep)
        if sep is None:
            # whitespace splitting follows special rules (runs of whitespace
            # count as one separator), which are left to str.split()
            return self.text[self.begin:self.end].split()
        if not sep:
            # same error as str.split(''); the loop below would never end
            raise ValueError('empty separator')
        pieces = []
        l = len(sep)
        k = 0
        i = self.find(sep, k)
        while i >= 0:
            pieces.append(self.text[self.begin + k:self.begin + i])
            k = i + l
            i = self.find(sep, k)
        pieces.append(self.text[self.begin + k:self.end])
        return pieces
        # return str(self).split(sep, maxsplit)  # PERFORMANCE WARNING: This creates a copy of the string
# module-level view on the empty string, exported through __all__
EMPTY_STRING_VIEW = StringView('')
DHParser/stringview.py
View file @
f27dba11
"""stringview.py - a stringview class: slicing strings without copying
(This module merely passes through the Python or Cython version of
string views. The real implementations are to be found in the
pstringview.py and cstringview.pyx modules, respectively.)
Copyright 2016 by Eckhart Arnold (arnold@badw.de)
Bavarian Academy of Sciences and Humanities (badw.de)
...
...
@@ -27,114 +30,9 @@ from typing import Optional, Iterable, Tuple
__all__
=
(
'StringView'
,
'EMPTY_STRING_VIEW'
)
try
:
import
pyximport
;
pyximport
.
install
()
from
DHParser.cstringview
import
StringView
,
EMPTY_STRING_VIEW
except
ImportError
:
from
DHParser.pstringview
import
StringView
,
EMPTY_STRING_VIEW
def pack_index(index, len):
    """Returns `index` as a non-negative offset into a sequence of length
    `len`: negative values count from the right, and the result is
    clamped to the range 0 <= index <= len."""
    if index < 0:
        index += len
    if index < 0:
        return 0
    return len if index > len else index
def real_indices(begin, end, len):
    """Resolves the bounds of a slice over a sequence of length `len`.

    None stands for 0 (`begin`) or for `len` (`end`); negative values
    count from the right. Returns a tuple (begin, end) with both values
    clamped to the range from 0 to `len` (inclusive).
    """
    resolved = []
    for index, default in ((begin, 0), (end, len)):
        if index is None:
            index = default
        if index < 0:
            index += len
        resolved.append(min(max(index, 0), len))
    return resolved[0], resolved[1]
class
StringView
(
collections
.
abc
.
Sized
):
""""
A rudimentary StringView class, just enough for the use cases
in parser.py. The difference between a StringView and the python
builtin strings is that StringView-objects do slicing without
copying, i.e. slices are just a view on a section of the sliced
string.
"""
__slots__
=
[
'text'
,
'begin'
,
'end'
,
'len'
,
'fullstring_flag'
]
def __init__(self, text: str, begin: Optional[int] = 0, end: Optional[int] = None) -> None:
    """Creates a view on ``text[begin:end]``; the bounds are normalized
    by real_indices()."""
    self.text = text  # type: str
    total = len(text)
    first, last = real_indices(begin, end, total)
    self.begin = first  # type: int
    self.end = last  # type: int
    self.len = max(last - first, 0)
    # remember whether the view spans the complete text
    self.fullstring_flag = (first == 0 and self.len == total)
def
__bool__
(
self
):
return
bool
(
self
.
text
)
and
self
.
end
>
self
.
begin
def
__len__
(
self
):
return
self
.
len
def
__str__
(
self
):
if
self
.
fullstring_flag
:
# optimization: avoid slicing/copying
return
self
.
text
return
self
.
text
[
self
.
begin
:
self
.
end
]
def __getitem__(self, index):
    """Returns a new view on a sub-section of this view, without copying.

    `index` is expected to be a slice object: its `start` and `stop`
    are read, its step is not evaluated.
    """
    rel_start, rel_stop = real_indices(index.start, index.stop, self.len)
    offset = self.begin
    return StringView(self.text, offset + rel_start, offset + rel_stop)
def
__eq__
(
self
,
other
):
return
str
(
self
)
==
str
(
other
)
# PERFORMANCE WARNING: This creates copies of the strings
def
count
(
self
,
sub
,
start
=
None
,
end
=
None
)
->
int
:
if
self
.
fullstring_flag
:
return
self
.
text
.
count
(
sub
,
start
,
end
)
elif
start
is
None
and
end
is
None
:
return
self
.
text
.
count
(
sub
,
self
.
begin
,
self
.
end
)
else
:
start
,
end
=
real_indices
(
start
,
end
,
self
.
len
)
return
self
.
text
.
count
(
sub
,
self
.
begin
+
start
,
self
.
begin
+
end
)
def
find
(
self
,
sub
,
start
=
None
,
end
=
None
)
->
int
:
if
self
.
fullstring_flag
:
return
self
.
text
.
find
(
sub
,
start
,
end
)
elif
start
is
None
and
end
is
None
:
return
self
.
text
.
find
(
sub
,
self
.
begin
,
self
.
end
)
-
self
.
begin
else
:
start
,
end
=
real_indices
(
start
,
end
,
self
.
len
)
return
self
.
text
.
find
(
sub
,
self
.
begin
+
start
,
self
.
begin
+
end
)
-
self
.
begin
def
rfind
(
self
,
sub
,
start
=
None
,
end
=
None
)
->
int
:
if
self
.
fullstring_flag
:
return
self
.
text
.
rfind
(
sub
,
start
,
end
)
if
start
is
None
and
end
is
None
:
return
self
.
text
.
rfind
(
sub
,
self
.
begin
,
self
.
end
)
-
self
.
begin
else
:
start
,
end
=
real_indices
(
start
,
end
,
self
.
len
)