Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
D
DHParser
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Locked Files
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Iterations
Merge Requests
0
Merge Requests
0
Requirements
Requirements
List
Security & Compliance
Security & Compliance
Dependency List
License Compliance
Operations
Operations
Incidents
Analytics
Analytics
Code Review
Insights
Issue
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
badw-it
DHParser
Commits
802fcc4a
Commit
802fcc4a
authored
Sep 24, 2017
by
Eckhart Arnold
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
- cstringview basic implementation (no optimizations yet)
parent
87afe62f
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
352 additions
and
6 deletions
+352
-6
DHParser/cstringview.pyx
DHParser/cstringview.pyx
+192
-0
DHParser/parser.py
DHParser/parser.py
+5
-1
DHParser/syntaxtree.py
DHParser/syntaxtree.py
+11
-4
setup.py
setup.py
+6
-0
test/test_cstringview.py
test/test_cstringview.py
+133
-0
test/test_parser.py
test/test_parser.py
+5
-1
No files found.
DHParser/cstringview.pyx
0 → 100644
View file @
802fcc4a
"""cstringview.pyx - a cython-version of the stringview class for speedup
slicing strings without copying
Copyright 2016 by Eckhart Arnold (arnold@badw.de)
Bavarian Academy of Sciences and Humanities (badw.de)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied. See the License for the specific language governing
permissions and limitations under the License.
StringView provides string-slicing without copying.
Slicing Python-strings always yields copies of a segment of the original
string. See: https://mail.python.org/pipermail/python-dev/2008-May/079699.html
However, this becomes costly (in terms of space and as a consequence also
time) when parsing longer documents. Unfortunately, Python's `memoryview`
does not work for unicode strings. Hence, the StringView class.
"""
import
collections
from
typing
import
Optional
,
Iterable
,
Tuple
# Names exported by a star-import of this module.
__all__ = ('StringView', 'EMPTY_STRING_VIEW')
def pack_index(index, len):
    """Normalize ``index`` for a sequence of length ``len``.

    A negative index is counted from the end (``len`` is added once).
    The result is then clamped to the closed range ``0 .. len``.
    """
    if index < 0:
        index = index + len
    if index < 0:
        return 0
    if index > len:
        return len
    return index
def real_indices(begin, end, len):
    """Return the pair ``(begin, end)`` normalized for a sequence of
    length ``len``.

    ``None`` for ``begin`` stands for 0, ``None`` for ``end`` stands for
    ``len``. Both values are then passed through ``pack_index``.
    """
    first = 0 if begin is None else begin
    last = len if end is None else end
    return pack_index(first, len), pack_index(last, len)
class StringView(collections.abc.Sized):
    """
    A rudimentary StringView class, just enough for the use cases
    in parser.py. The difference between a StringView and the python
    builtin strings is that StringView-objects do slicing without
    copying, i.e. slices are just a view on a section of the sliced
    string.

    Attributes (declared in ``__slots__``):
        text:   the underlying string the view refers to
        begin:  absolute index in ``text`` where the view starts
        end:    absolute index in ``text`` where the view stops (exclusive)
        len:    number of characters covered by the view
        fullstring_flag:  True if the view covers all of ``text``; in that
                case most methods delegate directly to the ``str`` method
    """
    __slots__ = ['text', 'begin', 'end', 'len', 'fullstring_flag']

    def __init__(self, text: str, begin: Optional[int] = 0, end: Optional[int] = None) -> None:
        """Creates a view on ``text[begin:end]``. Negative and out-of-range
        indices are normalized by ``real_indices``."""
        self.text = text  # type: str
        self.begin = 0  # type: int
        self.end = 0  # type: int
        self.begin, self.end = real_indices(begin, end, len(text))
        self.len = max(self.end - self.begin, 0)
        self.fullstring_flag = (self.begin == 0 and self.len == len(self.text))

    def __bool__(self):
        return self.end > self.begin  # and bool(self.text)

    def __len__(self):
        return self.len

    def __str__(self):
        """Returns the viewed section as a string. NOTE: As a side effect
        the view is rebased onto the copied section."""
        if self.fullstring_flag:  # optimization: avoid slicing/copying
            return self.text
        # since the slice is being copied now, anyway, the copy might
        # as well be stored in the string view
        self.text = self.text[self.begin:self.end]
        self.begin = 0
        self.len = len(self.text)
        self.end = self.len
        self.fullstring_flag = True
        return self.text

    def __eq__(self, other):
        return str(self) == str(other)  # PERFORMANCE WARNING: This creates copies of the strings

    def __hash__(self):
        return hash(str(self))  # PERFORMANCE WARNING: This creates a copy of the string

    def __add__(self, other):
        if isinstance(other, str):
            return (str(self) + other)
        else:
            return StringView(str(self) + str(other))

    def __radd__(self, other):
        if isinstance(other, str):
            return (other + str(self))
        else:
            return StringView(str(other) + str(self))

    def __getitem__(self, index):
        """Returns a new view on the slice ``index`` of this view without
        copying. Only slice objects are supported; ``index.step`` is
        not evaluated."""
        # assert isinstance(index, slice), "As of now, StringView only allows slicing."
        # assert index.step is None or index.step == 1, \
        #     "Step sizes other than 1 are not yet supported by StringView"
        start, stop = real_indices(index.start, index.stop, self.len)
        return StringView(self.text, self.begin + start, self.begin + stop)

    def count(self, sub, start=None, end=None) -> int:
        """Counts the occurrences of ``sub`` within the view; ``start`` and
        ``end`` are relative to the view."""
        if self.fullstring_flag:
            return self.text.count(sub, start, end)
        elif start is None and end is None:
            return self.text.count(sub, self.begin, self.end)
        else:
            start, end = real_indices(start, end, self.len)
            return self.text.count(sub, self.begin + start, self.begin + end)

    def find(self, sub, start=None, end=None) -> int:
        """Returns the lowest index, relative to the view, where ``sub`` is
        found, or -1 if it is not found. ``start`` and ``end`` are relative
        to the view."""
        if self.fullstring_flag:
            return self.text.find(sub, start, end)
        if start is None and end is None:
            pos = self.text.find(sub, self.begin, self.end)
        else:
            start, end = real_indices(start, end, self.len)
            pos = self.text.find(sub, self.begin + start, self.begin + end)
        # the "not found" marker must not be shifted by the view's offset
        return pos - self.begin if pos >= 0 else -1

    def rfind(self, sub, start=None, end=None) -> int:
        """Returns the highest index, relative to the view, where ``sub`` is
        found, or -1 if it is not found. ``start`` and ``end`` are relative
        to the view."""
        if self.fullstring_flag:
            return self.text.rfind(sub, start, end)
        if start is None and end is None:
            pos = self.text.rfind(sub, self.begin, self.end)
        else:
            start, end = real_indices(start, end, self.len)
            pos = self.text.rfind(sub, self.begin + start, self.begin + end)
        # the "not found" marker must not be shifted by the view's offset
        return pos - self.begin if pos >= 0 else -1

    def startswith(self, prefix: str, start: int = 0, end: Optional[int] = None) -> bool:
        """Returns True if the view (or its section ``start:end``) starts
        with ``prefix``. ``start`` and ``end`` are relative to the view and
        are clamped to it, so that text beyond the view is never matched."""
        start, end = real_indices(start, end, self.len)
        return self.text.startswith(prefix, self.begin + start, self.begin + end)

    def match(self, regex):
        """Matches the compiled ``regex`` at the beginning of the view.
        Positions of the returned match object refer to the underlying
        string; see ``index()`` and ``indices()`` for converting them."""
        return regex.match(self.text, pos=self.begin, endpos=self.end)

    def index(self, absolute_index: int) -> int:
        """
        Converts an index for a string watched by a StringView object
        to an index relative to the string view object, e.g.:
        >>> sv = StringView('xxIxx')[2:3]
        >>> match = sv.match(re.compile('I'))
        >>> match.end()
        3
        >>> sv.index(match.end())
        1
        """
        return absolute_index - self.begin

    def indices(self, absolute_indices: Iterable[int]) -> Tuple[int, ...]:
        """Converts indices for a string watched by a StringView object
        to indices relative to the string view object. See also: `index()`
        """
        return tuple(index - self.begin for index in absolute_indices)

    def search(self, regex):
        """Searches the view for the compiled ``regex``. Positions of the
        returned match object refer to the underlying string."""
        return regex.search(self.text, pos=self.begin, endpos=self.end)

    def strip(self):
        """Returns the viewed section as a string with leading and trailing
        whitespace removed, same as ``str(self).strip()``, but without
        copying the unstripped section first."""
        if self.fullstring_flag:
            return self.text.strip()
        text = self.text
        begin, end = self.begin, self.end
        while begin < end and text[begin].isspace():
            begin += 1
        # `end` is exclusive: the last character of the view is text[end - 1]
        while end > begin and text[end - 1].isspace():
            end -= 1
        return text[begin:end]
        # return str(self).strip()  # PERFORMANCE WARNING: This creates a copy of the string

    def split(self, sep=None):
        """Splits the viewed section at ``sep`` and returns the pieces as a
        list of strings, same as ``str(self).split(sep)``.

        Raises:
            ValueError: if ``sep`` is the empty string (as ``str.split``)
        """
        if self.fullstring_flag:
            return self.text.split(sep)
        if sep is None:
            # whitespace-splitting has its own rules (runs of whitespace,
            # no empty pieces); leave it to str.split
            return self.text[self.begin:self.end].split()
        width = len(sep)
        if width == 0:
            # an empty separator would be "found" at every position forever
            raise ValueError('empty separator')
        text, stop = self.text, self.end
        pieces = []
        pos = self.begin
        hit = text.find(sep, pos, stop)
        while hit >= 0:
            pieces.append(text[pos:hit])
            pos = hit + width
            hit = text.find(sep, pos, stop)
        pieces.append(text[pos:stop])
        return pieces
        # return str(self).split(sep, maxsplit)  # PERFORMANCE WARNING: This creates a copy of the string
# Module-level StringView over the empty string; listed in `__all__`.
EMPTY_STRING_VIEW = StringView('')
DHParser/parser.py
View file @
802fcc4a
...
...
@@ -75,11 +75,15 @@ except ImportError:
from
.typing34
import
Any
,
Callable
,
cast
,
Dict
,
Iterator
,
List
,
Set
,
Tuple
,
Union
,
Optional
from
DHParser.toolkit
import
is_logging
,
log_dir
,
logfile_basename
,
escape_re
,
sane_parser_name
from
DHParser.stringview
import
StringView
,
EMPTY_STRING_VIEW
from
DHParser.syntaxtree
import
Node
,
TransformationFunc
,
ParserBase
,
WHITESPACE_PTYPE
,
TOKEN_PTYPE
,
\
ZOMBIE_PARSER
from
DHParser.error
import
Error
,
is_error
,
has_errors
,
linebreaks
,
line_col
from
DHParser.toolkit
import
load_if_file
try
:
import
pyximport
;
pyximport
.
install
()
from
DHParser.cstringview
import
StringView
,
EMPTY_STRING_VIEW
except
ImportError
:
from
DHParser.stringview
import
StringView
,
EMPTY_STRING_VIEW
__all__
=
(
'PreprocessorFunc'
,
'HistoryRecord'
,
...
...
DHParser/syntaxtree.py
View file @
802fcc4a
...
...
@@ -33,8 +33,12 @@ except ImportError:
Iterator
,
Iterable
,
List
,
NamedTuple
,
Sequence
,
Union
,
Text
,
Tuple
,
Hashable
from
DHParser.toolkit
import
is_logging
,
log_dir
,
identity
from
DHParser.stringview
import
StringView
from
DHParser.error
import
Error
,
linebreaks
,
line_col
try
:
import
pyximport
;
pyximport
.
install
()
from
DHParser.cstringview
import
StringView
except
ImportError
:
from
DHParser.stringview
import
StringView
__all__
=
(
'ParserBase'
,
'WHITESPACE_PTYPE'
,
...
...
@@ -224,7 +228,9 @@ class Node(collections.abc.Sized):
def
__str__
(
self
):
if
self
.
children
:
return
""
.
join
(
str
(
child
)
for
child
in
self
.
children
)
return
str
(
self
.
result
)
elif
isinstance
(
self
.
result
,
StringView
):
self
.
result
=
str
(
self
.
result
)
return
self
.
result
def
__repr__
(
self
):
...
...
@@ -277,8 +283,9 @@ class Node(collections.abc.Sized):
# or isinstance(result, str)), str(result)
# Possible optimization: Do not allow single nodes as argument:
# assert not isinstance(result, Node)
self
.
_result
=
(
result
,)
if
isinstance
(
result
,
Node
)
else
str
(
result
)
\
if
isinstance
(
result
,
StringView
)
else
result
or
''
# type: StrictResultType
self
.
_result
=
(
result
,)
if
isinstance
(
result
,
Node
)
else
result
or
''
# type: StrictResultType
# self._result = (result,) if isinstance(result, Node) else str(result) \
# if isinstance(result, StringView) else result or '' # type: StrictResultType
self
.
children
=
cast
(
ChildrenType
,
self
.
_result
)
\
if
isinstance
(
self
.
_result
,
tuple
)
else
cast
(
ChildrenType
,
())
# type: ChildrenType
if
self
.
children
:
...
...
setup.py
View file @
802fcc4a
#from distutils.core import setup
from
setuptools
import
setup
try:
    from Cython.Build import cythonize
except ImportError:
    # Cython cannot be imported: substitute a stub with the same name that
    # returns an empty list, so the `cythonize(...)` call used for
    # `ext_modules` below still works and yields no extension modules.
    def cythonize(filename):
        return []
from
DHParser.versionnumber
import
__version__
...
...
@@ -10,6 +15,7 @@ setup(
name
=
'DHParser'
,
version
=
__version__
,
packages
=
[
'DHParser'
],
ext_modules
=
cythonize
(
'DHParser/cstringview.pyx'
)
url
=
'https://gitlab.lrz.de/badw-it/DHParser'
,
license
=
'MIT License (https://opensource.org/licenses/MIT)'
,
author
=
'Eckhart Arnold'
,
...
...
test/test_cstringview.py
0 → 100644
View file @
802fcc4a
#!/usr/bin/python3
"""test_stringview.py - tests of the stringview-module of DHParser
Author: Eckhart Arnold <arnold@badw.de>
Copyright 2017 Bavarian Academy of Sciences and Humanities
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import
sys
try
:
import
regex
as
re
except
ImportError
:
import
re
sys
.
path
.
extend
([
'../'
,
'./'
])
import
pyximport
;
pyximport
.
install
()
from
DHParser.cstringview
import
StringView
,
EMPTY_STRING_VIEW
,
real_indices
class TestStringView:
    """Tests for StringView, EMPTY_STRING_VIEW and real_indices."""

    def test_real_indices(self):
        """Index pairs are normalized and clamped to the given length."""
        cases = [
            ((3, 5, 10), (3, 5)),
            ((None, None, 10), (0, 10)),
            ((-2, -1, 10), (8, 9)),
            ((-3, 11, 10), (7, 10)),
            ((-5, -12, 10), (5, 0)),
            ((-12, -5, 10), (0, 5)),
            ((7, 6, 10), (7, 6)),
            ((None, 0, 10), (0, 0)),
        ]
        for arguments, expected in cases:
            assert real_indices(*arguments) == expected

    def test_creation(self):
        """str() of a freshly created view yields the viewed section."""
        digits = "0123456789"
        whole = StringView(digits)
        assert str(whole) == digits
        single = StringView(digits, 3, 4)
        assert str(single) == '3'
        tail = StringView(digits, -4)
        assert str(tail) == '6789'

    def test_equality(self):
        """Views compare equal to plain strings with the same content."""
        digits = "0123456789"
        assert StringView(digits) == digits
        assert StringView(digits, 3, 4) == '3'
        assert StringView(digits, -4) == '6789'

    def test_slicing(self):
        """Slices of a view are relative to the view and are views again."""
        padded = " 0123456789 "
        view = StringView(padded, 1, -1)
        assert view == '0123456789'
        expectations = [
            (slice(3, 4), '3'),
            (slice(-3, -1), '78'),
            (slice(4, 3), ''),
            (slice(None, 4), '0123'),
            (slice(4, None), '456789'),
            (slice(-2, None), '89'),
            (slice(None, -5), '01234'),
        ]
        for section, expected in expectations:
            assert view[section] == expected
        assert isinstance(view[3:5], StringView)

    def test_len(self):
        """len() reports the size of the view, never a negative value."""
        padded = " 0123456789 "
        view = StringView(padded, 1, -1)
        assert len(view) == 10
        assert view.len == 10
        expectations = [
            (slice(5, 5), 0),
            (slice(7, 4), 0),
            (slice(-12, -2), 8),
            (slice(-12, 12), 10),
        ]
        for section, size in expectations:
            assert len(view[section]) == size

    def test_bool(self):
        """A view is truthy if and only if it is not empty."""
        assert not StringView('')
        assert StringView('x')
        padded = " 0123456789 "
        view = StringView(padded, 1, -1)
        assert not view[5:4]
        assert view[4:5], str(view[4:5])
        assert not view[3:3]
        assert not view[12:13]
        assert view[0:20]

    def test_sv_match(self):
        """Regular expressions are matched at the start of the view."""
        padded = " 0123456789 "
        view = StringView(padded, 1, -1)
        digit = re.compile(r'\d')
        assert view.match(digit)
        digits = re.compile(r'\d+')
        assert view.match(digits)
        blank = re.compile(r' ')
        assert not view.match(blank)
        forty_five = re.compile(r'45')
        assert view[4:].match(forty_five)

    def test_sv_search(self):
        """Searching is confined to the boundaries of the view."""
        padded = " 0123456789 "
        view = StringView(padded, 1, -1)
        five = re.compile(r'5')
        assert view.search(five)
        blank = re.compile(r' ')
        assert not view.search(blank)
        assert view[5:].search(re.compile(r'5'))
        nine = re.compile(r'9')
        assert not view[:9].search(nine)

    def test_find(self):
        """find() returns view-relative positions, negative if not found."""
        padded = " 0123456789 "
        view = StringView(padded, 1, -1)
        assert view.find('5') == 5
        assert view.find(' ') < 0
        assert view.find('0', 1) < 0
        assert view.find('9', 0, 8) < 0
        assert view.find('45', 1, 8) == 4

    def test_startswith(self):
        """startswith() interprets start and end relative to the view."""
        padded = " 0123456789 "
        view = StringView(padded, 1, -1)
        assert view.startswith('012')
        assert view.startswith('123', 1)
        assert not view.startswith('123', 1, 3)

    def test_EMPTY_STRING_VIEW(self):
        """The predefined empty view behaves like an empty string."""
        empty = EMPTY_STRING_VIEW
        assert len(empty) == 0
        assert empty.find('x') < 0
        assert not empty.match(re.compile(r'x'))
        assert empty.match(re.compile(r'.*'))
        assert len(empty[0:1]) == 0
if __name__ == "__main__":
    # Executed as a script: pass this module's globals to DHParser's own
    # test runner. NOTE(review): the empty string presumably means "no
    # selection of particular tests" -- confirm against DHParser.testing.
    from DHParser.testing import runner
    runner("", globals())
test/test_parser.py
View file @
802fcc4a
...
...
@@ -25,12 +25,16 @@ from functools import partial
sys
.
path
.
extend
([
'../'
,
'./'
])
from
DHParser.toolkit
import
is_logging
,
logging
,
compile_python_object
from
DHParser.stringview
import
StringView
from
DHParser.error
import
Error
from
DHParser.parser
import
compile_source
,
Retrieve
,
Grammar
,
Forward
,
Token
,
ZeroOrMore
,
RE
,
\
RegExp
,
Lookbehind
,
NegativeLookahead
,
OneOrMore
,
Series
,
Alternative
from
DHParser.ebnf
import
get_ebnf_grammar
,
get_ebnf_transformer
,
get_ebnf_compiler
from
DHParser.dsl
import
grammar_provider
,
DHPARSER_IMPORTS
try
:
import
pyximport
;
pyximport
.
install
()
from
DHParser.cstringview
import
StringView
except
ImportError
:
from
DHParser.stringview
import
StringView
class
TestInfiLoopsAndRecursion
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment