Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
9.2.2023: Due to updates GitLab will be unavailable for some minutes between 9:00 and 11:00.
Open sidebar
badw-it
DHParser
Commits
e90ff942
Commit
e90ff942
authored
Feb 19, 2019
by
eckhart
Browse files
- README.txt updated
parent
cf39d143
Changes
6
Hide whitespace changes
Inline
Side-by-side
DHParser/VERALTET/cstringview.pyx
deleted
100644 → 0
View file @
cf39d143
"""cstringview.pyx - a cython-version of the stringview class for speedup
slicing strings without copying
Copyright 2016 by Eckhart Arnold (arnold@badw.de)
Bavarian Academy of Sciences and Humanities (badw.de)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied. See the License for the specific language governing
permissions and limitations under the License.
StringView provides string-slicing without copying.
Slicing Python-strings always yields copies of a segment of the original
string. See: https://mail.python.org/pipermail/python-dev/2008-May/079699.html
However, this becomes costly (in terms of space and as a consequence also
time) when parsing longer documents. Unfortunately, Python's `memoryview`
does not work for unicode strings. Hence, the StringView class.
"""
import
collections
from
typing
import
Optional
,
Iterable
,
Tuple
__all__
=
(
'StringView'
,
'EMPTY_STRING_VIEW'
)
cdef inline int pack_index(int index, int len):
    # Normalize a Python-style index against a sequence of length `len`:
    # negative values count from the end, and the result is clamped to
    # the closed range [0, len].
    if index < 0:
        index += len
    if index < 0:
        return 0
    if index > len:
        return len
    return index
cdef real_indices(begin, end, int len):
    # Turn an optional (begin, end) slice specification into a pair of
    # concrete indices, both clamped to [0, len] by pack_index().
    # A missing `begin` means 0, a missing `end` means `len`.
    cdef int first, last
    if begin is None:
        first = 0
    else:
        first = begin
    if end is None:
        last = len
    else:
        last = end
    return pack_index(first, len), pack_index(last, len)
class StringView(collections.abc.Sized):
    """
    A rudimentary StringView class, just enough for the use cases
    in parser.py. The difference between a StringView and the python
    builtin strings is that StringView-objects do slicing without
    copying, i.e. slices are just a view on a section of the sliced
    string.

    Attributes (see ``__slots__``):
        text:  the underlying string the view looks at
        begin: index into ``text`` where the view starts (inclusive)
        end:   index into ``text`` where the view ends (exclusive)
        len:   length of the view, never negative
        fullstring_flag: True if the view covers all of ``text``, which
            allows delegating directly to the ``str`` methods
    """
    __slots__ = ['text', 'begin', 'end', 'len', 'fullstring_flag']

    def __init__(self, text: str, begin: Optional[int] = 0, end: Optional[int] = None) -> None:
        self.text = text
        self.begin, self.end = real_indices(begin, end, len(text))
        self.len = max(self.end - self.begin, 0)
        self.fullstring_flag = (self.begin == 0 and self.len == len(self.text))

    def __bool__(self):
        return self.end > self.begin

    def __len__(self):
        return self.len

    def __str__(self):
        if self.fullstring_flag:  # optimization: avoid slicing/copying
            return self.text
        # since the slice is being copied now, anyway, the copy might
        # as well be stored in the string view
        self.text = self.text[self.begin:self.end]
        self.begin = 0
        self.len = len(self.text)
        self.end = self.len
        self.fullstring_flag = True
        return self.text

    def __eq__(self, other):
        # PERFORMANCE WARNING: This creates copies of the strings
        try:
            other_len = len(other)
        except TypeError:
            # Operands without a length (None, numbers, ...) cannot be
            # equal to a string view; let Python fall back gracefully
            # instead of raising from within a comparison.
            return NotImplemented
        return other_len == len(self) and str(self) == str(other)

    def __hash__(self):
        # PERFORMANCE WARNING: This creates a copy of the string-slice
        return hash(str(self))

    def __add__(self, other):
        if isinstance(other, str):
            return (str(self) + other)
        else:
            return StringView(str(self) + str(other))

    def __radd__(self, other):
        if isinstance(other, str):
            return (other + str(self))
        else:
            return StringView(str(other) + str(self))

    def __getitem__(self, index):
        """Returns a new view on the slice ``index`` of this view.

        Only slice objects are supported; the step of the slice is
        not evaluated.
        """
        start, stop = real_indices(index.start, index.stop, self.len)
        return StringView(self.text, self.begin + start, self.begin + stop)

    def count(self, sub, start=None, end=None) -> int:
        """Counts the occurrences of ``sub`` inside the view, like
        ``str.count``. ``start`` and ``end`` are relative to the view."""
        if self.fullstring_flag:
            return self.text.count(sub, start, end)
        elif start is None and end is None:
            return self.text.count(sub, self.begin, self.end)
        else:
            start, end = real_indices(start, end, self.len)
            return self.text.count(sub, self.begin + start, self.begin + end)

    def find(self, sub, start=None, end=None) -> int:
        """Returns the lowest index (relative to the view) where ``sub``
        is found, or -1 if it does not occur, like ``str.find``."""
        if self.fullstring_flag:
            return self.text.find(sub, start, end)
        if start is None and end is None:
            pos = self.text.find(sub, self.begin, self.end)
        else:
            start, end = real_indices(start, end, self.len)
            pos = self.text.find(sub, self.begin + start, self.begin + end)
        # Only translate real hits; a miss must stay -1 and not become
        # the meaningless value ``-1 - self.begin``.
        return pos - self.begin if pos >= 0 else -1

    def rfind(self, sub, start=None, end=None) -> int:
        """Returns the highest index (relative to the view) where ``sub``
        is found, or -1 if it does not occur, like ``str.rfind``."""
        if self.fullstring_flag:
            return self.text.rfind(sub, start, end)
        if start is None and end is None:
            pos = self.text.rfind(sub, self.begin, self.end)
        else:
            start, end = real_indices(start, end, self.len)
            pos = self.text.rfind(sub, self.begin + start, self.begin + end)
        return pos - self.begin if pos >= 0 else -1

    def startswith(self, prefix: str, start: int = 0, end: Optional[int] = None) -> bool:
        """Returns True if the view (or its section ``start:end``) starts
        with ``prefix``. Indices are relative to the view and are clamped
        to it, so that text beyond the view's end is never examined."""
        start, end = real_indices(start, end, self.len)
        return self.text.startswith(prefix, self.begin + start, self.begin + end)

    def match(self, regex):
        """Applies ``regex.match`` to the viewed section. The positions
        of the returned match object refer to the underlying string;
        use ``index()`` or ``indices()`` to convert them."""
        return regex.match(self.text, pos=self.begin, endpos=self.end)

    def index(self, absolute_index: int) -> int:
        """
        Converts an index for a string watched by a StringView object
        to an index relative to the string view object, e.g.:
        >>> sv = StringView('xxIxx')[2:3]
        >>> match = sv.match(re.compile('I'))
        >>> match.end()
        3
        >>> sv.index(match.end())
        1
        """
        return absolute_index - self.begin

    def indices(self, absolute_indices: Iterable[int]) -> Tuple[int, ...]:
        """Converts indices for a string watched by a StringView object
        to indices relative to the string view object. See also: `index()`
        """
        return tuple(index - self.begin for index in absolute_indices)

    def search(self, regex):
        """Applies ``regex.search`` to the viewed section. The positions
        of the returned match object refer to the underlying string."""
        return regex.search(self.text, pos=self.begin, endpos=self.end)

    def strip(self):
        """Returns the viewed text as ``str`` with leading and trailing
        whitespace removed.

        NOTE(review): for views covering the whole string this delegates
        to ``str.strip()`` (all whitespace); otherwise only blanks,
        newlines and tabs are stripped.
        """
        cdef int begin, end
        if self.fullstring_flag:
            return self.text.strip()
        else:
            begin = self.begin
            end = self.end
            while begin < end and self.text[begin] in ' \n\t':
                begin += 1
            # `end` is exclusive: the last character of the view is at
            # end - 1. Reading text[end] would look one past the view
            # and raise an IndexError at the end of the text.
            while end > begin and self.text[end - 1] in ' \n\t':
                end -= 1
            return self.text[begin:end]

    def split(self, sep=None):
        """Splits the viewed text at ``sep`` and returns the pieces as a
        list of ``str``, like ``str.split``.

        Raises:
            ValueError: if ``sep`` is the empty string.
        """
        cdef int i, k, l
        if self.fullstring_flag:
            return self.text.split(sep)
        if sep is None:
            # Whitespace splitting has no fixed separator length; delegate
            # to str.split on the slice (this copies the slice).
            return self.text[self.begin:self.end].split()
        if not sep:
            # same error as str.split; otherwise the loop below would
            # never advance
            raise ValueError('empty separator')
        pieces = []
        l = len(sep)
        k = 0
        i = self.find(sep, k)
        while i >= 0:
            pieces.append(self.text[self.begin + k:self.begin + i])
            k = i + l
            i = self.find(sep, k)
        pieces.append(self.text[self.begin + k:self.end])
        return pieces
# Module-level view over the empty string; exported via __all__.
EMPTY_STRING_VIEW = StringView('')
DHParser/VERALTET/foreign_cython.py
deleted
100644 → 0
View file @
cf39d143
# This module has been shamelessly stolen from the Cython Python
# to C compiler (https://cython.org) so that DHParser does not
# cause an import error on machines where Cython is not installed
# The copyright to Cython is held by Greg Ewing. It is licensed
# under the Apache 2.0 license (see cython.org for more information).
# cython.* namespace for pure mode.
from
__future__
import
absolute_import
__version__
=
"0.27"
try
:
from
__builtin__
import
basestring
except
ImportError
:
basestring
=
str
# BEGIN shameless copy from Cython/minivect/minitypes.py
class _ArrayType(object):
    """Description of an array type: an element type, a number of
    dimensions and contiguity flags."""

    is_array = True
    subtypes = ['dtype']

    def __init__(self, dtype, ndim, is_c_contig=False, is_f_contig=False,
                 inner_contig=False, broadcasting=None):
        self.dtype, self.ndim = dtype, ndim
        self.is_c_contig, self.is_f_contig = is_c_contig, is_f_contig
        # C or Fortran contiguity implies a contiguous inner dimension.
        self.inner_contig = inner_contig or is_c_contig or is_f_contig
        self.broadcasting = broadcasting

    def __repr__(self):
        # One ":" per dimension; the contiguous axis (last for C order,
        # first for Fortran order) is rendered as "::1".
        axis_specs = [":"] * self.ndim
        if self.is_c_contig:
            axis_specs[-1] = "::1"
        elif self.is_f_contig:
            axis_specs[0] = "::1"
        rendered_axes = ", ".join(axis_specs)
        return "%s[%s]" % (self.dtype, rendered_axes)
def index_type(base_type, item):
    """
    Support array type creation by slicing, e.g. double[:, :] specifies
    a 2D strided array of doubles. The syntax is the same as for
    Cython memoryviews.

    Args:
        base_type: the element type that is being subscripted.
        item: the subscript; a tuple of slices (n-dimensional array),
            a single slice (1-dimensional array) or an integer
            (fixed-size array, e.g. ``int[8]``).

    Returns:
        An ``_ArrayType`` for slice subscripts, or the result of
        ``array(base_type, item)`` for an integer subscript.

    Raises:
        InvalidTypeSpecification: if a slice has a start, a stop or a
            step other than 1, or if a step is given more than once or
            in a dimension that is neither the first nor the last.
    """
    class InvalidTypeSpecification(Exception):
        pass

    def verify_slice(s):
        if s.start or s.stop or s.step not in (None, 1):
            raise InvalidTypeSpecification(
                "Only a step of 1 may be provided to indicate C or "
                "Fortran contiguity")

    if isinstance(item, tuple):
        step_idx = None
        last_idx = len(item) - 1
        for idx, s in enumerate(item):
            verify_slice(s)
            # `step_idx is not None` rather than plain truthiness: a step
            # already seen in dimension 0 must count as "provided once".
            if s.step and (step_idx is not None or idx not in (0, last_idx)):
                raise InvalidTypeSpecification(
                    "Step may only be provided once, and only in the "
                    "first or last dimension.")

            if s.step == 1:
                step_idx = idx

        return _ArrayType(base_type, len(item),
                          is_c_contig=step_idx == last_idx,
                          is_f_contig=step_idx == 0)
    elif isinstance(item, slice):
        verify_slice(item)
        return _ArrayType(base_type, 1, is_c_contig=bool(item.step))
    else:
        # int[8] etc.
        assert int(item) == item  # array size must be a plain integer
        # The array type must be returned; otherwise `int[8]` is None.
        return array(base_type, item)
# END shameless copy
# Module-level flag; always False in this module.
compiled = False

# Unique sentinel used as a default value where None would be a
# legitimate argument (see declare(), StructType and UnionType).
_Unspecified = object()
# Function decorators
def _empty_decorator(x):
    """No-op decorator: hand back the decorated object unchanged."""
    return x
def locals(**arg_types):
    """Accept arbitrary keyword arguments, ignore them and return a
    decorator that leaves the decorated object untouched."""
    return _empty_decorator
def test_assert_path_exists(*paths):
    """Accept any number of paths, ignore them and return a decorator
    that leaves the decorated object untouched."""
    return _empty_decorator
def test_fail_if_path_exists(*paths):
    """Accept any number of paths, ignore them and return a decorator
    that leaves the decorated object untouched."""
    return _empty_decorator
class _EmptyDecoratorAndManager(object):
    """Object that works both as a no-op decorator and as a no-op
    context manager."""

    def __call__(self, x):
        # Decorator use: the decorated object passes through unchanged.
        return x

    def __enter__(self):
        # Context manager use: nothing to set up, nothing to bind.
        return None

    def __exit__(self, exc_type, exc_value, traceback):
        # Returning None lets any exception propagate.
        return None
class _Optimization(object):
    """Plain attribute container; its attributes are assigned at module
    level after instantiation."""
# One shared no-op object that can be applied as a decorator or used as
# a context manager.
cclass = ccall = cfunc = _EmptyDecoratorAndManager()

# All of these names are bound to the SAME lambda: it takes one argument,
# ignores it and returns a fresh no-op decorator/context manager.
returns = wraparound = boundscheck = initializedcheck = nonecheck = \
    overflowcheck = embedsignature = cdivision = cdivision_warnings = \
    always_allows_keywords = profile = linetrace = infer_type = \
    unraisable_tracebacks = freelist = \
    lambda _: _EmptyDecoratorAndManager()

# Like the lambda above, but callable without arguments and with an
# optional `check` keyword; both arguments are ignored.
exceptval = lambda _=None, check=True: _EmptyDecoratorAndManager()

optimization = _Optimization()

# `fold` is stored as an attribute on the lambda shared by the names
# above, so it is reachable through every one of those aliases.
overflowcheck.fold = optimization.use_switch = \
    optimization.unpack_method_calls = lambda arg: _EmptyDecoratorAndManager()

# Plain decorators without arguments: return the decorated object as is.
final = internal = type_version_tag = no_gc_clear = no_gc = _empty_decorator

# Cache for Cython.Build.Inline.cython_inline; filled lazily by inline().
_cython_inline = None
def inline(f, *args, **kwds):
    """Compile a code string with Cython, or pass a callable through.

    If ``f`` is a string, ``Cython.Build.Inline.cython_inline`` is
    imported on first use, cached in the module-level ``_cython_inline``
    and called with ``f`` and all further arguments. Any other ``f`` is
    returned unchanged; no further arguments are allowed in that case.
    """
    global _cython_inline
    if not isinstance(f, basestring):
        assert len(args) == len(kwds) == 0
        return f
    if _cython_inline is None:
        # Deferred import so that this module loads without Cython.
        from Cython.Build.Inline import cython_inline as _cython_inline
    return _cython_inline(f, *args, **kwds)
def compile(f):
    """Wrap ``f`` in ``Cython.Build.Inline.RuntimeCompiledFunction``.

    Cython is imported only when this function is called.
    """
    from Cython.Build.Inline import RuntimeCompiledFunction as wrapper_class
    wrapped = wrapper_class(f)
    return wrapped
# Special functions
def cdiv(a, b):
    """Integer division with C semantics, i.e. truncation towards zero.

    Python's ``//`` rounds towards negative infinity; C rounds towards
    zero. The two differ only if the quotient is negative and inexact.

    Args:
        a: the dividend (an integer).
        b: the divisor (a non-zero integer).

    Returns:
        The quotient of ``a`` and ``b``, truncated towards zero.

    Raises:
        ZeroDivisionError: if ``b`` is zero.
    """
    # `//` rather than `/`: on Python 3, `/` is true division and would
    # yield a float.
    q = a // b
    # Correct the floor result only if something was actually rounded;
    # exact negative quotients such as -4 / 2 must stay untouched.
    if q < 0 and q * b != a:
        q += 1
    return q
def cmod(a, b):
    """Remainder with C semantics: the result takes the sign of the
    dividend ``a`` (Python's ``%`` follows the sign of the divisor).

    Args:
        a: the dividend.
        b: the divisor (non-zero).

    Returns:
        ``a - b * trunc(a / b)``.

    Raises:
        ZeroDivisionError: if ``b`` is zero.
    """
    r = a % b
    # With operands of different sign, Python's remainder has the wrong
    # sign for C -- unless it is zero, in which case there is nothing to
    # shift (e.g. -4 % 2 must remain 0, not become -2).
    if r and (a * b) < 0:
        r -= b
    return r
# Emulated language constructs
def cast(type, *args, **kwargs):
    """Emulate a type cast.

    If ``type`` is callable it is called with the positional arguments
    and the result is returned; otherwise the first positional argument
    is returned unchanged. The only keyword accepted is ``typecheck``,
    whose value is discarded.
    """
    kwargs.pop('typecheck', None)
    assert not kwargs
    if not hasattr(type, '__call__'):
        return args[0]
    return type(*args)
def sizeof(arg):
    """Stand-in for ``sizeof``: always returns 1, whatever ``arg`` is."""
    return 1
def typeof(arg):
    """Return the name of the class of ``arg`` as a string."""
    klass = arg.__class__
    return klass.__name__
def address(arg):
    """Emulate taking the address of ``arg``: build a pointer type for
    the type of ``arg`` and return an instance holding ``arg`` as its
    only item."""
    pointer_class = pointer(type(arg))
    return pointer_class([arg])
def declare(type=None, value=_Unspecified, **kwds):
    """Emulate a typed variable declaration.

    If ``type`` is callable and neither ``None`` nor ``object``, the
    result is ``type(value)``, or ``type()`` if no value was given.
    In all other cases ``value`` is returned as it is. Additional
    keyword arguments are ignored.
    """
    constructible = type not in (None, object) and hasattr(type, '__call__')
    if not constructible:
        return value
    if value is _Unspecified:
        return type()
    return type(value)
class _nogil(object):
    """Context manager that makes ``with nogil:`` and ``with gil:``
    valid statements without any effect of their own."""

    def __enter__(self):
        return None

    def __exit__(self, exc_class, exc, tb):
        # True only if the block finished without an exception, so a
        # raised exception is never suppressed.
        finished_cleanly = exc_class is None
        return finished_cleanly


nogil, gil = _nogil(), _nogil()
# Only the two instances are meant to be used; drop the class name.
del _nogil
# Emulated types
class CythonMetaType(type):
    """Metaclass that makes classes subscriptable: ``SomeType[n]`` is
    evaluated as ``array(SomeType, n)``."""

    def __getitem__(cls, ix):
        return array(cls, ix)
# Base class created by calling the metaclass directly, so that it has
# CythonMetaType as its metaclass without using class-statement syntax.
CythonTypeObject = CythonMetaType('CythonTypeObject', (object,), {})
class CythonType(CythonTypeObject):
    """Common base class of the emulated types."""

    def _pointer(self, n=1):
        """Apply ``pointer()`` ``n`` times, starting with ``self``, and
        return the result (``self`` itself for ``n`` < 1)."""
        wrapped = self
        for _ in range(n):
            wrapped = pointer(wrapped)
        return wrapped
class PointerType(CythonType):
    """Emulated pointer: a list of items that are cast to the class
    attribute ``_basetype``, which subclasses are expected to provide
    (see ``pointer()``)."""

    def __init__(self, value=None):
        # Determine where the initial items come from, then cast them.
        if isinstance(value, (ArrayType, PointerType)):
            source = value._items
        elif isinstance(value, list):
            source = value
        elif value is None or value == 0:
            source = ()  # null pointer: no items
        else:
            raise ValueError
        self._items = [cast(self._basetype, element) for element in source]

    def __getitem__(self, ix):
        if ix < 0:
            raise IndexError("negative indexing not allowed in C")
        return self._items[ix]

    def __setitem__(self, ix, value):
        if ix < 0:
            raise IndexError("negative indexing not allowed in C")
        converted = cast(self._basetype, value)
        self._items[ix] = converted

    def __eq__(self, value):
        # An empty pointer equals None; apart from that, only two empty
        # pointers of exactly the same type compare equal.
        if value is None and not self._items:
            return True
        if type(self) != type(value):
            return False
        return not self._items and not value._items

    def __repr__(self):
        return "%s *" % (self._basetype,)
class ArrayType(PointerType):
    """Emulated fixed-size array; the class attribute ``_n`` (provided
    by subclasses, see ``array()``) is the number of items."""

    def __init__(self):
        # All slots start out as None.
        slot_count = self._n
        self._items = [None] * slot_count
class StructType(CythonType):
    """Emulated struct. The class attribute ``_members`` (provided by
    subclasses, see ``struct()``) maps member names to their types."""

    def __init__(self, cast_from=_Unspecified, **data):
        if cast_from is _Unspecified:
            # regular construction from keyword arguments
            fields = data
        else:
            # do cast
            if len(data) > 0:
                raise ValueError('Cannot accept keyword arguments when casting.')
            if type(cast_from) is not type(self):
                raise ValueError('Cannot cast from %s' % cast_from)
            fields = cast_from.__dict__
        for key, value in fields.items():
            setattr(self, key, value)

    def __setattr__(self, key, value):
        # Only declared members may be set; values are cast to the
        # declared member type.
        if key not in self._members:
            raise AttributeError("Struct has no member '%s'" % key)
        self.__dict__[key] = cast(self._members[key], value)
class UnionType(CythonType):
    """Emulated union. The class attribute ``_members`` (provided by
    subclasses, see ``union()``) maps member names to their types.
    At any time at most one member holds a value.
    """

    def __init__(self, cast_from=_Unspecified, **data):
        """Initialize from keyword arguments or by casting.

        Args:
            cast_from: a dict or another instance of the same union
                type whose content is taken over.
            **data: at most one ``member=value`` pair.

        Raises:
            ValueError: if keyword arguments are combined with a cast,
                or if ``cast_from`` is neither a dict nor an instance
                of the same type.
            AttributeError: if more than one field is supplied, or a
                field is not a member of the union.
        """
        if cast_from is not _Unspecified:
            # do type cast
            if len(data) > 0:
                raise ValueError('Cannot accept keyword arguments when casting.')
            if isinstance(cast_from, dict):
                datadict = cast_from
            elif type(cast_from) is type(self):
                datadict = cast_from.__dict__
            else:
                raise ValueError('Cannot cast from %s' % cast_from)
        else:
            datadict = data
        if len(datadict) > 1:
            raise AttributeError("Union can only store one field at a time.")
        for key, value in datadict.items():
            setattr(self, key, value)

    def __setattr__(self, key, value):
        # Exact comparison: `key in '__dict__'` would be a substring
        # test and let members named e.g. 'i', 'd' or 'c' bypass both
        # the cast and the one-field-at-a-time rule.
        if key == '__dict__':
            CythonType.__setattr__(self, key, value)
        elif key in self._members:
            # Replacing the whole __dict__ discards the previous member.
            self.__dict__ = {key: cast(self._members[key], value)}
        else:
            raise AttributeError("Union has no member '%s'" % key)
def pointer(basetype):
    """Create and return a new ``PointerType`` subclass whose items are
    of type ``basetype``."""
    item_type = basetype

    class PointerInstance(PointerType):
        _basetype = item_type

    return PointerInstance
def array(basetype, n):
    """Create and return a new ``ArrayType`` subclass for ``n`` items
    of type ``basetype``."""
    item_type, item_count = basetype, n

    class ArrayInstance(ArrayType):
        _basetype = item_type
        _n = item_count

    return ArrayInstance
def struct(**members):
    """Create and return a new ``StructType`` subclass.

    Each keyword argument declares a member (name=type). Every member
    also becomes a class attribute with the value None.
    """
    member_types = members

    class StructInstance(StructType):
        _members = member_types

    for member_name in member_types:
        setattr(StructInstance, member_name, None)
    return StructInstance
def union(**members):
    """Create and return a new ``UnionType`` subclass.

    Each keyword argument declares a member (name=type). Every member
    also becomes a class attribute with the value None.
    """
    member_types = members

    class UnionInstance(UnionType):
        _members = member_types

    for member_name in member_types:
        setattr(UnionInstance, member_name, None)
    return UnionInstance
class typedef(CythonType):
    """A named alias for a base type. Calling an instance casts the
    arguments to the base type; subscripting it builds an array type
    via ``index_type``."""

    def __init__(self, type, name=None):
        self._basetype, self.name = type, name

    def __call__(self, *arg):
        return cast(self._basetype, *arg)

    def __repr__(self):
        # Use the alias name if there is one, else the base type.
        if self.name:
            return self.name
        return str(self._basetype)

    __getitem__ = index_type
class _FusedType(CythonType):
    """Placeholder type returned by ``fused_type()`` for non-numeric
    combinations; it adds nothing to ``CythonType``."""
def fused_type(*args):
    """Emulate the declaration of a fused type.

    If all arguments are among ``py_int``, ``py_long``, ``py_float``
    and ``py_complex``, the argument with the highest position in
    ``type_ordering`` is returned (``type_ordering`` is defined
    elsewhere in this module; presumably it lists the numeric types by
    ascending rank). Otherwise a ``_FusedType`` instance is returned.

    Raises:
        TypeError: if no type is given.
    """
    if not args:
        raise TypeError("Expected at least one type as argument")

    # Find the numeric type with biggest rank if all types are numeric
    rank = -1
    for candidate in args:
        if candidate not in (py_int, py_long, py_float, py_complex):
            break

        candidate_rank = type_ordering.index(candidate)
        if candidate_rank > rank:
            # Remember the best rank seen so far. Without this update
            # every numeric type would beat the initial -1 and the LAST
            # argument would win instead of the highest-ranked one.
            rank = candidate_rank
            result_type = candidate
    else:
        return result_type

    # Not a simple numeric type, return a fused type instance. The result
    # isn't really meant to be used, as we can't keep track of the context in
    # pure-mode. Casting won't do anything in this case.
    return _FusedType()
def _specialized_from_args(signatures, args, kwargs):
    """Not implemented; always raises.

    Perhaps this should be implemented in a TreeFragment in Cython code.
    """
    message = "yet to be implemented"
    raise Exception(message)
# Typedefs for the Python numeric types.
py_int = typedef(int, "int")
try:
    # `long` only exists on Python 2.
    py_long = typedef(long, "long")
except NameError:  # Py3
    # On Python 3 the "long" typedef is backed by int.
    py_long = typedef(int, "long")