Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
a471a1e0
Commit
a471a1e0
authored
Aug 31, 2017
by
di68kap
Browse files
- parser.py: string slicing replaced by toolkit.StringView
parent
481891e3
Changes
8
Hide whitespace changes
Inline
Side-by-side
DHParser/parser.py
View file @
a471a1e0
...
...
@@ -77,7 +77,8 @@ except ImportError:
from
DHParser.toolkit
import
is_logging
,
log_dir
,
logfile_basename
,
escape_re
,
sane_parser_name
from
DHParser.syntaxtree
import
WHITESPACE_PTYPE
,
TOKEN_PTYPE
,
ZOMBIE_PARSER
,
ParserBase
,
\
Node
,
TransformationFunc
from
DHParser.toolkit
import
TextView
,
load_if_file
,
error_messages
,
line_col
from
DHParser.toolkit
import
StringView
,
EMPTY_STRING_VIEW
,
sv_match
,
sv_index
,
sv_search
,
\
load_if_file
,
error_messages
,
line_col
__all__
=
(
'PreprocessorFunc'
,
'HistoryRecord'
,
...
...
@@ -161,7 +162,7 @@ class HistoryRecord:
# type: List['Parser']
self
.
node
=
node
# type: Node
self
.
remaining
=
remaining
# type: int
document
=
call_stack
[
-
1
].
grammar
.
document__
if
call_stack
else
''
document
=
call_stack
[
-
1
].
grammar
.
document__
.
text
if
call_stack
else
''
self
.
line_col
=
line_col
(
document
,
len
(
document
)
-
remaining
)
# type: Tuple[int, int]
def
__str__
(
self
):
...
...
@@ -229,11 +230,13 @@ def add_parser_guard(parser_func):
that takes care of memoizing, left recursion and optionally tracing
(aka "history tracking") of parser calls. Returns the wrapped call.
"""
def
guarded_call
(
parser
:
'Parser'
,
text
:
str
)
->
Tuple
[
Node
,
str
]:
def
guarded_call
(
parser
:
'Parser'
,
text
:
StringView
)
->
Tuple
[
Node
,
StringView
]:
assert
isinstance
(
text
,
StringView
)
def
memoized
(
parser
,
location
):
node
=
parser
.
visited
[
location
]
rlen
=
location
-
(
0
if
node
is
None
else
node
.
len
)
rest
=
TextView
(
grammar
.
document__
,
-
rlen
)
if
rlen
else
''
rest
=
grammar
.
document__
[
-
rlen
:]
if
rlen
else
EMPTY_STRING_VIEW
return
node
,
rest
# NOTE: An older and simpler implementation of memoization
# relied on `parser.visited[location] == node, rest`. Although,
...
...
@@ -267,6 +270,7 @@ def add_parser_guard(parser_func):
# run original __call__ method
node
,
rest
=
parser_func
(
parser
,
text
)
assert
isinstance
(
rest
,
StringView
)
if
node
is
None
:
# retrieve an earlier match result (from left recursion) if it exists
...
...
@@ -302,7 +306,7 @@ def add_parser_guard(parser_func):
node
=
Node
(
None
,
text
[:
min
(
10
,
max
(
1
,
text
.
find
(
"
\n
"
)))]
+
" ..."
)
node
.
add_error
(
"maximum recursion depth of parser reached; "
"potentially due to too many errors!"
)
rest
=
''
rest
=
EMPTY_STRING_VIEW
return
node
,
rest
...
...
@@ -409,7 +413,7 @@ class Parser(ParserBase, metaclass=ParserMetaClass):
self
.
cycle_detection
=
set
()
# type: Set[Callable]
return
self
def
__call__
(
self
,
text
:
Text
View
)
->
Tuple
[
Node
,
Text
View
]:
def
__call__
(
self
,
text
:
String
View
)
->
Tuple
[
Node
,
String
View
]:
"""Applies the parser to the given `text` and returns a node with
the results or None as well as the text at the position right behind
the matching string."""
...
...
@@ -724,8 +728,8 @@ class Grammar:
def
_reset__
(
self
):
self
.
document__
=
""
# type:
s
tr
self
.
_reversed__
=
""
# type:
s
tr
self
.
document__
=
EMPTY_STRING_VIEW
# type:
S
tr
ingView
self
.
_reversed__
=
EMPTY_STRING_VIEW
# type:
S
tr
ingView
# variables stored and recalled by Capture and Retrieve parsers
self
.
variables__
=
dict
()
# type: Dict[str, List[str]]
self
.
rollback__
=
[]
# type: List[Tuple[int, Callable]]
...
...
@@ -742,7 +746,7 @@ class Grammar:
@property
def reversed__(self) -> StringView:
    """Returns the reversed document as a `StringView`.

    The reversed view is built on first access from the complete string
    underlying `self.document__` (i.e. `self.document__.text`) and then
    cached in `self._reversed__`.
    """
    # An empty view is falsy, so the cache is (cheaply) rebuilt on every
    # access as long as the document itself is empty.
    if not self._reversed__:
        self._reversed__ = StringView(self.document__.text[::-1])
    return self._reversed__
...
...
@@ -784,13 +788,13 @@ class Grammar:
else
:
self
.
_dirty_flag__
=
True
self
.
history_tracking__
=
is_logging
()
self
.
document__
=
document
self
.
last_rb__loc__
=
len
(
document
)
+
1
# rollback location
self
.
document__
=
StringView
(
document
)
self
.
last_rb__loc__
=
len
(
self
.
document
__
)
+
1
# rollback location
parser
=
self
[
start_parser
]
if
isinstance
(
start_parser
,
str
)
else
start_parser
assert
parser
.
grammar
==
self
,
"Cannot run parsers from a different grammar object!"
\
" %s vs. %s"
%
(
str
(
self
),
str
(
parser
.
grammar
))
stitches
=
[]
# type: List[Node]
rest
=
document
rest
=
self
.
document
__
if
not
rest
:
result
,
ignore
=
parser
(
rest
)
if
result
is
None
:
...
...
@@ -883,7 +887,7 @@ class Grammar:
document.
"""
def
prepare_line
(
record
):
excerpt
=
self
.
document__
.
__getitem__
(
record
.
extent
)[:
25
].
replace
(
'
\n
'
,
'
\\
n'
)
excerpt
=
self
.
document__
.
text
.
__getitem__
(
record
.
extent
)[:
25
].
replace
(
'
\n
'
,
'
\\
n'
)
excerpt
=
"'%s'"
%
excerpt
if
len
(
excerpt
)
<
25
else
"'%s...'"
%
excerpt
return
record
.
stack
,
record
.
status
,
excerpt
...
...
@@ -985,7 +989,7 @@ class PreprocessorToken(Parser):
assert
RX_PREPROCESSOR_TOKEN
.
match
(
token
)
super
(
PreprocessorToken
,
self
).
__init__
(
token
)
def
__call__
(
self
,
text
:
s
tr
)
->
Tuple
[
Node
,
s
tr
]:
def
__call__
(
self
,
text
:
S
tr
ingView
)
->
Tuple
[
Node
,
S
tr
ingView
]:
if
text
[
0
:
1
]
==
BEGIN_TOKEN
:
end
=
text
.
find
(
END_TOKEN
,
1
)
if
end
<
0
:
...
...
@@ -1040,10 +1044,10 @@ class RegExp(Parser):
regexp
=
self
.
regexp
.
pattern
return
RegExp
(
regexp
,
self
.
name
)
def
__call__
(
self
,
text
:
s
tr
)
->
Tuple
[
Node
,
s
tr
]:
match
=
text
[
0
:
1
]
!=
BEGIN_TOKEN
and
self
.
regexp
.
match
(
text
)
# ESC starts a preprocessor token.
def
__call__
(
self
,
text
:
S
tr
ingView
)
->
Tuple
[
Node
,
S
tr
ingView
]:
match
=
text
[
0
:
1
]
!=
BEGIN_TOKEN
and
sv_match
(
self
.
regexp
,
text
)
# ESC starts a preprocessor token.
if
match
:
end
=
match
.
end
()
end
=
sv_index
(
match
.
end
()
,
text
)
return
Node
(
self
,
text
[:
end
]),
text
[
end
:]
return
None
,
text
...
...
@@ -1114,9 +1118,9 @@ class RE(Parser):
regexp
=
self
.
main
.
regexp
.
pattern
return
self
.
__class__
(
regexp
,
self
.
wL
,
self
.
wR
,
self
.
name
)
def
__call__
(
self
,
text
:
s
tr
)
->
Tuple
[
Node
,
s
tr
]:
def
__call__
(
self
,
text
:
S
tr
ingView
)
->
Tuple
[
Node
,
S
tr
ingView
]:
# assert self.main.regexp.pattern != "@"
t
=
text
# type:
s
tr
t
=
text
# type:
S
tr
ingView
wL
,
t
=
self
.
wspLeft
(
t
)
main
,
t
=
self
.
main
(
t
)
if
main
:
...
...
@@ -1264,7 +1268,7 @@ class Optional(UnaryOperator):
"Nesting options with required elements is contradictory: "
\
"%s(%s)"
%
(
str
(
name
),
str
(
parser
.
name
))
def
__call__
(
self
,
text
:
s
tr
)
->
Tuple
[
Node
,
s
tr
]:
def
__call__
(
self
,
text
:
S
tr
ingView
)
->
Tuple
[
Node
,
S
tr
ingView
]:
node
,
text
=
self
.
parser
(
text
)
if
node
:
return
Node
(
self
,
node
),
text
...
...
@@ -1289,7 +1293,7 @@ class ZeroOrMore(Optional):
EBNF-Notation: `{ ... }`
EBNF-Example: `sentence = { /\w+,?/ } "."`
"""
def
__call__
(
self
,
text
:
s
tr
)
->
Tuple
[
Node
,
s
tr
]:
def
__call__
(
self
,
text
:
S
tr
ingView
)
->
Tuple
[
Node
,
S
tr
ingView
]:
results
=
()
# type: Tuple[Node, ...]
n
=
len
(
text
)
+
1
while
text
and
len
(
text
)
<
n
:
...
...
@@ -1314,9 +1318,9 @@ class OneOrMore(UnaryOperator):
"Use ZeroOrMore instead of nesting OneOrMore and Optional: "
\
"%s(%s)"
%
(
str
(
name
),
str
(
parser
.
name
))
def
__call__
(
self
,
text
:
s
tr
)
->
Tuple
[
Node
,
s
tr
]:
def
__call__
(
self
,
text
:
S
tr
ingView
)
->
Tuple
[
Node
,
S
tr
ingView
]:
results
=
()
# type: Tuple[Node, ...]
text_
=
text
# type:
s
tr
text_
=
text
# type:
S
tr
ingView
n
=
len
(
text
)
+
1
while
text_
and
len
(
text_
)
<
n
:
n
=
len
(
text_
)
...
...
@@ -1340,9 +1344,9 @@ class Series(NaryOperator):
super
(
Series
,
self
).
__init__
(
*
parsers
,
name
=
name
)
assert
len
(
self
.
parsers
)
>=
1
def
__call__
(
self
,
text
:
s
tr
)
->
Tuple
[
Node
,
s
tr
]:
def
__call__
(
self
,
text
:
S
tr
ingView
)
->
Tuple
[
Node
,
S
tr
ingView
]:
results
=
()
# type: Tuple[Node, ...]
text_
=
text
# type:
s
tr
text_
=
text
# type:
S
tr
ingView
for
parser
in
self
.
parsers
:
node
,
text_
=
parser
(
text_
)
if
not
node
:
...
...
@@ -1400,7 +1404,7 @@ class Alternative(NaryOperator):
assert
all
(
not
isinstance
(
p
,
Optional
)
for
p
in
self
.
parsers
[:
-
1
])
self
.
been_here
=
dict
()
# type: Dict[int, int]
def
__call__
(
self
,
text
:
s
tr
)
->
Tuple
[
Node
,
s
tr
]:
def
__call__
(
self
,
text
:
S
tr
ingView
)
->
Tuple
[
Node
,
S
tr
ingView
]:
for
parser
in
self
.
parsers
:
node
,
text_
=
parser
(
text
)
if
node
:
...
...
@@ -1447,11 +1451,13 @@ class FlowOperator(UnaryOperator):
class
Required
(
FlowOperator
):
# Add constructor that checks for logical errors, like `Required(Optional(...))` constructs ?
def
__call__
(
self
,
text
:
str
)
->
Tuple
[
Node
,
str
]:
RX_ARGUMENT
=
re
.
compile
(
r
'\s(\S)'
)
def
__call__
(
self
,
text
:
StringView
)
->
Tuple
[
Node
,
StringView
]:
node
,
text_
=
self
.
parser
(
text
)
if
not
node
:
m
=
re
.
search
(
r
'\s(\S)'
,
text
)
i
=
max
(
1
,
m
.
regs
[
1
][
0
])
if
m
else
1
m
=
sv_search
(
Required
.
RX_ARGUMENT
,
text
)
#
re.search(r'\s(\S)', text)
i
=
max
(
1
,
sv_index
(
m
.
regs
[
1
][
0
]
,
text
)
)
if
m
else
1
node
=
Node
(
self
,
text
[:
i
])
text_
=
text
[
i
:]
# assert False, "*"+text[:i]+"*"
...
...
@@ -1467,7 +1473,7 @@ class Lookahead(FlowOperator):
def
__init__
(
self
,
parser
:
Parser
,
name
:
str
=
''
)
->
None
:
super
(
Lookahead
,
self
).
__init__
(
parser
,
name
)
def
__call__
(
self
,
text
:
s
tr
)
->
Tuple
[
Node
,
s
tr
]:
def
__call__
(
self
,
text
:
S
tr
ingView
)
->
Tuple
[
Node
,
S
tr
ingView
]:
node
,
text_
=
self
.
parser
(
text
)
if
self
.
sign
(
node
is
not
None
):
return
Node
(
self
,
''
),
text
...
...
@@ -1512,9 +1518,9 @@ class Lookbehind(FlowOperator):
self
.
regexp
=
p
.
main
.
regexp
if
isinstance
(
p
,
RE
)
else
p
.
regexp
super
(
Lookbehind
,
self
).
__init__
(
parser
,
name
)
def
__call__
(
self
,
text
:
s
tr
)
->
Tuple
[
Node
,
s
tr
]:
def
__call__
(
self
,
text
:
S
tr
ingView
)
->
Tuple
[
Node
,
S
tr
ingView
]:
backwards_text
=
self
.
grammar
.
reversed__
[
len
(
text
):]
# self.grammar.document__[-len(text) - 1::-1]
if
self
.
sign
(
self
.
regexp
.
match
(
backwards_text
)):
if
self
.
sign
(
sv_match
(
self
.
regexp
,
backwards_text
)):
return
Node
(
self
,
''
),
text
else
:
return
None
,
text
...
...
@@ -1548,7 +1554,7 @@ class Capture(UnaryOperator):
def
__init__
(
self
,
parser
:
Parser
,
name
:
str
=
''
)
->
None
:
super
(
Capture
,
self
).
__init__
(
parser
,
name
)
def
__call__
(
self
,
text
:
s
tr
)
->
Tuple
[
Node
,
s
tr
]:
def
__call__
(
self
,
text
:
S
tr
ingView
)
->
Tuple
[
Node
,
S
tr
ingView
]:
node
,
text_
=
self
.
parser
(
text
)
if
node
:
stack
=
self
.
grammar
.
variables__
.
setdefault
(
self
.
name
,
[])
...
...
@@ -1590,13 +1596,13 @@ class Retrieve(Parser):
def
__deepcopy__
(
self
,
memo
):
return
self
.
__class__
(
self
.
symbol
,
self
.
filter
,
self
.
name
)
def
__call__
(
self
,
text
:
s
tr
)
->
Tuple
[
Node
,
s
tr
]:
def
__call__
(
self
,
text
:
S
tr
ingView
)
->
Tuple
[
Node
,
S
tr
ingView
]:
return
self
.
call
(
text
)
# allow call method to be called from subclass circumventing the parser guard
def
__repr__
(
self
):
return
':'
+
self
.
symbol
.
repr
def
call
(
self
,
text
:
s
tr
)
->
Tuple
[
Node
,
s
tr
]:
def
call
(
self
,
text
:
S
tr
ingView
)
->
Tuple
[
Node
,
S
tr
ingView
]:
try
:
stack
=
self
.
grammar
.
variables__
[
self
.
symbol
.
name
]
value
=
self
.
filter
(
stack
)
...
...
@@ -1612,7 +1618,7 @@ class Retrieve(Parser):
class
Pop
(
Retrieve
):
"""STILL EXPERIMENTAL!!!"""
def
__call__
(
self
,
text
:
s
tr
)
->
Tuple
[
Node
,
s
tr
]:
def
__call__
(
self
,
text
:
S
tr
ingView
)
->
Tuple
[
Node
,
S
tr
ingView
]:
nd
,
txt
=
super
(
Pop
,
self
).
call
(
text
)
# call() instead of __call__() to avoid parser guard
if
nd
and
not
nd
.
error_flag
:
stack
=
self
.
grammar
.
variables__
[
self
.
symbol
.
name
]
...
...
@@ -1644,7 +1650,7 @@ class Synonym(UnaryOperator):
class, in which case it would be unclear whether the parser
RE('\d\d\d\d') carries the name 'JAHRESZAHL' or 'jahr'.
"""
def
__call__
(
self
,
text
:
s
tr
)
->
Tuple
[
Node
,
s
tr
]:
def
__call__
(
self
,
text
:
S
tr
ingView
)
->
Tuple
[
Node
,
S
tr
ingView
]:
node
,
text
=
self
.
parser
(
text
)
if
node
:
return
Node
(
self
,
node
),
text
...
...
@@ -1684,7 +1690,7 @@ class Forward(Parser):
duplicate
.
set
(
parser
)
return
duplicate
def
__call__
(
self
,
text
:
s
tr
)
->
Tuple
[
Node
,
s
tr
]:
def
__call__
(
self
,
text
:
S
tr
ingView
)
->
Tuple
[
Node
,
S
tr
ingView
]:
return
self
.
parser
(
text
)
def
__repr__
(
self
):
...
...
DHParser/syntaxtree.py
View file @
a471a1e0
...
...
@@ -31,7 +31,7 @@ except ImportError:
from
.typing34
import
AbstractSet
,
Any
,
ByteString
,
Callable
,
cast
,
Container
,
Dict
,
\
Iterator
,
List
,
NamedTuple
,
Sequence
,
Union
,
Text
,
Tuple
from
DHParser.toolkit
import
is_logging
,
log_dir
,
Text
View
,
line_col
,
identity
from
DHParser.toolkit
import
is_logging
,
log_dir
,
String
View
,
line_col
,
identity
__all__
=
(
'WHITESPACE_PTYPE'
,
'MockParser'
,
...
...
@@ -129,8 +129,8 @@ ZOMBIE_PARSER = ZombieParser()
Error
=
NamedTuple
(
'Error'
,
[(
'pos'
,
int
),
(
'msg'
,
str
)])
ChildrenType
=
Tuple
[
'Node'
,
...]
StrictResultType
=
Union
[
ChildrenType
,
Text
View
,
str
]
ResultType
=
Union
[
ChildrenType
,
'Node'
,
Text
View
,
str
,
None
]
StrictResultType
=
Union
[
ChildrenType
,
String
View
,
str
]
ResultType
=
Union
[
ChildrenType
,
'Node'
,
String
View
,
str
,
None
]
def
flatten_sxpr
(
sxpr
:
str
)
->
str
:
...
...
@@ -189,6 +189,7 @@ class Node:
__slots__
=
[
'_result'
,
'children'
,
'_errors'
,
'_len'
,
'_pos'
,
'parser'
,
'error_flag'
]
def
__init__
(
self
,
parser
,
result
:
ResultType
)
->
None
:
"""Initializes the ``Node``-object with the ``Parser``-Instance
that generated the node and the parser's result.
...
...
@@ -251,7 +252,7 @@ class Node:
# or isinstance(result, Node)
# or isinstance(result, str)), str(result)
self
.
_result
=
(
result
,)
if
isinstance
(
result
,
Node
)
else
str
(
result
)
\
if
isinstance
(
result
,
Text
View
)
else
result
or
''
# type: StrictResultType
if
isinstance
(
result
,
String
View
)
else
result
or
''
# type: StrictResultType
self
.
children
=
cast
(
ChildrenType
,
self
.
_result
)
\
if
isinstance
(
self
.
_result
,
tuple
)
else
cast
(
ChildrenType
,
())
# type: ChildrenType
self
.
error_flag
=
any
(
r
.
error_flag
for
r
in
self
.
children
)
# type: bool
...
...
DHParser/toolkit.py
View file @
a471a1e0
...
...
@@ -43,14 +43,18 @@ except ImportError:
import
sys
try
:
from
typing
import
Any
,
List
,
Tuple
,
Optional
from
typing
import
Any
,
List
,
Tuple
,
Collection
,
Union
,
Optional
except
ImportError
:
from
.typing34
import
Any
,
List
,
Tuple
,
Optional
from
.typing34
import
Any
,
List
,
Tuple
,
Collection
,
Union
,
Optional
__all__
=
(
'logging'
,
'is_logging'
,
'log_dir'
,
'logfile_basename'
,
'StringView'
,
'sv_match'
,
'sv_index'
,
'sv_search'
,
# 'supress_warnings',
# 'warnings',
# 'repr_call',
...
...
@@ -150,22 +154,93 @@ def clear_logs(logfile_types={'.cst', '.ast', '.log'}):
os
.
rmdir
(
log_dirname
)
class
TextView
:
__slots__
=
[
'text'
,
'begin'
,
'end'
]
class StringView:
    """A rudimentary StringView class, just enough for the use cases
    in parser.py.

    Slicing Python-strings always yields copies of a segment of the original
    string. See: https://mail.python.org/pipermail/python-dev/2008-May/079699.html
    However, this becomes costly (in terms of space and as a consequence also
    time) when parsing longer documents. Unfortunately, Python's `memoryview`
    does not work for unicode strings. Hence, the StringView class.

    A view keeps a reference to the complete string in `text` and describes
    its window by the absolute indices `begin` and `end`; `len` caches the
    window size. Slicing a view yields another view on the same string.
    """
    __slots__ = ['text', 'begin', 'end', 'len']

    def __init__(self, text: str, begin: Optional[int] = 0, end: Optional[int] = None) -> None:
        """Creates a view on `text[begin:end]`.

        `begin` and `end` follow slice conventions: they may be negative or
        None and are clamped to the bounds of `text`.
        """
        self.text = text  # type: str
        self.begin, self.end = StringView.real_indices(begin, end, len(text))
        self.len = max(self.end - self.begin, 0)

    @staticmethod
    def real_indices(begin, end, len):
        """Normalizes a pair of slice indices for a sequence of size `len`.

        None is replaced by 0 (`begin`) or `len` (`end`), negative values
        count from the end, and both results are clamped to `0..len`.
        Returns the tuple `(begin, end)`; `end` may be smaller than `begin`.
        """
        # NOTE: the parameter name `len` shadows the builtin inside this
        # function; it is kept so that keyword callers continue to work.
        def clamp(index):
            if index < 0:
                index += len
            return 0 if index < 0 else len if index > len else index

        if begin is None:
            begin = 0
        if end is None:
            end = len
        return clamp(begin), clamp(end)

    def __bool__(self):
        return bool(self.text) and self.end > self.begin

    def __len__(self):
        return self.len

    def __str__(self):
        return self.text[self.begin:self.end]

    def __getitem__(self, index):
        """Returns a new view for the slice `index`, relative to this view."""
        assert isinstance(index, slice), "As of now, StringView only allows slicing."
        assert index.step is None or index.step == 1, \
            "Step sizes other than 1 are not yet supported by StringView"
        start, stop = StringView.real_indices(index.start, index.stop, self.len)
        return StringView(self.text, self.begin + start, self.begin + stop)

    def __eq__(self, other):
        """Compares the viewed text with a `str` or another `StringView`.

        Other types are not stringified for the comparison any more (which
        made e.g. `StringView('1') == 1` true); NotImplemented is returned
        for them instead.
        """
        if not isinstance(other, (str, StringView)):
            return NotImplemented
        if self.len != len(other):
            return False  # differing sizes: no need to copy anything
        # PERFORMANCE WARNING: This creates copies of the strings
        return str(self) == str(other)

    def __hash__(self):
        # Defining __eq__ alone would set __hash__ to None and make views
        # unhashable. Hashing the viewed text keeps hash and equality
        # consistent with plain strings.
        return hash(str(self))

    def find(self, sub, start=None, end=None) -> int:
        """Like `str.find`, with all indices relative to the view.

        Returns the lowest index inside the view where `sub` is found within
        the optional range `start`:`end`, or -1 if `sub` is not found.
        """
        if start is None and end is None:
            pos = self.text.find(sub, self.begin, self.end)
        else:
            start, end = StringView.real_indices(start, end, self.len)
            pos = self.text.find(sub, self.begin + start, self.begin + end)
        # `pos` is an index into the complete string; -1 must be passed on
        # unchanged rather than being shifted by `begin` as well.
        return pos - self.begin if pos >= 0 else -1

    def startswith(self, prefix: str, start: int = 0, end: Optional[int] = None) -> bool:
        """Like `str.startswith`, with `start` and `end` relative to the view.

        The indices are normalized against the view, so negative values work
        and `end` can never reach beyond the end of the view into the
        remainder of the underlying string.
        """
        start, end = StringView.real_indices(start, end, self.len)
        return self.text.startswith(prefix, self.begin + start, self.begin + end)
def sv_match(regex, sv: StringView):
    """Applies `regex.match` to the window of `sv` without copying it.

    The complete underlying string is passed to the regular expression
    together with the view's bounds as `pos` and `endpos`. Positions
    reported by the returned match object therefore refer to the
    complete string, not to the view.
    """
    window_start, window_stop = sv.begin, sv.end
    return regex.match(sv.text, pos=window_start, endpos=window_stop)
def sv_index(absolute_index: Union[int, Collection], sv: StringView) -> Union[int, tuple]:
    """
    Converts an index into the string watched by a StringView object
    into an index relative to the string view object, e.g.:
    >>> sv = StringView('xxIxx')[2:3]
    >>> match = sv_match(re.compile('I'), sv)
    >>> match.end()
    3
    >>> sv_index(match.end(), sv)
    1

    If `absolute_index` is a collection of indices rather than a single
    index, a tuple with the converted indices is returned.
    """
    offset = sv.begin
    try:
        relative = absolute_index - offset
    except TypeError:
        # not a number: treat the argument as a collection of indices
        relative = tuple(position - offset for position in absolute_index)
    return relative
def sv_search(regex, sv: StringView):
    """Applies `regex.search` to the window of `sv` without copying it.

    The complete underlying string is searched between the view's bounds,
    which are passed as `pos` and `endpos`. Positions reported by the
    returned match object refer to the complete string, not to the view.
    """
    window_start, window_stop = sv.begin, sv.end
    return regex.search(sv.text, pos=window_start, endpos=window_stop)
# Module-level StringView on the empty string.
EMPTY_STRING_VIEW = StringView('')
# def repr_call(f, parameter_list) -> str:
...
...
examples/LaTeX/tst_LaTeX_docs.py
View file @
a471a1e0
...
...
@@ -49,7 +49,7 @@ def fail_on_error(src, result):
sys
.
exit
(
1
)
def
t
e
st
():
def
tst
_func
():
with
toolkit
.
logging
(
False
):
files
=
os
.
listdir
(
'testdata'
)
files
.
sort
()
...
...
@@ -87,7 +87,8 @@ def mem_profile(func):
print
(
stat
)
if
__name__
==
"__main__"
:
cpu_profile
(
test
)
cpu_profile
(
tst_func
)
test/test_DHParser.py
View file @
a471a1e0
...
...
@@ -26,4 +26,5 @@ sys.path.extend(['../', './'])
if
__name__
==
"__main__"
:
from
DHParser.testing
import
runner
runner
(
""
,
globals
())
\ No newline at end of file
runner
(
""
,
globals
())
test/test_parser.py
View file @
a471a1e0
...
...
@@ -24,7 +24,7 @@ from functools import partial
sys
.
path
.
extend
([
'../'
,
'./'
])
from
DHParser.toolkit
import
is_logging
,
logging
,
compile_python_object
from
DHParser.toolkit
import
is_logging
,
logging
,
StringView
,
compile_python_object
from
DHParser.parser
import
compile_source
,
Retrieve
,
Grammar
,
Forward
,
Token
,
ZeroOrMore
,
RE
,
\
RegExp
,
Lookbehind
,
NegativeLookahead
,
OneOrMore
,
Series
from
DHParser.ebnf
import
get_ebnf_grammar
,
get_ebnf_transformer
,
get_ebnf_compiler
...
...
@@ -152,7 +152,7 @@ class TestRegex:
assert
result
assert
not
messages
,
str
(
messages
)
parser
=
compile_python_object
(
DHPARSER_IMPORTS
+
result
,
'\w+Grammar$'
)()
node
,
rest
=
parser
.
regex
(
'abc+def'
)
node
,
rest
=
parser
.
regex
(
StringView
(
'abc+def'
)
)
assert
rest
==
''
assert
node
.
parser
.
name
==
"regex"
assert
str
(
node
)
==
'abc+def'
...
...
test/test_syntaxtree.py
View file @
a471a1e0
...
...
@@ -97,6 +97,7 @@ class TestNode:
transform
=
get_ebnf_transformer
()
compiler
=
get_ebnf_compiler
()
tree
=
parser
(
ebnf
)
print
(
tree
.
as_sxpr
())
tree_copy
=
copy
.
deepcopy
(
tree
)
transform
(
tree_copy
)
res1
=
compiler
(
tree_copy
)
...
...
test/test_toolkit.py
View file @
a471a1e0
...
...
@@ -23,9 +23,112 @@ limitations under the License.
import
concurrent.futures
import
os
import
sys
try
:
import
regex
as
re
except
ImportError
:
import
re
sys
.
path
.
extend
([
'../'
,
'./'
])
from
DHParser.toolkit
import
load_if_file
,
logging
,
log_dir
,
is_logging
from
DHParser.toolkit
import
load_if_file
,
logging
,
log_dir
,
is_logging
,
StringView
,
\
sv_match
,
sv_search
,
EMPTY_STRING_VIEW
class TestStringView:
    """Tests for `StringView` and the helpers `sv_match` and `sv_search`."""

    def test_real_indices(self):
        # (begin, end, len) -> expected normalized (begin, end)
        expectations = [
            ((3, 5, 10), (3, 5)),
            ((None, None, 10), (0, 10)),
            ((-2, -1, 10), (8, 9)),
            ((-3, 11, 10), (7, 10)),
            ((-5, -12, 10), (5, 0)),
            ((-12, -5, 10), (0, 5)),
            ((7, 6, 10), (7, 6)),
            ((None, 0, 10), (0, 0)),
        ]
        for arguments, expected in expectations:
            assert StringView.real_indices(*arguments) == expected

    def test_creation(self):
        digits = "0123456789"
        assert str(StringView(digits)) == digits
        for bounds, expected in (((3, 4), '3'), ((-4,), '6789')):
            assert str(StringView(digits, *bounds)) == expected

    def test_equality(self):
        digits = "0123456789"
        assert StringView(digits) == digits
        for bounds, expected in (((3, 4), '3'), ((-4,), '6789')):
            assert StringView(digits, *bounds) == expected

    def test_slicing(self):
        view = StringView(" 0123456789 ", 1, -1)
        assert view == '0123456789'
        expectations = [
            (slice(3, 4), '3'),
            (slice(-3, -1), '78'),
            (slice(4, 3), ''),
            (slice(None, 4), '0123'),
            (slice(4, None), '456789'),
            (slice(-2, None), '89'),
            (slice(None, -5), '01234'),
        ]
        for window, expected in expectations:
            assert view[window] == expected
        assert isinstance(view[3:5], StringView)

    def test_len(self):
        view = StringView(" 0123456789 ", 1, -1)
        assert len(view) == 10
        assert view.len == 10
        expectations = [
            (slice(5, 5), 0),
            (slice(7, 4), 0),
            (slice(-12, -2), 8),
            (slice(-12, 12), 10),
        ]
        for window, expected in expectations:
            assert len(view[window]) == expected

    def test_bool(self):
        assert not StringView('')
        assert StringView('x')
        view = StringView(" 0123456789 ", 1, -1)
        assert not view[5:4]
        one_char = view[4:5]
        assert one_char, str(one_char)
        assert not view[3:3]
        assert not view[12:13]
        assert view[0:20]

    def test_sv_match(self):
        view = StringView(" 0123456789 ", 1, -1)
        for pattern in (r'\d', r'\d+'):
            assert sv_match(re.compile(pattern), view)
        assert not sv_match(re.compile(r' '), view)
        assert sv_match(re.compile(r'45'), view[4:])

    def test_sv_search(self):
        view = StringView(" 0123456789 ", 1, -1)
        five = re.compile(r'5')
        assert sv_search(five, view)
        assert not sv_search(re.compile(r' '), view)
        assert sv_search(five, view[5:])
        assert not sv_search(re.compile(r'9'), view[:9])

    def test_find(self):
        view = StringView(" 0123456789 ", 1, -1)
        assert view.find('5') == 5
        for arguments in ((' ',), ('0', 1), ('9', 0, 8)):
            assert view.find(*arguments) < 0
        assert view.find('45', 1, 8) == 4

    def test_startswith(self):
        view = StringView(" 0123456789 ", 1, -1)
        assert view.startswith('012')
        assert view.startswith('123', 1)
        assert not view.startswith('123', 1, 3)

    def test_EMPTY_STRING_VIEW(self):
        empty = EMPTY_STRING_VIEW
        assert len(empty) == 0
        assert empty.find('x') < 0
        assert not sv_match(re.compile(r'x'), empty)
        assert sv_match(re.compile(r'.*'), empty)
        assert len(empty[0:1]) == 0