Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
a471a1e0
Commit
a471a1e0
authored
Aug 31, 2017
by
di68kap
Browse files
- parser.py: string slicing replaced by toolkit.StringView
parent
481891e3
Changes
8
Hide whitespace changes
Inline
Side-by-side
DHParser/parser.py
View file @
a471a1e0
...
@@ -77,7 +77,8 @@ except ImportError:
...
@@ -77,7 +77,8 @@ except ImportError:
from
DHParser.toolkit
import
is_logging
,
log_dir
,
logfile_basename
,
escape_re
,
sane_parser_name
from
DHParser.toolkit
import
is_logging
,
log_dir
,
logfile_basename
,
escape_re
,
sane_parser_name
from
DHParser.syntaxtree
import
WHITESPACE_PTYPE
,
TOKEN_PTYPE
,
ZOMBIE_PARSER
,
ParserBase
,
\
from
DHParser.syntaxtree
import
WHITESPACE_PTYPE
,
TOKEN_PTYPE
,
ZOMBIE_PARSER
,
ParserBase
,
\
Node
,
TransformationFunc
Node
,
TransformationFunc
from
DHParser.toolkit
import
TextView
,
load_if_file
,
error_messages
,
line_col
from
DHParser.toolkit
import
StringView
,
EMPTY_STRING_VIEW
,
sv_match
,
sv_index
,
sv_search
,
\
load_if_file
,
error_messages
,
line_col
__all__
=
(
'PreprocessorFunc'
,
__all__
=
(
'PreprocessorFunc'
,
'HistoryRecord'
,
'HistoryRecord'
,
...
@@ -161,7 +162,7 @@ class HistoryRecord:
...
@@ -161,7 +162,7 @@ class HistoryRecord:
# type: List['Parser']
# type: List['Parser']
self
.
node
=
node
# type: Node
self
.
node
=
node
# type: Node
self
.
remaining
=
remaining
# type: int
self
.
remaining
=
remaining
# type: int
document
=
call_stack
[
-
1
].
grammar
.
document__
if
call_stack
else
''
document
=
call_stack
[
-
1
].
grammar
.
document__
.
text
if
call_stack
else
''
self
.
line_col
=
line_col
(
document
,
len
(
document
)
-
remaining
)
# type: Tuple[int, int]
self
.
line_col
=
line_col
(
document
,
len
(
document
)
-
remaining
)
# type: Tuple[int, int]
def
__str__
(
self
):
def
__str__
(
self
):
...
@@ -229,11 +230,13 @@ def add_parser_guard(parser_func):
...
@@ -229,11 +230,13 @@ def add_parser_guard(parser_func):
that takes care of memoizing, left recursion and optionally tracing
that takes care of memoizing, left recursion and optionally tracing
(aka "history tracking") of parser calls. Returns the wrapped call.
(aka "history tracking") of parser calls. Returns the wrapped call.
"""
"""
def
guarded_call
(
parser
:
'Parser'
,
text
:
str
)
->
Tuple
[
Node
,
str
]:
def
guarded_call
(
parser
:
'Parser'
,
text
:
StringView
)
->
Tuple
[
Node
,
StringView
]:
assert
isinstance
(
text
,
StringView
)
def
memoized
(
parser
,
location
):
def
memoized
(
parser
,
location
):
node
=
parser
.
visited
[
location
]
node
=
parser
.
visited
[
location
]
rlen
=
location
-
(
0
if
node
is
None
else
node
.
len
)
rlen
=
location
-
(
0
if
node
is
None
else
node
.
len
)
rest
=
TextView
(
grammar
.
document__
,
-
rlen
)
if
rlen
else
''
rest
=
grammar
.
document__
[
-
rlen
:]
if
rlen
else
EMPTY_STRING_VIEW
return
node
,
rest
return
node
,
rest
# NOTE: An older and simpler implementation of memoization
# NOTE: An older and simpler implementation of memoization
# relied on `parser.visited[location] == node, rest`. Although,
# relied on `parser.visited[location] == node, rest`. Although,
...
@@ -267,6 +270,7 @@ def add_parser_guard(parser_func):
...
@@ -267,6 +270,7 @@ def add_parser_guard(parser_func):
# run original __call__ method
# run original __call__ method
node
,
rest
=
parser_func
(
parser
,
text
)
node
,
rest
=
parser_func
(
parser
,
text
)
assert
isinstance
(
rest
,
StringView
)
if
node
is
None
:
if
node
is
None
:
# retrieve an earlier match result (from left recursion) if it exists
# retrieve an earlier match result (from left recursion) if it exists
...
@@ -302,7 +306,7 @@ def add_parser_guard(parser_func):
...
@@ -302,7 +306,7 @@ def add_parser_guard(parser_func):
node
=
Node
(
None
,
text
[:
min
(
10
,
max
(
1
,
text
.
find
(
"
\n
"
)))]
+
" ..."
)
node
=
Node
(
None
,
text
[:
min
(
10
,
max
(
1
,
text
.
find
(
"
\n
"
)))]
+
" ..."
)
node
.
add_error
(
"maximum recursion depth of parser reached; "
node
.
add_error
(
"maximum recursion depth of parser reached; "
"potentially due to too many errors!"
)
"potentially due to too many errors!"
)
rest
=
''
rest
=
EMPTY_STRING_VIEW
return
node
,
rest
return
node
,
rest
...
@@ -409,7 +413,7 @@ class Parser(ParserBase, metaclass=ParserMetaClass):
...
@@ -409,7 +413,7 @@ class Parser(ParserBase, metaclass=ParserMetaClass):
self
.
cycle_detection
=
set
()
# type: Set[Callable]
self
.
cycle_detection
=
set
()
# type: Set[Callable]
return
self
return
self
def
__call__
(
self
,
text
:
Text
View
)
->
Tuple
[
Node
,
Text
View
]:
def
__call__
(
self
,
text
:
String
View
)
->
Tuple
[
Node
,
String
View
]:
"""Applies the parser to the given `text` and returns a node with
"""Applies the parser to the given `text` and returns a node with
the results or None as well as the text at the position right behind
the results or None as well as the text at the position right behind
the matching string."""
the matching string."""
...
@@ -724,8 +728,8 @@ class Grammar:
...
@@ -724,8 +728,8 @@ class Grammar:
def
_reset__
(
self
):
def
_reset__
(
self
):
self
.
document__
=
""
# type:
s
tr
self
.
document__
=
EMPTY_STRING_VIEW
# type:
S
tr
ingView
self
.
_reversed__
=
""
# type:
s
tr
self
.
_reversed__
=
EMPTY_STRING_VIEW
# type:
S
tr
ingView
# variables stored and recalled by Capture and Retrieve parsers
# variables stored and recalled by Capture and Retrieve parsers
self
.
variables__
=
dict
()
# type: Dict[str, List[str]]
self
.
variables__
=
dict
()
# type: Dict[str, List[str]]
self
.
rollback__
=
[]
# type: List[Tuple[int, Callable]]
self
.
rollback__
=
[]
# type: List[Tuple[int, Callable]]
...
@@ -742,7 +746,7 @@ class Grammar:
...
@@ -742,7 +746,7 @@ class Grammar:
@
property
@
property
def
reversed__
(
self
)
->
str
:
def
reversed__
(
self
)
->
str
:
if
not
self
.
_reversed__
:
if
not
self
.
_reversed__
:
self
.
_reversed__
=
self
.
document__
[::
-
1
]
self
.
_reversed__
=
StringView
(
self
.
document__
.
text
[::
-
1
]
)
return
self
.
_reversed__
return
self
.
_reversed__
...
@@ -784,13 +788,13 @@ class Grammar:
...
@@ -784,13 +788,13 @@ class Grammar:
else
:
else
:
self
.
_dirty_flag__
=
True
self
.
_dirty_flag__
=
True
self
.
history_tracking__
=
is_logging
()
self
.
history_tracking__
=
is_logging
()
self
.
document__
=
document
self
.
document__
=
StringView
(
document
)
self
.
last_rb__loc__
=
len
(
document
)
+
1
# rollback location
self
.
last_rb__loc__
=
len
(
self
.
document
__
)
+
1
# rollback location
parser
=
self
[
start_parser
]
if
isinstance
(
start_parser
,
str
)
else
start_parser
parser
=
self
[
start_parser
]
if
isinstance
(
start_parser
,
str
)
else
start_parser
assert
parser
.
grammar
==
self
,
"Cannot run parsers from a different grammar object!"
\
assert
parser
.
grammar
==
self
,
"Cannot run parsers from a different grammar object!"
\
" %s vs. %s"
%
(
str
(
self
),
str
(
parser
.
grammar
))
" %s vs. %s"
%
(
str
(
self
),
str
(
parser
.
grammar
))
stitches
=
[]
# type: List[Node]
stitches
=
[]
# type: List[Node]
rest
=
document
rest
=
self
.
document
__
if
not
rest
:
if
not
rest
:
result
,
ignore
=
parser
(
rest
)
result
,
ignore
=
parser
(
rest
)
if
result
is
None
:
if
result
is
None
:
...
@@ -883,7 +887,7 @@ class Grammar:
...
@@ -883,7 +887,7 @@ class Grammar:
document.
document.
"""
"""
def
prepare_line
(
record
):
def
prepare_line
(
record
):
excerpt
=
self
.
document__
.
__getitem__
(
record
.
extent
)[:
25
].
replace
(
'
\n
'
,
'
\\
n'
)
excerpt
=
self
.
document__
.
text
.
__getitem__
(
record
.
extent
)[:
25
].
replace
(
'
\n
'
,
'
\\
n'
)
excerpt
=
"'%s'"
%
excerpt
if
len
(
excerpt
)
<
25
else
"'%s...'"
%
excerpt
excerpt
=
"'%s'"
%
excerpt
if
len
(
excerpt
)
<
25
else
"'%s...'"
%
excerpt
return
record
.
stack
,
record
.
status
,
excerpt
return
record
.
stack
,
record
.
status
,
excerpt
...
@@ -985,7 +989,7 @@ class PreprocessorToken(Parser):
...
@@ -985,7 +989,7 @@ class PreprocessorToken(Parser):
assert
RX_PREPROCESSOR_TOKEN
.
match
(
token
)
assert
RX_PREPROCESSOR_TOKEN
.
match
(
token
)
super
(
PreprocessorToken
,
self
).
__init__
(
token
)
super
(
PreprocessorToken
,
self
).
__init__
(
token
)
def
__call__
(
self
,
text
:
s
tr
)
->
Tuple
[
Node
,
s
tr
]:
def
__call__
(
self
,
text
:
S
tr
ingView
)
->
Tuple
[
Node
,
S
tr
ingView
]:
if
text
[
0
:
1
]
==
BEGIN_TOKEN
:
if
text
[
0
:
1
]
==
BEGIN_TOKEN
:
end
=
text
.
find
(
END_TOKEN
,
1
)
end
=
text
.
find
(
END_TOKEN
,
1
)
if
end
<
0
:
if
end
<
0
:
...
@@ -1040,10 +1044,10 @@ class RegExp(Parser):
...
@@ -1040,10 +1044,10 @@ class RegExp(Parser):
regexp
=
self
.
regexp
.
pattern
regexp
=
self
.
regexp
.
pattern
return
RegExp
(
regexp
,
self
.
name
)
return
RegExp
(
regexp
,
self
.
name
)
def
__call__
(
self
,
text
:
s
tr
)
->
Tuple
[
Node
,
s
tr
]:
def
__call__
(
self
,
text
:
S
tr
ingView
)
->
Tuple
[
Node
,
S
tr
ingView
]:
match
=
text
[
0
:
1
]
!=
BEGIN_TOKEN
and
self
.
regexp
.
match
(
text
)
# ESC starts a preprocessor token.
match
=
text
[
0
:
1
]
!=
BEGIN_TOKEN
and
sv_match
(
self
.
regexp
,
text
)
# ESC starts a preprocessor token.
if
match
:
if
match
:
end
=
match
.
end
()
end
=
sv_index
(
match
.
end
()
,
text
)
return
Node
(
self
,
text
[:
end
]),
text
[
end
:]
return
Node
(
self
,
text
[:
end
]),
text
[
end
:]
return
None
,
text
return
None
,
text
...
@@ -1114,9 +1118,9 @@ class RE(Parser):
...
@@ -1114,9 +1118,9 @@ class RE(Parser):
regexp
=
self
.
main
.
regexp
.
pattern
regexp
=
self
.
main
.
regexp
.
pattern
return
self
.
__class__
(
regexp
,
self
.
wL
,
self
.
wR
,
self
.
name
)
return
self
.
__class__
(
regexp
,
self
.
wL
,
self
.
wR
,
self
.
name
)
def
__call__
(
self
,
text
:
s
tr
)
->
Tuple
[
Node
,
s
tr
]:
def
__call__
(
self
,
text
:
S
tr
ingView
)
->
Tuple
[
Node
,
S
tr
ingView
]:
# assert self.main.regexp.pattern != "@"
# assert self.main.regexp.pattern != "@"
t
=
text
# type:
s
tr
t
=
text
# type:
S
tr
ingView
wL
,
t
=
self
.
wspLeft
(
t
)
wL
,
t
=
self
.
wspLeft
(
t
)
main
,
t
=
self
.
main
(
t
)
main
,
t
=
self
.
main
(
t
)
if
main
:
if
main
:
...
@@ -1264,7 +1268,7 @@ class Optional(UnaryOperator):
...
@@ -1264,7 +1268,7 @@ class Optional(UnaryOperator):
"Nesting options with required elements is contradictory: "
\
"Nesting options with required elements is contradictory: "
\
"%s(%s)"
%
(
str
(
name
),
str
(
parser
.
name
))
"%s(%s)"
%
(
str
(
name
),
str
(
parser
.
name
))
def
__call__
(
self
,
text
:
s
tr
)
->
Tuple
[
Node
,
s
tr
]:
def
__call__
(
self
,
text
:
S
tr
ingView
)
->
Tuple
[
Node
,
S
tr
ingView
]:
node
,
text
=
self
.
parser
(
text
)
node
,
text
=
self
.
parser
(
text
)
if
node
:
if
node
:
return
Node
(
self
,
node
),
text
return
Node
(
self
,
node
),
text
...
@@ -1289,7 +1293,7 @@ class ZeroOrMore(Optional):
...
@@ -1289,7 +1293,7 @@ class ZeroOrMore(Optional):
EBNF-Notation: `{ ... }`
EBNF-Notation: `{ ... }`
EBNF-Example: `sentence = { /\w+,?/ } "."`
EBNF-Example: `sentence = { /\w+,?/ } "."`
"""
"""
def
__call__
(
self
,
text
:
s
tr
)
->
Tuple
[
Node
,
s
tr
]:
def
__call__
(
self
,
text
:
S
tr
ingView
)
->
Tuple
[
Node
,
S
tr
ingView
]:
results
=
()
# type: Tuple[Node, ...]
results
=
()
# type: Tuple[Node, ...]
n
=
len
(
text
)
+
1
n
=
len
(
text
)
+
1
while
text
and
len
(
text
)
<
n
:
while
text
and
len
(
text
)
<
n
:
...
@@ -1314,9 +1318,9 @@ class OneOrMore(UnaryOperator):
...
@@ -1314,9 +1318,9 @@ class OneOrMore(UnaryOperator):
"Use ZeroOrMore instead of nesting OneOrMore and Optional: "
\
"Use ZeroOrMore instead of nesting OneOrMore and Optional: "
\
"%s(%s)"
%
(
str
(
name
),
str
(
parser
.
name
))
"%s(%s)"
%
(
str
(
name
),
str
(
parser
.
name
))
def
__call__
(
self
,
text
:
s
tr
)
->
Tuple
[
Node
,
s
tr
]:
def
__call__
(
self
,
text
:
S
tr
ingView
)
->
Tuple
[
Node
,
S
tr
ingView
]:
results
=
()
# type: Tuple[Node, ...]
results
=
()
# type: Tuple[Node, ...]
text_
=
text
# type:
s
tr
text_
=
text
# type:
S
tr
ingView
n
=
len
(
text
)
+
1
n
=
len
(
text
)
+
1
while
text_
and
len
(
text_
)
<
n
:
while
text_
and
len
(
text_
)
<
n
:
n
=
len
(
text_
)
n
=
len
(
text_
)
...
@@ -1340,9 +1344,9 @@ class Series(NaryOperator):
...
@@ -1340,9 +1344,9 @@ class Series(NaryOperator):
super
(
Series
,
self
).
__init__
(
*
parsers
,
name
=
name
)
super
(
Series
,
self
).
__init__
(
*
parsers
,
name
=
name
)
assert
len
(
self
.
parsers
)
>=
1
assert
len
(
self
.
parsers
)
>=
1
def
__call__
(
self
,
text
:
s
tr
)
->
Tuple
[
Node
,
s
tr
]:
def
__call__
(
self
,
text
:
S
tr
ingView
)
->
Tuple
[
Node
,
S
tr
ingView
]:
results
=
()
# type: Tuple[Node, ...]
results
=
()
# type: Tuple[Node, ...]
text_
=
text
# type:
s
tr
text_
=
text
# type:
S
tr
ingView
for
parser
in
self
.
parsers
:
for
parser
in
self
.
parsers
:
node
,
text_
=
parser
(
text_
)
node
,
text_
=
parser
(
text_
)
if
not
node
:
if
not
node
:
...
@@ -1400,7 +1404,7 @@ class Alternative(NaryOperator):
...
@@ -1400,7 +1404,7 @@ class Alternative(NaryOperator):
assert
all
(
not
isinstance
(
p
,
Optional
)
for
p
in
self
.
parsers
[:
-
1
])
assert
all
(
not
isinstance
(
p
,
Optional
)
for
p
in
self
.
parsers
[:
-
1
])
self
.
been_here
=
dict
()
# type: Dict[int, int]
self
.
been_here
=
dict
()
# type: Dict[int, int]
def
__call__
(
self
,
text
:
s
tr
)
->
Tuple
[
Node
,
s
tr
]:
def
__call__
(
self
,
text
:
S
tr
ingView
)
->
Tuple
[
Node
,
S
tr
ingView
]:
for
parser
in
self
.
parsers
:
for
parser
in
self
.
parsers
:
node
,
text_
=
parser
(
text
)
node
,
text_
=
parser
(
text
)
if
node
:
if
node
:
...
@@ -1447,11 +1451,13 @@ class FlowOperator(UnaryOperator):
...
@@ -1447,11 +1451,13 @@ class FlowOperator(UnaryOperator):
class
Required
(
FlowOperator
):
class
Required
(
FlowOperator
):
# Add constructor that checks for logical errors, like `Required(Optional(...))` constructs ?
# Add constructor that checks for logical errors, like `Required(Optional(...))` constructs ?
def
__call__
(
self
,
text
:
str
)
->
Tuple
[
Node
,
str
]:
RX_ARGUMENT
=
re
.
compile
(
r
'\s(\S)'
)
def
__call__
(
self
,
text
:
StringView
)
->
Tuple
[
Node
,
StringView
]:
node
,
text_
=
self
.
parser
(
text
)
node
,
text_
=
self
.
parser
(
text
)
if
not
node
:
if
not
node
:
m
=
re
.
search
(
r
'\s(\S)'
,
text
)
m
=
sv_search
(
Required
.
RX_ARGUMENT
,
text
)
#
re.search(r'\s(\S)', text)
i
=
max
(
1
,
m
.
regs
[
1
][
0
])
if
m
else
1
i
=
max
(
1
,
sv_index
(
m
.
regs
[
1
][
0
]
,
text
)
)
if
m
else
1
node
=
Node
(
self
,
text
[:
i
])
node
=
Node
(
self
,
text
[:
i
])
text_
=
text
[
i
:]
text_
=
text
[
i
:]
# assert False, "*"+text[:i]+"*"
# assert False, "*"+text[:i]+"*"
...
@@ -1467,7 +1473,7 @@ class Lookahead(FlowOperator):
...
@@ -1467,7 +1473,7 @@ class Lookahead(FlowOperator):
def
__init__
(
self
,
parser
:
Parser
,
name
:
str
=
''
)
->
None
:
def
__init__
(
self
,
parser
:
Parser
,
name
:
str
=
''
)
->
None
:
super
(
Lookahead
,
self
).
__init__
(
parser
,
name
)
super
(
Lookahead
,
self
).
__init__
(
parser
,
name
)
def
__call__
(
self
,
text
:
s
tr
)
->
Tuple
[
Node
,
s
tr
]:
def
__call__
(
self
,
text
:
S
tr
ingView
)
->
Tuple
[
Node
,
S
tr
ingView
]:
node
,
text_
=
self
.
parser
(
text
)
node
,
text_
=
self
.
parser
(
text
)
if
self
.
sign
(
node
is
not
None
):
if
self
.
sign
(
node
is
not
None
):
return
Node
(
self
,
''
),
text
return
Node
(
self
,
''
),
text
...
@@ -1512,9 +1518,9 @@ class Lookbehind(FlowOperator):
...
@@ -1512,9 +1518,9 @@ class Lookbehind(FlowOperator):
self
.
regexp
=
p
.
main
.
regexp
if
isinstance
(
p
,
RE
)
else
p
.
regexp
self
.
regexp
=
p
.
main
.
regexp
if
isinstance
(
p
,
RE
)
else
p
.
regexp
super
(
Lookbehind
,
self
).
__init__
(
parser
,
name
)
super
(
Lookbehind
,
self
).
__init__
(
parser
,
name
)
def
__call__
(
self
,
text
:
s
tr
)
->
Tuple
[
Node
,
s
tr
]:
def
__call__
(
self
,
text
:
S
tr
ingView
)
->
Tuple
[
Node
,
S
tr
ingView
]:
backwards_text
=
self
.
grammar
.
reversed__
[
len
(
text
):]
# self.grammar.document__[-len(text) - 1::-1]
backwards_text
=
self
.
grammar
.
reversed__
[
len
(
text
):]
# self.grammar.document__[-len(text) - 1::-1]
if
self
.
sign
(
self
.
regexp
.
match
(
backwards_text
)):
if
self
.
sign
(
sv_match
(
self
.
regexp
,
backwards_text
)):
return
Node
(
self
,
''
),
text
return
Node
(
self
,
''
),
text
else
:
else
:
return
None
,
text
return
None
,
text
...
@@ -1548,7 +1554,7 @@ class Capture(UnaryOperator):
...
@@ -1548,7 +1554,7 @@ class Capture(UnaryOperator):
def
__init__
(
self
,
parser
:
Parser
,
name
:
str
=
''
)
->
None
:
def
__init__
(
self
,
parser
:
Parser
,
name
:
str
=
''
)
->
None
:
super
(
Capture
,
self
).
__init__
(
parser
,
name
)
super
(
Capture
,
self
).
__init__
(
parser
,
name
)
def
__call__
(
self
,
text
:
s
tr
)
->
Tuple
[
Node
,
s
tr
]:
def
__call__
(
self
,
text
:
S
tr
ingView
)
->
Tuple
[
Node
,
S
tr
ingView
]:
node
,
text_
=
self
.
parser
(
text
)
node
,
text_
=
self
.
parser
(
text
)
if
node
:
if
node
:
stack
=
self
.
grammar
.
variables__
.
setdefault
(
self
.
name
,
[])
stack
=
self
.
grammar
.
variables__
.
setdefault
(
self
.
name
,
[])
...
@@ -1590,13 +1596,13 @@ class Retrieve(Parser):
...
@@ -1590,13 +1596,13 @@ class Retrieve(Parser):
def
__deepcopy__
(
self
,
memo
):
def
__deepcopy__
(
self
,
memo
):
return
self
.
__class__
(
self
.
symbol
,
self
.
filter
,
self
.
name
)
return
self
.
__class__
(
self
.
symbol
,
self
.
filter
,
self
.
name
)
def
__call__
(
self
,
text
:
s
tr
)
->
Tuple
[
Node
,
s
tr
]:
def
__call__
(
self
,
text
:
S
tr
ingView
)
->
Tuple
[
Node
,
S
tr
ingView
]:
return
self
.
call
(
text
)
# allow call method to be called from subclass circumventing the parser guard
return
self
.
call
(
text
)
# allow call method to be called from subclass circumventing the parser guard
def
__repr__
(
self
):
def
__repr__
(
self
):
return
':'
+
self
.
symbol
.
repr
return
':'
+
self
.
symbol
.
repr
def
call
(
self
,
text
:
s
tr
)
->
Tuple
[
Node
,
s
tr
]:
def
call
(
self
,
text
:
S
tr
ingView
)
->
Tuple
[
Node
,
S
tr
ingView
]:
try
:
try
:
stack
=
self
.
grammar
.
variables__
[
self
.
symbol
.
name
]
stack
=
self
.
grammar
.
variables__
[
self
.
symbol
.
name
]
value
=
self
.
filter
(
stack
)
value
=
self
.
filter
(
stack
)
...
@@ -1612,7 +1618,7 @@ class Retrieve(Parser):
...
@@ -1612,7 +1618,7 @@ class Retrieve(Parser):
class
Pop
(
Retrieve
):
class
Pop
(
Retrieve
):
"""STILL EXPERIMENTAL!!!"""
"""STILL EXPERIMENTAL!!!"""
def
__call__
(
self
,
text
:
s
tr
)
->
Tuple
[
Node
,
s
tr
]:
def
__call__
(
self
,
text
:
S
tr
ingView
)
->
Tuple
[
Node
,
S
tr
ingView
]:
nd
,
txt
=
super
(
Pop
,
self
).
call
(
text
)
# call() instead of __call__() to avoid parser guard
nd
,
txt
=
super
(
Pop
,
self
).
call
(
text
)
# call() instead of __call__() to avoid parser guard
if
nd
and
not
nd
.
error_flag
:
if
nd
and
not
nd
.
error_flag
:
stack
=
self
.
grammar
.
variables__
[
self
.
symbol
.
name
]
stack
=
self
.
grammar
.
variables__
[
self
.
symbol
.
name
]
...
@@ -1644,7 +1650,7 @@ class Synonym(UnaryOperator):
...
@@ -1644,7 +1650,7 @@ class Synonym(UnaryOperator):
class, in which case it would be unclear whether the parser
class, in which case it would be unclear whether the parser
RE('\d\d\d\d') carries the name 'JAHRESZAHL' or 'jahr'.
RE('\d\d\d\d') carries the name 'JAHRESZAHL' or 'jahr'.
"""
"""
def
__call__
(
self
,
text
:
s
tr
)
->
Tuple
[
Node
,
s
tr
]:
def
__call__
(
self
,
text
:
S
tr
ingView
)
->
Tuple
[
Node
,
S
tr
ingView
]:
node
,
text
=
self
.
parser
(
text
)
node
,
text
=
self
.
parser
(
text
)
if
node
:
if
node
:
return
Node
(
self
,
node
),
text
return
Node
(
self
,
node
),
text
...
@@ -1684,7 +1690,7 @@ class Forward(Parser):
...
@@ -1684,7 +1690,7 @@ class Forward(Parser):
duplicate
.
set
(
parser
)
duplicate
.
set
(
parser
)
return
duplicate
return
duplicate
def
__call__
(
self
,
text
:
s
tr
)
->
Tuple
[
Node
,
s
tr
]:
def
__call__
(
self
,
text
:
S
tr
ingView
)
->
Tuple
[
Node
,
S
tr
ingView
]:
return
self
.
parser
(
text
)
return
self
.
parser
(
text
)
def
__repr__
(
self
):
def
__repr__
(
self
):
...
...
DHParser/syntaxtree.py
View file @
a471a1e0
...
@@ -31,7 +31,7 @@ except ImportError:
...
@@ -31,7 +31,7 @@ except ImportError:
from
.typing34
import
AbstractSet
,
Any
,
ByteString
,
Callable
,
cast
,
Container
,
Dict
,
\
from
.typing34
import
AbstractSet
,
Any
,
ByteString
,
Callable
,
cast
,
Container
,
Dict
,
\
Iterator
,
List
,
NamedTuple
,
Sequence
,
Union
,
Text
,
Tuple
Iterator
,
List
,
NamedTuple
,
Sequence
,
Union
,
Text
,
Tuple
from
DHParser.toolkit
import
is_logging
,
log_dir
,
Text
View
,
line_col
,
identity
from
DHParser.toolkit
import
is_logging
,
log_dir
,
String
View
,
line_col
,
identity
__all__
=
(
'WHITESPACE_PTYPE'
,
__all__
=
(
'WHITESPACE_PTYPE'
,
'MockParser'
,
'MockParser'
,
...
@@ -129,8 +129,8 @@ ZOMBIE_PARSER = ZombieParser()
...
@@ -129,8 +129,8 @@ ZOMBIE_PARSER = ZombieParser()
Error
=
NamedTuple
(
'Error'
,
[(
'pos'
,
int
),
(
'msg'
,
str
)])
Error
=
NamedTuple
(
'Error'
,
[(
'pos'
,
int
),
(
'msg'
,
str
)])
ChildrenType
=
Tuple
[
'Node'
,
...]
ChildrenType
=
Tuple
[
'Node'
,
...]
StrictResultType
=
Union
[
ChildrenType
,
Text
View
,
str
]
StrictResultType
=
Union
[
ChildrenType
,
String
View
,
str
]
ResultType
=
Union
[
ChildrenType
,
'Node'
,
Text
View
,
str
,
None
]
ResultType
=
Union
[
ChildrenType
,
'Node'
,
String
View
,
str
,
None
]
def
flatten_sxpr
(
sxpr
:
str
)
->
str
:
def
flatten_sxpr
(
sxpr
:
str
)
->
str
:
...
@@ -189,6 +189,7 @@ class Node:
...
@@ -189,6 +189,7 @@ class Node:
__slots__
=
[
'_result'
,
'children'
,
'_errors'
,
'_len'
,
'_pos'
,
'parser'
,
'error_flag'
]
__slots__
=
[
'_result'
,
'children'
,
'_errors'
,
'_len'
,
'_pos'
,
'parser'
,
'error_flag'
]
def
__init__
(
self
,
parser
,
result
:
ResultType
)
->
None
:
def
__init__
(
self
,
parser
,
result
:
ResultType
)
->
None
:
"""Initializes the ``Node``-object with the ``Parser``-Instance
"""Initializes the ``Node``-object with the ``Parser``-Instance
that generated the node and the parser's result.
that generated the node and the parser's result.
...
@@ -251,7 +252,7 @@ class Node:
...
@@ -251,7 +252,7 @@ class Node:
# or isinstance(result, Node)
# or isinstance(result, Node)
# or isinstance(result, str)), str(result)
# or isinstance(result, str)), str(result)
self
.
_result
=
(
result
,)
if
isinstance
(
result
,
Node
)
else
str
(
result
)
\
self
.
_result
=
(
result
,)
if
isinstance
(
result
,
Node
)
else
str
(
result
)
\
if
isinstance
(
result
,
Text
View
)
else
result
or
''
# type: StrictResultType
if
isinstance
(
result
,
String
View
)
else
result
or
''
# type: StrictResultType
self
.
children
=
cast
(
ChildrenType
,
self
.
_result
)
\
self
.
children
=
cast
(
ChildrenType
,
self
.
_result
)
\
if
isinstance
(
self
.
_result
,
tuple
)
else
cast
(
ChildrenType
,
())
# type: ChildrenType
if
isinstance
(
self
.
_result
,
tuple
)
else
cast
(
ChildrenType
,
())
# type: ChildrenType
self
.
error_flag
=
any
(
r
.
error_flag
for
r
in
self
.
children
)
# type: bool
self
.
error_flag
=
any
(
r
.
error_flag
for
r
in
self
.
children
)
# type: bool
...
...
DHParser/toolkit.py
View file @
a471a1e0
...
@@ -43,14 +43,18 @@ except ImportError:
...
@@ -43,14 +43,18 @@ except ImportError:
import
sys
import
sys
try
:
try
:
from
typing
import
Any
,
List
,
Tuple
,
Optional
from
typing
import
Any
,
List
,
Tuple
,
Collection
,
Union
,
Optional
except
ImportError
:
except
ImportError
:
from
.typing34
import
Any
,
List
,
Tuple
,
Optional
from
.typing34
import
Any
,
List
,
Tuple
,
Collection
,
Union
,
Optional
__all__
=
(
'logging'
,
__all__
=
(
'logging'
,
'is_logging'
,
'is_logging'
,
'log_dir'
,
'log_dir'
,
'logfile_basename'
,
'logfile_basename'
,
'StringView'
,
'sv_match'
,
'sv_index'
,
'sv_search'
,
# 'supress_warnings',
# 'supress_warnings',
# 'warnings',
# 'warnings',
# 'repr_call',
# 'repr_call',
...
@@ -150,22 +154,93 @@ def clear_logs(logfile_types={'.cst', '.ast', '.log'}):
...
@@ -150,22 +154,93 @@ def clear_logs(logfile_types={'.cst', '.ast', '.log'}):
os
.
rmdir
(
log_dirname
)
os
.
rmdir
(
log_dirname
)
class
TextView
:
class
StringView
:
__slots__
=
[
'text'
,
'begin'
,
'end'
]
""""A rudimentary StringView class, just enough for the use cases
in parser.py.
Slicing Python-strings always yields copies of a segment of the original
string. See: https://mail.python.org/pipermail/python-dev/2008-May/079699.html
However, this becomes costly (in terms of space and as a consequence also
time) when parsing longer documents. Unfortunately, Python's `memoryview`
does not work for unicode strings. Hence, the StringView class.
"""
__slots__
=
[
'text'
,
'begin'
,
'end'
,
'len'
]
def
__init__
(
self
,
text
:
str
,
begin
:
Optional
[
int
]
=
0
,
end
:
Optional
[
int
]
=
None
)
->
None
:
def
__init__
(
self
,
text
:
str
,
begin
:
Optional
[
int
]
=
0
,
end
:
Optional
[
int
]
=
None
)
->
None
:
self
.
text
=
text
# type: str
self
.
text
=
text
# type: str
self
.
begin
=
begin
or
0
# type: int # TODO: Negative Values!!!
self
.
begin
,
self
.
end
=
StringView
.
real_indices
(
begin
,
end
,
len
(
text
))
self
.
end
=
end
or
len
(
text
)
# type: int
self
.
len
=
max
(
self
.
end
-
self
.
begin
,
0
)
@
staticmethod
def
real_indices
(
begin
,
end
,
len
):
def
pack
(
index
,
len
):
index
=
index
if
index
>=
0
else
index
+
len
return
0
if
index
<
0
else
len
if
index
>
len
else
index
if
begin
is
None
:
begin
=
0
if
end
is
None
:
end
=
len
return
pack
(
begin
,
len
),
pack
(
end
,
len
)
def
__bool__
(
self
):
return
bool
(
self
.
text
)
and
self
.
end
>
self
.
begin
def
__len__
(
self
):
return
self
.
len
def
__str__
(
self
):
def
__str__
(
self
):
return
self
.
text
[
self
.
begin
:
self
.
end
]
return
self
.
text
[
self
.
begin
:
self
.
end
]
def
__getitem__
(
self
,
index
):
def
__getitem__
(
self
,
index
):
assert
isinstance
(
index
,
slice
),
"Minimal implementation of TextView just allows slicing."
assert
isinstance
(
index
,
slice
),
"As of now, StringView only allows slicing."
start
=
index
.
start
or
0
assert
index
.
step
is
None
or
index
.
step
==
1
,
\
stop
=
index
.
stop
or
(
self
.
end
-
self
.
begin
)
"Step sizes other than 1 are not yet supported by StringView"
return
TextView
(
self
.
text
,
self
.
begin
+
start
,
self
.
begin
+
stop
)
start
,
stop
=
StringView
.
real_indices
(
index
.
start
,
index
.
stop
,
self
.
len
)
return
StringView
(
self
.
text
,
self
.
begin
+
start
,
self
.
begin
+
stop
)
def
__eq__
(
self
,
other
):
return
str
(
self
)
==
str
(
other
)
# PERFORMANCE WARNING: This creates copies of the strings
def
find
(
self
,
sub
,
start
=
None
,
end
=
None
)
->
int
:
if
start
is
None
and
end
is
None
:
return
self
.
text
.
find
(
sub
,
self
.
begin
,
self
.
end
)
-
self
.
begin
else
:
start
,
end
=
StringView
.
real_indices
(
start
,
end
,
self
.
len
)
return
self
.
text
.
find
(
sub
,
self
.
begin
+
start
,
self
.
begin
+
end
)
-
self
.
begin
def
startswith
(
self
,
prefix
:
str
,
start
:
int
=
0
,
end
:
Optional
[
int
]
=
None
)
->
bool
:
start
+=
self
.
begin
end
=
self
.
end
if
end
is
None
else
self
.
begin
+
end
return
self
.
text
.
startswith
(
prefix
,
start
,
end
)
def
sv_match
(
regex
,
sv
:
StringView
):
return
regex
.
match
(
sv
.
text
,
pos
=
sv
.
begin
,
endpos
=
sv
.
end
)
def
sv_index
(
absolute_index
:
Union
[
int
,
Collection
],
sv
:
StringView
)
->
Union
[
int
,
tuple
]:
"""
Converts an index into the string watched by a StringView object
to an index relative to the string view object, e.g.:
>>> sv = StringView('xxIxx')[2:3]
>>> match = sv_match(re.compile('I'), sv)
>>> match.end()
3
>>> sv_index(match.end(), sv)
1
"""
try
:
return
absolute_index
-
sv
.
begin
except
TypeError
:
return
tuple
(
index
-
sv
.
begin
for
index
in
absolute_index
)
def
sv_search
(
regex
,
sv
:
StringView
):
return
regex
.
search
(
sv
.
text
,