Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
87a21857
Commit
87a21857
authored
Nov 29, 2019
by
Eckhart Arnold
Browse files
tracy.py: tracing debugger added (still needs refinement)
parent
befa3611
Changes
8
Hide whitespace changes
Inline
Side-by-side
DHParser/__init__.py
View file @
87a21857
...
...
@@ -31,6 +31,7 @@ from .stringview import *
from
.syntaxtree
import
*
from
.testing
import
*
from
.toolkit
import
*
from
.trace
import
*
from
.transform
import
*
from
.versionnumber
import
*
...
...
@@ -47,6 +48,7 @@ __all__ = (compile.__all__ +
syntaxtree
.
__all__
+
testing
.
__all__
+
toolkit
.
__all__
+
trace
.
__all__
+
transform
.
__all__
+
versionnumber
.
__all__
)
...
...
DHParser/parse.pxd
View file @
87a21857
...
...
@@ -21,6 +21,7 @@ cdef class Parser:
# def __add__(self, other)
# def __or__(self, other)
cpdef
_parse
(
self
,
text
)
cpdef
set_proxy
(
self
,
proxy
)
cpdef
_apply
(
self
,
func
,
flip
)
cpdef
apply
(
self
,
func
)
...
...
@@ -44,7 +45,7 @@ cdef class Grammar:
cdef
public
object
document__
cdef
public
object
_reversed__
cdef
public
int
document_length__
cdef
public
list
document_lbreaks__
cdef
public
list
_
document_lbreaks__
cdef
public
object
variables__
cdef
public
list
rollback__
cdef
public
int
last_rb__loc__
...
...
DHParser/parse.py
View file @
87a21857
...
...
@@ -213,6 +213,7 @@ EMPTY_NODE = FrozenNode(':EMPTY__', '')
ApplyFunc
=
Callable
[[
'Parser'
],
None
]
FlagFunc
=
Callable
[[
ApplyFunc
,
Set
[
ApplyFunc
]],
bool
]
ParseFunc
=
Callable
[[
'Parser'
,
StringView
],
Tuple
[
Optional
[
Node
],
StringView
]]
def
copy_parser_attrs
(
src
:
'Parser'
,
duplicate
:
'Parser'
):
...
...
@@ -262,13 +263,16 @@ class Parser:
contained parser is repeated zero times.
Attributes and Properties:
pname: The parser's name or a (possibly empty) alias name in case
of an anonymous parser.
anonymous: A property indicating that the parser remains anonymous
with respect to the nodes it returns. For performance
reasons this is implemented as an object variable rather
than a property. This property must always be equal to
`self.tag_name[0] == ":"`.
drop_content: A property (for performance reasons implemented as
simple field) that, if set, induces the parser not to return
the parsed content or sub-tree if it has matched but the
...
...
@@ -276,9 +280,11 @@ class Parser:
dropped from the concrete syntax tree already. Only
anonymous (or pseudo-anonymous) parsers are allowed to
drop content.
tag_name: The tag_name for the nodes that are created by
the parser. If the parser is named, this is the same as
`pname`, otherwise it is the name of the parser's type.
visited: Mapping of places this parser has already been to
during the current parsing process onto the results the
parser returned at the respective place. This dictionary
...
...
@@ -294,6 +300,10 @@ class Parser:
(recursively) a second time, if it has already been
applied to this parser.
proxied: The original `_parse()`-method is stored here, if a
proxy (e.g. a tracing debugger) is installed via the
`set_proxy()`-method.
_grammar: A reference to the Grammar object to which the parser
is attached.
"""
...
...
@@ -305,6 +315,9 @@ class Parser:
self
.
drop_content
=
False
# type: bool
self
.
tag_name
=
self
.
ptype
# type: str
self
.
cycle_detection
=
set
()
# type: Set[ApplyFunc]
# this indirection is required for Cython-compatibility
self
.
__parse
=
self
.
_parse
# type: ParseMethod
# self.proxied = None # type: Optional[ParseMethod]
try
:
self
.
_grammar
=
GRAMMAR_PLACEHOLDER
# type: Grammar
except
NameError
:
...
...
@@ -314,7 +327,7 @@ class Parser:
def
__deepcopy__
(
self
,
memo
):
""" Deepcopy method of the parser. Upon instantiation of a Grammar-
object, parsers will be deep-copied to the Grammar object. If a
derived parser-class changes the signature of the constructor,
derived parser-class changes the signature of the
`__init__`-
constructor,
`__deepcopy__`-method must be replaced (i.e. overridden without
calling the same method from the superclass) by the derived class.
"""
...
...
@@ -413,11 +426,10 @@ class Parser:
((
self
.
repr
if
self
.
tag_name
in
(
':RegExp'
,
':Token'
,
':DropToken'
)
else
(
self
.
pname
or
self
.
tag_name
)),
location
))
grammar
.
moving_forward__
=
True
error
=
None
# finally, the actual parser call!
try
:
node
,
rest
=
self
.
_parse
(
text
)
node
,
rest
=
self
.
_
_
parse
(
text
)
except
ParserError
as
pe
:
# catching up with parsing after an error occurred
gap
=
len
(
text
)
-
len
(
pe
.
rest
)
...
...
@@ -462,7 +474,7 @@ class Parser:
if
history_tracking__
:
grammar
.
call_stack__
.
pop
()
raise
ParserError
(
Node
(
self
.
tag_name
,
result
).
with_pos
(
location
),
text
,
pe
.
error
,
first_throw
=
False
)
error
=
pe
.
error
# needed for history tracking
grammar
.
most_recent_
error
__
=
pe
.
error
# needed for history tracking
if
left_recursion_depth__
:
self
.
recursion_counter
[
location
]
-=
1
...
...
@@ -509,12 +521,13 @@ class Parser:
record
=
HistoryRecord
(
grammar
.
call_stack__
,
node
,
text
,
grammar
.
line_col__
(
text
))
grammar
.
history__
.
append
(
record
)
elif
error
:
elif
grammar
.
most_recent_
error
__
:
# error_nid = id(node) # type: int
# if error_nid in grammar.tree__.error_nodes:
record
=
HistoryRecord
(
grammar
.
call_stack__
,
node
,
text
,
grammar
.
line_col__
(
text
),
[
error
])
[
grammar
.
most_recent_error__
])
grammar
.
most_recent_error__
=
None
grammar
.
history__
.
append
(
record
)
grammar
.
moving_forward__
=
False
grammar
.
call_stack__
.
pop
()
...
...
@@ -539,13 +552,30 @@ class Parser:
"""
return
Alternative
(
self
,
other
)
def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]:
    """Abstract parsing hook: applies the parser to `text`.

    Implementations return a tuple of the resulting node (or `None`)
    and the text at the position right behind the matching string.
    This base version has no implementation and always raises
    `NotImplementedError`.
    """
    raise NotImplementedError
def set_proxy(self, proxy: Optional[ParseFunc]):
    """Installs or removes a proxy that stands in for the `_parse()`-method.

    The proxy is stored in `self.__parse`. `self._parse` itself is never
    touched by this method, so the original parse-method stays reachable
    as `self._parse` while a proxy is installed. Typical use case is the
    installation of a tracing debugger. See module `trace`.

    If `proxy` is `None`, a previously set proxy is removed by pointing
    `self.__parse` back to `self._parse`. If `proxy` is an object whose
    type differs from that of the bound method `self._parse`, it is
    assumed to be a plain function and gets bound to this parser first.
    Otherwise it is taken to be a bound method, in which case it must be
    bound to this very parser (checked by an assertion).
    """
    if proxy is None:
        # remove the proxy: route calls to the parser's own method again
        self.__parse = self._parse
        return

    if type(proxy) == type(self._parse):
        # a method can only serve as proxy if it is a method of self
        assert proxy.__self__ == self
    else:
        # assume that proxy is a function and bind it to this parser
        proxy = proxy.__get__(self, type(self))
    self.__parse = proxy
@
property
def
grammar
(
self
)
->
'Grammar'
:
try
:
...
...
@@ -889,9 +919,10 @@ class Grammar:
and, eventually, i.e. one day in the future, for tracing through
the parsing process.
history__: A list of parser-call-stacks. A parser-call-stack is
appended to the list each time a parser either matches, fails
or if a parser-error occurs.
history__: A list of history records. A history record is appended to
the list each time a parser either matches, fails or if a
parser-error occurs. See class `log.HistoryRecord`. History
records store copies of the current call stack.
moving_forward__: This flag indicates that the parsing process is currently
moving forward. It is needed to reduce noise in history recording
...
...
@@ -907,6 +938,9 @@ class Grammar:
detected. This is used to avoid reduplicating warning messages
about left recursion.
most_recent_error__: The most recent parser error that has occurred
or `None`. This can be read by tracers. See module `trace`.
memoization__: Turns full memoization on or off. Turning memoization off
results in less memory usage and sometimes reduced parsing time.
In some situations it may drastically increase parsing time, so
...
...
@@ -1077,6 +1111,7 @@ class Grammar:
self
.
moving_forward__
=
False
# type: bool
self
.
recursion_locations__
=
set
()
# type: Set[int]
self
.
last_recursion_location__
=
-
1
# type: int
self
.
most_recent_error__
=
None
# type: Optional[ParserError]
@
property
...
...
@@ -2734,6 +2769,10 @@ class Forward(Parser):
# for the exceptional case in class Synonym where the ._parse method is called directly
return
self
.
parser
(
text
)
def set_proxy(self, proxy: Optional[ParseFunc]):
    """Deliberately does nothing: `set_proxy` has no effects on
    Forward-objects! The `proxy` argument is ignored and `None` is
    returned."""
def
__cycle_guard
(
self
,
func
,
alt_return
):
"""
Returns the value of `func()` or `alt_return` if a cycle has
...
...
DHParser/stringview.pxd
View file @
87a21857
...
...
@@ -17,7 +17,7 @@ cdef int last_char(str text, int begin, int end, str chars)
cdef
int
pack_index
(
int
index
,
int
length
)
@
cython
.
locals
(
cbegin
=
cython
.
int
,
cend
=
cython
.
int
,
length
=
cython
.
int
)
c
p
def
real_indices
(
begin
,
end
,
int
length
)
cdef
(
cython
.
int
,
cython
.
int
)
real_indices
(
begin
,
end
,
int
length
)
cdef
class
StringView
:
cdef
str
_text
...
...
DHParser/stringview.py
View file @
87a21857
...
...
@@ -43,7 +43,7 @@ except ImportError:
import
DHParser.shadow_cython
as
cython
__all__
=
(
'StringView'
,
'real_indices'
,
'EMPTY_STRING_VIEW'
)
__all__
=
(
'StringView'
,
'
slow_
real_indices'
,
'EMPTY_STRING_VIEW'
)
@
cython
.
cfunc
...
...
@@ -92,6 +92,8 @@ def pack_index(index: int, length: int) -> int:
return
0
if
index
<
0
else
length
if
index
>
length
else
index
@
cython
.
cfunc
@
cython
.
returns
((
cython
.
int
,
cython
.
int
))
@
cython
.
locals
(
cbegin
=
cython
.
int
,
cend
=
cython
.
int
,
length
=
cython
.
int
)
def
real_indices
(
begin
:
Optional
[
int
],
end
:
Optional
[
int
],
...
...
@@ -104,6 +106,13 @@ def real_indices(begin: Optional[int],
return
pack_index
(
cbegin
,
length
),
pack_index
(
cend
,
length
)
def slow_real_indices(begin: Optional[int],
                      end: Optional[int],
                      length: int) -> Tuple[int, int]:
    """Thin wrapper around `real_indices()` for testing purposes.

    Passes `begin`, `end` and `length` on unchanged and returns the
    result of `real_indices()` as is. This is the name that is exported
    in `__all__`, so that test code has a Python-level entry point.
    NOTE(review): presumably needed because `real_indices` is declared
    as a Cython cfunc and then cannot be called from Python - confirm.
    """
    indices = real_indices(begin, end, length)
    return indices
class
StringView
:
# collections.abc.Sized
"""
A rudimentary StringView class, just enough for the use cases
...
...
DHParser/trace.py
View file @
87a21857
...
...
@@ -20,7 +20,8 @@ Module ``trace`` provides trace-debugging functionality for the
parser. The tracers are added or removed via monkey patching to
all or some particular parsers of a grammar and trace the actions
of these parsers, making use of the `call_stack__`, `history__`
and `moving_forward__`-hooks in the Grammar object.
and `moving_forward__`, `most_recent_error__`-hooks in the
Grammar-object.
This allows for more flexible and at the same time more focused
tracing of the parsing process than the (older) parsing-history-
...
...
@@ -28,45 +29,63 @@ tracking-mechanism in the `parse` module, which will eventually
be superseded by tracing.
"""
from
typing
import
Tuple
,
Optional
from
typing
import
Tuple
,
Optional
,
List
,
Collection
,
Union
from
DHParser.stringview
import
StringView
from
DHParser.syntaxtree
import
Node
,
REGEXP_PTYPE
,
TOKEN_PTYPE
from
DHParser.log
import
HistoryRecord
from
DHParser.parse
import
Parser
Error
from
DHParser.parse
import
Parser
,
ParserError
,
Grammar
,
ParseFunc
#######################################################################
#
# tracing of the parsing process
# (a light-weight alternative to full history recording)
#
#######################################################################
__all__
=
(
'trace_history'
,
'with_all_descendants'
,
'with_unnamed_descendants'
,
'set_tracer'
)
def
parse_prox
y
(
self
,
text
:
StringView
)
->
Tuple
[
Optional
[
Node
],
StringView
]:
def
trace_histor
y
(
self
,
text
:
StringView
)
->
Tuple
[
Optional
[
Node
],
StringView
]:
grammar
=
self
.
_grammar
location
=
grammar
.
document_length__
-
text
.
_len
grammar
.
call_stack__
.
append
(
((
self
.
repr
if
self
.
tag_name
in
(
REGEXP_PTYPE
,
TOKEN_PTYPE
)
else
(
self
.
pname
or
self
.
tag_name
)),
location
))
grammar
.
moving_forward__
=
True
error
=
[]
try
:
node
,
text_
=
self
.
_p
roxied_parse_method
(
text
)
node
,
rest
=
self
.
_p
arse
(
text
)
except
ParserError
as
pe
:
error
=
[
pe
]
grammar
.
call_stack__
.
pop
()
raise
pe
# Mind that memoized parser calls will not appear in the history record!
# Don't track returning parsers except in case an error has occurred!
if
grammar
.
moving_forward__
or
error
:
if
grammar
.
moving_forward__
or
grammar
.
most_recent_error__
:
errors
=
[
grammar
.
most_recent_error__
]
if
grammar
.
most_recent_error__
else
[]
grammar
.
history__
.
append
(
HistoryRecord
(
grammar
.
call_stack__
,
node
,
text
,
grammar
.
line_col__
(
text
),
error
))
grammar
.
call_stack__
,
node
,
text
,
grammar
.
line_col__
(
text
),
error
s
))
grammar
.
moving_forward__
=
False
grammar
.
call_stack__
.
pop
()
return
node
,
text
return
node
,
rest
def with_all_descendants(root: Parser) -> List[Parser]:
    """Returns a list with the parser `root` and all of its descendants.

    The list is filled by a visitor function that is handed to
    `root.apply()`. Its order is therefore the order in which `apply()`
    calls the visitor.
    """
    found = []  # type: List[Parser]

    def collect(parser: Parser):
        found.append(parser)

    root.apply(collect)
    return found
def with_unnamed_descendants(root: Parser) -> List[Parser]:
    """Returns a list that contains the parser `root` and those of its
    descendants that can be reached via unnamed parsers only.

    A sub-parser with an empty `pname` is added together with its own
    unnamed descendants (depth first, `root` always comes first). A
    sub-parser that has a name is neither added nor descended into.
    """
    collected = [root]
    for child in root.sub_parsers():
        if child.pname:
            # named parsers mark the boundary of the unnamed sub-tree
            continue
        collected += with_unnamed_descendants(child)
    return collected
def set_tracer(parsers: Union[Parser, Collection[Parser]], tracer: Optional[ParseFunc]):
    """Passes `tracer` to the `set_proxy()`-method of one or more parsers.

    `parsers` may either be a single Parser object or a collection of
    Parser objects. `tracer` is handed on unchanged, `None` included.
    """
    targets = [parsers] if isinstance(parsers, Parser) else parsers
    for target in targets:
        target.set_proxy(tracer)
test/test_stringview.py
View file @
87a21857
...
...
@@ -26,19 +26,19 @@ scriptpath = os.path.dirname(__file__) or '.'
sys
.
path
.
append
(
os
.
path
.
abspath
(
os
.
path
.
join
(
scriptpath
,
'..'
)))
from
DHParser.toolkit
import
re
from
DHParser.stringview
import
StringView
,
EMPTY_STRING_VIEW
,
real_indices
from
DHParser.stringview
import
StringView
,
EMPTY_STRING_VIEW
,
slow_
real_indices
class
TestStringView
:
def
test_real_indices
(
self
):
assert
real_indices
(
3
,
5
,
10
)
==
(
3
,
5
)
assert
real_indices
(
None
,
None
,
10
)
==
(
0
,
10
)
assert
real_indices
(
-
2
,
-
1
,
10
)
==
(
8
,
9
)
assert
real_indices
(
-
3
,
11
,
10
)
==
(
7
,
10
)
assert
real_indices
(
-
5
,
-
12
,
10
)
==
(
5
,
0
)
assert
real_indices
(
-
12
,
-
5
,
10
)
==
(
0
,
5
)
assert
real_indices
(
7
,
6
,
10
)
==
(
7
,
6
)
assert
real_indices
(
None
,
0
,
10
)
==
(
0
,
0
)
def
test_
slow_
real_indices
(
self
):
assert
slow_
real_indices
(
3
,
5
,
10
)
==
(
3
,
5
)
assert
slow_
real_indices
(
None
,
None
,
10
)
==
(
0
,
10
)
assert
slow_
real_indices
(
-
2
,
-
1
,
10
)
==
(
8
,
9
)
assert
slow_
real_indices
(
-
3
,
11
,
10
)
==
(
7
,
10
)
assert
slow_
real_indices
(
-
5
,
-
12
,
10
)
==
(
5
,
0
)
assert
slow_
real_indices
(
-
12
,
-
5
,
10
)
==
(
0
,
5
)
assert
slow_
real_indices
(
7
,
6
,
10
)
==
(
7
,
6
)
assert
slow_
real_indices
(
None
,
0
,
10
)
==
(
0
,
0
)
def
test_creation
(
self
):
s
=
"0123456789"
...
...
test/test_trace.py
0 → 100644
View file @
87a21857
#!/usr/bin/python3
"""test_trace.py - unit tests for the trace-module of DHParser
Author: Eckhart Arnold <arnold@badw.de>
Copyright 2017 Bavarian Academy of Sciences and Humanities
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import
os
import
sys
scriptpath
=
os
.
path
.
dirname
(
__file__
)
or
'.'
sys
.
path
.
append
(
os
.
path
.
abspath
(
os
.
path
.
join
(
scriptpath
,
'..'
)))
from
DHParser
import
grammar_provider
,
with_all_descendants
,
with_unnamed_descendants
,
\
set_tracer
,
trace_history
,
log_parsing_history
,
start_logging
class TestTrace:
    """Smoke test for the tracing debugger: parses a small arithmetic
    expression with `trace_history` installed on all parsers and writes
    the recorded history to a log."""

    def setup(self):
        # grammar of a minimal arithmetic language, used as test fixture
        minilang = """
expr = term { ("+"|"-") term }
term = factor { ("*"|"/") factor }
factor = /[0-9]+/~ | "(" expr ")"
"""
        grammar_factory = grammar_provider(minilang)
        self.gr = grammar_factory()

    # def tear_down(self):
    #     os.remove('trace.log')

    def test_trace(self):
        # install the tracer on the root parser and everything below it
        traced_parsers = with_all_descendants(self.gr.root_parser__)
        set_tracer(traced_parsers, trace_history)
        syntax_tree = self.gr('2*(3+4)')
        # logging must be switched on before the history is written
        start_logging()
        log_parsing_history(self.gr, 'trace.log')
if
__name__
==
"__main__"
:
from
DHParser.testing
import
runner
runner
(
""
,
globals
())
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment