Commit 87a21857 authored by Eckhart Arnold's avatar Eckhart Arnold
Browse files

tracy.py: tracing debugger added (still needs refinement)

parent befa3611
......@@ -31,6 +31,7 @@ from .stringview import *
from .syntaxtree import *
from .testing import *
from .toolkit import *
from .trace import *
from .transform import *
from .versionnumber import *
......@@ -47,6 +48,7 @@ __all__ = (compile.__all__ +
syntaxtree.__all__ +
testing.__all__ +
toolkit.__all__ +
trace.__all__ +
transform.__all__ +
versionnumber.__all__)
......
......@@ -21,6 +21,7 @@ cdef class Parser:
# def __add__(self, other)
# def __or__(self, other)
cpdef _parse(self, text)
cpdef set_proxy(self, proxy)
cpdef _apply(self, func, flip)
cpdef apply(self, func)
......@@ -44,7 +45,7 @@ cdef class Grammar:
cdef public object document__
cdef public object _reversed__
cdef public int document_length__
cdef public list document_lbreaks__
cdef public list _document_lbreaks__
cdef public object variables__
cdef public list rollback__
cdef public int last_rb__loc__
......
......@@ -213,6 +213,7 @@ EMPTY_NODE = FrozenNode(':EMPTY__', '')
ApplyFunc = Callable[['Parser'], None]
FlagFunc = Callable[[ApplyFunc, Set[ApplyFunc]], bool]
ParseFunc = Callable[['Parser', StringView], Tuple[Optional[Node], StringView]]
def copy_parser_attrs(src: 'Parser', duplicate: 'Parser'):
......@@ -262,13 +263,16 @@ class Parser:
contained parser is repeated zero times.
Attributes and Properties:
pname: The parser's name or a (possibly empty) alias name in case
of an anonymous parser.
anonymous: A property indicating that the parser remains anonymous
with respect to the nodes it returns. For performance
reasons this is implemented as an object variable rather
than a property. This property must always be equal to
`self.tag_name[0] == ":"`.
drop_content: A property (for performance reasons implemented as
simple field) that, if set, induces the parser not to return
the parsed content or sub-tree if it has matched but the
......@@ -276,9 +280,11 @@ class Parser:
dropped from the concrete syntax tree already. Only
anonymous (or pseudo-anonymous) parsers are allowed to
drop content.
tag_name: The tag_name for the nodes that are created by
the parser. If the parser is named, this is the same as
`pname`, otherwise it is the name of the parser's type.
visited: Mapping of places this parser has already been to
during the current parsing process onto the results the
parser returned at the respective place. This dictionary
......@@ -294,6 +300,10 @@ class Parser:
(recursively) a second time, if it has already been
applied to this parser.
proxied: The original `_parse()`-method is stored here, if a
proxy (e.g. a tracing debugger) is installed via the
`set_proxy()`-method.
_grammar: A reference to the Grammar object to which the parser
is attached.
"""
......@@ -305,6 +315,9 @@ class Parser:
self.drop_content = False # type: bool
self.tag_name = self.ptype # type: str
self.cycle_detection = set() # type: Set[ApplyFunc]
# this indirection is required for Cython-compatibility
self.__parse = self._parse # type: ParseMethod
# self.proxied = None # type: Optional[ParseMethod]
try:
self._grammar = GRAMMAR_PLACEHOLDER # type: Grammar
except NameError:
......@@ -314,7 +327,7 @@ class Parser:
def __deepcopy__(self, memo):
""" Deepcopy method of the parser. Upon instantiation of a Grammar-
object, parsers will be deep-copied to the Grammar object. If a
derived parser-class changes the signature of the constructor,
derived parser-class changes the signature of the `__init__`-constructor,
`__deepcopy__`-method must be replaced (i.e. overridden without
calling the same method from the superclass) by the derived class.
"""
......@@ -413,11 +426,10 @@ class Parser:
((self.repr if self.tag_name in (':RegExp', ':Token', ':DropToken')
else (self.pname or self.tag_name)), location))
grammar.moving_forward__ = True
error = None
# finally, the actual parser call!
try:
node, rest = self._parse(text)
node, rest = self.__parse(text)
except ParserError as pe:
# catching up with parsing after an error occurred
gap = len(text) - len(pe.rest)
......@@ -462,7 +474,7 @@ class Parser:
if history_tracking__: grammar.call_stack__.pop()
raise ParserError(Node(self.tag_name, result).with_pos(location),
text, pe.error, first_throw=False)
error = pe.error # needed for history tracking
grammar.most_recent_error__ = pe.error # needed for history tracking
if left_recursion_depth__:
self.recursion_counter[location] -= 1
......@@ -509,12 +521,13 @@ class Parser:
record = HistoryRecord(grammar.call_stack__, node, text,
grammar.line_col__(text))
grammar.history__.append(record)
elif error:
elif grammar.most_recent_error__:
# error_nid = id(node) # type: int
# if error_nid in grammar.tree__.error_nodes:
record = HistoryRecord(grammar.call_stack__, node, text,
grammar.line_col__(text),
[error])
[grammar.most_recent_error__])
grammar.most_recent_error__ = None
grammar.history__.append(record)
grammar.moving_forward__ = False
grammar.call_stack__.pop()
......@@ -539,13 +552,30 @@ class Parser:
"""
return Alternative(self, other)
def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]:
"""Applies the parser to the given `text` and returns a node with
the results or None as well as the text at the position right behind
the matching string."""
raise NotImplementedError
def set_proxy(self, proxy: Optional[ParseFunc]):
    """Installs or removes a proxy for the parser's `_parse()`-method.

    While a proxy is installed, the parser invokes the proxy at the
    place where it would otherwise have invoked `_parse()`. The
    original method is not overwritten; it stays reachable as
    `self._parse` and can therefore be called from within the proxy.
    A typical use case is the installation of a tracing debugger.
    See module `trace`.

    :param proxy: `None` to remove a previously set proxy, or the
        proxy to install. The proxy may be a plain function that takes
        the parser and the text as arguments, in which case it is bound
        to this parser, or a method that is already bound to this
        parser.
    """
    if proxy is None:
        # removing the proxy: fall back to the parser's own method
        self.__parse = self._parse
        return
    if type(proxy) == type(self._parse):
        # a method has been passed: it must be a method of self
        assert proxy.__self__ == self
    else:
        # anything else is assumed to be a plain function, which is
        # turned into a method bound to this parser object
        proxy = proxy.__get__(self, type(self))
    self.__parse = proxy
@property
def grammar(self) -> 'Grammar':
try:
......@@ -889,9 +919,10 @@ class Grammar:
and, eventually, i.e. one day in the future, for tracing through
the parsing process.
history__: A list of parser-call-stacks. A parser-call-stack is
appended to the list each time a parser either matches, fails
or if a parser-error occurs.
history__: A list of history records. A history record is appended to
the list each time a parser either matches, fails or if a
parser-error occurs. See class `log.HistoryRecord`. History
records store copies of the current call stack.
moving_forward__: This flag indicates that the parsing process is currently
moving forward. It is needed to reduce noise in history recording
......@@ -907,6 +938,9 @@ class Grammar:
detected. This is used to avoid reduplicating warning messages
about left recursion.
most_recent_error__: The most recent parser error that has occurred
or `None`. This can be read by tracers. See module `trace`.
memoization__: Turns full memoization on or off. Turning memoization off
results in less memory usage and sometimes reduced parsing time.
In some situations it may drastically increase parsing time, so
......@@ -1077,6 +1111,7 @@ class Grammar:
self.moving_forward__ = False # type: bool
self.recursion_locations__ = set() # type: Set[int]
self.last_recursion_location__ = -1 # type: int
self.most_recent_error__ = None # type: Optional[ParserError]
@property
......@@ -2734,6 +2769,10 @@ class Forward(Parser):
# for the exceptional case in class Synonym where the ._parse method is called directly
return self.parser(text)
def set_proxy(self, proxy: Optional[ParseFunc]):
    """Does nothing: `set_proxy` has no effect on Forward-objects!

    The `proxy` argument is accepted for the sake of a uniform
    interface, but it is ignored and `None` is returned.
    """
def __cycle_guard(self, func, alt_return):
"""
Returns the value of `func()` or `alt_return` if a cycle has
......
......@@ -17,7 +17,7 @@ cdef int last_char(str text, int begin, int end, str chars)
cdef int pack_index(int index, int length)
@cython.locals(cbegin=cython.int, cend=cython.int, length=cython.int)
cpdef real_indices(begin, end, int length)
cdef (cython.int, cython.int) real_indices(begin, end, int length)
cdef class StringView:
cdef str _text
......
......@@ -43,7 +43,7 @@ except ImportError:
import DHParser.shadow_cython as cython
__all__ = ('StringView', 'real_indices', 'EMPTY_STRING_VIEW')
__all__ = ('StringView', 'slow_real_indices', 'EMPTY_STRING_VIEW')
@cython.cfunc
......@@ -92,6 +92,8 @@ def pack_index(index: int, length: int) -> int:
return 0 if index < 0 else length if index > length else index
@cython.cfunc
@cython.returns((cython.int, cython.int))
@cython.locals(cbegin=cython.int, cend=cython.int, length=cython.int)
def real_indices(begin: Optional[int],
end: Optional[int],
......@@ -104,6 +106,13 @@ def real_indices(begin: Optional[int],
return pack_index(cbegin, length), pack_index(cend, length)
def slow_real_indices(begin: Optional[int],
                      end: Optional[int],
                      length: int) -> Tuple[int, int]:
    """Python-callable variant of `real_indices()` for testing.

    The arguments are forwarded unchanged to `real_indices()` and its
    result is returned as is.

    :param begin: the begin index of a slice or `None`
    :param end: the end index of a slice or `None`
    :param length: the length of the sliced sequence
    :returns: the pair of indices computed by `real_indices()`
    """
    indices = real_indices(begin, end, length)
    return indices
class StringView: # collections.abc.Sized
"""
A rudimentary StringView class, just enough for the use cases
......
......@@ -20,7 +20,8 @@ Module ``trace`` provides trace-debugging functionality for the
parser. The tracers are added or removed via monkey patching to
all or some particular parsers of a grammar and trace the actions
of these parsers, making use of the `call_stack__`, `history__`
and `moving_forward__`-hooks in the Grammar object.
and `moving_forward__` as well as `most_recent_error__`-hooks in the
Grammar-object.
This allows for more flexible and at the same time more focused
tracing of the parsing process than the (older) parsing-history-
......@@ -28,45 +29,63 @@ tracking-mechanism in the `parse` module, which will eventually
be superseded by tracing.
"""
from typing import Tuple, Optional
from typing import Tuple, Optional, List, Collection, Union
from DHParser.stringview import StringView
from DHParser.syntaxtree import Node, REGEXP_PTYPE, TOKEN_PTYPE
from DHParser.log import HistoryRecord
from DHParser.parse import ParserError
from DHParser.parse import Parser, ParserError, Grammar, ParseFunc
#######################################################################
#
# tracing of the parsing process
# (a light-weight alternative to full history recording)
#
#######################################################################
__all__ = ('trace_history', 'with_all_descendants', 'with_unnamed_descendants', 'set_tracer')
def parse_proxy(self, text: StringView) -> Tuple[Optional[Node], StringView]:
def trace_history(self, text: StringView) -> Tuple[Optional[Node], StringView]:
grammar = self._grammar
location = grammar.document_length__ - text._len
grammar.call_stack__.append(
((self.repr if self.tag_name in (REGEXP_PTYPE, TOKEN_PTYPE)
else (self.pname or self.tag_name)), location))
grammar.moving_forward__ = True
error = []
try:
node, text_ = self._proxied_parse_method(text)
node, rest = self._parse(text)
except ParserError as pe:
error = [pe]
grammar.call_stack__.pop()
raise pe
# Mind that memoized parser calls will not appear in the history record!
# Don't track returning parsers except in case an error has occurred!
if grammar.moving_forward__ or error:
if grammar.moving_forward__ or grammar.most_recent_error__:
errors = [grammar.most_recent_error__] if grammar.most_recent_error__ else []
grammar.history__.append(HistoryRecord(
grammar.call_stack__, node, text, grammar.line_col__(text), error))
grammar.call_stack__, node, text, grammar.line_col__(text), errors))
grammar.moving_forward__ = False
grammar.call_stack__.pop()
return node, text
return node, rest
def with_all_descendants(root: Parser) -> List[Parser]:
    """Collects the parser `root` and all of its descendants in a list.

    The list is filled by a callback that is handed to `root.apply()`,
    so the parsers appear in the order in which `apply()` visits them.

    :param root: the parser from which the collection starts
    :returns: a list of all parsers visited by `root.apply()`
    """
    collected = []  # type: List[Parser]

    def collect(parser: Parser):
        collected.append(parser)

    root.apply(collect)
    return collected
def with_unnamed_descendants(root: Parser) -> List[Parser]:
    """Returns a list that contains the parser `root` and all of its
    unnamed descendants.

    Sub-parsers with an empty `pname` are added to the list and
    searched recursively for further unnamed sub-parsers. Sub-parsers
    that have a name are neither added nor descended into.

    :param root: the parser from which the collection starts; it is
        always the first item of the result, whether named or not
    :returns: `root` followed by the unnamed parsers found below it
    """
    collected = [root]
    for child in root.sub_parsers():
        if child.pname:
            # named parsers mark the boundary of the collection
            continue
        collected.extend(with_unnamed_descendants(child))
    return collected
def set_tracer(parsers: Union[Parser, Collection[Parser]], tracer: Optional[ParseFunc]):
    """Sets the same tracer on one or on several parsers.

    The tracer is passed to the `set_proxy()`-method of every parser
    concerned.

    :param parsers: a single parser or a collection of parsers
    :param tracer: the tracing function that is handed to each parser's
        `set_proxy()`-method, or `None`
    """
    # a single parser is treated like a collection with one member
    targets = [parsers] if isinstance(parsers, Parser) else parsers
    for target in targets:
        target.set_proxy(tracer)
......@@ -26,19 +26,19 @@ scriptpath = os.path.dirname(__file__) or '.'
sys.path.append(os.path.abspath(os.path.join(scriptpath, '..')))
from DHParser.toolkit import re
from DHParser.stringview import StringView, EMPTY_STRING_VIEW, real_indices
from DHParser.stringview import StringView, EMPTY_STRING_VIEW, slow_real_indices
class TestStringView:
def test_real_indices(self):
assert real_indices(3, 5, 10) == (3, 5)
assert real_indices(None, None, 10) == (0, 10)
assert real_indices(-2, -1, 10) == (8, 9)
assert real_indices(-3, 11, 10) == (7, 10)
assert real_indices(-5, -12, 10) == (5, 0)
assert real_indices(-12, -5, 10) == (0, 5)
assert real_indices(7, 6, 10) == (7, 6)
assert real_indices(None, 0, 10) == (0, 0)
def test_slow_real_indices(self):
assert slow_real_indices(3, 5, 10) == (3, 5)
assert slow_real_indices(None, None, 10) == (0, 10)
assert slow_real_indices(-2, -1, 10) == (8, 9)
assert slow_real_indices(-3, 11, 10) == (7, 10)
assert slow_real_indices(-5, -12, 10) == (5, 0)
assert slow_real_indices(-12, -5, 10) == (0, 5)
assert slow_real_indices(7, 6, 10) == (7, 6)
assert slow_real_indices(None, 0, 10) == (0, 0)
def test_creation(self):
s = "0123456789"
......
#!/usr/bin/python3
"""test_trace.py - unit tests for the trace-module of DHParser
Author: Eckhart Arnold <arnold@badw.de>
Copyright 2017 Bavarian Academy of Sciences and Humanities
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import os
import sys
scriptpath = os.path.dirname(__file__) or '.'
sys.path.append(os.path.abspath(os.path.join(scriptpath, '..')))
from DHParser import grammar_provider, with_all_descendants, with_unnamed_descendants, \
set_tracer, trace_history, log_parsing_history, start_logging
class TestTrace:
def setup(self):
minilang = """
expr = term { ("+"|"-") term }
term = factor { ("*"|"/") factor }
factor = /[0-9]+/~ | "(" expr ")"
"""
self.gr = grammar_provider(minilang)()
# def tear_down(self):
# os.remove('trace.log')
def test_trace(self):
all_desc = with_all_descendants(self.gr.root_parser__)
set_tracer(all_desc, trace_history)
st = self.gr('2*(3+4)')
start_logging()
log_parsing_history(self.gr, 'trace.log')
if __name__ == "__main__":
from DHParser.testing import runner
runner("", globals())
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment