Commit c2ead30a authored by eckhart's avatar eckhart
Browse files

trace.py: more intuitive history recording with negative lookahead parsers

parent 3d1ad694
......@@ -258,9 +258,13 @@ def clear_logs(logfile_types=frozenset(['.cst', '.ast', '.log'])):
#
#######################################################################
class NoneNode(FrozenNode):
    """A ``FrozenNode`` subclass whose instances always evaluate as false.

    Overriding ``__bool__`` makes truth tests such as ``if node:`` or
    ``a and node`` treat an instance of this class like a missing result.
    """

    def __bool__(self) -> bool:
        # Unconditionally falsy, independent of tag name or content.
        return False
NONE_TAG = ":None"
NONE_NODE = FrozenNode(NONE_TAG, '')
NONE_NODE = NoneNode(NONE_TAG, '')
def freeze_callstack(call_stack: List[CallItem]) -> Tuple[CallItem, ...]:
......
......@@ -503,10 +503,9 @@ class Parser:
# otherwise also cache None-results
visited[location] = (None, rest)
else:
# assert node._pos < 0 or node == EMPTY_NODE
# if node._pos != EMPTY_NODE:
# assert node._pos < 0 or node is EMPTY_NODE
node._pos = location
# assert node._pos >= 0 or node == EMPTY_NODE, \
# assert node._pos >= 0 or node is EMPTY_NODE, \
# str("%i < %i" % (grammar.document_length__, location))
if (grammar.last_rb__loc__ < location
and (grammar.memoization__ or location in grammar.recursion_locations__)):
......@@ -3123,7 +3122,7 @@ class Synonym(UnaryParser):
if self.drop_content:
return EMPTY_NODE, text
if not self.anonymous:
if node == EMPTY_NODE:
if node is EMPTY_NODE:
return Node(self.tag_name, ''), text
if node.tag_name.startswith(':'):
# eliminate anonymous child-node on the fly
......
......@@ -29,7 +29,7 @@ from typing import Tuple, Optional, List, Iterable, Union
from DHParser.error import Error, RESUME_NOTICE
from DHParser.stringview import StringView
from DHParser.syntaxtree import Node, REGEXP_PTYPE, TOKEN_PTYPE, WHITESPACE_PTYPE, ZOMBIE_TAG
from DHParser.log import freeze_callstack, HistoryRecord
from DHParser.log import freeze_callstack, HistoryRecord, NONE_NODE
from DHParser.parse import Grammar, Parser, ParserError, ParseFunc
from DHParser.toolkit import cython, line_col
......@@ -99,8 +99,11 @@ def trace_history(self: Parser, text: StringView) -> Tuple[Optional[Node], Strin
# Mind that memoized parser calls will not appear in the history record!
# Don't track returning parsers except in case an error has occurred!
if ((grammar.moving_forward__ or (node and not self.anonymous))
and (self.tag_name != WHITESPACE_PTYPE)):
if ((self.tag_name != WHITESPACE_PTYPE)
and (grammar.moving_forward__
or (not self.anonymous
and (node
or grammar.history__ and grammar.history__[-1].node)))):
# record history
# TODO: Make dropping insignificant whitespace from history configurable
delta = text._len - rest._len
......@@ -108,7 +111,8 @@ def trace_history(self: Parser, text: StringView) -> Tuple[Optional[Node], Strin
lc = line_col(grammar.document_lbreaks__, location)
record = HistoryRecord(grammar.call_stack__, hnd, rest, lc, [])
cs_len = len(record.call_stack)
if (not grammar.history__ or lc != grammar.history__[-1].line_col
if (not grammar.history__ or not node
or lc != grammar.history__[-1].line_col
or record.call_stack != grammar.history__[-1].call_stack[:cs_len]
or self == grammar.start_parser__):
grammar.history__.append(record)
......
......@@ -82,7 +82,7 @@ files and directories for sure, but those will not concern us for now::
Introduction.md - An introduction and appetizer for DHParser
In order to verify that the installation works, you can run the
`dhparser.py- script and, when asked, chose "3" for the self-test::
`dhparser.py`-script and, when asked, choose "3" for the self-test::
$ python DHParser/scripts/dhparser.py
Usage:
......@@ -615,14 +615,6 @@ parser matched, the last column displays exactly that section of the text that
the parser did match. If the parser did not match, the last column displays
the text that still lies ahead and has not yet been parsed.
.. note:: You may wonder, why in the parsing history `EOF` seems to match.
But in fact it is not EOF that matched, but only the part of EOF after
the "negative lookahead"-operator '!' (see "poetry.ebnf" for the definition
of EOF), which is the regular expression for an arbitrary character `/./`.
Now if that latter part of EOF matched, because of the negative lookahead
operator in front of it, EOF did in fact not match. (The Visualization
of negative lookahead operators might be amended in the future.)
In our concrete example, we can see that the parser "WORD" matches "Life", but
not "Life’s" or "’s". And this ultimately leads to the failure of the parsing
process as a whole. The most simple solution would be to add the apostrophe to
......@@ -692,7 +684,7 @@ other hand, it might be convenient to have it in the tree never the less...)
The answer to these questions is that what our compilation
script yields is the *concrete syntax tree* of the parsed text. The concrete syntax tree
captures every minute syntactic detail described in the grammar and found in the text.
we have to transform it into an
we have to transform it into an
*abstract syntax tree* first, which is called thus because it abstracts from
all details that we deem irrelevant. Now, which details we consider as
irrelevant is almost entirely up to ourselves. And we should think carefully
......@@ -832,10 +824,10 @@ Now that everything is set, let's have a look at the result::
That is much better. There is but one slight blemish in the output: While all
nodes left are named nodes, i.e. nodes associated with a named parser, there are a
few anonymous <ANONYMOUS_Text__>-nodes. Here is a little exercise: Do away with those
<ANONYMOUS_Text__>-nodes by replacing them by something semantically more meaningful.
few anonymous `<ANONYMOUS_Text__>`-nodes. Here is a little exercise: Do away with those
`<ANONYMOUS_Text__>`-nodes by replacing them by something semantically more meaningful.
Hint: Add a new symbol "delimiter" in the grammar definition "poetry.ebnf". (An
alternative strategy to extending the grammar would be to use the
``replace_parser`` operator. In the AST-transformation-table ANONYMOUS nodes are
indicated by a leading ':', thus in the AST-transformation-table you have to write
":Text" instead pf "ANONYMOUS_Text__" which is merely the XML-compatible name.)
`:Text` instead of `ANONYMOUS_Text__` which is merely the XML-compatible name.)
documentation_src/parsing_history.png

72.9 KB | W: | H:

documentation_src/parsing_history.png

93.3 KB | W: | H:

documentation_src/parsing_history.png
documentation_src/parsing_history.png
documentation_src/parsing_history.png
documentation_src/parsing_history.png
  • 2-up
  • Swipe
  • Onion skin
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment