Commit 225f8299 authored by di68kap's avatar di68kap
Browse files

Merge branch 'master' of https://gitlab.lrz.de/badw-it/DHParser

parents 23ff2da2 5618278f
......@@ -9,7 +9,7 @@ The best (and easiest) way to contribute at this stage is to try to implement
a small DSL with DHParser and report bugs and problems and make suggestions
for further development. Have a look at the README.md-file to get started.
Please the code from the git repository. Because code still changes quickly,
Please, use the code from the git repository. Because code still changes quickly,
any prepackaged builds may be outdated. The repository is here:
https://gitlab.lrz.de/badw-it/DHParser
......@@ -25,8 +25,8 @@ bigger projects, below:
Ideas for further development
=============================
Better error reporting
----------------------
Better error reporting I
------------------------
A problem with error reporting consists in the fact that at best only the very
first parsing error is reported accurately and then triggers a number of pure
......@@ -49,10 +49,44 @@ left recursion stack, etc. without making the parser guard (see
Also, a good variety of test cases would be desirable.
Optimizations
-------------
Better error reporting II
-------------------------
**Early discarding of nodes**:
Yet another means to improve error reporting would be to supplement
the required operator "&" with its forbidden counterpart, say "!&"
that would raise an error message, if some parser matches at a place
where it really shouldn't. [Add some examples here.]
Optimization and Enhancement: Two-way-Traversal for AST-Transformation
----------------------------------------------------------------------
AST-transformations are done via a depth-first tree-traversal, that is,
the traversal function first goes all the way up the tree to the leaf
nodes and calls the transformation routines successively on the way
down. The routines are picked from the transformation-table which is a
dictionary mapping Node's tag names to sequences of transformation functions.
The
rationale for depth-first is that it is easier to transform a node, if
all of its children have already been transformed, i.e. simplified.
However, there are quite a few cases where depth-last would be better.
For example if you know you are going to discard a whole branch starting
from a certain node, it is a waste to transform all the child nodes
first.
As the tree is traversed anyway, there is no good reason why certain
transformation routines should not already be called on the way up.
Of course, as most routines
more or less assume depth first, we would need two transformation tables
one for the routines that are called on the way up. And one for the
routines that are called on the way down.
This should be fairly easy to implement.
Optimization: Early discarding of nodes
---------------------------------------
Reason: `traverse_recursive` and `Node.result-setter` are top time consumers!
Allow to specify parsers/nodes, the result of which
......
......@@ -87,11 +87,11 @@ try:
except ImportError:
import re
from DHParser import logging, is_filename, load_if_file, \\
Grammar, Compiler, nil_preprocessor, PreprocessorToken, \\
Grammar, Compiler, nil_preprocessor, PreprocessorToken, Whitespace, \\
Lookbehind, Lookahead, Alternative, Pop, Token, Synonym, AllOf, SomeOf, Unordered, \\
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture, \\
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \\
last_value, counterpart, accumulate, PreprocessorFunc, \\
grammar_changed, last_value, counterpart, accumulate, PreprocessorFunc, \\
Node, TransformationFunc, TransformationDict, \\
traverse, remove_children_if, merge_children, is_anonymous, \\
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \\
......@@ -120,6 +120,14 @@ def compile_src(source, log_dir=''):
if __name__ == "__main__":
if len(sys.argv) > 1:
try:
grammar_file_name = os.path.basename(__file__).replace('Compiler.py', '.ebnf')
if grammar_changed({NAME}Grammar, grammar_file_name):
print("Grammar has changed. Please recompile Grammar first.")
sys.exit(1)
except FileNotFoundError:
print('Could not check for changed grammar, because grammar file "%s" was not found!'
% grammar_file_name)
file_name, log_dir = sys.argv[1], ''
if file_name in ['-d', '--debug'] and len(sys.argv) > 2:
file_name, log_dir = sys.argv[2], 'LOGS'
......
......@@ -30,7 +30,7 @@ from functools import partial
from DHParser.compile import CompilerError, Compiler
from DHParser.error import Error
from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, RE, \
from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, Whitespace, RE, \
NegativeLookahead, Alternative, Series, Option, OneOrMore, ZeroOrMore, Token
from DHParser.preprocess import nil_preprocessor, PreprocessorFunc
from DHParser.syntaxtree import Node, WHITESPACE_PTYPE, TOKEN_PTYPE
......@@ -77,56 +77,53 @@ def get_ebnf_preprocessor() -> PreprocessorFunc:
class EBNFGrammar(Grammar):
r"""
Parser for an EBNF source file, with this grammar::
# EBNF-Grammar in EBNF
@ comment = /#.*(?:\n|$)/ # comments start with '#' and
# eat all chars up to and including '\n'
@ whitespace = /\s*/ # whitespace includes linefeed
@ literalws = right # trailing whitespace of literals will be
# ignored tacitly
syntax = [~//] { definition | directive } §EOF
definition = symbol §"=" expression
directive = "@" §symbol "=" ( regexp | literal | list_ )
expression = term { "|" term }
term = { ["§"] factor }+ # "§" means all following factors mandatory
factor = [flowmarker] [retrieveop] symbol !"=" # negative lookahead to be sure
# it's not a definition
| [flowmarker] literal
| [flowmarker] regexp
| [flowmarker] oneormore
| [flowmarker] group
| [flowmarker] unordered
| repetition
| option
flowmarker = "!" | "&" # '!' negative lookahead, '&' positive lookahead
| "-!" | "-&" # '-' negative lookbehind, '-&' positive lookbehind
retrieveop = "::" | ":" # '::' pop, ':' retrieve
group = "(" §expression ")"
unordered = "<" §expression ">" # elements of expression in arbitrary order
oneormore = "{" expression "}+"
repetition = "{" §expression "}"
option = "[" §expression "]"
symbol = /(?!\d)\w+/~ # e.g. expression, factor, parameter_list
literal = /"(?:[^"]|\\")*?"/~ # e.g. "(", '+', 'while'
| /'(?:[^']|\\')*?'/~ # whitespace following literals will be ignored
regexp = /~?\/(?:\\\/|[^\/])*?\/~?/~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
# '~' is a whitespace-marker, if present leading
# or trailing whitespace of a regular expression
# will be ignored tacitly.
list_ = /\w+/~ { "," /\w+/~ } # comma separated list of symbols,
# e.g. BEGIN_LIST, END_LIST,
# BEGIN_QUOTE, END_QUOTE
# see CommonMark/markdown.py for an exmaple
EOF = !/./
"""
r"""Parser for an EBNF source file, with this grammar:
# EBNF-Grammar in EBNF
@ comment = /#.*(?:\n|$)/ # comments start with '#' and eat all chars up to and including '\n'
@ whitespace = /\s*/ # whitespace includes linefeed
@ literalws = right # trailing whitespace of literals will be ignored tacitly
syntax = [~//] { definition | directive } §EOF
definition = symbol §"=" expression
directive = "@" §symbol "=" ( regexp | literal | list_ )
expression = term { "|" term }
term = { ["§"] factor }+ # "§" means all following factors mandatory
factor = [flowmarker] [retrieveop] symbol !"=" # negative lookahead to be sure it's not a definition
| [flowmarker] literal
| [flowmarker] plaintext
| [flowmarker] regexp
| [flowmarker] whitespace
| [flowmarker] oneormore
| [flowmarker] group
| [flowmarker] unordered
| repetition
| option
flowmarker = "!" | "&" # '!' negative lookahead, '&' positive lookahead
| "-!" | "-&" # '-' negative lookbehind, '-&' positive lookbehind
retrieveop = "::" | ":" # '::' pop, ':' retrieve
group = "(" §expression ")"
unordered = "<" §expression ">" # elements of expression in arbitrary order
oneormore = "{" expression "}+"
repetition = "{" §expression "}"
option = "[" §expression "]"
symbol = /(?!\d)\w+/~ # e.g. expression, factor, parameter_list
literal = /"(?:[^"]|\\")*?"/~ # e.g. "(", '+', 'while'
| /'(?:[^']|\\')*?'/~ # whitespace following literals will be ignored tacitly.
plaintext = /`(?:[^"]|\\")*?`/~ # like literal but does not eat whitespace
regexp = /~?\/(?:\\\/|[^\/])*?\/~?/~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
# '~' is a whitespace-marker, if present leading or trailing
# whitespace of a regular expression will be ignored tacitly.
whitespace = /~/~ # implicit or default whitespace
list_ = /\w+/~ { "," /\w+/~ } # comma separated list of symbols, e.g. BEGIN_LIST, END_LIST,
# BEGIN_QUOTE, END_QUOTE ; see CommonMark/markdown.py for an exmaple
EOF = !/./
"""
expression = Forward()
source_hash__ = "3fc9f5a340f560e847d9af0b61a68743"
parser_initialization__ = "upon instantiation"
......@@ -135,9 +132,12 @@ class EBNFGrammar(Grammar):
WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = ''
wspR__ = WSP__
whitespace__ = Whitespace(WSP__)
EOF = NegativeLookahead(RegExp('.'))
list_ = Series(RE('\\w+'), ZeroOrMore(Series(Token(","), RE('\\w+'))))
whitespace = RE('~')
regexp = RE('~?/(?:\\\\/|[^/])*?/~?')
plaintext = RE('`(?:[^"]|\\\\")*?`')
literal = Alternative(RE('"(?:[^"]|\\\\")*?"'), RE("'(?:[^']|\\\\')*?'"))
symbol = RE('(?!\\d)\\w+')
option = Series(Token("["), expression, Token("]"), mandatory=1)
......@@ -147,18 +147,16 @@ class EBNFGrammar(Grammar):
group = Series(Token("("), expression, Token(")"), mandatory=1)
retrieveop = Alternative(Token("::"), Token(":"))
flowmarker = Alternative(Token("!"), Token("&"), Token("-!"), Token("-&"))
factor = Alternative(
Series(Option(flowmarker), Option(retrieveop), symbol, NegativeLookahead(Token("="))),
Series(Option(flowmarker), literal), Series(Option(flowmarker), regexp),
Series(Option(flowmarker), oneormore), Series(Option(flowmarker), group),
Series(Option(flowmarker), unordered), repetition, option)
factor = Alternative(Series(Option(flowmarker), Option(retrieveop), symbol, NegativeLookahead(Token("="))),
Series(Option(flowmarker), literal), Series(Option(flowmarker), plaintext),
Series(Option(flowmarker), regexp), Series(Option(flowmarker), whitespace),
Series(Option(flowmarker), oneormore), Series(Option(flowmarker), group),
Series(Option(flowmarker), unordered), repetition, option)
term = OneOrMore(Series(Option(Token("§")), factor))
expression.set(Series(term, ZeroOrMore(Series(Token("|"), term))))
directive = Series(Token("@"), symbol, Token("="), Alternative(regexp, literal, list_),
mandatory=1)
directive = Series(Token("@"), symbol, Token("="), Alternative(regexp, literal, list_), mandatory=1)
definition = Series(symbol, Token("="), expression, mandatory=1)
syntax = Series(Option(RE('', wR='', wL=WSP__)), ZeroOrMore(Alternative(definition, directive)),
EOF, mandatory=2)
syntax = Series(Option(RE('', wR='', wL=WSP__)), ZeroOrMore(Alternative(definition, directive)), EOF, mandatory=2)
root__ = syntax
......@@ -385,6 +383,7 @@ class EBNFCompiler(Compiler):
COMMENT_KEYWORD = "COMMENT__"
WHITESPACE_KEYWORD = "WSP__"
RAW_WS_KEYWORD = "WHITESPACE__"
WHITESPACE_PARSER_KEYWORD = "whitespace__"
RESERVED_SYMBOLS = {WHITESPACE_KEYWORD, RAW_WS_KEYWORD, COMMENT_KEYWORD}
AST_ERROR = "Badly structured syntax tree. " \
"Potentially due to erroneous AST transformation."
......@@ -415,7 +414,7 @@ class EBNFCompiler(Compiler):
self.definitions = {} # type: Dict[str, str]
self.deferred_tasks = [] # type: List[Callable]
self.root_symbol = "" # type: str
self.directives = {'whitespace': self.WHITESPACE['horizontal'],
self.directives = {'whitespace': self.WHITESPACE['vertical'],
'comment': '',
'literalws': {'right'},
'tokens': set(), # alt. 'preprocessor_tokens'
......@@ -494,6 +493,12 @@ class EBNFCompiler(Compiler):
return '\n'.join(compiler)
def verify_transformation_table(self, transtable):
"""
Checks for symbols that occur in the transformation-table but have
never been defined in the grammar. Usually, this kind of
inconsistency results from an error like a typo in the transformation
table.
"""
assert self._dirty_flag
table_entries = set(expand_table(transtable).keys()) - {'*', '+', '~'}
symbols = self.rules.keys()
......@@ -528,6 +533,8 @@ class EBNFCompiler(Compiler):
# add special fields for Grammar class
definitions.append((self.WHITESPACE_PARSER_KEYWORD,
'Whitespace(%s)' % self.WHITESPACE_KEYWORD))
definitions.append(('wspR__', self.WHITESPACE_KEYWORD
if 'right' in self.directives['literalws'] else "''"))
definitions.append(('wspL__', self.WHITESPACE_KEYWORD
......@@ -906,9 +913,13 @@ class EBNFCompiler(Compiler):
return symbol
def on_literal(self, node) -> str:
return 'Token(' + node.content.replace('\\', r'\\') + ')' # return 'Token(' + ',
# '.merge_children([node.result]) + ')' ?
def on_literal(self, node: Node) -> str:
    """Compile a literal-node into Python source code constructing a
    Token-parser, doubling each backslash so that the emitted source
    escapes them correctly."""
    escaped = node.content.replace('\\', r'\\')
    return ''.join(['Token(', escaped, ')'])
def on_plaintext(self, node: Node) -> str:
    """Compile a plaintext-node into Python source code constructing a
    Token-parser with empty left and right whitespace, i.e. a token that
    does not eat any adjacent whitespace.  Backslashes are doubled for
    correct escaping and the backtick delimiters become double quotes."""
    txt = node.content.replace('\\', r'\\')
    txt = txt.replace('`', '"')
    return "Token(%s, wL='', wR='')" % txt
def on_regexp(self, node: Node) -> str:
......@@ -942,6 +953,10 @@ class EBNFCompiler(Compiler):
return parser + ', '.join([arg] + name) + ')'
def on_whitespace(self, node: Node) -> str:
    """Compile a whitespace-node ("~" in the EBNF source) by referring to
    the grammar's single predefined whitespace parser rather than
    emitting code for a new parser object."""
    parser_reference = 'whitespace__'
    return parser_reference
def on_list_(self, node) -> Set[str]:
    """Compile a list_-node (a comma separated list of symbols) into the
    set of its item names, stripped of surrounding whitespace.  The node
    must have at least one child (asserted)."""
    assert node.children
    return {entry.result.strip() for entry in node.children}
......
......@@ -203,10 +203,10 @@ class HistoryRecord:
FAIL = "FAIL"
Snapshot = collections.namedtuple('Snapshot', ['line', 'column', 'stack', 'status', 'text'])
COLGROUP = '<colgroup>\n<col style="width:2%"/><col style="width:2%"/><col style="width:75"/>' \
'<col style="width:6%"/><col style="width:15%"/>\n</colgroup>'
HEADINGS = ('<tr><th>L</th><th>C</th><th>parser calling sequence</th>'
'<th>success</th><th>text to parse</th></tr>')
COLGROUP = '<colgroup>\n<col style="width:2%"/><col style="width:2%"/><col ' \
'style="width:75%"/><col style="width:6%"/><col style="width:15%"/>\n</colgroup>'
HEADINGS = ('<tr><th>L</th><th>C</th><th>parser call sequence</th>'
'<th>success</th><th>text matched or failed</th></tr>')
HTML_LEAD_IN = ('<!DOCTYPE html>\n'
'<html>\n<head>\n<meta charset="utf-8"/>\n<style>\n'
'td,th {font-family:monospace; '
......@@ -289,7 +289,7 @@ class HistoryRecord:
@property
def stack(self) -> str:
return "->".join((p.repr if p.ptype == ':RegExp' else p.name or p.ptype)
return "->".join((p.repr if p.ptype in {':RegExp', ':PlainText'} else p.name or p.ptype)
for p in self.call_stack)
@property
......
......@@ -48,6 +48,7 @@ __all__ = ('Parser',
'Grammar',
'PreprocessorToken',
'RegExp',
'Whitespace',
'RE',
'Token',
'mixin_comment',
......@@ -445,16 +446,18 @@ class Grammar:
history_tracking__: A flag indicating that the parsing history shall
be tracked
wsp_left_parser__: A parser for the default left-adjacent-whitespace
or the :class:zombie-parser if the
default is empty. The default whitespace will be used by parsers
:class:`Token` and, if no other parsers are passed to its constructor,
by parser :class:`RE`.
whitespace__: A parser for the implicit optional whitespace (or the
:class:zombie-parser if the default is empty). The default
whitespace will be used by parsers :class:`Token` and, if no
other parsers are passed to its constructor, by parser
:class:`RE`. It can also be place explicitly in the
EBNF-Grammar via the "~"-sign.
wsp_right_parser__: The same for the default right-adjacent-whitespace.
Both wsp_left_parser__ and wsp_right_parser__ merely serve the
purpose to avoid having to specify the default whitespace
explicitly every time an :class:`RE`-parser-object is created.
wsp_left_parser__: The same as ``whitespace`` for
left-adjacent-whitespace.
wsp_right_parser__: The same as ``whitespace`` for
right-adjacent-whitespace.
_dirty_flag__: A flag indicating that the Grammar has been called at
least once so that the parsing-variables need to be reset
......@@ -591,18 +594,22 @@ class Grammar:
# do so only arises during testing.
self.root__ = copy.deepcopy(root) if root else copy.deepcopy(self.__class__.root__)
if self.wspL__:
self.wsp_left_parser__ = Whitespace(self.wspL__) # type: ParserBase
self.wsp_left_parser__.grammar = self
self.all_parsers__.add(self.wsp_left_parser__) # don't you forget about me...
else:
self.wsp_left_parser__ = ZOMBIE_PARSER
if self.wspR__:
self.wsp_right_parser__ = Whitespace(self.wspR__) # type: ParserBase
self.wsp_right_parser__.grammar = self
self.all_parsers__.add(self.wsp_right_parser__) # don't you forget about me...
if self.WSP__:
try:
probe = self.whitespace__
assert self.whitespace__.regexp.pattern == self.WSP__
except AttributeError:
self.whitespace__ = Whitespace(self.WSP__)
self.whitespace__.grammar = self
self.all_parsers__.add(self.whitespace__) # don't you forget about me...
else:
self.wsp_right_parser__ = ZOMBIE_PARSER
self.whitespace__ = ZOMBIE_PARSER
assert not self.wspL__ or self.wspL__ == self.WSP__
assert not self.wspR__ or self.wspR__ == self.WSP__
self.wsp_left_parser__ = self.whitespace__ if self.wspL__ else ZOMBIE_PARSER
self.wsp_right_parser__ = self.whitespace__ if self.wspR__ else ZOMBIE_PARSER
self.root__.apply(self._add_parser__)
......@@ -884,6 +891,9 @@ class PlainText(Parser):
return Node(self, self.text, True), text[self.len:]
return None, text
def __repr__(self):
    """Return a readable representation of the parser's text: wrapped in
    single quotes unless the text contains a single quote itself, in
    which case double quotes are used.

    Fix: the original condition was ``self.text.find("'") <= 0``, which
    wrongly picked single-quote wrapping when the text *starts* with a
    single quote (``find`` returns 0 there).  ``< 0`` — i.e. "no single
    quote found anywhere" — is the intended test.
    """
    return ("'%s'" if self.text.find("'") < 0 else '"%s"') % self.text
class RegExp(Parser):
r"""
......@@ -941,6 +951,28 @@ class Whitespace(RegExp):
assert WHITESPACE_PTYPE == ":Whitespace"
#######################################################################
#######################################################################
#
# WARNING: The following code is hard to maintain, because it
# introduces a special case, i.e. a parser with child parsers that is
# not a descendant of the NaryOperator and, because it interacts
# with the constructor of the Grammar class (see the instantiations of
# the Whitespace-class, there).
#
# That is all the more regrettable, as class RE basically just
# introduces syntactical sugar for
#
# Series(whitespace__, RegExp('something'), whitespace__)
#
# What to do? Throw the syntactical sugar out? :-( Or find a more
# robust solution for that kind of syntactical sugar? Or just leave
# it be?
#
######################################################################
######################################################################
class RE(Parser):
r"""
Regular Expressions with optional leading or trailing whitespace.
......@@ -982,9 +1014,8 @@ class RE(Parser):
wL (str or regexp): Left whitespace regular expression,
i.e. either ``None``, the empty string or a regular
expression (e.g. "\s*") that defines whitespace. An
empty string means no whitespace will be skipped,
``None`` means that the default whitespace will be
used.
empty string means no whitespace will be skipped; ``None``
means that the default whitespace will be used.
wR (str or regexp): Right whitespace regular expression.
See above.
name: The optional name of the parser.
......
......@@ -39,6 +39,7 @@ __all__ = ('ParserBase',
'MockParser',
'ZombieParser',
'ZOMBIE_PARSER',
'ZOMBIE_NODE',
'Node',
'mock_syntax_tree',
'flatten_sxpr')
......@@ -724,6 +725,9 @@ class Node(collections.abc.Sized):
return sum(child.tree_size() for child in self.children) + 1
ZOMBIE_NODE = Node(ZOMBIE_PARSER, '')
def mock_syntax_tree(sxpr):
"""
Generates a tree of nodes from an S-expression. The main purpose of this is
......
......@@ -30,7 +30,7 @@ for CST -> AST transformations.
import inspect
from functools import partial, reduce, singledispatch
from DHParser.syntaxtree import Node, WHITESPACE_PTYPE, TOKEN_PTYPE, MockParser
from DHParser.syntaxtree import Node, WHITESPACE_PTYPE, TOKEN_PTYPE, MockParser, ZOMBIE_NODE
from DHParser.toolkit import expand_table, smart_list, re, typing
from typing import AbstractSet, Any, ByteString, Callable, cast, Container, Dict, \
List, Sequence, Union, Text
......@@ -108,7 +108,7 @@ def transformation_factory(t1=None, t2=None, t3=None, t4=None, t5=None):
dispatch on the first parameter after the context parameter.
Decorating a transformation-function that has more than merely the
``node``-parameter with ``transformation_factory`` creates a
``context``-parameter with ``transformation_factory`` creates a
function with the same name, which returns a partial-function that
takes just the context-parameter.
......@@ -158,7 +158,7 @@ def transformation_factory(t1=None, t2=None, t3=None, t4=None, t5=None):
f = singledispatch(f)
try:
if len(params) == 1 and issubclass(p1type, Container) \
and not issubclass(p1type, Text) and not issubclass(p1type, ByteString):
and not (issubclass(p1type, Text) or issubclass(p1type, ByteString)):
def gen_special(*args):
c = set(args) if issubclass(p1type, AbstractSet) else \
list(args) if issubclass(p1type, Sequence) else args
......@@ -241,8 +241,8 @@ def traverse(root_node: Node,
# Is this optimization really needed?
if '__cache__' in processing_table:
# assume that processing table has already been expanded
table = processing_table
cache = processing_table['__cache__']
table = processing_table # type: ProcessingTableType
cache = processing_table['__cache__']  # type: Dict[str, List[Callable]]
else:
# normalize processing_table entries by turning single values
# into lists with a single value
......@@ -261,13 +261,15 @@ def traverse(root_node: Node,
# cache = {} # type: Dict[str, List[Callable]]
def traverse_recursive(context):
nonlocal cache
node = context[-1]
if node.children:
context.append(ZOMBIE_NODE)
for child in node.result:
context.append(child)
context[-1] = child
traverse_recursive(context) # depth first
node.error_flag = max(node.error_flag, child.error_flag) # propagate error flag
context.pop()
context.pop()
key = key_func(node)
try:
......@@ -385,8 +387,7 @@ def is_token(context: List[Node], tokens: AbstractSet[str] = frozenset()) -> boo
"""Checks whether the last node in the context has `ptype == TOKEN_PTYPE`
and it's content matches one of the given tokens. Leading and trailing
whitespace-tokens will be ignored. In case an empty set of tokens is passed,
any token is a match. If only ":" is given all anonymous tokens but no other
tokens are a match.
any token is a match.
"""
def stripped(nd: Node) -> str:
"""Removes leading and trailing whitespace-nodes from content."""
......@@ -453,26 +454,26 @@ def _reduce_child(node: Node, child: Node):
node.result = child.result
def _pick_child(context: List[Node], criteria: CriteriaType):
    """Return the first child of the last node in ``context`` that meets
    ``criteria``, or ``None`` if no child matches.

    The type of ``criteria`` selects the matching strategy:

    * ``int`` -- the child at that index; ``None`` if the index is out
      of range.
    * ``str`` -- the first child whose ``tag_name`` equals the string.
    * otherwise -- assumed to be a condition function: the first child
      for which ``criteria(context)`` is truthy.  Each candidate child
      is temporarily pushed onto ``context`` so the condition sees the
      full path from the root down to the candidate.
    """
    if isinstance(criteria, int):
        try:
            return context[-1].children[criteria]
        except IndexError:
            # index out of range: no such child
            return None
    elif isinstance(criteria, str):
        for child in context[-1].children:
            if child.tag_name == criteria:
                return child
        return None
    else:  # assume criteria has type ConditionFunc
        for child in context[-1].children:
            # extend the context so the condition can inspect the candidate
            context.append(child)
            evaluation = criteria(context)
            context.pop()  # restore the context before returning/continuing
            if evaluation:
                return child
        return None
# def _pick_child(context: List[Node], criteria: CriteriaType):
# """Returns the first child that meets the criteria."""
# if isinstance(criteria, int):
# try:
# return context[-1].children[criteria]
# except IndexError:
# return None
# elif isinstance(criteria, str):
# for child in context[-1].children:
# if child.tag_name == criteria:
# return child
# return None
# else: # assume criteria has type ConditionFunc
# for child in context[-1].children:
# context.append(child)
# evaluation = criteria(context)
# context.pop()
# if evaluation:
# return child
# return None
#######################################################################
......@@ -598,15 +599,16 @@ def flatten(context: List[Node], condition: Callable=is_anonymous, recursive: bo
node = context[-1]
if node.children:
new_result = [] # type: List[Node]
context.append(ZOMBIE_NODE)
for child in node.children:
context.append(child)
context[-1] = child
if child.children and condition(context):
if recursive:
flatten(context, condition, recursive)
new_result.extend(child.children)
else:
new_result.append(child)
context.pop()
context.pop()
node.result = tuple(new_result)
......
......@@ -51,7 +51,7 @@ EBNF_TEMPLATE = r"""-grammar
#
#######################################################################
document = //~ { WORD } §EOF # root parser: a sequence of words preceded by whitespace
document = ~ { WORD } §EOF # root parser: a sequence of words preceded by whitespace
# until the end of file
#######################################################################
......@@ -75,6 +75,7 @@ F1: two words
TEST_DOCUMENT_TEMPLATE = r'''[match:document]
M1: """This is a sequence of words
extending over several lines"""
M2: """ This sequence contains leading whitespace"""
[fail:document]
F1: """This test should fail, because neither
......@@ -282,7 +283,12 @@ def main():
parameter) or runs a quick self-test.
"""
if len(sys.argv) > 1:
if os.path.exists(sys.argv[1]) and os.path.isfile(sys.argv[1]):