Commit f9ea4738 authored by eckhart

- DHParser/parse.py Parser.__call__() and class Grammar: added left recursion warning

parent a33838c1
......@@ -102,7 +102,7 @@ from DHParser import logging, is_filename, load_if_file, \\
keep_children, is_one_of, not_one_of, has_content, apply_if, remove_first, remove_last, \\
remove_anonymous_empty, keep_nodes, traverse_locally, strip, lstrip, rstrip, \\
replace_content, replace_content_by, forbid, assert_content, remove_infix_operator, \\
reduce_anonymous_nodes, error_on, recompile_grammar, GLOBALS
flatten_anonymous_nodes, error_on, recompile_grammar, GLOBALS
'''.format(dhparserdir=dhparserdir)
......
......@@ -60,6 +60,15 @@ __all__ = ('get_ebnf_preprocessor',
'CompilerFactoryFunc')
########################################################################
#
# Presets
#
########################################################################
CONFIG_PRESET['add_grammar_source_to_parser_docstring'] = False
########################################################################
#
# EBNF scanning
......@@ -562,7 +571,7 @@ class EBNFCompiler(Compiler):
tt_name = self.grammar_name + '_AST_transformation_table'
transtable = [tt_name + ' = {',
' # AST Transformations for the ' + self.grammar_name + '-grammar']
transtable.append(' "<": reduce_anonymous_nodes,')
transtable.append(' "<": flatten_anonymous_nodes,')
for name in self.rules:
transformations = '[]'
# rule = self.definitions[name]
......@@ -1276,11 +1285,3 @@ def compile_ebnf(ebnf_source: str, branding: str = 'DSL') \
get_ebnf_transformer(),
get_ebnf_compiler(branding, ebnf_source))
########################################################################
#
# Presets
#
########################################################################
CONFIG_PRESET['add_grammar_source_to_parser_docstring'] = False
......@@ -73,6 +73,7 @@ class Error:
REDECLARED_TOKEN_WARNING = ErrorCode(120)
UNUSED_ERROR_HANDLING_WARNING = ErrorCode(130)
LEFT_RECURSION_WARING = ErrorCode(140)
UNDEFINED_SYMBOL_IN_TRANSTABLE_WARNING = ErrorCode(610)
......
......@@ -39,6 +39,7 @@ cdef class Grammar:
cdef public list history__
cdef public bint moving_forward__
cdef public set recursion_locations__
cdef public int last_recursion_location__
cdef class PreprocessorToken(Parser):
pass
......
......@@ -39,7 +39,8 @@ from DHParser.preprocess import BEGIN_TOKEN, END_TOKEN, RX_TOKEN_NAME
from DHParser.stringview import StringView, EMPTY_STRING_VIEW
from DHParser.syntaxtree import Node, FrozenNode, RootNode, WHITESPACE_PTYPE, \
TOKEN_PTYPE, ZOMBIE_TAG, ResultType
from DHParser.toolkit import sane_parser_name, escape_control_characters, re, typing, cython
from DHParser.toolkit import sane_parser_name, escape_control_characters, get_config_value,\
CONFIG_PRESET, re, typing, cython
from typing import Callable, cast, List, Tuple, Set, Dict, DefaultDict, Union, Optional, Any
......@@ -82,8 +83,13 @@ __all__ = ('Parser',
'Forward')
########################################################################
#
# Presets
#
########################################################################
EMPTY_NODE = FrozenNode(':EMPTY__', '')
CONFIG_PRESET['flatten_tree_while_parsing'] = True
########################################################################
......@@ -98,6 +104,7 @@ LEFT_RECURSION_DEPTH = 8 # type: int
# set too high. PyPy allows higher values than CPython
MAX_DROPOUTS = 3 # type: int
# stop trying to recover parsing after so many errors
EMPTY_NODE = FrozenNode(':EMPTY__', '')
class ParserError(Exception):
......@@ -330,6 +337,11 @@ class Parser:
if location in self.visited:
node, rest = self.visited[location]
# Warn about left recursion, but only once per consecutive location to avoid duplicate messages.
if location != grammar.last_recursion_location__:
grammar.tree__.add_error(node, Error("Left recursion encountered. "
"Refactor grammar to avoid slow parsing.",
node.pos, Error.LEFT_RECURSION_WARING))
grammar.last_recursion_location__ = location
# don't overwrite any positive match (i.e. node not None) in the cache
# and don't add empty entries for parsers returning from left recursive calls!
elif grammar.memoization__:
......@@ -671,8 +683,11 @@ class Grammar:
recursion_locations__: Stores the locations where left recursion was
detected. Needed to provide minimal memoization for the left
recursion detection algorithm, but, strictly speaking, superfluous
if full memoization is enabled. (See :func:`add_parser_guard` and its
local function :func:`guarded_call`)
if full memoization is enabled. (See :func:`Parser.__call__()`)
last_recursion_location__: Last location where left recursion was
detected. This is used to avoid emitting duplicate warning messages
about left recursion.
memoization__: Turns full memoization on or off. Turning memoization off
results in less memory usage and sometimes reduced parsing time.
......@@ -682,6 +697,12 @@ class Grammar:
left_recursion_handling__: Turns left recursion handling on or off.
If turned off, a recursion error will result in case of left
recursion.
flatten_tree__: If True (default), anonymous nodes will already be
flattened during parsing. This greatly reduces the size of the concrete
syntax tree and simplifies and speeds up abstract syntax tree generation.
The initial value will be read from the config variable
'flatten_tree_while_parsing' upon class instantiation.
"""
python_src__ = '' # type: str
root__ = PARSER_PLACEHOLDER # type: Parser
......@@ -736,6 +757,7 @@ class Grammar:
self.history_tracking__ = False # type: bool
self.memoization__ = True # type: bool
self.left_recursion_handling__ = True # type: bool
self.flatten_tree__ = get_config_value('flatten_tree_while_parsing') # type: bool
self._reset__()
# prepare parsers in the class, first
......@@ -784,6 +806,7 @@ class Grammar:
# also needed for call stack tracing
self.moving_forward__ = False # type: bool
self.recursion_locations__ = set() # type: Set[int]
self.last_recursion_location__ = -1 # type: int
@property
......@@ -1247,40 +1270,52 @@ class DropWhitespace(Whitespace):
class MetaParser(Parser):
# TODO: Allow to turn optimization off
def _return_value(self, node: Optional[Node]) -> Node:
# Node(self.tag_name, node) # unoptimized code
"""
Generate a return node if a single node has been returned from
any descendant parsers. Empty nodes will be dropped silently.
If `self` is an unnamed parser, a non-empty descendant node
will be passed through. If the descendant node is anonymous,
it will be dropped and only its result will be kept.
In all other cases or if the optimization is turned off by
setting `grammar.flatten_tree__` to False, a new node will be
generated and the descendant node will be its single child.
"""
assert node is None or isinstance(node, Node)
if node:
if self.pname:
if node.tag_name[0] == ':': # faster than node.is_anonymous()
return Node(self.tag_name, node._result)
return Node(self.tag_name, node)
return node
if self.pname:
return Node(self.tag_name, ()) # type: Node
return EMPTY_NODE # avoid creation of a node object for anonymous empty nodes
if self.grammar.flatten_tree__:
if node:
if self.pname:
if node.tag_name[0] == ':': # faster than node.is_anonymous()
return Node(self.tag_name, node._result)
return Node(self.tag_name, node)
return node
elif self.pname:
return Node(self.tag_name, ()) # type: Node
return EMPTY_NODE # avoid creation of a node object for anonymous empty nodes
return Node(self.tag_name, node or ()) # unoptimized code
@cython.locals(N=cython.int)
def _return_values(self, results: Tuple[Node, ...]) -> Node:
# return Node(self.tag_name, results) # unoptimized code
assert isinstance(results, tuple)
N = len(results)
if N > 1:
nr = []
for child in results:
if child.children and child.tag_name[0] == ':': # faster than c.is_anonymous():
nr.extend(child.children)
else:
nr.append(child)
return Node(self.tag_name, tuple(nr))
if self.grammar.flatten_tree__:
nr = []
for child in results:
if child.children and child.tag_name[0] == ':': # faster than c.is_anonymous():
nr.extend(child.children)
else:
nr.append(child)
return Node(self.tag_name, tuple(nr))
return Node(self.tag_name, results) # unoptimized code
elif N == 1:
return self._return_value(results[0])
elif self.pname:
return Node(self.tag_name, ())
return EMPTY_NODE # avoid creation of a node object for anonymous empty nodes
elif self.grammar.flatten_tree__:
if self.pname:
return Node(self.tag_name, ())
return EMPTY_NODE # avoid creation of a node object for anonymous empty nodes
return Node(self.tag_name, results) # unoptimized code
class UnaryParser(MetaParser):
......
......@@ -47,7 +47,7 @@ __all__ = ('TransformationDict',
'traverse',
'is_named',
'update_attr',
'reduce_anonymous_nodes',
'flatten_anonymous_nodes',
'replace_by_single_child',
'reduce_single_child',
'replace_or_reduce',
......@@ -559,12 +559,13 @@ def _reduce_child(node: Node, child: Node):
# _reduce_child(context[-1], child)
def reduce_anonymous_nodes(context: List[Node]):
def flatten_anonymous_nodes(context: List[Node]):
"""
Reduces (non-recursively) all anonymous non-leaf children by adding
their result to the result of the last node in the context. If the
last node is anonymous itself, it will be replaced by a single child.
Also drops any empty anonymous nodes.
Flattens non-recursively all anonymous non-leaf children by adding
their result to the result of the parent node. Empty anonymous children
will be dropped altogether. If the parent node (i.e. `context[-1]`) is
anonymous itself and has only one child node, it will be replaced by
its single child node.
"""
node = context[-1]
if node.children:
......
......@@ -33,7 +33,7 @@ from DHParser import logging, is_filename, load_if_file, \
keep_children, is_one_of, not_one_of, has_content, apply_if, remove_first, remove_last, \
remove_anonymous_empty, keep_nodes, traverse_locally, strip, lstrip, rstrip, \
replace_content, replace_content_by, forbid, assert_content, remove_infix_operator, \
error_on, recompile_grammar, reduce_anonymous_nodes, GLOBALS
error_on, recompile_grammar, flatten_anonymous_nodes, GLOBALS
#######################################################################
......@@ -98,7 +98,7 @@ def get_grammar() -> ArithmeticGrammar:
Arithmetic_AST_transformation_table = {
# AST Transformations for the Arithmetic-grammar
"<": reduce_anonymous_nodes,
"<": flatten_anonymous_nodes,
"expression": [],
"term": [reduce_single_child],
"factor": [reduce_single_child],
......
......@@ -33,7 +33,7 @@ from DHParser import logging, is_filename, load_if_file, \
keep_children, is_one_of, not_one_of, has_content, apply_if, remove_first, remove_last, \
remove_anonymous_empty, keep_nodes, traverse_locally, strip, lstrip, rstrip, \
replace_content, replace_content_by, forbid, assert_content, remove_infix_operator, \
reduce_anonymous_nodes, error_on, recompile_grammar, GLOBALS
flatten_anonymous_nodes, error_on, recompile_grammar, GLOBALS
#######################################################################
......@@ -107,7 +107,7 @@ def get_grammar() -> EBNFGrammar:
EBNF_AST_transformation_table = {
# AST Transformations for the EBNF-grammar
"<": reduce_anonymous_nodes,
"<": flatten_anonymous_nodes,
"syntax": [],
"definition": [],
"directive": [],
......
......@@ -22,7 +22,7 @@ from DHParser import is_filename, Grammar, Compiler, Lookbehind, Alternative, Po
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
PreprocessorFunc, TransformationDict, \
Node, TransformationFunc, traverse, remove_children_if, is_anonymous, \
reduce_single_child, replace_by_single_child, remove_whitespace, reduce_anonymous_nodes, \
reduce_single_child, replace_by_single_child, remove_whitespace, flatten_anonymous_nodes, \
flatten, is_empty, collapse, replace_content, replace_content_by, remove_brackets, \
is_one_of, traverse_locally, remove_tokens, remove_nodes, TOKEN_PTYPE, Error, GLOBALS
from DHParser.log import logging
......@@ -219,7 +219,7 @@ drop_expendables = remove_children_if(lambda context: is_empty(context) or
LaTeX_AST_transformation_table = {
# AST Transformations for the LaTeX-grammar
"<": [reduce_anonymous_nodes, flatten_structure],
"<": [flatten_anonymous_nodes, flatten_structure],
"latexdoc": [],
"preamble": [traverse_locally({'<': remove_whitespace, 'block': replace_by_single_child})],
"document": [flatten_structure],
......
......@@ -26,7 +26,7 @@ from DHParser import logging, is_filename, load_if_file, \
ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \
grammar_changed, last_value, counterpart, accumulate, PreprocessorFunc, \
Node, TransformationFunc, TransformationDict, Token, DropToken, DropWhitespace, \
traverse, remove_children_if, is_anonymous, GLOBALS, reduce_anonymous_nodes, \
traverse, remove_children_if, is_anonymous, GLOBALS, flatten_anonymous_nodes, \
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \
is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \
......@@ -194,7 +194,7 @@ def get_grammar() -> XMLGrammar:
XML_AST_transformation_table = {
# AST Transformations for the XML-grammar
"<": [reduce_anonymous_nodes, remove_empty, remove_anonymous_tokens, remove_whitespace, remove_nodes("S")],
"<": [flatten_anonymous_nodes, remove_empty, remove_anonymous_tokens, remove_whitespace, remove_nodes("S")],
"document": [flatten(lambda context: context[-1].tag_name == 'prolog', recursive=False)],
"prolog": [],
"XMLDecl": [],
......
......@@ -19,7 +19,7 @@ try:
except ImportError:
import re
from DHParser import logging, is_filename, load_if_file, Grammar, Compiler, nil_preprocessor, \
PreprocessorToken, Whitespace, DropWhitespace, DropToken, reduce_anonymous_nodes, \
PreprocessorToken, Whitespace, DropWhitespace, DropToken, flatten_anonymous_nodes, \
Lookbehind, Lookahead, Alternative, Pop, Token, Synonym, AllOf, SomeOf, Unordered, \
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \
ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \
......@@ -140,7 +140,7 @@ def get_grammar() -> XMLSnippetGrammar:
XMLSnippet_AST_transformation_table = {
# AST Transformations for the XMLSnippet-grammar
"<": reduce_anonymous_nodes,
"<": flatten_anonymous_nodes,
"document": [],
"prolog": [],
"XMLDecl": [],
......
......@@ -52,7 +52,7 @@ class TestCompileFunctions:
assert callable(factory)
parser = factory()
result = parser("5 + 3 * 4")
assert not result.error_flag, str(result.errors_sorted)
assert not is_error(result.error_flag), str(result.errors_sorted)
result = parser("5A + 4B ** 4C")
assert is_error(result.error_flag)
......
......@@ -24,9 +24,9 @@ from functools import partial
sys.path.extend(['../', './'])
from DHParser.toolkit import compile_python_object
from DHParser.toolkit import compile_python_object, get_config_value, set_config_value
from DHParser.log import logging, is_logging, log_ST, log_parsing_history
from DHParser.error import Error
from DHParser.error import Error, is_error
from DHParser.parse import Parser, Grammar, Forward, TKN, ZeroOrMore, RE, \
RegExp, Lookbehind, NegativeLookahead, OneOrMore, Series, Alternative, AllOf, SomeOf, \
UnknownParserError, MetaParser, EMPTY_NODE
......@@ -70,8 +70,8 @@ class TestInfiLoopsAndRecursion:
parser = grammar_provider(minilang)()
assert parser
syntax_tree = parser(snippet)
assert not syntax_tree.error_flag, str(syntax_tree.errors_sorted)
assert snippet == str(syntax_tree)
assert not is_error(syntax_tree.error_flag), str(syntax_tree.errors_sorted)
assert snippet == syntax_tree.content, str(syntax_tree)
if is_logging():
log_ST(syntax_tree, "test_LeftRecursion_direct.cst")
log_parsing_history(parser, "test_LeftRecursion_direct")
......@@ -87,8 +87,8 @@ class TestInfiLoopsAndRecursion:
parser = grammar_provider(minilang)()
assert parser
syntax_tree = parser(snippet)
assert not syntax_tree.error_flag, syntax_tree.errors_sorted
assert snippet == str(syntax_tree)
assert not is_error(syntax_tree.error_flag), syntax_tree.errors_sorted
assert snippet == syntax_tree.content
def test_indirect_left_recursion1(self):
minilang = """
......@@ -101,14 +101,14 @@ class TestInfiLoopsAndRecursion:
assert parser
snippet = "8 * 4"
syntax_tree = parser(snippet)
assert not syntax_tree.error_flag, syntax_tree.errors_sorted
assert not is_error(syntax_tree.error_flag), syntax_tree.errors_sorted
snippet = "7 + 8 * 4"
syntax_tree = parser(snippet)
assert not syntax_tree.error_flag, syntax_tree.errors_sorted
assert not is_error(syntax_tree.error_flag), syntax_tree.errors_sorted
snippet = "9 + 8 * (4 + 3)"
syntax_tree = parser(snippet)
assert not syntax_tree.error_flag, syntax_tree.errors_sorted
assert snippet == str(syntax_tree)
assert not is_error(syntax_tree.error_flag), syntax_tree.errors_sorted
assert snippet == syntax_tree.content
if is_logging():
log_ST(syntax_tree, "test_LeftRecursion_indirect.cst")
log_parsing_history(parser, "test_LeftRecursion_indirect")
......@@ -788,7 +788,10 @@ class TestEarlyTokenWhitespaceDrop:
class TestMetaParser:
def test_meta_parser(self):
save = get_config_value('flatten_tree_while_parsing')
set_config_value('flatten_tree_while_parsing', True)
mp = MetaParser()
mp.grammar = Grammar() # override placeholder warning
mp.pname = "named"
mp.tag_name = mp.pname
nd = mp._return_value(Node('tagged', 'non-empty'))
......@@ -828,6 +831,7 @@ class TestMetaParser:
assert not nd.children
assert not nd.content
assert mp._return_value(None) == EMPTY_NODE
set_config_value('flatten_tree_while_parsing', save)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment