The expiration time for new job artifacts in CI/CD pipelines is now 30 days (GitLab default). Previously generated artifacts in already completed jobs will not be affected by the change. The latest artifacts for all jobs in the latest successful pipelines will be kept. More information: https://gitlab.lrz.de/help/user/admin_area/settings/continuous_integration.html#default-artifacts-expiration

Commit 06d5fa5a authored by eckhart's avatar eckhart
Browse files

- static parser checking infrastructure readded

parent 90f7d39f
......@@ -37,32 +37,41 @@ __all__ = ('CONFIG_PRESET',)
CONFIG_PRESET = dict() # type: Dict[Hashable, Any]
# DHParser.ebnfy.EBNFCompiler class adds the EBNF-grammar to the
# docstring of the generated Grammar-class
# Default value: False
CONFIG_PRESET['add_grammar_source_to_parser_docstring'] = False
########################################################################
#
# parser configuration
#
########################################################################
# Flattens anonymous nodes, by removing the node and adding its children
# to the parent node in place of the removed node. This is a very useful
# optimization that should be turned on except for learning or teaching
# purposes, in which case a concrete syntax tree that more diligently
# reflects the parser structure may be helpful.
# Default value: True
CONFIG_PRESET['flatten_tree_while_parsing'] = True
# # Carries out static analysis on the parser tree before parsing starts
# # to ensure its correctness. Possible values are:
# # 'early' - static analysis is carried out by DHParser.ebnf.EBNFCompiler,
# # already. Any errors it revealed will be located in the EBNF
# # source code. This naturally only works for parsers that are
# # generated from an EBNF syntax declaration.
# # 'late' - static analysis is carried out when instantiating a Grammar
# # (sub-)class. This works also for parser trees that are
# # handwritten in Python using the parser classes from module
# # `parse`. It slightly slows down instantiation of Grammar
# # classes, though.
# # 'none' - no static analysis at all (not recommended).
# # Default value: "early"
# CONFIG_PRESET['static_analysis'] = "early"
# Maximum depth of parser's left recursion
# This limit should not be set too high, because the left recursion
# catching algorithm can take exponential time, and, of course,
# because of python's recursion depth limit
# Left recursion handling can be turned off by setting this value to zero
# Default value: 5
CONFIG_PRESET['left_recursion_depth'] = 5
# Maximum allowed number of retries after errors where the parser
# would exit before the complete document has been parsed. Should
# not be set too high, because automatic retry works poorly.
# This value does not affect the @resume-directive.
# Default value: 3
CONFIG_PRESET['max_parser_dropouts'] = 3
########################################################################
#
# syntaxtree configuration
#
########################################################################
# Defines the output format for the serialization of syntax trees.
# Possible values are:
......@@ -83,8 +92,43 @@ CONFIG_PRESET['default_serialization'] = "S-expression"
# form by DhParser.syntaxtree.serialize() and other functions
# that use serialize(), like, for example, the reporting functions
# in DHParser.testing.
# Default value: 120
CONFIG_PRESET['flatten_sxpr_threshold'] = 120
########################################################################
#
# ebnf compiler configuration
#
########################################################################
# Carries out static analysis on the parser tree before parsing starts
# to ensure its correctness. Possible values are:
# 'early' - static analysis is carried out by DHParser.ebnf.EBNFCompiler,
# already. Any errors it revealed will be located in the EBNF
# source code. This naturally only works for parsers that are
# generated from an EBNF syntax declaration.
# 'late' - static analysis is carried out when instantiating a Grammar
# (sub-)class. This works also for parser trees that are
# handwritten in Python using the parser classes from module
# `parse`. It slightly slows down instantiation of Grammar
# classes, though.
# 'none' - no static analysis at all (not recommended).
# Default value: "early"
CONFIG_PRESET['static_analysis'] = "none"
# DHParser.ebnfy.EBNFCompiler class adds the EBNF-grammar to the
# docstring of the generated Grammar-class
# Default value: False
CONFIG_PRESET['add_grammar_source_to_parser_docstring'] = False
########################################################################
#
# testing framework configuration
#
########################################################################
# Allows (coarse-grained) parallelization for running tests via the
# Python multiprocessing module
# Default value: True
......
......@@ -95,7 +95,7 @@ from DHParser import logging, is_filename, load_if_file, \\
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \\
remove_expendables, remove_empty, remove_tokens, flatten, is_insignificant_whitespace, \\
is_expendable, collapse, collapse_if, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \\
remove_nodes, remove_content, remove_brackets, replace_parser, remove_anonymous_tokens, \\
remove_nodes, remove_content, remove_brackets, exchange_parser, remove_anonymous_tokens, \\
keep_children, is_one_of, not_one_of, has_content, apply_if, remove_first, remove_last, \\
remove_anonymous_empty, keep_nodes, traverse_locally, strip, lstrip, rstrip, \\
replace_content, replace_content_by, forbid, assert_content, remove_infix_operator, \\
......@@ -330,7 +330,7 @@ def get_grammar() -> {NAME}Grammar:
TRANSFORMER_FACTORY = '''
def {NAME}Transform() -> TransformationDict:
def {NAME}Transform() -> TransformationFunc:
return partial(traverse, processing_table={NAME}_AST_transformation_table.copy())
def get_transformer() -> TransformationFunc:
......@@ -915,20 +915,20 @@ class EBNFCompiler(Compiler):
self.definitions.update(definitions)
grammar_python_src = self.assemble_parser(definitions, node)
# if get_config_value('static_analysis') == 'early':
# try:
# grammar_class = compile_python_object(DHPARSER_IMPORTS + grammar_python_src,
# self.grammar_name)
# _ = grammar_class()
# grammar_python_src = grammar_python_src.replace(
# 'static_analysis_pending__ = [True]', 'static_analysis_pending__ = []', 1)
# except NameError:
# pass # undefined name in the grammar are already caught and reported
# except GrammarError as error:
# for sym, prs, err in error.errors:
# symdef_node = self.rules[sym][0]
# err.pos = self.rules[sym][0].pos
# self.tree.add_error(symdef_node, err)
if get_config_value('static_analysis') == 'early':
try:
grammar_class = compile_python_object(DHPARSER_IMPORTS + grammar_python_src,
self.grammar_name)
_ = grammar_class()
grammar_python_src = grammar_python_src.replace(
'static_analysis_pending__ = [True]', 'static_analysis_pending__ = []', 1)
except NameError:
pass # undefined name in the grammar are already caught and reported
except GrammarError as error:
for sym, prs, err in error.errors:
symdef_node = self.rules[sym][0]
err.pos = self.rules[sym][0].pos
self.tree.add_error(symdef_node, err)
return grammar_python_src
......
......@@ -93,11 +93,6 @@ __all__ = ('Parser',
########################################################################
LEFT_RECURSION_DEPTH = 8 # type: int
# because of python's recursion depth limit, this value ought not to be
# set too high. PyPy allows higher values than CPython
MAX_DROPOUTS = 3 # type: int
# stop trying to recover parsing after so many errors
EMPTY_NODE = FrozenNode(':EMPTY__', '')
......@@ -283,8 +278,8 @@ class Parser:
return self.visited[location]
# break left recursion at the maximum allowed depth
if grammar.left_recursion_handling__:
if self.recursion_counter[location] > LEFT_RECURSION_DEPTH:
if grammar.left_recursion_depth__:
if self.recursion_counter[location] > grammar.left_recursion_depth__:
grammar.recursion_locations__.add(location)
return None, text
self.recursion_counter[location] += 1
......@@ -297,10 +292,6 @@ class Parser:
try:
# PARSER CALL: run _parse() method
node, rest = self._parse(text)
# TODO: infinite loop protection. Definition "infinite loop":
# 1. same parser, 2. same position, 3. same recursion depth
# if is_logging() and self.pname:
# print(len(text), len(grammar.call_stack__), bool(node), location in self.visited, self.pname, text)
except ParserError as error:
# does this play well with variable setting? add rollback clause here? tests needed...
gap = len(text) - len(error.rest)
......@@ -334,8 +325,7 @@ class Parser:
raise ParserError(Node(self.tag_name, result).with_pos(location),
text, first_throw=False)
if grammar.left_recursion_handling__:
if grammar.left_recursion_depth__:
self.recursion_counter[location] -= 1
# don't clear recursion_locations__ !!!
......@@ -634,13 +624,13 @@ class Grammar:
field contains a value other than "done". A value of "done" indicates
that the class has already been initialized.
# static_analysis_pending__: True as long as no static analysis (see the method
# with the same name for more information) has been done to check
# parser tree for correctness. Static analysis
# is done at instantiation and the flag is then set to false, but it
# can also be carried out once the class has been generated
# (by DHParser.ebnf.EBNFCompiler) and then be set to false in the
# definition of the grammar class already.
static_analysis_pending__: True as long as no static analysis (see the method
with the same name for more information) has been done to check
parser tree for correctness. Static analysis
is done at instantiation and the flag is then set to false, but it
can also be carried out once the class has been generated
(by DHParser.ebnf.EBNFCompiler) and then be set to false in the
definition of the grammar class already.
python__src__: For the purpose of debugging and inspection, this field can
take the python src of the concrete grammar class
......@@ -725,15 +715,19 @@ class Grammar:
In some situations it may drastically increase parsing time, so
it is safer to leave it on. (Default: on)
left_recursion_handling__: Turns left recursion handling on or off.
If turned off, a recursion error will result in case of left
recursion.
flatten_tree__: If True (default), anonymous nodes will be flattened
during parsing already. This greatly reduces the concrete syntax
tree and simplifies and speeds up abstract syntax tree generation.
The initial value will be read from the config variable
'flatten_tree_while_parsing' upon class instantiation.
left_recursion_depth__: the maximum allowed depth for left-recursion.
A depth of zero means that no left recursion handling will
take place. See 'left_recursion_depth' in config.py.
max_parser_dropouts__: Maximum allowed number of retries after errors
where the parser would exit before the complete document has
been parsed. See config.py
"""
python_src__ = '' # type: str
root__ = PARSER_PLACEHOLDER # type: Parser
......@@ -743,7 +737,7 @@ class Grammar:
# some default values
# COMMENT__ = r'' # type: str # r'#.*(?:\n|$)'
# WSP_RE__ = mixin_comment(whitespace=r'[\t ]*', comment=COMMENT__) # type: str
# static_analysis_pending__ = [True] # type: List[bool]
static_analysis_pending__ = [True] # type: List[bool]
@classmethod
......@@ -788,8 +782,9 @@ class Grammar:
self._dirty_flag__ = False # type: bool
self.history_tracking__ = False # type: bool
self.memoization__ = True # type: bool
self.left_recursion_handling__ = True # type: bool
self.flatten_tree__ = get_config_value('flatten_tree_while_parsing') # type: bool
self.flatten_tree__ = get_config_value('flatten_tree_while_parsing') # type: bool
self.left_recursion_depth__ = get_config_value('left_recursion_depth') # type: int
self.max_parser_dropouts__ = get_config_value('max_parser_dropouts') # type: int
self._reset__()
# prepare parsers in the class, first
......@@ -804,15 +799,15 @@ class Grammar:
assert 'root_parser__' in self.__dict__
assert self.root_parser__ == self.__dict__['root_parser__']
# if self.__class__.static_analysis_pending__ \
# and get_config_value('static_analysis') in {'early', 'late'}:
# try:
# result = self.static_analysis()
# if result:
# raise GrammarError(result)
# self.__class__.static_analysis_pending__.pop()
# except (NameError, AttributeError):
# pass # don't fail the initialization of PLACEHOLDER
if self.__class__.static_analysis_pending__ \
and get_config_value('static_analysis') in {'early', 'late'}:
try:
result = self.static_analysis()
if result:
raise GrammarError(result)
self.__class__.static_analysis_pending__.pop()
except (NameError, AttributeError):
pass # don't fail the initialization of PLACEHOLDER
def __getitem__(self, key):
......@@ -945,7 +940,7 @@ class Grammar:
self.document_lbreaks__ = linebreaks(document) if self.history_tracking__ else []
self.last_rb__loc__ = -1 # rollback location
parser = self[start_parser] if isinstance(start_parser, str) else start_parser
self.start_parser__ = parser
self.start_parser__ = parser.parser if isinstance(parser, Forward) else parser
assert parser.grammar == self, "Cannot run parsers from a different grammar object!" \
" %s vs. %s" % (str(self), str(parser.grammar))
result = None # type: Optional[Node]
......@@ -965,7 +960,7 @@ class Grammar:
result, 'Parser "%s" did not match empty document.' % str(parser),
Error.PARSER_DID_NOT_MATCH)
while rest and len(stitches) < MAX_DROPOUTS:
while rest and len(stitches) < self.max_parser_dropouts__:
result, rest = parser(rest)
if rest:
fwd = rest.find("\n") + 1 or len(rest)
......@@ -989,7 +984,7 @@ class Grammar:
+ (("! trying to recover"
+ (" but stopping history recording at this point."
if self.history_tracking__ else "..."))
if len(stitches) < MAX_DROPOUTS
if len(stitches) < self.max_parser_dropouts__
else " too often! Terminating parser.")
error_code = Error.PARSER_STOPPED_BEFORE_END
stitches.append(Node(ZOMBIE_TAG, skip).with_pos(tail_pos(stitches)))
......@@ -1074,23 +1069,23 @@ class Grammar:
return line_col(self.document_lbreaks__, self.document_length__ - len(text))
# def static_analysis(self) -> List[GrammarErrorType]:
# """
# EXPERIMENTAL (does not catch infinite loops due to regular expressions...)
#
# Checks the parser tree statically for possible errors. At the moment only
# infinite loops will be detected.
# :return: a list of error-tuples consisting of the narrowest containing
# named parser (i.e. the symbol on which the failure occurred),
# the actual parser that failed and an error object.
# """
# error_list = [] # type: List[GrammarErrorType]
#
# def visit_parser(parser: Parser) -> None:
# nonlocal error_list
#
# self.root_parser__.apply(visit_parser)
# return error_list
def static_analysis(self) -> List[GrammarErrorType]:
"""
EXPERIMENTAL (does not catch infinite loops due to regular expressions...)
Checks the parser tree statically for possible errors. At the moment only
infinite loops will be detected.
:return: a list of error-tuples consisting of the narrowest containing
named parser (i.e. the symbol on which the failure occurred),
the actual parser that failed and an error object.
"""
error_list = [] # type: List[GrammarErrorType]
def visit_parser(parser: Parser) -> None:
nonlocal error_list
self.root_parser__.apply(visit_parser)
return error_list
def dsl_error_msg(parser: Parser, error_str: str) -> str:
......
......@@ -51,7 +51,7 @@ __all__ = ('TransformationDict',
'replace_by_single_child',
'reduce_single_child',
'replace_or_reduce',
'replace_parser',
'exchange_parser',
'collapse',
'collapse_if',
# 'merge_children',
......@@ -654,7 +654,7 @@ def replace_or_reduce(context: List[Node], condition: Callable = is_named):
@transformation_factory
def replace_parser(context: List[Node], name: str):
def exchange_parser(context: List[Node], name: str):
"""
Replaces the parser of a Node with a mock parser with the given
name.
......
......@@ -12,26 +12,47 @@
@ ignorecase = False # literals and regular expressions are case-sensitive
@ drop = whitespace, token # drop anonymous whitespace
#######################################################################
#
#: Structure and Components
#: Expressions
#
#######################################################################
expression = term { EXPR_OP~ term}
term = factor { TERM_OP~ factor}
factor = [SIGN] ( NUMBER | VARIABLE | group ) { VARIABLE | group }
group = "(" expression ")"
expression = addition | subtraction | term
addition = (term "+" (addition|term)) | (subtraction "+" term)
subtraction = expression "-" term
#######################################################################
#
#: "Leaf"-Expressions
#: Terms
#
#######################################################################
EXPR_OP = /\+/ | /-/
TERM_OP = /\*/ | /\//
SIGN = /-/
term = multiplication | division | factor
multiplication = factor ["*"] term
division = term "/" (multiplication | factor)
#######################################################################
#
#: Factors
#
#######################################################################
factor = [sign] ( NUMBER | VARIABLE | group )
sign = PLUS | MINUS
group = "(" §expression ")"
#######################################################################
#
#: Tokens
#
#######################################################################
PLUS = /\+/
MINUS = /-/
NUMBER = /(?:0|(?:[1-9]\d*))(?:\.\d+)?/~
VARIABLE = /[A-Za-z]/~
......@@ -12,7 +12,7 @@ from functools import partial
import os
import sys
sys.path.append(r'/home/eckhart/Entwicklung/DHParser')
sys.path.extend(['../../', '../', './'])
try:
import regex as re
......@@ -29,7 +29,7 @@ from DHParser import logging, is_filename, load_if_file, \
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \
remove_expendables, remove_empty, remove_tokens, flatten, is_insignificant_whitespace, is_empty, \
is_expendable, collapse, collapse_if, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \
remove_nodes, remove_content, remove_brackets, replace_parser, remove_anonymous_tokens, \
remove_nodes, remove_content, remove_brackets, exchange_parser, remove_anonymous_tokens, \
keep_children, is_one_of, not_one_of, has_content, apply_if, remove_first, remove_last, \
remove_anonymous_empty, keep_nodes, traverse_locally, strip, lstrip, rstrip, \
replace_content, replace_content_by, forbid, assert_content, remove_infix_operator, \
......@@ -58,8 +58,11 @@ def get_preprocessor() -> PreprocessorFunc:
class ArithmeticGrammar(Grammar):
r"""Parser for an Arithmetic source file.
"""
addition = Forward()
expression = Forward()
source_hash__ = "588e988cfef8ace70244463ad9c64fc7"
multiplication = Forward()
term = Forward()
source_hash__ = "6707df7f53e835c1e97330f132324ce8"
static_analysis_pending__ = [True]
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
......@@ -70,13 +73,17 @@ class ArithmeticGrammar(Grammar):
wsp__ = Whitespace(WSP_RE__)
VARIABLE = Series(RegExp('[A-Za-z]'), dwsp__)
NUMBER = Series(RegExp('(?:0|(?:[1-9]\\d*))(?:\\.\\d+)?'), dwsp__)
SIGN = RegExp('-')
TERM_OP = Alternative(RegExp('\\*'), RegExp('/'))
EXPR_OP = Alternative(RegExp('\\+'), RegExp('-'))
group = Series(Series(DropToken("("), dwsp__), expression, Series(DropToken(")"), dwsp__))
factor = Series(Option(SIGN), Alternative(NUMBER, VARIABLE, group), ZeroOrMore(Alternative(VARIABLE, group)))
term = Series(factor, ZeroOrMore(Series(TERM_OP, dwsp__, factor)))
expression.set(Series(term, ZeroOrMore(Series(EXPR_OP, dwsp__, term))))
MINUS = RegExp('-')
PLUS = RegExp('\\+')
group = Series(Series(DropToken("("), dwsp__), expression, Series(DropToken(")"), dwsp__), mandatory=1)
sign = Alternative(PLUS, MINUS)
factor = Series(Option(sign), Alternative(NUMBER, VARIABLE, group))
division = Series(term, Series(DropToken("/"), dwsp__), Alternative(multiplication, factor))
multiplication.set(Series(factor, Option(Series(DropToken("*"), dwsp__)), term))
term.set(Alternative(multiplication, division, factor))
subtraction = Series(expression, Series(DropToken("-"), dwsp__), term)
addition.set(Alternative(Series(term, Series(DropToken("+"), dwsp__), Alternative(addition, term)), Series(subtraction, Series(DropToken("+"), dwsp__), term)))
expression.set(Alternative(addition, subtraction, term))
root__ = expression
def get_grammar() -> ArithmeticGrammar:
......@@ -97,21 +104,15 @@ def get_grammar() -> ArithmeticGrammar:
#
#######################################################################
Arithmetic_AST_transformation_table = {
# AST Transformations for the Arithmetic-grammar
"<": flatten_anonymous_nodes,
"expression": [],
"term": [reduce_single_child],
"factor": [reduce_single_child],
"group": [remove_tokens('(', ')'), replace_by_single_child],
"NUMBER": [],
"VARIABLE": [],
":Token": reduce_single_child,
"*": replace_by_single_child
# "<": flatten_anonymous_nodes,
"expression, term, sign, group, factor": [replace_by_single_child],
}
def ArithmeticTransform() -> TransformationDict:
def ArithmeticTransform() -> TransformationFunc:
return partial(traverse, processing_table=Arithmetic_AST_transformation_table.copy())
def get_transformer() -> TransformationFunc:
......
#!/usr/bin/python3
import sys
LOGGING = True
sys.path.extend(['../../', '../', './'])
from DHParser import grammar_provider, logging, CONFIG_PRESET
CONFIG_PRESET['ast_serialization'] = "S-expression"
CONFIG_PRESET['test_parallelization'] = False
CONFIG_PRESET['left_recursion_depth'] = 2
arithmetic_syntax = """
expression = addition | subtraction
addition = (expression | term) "+" (expression | term)
subtraction = (expression | term) "-" (expression | term)
term = multiplication | division
multiplication = (term | factor) "*" (term | factor)
division = (term | factor) "/" (term | factor)
factor = [SIGN] ( NUMBER | VARIABLE | group ) { VARIABLE | group }
group = "(" §expression ")"
SIGN = /[+-]/
NUMBER = /(?:0|(?:[1-9]\d*))(?:\.\d+)?/~
VARIABLE = /[A-Za-z]/~
"""
if __name__ == "__main__":
arithmetic = grammar_provider(arithmetic_syntax)()
assert arithmetic
with logging():
syntax_tree = arithmetic("(a + b) * (a - b)")
......@@ -5,8 +5,6 @@ M3: "-2.71828"
M4: "-x"
M5: "(2 + x)"
M6: "-(a * b)"
M7: "4x"
M8: "-2x"
[fail:factor]
F1: "x4"
......
......@@ -2,8 +2,11 @@
[match:term]
M1: "2 * 4"
M2: "2 / 4"
M3: "(2 + 4) * (2 - 4)"
M4: "(a + b)(a - b)"
M3: "(2 * 4)(4 / 4)"
M4: "(2 + 4) * (2 - 4)"
M5: "(a + b)(a - b)"
M6: "4x"
M7: "-2x"
[ast:term]
......
[match:expression]
M1: "2 + x"
M2: "5 + 3 + 2"
M3: "5 - 3 - 2"
M4: "5 + 3 - 2"
M5: "5 - 3 + 2"
[ast:expression]
......
......@@ -8,7 +8,7 @@ import sys
LOGGING = False
sys.path.append(r'/home/eckhart/Entwicklung/DHParser')
sys.path.extend(['../../', '../', './'])
scriptpath = os.path.dirname(__file__)
......@@ -24,6 +24,7 @@ except ModuleNotFoundError:
CONFIG_PRESET['ast_serialization'] = "S-expression"
CONFIG_PRESET['test_parallelization'] = True
def recompile_grammar(grammar_src, force):
......
......@@ -14,42 +14,24 @@
#######################################################################
#
#: Expressions