Commit 06d5fa5a authored by eckhart's avatar eckhart

- static parser checking infrastructure readded

parent 90f7d39f
......@@ -37,32 +37,41 @@ __all__ = ('CONFIG_PRESET',)
CONFIG_PRESET = dict() # type: Dict[Hashable, Any]
# DHParser.ebnf.EBNFCompiler class adds the EBNF-grammar to the
# docstring of the generated Grammar-class
# Default value: False
CONFIG_PRESET['add_grammar_source_to_parser_docstring'] = False
########################################################################
#
# parser configuration
#
########################################################################
# Flattens anonymous nodes, by removing the node and adding its children
# to the parent node in place of the removed node. This is a very useful
# optimization that should be turned on except for learning or teaching
# purposes, in which case a concrete syntax tree that more diligently
# reflects the parser structure may be helpful.
# Default value: True
CONFIG_PRESET['flatten_tree_while_parsing'] = True
# # Carries out static analysis on the the parser tree before parsing starts
# # to ensure its correctness. Possible values are:
# # 'early' - static analysis is carried out by DHParser.ebnf.EBNFCompiler,
# # already. Any errors it revealed will be located in the EBNF
# # source code. This naturally only works for parser that are
# # generated from an EBNF syntax declaration.
# # 'late' - static analysis is carried out when instantiating a Grammar
# # (sub-)class. This works also for parser trees that are
# # handwritten in Python using the parser classes from module
# # `parse`. It slightly slows down instantiation of Grammar
# # classes, though.
# # 'none' - no static analysis at all (not recommended).
# # Default value: "early"
# CONFIG_PRESET['static_analysis'] = "early"
# Maximum depth of parser's left recursion
# This limit should not be set too high, because the left recursion
# catching algorithm can take exponential time, and, of course,
# because of Python's recursion depth limit
# Left recursion handling can be turned off by setting this value to zero
# Default value: 5
CONFIG_PRESET['left_recursion_depth'] = 5
# Maximum allowed number of retries after errors where the parser
# would exit before the complete document has been parsed. Should
# not be set too high, because automatic retry works poorly.
# This value does not affect the @resume-directive.
# Default value: 3
CONFIG_PRESET['max_parser_dropouts'] = 3
########################################################################
#
# syntaxtree configuration
#
########################################################################
# Defines the output format for the serialization of syntax trees.
# Possible values are:
......@@ -83,8 +92,43 @@ CONFIG_PRESET['default_serialization'] = "S-expression"
# form by DHParser.syntaxtree.serialize() and other functions
# that use serialize(), like, for example, the reporting functions
# in DHParser.testing.
# Default value: 120
CONFIG_PRESET['flatten_sxpr_threshold'] = 120
########################################################################
#
# ebnf compiler configuration
#
########################################################################
# Carries out static analysis on the parser tree before parsing starts
# to ensure its correctness. Possible values are:
# 'early' - static analysis is carried out by DHParser.ebnf.EBNFCompiler,
# already. Any errors it revealed will be located in the EBNF
# source code. This naturally only works for parsers that are
# generated from an EBNF syntax declaration.
# 'late' - static analysis is carried out when instantiating a Grammar
# (sub-)class. This works also for parser trees that are
# handwritten in Python using the parser classes from module
# `parse`. It slightly slows down instantiation of Grammar
# classes, though.
# 'none' - no static analysis at all (not recommended).
# Default value: "early"
CONFIG_PRESET['static_analysis'] = "none"
# DHParser.ebnf.EBNFCompiler class adds the EBNF-grammar to the
# docstring of the generated Grammar-class
# Default value: False
CONFIG_PRESET['add_grammar_source_to_parser_docstring'] = False
########################################################################
#
# testing framework configuration
#
########################################################################
# Allows (coarse-grained) parallelization for running tests via the
# Python multiprocessing module
# Default value: True
......
......@@ -95,7 +95,7 @@ from DHParser import logging, is_filename, load_if_file, \\
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \\
remove_expendables, remove_empty, remove_tokens, flatten, is_insignificant_whitespace, \\
is_expendable, collapse, collapse_if, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \\
remove_nodes, remove_content, remove_brackets, replace_parser, remove_anonymous_tokens, \\
remove_nodes, remove_content, remove_brackets, exchange_parser, remove_anonymous_tokens, \\
keep_children, is_one_of, not_one_of, has_content, apply_if, remove_first, remove_last, \\
remove_anonymous_empty, keep_nodes, traverse_locally, strip, lstrip, rstrip, \\
replace_content, replace_content_by, forbid, assert_content, remove_infix_operator, \\
......@@ -330,7 +330,7 @@ def get_grammar() -> {NAME}Grammar:
TRANSFORMER_FACTORY = '''
def {NAME}Transform() -> TransformationDict:
def {NAME}Transform() -> TransformationFunc:
return partial(traverse, processing_table={NAME}_AST_transformation_table.copy())
def get_transformer() -> TransformationFunc:
......@@ -915,20 +915,20 @@ class EBNFCompiler(Compiler):
self.definitions.update(definitions)
grammar_python_src = self.assemble_parser(definitions, node)
# if get_config_value('static_analysis') == 'early':
# try:
# grammar_class = compile_python_object(DHPARSER_IMPORTS + grammar_python_src,
# self.grammar_name)
# _ = grammar_class()
# grammar_python_src = grammar_python_src.replace(
# 'static_analysis_pending__ = [True]', 'static_analysis_pending__ = []', 1)
# except NameError:
# pass # undefined name in the grammar are already caught and reported
# except GrammarError as error:
# for sym, prs, err in error.errors:
# symdef_node = self.rules[sym][0]
# err.pos = self.rules[sym][0].pos
# self.tree.add_error(symdef_node, err)
if get_config_value('static_analysis') == 'early':
try:
grammar_class = compile_python_object(DHPARSER_IMPORTS + grammar_python_src,
self.grammar_name)
_ = grammar_class()
grammar_python_src = grammar_python_src.replace(
'static_analysis_pending__ = [True]', 'static_analysis_pending__ = []', 1)
except NameError:
pass # undefined name in the grammar are already caught and reported
except GrammarError as error:
for sym, prs, err in error.errors:
symdef_node = self.rules[sym][0]
err.pos = self.rules[sym][0].pos
self.tree.add_error(symdef_node, err)
return grammar_python_src
......
This diff is collapsed.
......@@ -51,7 +51,7 @@ __all__ = ('TransformationDict',
'replace_by_single_child',
'reduce_single_child',
'replace_or_reduce',
'replace_parser',
'exchange_parser',
'collapse',
'collapse_if',
# 'merge_children',
......@@ -654,7 +654,7 @@ def replace_or_reduce(context: List[Node], condition: Callable = is_named):
@transformation_factory
def replace_parser(context: List[Node], name: str):
def exchange_parser(context: List[Node], name: str):
"""
Replaces the parser of a Node with a mock parser with the given
name.
......
......@@ -12,26 +12,47 @@
@ ignorecase = False # literals and regular expressions are case-sensitive
@ drop = whitespace, token # drop anonymous whitespace
#######################################################################
#
#: Structure and Components
#: Expressions
#
#######################################################################
expression = term { EXPR_OP~ term}
term = factor { TERM_OP~ factor}
factor = [SIGN] ( NUMBER | VARIABLE | group ) { VARIABLE | group }
group = "(" expression ")"
expression = addition | subtraction | term
addition = (term "+" (addition|term)) | (subtraction "+" term)
subtraction = expression "-" term
#######################################################################
#
#: "Leaf"-Expressions
#: Terms
#
#######################################################################
EXPR_OP = /\+/ | /-/
TERM_OP = /\*/ | /\//
SIGN = /-/
term = multiplication | division | factor
multiplication = factor ["*"] term
division = term "/" (multiplication | factor)
#######################################################################
#
#: Factors
#
#######################################################################
factor = [sign] ( NUMBER | VARIABLE | group )
sign = PLUS | MINUS
group = "(" §expression ")"
#######################################################################
#
#: Tokens
#
#######################################################################
PLUS = /\+/
MINUS = /-/
NUMBER = /(?:0|(?:[1-9]\d*))(?:\.\d+)?/~
VARIABLE = /[A-Za-z]/~
......@@ -12,7 +12,7 @@ from functools import partial
import os
import sys
sys.path.append(r'/home/eckhart/Entwicklung/DHParser')
sys.path.extend(['../../', '../', './'])
try:
import regex as re
......@@ -29,7 +29,7 @@ from DHParser import logging, is_filename, load_if_file, \
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \
remove_expendables, remove_empty, remove_tokens, flatten, is_insignificant_whitespace, is_empty, \
is_expendable, collapse, collapse_if, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \
remove_nodes, remove_content, remove_brackets, replace_parser, remove_anonymous_tokens, \
remove_nodes, remove_content, remove_brackets, exchange_parser, remove_anonymous_tokens, \
keep_children, is_one_of, not_one_of, has_content, apply_if, remove_first, remove_last, \
remove_anonymous_empty, keep_nodes, traverse_locally, strip, lstrip, rstrip, \
replace_content, replace_content_by, forbid, assert_content, remove_infix_operator, \
......@@ -58,8 +58,11 @@ def get_preprocessor() -> PreprocessorFunc:
class ArithmeticGrammar(Grammar):
r"""Parser for an Arithmetic source file.
"""
addition = Forward()
expression = Forward()
source_hash__ = "588e988cfef8ace70244463ad9c64fc7"
multiplication = Forward()
term = Forward()
source_hash__ = "6707df7f53e835c1e97330f132324ce8"
static_analysis_pending__ = [True]
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
......@@ -70,13 +73,17 @@ class ArithmeticGrammar(Grammar):
wsp__ = Whitespace(WSP_RE__)
VARIABLE = Series(RegExp('[A-Za-z]'), dwsp__)
NUMBER = Series(RegExp('(?:0|(?:[1-9]\\d*))(?:\\.\\d+)?'), dwsp__)
SIGN = RegExp('-')
TERM_OP = Alternative(RegExp('\\*'), RegExp('/'))
EXPR_OP = Alternative(RegExp('\\+'), RegExp('-'))
group = Series(Series(DropToken("("), dwsp__), expression, Series(DropToken(")"), dwsp__))
factor = Series(Option(SIGN), Alternative(NUMBER, VARIABLE, group), ZeroOrMore(Alternative(VARIABLE, group)))
term = Series(factor, ZeroOrMore(Series(TERM_OP, dwsp__, factor)))
expression.set(Series(term, ZeroOrMore(Series(EXPR_OP, dwsp__, term))))
MINUS = RegExp('-')
PLUS = RegExp('\\+')
group = Series(Series(DropToken("("), dwsp__), expression, Series(DropToken(")"), dwsp__), mandatory=1)
sign = Alternative(PLUS, MINUS)
factor = Series(Option(sign), Alternative(NUMBER, VARIABLE, group))
division = Series(term, Series(DropToken("/"), dwsp__), Alternative(multiplication, factor))
multiplication.set(Series(factor, Option(Series(DropToken("*"), dwsp__)), term))
term.set(Alternative(multiplication, division, factor))
subtraction = Series(expression, Series(DropToken("-"), dwsp__), term)
addition.set(Alternative(Series(term, Series(DropToken("+"), dwsp__), Alternative(addition, term)), Series(subtraction, Series(DropToken("+"), dwsp__), term)))
expression.set(Alternative(addition, subtraction, term))
root__ = expression
def get_grammar() -> ArithmeticGrammar:
......@@ -97,21 +104,15 @@ def get_grammar() -> ArithmeticGrammar:
#
#######################################################################
Arithmetic_AST_transformation_table = {
# AST Transformations for the Arithmetic-grammar
"<": flatten_anonymous_nodes,
"expression": [],
"term": [reduce_single_child],
"factor": [reduce_single_child],
"group": [remove_tokens('(', ')'), replace_by_single_child],
"NUMBER": [],
"VARIABLE": [],
":Token": reduce_single_child,
"*": replace_by_single_child
# "<": flatten_anonymous_nodes,
"expression, term, sign, group, factor": [replace_by_single_child],
}
def ArithmeticTransform() -> TransformationDict:
def ArithmeticTransform() -> TransformationFunc:
return partial(traverse, processing_table=Arithmetic_AST_transformation_table.copy())
def get_transformer() -> TransformationFunc:
......
#!/usr/bin/python3
# Stand-alone experiment: compiles a small, left-recursive arithmetic grammar
# with DHParser and parses one sample expression.

import sys

# NOTE(review): LOGGING is not read anywhere in this script; it is kept
# because other tooling may inspect it -- confirm before removing.
LOGGING = True

# Make the DHParser package importable when the script is started from a
# directory one or two levels below the package root.
sys.path.extend(['../../', '../', './'])

from DHParser import grammar_provider, logging, CONFIG_PRESET

CONFIG_PRESET['ast_serialization'] = "S-expression"
CONFIG_PRESET['test_parallelization'] = False
# The grammar below is left-recursive (expression -> addition -> expression),
# so the left-recursion depth limit is set explicitly for this experiment.
CONFIG_PRESET['left_recursion_depth'] = 2

# Raw string: the regular expressions in the grammar contain backslashes
# (`\d`, `\.`), which are invalid escape sequences in a normal string
# literal. The resulting string value is the same as without the r-prefix.
arithmetic_syntax = r"""
expression = addition | subtraction
addition = (expression | term) "+" (expression | term)
subtraction = (expression | term) "-" (expression | term)
term = multiplication | division
multiplication = (term | factor) "*" (term | factor)
division = (term | factor) "/" (term | factor)
factor = [SIGN] ( NUMBER | VARIABLE | group ) { VARIABLE | group }
group = "(" §expression ")"
SIGN = /[+-]/
NUMBER = /(?:0|(?:[1-9]\d*))(?:\.\d+)?/~
VARIABLE = /[A-Za-z]/~
"""

if __name__ == "__main__":
    # grammar_provider() returns a factory; calling it yields the parser
    # object that is applied to the source text below.
    arithmetic = grammar_provider(arithmetic_syntax)()
    assert arithmetic
    # presumably enables DHParser's logging while parsing -- confirm
    with logging():
        syntax_tree = arithmetic("(a + b) * (a - b)")
......@@ -5,8 +5,6 @@ M3: "-2.71828"
M4: "-x"
M5: "(2 + x)"
M6: "-(a * b)"
M7: "4x"
M8: "-2x"
[fail:factor]
F1: "x4"
......
......@@ -2,8 +2,11 @@
[match:term]
M1: "2 * 4"
M2: "2 / 4"
M3: "(2 + 4) * (2 - 4)"
M4: "(a + b)(a - b)"
M3: "(2 * 4)(4 / 4)"
M4: "(2 + 4) * (2 - 4)"
M5: "(a + b)(a - b)"
M6: "4x"
M7: "-2x"
[ast:term]
......
[match:expression]
M1: "2 + x"
M2: "5 + 3 + 2"
M3: "5 - 3 - 2"
M4: "5 + 3 - 2"
M5: "5 - 3 + 2"
[ast:expression]
......
......@@ -8,7 +8,7 @@ import sys
LOGGING = False
sys.path.append(r'/home/eckhart/Entwicklung/DHParser')
sys.path.extend(['../../', '../', './'])
scriptpath = os.path.dirname(__file__)
......@@ -24,6 +24,7 @@ except ModuleNotFoundError:
CONFIG_PRESET['ast_serialization'] = "S-expression"
CONFIG_PRESET['test_parallelization'] = True
def recompile_grammar(grammar_src, force):
......
......@@ -14,42 +14,24 @@
#######################################################################
#
#: Expressions
#: Structure and Components
#
#######################################################################
expression = addition | subtraction
addition = (expression | term) "+" (expression | term)
subtraction = (expression | term) "-" (expression | term)
#######################################################################
#
#: Terms
#
#######################################################################
term = multiplication | division
multiplication = (term | factor) "*" (term | factor)
division = (term | factor) "/" (term | factor)
expression = term { EXPR_OP term}
term = factor { TERM_OP factor}
factor = [SIGN] ( NUMBER | VARIABLE | group ) { VARIABLE | group }
group = "(" expression ")"
#######################################################################
#
#: Factors
#: "Leaf"-Expressions
#
#######################################################################
factor = [SIGN] ( NUMBER | VARIABLE | group ) { VARIABLE | group }
group = "(" expression ")"
#######################################################################
#
#: Tokens
#
#######################################################################
EXPR_OP = ( /\+/ | /-/ ) ~
TERM_OP = ( /\*/ | /\// ) ~
SIGN = /[-+]/
SIGN = /[+-]/
NUMBER = /(?:0|(?:[1-9]\d*))(?:\.\d+)?/~
VARIABLE = /[A-Za-z]/~
......@@ -12,7 +12,7 @@ from functools import partial
import os
import sys
sys.path.append(r'/home/eckhart/Entwicklung/DHParser')
sys.path.extend(['../../', '../', './'])
try:
import regex as re
......@@ -29,7 +29,7 @@ from DHParser import logging, is_filename, load_if_file, \
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \
remove_expendables, remove_empty, remove_tokens, flatten, is_insignificant_whitespace, is_empty, \
is_expendable, collapse, collapse_if, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \
remove_nodes, remove_content, remove_brackets, replace_parser, remove_anonymous_tokens, \
remove_nodes, remove_content, remove_brackets, exchange_parser, remove_anonymous_tokens, \
keep_children, is_one_of, not_one_of, has_content, apply_if, remove_first, remove_last, \
remove_anonymous_empty, keep_nodes, traverse_locally, strip, lstrip, rstrip, \
replace_content, replace_content_by, forbid, assert_content, remove_infix_operator, \
......@@ -42,11 +42,11 @@ from DHParser import logging, is_filename, load_if_file, \
#
#######################################################################
def ArithmeticPreprocessor(text):
def ArithmeticFlatTreePreprocessor(text):
return text, lambda i: i
def get_preprocessor() -> PreprocessorFunc:
return ArithmeticPreprocessor
return ArithmeticFlatTreePreprocessor
#######################################################################
......@@ -55,12 +55,11 @@ def get_preprocessor() -> PreprocessorFunc:
#
#######################################################################
class ArithmeticGrammar(Grammar):
r"""Parser for an Arithmetic source file.
class ArithmeticFlatTreeGrammar(Grammar):
r"""Parser for an ArithmeticFlatTree source file.
"""
expression = Forward()
term = Forward()
source_hash__ = "ba85985ea3917dbb90568f216cb1cbb2"
source_hash__ = "d0074a0a7b827e932608af86a5390de7"
static_analysis_pending__ = [True]
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
......@@ -71,26 +70,24 @@ class ArithmeticGrammar(Grammar):
wsp__ = Whitespace(WSP_RE__)
VARIABLE = Series(RegExp('[A-Za-z]'), dwsp__)
NUMBER = Series(RegExp('(?:0|(?:[1-9]\\d*))(?:\\.\\d+)?'), dwsp__)
SIGN = RegExp('[+-]')
SIGN = RegExp('[-+]')
TERM_OP = Series(Alternative(RegExp('\\*'), RegExp('/')), dwsp__)
EXPR_OP = Series(Alternative(RegExp('\\+'), RegExp('-')), dwsp__)
group = Series(Series(DropToken("("), dwsp__), expression, Series(DropToken(")"), dwsp__))
factor = Series(Option(SIGN), Alternative(NUMBER, VARIABLE, group), ZeroOrMore(Alternative(VARIABLE, group)))
division = Series(Alternative(term, factor), Series(DropToken("/"), dwsp__), Alternative(term, factor))
multiplication = Series(Alternative(term, factor), Series(DropToken("*"), dwsp__), Alternative(term, factor))
term.set(Alternative(multiplication, division))
subtraction = Series(Alternative(expression, term), Series(DropToken("-"), dwsp__), Alternative(expression, term))
addition = Series(Alternative(expression, term), Series(DropToken("+"), dwsp__), Alternative(expression, term))
expression.set(Alternative(addition, subtraction))
term = Series(factor, ZeroOrMore(Series(TERM_OP, factor)))
expression.set(Series(term, ZeroOrMore(Series(EXPR_OP, term))))
root__ = expression
def get_grammar() -> ArithmeticGrammar:
def get_grammar() -> ArithmeticFlatTreeGrammar:
global GLOBALS
try:
grammar = GLOBALS.Arithmetic_00000001_grammar_singleton
grammar = GLOBALS.ArithmeticFlatTree_00000001_grammar_singleton
except AttributeError:
GLOBALS.Arithmetic_00000001_grammar_singleton = ArithmeticGrammar()
GLOBALS.ArithmeticFlatTree_00000001_grammar_singleton = ArithmeticFlatTreeGrammar()
if hasattr(get_grammar, 'python_src__'):
GLOBALS.Arithmetic_00000001_grammar_singleton.python_src__ = get_grammar.python_src__
grammar = GLOBALS.Arithmetic_00000001_grammar_singleton
GLOBALS.ArithmeticFlatTree_00000001_grammar_singleton.python_src__ = get_grammar.python_src__
grammar = GLOBALS.ArithmeticFlatTree_00000001_grammar_singleton
return grammar
......@@ -100,30 +97,29 @@ def get_grammar() -> ArithmeticGrammar:
#
#######################################################################
Arithmetic_AST_transformation_table = {
# AST Transformations for the Arithmetic-grammar
# "<": flatten_anonymous_nodes,
ArithmeticFlatTree_AST_transformation_table = {
# AST Transformations for the ArithmeticFlatTree-grammar
"<": flatten_anonymous_nodes,
"expression": [],
"term": [],
"factor": [],
"group": [],
"term": [reduce_single_child],
"factor": [reduce_single_child],
"group": [remove_tokens('(', ')'), replace_by_single_child],
"NUMBER": [],
"VARIABLE": []
# ":Token": reduce_single_child,
# "*": replace_by_single_child
"VARIABLE": [],
":Token": reduce_single_child,
"*": replace_by_single_child
}
def ArithmeticTransform() -> TransformationDict:
return partial(traverse, processing_table=Arithmetic_AST_transformation_table.copy())
def ArithmeticFlatTreeTransform() -> TransformationFunc:
return partial(traverse, processing_table=ArithmeticFlatTree_AST_transformation_table.copy())
def get_transformer() -> TransformationFunc:
try:
transformer = GLOBALS.Arithmetic_00000001_transformer_singleton
transformer = GLOBALS.ArithmeticFlatTree_00000001_transformer_singleton
except AttributeError:
GLOBALS.Arithmetic_00000001_transformer_singleton = ArithmeticTransform()
transformer = GLOBALS.Arithmetic_00000001_transformer_singleton
GLOBALS.ArithmeticFlatTree_00000001_transformer_singleton = ArithmeticFlatTreeTransform()
transformer = GLOBALS.ArithmeticFlatTree_00000001_transformer_singleton
return transformer
......@@ -133,12 +129,12 @@ def get_transformer() -> TransformationFunc:
#
#######################################################################
class ArithmeticCompiler(Compiler):
"""Compiler for the abstract-syntax-tree of a Arithmetic source file.
class ArithmeticFlatTreeCompiler(Compiler):
"""Compiler for the abstract-syntax-tree of a ArithmeticFlatTree source file.
"""
def __init__(self):
super(ArithmeticCompiler, self).__init__()
super(ArithmeticFlatTreeCompiler, self).__init__()
def _reset(self):
super()._reset()
......@@ -159,12 +155,12 @@ class ArithmeticCompiler(Compiler):
# return node
def get_compiler() -> ArithmeticCompiler:
def get_compiler() -> ArithmeticFlatTreeCompiler:
try:
compiler = GLOBALS.Arithmetic_00000001_compiler_singleton
compiler = GLOBALS.ArithmeticFlatTree_00000001_compiler_singleton
except AttributeError:
GLOBALS.Arithmetic_00000001_compiler_singleton = ArithmeticCompiler()
compiler = GLOBALS.Arithmetic_00000001_compiler_singleton
GLOBALS.ArithmeticFlatTree_00000001_compiler_singleton = ArithmeticFlatTreeCompiler()
compiler = GLOBALS.ArithmeticFlatTree_00000001_compiler_singleton
return compiler
......@@ -216,4 +212,4 @@ if __name__ == "__main__":
else:
print(result.as_xml() if isinstance(result, Node) else result)
else: