Commit 6e5b22ea authored by eckhart

- Early static analysis... work in progress!!!

parent 61e8e4f8
......@@ -26,8 +26,9 @@ import os
import platform
import stat
import DHParser.ebnf
from DHParser.compile import Compiler, compile_source
from DHParser.ebnf import EBNFCompiler, grammar_changed, \
from DHParser.ebnf import EBNFCompiler, grammar_changed, DHPARSER_IMPORTS, \
get_ebnf_preprocessor, get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler, \
PreprocessorFactoryFunc, ParserFactoryFunc, TransformerFactoryFunc, CompilerFactoryFunc
from DHParser.error import Error, is_error, has_errors, only_errors
......@@ -37,12 +38,12 @@ from DHParser.preprocess import nil_preprocessor, PreprocessorFunc
from DHParser.syntaxtree import Node
from DHParser.transform import TransformationFunc
from DHParser.toolkit import load_if_file, is_python_code, compile_python_object, \
re, typing
from typing import Any, cast, List, Tuple, Union, Iterator, Iterable, Optional, Callable
re
from typing import Any, cast, List, Tuple, Union, Iterator, Iterable, Optional, \
Callable, Generator
__all__ = ('DHPARSER_IMPORTS',
'GrammarError',
__all__ = ('DefinitionError',
'CompilationError',
'load_compiler_suite',
'compileDSL',
......@@ -71,41 +72,6 @@ AST_SECTION = "AST SECTION - Can be edited. Changes will be preserved."
COMPILER_SECTION = "COMPILER SECTION - Can be edited. Changes will be preserved."
END_SECTIONS_MARKER = "END OF DHPARSER-SECTIONS"
dhparserdir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
DHPARSER_IMPORTS = '''
import collections
from functools import partial
import os
import sys
sys.path.append(r'{dhparserdir}')
try:
import regex as re
except ImportError:
import re
from DHParser import logging, is_filename, load_if_file, \\
Grammar, Compiler, nil_preprocessor, PreprocessorToken, Whitespace, DropWhitespace, \\
Lookbehind, Lookahead, Alternative, Pop, Token, DropToken, Synonym, AllOf, SomeOf, \\
Unordered, Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \\
ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \\
grammar_changed, last_value, counterpart, accumulate, PreprocessorFunc, is_empty, \\
Node, TransformationFunc, TransformationDict, transformation_factory, traverse, \\
remove_children_if, move_adjacent, normalize_whitespace, is_anonymous, matches_re, \\
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \\
remove_expendables, remove_empty, remove_tokens, flatten, is_insignificant_whitespace, \\
is_expendable, collapse, collapse_if, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \\
remove_nodes, remove_content, remove_brackets, replace_parser, remove_anonymous_tokens, \\
keep_children, is_one_of, not_one_of, has_content, apply_if, remove_first, remove_last, \\
remove_anonymous_empty, keep_nodes, traverse_locally, strip, lstrip, rstrip, \\
replace_content, replace_content_by, forbid, assert_content, remove_infix_operator, \\
flatten_anonymous_nodes, error_on, recompile_grammar, GLOBALS
'''.format(dhparserdir=dhparserdir)
DHPARSER_MAIN = '''
def compile_src(source, log_dir=''):
"""Compiles ``source`` and returns (result, errors, ast).
......@@ -156,19 +122,23 @@ class DSLException(Exception):
"""
Base class for DSL-exceptions.
"""
def __init__(self, errors):
def __init__(self, errors: Union[List[Error], Generator[Error, None, None]]):
assert isinstance(errors, Iterator) or isinstance(errors, list) \
or isinstance(errors, tuple)
self.errors = errors
self.errors = list(errors)
def __str__(self):
return '\n'.join(str(err) for err in self.errors)
if len(self.errors) == 1:
return str(self.errors[0])
return '\n' + '\n'.join(("%i. " % (i + 1) + str(err))
for i, err in enumerate(self.errors))
# return '\n'.join(str(err) for err in self.errors)
class GrammarError(DSLException):
class DefinitionError(DSLException):
"""
Raised when (already) the grammar of a domain specific language (DSL)
contains errors.
contains errors. Usually, these are repackaged parse.GrammarError(s).
"""
def __init__(self, errors, grammar_src):
super().__init__(errors)
......@@ -178,7 +148,8 @@ class GrammarError(DSLException):
class CompilationError(DSLException):
"""
Raised when a string or file in a domain specific language (DSL)
contains errors.
contains errors. These can also contain definition errors that
have been caught early.
"""
def __init__(self, errors, dsl_text, dsl_grammar, AST, result):
super().__init__(errors)
......@@ -215,7 +186,7 @@ def grammar_instance(grammar_representation) -> Tuple[Grammar, str]:
get_ebnf_grammar(), get_ebnf_transformer(), get_ebnf_compiler())
parser_py = cast(str, result)
if has_errors(messages):
raise GrammarError(only_errors(messages), grammar_src)
raise DefinitionError(only_errors(messages), grammar_src)
parser_root = compile_python_object(DHPARSER_IMPORTS + parser_py, r'\w+Grammar$')()
else:
# assume that dsl_grammar is a ParserHQ-object or Grammar class
......@@ -360,7 +331,7 @@ def load_compiler_suite(compiler_suite: str) -> \
get_ebnf_transformer(),
get_ebnf_compiler(compiler_suite, source))
if has_errors(messages):
raise GrammarError(only_errors(messages), source)
raise DefinitionError(only_errors(messages), source)
preprocessor = get_ebnf_preprocessor
parser = get_ebnf_grammar
ast = get_ebnf_transformer
......@@ -476,7 +447,7 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml") -> It
ebnf_compiler = cast(EBNFCompiler, compiler1)
global SECTION_MARKER, RX_SECTION_MARKER, PREPROCESSOR_SECTION, PARSER_SECTION, \
AST_SECTION, COMPILER_SECTION, END_SECTIONS_MARKER, RX_WHITESPACE, \
DHPARSER_MAIN, DHPARSER_IMPORTS
DHPARSER_MAIN
f = None
try:
f = open(rootname + 'Compiler.py', 'r', encoding="utf-8")
......@@ -503,7 +474,7 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml") -> It
if RX_WHITESPACE.fullmatch(outro):
outro = DHPARSER_MAIN.format(NAME=compiler_name)
if RX_WHITESPACE.fullmatch(imports):
imports = DHPARSER_IMPORTS
imports = DHParser.ebnf.DHPARSER_IMPORTS
if RX_WHITESPACE.fullmatch(preprocessor):
preprocessor = ebnf_compiler.gen_preprocessor_skeleton()
if RX_WHITESPACE.fullmatch(ast):
......
......@@ -32,11 +32,12 @@ import os
from DHParser.compile import CompilerError, Compiler, compile_source, visitor_name
from DHParser.error import Error
from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, Whitespace, \
NegativeLookahead, Alternative, Series, Option, OneOrMore, ZeroOrMore, Token
NegativeLookahead, Alternative, Series, Option, OneOrMore, ZeroOrMore, Token, \
GrammarError
from DHParser.preprocess import nil_preprocessor, PreprocessorFunc
from DHParser.syntaxtree import Node, WHITESPACE_PTYPE, TOKEN_PTYPE
from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name, re, expand_table, \
GLOBALS, CONFIG_PRESET, get_config_value, unrepr, typing
GLOBALS, CONFIG_PRESET, get_config_value, unrepr, compile_python_object, typing
from DHParser.transform import TransformationFunc, traverse, remove_brackets, \
reduce_single_child, replace_by_single_child, remove_expendables, \
remove_tokens, flatten, forbid, assert_content
......@@ -67,6 +68,48 @@ __all__ = ('get_ebnf_preprocessor',
########################################################################
CONFIG_PRESET['add_grammar_source_to_parser_docstring'] = False
CONFIG_PRESET['early_static_analysis'] = True # do a static analysis right after ebnf compilation
########################################################################
#
# source code support
#
########################################################################
dhparserdir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
DHPARSER_IMPORTS = '''
import collections
from functools import partial
import os
import sys
sys.path.append(r'{dhparserdir}')
try:
import regex as re
except ImportError:
import re
from DHParser import logging, is_filename, load_if_file, \\
Grammar, Compiler, nil_preprocessor, PreprocessorToken, Whitespace, DropWhitespace, \\
Lookbehind, Lookahead, Alternative, Pop, Token, DropToken, Synonym, AllOf, SomeOf, \\
Unordered, Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \\
ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \\
grammar_changed, last_value, counterpart, accumulate, PreprocessorFunc, is_empty, \\
Node, TransformationFunc, TransformationDict, transformation_factory, traverse, \\
remove_children_if, move_adjacent, normalize_whitespace, is_anonymous, matches_re, \\
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \\
remove_expendables, remove_empty, remove_tokens, flatten, is_insignificant_whitespace, \\
is_expendable, collapse, collapse_if, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \\
remove_nodes, remove_content, remove_brackets, replace_parser, remove_anonymous_tokens, \\
keep_children, is_one_of, not_one_of, has_content, apply_if, remove_first, remove_last, \\
remove_anonymous_empty, keep_nodes, traverse_locally, strip, lstrip, rstrip, \\
replace_content, replace_content_by, forbid, assert_content, remove_infix_operator, \\
flatten_anonymous_nodes, error_on, recompile_grammar, GLOBALS
'''.format(dhparserdir=dhparserdir)
########################################################################
......@@ -799,6 +842,7 @@ class EBNFCompiler(Compiler):
+ ' source file'
+ ('. Grammar:' if self.grammar_source and show_source else '.')]
definitions.append(('parser_initialization__', '["upon instantiation"]'))
definitions.append(('static_analysis_pending__', 'True'))
if self.grammar_source:
definitions.append(('source_hash__',
'"%s"' % md5(self.grammar_source, __version__)))
......@@ -877,7 +921,17 @@ class EBNFCompiler(Compiler):
# node.error_flag = max(node.error_flag, nd.error_flag)
self.definitions.update(definitions)
return self.assemble_parser(definitions, node)
grammar_python_src = self.assemble_parser(definitions, node)
if get_config_value('early_static_analysis'):
grammar_class = compile_python_object(DHPARSER_IMPORTS + grammar_python_src, self.grammar_name)
try:
_ = grammar_class()
except GrammarError as error:
for sym, prs, err in error.errors:
symdef_node = self.rules[sym][0]
err.pos = self.rules[sym][0].pos
self.tree.add_error(symdef_node, err)
return grammar_python_src
def on_definition(self, node: Node) -> Tuple[str, str]:
......
This diff is collapsed.
......@@ -43,7 +43,7 @@ text = { CONTENT_STRING | "{" text "}" }
WORD = /\w+/~
NO_BLANK_STRING = /[^ \t\n,%]+/~
COMMA_TERMINATED_STRING = { /[^,%]+/ | /(?=%)/~ }
CONTENT_STRING = { /[^{}%]+/ | /(?=%)/~ }+
COMMA_TERMINATED_STRING = { /[^,%]+/ | &/%/~ }
CONTENT_STRING = { /[^{}%]+/ | &/%/~ }+
EOF = !/./
\ No newline at end of file
......@@ -57,7 +57,7 @@ class BibTeXGrammar(Grammar):
r"""Parser for a BibTeX source file.
"""
text = Forward()
source_hash__ = "d9a1a1b431a3185dab127be165a37719"
source_hash__ = "ece0314c999ac86f22796331c05efd62"
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
COMMENT__ = r'//'
......@@ -65,8 +65,8 @@ class BibTeXGrammar(Grammar):
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wsp__ = Whitespace(WSP_RE__)
EOF = NegativeLookahead(RegExp('(?i).'))
CONTENT_STRING = OneOrMore(Alternative(RegExp('(?i)[^{}%]+'), Series(RegExp('(?i)(?=%)'), wsp__)))
COMMA_TERMINATED_STRING = ZeroOrMore(Alternative(RegExp('(?i)[^,%]+'), Series(RegExp('(?i)(?=%)'), wsp__)))
CONTENT_STRING = OneOrMore(Alternative(RegExp('(?i)[^{}%]+'), Series(Lookahead(RegExp('(?i)%')), wsp__)))
COMMA_TERMINATED_STRING = ZeroOrMore(Alternative(RegExp('(?i)[^,%]+'), Series(Lookahead(RegExp('(?i)%')), wsp__)))
NO_BLANK_STRING = Series(RegExp('(?i)[^ \\t\\n,%]+'), wsp__)
WORD = Series(RegExp('(?i)\\w+'), wsp__)
text.set(ZeroOrMore(Alternative(CONTENT_STRING, Series(Series(Token("{"), wsp__), text, Series(Token("}"), wsp__)))))
......
......@@ -68,7 +68,7 @@ Match test "entry" for parser "entry" failed:
organization = {Wikipedia}
}
6:68: Error (1090): DSL parser specification error: Infinite Loop encountered. Caught by parser "CONTENT_STRING = {/(?i)[^{}%]+/ | /(?i)(?=%)/ ~}+".
6:68: Error (1090): DSL parser specification error: Infinite Loop encountered. Caught by parser "CONTENT_STRING = {/(?i)[^{}%]+/ | &/(?i)%/ ~}+".
Call stack: entry->:ZeroOrMore->:Series->content->:Series->text->:Alternative->CONTENT_STRING->:Alternative->:Series->:Whitespace
6:68: Error (1010): '}' ~ expected, "%E2\%80\%9" found!
6:69: Error (1040): Parser stopped before end! trying to recover but stopping history recording at this point.
......
......@@ -31,8 +31,8 @@ from DHParser import compile_source
from DHParser.error import has_errors, Error
from DHParser.syntaxtree import WHITESPACE_PTYPE
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, EBNFTransform, \
get_ebnf_compiler, compile_ebnf
from DHParser.dsl import CompilationError, compileDSL, DHPARSER_IMPORTS, grammar_provider
get_ebnf_compiler, compile_ebnf, DHPARSER_IMPORTS
from DHParser.dsl import CompilationError, compileDSL, grammar_provider
from DHParser.testing import grammar_unit
......@@ -724,6 +724,9 @@ class TestAllOfResume:
assert len(st.errors_sorted) == 1
class TestStaticAnalysis:
pass
if __name__ == "__main__":
from DHParser.testing import runner
......
......@@ -29,10 +29,10 @@ from DHParser.log import logging, is_logging, log_ST, log_parsing_history
from DHParser.error import Error, is_error
from DHParser.parse import Parser, Grammar, Forward, TKN, ZeroOrMore, RE, \
RegExp, Lookbehind, NegativeLookahead, OneOrMore, Series, Alternative, AllOf, SomeOf, \
UnknownParserError, MetaParser, EMPTY_NODE
UnknownParserError, MetaParser, GrammarError, EMPTY_NODE
from DHParser import compile_source
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
from DHParser.dsl import grammar_provider, DHPARSER_IMPORTS
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler, DHPARSER_IMPORTS
from DHParser.dsl import grammar_provider, CompilationError
from DHParser.syntaxtree import Node
......@@ -115,7 +115,16 @@ class TestInfiLoopsAndRecursion:
def test_infinite_loops(self):
minilang = """forever = { // } \n"""
snippet = " "
parser = grammar_provider(minilang)()
try:
parser_class = grammar_provider(minilang)
except CompilationError as error:
assert all(e.code == Error.INFINITE_LOOP for e in error.errors)
print(error)
save = get_config_value('early_static_analysis')
set_config_value('early_static_analysis', False)
parser_class = grammar_provider(minilang)
parser = parser_class()
set_config_value('early_static_analysis', save)
syntax_tree = parser(snippet)
assert any(e.code == Error.INFINITE_LOOP for e in syntax_tree.errors)
res = parser.static_analysis()
......@@ -837,6 +846,46 @@ class TestMetaParser:
assert rv[-1].tag_name != EMPTY_NODE.tag_name, rv[-1].tag_name
class TestStaticAnalysis:
bibtex_grammar = """# bad bibtex-grammar
@ whitespace = /\s*/
@ ignorecase = True
@ comment = //
bibliography = { preamble | comment | entry }
preamble = "@Preamble{" /"/ pre_code /"/~ §"}"
pre_code = { /[^"%]+/ | /%.*\n/ }
comment = "@Comment{" text §"}"
entry = /@/ type "{" key { "," field §"=" content } [","] §"}"
type = WORD
key = NO_BLANK_STRING
field = WORD
content = "{" text "}" | plain_content
plain_content = COMMA_TERMINATED_STRING
text = { CONTENT_STRING | "{" text "}" }
WORD = /\w+/~
NO_BLANK_STRING = /[^ \t\n,%]+/~
COMMA_TERMINATED_STRING = { /[^,%]+/ | &/%/~ } # BOOM !!!
CONTENT_STRING = { /[^{}%]+/ | &/%/~ }+ # BOOM !!!
EOF = !/./
"""
def test_static_analysis(self):
gr_class = grammar_provider(self.bibtex_grammar, 'BibTex')
try:
gr_instance = gr_class()
except GrammarError as error:
affected_parsers = {e[0] for e in error.errors}
assert affected_parsers == {'CONTENT_STRING', 'COMMA_TERMINATED_STRING'}
assert all(e[2].code == Error.INFINITE_LOOP for e in error.errors)
if __name__ == "__main__":
from DHParser.testing import runner
......
......@@ -30,6 +30,7 @@ from DHParser.syntaxtree import parse_sxpr, flatten_sxpr, TOKEN_PTYPE
from DHParser.transform import traverse, remove_expendables, remove_empty, \
replace_by_single_child, reduce_single_child, flatten
from DHParser.dsl import grammar_provider
from DHParser.error import Error
from DHParser.testing import get_report, grammar_unit, unit_from_file, \
reset_unit
from DHParser.log import logging
......@@ -261,8 +262,8 @@ class TestLookahead:
"category": {
"match": {
1: """Mountains: big:
K2""",
2: """Rivers:""" # allowed because lookahaead failure occurs at end of file and is mandatory!
K2""", # case 1: matches only with lookahead (but should not fail in a test)
2: """Rivers:""" # case 2: lookahead failure occurs at end of file and is mandatory. (should not fail as a test)
},
"fail": {
6: """Mountains: big:"""
......@@ -310,6 +311,13 @@ class TestLookahead:
assert not cst.error_flag
def test_unit_lookahead(self):
gr = self.grammar_fac()
# Case 1: Lookahead string is part of the test case; parser fails but for the lookahead
result = gr(self.cases['category']['match'][1], 'category', True)
assert any(e.code == Error.PARSER_LOOKAHEAD_MATCH_ONLY for e in result.errors)
# Case 2: Lookahead string is not part of the test case; parser matches but for the mandatory continuation
result = gr(self.cases['category']['match'][2], 'category', True)
assert any(e.code == Error.MANDATORY_CONTINUATION_AT_EOF for e in result.errors)
errata = grammar_unit(self.cases, self.grammar_fac, self.trans_fac)
assert not errata, str(errata)
errata = grammar_unit(self.fail_cases, self.grammar_fac, self.trans_fac)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment