Commit 6e5b22ea authored by eckhart's avatar eckhart
Browse files

- Early static analysis... work in progress!!!

parent 61e8e4f8
......@@ -26,8 +26,9 @@ import os
import platform
import stat
import DHParser.ebnf
from DHParser.compile import Compiler, compile_source
from DHParser.ebnf import EBNFCompiler, grammar_changed, \
from DHParser.ebnf import EBNFCompiler, grammar_changed, DHPARSER_IMPORTS, \
get_ebnf_preprocessor, get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler, \
PreprocessorFactoryFunc, ParserFactoryFunc, TransformerFactoryFunc, CompilerFactoryFunc
from DHParser.error import Error, is_error, has_errors, only_errors
......@@ -37,12 +38,12 @@ from DHParser.preprocess import nil_preprocessor, PreprocessorFunc
from DHParser.syntaxtree import Node
from DHParser.transform import TransformationFunc
from DHParser.toolkit import load_if_file, is_python_code, compile_python_object, \
re, typing
from typing import Any, cast, List, Tuple, Union, Iterator, Iterable, Optional, Callable
re
from typing import Any, cast, List, Tuple, Union, Iterator, Iterable, Optional, \
Callable, Generator
__all__ = ('DHPARSER_IMPORTS',
'GrammarError',
__all__ = ('DefinitionError',
'CompilationError',
'load_compiler_suite',
'compileDSL',
......@@ -71,41 +72,6 @@ AST_SECTION = "AST SECTION - Can be edited. Changes will be preserved."
COMPILER_SECTION = "COMPILER SECTION - Can be edited. Changes will be preserved."
END_SECTIONS_MARKER = "END OF DHPARSER-SECTIONS"
dhparserdir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
DHPARSER_IMPORTS = '''
import collections
from functools import partial
import os
import sys
sys.path.append(r'{dhparserdir}')
try:
import regex as re
except ImportError:
import re
from DHParser import logging, is_filename, load_if_file, \\
Grammar, Compiler, nil_preprocessor, PreprocessorToken, Whitespace, DropWhitespace, \\
Lookbehind, Lookahead, Alternative, Pop, Token, DropToken, Synonym, AllOf, SomeOf, \\
Unordered, Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \\
ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \\
grammar_changed, last_value, counterpart, accumulate, PreprocessorFunc, is_empty, \\
Node, TransformationFunc, TransformationDict, transformation_factory, traverse, \\
remove_children_if, move_adjacent, normalize_whitespace, is_anonymous, matches_re, \\
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \\
remove_expendables, remove_empty, remove_tokens, flatten, is_insignificant_whitespace, \\
is_expendable, collapse, collapse_if, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \\
remove_nodes, remove_content, remove_brackets, replace_parser, remove_anonymous_tokens, \\
keep_children, is_one_of, not_one_of, has_content, apply_if, remove_first, remove_last, \\
remove_anonymous_empty, keep_nodes, traverse_locally, strip, lstrip, rstrip, \\
replace_content, replace_content_by, forbid, assert_content, remove_infix_operator, \\
flatten_anonymous_nodes, error_on, recompile_grammar, GLOBALS
'''.format(dhparserdir=dhparserdir)
DHPARSER_MAIN = '''
def compile_src(source, log_dir=''):
"""Compiles ``source`` and returns (result, errors, ast).
......@@ -156,19 +122,23 @@ class DSLException(Exception):
"""
Base class for DSL-exceptions.
"""
def __init__(self, errors: Union[List[Error], Generator[Error, None, None]]):
    """Stores the given errors on ``self.errors`` as a list.

    :param errors: the errors that caused this exception; a list,
        tuple, or iterator/generator of Error objects.
    """
    # NOTE(review): the assertion also admits tuples, which the type
    # annotation does not mention — consider widening the annotation.
    assert isinstance(errors, Iterator) or isinstance(errors, list) \
        or isinstance(errors, tuple)
    # Materialize the sequence so the errors can be iterated more than
    # once (e.g. by __str__) even if a generator was passed in.
    self.errors = list(errors)
def __str__(self):
    """Renders a single error plainly; several errors as a numbered
    list, one error per line, starting on a fresh line."""
    if len(self.errors) == 1:
        return str(self.errors[0])
    return '\n' + '\n'.join(("%i. " % (i + 1) + str(err))
                            for i, err in enumerate(self.errors))
class GrammarError(DSLException):
class DefinitionError(DSLException):
"""
Raised when (already) the grammar of a domain specific language (DSL)
contains errors.
contains errors. Usually, these are repackaged parse.GrammarError(s).
"""
def __init__(self, errors, grammar_src):
super().__init__(errors)
......@@ -178,7 +148,8 @@ class GrammarError(DSLException):
class CompilationError(DSLException):
"""
Raised when a string or file in a domain specific language (DSL)
contains errors.
contains errors. These can also contain definition errors that
have been caught early.
"""
def __init__(self, errors, dsl_text, dsl_grammar, AST, result):
super().__init__(errors)
......@@ -215,7 +186,7 @@ def grammar_instance(grammar_representation) -> Tuple[Grammar, str]:
get_ebnf_grammar(), get_ebnf_transformer(), get_ebnf_compiler())
parser_py = cast(str, result)
if has_errors(messages):
raise GrammarError(only_errors(messages), grammar_src)
raise DefinitionError(only_errors(messages), grammar_src)
parser_root = compile_python_object(DHPARSER_IMPORTS + parser_py, r'\w+Grammar$')()
else:
# assume that dsl_grammar is a ParserHQ-object or Grammar class
......@@ -360,7 +331,7 @@ def load_compiler_suite(compiler_suite: str) -> \
get_ebnf_transformer(),
get_ebnf_compiler(compiler_suite, source))
if has_errors(messages):
raise GrammarError(only_errors(messages), source)
raise DefinitionError(only_errors(messages), source)
preprocessor = get_ebnf_preprocessor
parser = get_ebnf_grammar
ast = get_ebnf_transformer
......@@ -476,7 +447,7 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml") -> It
ebnf_compiler = cast(EBNFCompiler, compiler1)
global SECTION_MARKER, RX_SECTION_MARKER, PREPROCESSOR_SECTION, PARSER_SECTION, \
AST_SECTION, COMPILER_SECTION, END_SECTIONS_MARKER, RX_WHITESPACE, \
DHPARSER_MAIN, DHPARSER_IMPORTS
DHPARSER_MAIN
f = None
try:
f = open(rootname + 'Compiler.py', 'r', encoding="utf-8")
......@@ -503,7 +474,7 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml") -> It
if RX_WHITESPACE.fullmatch(outro):
outro = DHPARSER_MAIN.format(NAME=compiler_name)
if RX_WHITESPACE.fullmatch(imports):
imports = DHPARSER_IMPORTS
imports = DHParser.ebnf.DHPARSER_IMPORTS
if RX_WHITESPACE.fullmatch(preprocessor):
preprocessor = ebnf_compiler.gen_preprocessor_skeleton()
if RX_WHITESPACE.fullmatch(ast):
......
......@@ -32,11 +32,12 @@ import os
from DHParser.compile import CompilerError, Compiler, compile_source, visitor_name
from DHParser.error import Error
from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, Whitespace, \
NegativeLookahead, Alternative, Series, Option, OneOrMore, ZeroOrMore, Token
NegativeLookahead, Alternative, Series, Option, OneOrMore, ZeroOrMore, Token, \
GrammarError
from DHParser.preprocess import nil_preprocessor, PreprocessorFunc
from DHParser.syntaxtree import Node, WHITESPACE_PTYPE, TOKEN_PTYPE
from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name, re, expand_table, \
GLOBALS, CONFIG_PRESET, get_config_value, unrepr, typing
GLOBALS, CONFIG_PRESET, get_config_value, unrepr, compile_python_object, typing
from DHParser.transform import TransformationFunc, traverse, remove_brackets, \
reduce_single_child, replace_by_single_child, remove_expendables, \
remove_tokens, flatten, forbid, assert_content
......@@ -67,6 +68,48 @@ __all__ = ('get_ebnf_preprocessor',
########################################################################
CONFIG_PRESET['add_grammar_source_to_parser_docstring'] = False
CONFIG_PRESET['early_static_analysis'] = True # do a static analysis right after ebnf compilation
########################################################################
#
# source code support
#
########################################################################
dhparserdir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
DHPARSER_IMPORTS = '''
import collections
from functools import partial
import os
import sys
sys.path.append(r'{dhparserdir}')
try:
import regex as re
except ImportError:
import re
from DHParser import logging, is_filename, load_if_file, \\
Grammar, Compiler, nil_preprocessor, PreprocessorToken, Whitespace, DropWhitespace, \\
Lookbehind, Lookahead, Alternative, Pop, Token, DropToken, Synonym, AllOf, SomeOf, \\
Unordered, Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \\
ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \\
grammar_changed, last_value, counterpart, accumulate, PreprocessorFunc, is_empty, \\
Node, TransformationFunc, TransformationDict, transformation_factory, traverse, \\
remove_children_if, move_adjacent, normalize_whitespace, is_anonymous, matches_re, \\
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \\
remove_expendables, remove_empty, remove_tokens, flatten, is_insignificant_whitespace, \\
is_expendable, collapse, collapse_if, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \\
remove_nodes, remove_content, remove_brackets, replace_parser, remove_anonymous_tokens, \\
keep_children, is_one_of, not_one_of, has_content, apply_if, remove_first, remove_last, \\
remove_anonymous_empty, keep_nodes, traverse_locally, strip, lstrip, rstrip, \\
replace_content, replace_content_by, forbid, assert_content, remove_infix_operator, \\
flatten_anonymous_nodes, error_on, recompile_grammar, GLOBALS
'''.format(dhparserdir=dhparserdir)
########################################################################
......@@ -799,6 +842,7 @@ class EBNFCompiler(Compiler):
+ ' source file'
+ ('. Grammar:' if self.grammar_source and show_source else '.')]
definitions.append(('parser_initialization__', '["upon instantiation"]'))
definitions.append(('static_analysis_pending__', 'True'))
if self.grammar_source:
definitions.append(('source_hash__',
'"%s"' % md5(self.grammar_source, __version__)))
......@@ -877,7 +921,17 @@ class EBNFCompiler(Compiler):
# node.error_flag = max(node.error_flag, nd.error_flag)
self.definitions.update(definitions)
return self.assemble_parser(definitions, node)
grammar_python_src = self.assemble_parser(definitions, node)
if get_config_value('early_static_analysis'):
grammar_class = compile_python_object(DHPARSER_IMPORTS + grammar_python_src, self.grammar_name)
try:
_ = grammar_class()
except GrammarError as error:
for sym, prs, err in error.errors:
symdef_node = self.rules[sym][0]
err.pos = self.rules[sym][0].pos
self.tree.add_error(symdef_node, err)
return grammar_python_src
def on_definition(self, node: Node) -> Tuple[str, str]:
......
......@@ -46,6 +46,8 @@ from typing import Callable, cast, List, Tuple, Set, Dict, DefaultDict, Union, O
__all__ = ('Parser',
'UnknownParserError',
'GrammarErrorType',
'GrammarError',
'Grammar',
'EMPTY_NODE',
'PreprocessorToken',
......@@ -369,13 +371,13 @@ class Parser:
# don't track returning parsers except in case an error has occurred
# remaining = len(rest)
if grammar.moving_forward__:
record = HistoryRecord(grammar.call_stack__, node or EMPTY_NODE, text,
record = HistoryRecord(grammar.call_stack__, node, text,
grammar.line_col__(text))
grammar.history__.append(record)
elif node:
nid = id(node) # type: int
if nid in grammar.tree__.error_nodes:
record = HistoryRecord(grammar.call_stack__, node or EMPTY_NODE, text,
record = HistoryRecord(grammar.call_stack__, node, text,
grammar.line_col__(text),
grammar.tree__.error_nodes[nid])
grammar.history__.append(record)
......@@ -514,6 +516,24 @@ class UnknownParserError(KeyError):
is referred to that does not exist."""
GrammarErrorType = List[Tuple[str, Parser, Error]] # TODO: replace with a named tuple?
class GrammarError(Exception):
    """GrammarError will be raised if static analysis reveals errors
    in the grammar.
    """

    def __init__(self, static_analysis_result: List[GrammarErrorType]):
        # a non-empty collection of (symbol, parser, error)-tuples
        assert static_analysis_result  # must not be empty
        self.errors = static_analysis_result

    def __str__(self):
        errors = self.errors
        if len(errors) == 1:
            return str(errors[0][2])
        numbered = ("%i. %s" % (num, entry[2])
                    for num, entry in enumerate(errors, start=1))
        return '\n' + '\n'.join(numbered)
class Grammar:
r"""
Class Grammar directs the parsing process and stores global state
......@@ -609,6 +629,14 @@ class Grammar:
field contains a value other than "done". A value of "done" indicates
that the class has already been initialized.
static_analysis_pending__: True as long as no static analysis (see the method
with the same name for more information) has been done to check the
parser tree for correctness (e.g. the absence of infinite loops). Static
analysis is done at instantiation and the flag is then set to False, but
it can also be carried out once the class has been generated
(by DHParser.ebnf.EBNFCompiler), in which case the flag is already set
to False in the definition of the grammar class.
python__src__: For the purpose of debugging and inspection, this field can
take the python src of the concrete grammar class
(see `dsl.grammar_provider`).
......@@ -710,7 +738,7 @@ class Grammar:
# some default values
# COMMENT__ = r'' # type: str # r'#.*(?:\n|$)'
# WSP_RE__ = mixin_comment(whitespace=r'[\t ]*', comment=COMMENT__) # type: str
static_analysis_done__ = False
static_analysis_pending__ = True # type: bool
@classmethod
......@@ -771,12 +799,12 @@ class Grammar:
assert 'root_parser__' in self.__dict__
assert self.root_parser__ == self.__dict__['root_parser__']
if not self.__class__.static_analysis_done__:
if self.__class__.static_analysis_pending__:
try:
result = self.static_analysis()
if result:
raise AssertionError(str(result))
self.__class__.static_analysis_done__ = True
raise GrammarError(result)
self.__class__.static_analysis_pending__ = False
except (NameError, AttributeError):
pass # don't fail the initialization of PLACEHOLDER
......@@ -875,6 +903,40 @@ class Grammar:
predecessors to the node."""
return predecessors[-1].pos + len(predecessors[-1]) if predecessors else 0
def lookahead_failure_only(parser):
    """EXPERIMENTAL!

    Checks whether the failure to match the document was due only to a
    succeeding lookahead-parser, which is a common design pattern that
    can break test cases. Detecting this case allows the error message
    to be adjusted so that the testing framework can tell that the
    failure is merely a test-case artifact and not a real failure.
    (See test/test_testing.TestLookahead !)
    """
    # TODO: Checking the match status via the history records is
    #       inaccurate if the ending lookahead-parser is part of an
    #       Alternative-parser!  (Need a test-case!)
    last_record = self.history__[-2] if len(self.history__) > 1 else None  # type: Optional[HistoryRecord]
    # Walk the history backwards (newest first), looking for a matching
    # record that reaches the end of the document and carries a
    # lookahead-parser somewhere on its call stack.
    return last_record and parser != self.root_parser__ \
        and any(self.history__[i].status == HistoryRecord.MATCH
                and self.history__[i].node.pos
                    + len(self.history__[i].node) >= len(self.document__)
                and any(tn in self and isinstance(self[tn], Lookahead)
                        or tn[0] == ':' and issubclass(eval(tn[1:]), Lookahead)
                        for tn in self.history__[i].call_stack)
                for i in range(-2, -len(self.history__) - 1, -1))
# assert isinstance(document, str), type(document)
if self._dirty_flag__:
self._reset__()
......@@ -901,9 +963,16 @@ class Grammar:
result, _ = parser(rest)
if result is None:
result = Node(ZOMBIE_TAG, '').with_pos(0)
self.tree__.new_error(result,
'Parser "%s" did not match empty document.' % str(parser),
Error.PARSER_DID_NOT_MATCH)
if lookahead_failure_only(parser):
self.tree__.new_error(
result, 'Parser "%s" did not match empty document except for lookahead'
% str(parser),
Error.PARSER_LOOKAHEAD_MATCH_ONLY)
else:
self.tree__.new_error(
result, 'Parser "%s" did not match empty document.' % str(parser),
Error.PARSER_DID_NOT_MATCH)
while rest and len(stitches) < MAX_DROPOUTS:
result, rest = parser(rest)
if rest:
......@@ -916,15 +985,7 @@ class Grammar:
str(HistoryRecord.last_match(self.history__)))
# Check if a Lookahead-Parser did match. Needed for testing, because
# in a test case this is not necessarily an error.
last_record = self.history__[-2] if len(self.history__) > 1 else None # type: Optional[HistoryRecord]
if last_record and parser != self.root_parser__ \
and last_record.status == HistoryRecord.MATCH \
and last_record.node.pos \
+ len(last_record.node) >= len(self.document__) \
and any(tn in self and isinstance(self[tn], Lookahead)
or tn[0] == ':' and issubclass(eval(tn[1:]), Lookahead)
for tn in last_record.call_stack):
if lookahead_failure_only(parser):
error_msg = 'Parser did not match except for lookahead! ' + err_info
error_code = Error.PARSER_LOOKAHEAD_MATCH_ONLY
else:
......@@ -1021,8 +1082,10 @@ class Grammar:
return line_col(self.document_lbreaks__, self.document_length__ - len(text))
def static_analysis(self) -> List[Tuple[str, Parser, Error]]:
def static_analysis(self) -> List[GrammarErrorType]:
"""
EXPERIMENTAL (does not catch infinite loops due to regular expressions...)
Checks the parser tree statically for possible errors. At the moment only
infinite loops will be detected.
:return: a list of error-tuples consisting of the narrowest containing
......@@ -1030,7 +1093,7 @@ class Grammar:
the actual parser that failed and an error object.
"""
containing_named_parser = '' # type: str
error_list = [] # type: List[Tuple[str, Parser, Error]]
error_list = [] # type: List[GrammarErrorType]
def visit_parser(parser: Parser) -> None:
nonlocal containing_named_parser, error_list
......@@ -1038,7 +1101,7 @@ class Grammar:
containing_named_parser = parser.pname
if isinstance(parser, ZeroOrMore) or isinstance(parser, OneOrMore):
inner_parser = cast(UnaryParser, parser).parser
tree = self('', inner_parser)
tree = self('', inner_parser, True)
if not tree.error_flag:
if not parser.pname:
msg = 'Parser "%s" in %s can become caught up in an infinite loop!' \
......@@ -2057,8 +2120,10 @@ class Lookahead(FlowParser):
"""
def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]:
    """Matches with zero length iff the wrapped parser's result, passed
    through ``self.sign``, is positive — or, while static analysis is
    still pending, at the end of the document (so that the analysis is
    not derailed by trailing lookaheads).
    """
    node, _ = self.parser(text)
    if (self.sign(node is not None)
            # static analysis requires lookahead to be disabled at document end
            or (self.grammar.static_analysis_pending__ and not text)):
        # Zero-length match: anonymous lookaheads return the shared
        # EMPTY_NODE to avoid needless node allocations.
        return Node(self.tag_name, '') if self.pname else EMPTY_NODE, text
    else:
        return None, text
......@@ -2220,7 +2285,7 @@ class Retrieve(Parser):
stack = self.grammar.variables__[self.symbol.pname]
value = self.filter(stack)
except (KeyError, IndexError):
node = Node(self.tag_name, '')
node = Node(self.tag_name, '').with_pos(self.grammar.document_length__ - len(text))
self.grammar.tree__.new_error(
node, dsl_error_msg(self, "'%s' undefined or exhausted." % self.symbol.pname))
return node, text
......
......@@ -43,7 +43,7 @@ text = { CONTENT_STRING | "{" text "}" }
WORD = /\w+/~
NO_BLANK_STRING = /[^ \t\n,%]+/~
COMMA_TERMINATED_STRING = { /[^,%]+/ | /(?=%)/~ }
CONTENT_STRING = { /[^{}%]+/ | /(?=%)/~ }+
COMMA_TERMINATED_STRING = { /[^,%]+/ | &/%/~ }
CONTENT_STRING = { /[^{}%]+/ | &/%/~ }+
EOF = !/./
\ No newline at end of file
......@@ -57,7 +57,7 @@ class BibTeXGrammar(Grammar):
r"""Parser for a BibTeX source file.
"""
text = Forward()
source_hash__ = "d9a1a1b431a3185dab127be165a37719"
source_hash__ = "ece0314c999ac86f22796331c05efd62"
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
COMMENT__ = r'//'
......@@ -65,8 +65,8 @@ class BibTeXGrammar(Grammar):
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wsp__ = Whitespace(WSP_RE__)
EOF = NegativeLookahead(RegExp('(?i).'))
CONTENT_STRING = OneOrMore(Alternative(RegExp('(?i)[^{}%]+'), Series(RegExp('(?i)(?=%)'), wsp__)))
COMMA_TERMINATED_STRING = ZeroOrMore(Alternative(RegExp('(?i)[^,%]+'), Series(RegExp('(?i)(?=%)'), wsp__)))
CONTENT_STRING = OneOrMore(Alternative(RegExp('(?i)[^{}%]+'), Series(Lookahead(RegExp('(?i)%')), wsp__)))
COMMA_TERMINATED_STRING = ZeroOrMore(Alternative(RegExp('(?i)[^,%]+'), Series(Lookahead(RegExp('(?i)%')), wsp__)))
NO_BLANK_STRING = Series(RegExp('(?i)[^ \\t\\n,%]+'), wsp__)
WORD = Series(RegExp('(?i)\\w+'), wsp__)
text.set(ZeroOrMore(Alternative(CONTENT_STRING, Series(Series(Token("{"), wsp__), text, Series(Token("}"), wsp__)))))
......
......@@ -68,7 +68,7 @@ Match test "entry" for parser "entry" failed:
organization = {Wikipedia}
}
6:68: Error (1090): DSL parser specification error: Infinite Loop encountered. Caught by parser "CONTENT_STRING = {/(?i)[^{}%]+/ | /(?i)(?=%)/ ~}+".
6:68: Error (1090): DSL parser specification error: Infinite Loop encountered. Caught by parser "CONTENT_STRING = {/(?i)[^{}%]+/ | &/(?i)%/ ~}+".
Call stack: entry->:ZeroOrMore->:Series->content->:Series->text->:Alternative->CONTENT_STRING->:Alternative->:Series->:Whitespace
6:68: Error (1010): '}' ~ expected, "%E2\%80\%9" found!
6:69: Error (1040): Parser stopped before end! trying to recover but stopping history recording at this point.
......
......@@ -31,8 +31,8 @@ from DHParser import compile_source
from DHParser.error import has_errors, Error
from DHParser.syntaxtree import WHITESPACE_PTYPE
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, EBNFTransform, \
get_ebnf_compiler, compile_ebnf
from DHParser.dsl import CompilationError, compileDSL, DHPARSER_IMPORTS, grammar_provider
get_ebnf_compiler, compile_ebnf, DHPARSER_IMPORTS
from DHParser.dsl import CompilationError, compileDSL, grammar_provider
from DHParser.testing import grammar_unit
......@@ -724,6 +724,9 @@ class TestAllOfResume:
assert len(st.errors_sorted) == 1
class TestStaticAnalysis:
    """Placeholder for tests of the early static analysis of compiled
    EBNF grammars. No tests implemented here yet."""
    pass
if __name__ == "__main__":
from DHParser.testing import runner
......
......@@ -29,10 +29,10 @@ from DHParser.log import logging, is_logging, log_ST, log_parsing_history
from DHParser.error import Error, is_error
from DHParser.parse import Parser, Grammar, Forward, TKN, ZeroOrMore, RE, \
RegExp, Lookbehind, NegativeLookahead, OneOrMore, Series, Alternative, AllOf, SomeOf, \
UnknownParserError, MetaParser, EMPTY_NODE
UnknownParserError, MetaParser, GrammarError, EMPTY_NODE
from DHParser import compile_source
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
from DHParser.dsl import grammar_provider, DHPARSER_IMPORTS
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler, DHPARSER_IMPORTS
from DHParser.dsl import grammar_provider, CompilationError
from DHParser.syntaxtree import Node
......@@ -115,7 +115,16 @@ class TestInfiLoopsAndRecursion:
def test_infinite_loops(self):
minilang = """forever = { // } \n"""
snippet = " "
parser = grammar_provider(minilang)()
try:
parser_class = grammar_provider(minilang)
except CompilationError as error:
assert all(e.code == Error.INFINITE_LOOP for e in error.errors)
print(error)
save = get_config_value('early_static_analysis')
set_config_value('early_static_analysis', False)
parser_class = grammar_provider(minilang)
parser = parser_class()
set_config_value('early_static_analysis', save)
syntax_tree = parser(snippet)
assert any(e.code == Error.INFINITE_LOOP for e in syntax_tree.errors)
res = parser.static_analysis()
......@@ -837,6 +846,46 @@ class TestMetaParser:
assert rv[-1].tag_name != EMPTY_NODE.tag_name, rv[-1].tag_name
class TestStaticAnalysis:
    """Tests that instantiating a grammar class whose repetition-parsers
    can match the empty string raises a GrammarError during the static
    analysis performed on instantiation."""

    # Deliberately broken grammar: the two rules marked "BOOM" repeat
    # alternatives that can succeed without consuming input, i.e.
    # potential infinite loops.
    bibtex_grammar = """# bad bibtex-grammar
@ whitespace = /\s*/
@ ignorecase = True
@ comment = //
bibliography = { preamble | comment | entry }
preamble = "@Preamble{" /"/ pre_code /"/~ §"}"
pre_code = { /[^"%]+/ | /%.*\n/ }
comment = "@Comment{" text §"}"
entry = /@/ type "{" key { "," field §"=" content } [","] §"}"
type = WORD
key = NO_BLANK_STRING
field = WORD
content = "{" text "}" | plain_content
plain_content = COMMA_TERMINATED_STRING
text = { CONTENT_STRING | "{" text "}" }
WORD = /\w+/~
NO_BLANK_STRING = /[^ \t\n,%]+/~
COMMA_TERMINATED_STRING = { /[^,%]+/ | &/%/~ } # BOOM !!!
CONTENT_STRING = { /[^{}%]+/ | &/%/~ }+ # BOOM !!!
EOF = !/./
"""

    def test_static_analysis(self):
        """The faulty grammar must be rejected when the grammar class
        is instantiated (early static analysis)."""
        gr_class = grammar_provider(self.bibtex_grammar, 'BibTex')
        try:
            gr_class()
        except GrammarError as error:
            # each error is a (symbol, parser, Error)-tuple
            affected_parsers = {e[0] for e in error.errors}
            assert affected_parsers == {'CONTENT_STRING', 'COMMA_TERMINATED_STRING'}
            assert all(e[2].code == Error.INFINITE_LOOP for e in error.errors)
        else:
            # Previously the test passed vacuously when no exception
            # was raised; make the expectation explicit.
            assert False, "GrammarError expected, but grammar instantiation succeeded"
if __name__ == "__main__":
from DHParser.testing import runner
......
......@@ -30,6 +30,7 @@ from DHParser.syntaxtree import parse_sxpr, flatten_sxpr, TOKEN_PTYPE
from DHParser.transform import traverse, remove_expendables, remove_empty, \
replace_by_single_child, reduce_single_child, flatten
from DHParser.dsl import grammar_provider
from DHParser.error import Error
from DHParser.testing import get_report, grammar_unit, unit_from_file, \
reset_unit
from DHParser.log import logging
......@@ -261,8 +262,8 @@ class TestLookahead:
"category": {
"match": {
1: """Mountains: big:
K2""",
2: """Rivers:""" # allowed because lookahaead failure occurs at end of file and is mandatory!
K2""", # case 1: matches only with lookahead (but should not fail in a test)
2: """Rivers:""" # case 2: lookahead failure occurs at the end of file and is mandatory (should not fail as a test)
},
"fail": {
6: """Mountains: big:"""
......@@ -310,6 +311,13 @@ class TestLookahead:
assert not cst.error_flag
def test_unit_lookahead(self):
gr = self.grammar_fac()
# Case 1: Lookahead string is part of the test case; parser fails but for the lookahead
result = gr(self.cases['category']['match'][1], 'category', True)
assert any(e.code == Error.PARSER_LOOKAHEAD_MATCH_ONLY for e in result.errors)
# Case 2: Lookahead string is not part of the test case; parser matches but for the mandatory continuation
result = gr(self.cases['category']['match'][2], 'category', True)
assert any(e.code == Error.MANDATORY_CONTINUATION_AT_EOF for e in result.errors)