Commit 7d38e94c authored by eckhart's avatar eckhart

- removal of infinite loop detection first step

parent 3a24111e
DHParser Version 0.8.6 (date ?)
...............................
- static analysis of parser tree
(detects infinite loops except for those caused by regular expressions)
- default configuration now centralized in DHParser/configuration.py
DHParser Version 0.8.5 (10.2.2019)
..................................
......
......@@ -49,20 +49,20 @@ CONFIG_PRESET['add_grammar_source_to_parser_docstring'] = False
# reflects the parser structure may be helpful.
CONFIG_PRESET['flatten_tree_while_parsing'] = True
# Carries out static analysis on the parser tree before parsing starts
# to ensure its correctness. Possible values are:
# 'early' - static analysis is carried out by DHParser.ebnf.EBNFCompiler,
# already. Any errors it reveals will be located in the EBNF
# source code. This naturally only works for parsers that are
# generated from an EBNF syntax declaration.
# 'late' - static analysis is carried out when instantiating a Grammar
# (sub-)class. This also works for parser trees that are
# handwritten in Python using the parser classes from module
# `parse`. It slightly slows down instantiation of Grammar
# classes, though.
# 'none' - no static analysis at all (not recommended).
# Default value: "early"
CONFIG_PRESET['static_analysis'] = "early"
# # Carries out static analysis on the parser tree before parsing starts
# # to ensure its correctness. Possible values are:
# # 'early' - static analysis is carried out by DHParser.ebnf.EBNFCompiler,
# # already. Any errors it reveals will be located in the EBNF
# # source code. This naturally only works for parsers that are
# # generated from an EBNF syntax declaration.
# # 'late' - static analysis is carried out when instantiating a Grammar
# # (sub-)class. This also works for parser trees that are
# # handwritten in Python using the parser classes from module
# # `parse`. It slightly slows down instantiation of Grammar
# # classes, though.
# # 'none' - no static analysis at all (not recommended).
# # Default value: "early"
# CONFIG_PRESET['static_analysis'] = "early"
# Defines the output format for the serialization of syntax trees.
# Possible values are:
......
......@@ -915,20 +915,20 @@ class EBNFCompiler(Compiler):
self.definitions.update(definitions)
grammar_python_src = self.assemble_parser(definitions, node)
if get_config_value('static_analysis') == 'early':
try:
grammar_class = compile_python_object(DHPARSER_IMPORTS + grammar_python_src,
self.grammar_name)
_ = grammar_class()
grammar_python_src = grammar_python_src.replace(
'static_analysis_pending__ = [True]', 'static_analysis_pending__ = []', 1)
except NameError:
pass # undefined name in the grammar are already cuaght and reported
except GrammarError as error:
for sym, prs, err in error.errors:
symdef_node = self.rules[sym][0]
err.pos = self.rules[sym][0].pos
self.tree.add_error(symdef_node, err)
# if get_config_value('static_analysis') == 'early':
# try:
# grammar_class = compile_python_object(DHPARSER_IMPORTS + grammar_python_src,
# self.grammar_name)
# _ = grammar_class()
# grammar_python_src = grammar_python_src.replace(
# 'static_analysis_pending__ = [True]', 'static_analysis_pending__ = []', 1)
# except NameError:
# pass # undefined name in the grammar are already caught and reported
# except GrammarError as error:
# for sym, prs, err in error.errors:
# symdef_node = self.rules[sym][0]
# err.pos = self.rules[sym][0].pos
# self.tree.add_error(symdef_node, err)
return grammar_python_src
......
......@@ -88,7 +88,7 @@ class Error:
MALFORMED_ERROR_STRING = ErrorCode(1060)
AMBIGUOUS_ERROR_HANDLING = ErrorCode(1070)
REDEFINED_DIRECTIVE = ErrorCode(1080)
INFINITE_LOOP = ErrorCode(1090)
# INFINITE_LOOP = ErrorCode(1090)
def __init__(self, message: str, pos, code: ErrorCode = ERROR,
orig_pos: int = -1, line: int = -1, column: int = -1) -> None:
......
......@@ -629,13 +629,13 @@ class Grammar:
field contains a value other than "done". A value of "done" indicates
that the class has already been initialized.
static_analysis_pending__: True as long as no static analysis (see the method
with the same name for more information) has been done to check
the parser tree for correctness (e.g. no infinite loops). Static analysis
is done at instantiation and the flag is then set to false, but it
can also be carried out once the class has been generated
(by DHParser.ebnf.EBNFCompiler) and then be set to false in the
definition of the grammar class already.
# static_analysis_pending__: True as long as no static analysis (see the method
# with the same name for more information) has been done to check
# the parser tree for correctness (e.g. no infinite loops). Static analysis
# is done at instantiation and the flag is then set to false, but it
# can also be carried out once the class has been generated
# (by DHParser.ebnf.EBNFCompiler) and then be set to false in the
# definition of the grammar class already.
python__src__: For the purpose of debugging and inspection, this field can
take the python src of the concrete grammar class
......@@ -738,7 +738,7 @@ class Grammar:
# some default values
# COMMENT__ = r'' # type: str # r'#.*(?:\n|$)'
# WSP_RE__ = mixin_comment(whitespace=r'[\t ]*', comment=COMMENT__) # type: str
static_analysis_pending__ = [True] # type: List[bool]
# static_analysis_pending__ = [True] # type: List[bool]
@classmethod
......@@ -799,15 +799,15 @@ class Grammar:
assert 'root_parser__' in self.__dict__
assert self.root_parser__ == self.__dict__['root_parser__']
if self.__class__.static_analysis_pending__ \
and get_config_value('static_analysis') in {'early', 'late'}:
try:
result = self.static_analysis()
if result:
raise GrammarError(result)
self.__class__.static_analysis_pending__.pop()
except (NameError, AttributeError):
pass # don't fail the initialization of PLACEHOLDER
# if self.__class__.static_analysis_pending__ \
# and get_config_value('static_analysis') in {'early', 'late'}:
# try:
# result = self.static_analysis()
# if result:
# raise GrammarError(result)
# self.__class__.static_analysis_pending__.pop()
# except (NameError, AttributeError):
# pass # don't fail the initialization of PLACEHOLDER
def __getitem__(self, key):
......@@ -1069,37 +1069,37 @@ class Grammar:
return line_col(self.document_lbreaks__, self.document_length__ - len(text))
def static_analysis(self) -> List[GrammarErrorType]:
"""
EXPERIMENTAL (does not catch inifinite loops due to regular expressions...)
Checks the parser tree statically for possible errors. At the moment only
infinite loops will be detected.
:return: a list of error-tuples consisting of the narrowest containing
named parser (i.e. the symbol on which the failure occurred),
the actual parser that failed and an error object.
"""
containing_named_parser = '' # type: str
error_list = [] # type: List[GrammarErrorType]
def visit_parser(parser: Parser) -> None:
nonlocal containing_named_parser, error_list
if parser.pname:
containing_named_parser = parser.pname
if isinstance(parser, ZeroOrMore) or isinstance(parser, OneOrMore):
inner_parser = cast(UnaryParser, parser).parser
tree = self('', inner_parser, True)
if not tree.error_flag:
if not parser.pname:
msg = 'Parser "%s" in %s can become caught up in an infinite loop!' \
% (str(parser), containing_named_parser)
else:
msg = 'Parser "%s" can become caught up in an infinite loop!' % str(parser)
error_list.append((containing_named_parser, parser,
Error(msg, -1, Error.INFINITE_LOOP)))
self.root_parser__.apply(visit_parser)
return error_list
# def static_analysis(self) -> List[GrammarErrorType]:
# """
# EXPERIMENTAL (does not catch infinite loops due to regular expressions...)
#
# Checks the parser tree statically for possible errors. At the moment only
# infinite loops will be detected.
# :return: a list of error-tuples consisting of the narrowest containing
# named parser (i.e. the symbol on which the failure occurred),
# the actual parser that failed and an error object.
# """
# containing_named_parser = '' # type: str
# error_list = [] # type: List[GrammarErrorType]
#
# def visit_parser(parser: Parser) -> None:
# nonlocal containing_named_parser, error_list
# if parser.pname:
# containing_named_parser = parser.pname
# if isinstance(parser, ZeroOrMore) or isinstance(parser, OneOrMore):
# inner_parser = cast(UnaryParser, parser).parser
# tree = self('', inner_parser, True)
# if not tree.error_flag:
# if not parser.pname:
# msg = 'Parser "%s" in %s can become caught up in an infinite loop!' \
# % (str(parser), containing_named_parser)
# else:
# msg = 'Parser "%s" can become caught up in an infinite loop!' % str(parser)
# error_list.append((containing_named_parser, parser,
# Error(msg, -1, Error.INFINITE_LOOP)))
#
# self.root_parser__.apply(visit_parser)
# return error_list
def dsl_error_msg(parser: Parser, error_str: str) -> str:
......@@ -1409,20 +1409,20 @@ class MetaParser(Parser):
return EMPTY_NODE # avoid creation of a node object for anonymous empty nodes
return Node(self.tag_name, results) # unoptimized code
def add_infinite_loop_error(self, node):
"""
Add an "infinite loop detected" error to the given node, unless an infinite
loop detection error has already been reported at the same location. (As a
consequence, only the innermost parser where an infinite loop is detected
will carry the error message, which is preferable to a stack of error
messages from the failed parser as well as its calling parsers.)
"""
if (not node.pos in
(err.pos for err in
filter(lambda e: e.code == Error.INFINITE_LOOP, self.grammar.tree__.errors))):
self.grammar.tree__.add_error(
node, Error(dsl_error_msg(self, 'Infinite Loop encountered.'),
node.pos, Error.INFINITE_LOOP))
# def add_infinite_loop_error(self, node):
# """
# Add an "infinite loop detected" error to the given node, unless an infinite
# loop detection error has already been reported at the same location. (As a
# consequence, only the innermost parser where an infinite loop is detected
# will carry the error message, which is preferable to a stack of error
# messages from the failed parser as well as its calling parsers.)
# """
# if (not node.pos in
# (err.pos for err in
# filter(lambda e: e.code == Error.INFINITE_LOOP, self.grammar.tree__.errors))):
# self.grammar.tree__.add_error(
# node, Error(dsl_error_msg(self, 'Infinite Loop encountered.'),
# node.pos, Error.INFINITE_LOOP))
class UnaryParser(MetaParser):
......@@ -1560,7 +1560,8 @@ class ZeroOrMore(Option):
if not node:
break
if len(text) == n:
self.add_infinite_loop_error(node)
break # avoid infinite loop
# self.add_infinite_loop_error(node)
results += (node,)
nd = self._return_values(results) # type: Node
return nd, text
......@@ -1606,7 +1607,8 @@ class OneOrMore(UnaryParser):
if not node:
break
if len(text_) == n:
self.add_infinite_loop_error(node)
break # avoid infinite loop
# self.add_infinite_loop_error(node)
results += (node,)
if results == ():
return None, text
......@@ -2109,9 +2111,9 @@ class Lookahead(FlowParser):
"""
def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]:
node, _ = self.parser(text)
if (self.sign(node is not None)
if self.sign(node is not None):
# static analysis requires lookahead to be disabled at document end
or (self.grammar.static_analysis_pending__ and not text)):
# or (self.grammar.static_analysis_pending__ and not text)):
return Node(self.tag_name, '') if self.pname else EMPTY_NODE, text
else:
return None, text
......
......@@ -33,8 +33,6 @@ declarations to fully exploit the potential of the Cython-compiler.
"""
import collections
from DHParser.toolkit import typing
from typing import Optional, Union, Iterable, Tuple
try:
......
......@@ -7,7 +7,6 @@
cdef class Node:
cdef public int _pos
cdef public object _result
cdef str _content
cdef public tuple children
cdef public int _len
cdef public str tag_name
......@@ -15,6 +14,7 @@ cdef class Node:
cpdef get(self, index_or_tagname, surrogate)
cpdef is_anonymous(self)
cpdef _content(self)
cpdef with_pos(self, pos)
cpdef attr_active(self)
# cpdef compare_attr(self, other)
......
......@@ -417,7 +417,7 @@ def contains_only_whitespace(context: List[Node]) -> bool:
expression /\s*/, including empty nodes. Note, that this is not true
for anonymous whitespace nodes that contain comments."""
content = context[-1].content
return bool(not content or RX_WHITESPACE.match(context[-1].content))
return bool(not content or RX_WHITESPACE.match(content))
def is_any_kind_of_whitespace(context: List[Node]) -> bool:
......
......@@ -60,7 +60,7 @@ class ArithmeticGrammar(Grammar):
"""
expression = Forward()
source_hash__ = "588e988cfef8ace70244463ad9c64fc7"
static_analysis_pending__ = False
static_analysis_pending__ = [True]
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
COMMENT__ = r'#.*'
......
......@@ -60,7 +60,7 @@ class ArithmeticGrammar(Grammar):
expression = Forward()
variable = Forward()
source_hash__ = "43a6a760b591f9409b06f3c18a3b5ea5"
static_analysis_pending__ = False
static_analysis_pending__ = [True]
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
COMMENT__ = r''
......
......@@ -57,7 +57,8 @@ class BibTeXGrammar(Grammar):
r"""Parser for a BibTeX source file.
"""
text = Forward()
source_hash__ = "ece0314c999ac86f22796331c05efd62"
source_hash__ = "5a291c267f7f53949384137254282b62"
static_analysis_pending__ = [True]
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
COMMENT__ = r'//'
......
......@@ -68,8 +68,6 @@ Match test "entry" for parser "entry" failed:
organization = {Wikipedia}
}
6:68: Error (1090): DSL parser specification error: Infinite Loop encountered. Caught by parser "CONTENT_STRING = {/(?i)[^{}%]+/ | &/(?i)%/ ~}+".
Call stack: entry->:ZeroOrMore->:Series->content->:Series->text->:Alternative->CONTENT_STRING->:Alternative->:Series->:Whitespace
6:68: Error (1010): '}' ~ expected, "%E2\%80\%9" found!
6:69: Error (1040): Parser stopped before end! trying to recover but stopping history recording at this point.
7:1: Error (1020): Parser did not match!
......
......@@ -60,7 +60,7 @@ class EBNFGrammar(Grammar):
"""
expression = Forward()
source_hash__ = "7a7c3764b7b37241534fbb65b44b219d"
static_analysis_pending__ = False
static_analysis_pending__ = [True]
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
COMMENT__ = r'#.*(?:\n|$)'
......
......@@ -58,7 +58,7 @@ class LaTeXGrammar(Grammar):
tabular_config = Forward()
text_element = Forward()
source_hash__ = "30f9fd1ad9257035ba83975dd2f46856"
static_analysis_pending__ = False
static_analysis_pending__ = [True]
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
COMMENT__ = r'%.*'
......
......@@ -59,7 +59,7 @@ class Lyrik_explicit_whitespaceGrammar(Grammar):
r"""Parser for a Lyrik_explicit_whitespace source file.
"""
source_hash__ = "2a7f0e987e796860b804a7e162df7e7b"
static_analysis_pending__ = False
static_analysis_pending__ = [True]
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
COMMENT__ = r''
......
......@@ -68,7 +68,7 @@ class XMLGrammar(Grammar):
ignoreSectContents = Forward()
markupdecl = Forward()
source_hash__ = "05c24553a9c13db86136495bd1b5fed8"
static_analysis_pending__ = False
static_analysis_pending__ = [True]
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
COMMENT__ = r'//'
......
......@@ -61,7 +61,7 @@ class XMLSnippetGrammar(Grammar):
Name = Forward()
element = Forward()
source_hash__ = "d3c46a530b258f47d6ae47ccf8297702"
static_analysis_pending__ = False
static_analysis_pending__ = [True]
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
COMMENT__ = r'//'
......
......@@ -216,18 +216,18 @@ class TestCompilerErrors:
def test_undefined_symbols(self):
"""Use of undefined symbols should be reported.
"""
save = get_config_value('static_analysis')
set_config_value('static_analysis', 'early')
# save = get_config_value('static_analysis')
# set_config_value('static_analysis', 'early')
ebnf = """syntax = { intermediary }
intermediary = "This symbol is " [ badly_spelled ] "!"
bedly_spilled = "wrong" """
result, messages, st = compile_source(ebnf, None, get_ebnf_grammar(),
get_ebnf_transformer(), get_ebnf_compiler('UndefinedSymbols'))
# print(messages)
print(messages)
assert messages
set_config_value('static_analysis', save)
# set_config_value('static_analysis', save)
def test_no_error(self):
"""But reserved symbols should not be repoted as undefined.
......@@ -730,18 +730,18 @@ class TestAllOfResume:
assert len(st.errors_sorted) == 1
class TestStaticAnalysis:
def test_static_analysis(self):
save = get_config_value('static_analysis')
set_config_value('static_analysis', 'early')
minilang = """forever = { // } \n"""
try:
parser_class = grammar_provider(minilang)
except CompilationError as error:
assert all(e.code == Error.INFINITE_LOOP for e in error.errors)
set_config_value('static_analysis', save)
# class TestStaticAnalysis:
# def test_static_analysis(self):
# save = get_config_value('static_analysis')
# set_config_value('static_analysis', 'early')
#
# minilang = """forever = { // } \n"""
# try:
# parser_class = grammar_provider(minilang)
# except CompilationError as error:
# assert all(e.code == Error.INFINITE_LOOP for e in error.errors)
#
# set_config_value('static_analysis', save)
if __name__ == "__main__":
......
......@@ -118,31 +118,31 @@ class TestInfiLoopsAndRecursion:
log_ST(syntax_tree, "test_LeftRecursion_indirect.cst")
log_parsing_history(parser, "test_LeftRecursion_indirect")
def test_infinite_loops(self):
minilang = """forever = { // } \n"""
try:
parser_class = grammar_provider(minilang)
except CompilationError as error:
assert all(e.code == Error.INFINITE_LOOP for e in error.errors)
save = get_config_value('static_analysis')
set_config_value('static_analysis', 'late')
provider = grammar_provider(minilang)
try:
parser = provider()
except GrammarError as error:
assert error.errors[0][2].code == Error.INFINITE_LOOP
set_config_value('static_analysis', 'none')
parser = provider()
snippet = " "
syntax_tree = parser(snippet)
assert any(e.code == Error.INFINITE_LOOP for e in syntax_tree.errors)
res = parser.static_analysis()
assert res and res[0][2].code == Error.INFINITE_LOOP
minilang = """not_forever = { / / } \n"""
parser = grammar_provider(minilang)()
res = parser.static_analysis()
assert not res
set_config_value('static_analysis', save)
# def test_infinite_loops(self):
# minilang = """forever = { // } \n"""
# try:
# parser_class = grammar_provider(minilang)
# except CompilationError as error:
# assert all(e.code == Error.INFINITE_LOOP for e in error.errors)
# save = get_config_value('static_analysis')
# set_config_value('static_analysis', 'late')
# provider = grammar_provider(minilang)
# try:
# parser = provider()
# except GrammarError as error:
# assert error.errors[0][2].code == Error.INFINITE_LOOP
# set_config_value('static_analysis', 'none')
# parser = provider()
# snippet = " "
# syntax_tree = parser(snippet)
# assert any(e.code == Error.INFINITE_LOOP for e in syntax_tree.errors)
# res = parser.static_analysis()
# assert res and res[0][2].code == Error.INFINITE_LOOP
# minilang = """not_forever = { / / } \n"""
# parser = grammar_provider(minilang)()
# res = parser.static_analysis()
# assert not res
# set_config_value('static_analysis', save)
class TestFlowControl:
def setup(self):
......@@ -855,47 +855,47 @@ class TestMetaParser:
assert rv[-1].tag_name != EMPTY_NODE.tag_name, rv[-1].tag_name
class TestStaticAnalysis:
bibtex_grammar = """# bad bibtex-grammar
@ whitespace = /\s*/
@ ignorecase = True
@ comment = //
bibliography = { preamble | comment | entry }
preamble = "@Preamble{" /"/ pre_code /"/~ §"}"
pre_code = { /[^"%]+/ | /%.*\n/ }
comment = "@Comment{" text §"}"
entry = /@/ type "{" key { "," field §"=" content } [","] §"}"
type = WORD
key = NO_BLANK_STRING
field = WORD
content = "{" text "}" | plain_content
plain_content = COMMA_TERMINATED_STRING
text = { CONTENT_STRING | "{" text "}" }
WORD = /\w+/~
NO_BLANK_STRING = /[^ \t\n,%]+/~
COMMA_TERMINATED_STRING = { /[^,%]+/ | &/%/~ } # BOOM !!!
CONTENT_STRING = { /[^{}%]+/ | &/%/~ }+ # BOOM !!!
EOF = !/./
"""
def test_static_analysis(self):
save = get_config_value('static_analysis')
set_config_value('static_analysis', 'late')
gr_class = grammar_provider(self.bibtex_grammar, 'BibTex')
try:
gr_instance = gr_class()
except GrammarError as error:
affected_parsers = {e[0] for e in error.errors}
assert affected_parsers == {'CONTENT_STRING', 'COMMA_TERMINATED_STRING'}
assert all(e[2].code == Error.INFINITE_LOOP for e in error.errors)
set_config_value('static_analysis', save)
# class TestStaticAnalysis:
# bibtex_grammar = """# bad bibtex-grammar
# @ whitespace = /\s*/
# @ ignorecase = True
# @ comment = //
#
# bibliography = { preamble | comment | entry }
#
# preamble = "@Preamble{" /"/ pre_code /"/~ §"}"
# pre_code = { /[^"%]+/ | /%.*\n/ }
#
# comment = "@Comment{" text §"}"
#
# entry = /@/ type "{" key { "," field §"=" content } [","] §"}"
# type = WORD
# key = NO_BLANK_STRING
# field = WORD
# content = "{" text "}" | plain_content
#
# plain_content = COMMA_TERMINATED_STRING
# text = { CONTENT_STRING | "{" text "}" }
#
# WORD = /\w+/~
# NO_BLANK_STRING = /[^ \t\n,%]+/~
# COMMA_TERMINATED_STRING = { /[^,%]+/ | &/%/~ } # BOOM !!!
# CONTENT_STRING = { /[^{}%]+/ | &/%/~ }+ # BOOM !!!
#
# EOF = !/./
# """
#
# def test_static_analysis(self):
# save = get_config_value('static_analysis')
# set_config_value('static_analysis', 'late')
# gr_class = grammar_provider(self.bibtex_grammar, 'BibTex')
# try:
# gr_instance = gr_class()
# except GrammarError as error:
# affected_parsers = {e[0] for e in error.errors}
# assert affected_parsers == {'CONTENT_STRING', 'COMMA_TERMINATED_STRING'}
# assert all(e[2].code == Error.INFINITE_LOOP for e in error.errors)
# set_config_value('static_analysis', save)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment