Commit 75d94187 authored by eckhart's avatar eckhart
Browse files

ebnf.py, parse.py: test and static analysis for bad capture arguments

parent c22cf2f9
......@@ -315,7 +315,7 @@ CONFIG_PRESET['log_syntax_trees'] = set()
# classes, though.
# 'none' - no static analysis at all.
# Default value: "early"
CONFIG_PRESET['static_analysis'] = "none"
CONFIG_PRESET['static_analysis'] = "early"
# DHParser.ebnf.EBNFCompiler class adds the EBNF-grammar to the
# docstring of the generated Grammar-class
......
......@@ -1327,10 +1327,13 @@ class EBNFCompiler(Compiler):
grammar_python_src = self.assemble_parser(definitions)
if get_config_value('static_analysis') == 'early':
import DHParser.parse
try:
DHParser.parse.STATIC_ANALYSIS_PENDING = True
grammar_class = compile_python_object(
DHPARSER_IMPORTS.format(dhparser_parentdir=DHPARSER_PARENTDIR)
+ grammar_python_src, self.grammar_name)
DHPARSER_IMPORTS.format(dhparser_parentdir=DHPARSER_PARENTDIR) +
# '\nimport DHParser.parse\nDHParser.parse.STATIC_ANALYSIS_PENDING = True\n' +
grammar_python_src, self.grammar_name)
_ = grammar_class()
grammar_python_src = grammar_python_src.replace(
'static_analysis_pending__ = [True]',
......@@ -1342,6 +1345,8 @@ class EBNFCompiler(Compiler):
symdef_node = self.rules[sym][0]
err.pos = self.rules[sym][0].pos
self.tree.add_error(symdef_node, err)
finally:
DHParser.parse.STATIC_ANALYSIS_PENDING = False
return grammar_python_src
......
......@@ -94,6 +94,7 @@ class Error:
AMBIGUOUS_ERROR_HANDLING = ErrorCode(1070)
REDEFINED_DIRECTIVE = ErrorCode(1080)
UNDEFINED_RETRIEVE = ErrorCode(1090)
SYMBOL_UNFIT_TO_CAPTURE = ErrorCode(1100)
# fatal errors
......
......@@ -215,10 +215,15 @@ def reentry_point(rest: StringView,
########################################################################
ApplyFunc = Callable[['Parser'], None]
ApplyFunc = Callable[['Parser'], Optional[bool]] # A return value of True stops any further application
FlagFunc = Callable[[ApplyFunc, Set[ApplyFunc]], bool]
ParseFunc = Callable[[StringView], Tuple[Optional[Node], StringView]]
# The global flag STATIC_ANALYSIS_PENDING signals the constructors
# of Parser-classes not to raise exceptions for errors that will
# be reported more precisely in the static analysis
STATIC_ANALYSIS_PENDING = False
class Parser:
"""
......@@ -565,11 +570,11 @@ class Parser:
"""
return tuple()
def _apply(self, func: Callable[['Parser'], None],
flip: Callable[[Callable, Set[Callable]], bool]) -> bool:
def _apply(self, func: ApplyFunc, flag_cycle: FlagFunc) -> bool:
"""
Applies function `func(parser)` recursively to this parser and all
descendant parsers, if any exist.
descendant parsers as long as `func()` returns `None` or `False`.
Otherwise stops the further application of `func` and returns `True`.
In order to break cycles, function `flag_cycle` is called, which should
return `True`, if this parser has already been visited. If not, it
......@@ -579,18 +584,33 @@ class Parser:
class Parser or any of its descendants. The entry point for external
calls is the method `apply()` without underscore!
"""
if flip(func, self.cycle_detection):
return False
else:
func(self)
for parser in self.sub_parsers():
parser._apply(func, flip)
return True
if not flag_cycle(func, self.cycle_detection):
if func(self):
return True
else:
for parser in self.sub_parsers():
if parser._apply(func, flag_cycle):
return True
return False
return False
def apply(self, func: Callable[['Parser'], None]):
def apply(self, func: ApplyFunc) -> bool:
"""
Applies function `func(parser)` recursively to this parser and all
descendant parsers, if any exist. Traversal is pre-order.
descendant parsers as long as `func()` returns `None` or `False`.
Traversal is pre-order. Stops the further application of `func` and
returns `True` once `func` has returned `True`.
If `func` has been applied to all descendant parsers without issuing
a stop signal by returning `True`, `False` is returned.
This use of the return value allows the `apply`-method to be used both
for tests on the descendant parsers (including self), which may already
be decided after some parsers have been visited, so that no further
parsers need to be visited, and for simply applying a procedure to all
descendant parsers (including self) without worrying about the return
value of the procedure, because a return value of `None` means
"carry on".
"""
def positive_flip(f: Callable[['Parser'], None], flagged: Set[Callable]) -> bool:
"""Returns True, if function `f` has already been applied to this
......@@ -613,9 +633,14 @@ class Parser:
return False
if func in self.cycle_detection:
self._apply(func, negative_flip)
return self._apply(func, negative_flip)
else:
self._apply(func, positive_flip)
return self._apply(func, positive_flip)
def static_analysis(self) -> Optional[List['GrammarErrorType']]:
    """Hook for checking this parser for logical errors once the grammar
    has been instantiated.

    This implementation performs no checks of its own and always reports
    "nothing found" by returning `None`.
    """
    return None
def copy_parser_base_attrs(src: Parser, duplicate: Parser):
......@@ -1364,11 +1389,13 @@ class Grammar:
"""
error_list = [] # type: List[GrammarErrorType]
# disabled, because no use case as of now
# def visit_parser(parser: Parser) -> None:
# nonlocal error_list
#
# self.root_parser__.apply(visit_parser)
def visit_parser(parser: Parser) -> None:
nonlocal error_list
errors = parser.static_analysis()
if errors is not None:
error_list.extend(errors)
self.root_parser__.apply(visit_parser)
return error_list
......@@ -2573,8 +2600,9 @@ class Capture(UnaryParser):
contained parser's name. This requires the contained parser to be named.
"""
def __init__(self, parser: Parser) -> None:
assert not parser.drop_content, \
"Cannot capture content of returned by parser, the content of which will be dropped!"
if not STATIC_ANALYSIS_PENDING and parser.apply(lambda p: p.drop_content):
raise ValueError('Captured parser "%s" contained parsers that drop content, '
'which can lead to unintended results!' % str(parser))
super(Capture, self).__init__(parser)
def _rollback(self):
......@@ -2599,6 +2627,14 @@ class Capture(UnaryParser):
def __repr__(self):
return self.parser.repr
def static_analysis(self) -> Optional[List[GrammarErrorType]]:
    """Checks whether the captured parser or any of its descendant
    parsers drops content.

    Returns a one-element list with a `SYMBOL_UNFIT_TO_CAPTURE` error
    entry `(pname, parser, error)` if so, and `None` otherwise.
    """
    drops_content = self.parser.apply(lambda descendant: descendant.drop_content)
    if not drops_content:
        return None
    message = 'Captured symbol "%s" contains parsers that drop content!' % self.pname
    # The error is created with position 0.
    return [(self.pname, self, Error(message, 0, Error.SYMBOL_UNFIT_TO_CAPTURE))]
MatchVariableFunc = Callable[[Union[StringView, str], List[str]], Optional[str]]
# (text, stack) -> value, where:
......
......@@ -219,7 +219,7 @@ def main():
elif os.path.exists(sys.argv[1]) and os.path.isfile(sys.argv[1]):
_errors = compile_on_disk(sys.argv[1])
if _errors:
print('\n\n'.join(str(err) for err in _errors))
print('\n'.join(str(err) for err in _errors))
sys.exit(1)
else:
create_project(sys.argv[1])
......
......@@ -326,15 +326,16 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report='REPORT'
def has_lookahead(parser_name: str) -> bool:
"""Returns True if the parser or any of its descendant parsers is a
Lookahead parser."""
lookahead_found = False
def find_lookahead(p: Parser):
nonlocal lookahead_found
if not lookahead_found:
lookahead_found = isinstance(p, Lookahead)
parser[parser_name].apply(find_lookahead)
return lookahead_found
return parser[parser_name].apply(lambda p: isinstance(p, Lookahead))
# lookahead_found = False
#
# def find_lookahead(p: Parser):
# nonlocal lookahead_found
# if not lookahead_found:
# lookahead_found = isinstance(p, Lookahead)
#
# parser[parser_name].apply(find_lookahead)
# return lookahead_found
def lookahead_artifact(syntax_tree: Node):
"""
......
......@@ -32,7 +32,8 @@ from DHParser.log import is_logging, log_ST, log_parsing_history
from DHParser.error import Error, is_error
from DHParser.parse import ParserError, Parser, Grammar, Forward, TKN, ZeroOrMore, RE, \
RegExp, Lookbehind, NegativeLookahead, OneOrMore, Series, Alternative, AllOf, SomeOf, \
Interleave, UnknownParserError, MetaParser, Token, EMPTY_NODE
Interleave, UnknownParserError, MetaParser, Token, EMPTY_NODE, Capture, Drop, Whitespace, \
GrammarError
from DHParser import compile_source
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler, \
compile_ebnf, DHPARSER_IMPORTS
......@@ -644,6 +645,19 @@ class TestPopRetrieve:
def has_tag_name(node, name):
return node.tag_name == name # and not isinstance(node.parser, Retrieve)
def test_capture_assertions(self):
    """Capture must raise a ValueError for parsers that drop content,
    either directly or in a nested parser, and must accept a parser that
    does not drop content."""
    unfit_parser_factories = (
        lambda: Drop(Whitespace(r'\s*')),
        lambda: Series(Token(' '), Drop(Whitespace(r'\s*'))),
    )
    for make_parser in unfit_parser_factories:
        try:
            # The inner parser is built inside the try-block as well, so
            # that a ValueError from its construction counts as expected.
            _ = Capture(make_parser())
        except ValueError:
            continue
        assert False, "ValueError expected!"
    _ = Capture(RegExp(r'\w+'))
def test_compile_mini_language(self):
assert self.minilang_parser
assert self.minilang_parser2
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment