Commit 92513cc4 authored by di68kap's avatar di68kap
Browse files

- allow lookahead in unit tests

parent d1adc0ce
......@@ -80,8 +80,9 @@ class Error:
MANDATORY_CONTINUATION = ErrorCode(1001)
PARSER_DID_NOT_MATCH = ErrorCode(1002)
PARSER_STOPPED_BEFORE_END = ErrorCode(1003)
CAPTURE_STACK_NOT_EMPTY = ErrorCode(1004)
PARSER_LOOKAHEAD_MATCH_ONLY = ErrorCode(1003)
PARSER_STOPPED_BEFORE_END = ErrorCode(1004)
CAPTURE_STACK_NOT_EMPTY = ErrorCode(1005)
def __init__(self, message: str, pos, code: ErrorCode = ERROR,
orig_pos: int = -1, line: int = -1, column: int = -1) -> None:
......
......@@ -650,17 +650,22 @@ class Grammar:
parser.grammar = self
def __call__(self, document: str, start_parser="root__") -> Node:
def __call__(self, document: str, start_parser="root__", track_history=False) -> Node:
"""
Parses a document with parser-combinators.
Args:
document (str): The source text to be parsed.
start_parser (str): The name of the parser with which to
start. This is useful for testing particular parsers
start_parser (str or Parser): The name of the parser with which
to start. This is useful for testing particular parsers
(i.e. particular parts of the EBNF-Grammar.)
track_history (bool): If true, the parsing history will be
recorded in self.history__. If logging is turned on (i.e.
DHParser.log.is_logging() returns true), the parsing history
will always be recorded, even if `False` is passed to
the `track_history` parameter.
Returns:
Node: The root node ot the parse tree.
Node: The root node of the parse tree.
"""
def tail_pos(predecessors: Union[List[Node], Tuple[Node, ...]]) -> int:
......@@ -677,7 +682,10 @@ class Grammar:
parser.reset()
else:
self._dirty_flag__ = True
self.history_tracking__ = is_logging()
self.history_tracking__ = track_history or is_logging()
# save the tracking state, because history_tracking__ might be set to False later,
# but the original tracking state is needed for additional error information.
track_history = self.history_tracking__
self.document__ = StringView(document)
self.document_length__ = len(self.document__)
self.document_lbreaks__ = linebreaks(document) if self.history_tracking__ else []
......@@ -701,11 +709,22 @@ class Grammar:
fwd = rest.find("\n") + 1 or len(rest)
skip, rest = rest[:fwd], rest[fwd:]
if result is None:
error_msg = 'Parser did not match! Invalid source file?' \
'\n Most advanced: %s\n Last match: %s;' % \
(str(HistoryRecord.most_advanced_match(self.history__)),
str(HistoryRecord.last_match(self.history__)))
error_code = Error.PARSER_DID_NOT_MATCH
err_info = '' if not track_history else \
'\n Most advanced: %s\n Last match: %s;' % \
(str(HistoryRecord.most_advanced_match(self.history__)),
str(HistoryRecord.last_match(self.history__)))
# Check if a Lookahead-Parser did match. Needed for testing, because
# in a test case this is not necessarily an error.
last_record = self.history__[-2] if len(self.history__) > 1 else []
if last_record and parser != self.root__ \
and last_record.status == HistoryRecord.MATCH \
and any(isinstance(parser, Lookahead)
for parser in last_record.call_stack):
error_msg = 'Parser did not match except for lookahead! ' + err_info
error_code = Error.PARSER_LOOKAHEAD_MATCH_ONLY
else:
error_msg = 'Parser did not match!' + err_info
error_code = Error.PARSER_DID_NOT_MATCH
else:
stitches.append(result)
error_msg = "Parser stopped before end" + \
......@@ -1150,7 +1169,7 @@ class OneOrMore(UnaryOperator):
>>> Grammar(sentence)('Wo viel der Weisheit, da auch viel des Grämens.').content
'Wo viel der Weisheit, da auch viel des Grämens.'
>>> str(Grammar(sentence)('.')) # an empty sentence does not match
' <<< Error on "." | Parser did not match! Invalid source file?\n Most advanced: None\n Last match: None; >>> '
' <<< Error on "." | Parser did not match! >>> '
EBNF-Notation: ``{ ... }+``
......@@ -1201,7 +1220,7 @@ class Series(NaryOperator):
>>> Grammar(variable_name)('variable_1').content
'variable_1'
>>> str(Grammar(variable_name)('1_variable'))
' <<< Error on "1_variable" | Parser did not match! Invalid source file?\n Most advanced: None\n Last match: None; >>> '
' <<< Error on "1_variable" | Parser did not match! >>> '
EBNF-Notation: ``... ...`` (sequence of parsers separated by a blank or new line)
......
......@@ -36,7 +36,7 @@ import json
import os
import sys
from DHParser.error import is_error, adjust_error_locations
from DHParser.error import Error, is_error, adjust_error_locations
from DHParser.log import is_logging, clear_logs, log_ST, log_parsing_history
from DHParser.parse import UnknownParserError
from DHParser.syntaxtree import Node, RootNode, parse_sxpr, flatten_sxpr, ZOMBIE_PARSER
......@@ -308,6 +308,17 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
parser = parser_factory()
transform = transformer_factory()
def lookahead_artifact(raw_errors):
    """
    Tells whether the given errors are a mere side effect of a trailing
    lookahead operator rather than a genuine parsing failure.

    Returns True only if `raw_errors` holds exactly two errors, the last
    one carrying the code Error.PARSER_LOOKAHEAD_MATCH_ONLY and the one
    before it the code Error.PARSER_STOPPED_BEFORE_END. In every other
    case the result is False. This is required for testing parsers that
    put a lookahead operator at the end.
    See test_testing.TestLookahead.
    """
    # Anything other than exactly two errors cannot be the artifact.
    if len(raw_errors) != 2:
        return False
    # The last error must report that only the lookahead matched ...
    if raw_errors[-1].code != Error.PARSER_LOOKAHEAD_MATCH_ONLY:
        return False
    # ... and the error before it that the parser stopped before the end.
    return raw_errors[-2].code == Error.PARSER_STOPPED_BEFORE_END
for parser_name, tests in test_unit.items():
assert parser_name, "Missing parser name in test %s!" % unit_name
assert not any (test_type in RESULT_STAGES for test_type in tests), \
......@@ -337,7 +348,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
infostr = ' match-test "' + test_name + '" ... '
errflag = len(errata)
try:
cst = parser(test_code, parser_name)
cst = parser(test_code, parser_name, track_history=True)
except UnknownParserError as upe:
cst = RootNode()
cst = cst.new_error(Node(ZOMBIE_PARSER, "").init_pos(0), str(upe))
......@@ -349,8 +360,9 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
transform(ast)
tests.setdefault('__ast__', {})[test_name] = ast
# log_ST(ast, "match_%s_%s.ast" % (parser_name, clean_test_name))
if is_error(cst.error_flag):
errors = adjust_error_locations(cst.collect_errors(), test_code)
raw_errors = cst.collect_errors()
if is_error(cst.error_flag) and not lookahead_artifact(raw_errors):
errors = adjust_error_locations(raw_errors, test_code)
errata.append('Match test "%s" for parser "%s" failed:\n\tExpr.: %s\n\n\t%s\n\n' %
(test_name, parser_name, '\n\t'.join(test_code.split('\n')),
'\n\t'.join(str(m).replace('\n', '\n\t\t') for m in errors)))
......@@ -385,13 +397,13 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
errflag = len(errata)
# cst = parser(test_code, parser_name)
try:
cst = parser(test_code, parser_name)
cst = parser(test_code, parser_name, track_history=True)
except UnknownParserError as upe:
node = Node(ZOMBIE_PARSER, "").init_pos(0)
cst = RootNode(node).new_error(node, str(upe))
errata.append('Unknown parser "{}" in fail test "{}"!'.format(parser_name, test_name))
tests.setdefault('__err__', {})[test_name] = errata[-1]
if not is_error(cst.error_flag):
if not is_error(cst.error_flag) and not lookahead_artifact(cst.collect_errors()):
errata.append('Fail test "%s" for parser "%s" yields match instead of '
'expected failure!' % (test_name, parser_name))
tests.setdefault('__err__', {})[test_name] = errata[-1]
......
......@@ -27,11 +27,13 @@ from functools import partial
sys.path.extend(['../', './'])
from DHParser.syntaxtree import parse_sxpr, flatten_sxpr, TOKEN_PTYPE
from DHParser.transform import traverse, remove_expendables, \
from DHParser.transform import traverse, remove_expendables, remove_empty, \
replace_by_single_child, reduce_single_child, flatten
from DHParser.dsl import grammar_provider
from DHParser.testing import get_report, grammar_unit, unit_from_file, \
reset_unit
from DHParser.log import logging
CFG_FILE_1 = '''
# a comment
......@@ -143,6 +145,19 @@ ARITHMETIC_EBNF_transformation_table = {
ARITHMETIC_EBNFTransform = partial(traverse, processing_table=ARITHMETIC_EBNF_transformation_table)
def clean_report():
    """
    Removes generated unit-test reports from the directory 'REPORT'.

    Every entry of 'REPORT' whose name starts with ``unit_test_<digits>.md``
    is deleted. The directory itself is removed afterwards, but only if it
    contained no other entries. Does nothing if 'REPORT' does not exist.
    """
    if not os.path.exists('REPORT'):
        return
    foreign_entries_left = False
    for entry in os.listdir('REPORT'):
        if re.match(r'unit_test_\d+\.md', entry):
            os.remove(os.path.join('REPORT', entry))
        else:
            # Something that is not a test report: keep the directory.
            foreign_entries_left = True
    if not foreign_entries_left:
        os.rmdir('REPORT')
class TestGrammarTest:
cases = {
"factor": {
......@@ -198,16 +213,7 @@ class TestGrammarTest:
}
def teardown(self):
if os.path.exists('REPORT'):
files = os.listdir('REPORT')
flag = False
for file in files:
if re.match(r'unit_test_\d+\.md', file):
os.remove(os.path.join('REPORT', file))
else:
flag = True
if not flag:
os.rmdir('REPORT')
clean_report()
def test_testing_grammar(self):
parser_fac = grammar_provider(ARITHMETIC_EBNF)
......@@ -222,7 +228,6 @@ class TestGrammarTest:
# print(e)
assert len(errata) == 3, str(errata)
# def test_get_report(self):
# parser_fac = grammar_provider(ARITHMETIC_EBNF)
# trans_fac = lambda : ARITHMETIC_EBNFTransform
......@@ -242,6 +247,75 @@ class TestGrammarTest:
assert errata
class TestLookahead:
"""
Testing of Expressions with trailing Lookahead-Parser.
"""
# Grammar under test. The `category` rule ends with
# `&(LF sequence_of_letters)`, presumably the trailing lookahead that
# the class docstring refers to.
EBNF = r"""
document = { category | entry } { LF }
category = {LF } sequence_of_letters { /:/ sequence_of_letters } /:/ &(LF sequence_of_letters)
entry = { LF } sequence_of_letters !/:/
sequence_of_letters = /[A-Za-z0-9 ]+/
LF = / *\n/
"""
# Test unit that is passed to grammar_unit() in test_unit_lookahead,
# which asserts that it yields no errata.
cases = {
"category": {
"match": {
1: """Mountains: big:
K2"""
},
"fail": {
6: """Mountains: big:"""
}
}
}
# Test unit that is passed to grammar_unit() in test_unit_lookahead,
# which asserts that it does yield errata.
fail_cases = {
"category": {
"match": {
1: """Mountains: b""", # stop sign ":" is missing
# NOTE: the text '# not allowed' below is part of the test input
# (it sits inside the string literal), not a Python comment.
2: """Rivers:
# not allowed"""
},
"fail": {
1: """Mountains: big:
K2"""
}
}
}
# Stores a grammar factory built from EBNF and a transformer factory
# whose transformer runs traverse() with flatten and remove_empty
# registered under the "*" key of the processing table.
def setup(self):
self.grammar_fac = grammar_provider(TestLookahead.EBNF)
self.trans_fac = lambda : partial(traverse, processing_table={"*": [flatten, remove_empty]})
# Delegates the clean-up of report files to clean_report().
def teardown(self):
clean_report()
# Parses a sample document with a grammar obtained from the factory
# (default start parser) and checks that no error flag is set.
def test_selftest(self):
doc = """
Mountains: big:
Mount Everest
K2
Mountains: medium:
Denali
Alpomayo
Rivers:
Nile
"""
grammar = self.grammar_fac()
cst = grammar(doc)
assert not cst.error_flag
# trans = self.trans_fac()
# trans(cst)
# print(cst.as_sxpr())
# Runs grammar_unit() on both test units: `cases` must produce no
# errata, `fail_cases` must produce at least one.
def test_unit_lookahead(self):
errata = grammar_unit(self.cases, self.grammar_fac, self.trans_fac)
assert not errata
errata = grammar_unit(self.fail_cases, self.grammar_fac, self.trans_fac)
assert errata
class TestSExpr:
"""
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment