Commit a8bf0742 authored by eckhart's avatar eckhart

- testing-framework extended

parent 0e2c4d73
...@@ -41,7 +41,7 @@ from DHParser.transform import TransformationFunc, traverse, remove_brackets, \ ...@@ -41,7 +41,7 @@ from DHParser.transform import TransformationFunc, traverse, remove_brackets, \
reduce_single_child, replace_by_single_child, remove_expendables, \ reduce_single_child, replace_by_single_child, remove_expendables, \
remove_tokens, flatten, forbid, assert_content remove_tokens, flatten, forbid, assert_content
from DHParser.versionnumber import __version__ from DHParser.versionnumber import __version__
from typing import Callable, Dict, List, Set, Tuple, Sequence, Union, Optional, Any from typing import Callable, Dict, List, Set, Tuple, Sequence, Union, Optional, Any, cast
__all__ = ('get_ebnf_preprocessor', __all__ = ('get_ebnf_preprocessor',
...@@ -572,7 +572,7 @@ class EBNFCompiler(Compiler): ...@@ -572,7 +572,7 @@ class EBNFCompiler(Compiler):
# transformations = '[reduce_single_child]' # transformations = '[reduce_single_child]'
transtable.append(' "' + name + '": %s,' % transformations) transtable.append(' "' + name + '": %s,' % transformations)
# transtable.append(' ":Token": reduce_single_child,') # transtable.append(' ":Token": reduce_single_child,')
# transtable += [' "*": replace_by_single_child', '}', ''] transtable += [' "*": replace_by_single_child', '}', '']
transtable += [TRANSFORMER_FACTORY.format(NAME=self.grammar_name, ID=self.grammar_id)] transtable += [TRANSFORMER_FACTORY.format(NAME=self.grammar_name, ID=self.grammar_id)]
return '\n'.join(transtable) return '\n'.join(transtable)
...@@ -1264,10 +1264,12 @@ def get_ebnf_compiler(grammar_name="", grammar_source="") -> EBNFCompiler: ...@@ -1264,10 +1264,12 @@ def get_ebnf_compiler(grammar_name="", grammar_source="") -> EBNFCompiler:
def compile_ebnf(ebnf_source: str, branding: str = 'DSL') \ def compile_ebnf(ebnf_source: str, branding: str = 'DSL') \
-> Tuple[Optional[Any], List[Error], Optional[Node]]: -> Tuple[Optional[Any], List[Error], Optional[Node]]:
"""Compiles an `ebnf_source` (file_name or EBNF-string) and returns """
Compiles an `ebnf_source` (file_name or EBNF-string) and returns
a tuple of the python code of the compiler, a list of warnings or errors a tuple of the python code of the compiler, a list of warnings or errors
and the abstract syntax tree of the EBNF-source. and the abstract syntax tree of the EBNF-source.
This function is merely syntactic sugar.""" This function is merely syntactic sugar.
"""
return compile_source(ebnf_source, return compile_source(ebnf_source,
get_ebnf_preprocessor(), get_ebnf_preprocessor(),
get_ebnf_grammar(), get_ebnf_grammar(),
......
...@@ -44,6 +44,7 @@ __all__ = ('WHITESPACE_PTYPE', ...@@ -44,6 +44,7 @@ __all__ = ('WHITESPACE_PTYPE',
'RootNode', 'RootNode',
'parse_sxpr', 'parse_sxpr',
'parse_xml', 'parse_xml',
'parse_tree',
'flatten_sxpr', 'flatten_sxpr',
'flatten_xml') 'flatten_xml')
...@@ -1081,6 +1082,19 @@ def parse_xml(xml: Union[str, StringView]) -> Node: ...@@ -1081,6 +1082,19 @@ def parse_xml(xml: Union[str, StringView]) -> Node:
assert _.match(RX_WHITESPACE_TAIL) assert _.match(RX_WHITESPACE_TAIL)
return tree return tree
def parse_tree(xml_or_sxpr: str) -> Optional[Node]:
    """
    Parses a serialized syntax tree, auto-detecting the serialization
    format: content starting with "<" is parsed as XML, content starting
    with "(" as an S-expression.

    :param xml_or_sxpr: the serialized tree, either XML or an S-expression
    :return: the parsed tree, or None for empty/whitespace-only input
    :raises ValueError: if the input is neither XML nor an S-expression
    """
    stripped = xml_or_sxpr.lstrip()
    if stripped.startswith('<'):
        return parse_xml(xml_or_sxpr)
    elif stripped.startswith('('):
        return parse_sxpr(xml_or_sxpr)
    elif not stripped:
        # empty or whitespace-only input yields no tree
        return None
    else:
        # fix: the original guard `re.match('\s*', ...)` always matched
        # (zero-width), which made this error branch unreachable
        snippet = stripped.split('\n', 1)[0]
        raise ValueError('Snippet seems to be neither S-expression nor XML: '
                         + snippet + ' ...')
# if __name__ == "__main__": # if __name__ == "__main__":
# st = parse_sxpr("(alpha (beta (gamma i\nj\nk) (delta y)) (epsilon z))") # st = parse_sxpr("(alpha (beta (gamma i\nj\nk) (delta y)) (epsilon z))")
# print(st.as_sxpr()) # print(st.as_sxpr())
......
...@@ -40,10 +40,10 @@ import sys ...@@ -40,10 +40,10 @@ import sys
from DHParser.error import Error, is_error, adjust_error_locations from DHParser.error import Error, is_error, adjust_error_locations
from DHParser.log import is_logging, clear_logs, log_parsing_history from DHParser.log import is_logging, clear_logs, log_parsing_history
from DHParser.parse import UnknownParserError, Parser, Lookahead from DHParser.parse import UnknownParserError, Parser, Lookahead
from DHParser.syntaxtree import Node, RootNode, parse_sxpr, flatten_sxpr, ZOMBIE_TAG from DHParser.syntaxtree import Node, RootNode, parse_tree, flatten_sxpr, ZOMBIE_TAG
from DHParser.toolkit import re, typing from DHParser.toolkit import load_if_file, re, typing
from typing import Tuple from typing import Dict, List, Union, cast
__all__ = ('unit_from_config', __all__ = ('unit_from_config',
'unit_from_json', 'unit_from_json',
...@@ -52,6 +52,9 @@ __all__ = ('unit_from_config', ...@@ -52,6 +52,9 @@ __all__ = ('unit_from_config',
'get_report', 'get_report',
'grammar_unit', 'grammar_unit',
'grammar_suite', 'grammar_suite',
'SymbolsDictType',
'extract_symbols',
'create_test_templates',
'reset_unit', 'reset_unit',
'runner') 'runner')
...@@ -130,8 +133,8 @@ def unit_from_config(config_str): ...@@ -130,8 +133,8 @@ def unit_from_config(config_str):
pos = eat_comments(cfg, section_match.span()[1]) pos = eat_comments(cfg, section_match.span()[1])
entry_match = RX_ENTRY.match(cfg, pos) entry_match = RX_ENTRY.match(cfg, pos)
if entry_match is None: # if entry_match is None:
raise SyntaxError('No entries in section [%s:%s]' % (stage, symbol)) # SyntaxError('No entries in section [%s:%s]' % (stage, symbol))
while entry_match: while entry_match:
testkey, testcode = [group for group in entry_match.groups() if group is not None] testkey, testcode = [group for group in entry_match.groups() if group is not None]
lines = testcode.split('\n') lines = testcode.split('\n')
...@@ -148,7 +151,7 @@ def unit_from_config(config_str): ...@@ -148,7 +151,7 @@ def unit_from_config(config_str):
section_match = RX_SECTION.match(cfg, pos) section_match = RX_SECTION.match(cfg, pos)
if pos != len(cfg): if pos != len(cfg) and not re.match('\s+$', cfg[pos:]):
raise SyntaxError('in line %i' % (cfg[:pos].count('\n') + 1)) raise SyntaxError('in line %i' % (cfg[:pos].count('\n') + 1))
return unit return unit
...@@ -298,13 +301,14 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve ...@@ -298,13 +301,14 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
except AttributeError: except AttributeError:
return k return k
def get(tests, category, key): def get(tests, category, key) -> str:
try: try:
value = tests[category][key] if key in tests[category] \ value = tests[category][key] if key in tests[category] \
else tests[category][clean_key(key)] else tests[category][clean_key(key)]
except KeyError: except KeyError:
raise AssertionError('%s-test %s for parser %s missing !?' return ''
% (category, test_name, parser_name)) # raise AssertionError('%s-test %s for parser %s missing !?'
# % (category, test_name, parser_name))
return value return value
if isinstance(test_unit, str): if isinstance(test_unit, str):
...@@ -393,9 +397,6 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve ...@@ -393,9 +397,6 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
# run match tests # run match tests
for test_name, test_code in tests.get('match', dict()).items(): for test_name, test_code in tests.get('match', dict()).items():
errflag = 0
if verbose:
infostr = ' match-test "' + test_name + '" ... '
errflag = len(errata) errflag = len(errata)
try: try:
cst = parser(test_code, parser_name, track_history=has_lookahead(parser_name)) cst = parser(test_code, parser_name, track_history=has_lookahead(parser_name))
...@@ -420,31 +421,42 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve ...@@ -420,31 +421,42 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
# write parsing-history log only in case of failure! # write parsing-history log only in case of failure!
if is_logging(): if is_logging():
log_parsing_history(parser, "match_%s_%s.log" % (parser_name, clean_test_name)) log_parsing_history(parser, "match_%s_%s.log" % (parser_name, clean_test_name))
elif "cst" in tests and parse_sxpr(get(tests, "cst", test_name)) != cst: if verbose:
infostr = ' match-test "' + test_name + '" ... '
write(infostr + ("OK" if len(errata) == errflag else "FAIL"))
if "cst" in tests and len(errata) == errflag:
compare = parse_tree(get(tests, "cst", test_name))
if compare:
if compare != cst:
errata.append('Concrete syntax tree test "%s" for parser "%s" failed:\n%s' % errata.append('Concrete syntax tree test "%s" for parser "%s" failed:\n%s' %
(test_name, parser_name, cst.as_sxpr())) (test_name, parser_name, cst.as_sxpr()))
elif "ast" in tests: if verbose:
compare = parse_sxpr(get(tests, "ast", test_name)) infostr = ' cst-test "' + test_name + '" ... '
write(infostr + ("OK" if len(errata) == errflag else "FAIL"))
if "ast" in tests and len(errata) == errflag:
compare = parse_tree(get(tests, "ast", test_name))
if compare:
if compare != ast: if compare != ast:
errata.append('Abstract syntax tree test "%s" for parser "%s" failed:' errata.append('Abstract syntax tree test "%s" for parser "%s" failed:'
'\n\tExpr.: %s\n\tExpected: %s\n\tReceived: %s' '\n\tExpr.: %s\n\tExpected: %s\n\tReceived: %s'
% (test_name, parser_name, '\n\t'.join(test_code.split('\n')), % (test_name, parser_name, '\n\t'.join(test_code.split('\n')),
flatten_sxpr(compare.as_sxpr()), flatten_sxpr(compare.as_sxpr()),
flatten_sxpr(ast.as_sxpr()))) flatten_sxpr(ast.as_sxpr())))
if errata:
tests.setdefault('__err__', {})[test_name] = errata[-1]
if verbose: if verbose:
infostr = ' ast-test "' + test_name + '" ... '
write(infostr + ("OK" if len(errata) == errflag else "FAIL")) write(infostr + ("OK" if len(errata) == errflag else "FAIL"))
if len(errata) > errflag:
tests.setdefault('__err__', {})[test_name] = errata[-1]
if verbose and 'fail' in tests: if verbose and 'fail' in tests:
write(' Fail-Tests for parser "' + parser_name + '"') write(' Fail-Tests for parser "' + parser_name + '"')
# run fail tests # run fail tests
for test_name, test_code in tests.get('fail', dict()).items(): for test_name, test_code in tests.get('fail', dict()).items():
errflag = 0
if verbose:
infostr = ' fail-test "' + test_name + '" ... '
errflag = len(errata) errflag = len(errata)
# cst = parser(test_code, parser_name) # cst = parser(test_code, parser_name)
try: try:
...@@ -465,15 +477,18 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve ...@@ -465,15 +477,18 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
tests.setdefault('__msg__', {})[test_name] = \ tests.setdefault('__msg__', {})[test_name] = \
"\n".join(str(e) for e in cst.errors_sorted) "\n".join(str(e) for e in cst.errors_sorted)
if verbose: if verbose:
infostr = ' fail-test "' + test_name + '" ... '
write(infostr + ("OK" if len(errata) == errflag else "FAIL")) write(infostr + ("OK" if len(errata) == errflag else "FAIL"))
# write test-report # write test-report
if report: if report:
report_dir = "REPORT" report_dir = "REPORT"
test_report = get_report(test_unit)
if test_report:
if not os.path.exists(report_dir): if not os.path.exists(report_dir):
os.mkdir(report_dir) os.mkdir(report_dir)
with open(os.path.join(report_dir, unit_name + '.md'), 'w', encoding='utf8') as f: with open(os.path.join(report_dir, unit_name + '.md'), 'w', encoding='utf8') as f:
f.write(get_report(test_unit)) f.write(test_report)
print('\n'.join(output)) print('\n'.join(output))
return errata return errata
...@@ -543,6 +558,116 @@ def grammar_suite(directory, parser_factory, transformer_factory, ...@@ -543,6 +558,116 @@ def grammar_suite(directory, parser_factory, transformer_factory,
return '' return ''
########################################################################
#
# Support for unit-testing of ebnf-grammars
#
########################################################################
# Matches either a symbol definition (a symbol name followed by "=") or a
# section marker, i.e. a comment line starting with "#:".
# Raw string avoids the invalid escape sequence `\w` (DeprecationWarning,
# a SyntaxError in future Python versions).
RX_DEFINITION_OR_SECTION = re.compile(r'(?:^|\n)[ \t]*(\w+(?=[ \t]*=)|#:.*(?=\n|$|#))')

# Maps section names of a grammar to the symbols defined in that section.
SymbolsDictType = Dict[str, List[str]]
def extract_symbols(ebnf_text_or_file: str) -> SymbolsDictType:
    r"""
    Extracts all defined symbols from an EBNF-grammar. This can be used to
    prepare grammar-tests. The symbols will be returned as lists of strings
    which are grouped by the sections to which they belong and returned as
    an ordered dictionary, the keys of which are the section names.

    In order to define a section in the ebnf-source, add a comment-line
    starting with "#:", followed by the section name. It is recommended
    to use valid file names as section names. Example:

        #: components

        expression = term { EXPR_OP~ term}
        term       = factor { TERM_OP~ factor}
        factor     = [SIGN] ( NUMBER | VARIABLE | group ) { VARIABLE | group }
        group      = "(" expression ")"

        #: leaf_expressions

        EXPR_OP  = /\+/ | /-/
        TERM_OP  = /\*/ | /\//
        SIGN     = /-/
        NUMBER   = /(?:0|(?:[1-9]\d*))(?:\.\d+)?/~
        VARIABLE = /[A-Za-z]/~

    If no sections have been defined in the comments, there will be only
    one group with the empty string as a key.

    :param ebnf_text_or_file: Either an ebnf-grammar or the file-name
        of an ebnf-grammar
    :return: Ordered dictionary mapping the section names of the grammar
        to lists of symbols that appear under that section.
    :raises AssertionError: if no symbols are found or a section name
        is repeated.
    """
    def trim_section_name(name: str) -> str:
        # strip the "#:" marker and replace characters unsafe in file names,
        # so that section names can double as test-file names
        return re.sub(r'[^\w-]', '_', name.replace('#:', '').strip())

    ebnf = load_if_file(ebnf_text_or_file)
    deflist = RX_DEFINITION_OR_SECTION.findall(ebnf)
    if not deflist:
        raise AssertionError('No symbols found in: ' + ebnf_text_or_file[:40])
    symbols = collections.OrderedDict()  # type: SymbolsDictType
    if deflist[0][:2] != '#:':
        # symbols appearing before the first section marker are grouped
        # under the empty string
        curr_section = ''
        symbols[curr_section] = []
    for df in deflist:
        if df[:2] == '#:':
            curr_section = trim_section_name(df)
            if curr_section in symbols:
                raise AssertionError('Section name must not be repeated: ' + curr_section)
            symbols[curr_section] = []
        else:
            symbols[curr_section].append(df)
    return symbols
def create_test_templates(symbols_or_ebnf: Union[str, SymbolsDictType],
                          path: str,
                          fmt: str = '.ini') -> None:
    """
    Creates template files for grammar unit-tests for the given symbols.

    Args:
        symbols_or_ebnf: Either a dictionary that matches section names to
                the grammar's symbols under that section or an EBNF-grammar
                or file name of an EBNF-grammar from which the symbols shall
                be extracted.
        path: the path to the grammar-test directory (usually
                'grammar_tests'). If the last element of the path does not
                exist, the directory will be created.
        fmt: the test-file-format. At the moment only '.ini' is supported.

    Raises:
        ValueError: if `path` exists but is not a directory.
    """
    assert fmt == '.ini'
    if isinstance(symbols_or_ebnf, str):
        symbols = extract_symbols(cast(str, symbols_or_ebnf))  # type: SymbolsDictType
    else:
        symbols = cast(Dict, symbols_or_ebnf)
    if not os.path.exists(path):
        os.mkdir(path)
    if os.path.isdir(path):
        save = os.getcwd()
        os.chdir(path)
        try:
            # NOTE(review): sections are numbered in reverse definition
            # order — presumably so that "leaf"-sections receive the lowest
            # numbers and are tested first; confirm before changing.
            keys = reversed(list(symbols.keys()))
            for i, k in enumerate(keys):
                filename = '{num:0>2}_test_{section}'.format(num=i + 1, section=k) + fmt
                if os.path.exists(filename):
                    # fix: the original printed the literal "{name}" because
                    # .format() was never applied to the message
                    print('File "{name}" not created, because it already exists!'
                          .format(name=filename))
                else:
                    with open(filename, 'w', encoding='utf-8') as f:
                        for sym in symbols[k]:
                            f.write('\n[match:{sym}]\n\n'.format(sym=sym))
                            f.write('[ast:{sym}]\n\n'.format(sym=sym))
                            f.write('[fail:{sym}]\n\n'.format(sym=sym))
        finally:
            # restore the working directory even if writing a file fails
            os.chdir(save)
    else:
        raise ValueError(path + ' is not a directory!')
####################################################################### #######################################################################
# #
# general unit testing support # general unit testing support
......
...@@ -138,6 +138,12 @@ except ModuleNotFoundError: ...@@ -138,6 +138,12 @@ except ModuleNotFoundError:
def recompile_grammar(grammar_src, force): def recompile_grammar(grammar_src, force):
grammar_tests_dir = os.path.join(scriptpath, 'grammar_tests')
if not os.path.exists(grammar_tests_dir) \
or not any(os.path.isfile(os.path.join(grammar_tests_dir, entry))
for entry in os.listdir(grammar_tests_dir)):
print('No grammar-tests found, generating test templates.')
testing.create_test_templates(grammar_src, grammar_tests_dir)
with DHParser.log.logging(LOGGING): with DHParser.log.logging(LOGGING):
# recompiles Grammar only if it has changed # recompiles Grammar only if it has changed
if not dsl.recompile_grammar(grammar_src, force=force): if not dsl.recompile_grammar(grammar_src, force=force):
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
####################################################################### #######################################################################
# #
# Structure and Components #: Structure and Components
# #
####################################################################### #######################################################################
...@@ -25,7 +25,7 @@ group = "(" expression ")" ...@@ -25,7 +25,7 @@ group = "(" expression ")"
####################################################################### #######################################################################
# #
# "Leaf"-Expressions #: "Leaf"-Expressions
# #
####################################################################### #######################################################################
......
...@@ -59,7 +59,7 @@ class ArithmeticGrammar(Grammar): ...@@ -59,7 +59,7 @@ class ArithmeticGrammar(Grammar):
r"""Parser for an Arithmetic source file. r"""Parser for an Arithmetic source file.
""" """
expression = Forward() expression = Forward()
source_hash__ = "48fe89871e7ba344eb238c1d1a927167" source_hash__ = "50681341ebb2536b3eadd7eb5540ece0"
parser_initialization__ = ["upon instantiation"] parser_initialization__ = ["upon instantiation"]
resume_rules__ = {} resume_rules__ = {}
COMMENT__ = r'#.*' COMMENT__ = r'#.*'
......
[match:symbol]
M1: word
M2: one_word_with_underscores
[fail:symbol]
F1: two words
[match:document]
M1: """This is a sequence of words
extending over several lines"""
M2: """ This sequence contains leading whitespace"""
[fail:document]
F1: """This test should fail, because neither
comma nor full have been defined anywhere."""
...@@ -16,7 +16,7 @@ scriptpath = os.path.dirname(__file__) ...@@ -16,7 +16,7 @@ scriptpath = os.path.dirname(__file__)
try: try:
from DHParser import dsl from DHParser import dsl
import DHParser.log import DHParser.log
from DHParser import testing from DHParser import testing, create_test_templates
except ModuleNotFoundError: except ModuleNotFoundError:
print('Could not import DHParser. Please adjust sys.path in file ' print('Could not import DHParser. Please adjust sys.path in file '
'"%s" manually' % __file__) '"%s" manually' % __file__)
...@@ -24,6 +24,12 @@ except ModuleNotFoundError: ...@@ -24,6 +24,12 @@ except ModuleNotFoundError:
def recompile_grammar(grammar_src, force): def recompile_grammar(grammar_src, force):
grammar_tests_dir = os.path.join(scriptpath, 'grammar_tests')
if not os.path.exists(grammar_tests_dir) \
or not any(os.path.isfile(os.path.join(grammar_tests_dir, entry))
for entry in os.listdir(grammar_tests_dir)):
print('No grammar-tests found, generating test templates.')
create_test_templates(grammar_src, grammar_tests_dir)
with DHParser.log.logging(LOGGING): with DHParser.log.logging(LOGGING):
# recompiles Grammar only if it has changed # recompiles Grammar only if it has changed
if not dsl.recompile_grammar(grammar_src, force=force): if not dsl.recompile_grammar(grammar_src, force=force):
...@@ -35,9 +41,10 @@ def recompile_grammar(grammar_src, force): ...@@ -35,9 +41,10 @@ def recompile_grammar(grammar_src, force):
def run_grammar_tests(glob_pattern): def run_grammar_tests(glob_pattern):
grammar_tests_dir = os.path.join(scriptpath, 'grammar_tests')
with DHParser.log.logging(LOGGING): with DHParser.log.logging(LOGGING):
error_report = testing.grammar_suite( error_report = testing.grammar_suite(
os.path.join(scriptpath, 'grammar_tests'), grammar_tests_dir,
get_grammar, get_transformer, get_grammar, get_transformer,
fn_patterns=[glob_pattern], report=True, verbose=True) fn_patterns=[glob_pattern], report=True, verbose=True)
return error_report return error_report
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment