From a8bf074233fd746e76a3b910958f28bd0f9aa7d2 Mon Sep 17 00:00:00 2001 From: eckhart Date: Sun, 10 Feb 2019 08:53:52 +0100 Subject: [PATCH] - testing-framework extended --- DHParser/ebnf.py | 10 +- DHParser/syntaxtree.py | 14 ++ DHParser/testing.py | 193 +++++++++++++++--- dhparser.py | 6 + examples/Arithmetic/Arithmetic.ebnf | 4 +- examples/Arithmetic/ArithmeticCompiler.py | 2 +- .../grammar_tests/01_test_leaf_elements.ini | 7 - .../EBNF/grammar_tests/02_test_document.ini | 8 - examples/EBNF/tst_EBNF_grammar.py | 11 +- 9 files changed, 197 insertions(+), 58 deletions(-) delete mode 100644 examples/EBNF/grammar_tests/01_test_leaf_elements.ini delete mode 100644 examples/EBNF/grammar_tests/02_test_document.ini diff --git a/DHParser/ebnf.py b/DHParser/ebnf.py index f7e2390..12ebea1 100644 --- a/DHParser/ebnf.py +++ b/DHParser/ebnf.py @@ -41,7 +41,7 @@ from DHParser.transform import TransformationFunc, traverse, remove_brackets, \ reduce_single_child, replace_by_single_child, remove_expendables, \ remove_tokens, flatten, forbid, assert_content from DHParser.versionnumber import __version__ -from typing import Callable, Dict, List, Set, Tuple, Sequence, Union, Optional, Any +from typing import Callable, Dict, List, Set, Tuple, Sequence, Union, Optional, Any, cast __all__ = ('get_ebnf_preprocessor', @@ -572,7 +572,7 @@ class EBNFCompiler(Compiler): # transformations = '[reduce_single_child]' transtable.append(' "' + name + '": %s,' % transformations) # transtable.append(' ":Token": reduce_single_child,') - # transtable += [' "*": replace_by_single_child', '}', ''] + transtable += [' "*": replace_by_single_child', '}', ''] transtable += [TRANSFORMER_FACTORY.format(NAME=self.grammar_name, ID=self.grammar_id)] return '\n'.join(transtable) @@ -1264,10 +1264,12 @@ def get_ebnf_compiler(grammar_name="", grammar_source="") -> EBNFCompiler: def compile_ebnf(ebnf_source: str, branding: str = 'DSL') \ -> Tuple[Optional[Any], List[Error], Optional[Node]]: - """Compiles an 
`ebnf_source` (file_name or EBNF-string) and returns + """ + Compiles an `ebnf_source` (file_name or EBNF-string) and returns a tuple of the python code of the compiler, a list of warnings or errors and the abstract syntax tree of the EBNF-source. - This function is merely syntactic sugar.""" + This function is merely syntactic sugar. + """ return compile_source(ebnf_source, get_ebnf_preprocessor(), get_ebnf_grammar(), diff --git a/DHParser/syntaxtree.py b/DHParser/syntaxtree.py index c8f7c2c..d8e00d2 100644 --- a/DHParser/syntaxtree.py +++ b/DHParser/syntaxtree.py @@ -44,6 +44,7 @@ __all__ = ('WHITESPACE_PTYPE', 'RootNode', 'parse_sxpr', 'parse_xml', + 'parse_tree', 'flatten_sxpr', 'flatten_xml') @@ -1081,6 +1082,19 @@ def parse_xml(xml: Union[str, StringView]) -> Node: assert _.match(RX_WHITESPACE_TAIL) return tree + +def parse_tree(xml_or_sxpr: str) -> Optional[Node]: + if re.match('\s*<', xml_or_sxpr): + return parse_xml(xml_or_sxpr) + elif re.match('\s*\(', xml_or_sxpr): + return parse_sxpr(xml_or_sxpr) + elif re.match('\s*$', xml_or_sxpr): + return None + else: + m = re.match('\s*(.*)\n?', xml_or_sxpr) + snippet = m.group(1) if m else '' + raise ValueError('Snippet seems to be neither S-expression nor XML: ' + snippet + ' ...') + # if __name__ == "__main__": # st = parse_sxpr("(alpha (beta (gamma i\nj\nk) (delta y)) (epsilon z))") # print(st.as_sxpr()) diff --git a/DHParser/testing.py b/DHParser/testing.py index 0432c69..f050321 100644 --- a/DHParser/testing.py +++ b/DHParser/testing.py @@ -40,10 +40,10 @@ import sys from DHParser.error import Error, is_error, adjust_error_locations from DHParser.log import is_logging, clear_logs, log_parsing_history from DHParser.parse import UnknownParserError, Parser, Lookahead -from DHParser.syntaxtree import Node, RootNode, parse_sxpr, flatten_sxpr, ZOMBIE_TAG -from DHParser.toolkit import re, typing +from DHParser.syntaxtree import Node, RootNode, parse_tree, flatten_sxpr, ZOMBIE_TAG +from DHParser.toolkit import 
load_if_file, re, typing -from typing import Tuple +from typing import Dict, List, Union, cast __all__ = ('unit_from_config', 'unit_from_json', @@ -52,6 +52,9 @@ __all__ = ('unit_from_config', 'get_report', 'grammar_unit', 'grammar_suite', + 'SymbolsDictType', + 'extract_symbols', + 'create_test_templates', 'reset_unit', 'runner') @@ -130,8 +133,8 @@ def unit_from_config(config_str): pos = eat_comments(cfg, section_match.span()[1]) entry_match = RX_ENTRY.match(cfg, pos) - if entry_match is None: - raise SyntaxError('No entries in section [%s:%s]' % (stage, symbol)) + # if entry_match is None: + # SyntaxError('No entries in section [%s:%s]' % (stage, symbol)) while entry_match: testkey, testcode = [group for group in entry_match.groups() if group is not None] lines = testcode.split('\n') @@ -148,7 +151,7 @@ def unit_from_config(config_str): section_match = RX_SECTION.match(cfg, pos) - if pos != len(cfg): + if pos != len(cfg) and not re.match('\s+$', cfg[pos:]): raise SyntaxError('in line %i' % (cfg[:pos].count('\n') + 1)) return unit @@ -298,13 +301,14 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve except AttributeError: return k - def get(tests, category, key): + def get(tests, category, key) -> str: try: value = tests[category][key] if key in tests[category] \ else tests[category][clean_key(key)] except KeyError: - raise AssertionError('%s-test %s for parser %s missing !?' - % (category, test_name, parser_name)) + return '' + # raise AssertionError('%s-test %s for parser %s missing !?' + # % (category, test_name, parser_name)) return value if isinstance(test_unit, str): @@ -393,10 +397,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve # run match tests for test_name, test_code in tests.get('match', dict()).items(): - errflag = 0 - if verbose: - infostr = ' match-test "' + test_name + '" ... 
' - errflag = len(errata) + errflag = len(errata) try: cst = parser(test_code, parser_name, track_history=has_lookahead(parser_name)) except UnknownParserError as upe: @@ -420,32 +421,43 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve # write parsing-history log only in case of failure! if is_logging(): log_parsing_history(parser, "match_%s_%s.log" % (parser_name, clean_test_name)) - elif "cst" in tests and parse_sxpr(get(tests, "cst", test_name)) != cst: - errata.append('Concrete syntax tree test "%s" for parser "%s" failed:\n%s' % - (test_name, parser_name, cst.as_sxpr())) - elif "ast" in tests: - compare = parse_sxpr(get(tests, "ast", test_name)) - if compare != ast: - errata.append('Abstract syntax tree test "%s" for parser "%s" failed:' - '\n\tExpr.: %s\n\tExpected: %s\n\tReceived: %s' - % (test_name, parser_name, '\n\t'.join(test_code.split('\n')), - flatten_sxpr(compare.as_sxpr()), - flatten_sxpr(ast.as_sxpr()))) - if errata: - tests.setdefault('__err__', {})[test_name] = errata[-1] if verbose: + infostr = ' match-test "' + test_name + '" ... ' write(infostr + ("OK" if len(errata) == errflag else "FAIL")) + if "cst" in tests and len(errata) == errflag: + compare = parse_tree(get(tests, "cst", test_name)) + if compare: + if compare != cst: + errata.append('Concrete syntax tree test "%s" for parser "%s" failed:\n%s' % + (test_name, parser_name, cst.as_sxpr())) + if verbose: + infostr = ' cst-test "' + test_name + '" ... 
' + write(infostr + ("OK" if len(errata) == errflag else "FAIL")) + + if "ast" in tests and len(errata) == errflag: + compare = parse_tree(get(tests, "ast", test_name)) + if compare: + if compare != ast: + errata.append('Abstract syntax tree test "%s" for parser "%s" failed:' + '\n\tExpr.: %s\n\tExpected: %s\n\tReceived: %s' + % (test_name, parser_name, '\n\t'.join(test_code.split('\n')), + flatten_sxpr(compare.as_sxpr()), + flatten_sxpr(ast.as_sxpr()))) + if verbose: + infostr = ' ast-test "' + test_name + '" ... ' + write(infostr + ("OK" if len(errata) == errflag else "FAIL")) + + if len(errata) > errflag: + tests.setdefault('__err__', {})[test_name] = errata[-1] + if verbose and 'fail' in tests: write(' Fail-Tests for parser "' + parser_name + '"') # run fail tests for test_name, test_code in tests.get('fail', dict()).items(): - errflag = 0 - if verbose: - infostr = ' fail-test "' + test_name + '" ... ' - errflag = len(errata) + errflag = len(errata) # cst = parser(test_code, parser_name) try: cst = parser(test_code, parser_name, track_history=has_lookahead(parser_name)) @@ -465,15 +477,18 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve tests.setdefault('__msg__', {})[test_name] = \ "\n".join(str(e) for e in cst.errors_sorted) if verbose: + infostr = ' fail-test "' + test_name + '" ... 
' + write(infostr + ("OK" if len(errata) == errflag else "FAIL")) # write test-report if report: report_dir = "REPORT" - if not os.path.exists(report_dir): - os.mkdir(report_dir) - with open(os.path.join(report_dir, unit_name + '.md'), 'w', encoding='utf8') as f: - f.write(get_report(test_unit)) + test_report = get_report(test_unit) + if test_report: + if not os.path.exists(report_dir): + os.mkdir(report_dir) + with open(os.path.join(report_dir, unit_name + '.md'), 'w', encoding='utf8') as f: + f.write(test_report) print('\n'.join(output)) return errata @@ -543,6 +558,116 @@ def grammar_suite(directory, parser_factory, transformer_factory, return '' +######################################################################## +# +# Support for unit-testing of ebnf-grammars +# +######################################################################## + + +RX_DEFINITION_OR_SECTION = re.compile('(?:^|\n)[ \t]*(\w+(?=[ \t]*=)|#:.*(?=\n|$|#))') +SymbolsDictType = Dict[str, List[str]] + + +def extract_symbols(ebnf_text_or_file: str) -> SymbolsDictType: + """ + Extracts all defined symbols from an EBNF-grammar. This can be used to + prepare grammar-tests. The symbols will be returned as lists of strings + which are grouped by the sections to which they belong and returned as + an ordered dictionary, the keys of which are the section names. + In order to define a section in the ebnf-source, add a comment-line + starting with "#:", followed by the section name. It is recommended + to use valid file names as section names. 
Example: + + #: components + + expression = term { EXPR_OP~ term} + term = factor { TERM_OP~ factor} + factor = [SIGN] ( NUMBER | VARIABLE | group ) { VARIABLE | group } + group = "(" expression ")" + + + #: leaf_expressions + + EXPR_OP = /\+/ | /-/ + TERM_OP = /\*/ | /\// + SIGN = /-/ + + NUMBER = /(?:0|(?:[1-9]\d*))(?:\.\d+)?/~ + VARIABLE = /[A-Za-z]/~ + + If no sections have been defined in the comments, there will be only + one group with the empty string as a key. + + :param ebnf_text_or_file: Either an ebnf-grammar or the file-name + of an ebnf-grammar + :return: Ordered dictionary mapping the section names of the grammar + to lists of symbols that appear under that section. + """ + def trim_section_name(name: str) -> str: + return re.sub('[^\w-]', '_', name.replace('#:', '').strip()) + + ebnf = load_if_file(ebnf_text_or_file) + deflist = RX_DEFINITION_OR_SECTION.findall(ebnf) + if not deflist: + raise AssertionError('No symbols found in: ' + ebnf_text_or_file[:40]) + symbols = collections.OrderedDict() # type: SymbolsDictType + if deflist[0][:2] != '#:': + curr_section = '' + symbols[curr_section] = [] + for df in deflist: + if df[:2] == '#:': + curr_section = trim_section_name(df) + if curr_section in symbols: + raise AssertionError('Section name must not be repeated: ' + curr_section) + symbols[curr_section] = [] + else: + symbols[curr_section].append(df) + return symbols + + +def create_test_templates(symbols_or_ebnf: Union[str, SymbolsDictType], + path: str, + fmt: str = '.ini') -> None: + """ + Creates template files for grammar unit-tests for the given symbols. + + Args: + symbols_or_ebnf: Either a dictionary that matches section names to + the grammar's symbols under that section or an EBNF-grammar + or file name of an EBNF-grammar from which the symbols shall + be extracted. + path: the path to the grammar-test directory (usually 'grammar_tests'). + If the last element of the path does not exist, the directory + will be created. 
+ fmt: the test-file-format. At the moment only '.ini' is supported + """ + assert fmt == '.ini' + if isinstance(symbols_or_ebnf, str): + symbols = extract_symbols(cast(str, symbols_or_ebnf)) # type: SymbolsDictType + else: + symbols = cast(Dict, symbols_or_ebnf) + if not os.path.exists(path): + os.mkdir(path) + if os.path.isdir(path): + save = os.getcwd() + os.chdir(path) + keys = reversed(list(symbols.keys())) + for i, k in enumerate(keys): + filename = '{num:0>2}_test_{section}'.format(num=i+1, section=k) + fmt + if os.path.exists(filename): + print('File "{name}" not created, because it already exists!'.format(name=filename)) + else: + with open(filename, 'w', encoding='utf-8') as f: + for sym in symbols[k]: + f.write('\n[match:{sym}]\n\n'.format(sym=sym)) + f.write('[ast:{sym}]\n\n'.format(sym=sym)) + f.write('[fail:{sym}]\n\n'.format(sym=sym)) + os.chdir(save) + else: + raise ValueError(path + ' is not a directory!') + + ####################################################################### # # general unit testing support diff --git a/dhparser.py b/dhparser.py index c9e082c..9b4afd5 100755 --- a/dhparser.py +++ b/dhparser.py @@ -138,6 +138,12 @@ except ModuleNotFoundError: def recompile_grammar(grammar_src, force): + grammar_tests_dir = os.path.join(scriptpath, 'grammar_tests') + if not os.path.exists(grammar_tests_dir) \ + or not any(os.path.isfile(os.path.join(grammar_tests_dir, entry)) + for entry in os.listdir(grammar_tests_dir)): + print('No grammar-tests found, generating test templates.') + testing.create_test_templates(grammar_src, grammar_tests_dir) with DHParser.log.logging(LOGGING): # recompiles Grammar only if it has changed if not dsl.recompile_grammar(grammar_src, force=force): diff --git a/examples/Arithmetic/Arithmetic.ebnf b/examples/Arithmetic/Arithmetic.ebnf index 53cbd22..845fe53 100644 --- a/examples/Arithmetic/Arithmetic.ebnf +++ b/examples/Arithmetic/Arithmetic.ebnf @@ -14,7 +14,7 @@ ####################################################################### # 
-# Structure and Components +#: Structure and Components # ####################################################################### @@ -25,7 +25,7 @@ group = "(" expression ")" ####################################################################### # -# "Leaf"-Expressions +#: "Leaf"-Expressions # ####################################################################### diff --git a/examples/Arithmetic/ArithmeticCompiler.py b/examples/Arithmetic/ArithmeticCompiler.py index 3d3c450..f7a3b67 100755 --- a/examples/Arithmetic/ArithmeticCompiler.py +++ b/examples/Arithmetic/ArithmeticCompiler.py @@ -59,7 +59,7 @@ class ArithmeticGrammar(Grammar): r"""Parser for an Arithmetic source file. """ expression = Forward() - source_hash__ = "48fe89871e7ba344eb238c1d1a927167" + source_hash__ = "50681341ebb2536b3eadd7eb5540ece0" parser_initialization__ = ["upon instantiation"] resume_rules__ = {} COMMENT__ = r'#.*' diff --git a/examples/EBNF/grammar_tests/01_test_leaf_elements.ini b/examples/EBNF/grammar_tests/01_test_leaf_elements.ini deleted file mode 100644 index 571cb5d..0000000 --- a/examples/EBNF/grammar_tests/01_test_leaf_elements.ini +++ /dev/null @@ -1,7 +0,0 @@ -[match:symbol] -M1: word -M2: one_word_with_underscores - -[fail:symbol] -F1: two words - diff --git a/examples/EBNF/grammar_tests/02_test_document.ini b/examples/EBNF/grammar_tests/02_test_document.ini deleted file mode 100644 index 3653dd8..0000000 --- a/examples/EBNF/grammar_tests/02_test_document.ini +++ /dev/null @@ -1,8 +0,0 @@ -[match:document] -M1: """This is a sequence of words - extending over several lines""" -M2: """ This sequence contains leading whitespace""" - -[fail:document] -F1: """This test should fail, because neither - comma nor full have been defined anywhere.""" diff --git a/examples/EBNF/tst_EBNF_grammar.py b/examples/EBNF/tst_EBNF_grammar.py index 01776b4..01f0152 100755 --- a/examples/EBNF/tst_EBNF_grammar.py +++ b/examples/EBNF/tst_EBNF_grammar.py @@ -16,7 +16,7 @@ scriptpath = 
os.path.dirname(__file__) try: from DHParser import dsl import DHParser.log - from DHParser import testing + from DHParser import testing, create_test_templates except ModuleNotFoundError: print('Could not import DHParser. Please adjust sys.path in file ' '"%s" manually' % __file__) @@ -24,6 +24,12 @@ except ModuleNotFoundError: def recompile_grammar(grammar_src, force): + grammar_tests_dir = os.path.join(scriptpath, 'grammar_tests') + if not os.path.exists(grammar_tests_dir) \ + or not any(os.path.isfile(os.path.join(grammar_tests_dir, entry)) + for entry in os.listdir(grammar_tests_dir)): + print('No grammar-tests found, generating test templates.') + create_test_templates(grammar_src, grammar_tests_dir) with DHParser.log.logging(LOGGING): # recompiles Grammar only if it has changed if not dsl.recompile_grammar(grammar_src, force=force): @@ -35,9 +41,10 @@ def recompile_grammar(grammar_src, force): def run_grammar_tests(glob_pattern): + grammar_tests_dir = os.path.join(scriptpath, 'grammar_tests') with DHParser.log.logging(LOGGING): error_report = testing.grammar_suite( - os.path.join(scriptpath, 'grammar_tests'), + grammar_tests_dir, get_grammar, get_transformer, fn_patterns=[glob_pattern], report=True, verbose=True) return error_report -- GitLab