From 9ef2e91821a3b781e57393e7c753a6f43ceb8cd5 Mon Sep 17 00:00:00 2001 From: Eckhart Arnold Date: Wed, 26 Apr 2017 22:21:57 +0200 Subject: [PATCH] bug fixes; started code for systematic testing --- .gitignore | 2 +- DHParser/dsl.py | 1 - DHParser/ebnf.py | 11 +++++------ DHParser/parsers.py | 16 +++++++++++++--- DHParser/syntaxtree.py | 12 +++++++++++- DHParser/toolkit.py | 2 +- OLDSTUFF/ParserCombinators_obsolete.py | 2 +- test/test_dsl.py | 11 ++++++----- test/test_ebnf.py | 18 +++++++++++++++--- test/test_parsers.py | 2 +- test/test_syntaxtree.py | 10 +++++++++- test/test_toolkit.py | 2 +- 12 files changed, 64 insertions(+), 25 deletions(-) diff --git a/.gitignore b/.gitignore index 9a8e1e1..36f3870 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,6 @@ testdata/*.pdf DEBUG* LOGS/ external_resources/ -tmp/ +tmp/* build/ dist/ diff --git a/DHParser/dsl.py b/DHParser/dsl.py index 514b1d8..9f1b154 100644 --- a/DHParser/dsl.py +++ b/DHParser/dsl.py @@ -396,4 +396,3 @@ def compile_on_disk(source_file, compiler_suite="", extension=".xml"): return [] - diff --git a/DHParser/ebnf.py b/DHParser/ebnf.py index 8d2fc05..787babb 100644 --- a/DHParser/ebnf.py +++ b/DHParser/ebnf.py @@ -140,7 +140,7 @@ EBNF_transformation_table = { (TOKEN_KEYWORD, WHITESPACE_KEYWORD): [remove_expendables, reduce_single_child], "list_": - [partial(remove_tokens, tokens={','})], + [flatten, partial(remove_tokens, tokens={','})], "": [remove_expendables, replace_by_single_child] } @@ -188,8 +188,8 @@ class EBNFCompiler(CompilerBase): def _reset(self): self.rules = set() - self.symbols = set() self.variables = set() + self.symbol_nodes = [] self.definition_names = [] self.recursive = set() self.root = "" @@ -302,10 +302,10 @@ class EBNFCompiler(CompilerBase): declarations += [symbol + '.set(' + statement + ')'] else: declarations += [symbol + ' = ' + statement] - for nd in self.symbols: + for nd in self.symbol_nodes: if nd.result not in self.rules: nd.add_error("Missing production for symbol '%s'" % nd.result) - if self.root and 'root__' not in self.symbols: + if self.root and 'root__' not in self.rules: declarations.append('root__ = ' + self.root) declarations.append('') return '\n '.join(declarations) @@ -443,7 +443,6 @@ class EBNFCompiler(CompilerBase): if prefix in {'::', ':'}: assert len(node.result) == 2 arg = node.result[-1] - argstr = str(arg) if arg.parser.name != 'symbol': node.add_error(('Retrieve Operator "%s" requires a symbol, ' 'and not a %s.') % (prefix, str(arg.parser))) @@ -487,7 +486,7 @@ class EBNFCompiler(CompilerBase): if node.result in self.directives['tokens']: return 'ScannerToken("' + node.result + '")' else: - self.symbols.add(node) + self.symbol_nodes.append(node) if node.result in self.rules: self.recursive.add(node.result) return node.result diff --git a/DHParser/parsers.py b/DHParser/parsers.py index 3512b8c..7203de5 100644 --- a/DHParser/parsers.py +++ b/DHParser/parsers.py @@ -55,11 +55,10 @@ try: import regex as re except ImportError: import re -import sys -from .toolkit import IS_LOGGING, LOGS_DIR, escape_re, sane_parser_name, smart_list +from .toolkit import IS_LOGGING, LOGS_DIR, escape_re, sane_parser_name from .syntaxtree import WHITESPACE_KEYWORD, TOKEN_KEYWORD, ZOMBIE_PARSER, Node, \ - traverse + mock_syntax_tree from DHParser.toolkit import load_if_file, error_messages __all__ = ['HistoryRecord', @@ -1070,3 +1069,14 @@ def full_compilation(source, scanner, parser, transform, compiler): messages = error_messages(source_text, errors) return result, messages, syntax_tree + +def test_grammar(test_suite, parse_function, transform): + for parser_name, tests in test_suite.items(): + assert set(tests.keys()).issubset({'match', 'fail', 'ast', 'cst'}) + for test_name, test_code in tests['match'].items(): + cst = parse_function(test_code, parser_name) + if not cst.error_flag: + yield "Test %s for parser %s did not match" % (test_name, parser_name) + if "cst" in tests: + if tests["cst"][test_name] != mock_syntax_tree(cst): + pass # TO BE CONTINUED \ No newline at end of file diff --git a/DHParser/syntaxtree.py b/DHParser/syntaxtree.py index 0dddebd..7e024e1 100644 --- a/DHParser/syntaxtree.py +++ b/DHParser/syntaxtree.py @@ -17,6 +17,7 @@ implied. See the License for the specific language governing permissions and limitations under the License. """ +import copy import itertools import os from functools import partial @@ -166,9 +167,18 @@ class Node: def __eq__(self, other): return str(self.parser) == str(other.parser) and self.result == other.result + def __hash__(self): + return hash((str(self.parser), )) + + def __deepcopy__(self, memodict={}): + result = copy.deepcopy(self.result) + other = Node(self.parser, result) + other._pos = self._pos + return other + @property def tag_name(self): - return str(self.parser) + return self.parser.name or self.parser.__class__.__name__ # ONLY FOR DEBUGGING: return self.parser.name + ':' + self.parser.__class__.__name__ @property diff --git a/DHParser/toolkit.py b/DHParser/toolkit.py index e539a4e..d6f4699 100644 --- a/DHParser/toolkit.py +++ b/DHParser/toolkit.py @@ -54,7 +54,7 @@ __all__ = ['logging_on', 'sane_parser_name'] -LOGGING: str = "LOGS" # LOGGING = "" turns logging off! +LOGGING: str = "" # "LOGS" # LOGGING = "" turns logging off! def logging_on(log_subdir="LOGS"): diff --git a/OLDSTUFF/ParserCombinators_obsolete.py b/OLDSTUFF/ParserCombinators_obsolete.py index a5fdac9..268356f 100644 --- a/OLDSTUFF/ParserCombinators_obsolete.py +++ b/OLDSTUFF/ParserCombinators_obsolete.py @@ -740,7 +740,7 @@ class Parser(metaclass=ParserMetaClass): def apply(self, func): """Applies function `func(parser)` recursively to this parser and all - descendendants of the tree of parsers. The same function can never + descendants of the tree of parsers. The same function can never be applied twice between calls of the ``reset()``-method! """ if func in self.cycle_detection: diff --git a/test/test_dsl.py b/test/test_dsl.py index f7f0c90..2c8ecfd 100644 --- a/test/test_dsl.py +++ b/test/test_dsl.py @@ -31,11 +31,12 @@ class TestCompilerGeneration: word = /\w+/ WSPC = /\s+/ """ + tmp = 'tmp/' if os.path.isdir('tmp') else ('test/tmp/') trivial_text = """Es war ein König in Thule.""" - grammar_name = "tmp/TestCompilerGeneration.ebnf" - compiler_name = "tmp/TestCompilerGeneration_compiler.py" - text_name = "tmp/TestCompilerGeneration_text.txt" - result_name = "tmp/TestCompilerGeneration_text.xml" + grammar_name = tmp + "TestCompilerGeneration.ebnf" + compiler_name = tmp + "TestCompilerGeneration_compiler.py" + text_name = tmp + "TestCompilerGeneration_text.txt" + result_name = tmp + "TestCompilerGeneration_text.xml" def setup(self): with open(self.grammar_name, "w") as f: @@ -71,7 +72,7 @@ class TestCompilerGeneration: result = run_compiler(self.trivial_text, self.compiler_name) assert output == result.as_xml(), str(result) - sys.path.append('tmp') + sys.path.append(self.tmp) from TestCompilerGeneration_compiler import compile_TestCompilerGeneration result, errors, ast = compile_TestCompilerGeneration(self.trivial_text) diff --git a/test/test_ebnf.py b/test/test_ebnf.py index 967013e..ed8a3cc 100644 --- a/test/test_ebnf.py +++ b/test/test_ebnf.py @@ -81,9 +81,21 @@ class TestDirectives: class TestEBNFParser: - test_json = [ -"" -] + test_json = { + "list_": { + "match": { + 1: "hund", + 2: "hund, katze,maus", + 3: "hund , katze" + }, + "fail": { + 1: "123", + 2: '"literal"', + 3: "/regexp/" + } + } + } + def setup(self): self.EBNF = EBNFGrammar() diff --git a/test/test_parsers.py b/test/test_parsers.py index 71c44a2..551ce1b 100644 --- a/test/test_parsers.py +++ b/test/test_parsers.py @@ -105,4 +105,4 @@ class TestRegex: if __name__ == "__main__": from run import runner - runner("", globals()) + runner("TestInfiLoopsAndRecursion", globals()) diff --git a/test/test_syntaxtree.py b/test/test_syntaxtree.py index faf7487..d719bb8 100644 --- a/test/test_syntaxtree.py +++ b/test/test_syntaxtree.py @@ -19,6 +19,7 @@ See the License for the specific language governing permissions and limitations under the License. """ +import copy import os import sys sys.path.append(os.path.abspath('../../')) @@ -90,6 +91,13 @@ class TestNode: assert self.recurr_tree != self.unique_tree assert mock_syntax_tree('(a (b c))') != mock_syntax_tree('(a (b d))') + def test_copy(self): + cpy = copy.deepcopy(self.unique_tree) + assert cpy == self.unique_tree + assert cpy.result[0].result != "epsilon" # just to make sure... + cpy.result[0].result = "epsilon" + assert cpy != self.unique_tree + class TestErrorHandling: def test_error_flag_propagation(self): @@ -106,4 +114,4 @@ class TestErrorHandling: if __name__ == "__main__": from run import runner - runner("TestNode", globals()) + runner("", globals()) diff --git a/test/test_toolkit.py b/test/test_toolkit.py index 1c90d53..6a054cf 100644 --- a/test/test_toolkit.py +++ b/test/test_toolkit.py @@ -26,7 +26,7 @@ import sys from DHParser.toolkit import load_if_file class TestToolkit: - filename = "tmp/test.py" + filename = "tmp/test.py" if os.path.isdir('tmp') else "test/tmp/test.py" code1 = "x = 46" code2 = "def f():\n return 46" -- GitLab