def compileEBNF(ebnf_src, ebnf_grammar_obj=None):
    """Compile an EBNF specification into a Grammar class.

    Args:
        ebnf_src(str): Either the file name of an EBNF grammar or
            the EBNF grammar itself as a string.
        ebnf_grammar_obj: An already existing instance of the
            DHParser.EBNFcompiler.EBNFGrammar object. Passing one in
            speeds up compilation, because no new EBNFGrammar object
            needs to be instantiated.
    Returns:
        A Grammar class that can be instantiated for parsing a text
        which conforms to the language defined by ``ebnf_src``.
    """
    # fall back to a fresh EBNFGrammar instance if none was supplied
    if not ebnf_grammar_obj:
        ebnf_grammar_obj = EBNFGrammar()
    python_src = compileDSL(ebnf_src, ebnf_grammar_obj, EBNFTransTable,
                            EBNFCompiler())
    return compile_python_object(python_src)
@@ -216,9 +235,9 @@ def run_compiler(source_file, compiler_suite="", extension=".xml"): alls symbols in ``symbols`` (set or other container) from python_module ``python_module``.""" symlist = list(symbols) - grouped = [symlist[i:i + 4] for i in range(0, len(symlist), 4)] + grouped = [symlist[i:i + 3] for i in range(0, len(symlist), 3)] return ("\nfrom " + python_module + " import " - + ', \\\n '.join(', '.join(g) for g in grouped) + '\n\n') + + ', \\\n '.join(', '.join(g) for g in grouped)) filepath = os.path.normpath(source_file) with open(source_file, encoding="utf-8") as f: @@ -248,7 +267,8 @@ def run_compiler(source_file, compiler_suite="", extension=".xml"): intro, syms, scanner, parser, ast, compiler, outro = RX_SECTION_MARKER.split(source) except (PermissionError, FileNotFoundError, IOError) as error: intro, outro = '', '' - syms = import_block("PyDSL", PARSER_SYMBOLS | AST_SYMBOLS | {'CompilerBase'}) + syms = 'import re\n' + import_block("DHParser.syntaxtree", AST_SYMBOLS) + syms += import_block("DHParser.parser", PARSER_SYMBOLS | {'CompilerBase'}) + '\n\n' scanner = compiler.gen_scanner_skeleton() ast = compiler.gen_AST_skeleton() compiler = compiler.gen_compiler_skeleton() diff --git a/tests/test_markdown.py b/examples/CommonMark/test_markdown.py similarity index 100% rename from tests/test_markdown.py rename to examples/CommonMark/test_markdown.py diff --git a/logging.py b/logs.py similarity index 95% rename from logging.py rename to logs.py index 711452c1edef59ecec4ebc7b46a1055240b84c39..515f98de1b13956e3219b7c17009c7fe4ba353c0 100644 --- a/logging.py +++ b/logs.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 -"""logging.py - basic log file support for DHParser +"""logs.py - basic log file support for DHParser Copyright 2016 by Eckhart Arnold (arnold@badw.de) Bavarian Academy of Sciences an Humanities (badw.de) @@ -18,7 +18,7 @@ implied. See the License for the specific language governing permissions and limitations under the License. 
-Module ``logging`` defines the global variable LOGGING which contains +Module ``logs`` defines the global variable LOGGING which contains the name of a directory where log files shall be placed. By setting its value to the empty string "" logging can be turned off. diff --git a/parser.py b/parser.py index 96fd384b32ddc2edac866d88aa066abf83122c6f..cba422a92b33210e2a4aca6e5991c70c915b4f63 100644 --- a/parser.py +++ b/parser.py @@ -59,7 +59,7 @@ try: except ImportError: import re -from logging import LOGGING, LOGS_DIR +from logs import LOGGING, LOGS_DIR from syntaxtree import WHITESPACE_KEYWORD, TOKEN_KEYWORD, ZOMBIE_PARSER, Node, \ error_messages, ASTTransform @@ -160,14 +160,14 @@ def add_parser_guard(parser_func): parser.recursion_counter[location] += 1 grammar = parser.grammar - if grammar.track_history: + if grammar.history_tracking: grammar.call_stack.append(parser) grammar.moving_forward = True # run original __call__ method node, rest = parser_func(parser, text) - if grammar.track_history: + if grammar.history_tracking: if grammar.moving_forward: # and result[0] == None grammar.moving_forward = False record = HistoryRecord(grammar.call_stack.copy(), node, len(rest)) @@ -213,13 +213,14 @@ class Parser(metaclass=ParserMetaClass): def __init__(self, name=None): assert name is None or isinstance(name, str), str(name) self.name = name or '' - self.grammar = None # center for global variables etc. + self._grammar = None # center for global variables etc. 
def reset(self):
    """Reset the parser's per-parse caches (memoization table,
    left-recursion counters and cycle detection set).

    Returns:
        The parser itself, so that calls can be chained.
    """
    self.visited = {}             # memoized (node, rest) results per location
    self.recursion_counter = {}   # recursion depth per text location
    self.cycle_detection = set()  # guards against infinite parser cycles
    return self
log_file_name + '_full') + write_log(match_history, log_file_name + '_match') + write_log(errors_only, log_file_name + '_errors') @@ -590,7 +592,7 @@ class Optional(UnaryOperator): "Nesting options would be redundant: %s(%s)" % \ (str(name), str(parser.name)) assert not isinstance(parser, Required), \ - "Nestion options with required elements is contradictory: " \ + "Nesting options with required elements is contradictory: " \ "%s(%s)" % (str(name), str(parser.name)) def __call__(self, text): @@ -899,12 +901,14 @@ def full_compilation(source, grammar_base, AST_transformations, compiler): of failure, 2. A list of error messages, each of which is a tuple (position: int, error: str) - 3. The root-node of the abstract syntax tree + 3. The root-node of the abstract syntax treelow """ assert isinstance(compiler, CompilerBase) syntax_tree = grammar_base.parse(source) - syntax_tree.log(grammar_base.log_file_name, ext='.cst') + cname = grammar_base.__class__.__name__ + log_file_name = cname[:-7] if cname.endswith('Grammar') else cname + syntax_tree.log(log_file_name, ext='.cst') grammar_base.log_parsing_history() assert syntax_tree.error_flag or str(syntax_tree) == source, str(syntax_tree) @@ -914,7 +918,7 @@ def full_compilation(source, grammar_base, AST_transformations, compiler): result = None else: ASTTransform(syntax_tree, AST_transformations) - syntax_tree.log(grammar_base.log_file_name, ext='.ast') + syntax_tree.log(log_file_name, ext='.ast') result = compiler.compile__(syntax_tree) errors = syntax_tree.collect_errors() messages = error_messages(source, errors) diff --git a/syntaxtree.py b/syntaxtree.py index 6ae7d1bb32818c86044a31262f42b5d5e4a166e7..d7eaf08a801e6b39da39f33d5d8d44cf0a4eb479 100644 --- a/syntaxtree.py +++ b/syntaxtree.py @@ -20,12 +20,16 @@ permissions and limitations under the License. 
""" import collections +import itertools import os from functools import partial - +try: + import regex as re +except ImportError: + import re from typing import NamedTuple -from logging import LOGGING, LOGS_DIR +from logs import LOGGING, LOGS_DIR __all__ = ['WHITESPACE_KEYWORD', @@ -35,6 +39,7 @@ __all__ = ['WHITESPACE_KEYWORD', 'Error', 'Node', 'error_messages', + 'compact_sexpr', 'ASTTransform', 'no_transformation', 'replace_by_single_child', @@ -150,7 +155,7 @@ class Node: def __str__(self): if self.children: - return "".join([str(child) for child in self.result]) + return "".join(str(child) for child in self.result) return str(self.result) @property @@ -236,7 +241,7 @@ class Node: return head + '\n'.join([tab + dataF(s) for s in str(self.result).split('\n')]) + tail - def as_sexpr(self, src=None): + def as_sexpr(self, src=None, prettyprint=True): """ Returns content as S-expression, i.e. in lisp-like form. @@ -244,6 +249,8 @@ class Node: src: The source text or `None`. In case the source text is given the position of the element in the text will be reported as line and column. + prettyprint(bool): True (default), if pretty printing + of leaf nodes shall be applied for better readability. """ def opening(node): @@ -261,7 +268,8 @@ class Node: else "'%s'" % s if s.find("'") < 0 \ else '"%s"' % s.replace('"', r'\"') - return self._tree_repr(' ', opening, lambda node: ')', pretty) + return self._tree_repr(' ', opening, lambda node: ')', + pretty if prettyprint else lambda s: s) def as_xml(self, src=None): """ @@ -318,33 +326,50 @@ class Node: with open(os.path.join(LOGS_DIR(), st_file_name), "w", encoding="utf-8") as f: f.write(self.as_sexpr()) + def find(self, match_function): + """Finds nodes in the tree that match a specific criterion. + + ``find`` is a generator that yields all nodes for which the + given ``match_function`` evaluates to True. The tree is + traversed pre-order. 
def navigate(self, path):
    """Yields the results of all descendant elements matched by
    ``path``, e.g. for the tree (d (s l)(e (r x1) (r x2))):
        's'   yields 'l'
        'e/r' yields 'x1', then 'x2'
        'e'   yields the tuple of nodes (r x1)(r x2)
    Matching starts at the root's children; the root node itself is
    not part of the path.

    Parameters:
        path (str): The path of the object, e.g. 'a/b/c'. The
            components of ``path`` can be regular expressions.

    Returns:
        An iterator over the results (strings or child tuples) of all
        matched nodes; empty if the path did not match anything.
    """
    def nav(node, pl):
        if pl:
            # descend into all children whose tag name matches the next
            # path component; chain.from_iterable flattens the per-child
            # result iterators into one stream (plain chain() would
            # yield the iterators themselves)
            return itertools.chain.from_iterable(
                nav(child, pl[1:]) for child in node.children
                if re.match(pl[0], child.tag_name))
        # path exhausted: this node was matched, deliver its result
        # (was ``self.result``, which always returned the root's result)
        return (node.result,)
    # the inner helper needs both the start node and the split path
    # (the original called nav() with a single argument -> TypeError)
    return nav(self, path.split('/'))
def compact_sexpr(s):
    """Returns S-expression ``s`` as a one liner without unnecessary
    whitespace.

    Example:
        >>> compact_sexpr("(a\\n (b\\n c\\n )\\n)\\n")
        '(a (b c))'
    """
    # collapse all whitespace runs to a single blank, then drop the
    # blank that precedes a closing parenthesis; raw strings avoid
    # invalid escape sequences ('\s' in a plain string is deprecated)
    return re.sub(r'\s(?=\))', '', re.sub(r'\s+', ' ', s)).strip()
parser_initialization__ = "upon instatiation" - wsp__ = mixin_comment(whitespace=r'\s*', comment=r'') + COMMENT__ = r'' + WSP__ = mixin_comment(whitespace=r'\s*', comment=r'') wspL__ = '' wspR__ = '' text = RE('[^`]+') diff --git a/tests/no_unit_tests/compile_PopRetrieve_EBNF.py b/tests/no_unit_tests/compile_PopRetrieve_EBNF.py index 5631ae34cfd3dbccaa79983c3f3d4edc32256841..7ed3c485123bb2159fbff35fe4fbab98eb41b6f3 100644 --- a/tests/no_unit_tests/compile_PopRetrieve_EBNF.py +++ b/tests/no_unit_tests/compile_PopRetrieve_EBNF.py @@ -1,6 +1,7 @@ #!/usr/bin/python3 -"""compile_MLW.py - simple utility script for compiling MLW.ebnf +"""compile_PopRetrieve.py - test of Pop and Retrieve operators + Author: Eckhart Arnold @@ -22,7 +23,7 @@ limitations under the License. import os import sys sys.path.append(os.path.abspath('../../')) -from ParserCombinators import run_compiler, source_changed +from DSLsupport import run_compiler, source_changed if (not os.path.exists('PopRetrieve_compiler.py') or source_changed('PopRetrieve.ebnf', 'PopRetrieve_compiler.py')): @@ -40,4 +41,4 @@ if errors: errors = run_compiler("PopRetrieveTest2.txt", 'PopRetrieve_compiler.py') if errors: print(errors) - sys.exit(1) \ No newline at end of file + sys.exit(1) diff --git a/tests/run.py b/tests/run.py index fd434ccfe2830233c3f24525fc74d69956070c7f..25bb7095a9188ed23178041acceae2bbba42dad7 100644 --- a/tests/run.py +++ b/tests/run.py @@ -1,2 +1,54 @@ -import os -os.system("nosetests") + +class SelfTest: + def setup(self): + print("setup") + + def teardown(self): + print("teardown") + + def test1(self): + print("test1") + + def test2(self): + print("test2") + + +def run_tests(tests, namespace): + """ Runs selected tests. + + Args: + tests: Either a string or a list of strings that contains the + names of test or test classes. Each test and, in the case + of a test class, all tests within the test class will be + run. 
+ namespace: The namespace for running the test, usually + ``globals()`` should be used. + """ + def instantiate(cls_name): + exec("obj = " + cls_name + "()", namespace) + obj = namespace["obj"] + if "setup" in dir(obj): + obj.setup() + return obj + + if isinstance(tests, str): + tests = tests.split(" ") + for test in tests: + if test.find('.') >= 0: + cls_name, method_name = test.split('.') + obj = instantiate(cls_name) + print("Running " + cls_name + "." + method_name) + exec('obj.' + method_name + '()') + else: + obj = instantiate(test) + for name in dir(obj): + if name.lower().startswith("test"): + print("Running " + test + "." + name) + exec('obj.' + name + '()') + if "teardown" in dir(obj): + obj.teardown() + +if __name__ == "__main__": + # run_tests("SelfTest.test1 SelfTest") + import os + os.system("nosetests") diff --git a/tests/test_EBNFcompiler.py b/tests/test_EBNFcompiler.py new file mode 100644 index 0000000000000000000000000000000000000000..5405cc7d2754be1f67c1f61b796c224c2395a2c7 --- /dev/null +++ b/tests/test_EBNFcompiler.py @@ -0,0 +1,68 @@ +#!/usr/bin/python3 + +"""test_EBNFcompiler.py - tests of the EBNFcompiler-module of DHParser + + +Author: Eckhart Arnold + +Copyright 2017 Bavarian Academy of Sciences and Humanities + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + +import os +import sys +sys.path.append(os.path.abspath('../../')) +from DSLsupport import compileEBNF, run_compiler, source_changed + + +WRITE_LOGS = True + + +class TestPopRetrieve: + mini_language = """ + document = { text | codeblock } + codeblock = delimiter { text | (!:delimiter delimiter_sign) } ::delimiter + delimiter = delimiter_sign + delimiter_sign = /`+/ + text = /[^`]+/ + """ + + def setup(self): + self.minilang_parser = compileEBNF(self.mini_language)() + + def test_compile_mini_language(self): + assert self.minilang_parser + + def test_single_line(self): + teststr = "Anfang ```code block `` <- keine Ende-Zeichen ! ``` Ende" + syntax_tree = self.minilang_parser.parse(teststr) + assert not syntax_tree.collect_errors() + if WRITE_LOGS: + syntax_tree.log("test_PopRetrieve_single_line", '.cst') + self.minilang_parser.log_parsing_history("test_PopRetrieve_single_line") + + def test_multi_line(self): + teststr = """ + Anfang ```code block `` <- keine Ende-Zeichen ! ``` Ebde + + Absatz ohne ``` codeblock, aber + das stellt sich erst am Ende herause... + + Mehrzeliger ```code block + """ + syntax_tree = self.minilang_parser.parse(teststr) + assert not syntax_tree.collect_errors() + if WRITE_LOGS: + syntax_tree.log("test_PopRetrieve_multi_line", '.cst') + self.minilang_parser.log_parsing_history("test_PopRetrieve_multi_line") diff --git a/tests/test_syntaxtree.py b/tests/test_syntaxtree.py new file mode 100644 index 0000000000000000000000000000000000000000..7b1f981a7c9918eded5874acc95c8d45732adb9e --- /dev/null +++ b/tests/test_syntaxtree.py @@ -0,0 +1,111 @@ +#!/usr/bin/python3 + +"""test_syntaxtree.py - test of syntaxtree-module of DHParser + + +Author: Eckhart Arnold + +Copyright 2017 Bavarian Academy of Sciences and Humanities + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
def from_sexpr(s):
    """Generates a tree of nodes from an S-expression, e.g.
    '(a (b c))' yields a node 'a' with a single child 'b' whose
    result is the string 'c'.
    """
    def next_block(s):
        # yield the top-level parenthesized sub-expressions of ``s``,
        # one at a time, by tracking the nesting level
        s = s.strip()
        while s[0] != ')':
            assert s[0] == '(', s
            level = 1
            i = 1
            while level > 0:
                if s[i] == '(':
                    level += 1
                elif s[i] == ')':
                    level -= 1
                i += 1
            yield s[:i]
            s = s[i:].strip()

    s = s.strip()
    assert s[0] == '(', s
    s = s[1:].strip()
    # raw strings for the regexes ('\w' in a plain string is a
    # deprecated invalid escape sequence)
    m = re.match(r'\w+', s)
    name = s[:m.end()]          # tag (i.e. mock parser) name
    s = s[m.end():].strip()
    if s[0] == '(':
        # inner node: recurse into each child sub-expression
        result = tuple(from_sexpr(block) for block in next_block(s))
    else:
        # leaf node: result is the literal word content
        m = re.match(r'\w+', s)
        result = s[:m.end()]
        s = s[m.end():].strip()
        assert s[0] == ')', s
    return Node(DummyParser(name), result)
+ """ + def test_compact_sexpr(self): + assert compact_sexpr("(a\n (b\n c\n )\n)\n") == "(a (b c))" + + def test_selftest_from_sexpr(self): + sexpr = '(a (b c) (d e) (f (g h)))' + tree = from_sexpr(sexpr) + assert compact_sexpr(tree.as_sexpr(prettyprint=False)) == sexpr + + +class TestNode: + """ + Tests for class Node + """ + def setup(self): + self.unique_nodes_sexpr = '(a (b c) (d e) (f (g h)))' + self.unique_tree = from_sexpr(self.unique_nodes_sexpr) + self.recurring_nodes_sexpr = '(a (b x) (c (d e) (b y)))' + self.recurr_tree = from_sexpr(self.recurring_nodes_sexpr) + + def test_str(self): + assert str(self.unique_tree) == "ceh" + assert str(self.recurr_tree) == "xey" + + def test_find(self): + found = list(self.unique_tree.find(lambda nd: not nd.children and nd.result == "e")) + assert len(found) == 1 + assert found[0].result == 'e' + found = list(self.recurr_tree.find(lambda nd: nd.tag_name == 'b')) + assert len(found) == 2 + assert found[0].result == 'x' and found[1].result == 'y' + + +if __name__ == "__main__": + from run import run_tests + run_tests("TestNode", globals())