10.12., 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit cd1cbe44 authored by Eckhart Arnold's avatar Eckhart Arnold

removed old (broken) unit tests; started adding new ones...

parent c7b50f80
......@@ -29,7 +29,7 @@ except ImportError:
import re
from EBNFcompiler import EBNFGrammar, EBNFCompiler, EBNFTransTable, load_if_file, md5
from logging import LOGGING
from logs import LOGGING
from parser import *
from syntaxtree import *
from version import __version__
......@@ -114,11 +114,11 @@ def compile_python_object(python_src, obj_name_ending="Grammar"):
exec(code, namespace) # safety risk?
for key in namespace.keys():
if key.endswith(obj_name_ending):
parser = namespace[key]
obj = namespace[key]
break
else:
parser = None
return parser
obj = None
return obj
def get_grammar_instance(grammar):
......@@ -196,6 +196,25 @@ def compileDSL(text_or_file, dsl_grammar, trans_table, compiler,
return result
def compileEBNF(ebnf_src, ebnf_grammar_obj=None):
    """Compiles an EBNF source into a Grammar class.

    Args:
        ebnf_src (str): Either the file name of an EBNF grammar or
            the EBNF grammar itself as a string.
        ebnf_grammar_obj: An existing instance of the
            DHParser.EBNFcompiler.EBNFGrammar object. Passing one can
            speed up compilation, because no new EBNFGrammar object
            needs to be instantiated.

    Returns:
        A Grammar class that can be instantiated for parsing a text
        which conforms to the language defined by ``ebnf_src``.
    """
    # Reuse the caller's grammar object when given, otherwise create one.
    ebnf_grammar = ebnf_grammar_obj or EBNFGrammar()
    python_src = compileDSL(ebnf_src, ebnf_grammar, EBNFTransTable, EBNFCompiler())
    return compile_python_object(python_src)
def run_compiler(source_file, compiler_suite="", extension=".xml"):
"""Compiles the a source file with a given compiler and writes the
result to a file.
......@@ -216,9 +235,9 @@ def run_compiler(source_file, compiler_suite="", extension=".xml"):
alls symbols in ``symbols`` (set or other container) from
python_module ``python_module``."""
symlist = list(symbols)
grouped = [symlist[i:i + 4] for i in range(0, len(symlist), 4)]
grouped = [symlist[i:i + 3] for i in range(0, len(symlist), 3)]
return ("\nfrom " + python_module + " import "
+ ', \\\n '.join(', '.join(g) for g in grouped) + '\n\n')
+ ', \\\n '.join(', '.join(g) for g in grouped))
filepath = os.path.normpath(source_file)
with open(source_file, encoding="utf-8") as f:
......@@ -248,7 +267,8 @@ def run_compiler(source_file, compiler_suite="", extension=".xml"):
intro, syms, scanner, parser, ast, compiler, outro = RX_SECTION_MARKER.split(source)
except (PermissionError, FileNotFoundError, IOError) as error:
intro, outro = '', ''
syms = import_block("PyDSL", PARSER_SYMBOLS | AST_SYMBOLS | {'CompilerBase'})
syms = 'import re\n' + import_block("DHParser.syntaxtree", AST_SYMBOLS)
syms += import_block("DHParser.parser", PARSER_SYMBOLS | {'CompilerBase'}) + '\n\n'
scanner = compiler.gen_scanner_skeleton()
ast = compiler.gen_AST_skeleton()
compiler = compiler.gen_compiler_skeleton()
......
#!/usr/bin/python3
"""logging.py - basic log file support for DHParser
"""logs.py - basic log file support for DHParser
Copyright 2016 by Eckhart Arnold (arnold@badw.de)
Bavarian Academy of Sciences and Humanities (badw.de)
......@@ -18,7 +18,7 @@ implied. See the License for the specific language governing
permissions and limitations under the License.
Module ``logging`` defines the global variable LOGGING which contains
Module ``logs`` defines the global variable LOGGING which contains
the name of a directory where log files shall be placed. By setting
its value to the empty string "" logging can be turned off.
......
......@@ -59,7 +59,7 @@ try:
except ImportError:
import re
from logging import LOGGING, LOGS_DIR
from logs import LOGGING, LOGS_DIR
from syntaxtree import WHITESPACE_KEYWORD, TOKEN_KEYWORD, ZOMBIE_PARSER, Node, \
error_messages, ASTTransform
......@@ -160,14 +160,14 @@ def add_parser_guard(parser_func):
parser.recursion_counter[location] += 1
grammar = parser.grammar
if grammar.track_history:
if grammar.history_tracking:
grammar.call_stack.append(parser)
grammar.moving_forward = True
# run original __call__ method
node, rest = parser_func(parser, text)
if grammar.track_history:
if grammar.history_tracking:
if grammar.moving_forward: # and result[0] == None
grammar.moving_forward = False
record = HistoryRecord(grammar.call_stack.copy(), node, len(rest))
......@@ -213,13 +213,14 @@ class Parser(metaclass=ParserMetaClass):
def __init__(self, name=None):
assert name is None or isinstance(name, str), str(name)
self.name = name or ''
self.grammar = None # center for global variables etc.
self._grammar = None # center for global variables etc.
self.reset()
def reset(self):
self.visited = dict()
self.recursion_counter = dict()
self.cycle_detection = set()
return self
def __call__(self, text):
return None, text # default behaviour: don't match
......@@ -282,9 +283,7 @@ class GrammarBase:
def __init__(self):
self.all_parsers = set()
self.dirty_flag = False
self.track_history = LOGGING
name = self.__class__.__name__
self.log_file_name = name[:-7] if name.lower().endswith('grammar') else name
self.history_tracking = LOGGING
self._reset()
self._assign_parser_names()
self.root__ = copy.deepcopy(self.__class__.root__)
......@@ -359,7 +358,7 @@ class GrammarBase:
result.pos = 0 # calculate all positions
return result
def log_parsing_history(self):
def log_parsing_history(self, log_file_name=''):
"""Writes a log of the parsing history of the most recently parsed
document.
"""
......@@ -367,10 +366,10 @@ class GrammarBase:
def prepare_line(record):
excerpt = self.document.__getitem__(slice(*record.extent))[:25].replace('\n', '\\n')
excerpt = "'%s'" % excerpt if len(excerpt) < 25 else "'%s...'" % excerpt
return (record.stack, record.status, excerpt)
return record.stack, record.status, excerpt
def write_log(history, log_name):
path = os.path.join(LOGS_DIR(), self.log_file_name + log_name + "_parser.log")
path = os.path.join(LOGS_DIR(), log_name + "_parser.log")
if history:
with open(path, "w", encoding="utf-8") as f:
f.write("\n".join(history))
......@@ -379,6 +378,9 @@ class GrammarBase:
if LOGGING:
assert self.history
if not log_file_name:
name = self.__class__.__name__
log_file_name = name[:-7] if name.lower().endswith('grammar') else name
full_history, match_history, errors_only = [], [], []
for record in self.history:
line = "; ".join(prepare_line(record))
......@@ -387,9 +389,9 @@ class GrammarBase:
match_history.append(line)
if record.node.errors:
errors_only.append(line)
write_log(full_history, '_full')
write_log(match_history, '_match')
write_log(errors_only, '_errors')
write_log(full_history, log_file_name + '_full')
write_log(match_history, log_file_name + '_match')
write_log(errors_only, log_file_name + '_errors')
......@@ -590,7 +592,7 @@ class Optional(UnaryOperator):
"Nesting options would be redundant: %s(%s)" % \
(str(name), str(parser.name))
assert not isinstance(parser, Required), \
"Nestion options with required elements is contradictory: " \
"Nesting options with required elements is contradictory: " \
"%s(%s)" % (str(name), str(parser.name))
def __call__(self, text):
......@@ -899,12 +901,14 @@ def full_compilation(source, grammar_base, AST_transformations, compiler):
of failure,
2. A list of error messages, each of which is a tuple
(position: int, error: str)
3. The root-node of the abstract syntax tree
3. The root-node of the abstract syntax tree
"""
assert isinstance(compiler, CompilerBase)
syntax_tree = grammar_base.parse(source)
syntax_tree.log(grammar_base.log_file_name, ext='.cst')
cname = grammar_base.__class__.__name__
log_file_name = cname[:-7] if cname.endswith('Grammar') else cname
syntax_tree.log(log_file_name, ext='.cst')
grammar_base.log_parsing_history()
assert syntax_tree.error_flag or str(syntax_tree) == source, str(syntax_tree)
......@@ -914,7 +918,7 @@ def full_compilation(source, grammar_base, AST_transformations, compiler):
result = None
else:
ASTTransform(syntax_tree, AST_transformations)
syntax_tree.log(grammar_base.log_file_name, ext='.ast')
syntax_tree.log(log_file_name, ext='.ast')
result = compiler.compile__(syntax_tree)
errors = syntax_tree.collect_errors()
messages = error_messages(source, errors)
......
......@@ -20,12 +20,16 @@ permissions and limitations under the License.
"""
import collections
import itertools
import os
from functools import partial
try:
import regex as re
except ImportError:
import re
from typing import NamedTuple
from logging import LOGGING, LOGS_DIR
from logs import LOGGING, LOGS_DIR
__all__ = ['WHITESPACE_KEYWORD',
......@@ -35,6 +39,7 @@ __all__ = ['WHITESPACE_KEYWORD',
'Error',
'Node',
'error_messages',
'compact_sexpr',
'ASTTransform',
'no_transformation',
'replace_by_single_child',
......@@ -150,7 +155,7 @@ class Node:
def __str__(self):
    """Returns the string content of this node: a leaf node's result
    as-is, otherwise the concatenated string content of all children.

    The diff residue left two alternative return statements here; only
    the generator-expression form is kept (it avoids building an
    intermediate list).
    """
    if self.children:
        return "".join(str(child) for child in self.result)
    return str(self.result)
@property
......@@ -236,7 +241,7 @@ class Node:
return head + '\n'.join([tab + dataF(s)
for s in str(self.result).split('\n')]) + tail
def as_sexpr(self, src=None):
def as_sexpr(self, src=None, prettyprint=True):
"""
Returns content as S-expression, i.e. in lisp-like form.
......@@ -244,6 +249,8 @@ class Node:
src: The source text or `None`. In case the source text is
given the position of the element in the text will be
reported as line and column.
prettyprint(bool): True (default), if pretty printing
of leaf nodes shall be applied for better readability.
"""
def opening(node):
......@@ -261,7 +268,8 @@ class Node:
else "'%s'" % s if s.find("'") < 0 \
else '"%s"' % s.replace('"', r'\"')
return self._tree_repr(' ', opening, lambda node: ')', pretty)
return self._tree_repr(' ', opening, lambda node: ')',
pretty if prettyprint else lambda s: s)
def as_xml(self, src=None):
"""
......@@ -318,33 +326,50 @@ class Node:
with open(os.path.join(LOGS_DIR(), st_file_name), "w", encoding="utf-8") as f:
f.write(self.as_sexpr())
def find(self, match_function):
    """Finds nodes in the tree that match a specific criterion.

    ``find`` is a generator that yields all nodes for which the
    given ``match_function`` evaluates to True. The tree is
    traversed pre-order; the children of a matching node are not
    searched any further.

    Args:
        match_function (function): A function that takes a Node
            object as argument and returns True or False.

    Yields:
        Node: all nodes of the tree for which
        ``match_function(node)`` returns True.
    """
    if match_function(self):
        yield self
        return
    for child in self.children:
        yield from child.find(match_function)
def navigate(self, path):
"""EXPERIMENTAL! NOT YET TESTED!!!
Returns the first descendant element matched by `path`, e.g.
'd/s' returns 'l' from (d (s l)(e (r x1) (r x2))
'e/r' returns 'x2'
'e' returns (r x1)(r x2)
"""Yields the results of all descendant elements matched by
``path``, e.g.
'd/s' yields 'l' from (d (s l)(e (r x1) (r x2))
'e/r' yields 'x1', then 'x2'
'e' yields (r x1)(r x2)
Parameters:
path (str): The path of the object, e.g. 'a/b/c'
path (str): The path of the object, e.g. 'a/b/c'. The
components of ``path`` can be regular expressions
Returns:
The object at the path, either a string or a Node or
``None``, if the path did not match.
"""
pl = path.strip('')
assert pl[0] != '/', 'Path must noch start with "/"!'
nd = self
for p in pl:
if isinstance(nd.result, str):
return p if (p == nd.result) and (p == pl[-1]) else None
for child in nd.result:
if str(child) == p:
nd = child
break
def nav(node, pl):
if pl:
return itertools.chain(nav(child, pl[1:]) for child in node.children
if re.match(pl[0], child.tag_name))
else:
return None
return nd
return self.result,
return nav(path.split('/'))
def error_messages(text, errors):
......@@ -359,7 +384,15 @@ def error_messages(text, errors):
for err in sorted(list(errors)))
# lambda compact_sexpr s : re.sub('\s(?=\))', '', re.sub('\s+', ' ', s)).strip()
def compact_sexpr(s):
    """Returns S-expression ``s`` as a one-liner without unnecessary
    whitespace.

    Example:
        >>> compact_sexpr("(a\n (b\n c\n )\n)\n")
        '(a (b c))'
    """
    # Raw strings for the regex patterns: '\s' in a plain string is an
    # invalid escape sequence and raises a warning on modern Python.
    # First collapse all whitespace runs to single spaces, then drop the
    # space that precedes every closing parenthesis.
    return re.sub(r'\s(?=\))', '', re.sub(r'\s+', ' ', s)).strip()
########################################################################
......
......@@ -6,17 +6,21 @@
#
#######################################################################
import re
from PyDSL import ZeroOrMore, Capture, mixin_comment, OneOrMore, \
remove_comments, partial, Lookahead, remove_scanner_tokens, \
Lookbehind, flatten, NegativeLookbehind, remove_enclosing_delimiters, \
NegativeLookahead, remove_whitespace, is_whitespace, reduce_single_child, \
RE, is_scanner_token, Retrieve, remove_children_if, \
Sequence, Token, CompilerBase, is_comment, \
remove_expendables, remove_tokens, Alternative, is_expendable, \
Optional, no_transformation, TOKEN_KEYWORD, RegExp, \
replace_by_single_child, Required, GrammarBase, WHITESPACE_KEYWORD, \
Forward, Pop
from DHParser.syntaxtree import remove_whitespace, no_transformation, replace_by_single_child, \
is_expendable, remove_children_if, TOKEN_KEYWORD, \
remove_brackets, partial, flatten, \
remove_expendables, WHITESPACE_KEYWORD, is_whitespace, \
remove_tokens, reduce_single_child
from DHParser.parser import mixin_comment, Required, Pop, \
ZeroOrMore, Token, CompilerBase, \
Sequence, Retrieve, Lookahead, \
GrammarBase, Optional, NegativeLookbehind, \
RegExp, Lookbehind, Capture, \
NegativeLookahead, Alternative, OneOrMore, \
Forward, RE
......@@ -45,9 +49,10 @@ class PopRetrieveGrammar(GrammarBase):
delimiter_sign = /`+/
text = /[^`]+/
"""
source_hash__ = "50f817c35d08825b20a95664a555d9b0"
source_hash__ = "4a1025732f79bf6787d1f753cbec7fc3"
parser_initialization__ = "upon instatiation"
wsp__ = mixin_comment(whitespace=r'\s*', comment=r'')
COMMENT__ = r''
WSP__ = mixin_comment(whitespace=r'\s*', comment=r'')
wspL__ = ''
wspR__ = ''
text = RE('[^`]+')
......
#!/usr/bin/python3
"""compile_MLW.py - simple utility script for compiling MLW.ebnf
"""compile_PopRetrieve.py - test of Pop and Retrieve operators
Author: Eckhart Arnold <arnold@badw.de>
......@@ -22,7 +23,7 @@ limitations under the License.
import os
import sys
sys.path.append(os.path.abspath('../../'))
from ParserCombinators import run_compiler, source_changed
from DSLsupport import run_compiler, source_changed
if (not os.path.exists('PopRetrieve_compiler.py') or
source_changed('PopRetrieve.ebnf', 'PopRetrieve_compiler.py')):
......@@ -40,4 +41,4 @@ if errors:
errors = run_compiler("PopRetrieveTest2.txt", 'PopRetrieve_compiler.py')
if errors:
print(errors)
sys.exit(1)
\ No newline at end of file
sys.exit(1)
import os
os.system("nosetests")
class SelfTest:
    """Dummy test class for exercising the hand-rolled run_tests()
    helper below; each method merely prints its own name so the call
    order can be observed on stdout."""

    def setup(self):
        # invoked by run_tests() right after the class is instantiated
        print("setup")

    def teardown(self):
        # invoked by run_tests() after the tests of an instance have run
        print("teardown")

    def test1(self):
        print("test1")

    def test2(self):
        print("test2")
def run_tests(tests, namespace):
""" Runs selected tests.
Args:
tests: Either a string or a list of strings that contains the
names of test or test classes. Each test and, in the case
of a test class, all tests within the test class will be
run.
namespace: The namespace for running the test, usually
``globals()`` should be used.
"""
def instantiate(cls_name):
exec("obj = " + cls_name + "()", namespace)
obj = namespace["obj"]
if "setup" in dir(obj):
obj.setup()
return obj
if isinstance(tests, str):
tests = tests.split(" ")
for test in tests:
if test.find('.') >= 0:
cls_name, method_name = test.split('.')
obj = instantiate(cls_name)
print("Running " + cls_name + "." + method_name)
exec('obj.' + method_name + '()')
else:
obj = instantiate(test)
for name in dir(obj):
if name.lower().startswith("test"):
print("Running " + test + "." + name)
exec('obj.' + name + '()')
if "teardown" in dir(obj):
obj.teardown()
if __name__ == "__main__":
    # Script entry point: delegate to nosetests for test discovery.
    # The commented-out line shows how the hand-rolled runner above
    # would be invoked instead.
    # run_tests("SelfTest.test1 SelfTest")
    import os
    os.system("nosetests")
#!/usr/bin/python3
"""test_EBNFcompiler.py - tests of the EBNFcompiler-module of DHParser
Author: Eckhart Arnold <arnold@badw.de>
Copyright 2017 Bavarian Academy of Sciences and Humanities
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import os
import sys
sys.path.append(os.path.abspath('../../'))
from DSLsupport import compileEBNF, run_compiler, source_changed
WRITE_LOGS = True
class TestPopRetrieve:
    """Tests of the Pop and Retrieve operators, based on a small
    code-block mini-language whose parser is compiled on the fly
    from its EBNF grammar."""

    mini_language = """
        document = { text | codeblock }
        codeblock = delimiter { text | (!:delimiter delimiter_sign) } ::delimiter
        delimiter = delimiter_sign
        delimiter_sign = /`+/
        text = /[^`]+/
        """

    def setup(self):
        # Compile the grammar into a parser class and instantiate it.
        self.minilang_parser = compileEBNF(self.mini_language)()

    def test_compile_mini_language(self):
        assert self.minilang_parser

    def test_single_line(self):
        snippet = "Anfang ```code block `` <- keine Ende-Zeichen ! ``` Ende"
        tree = self.minilang_parser.parse(snippet)
        assert not tree.collect_errors()
        if WRITE_LOGS:
            tree.log("test_PopRetrieve_single_line", '.cst')
            self.minilang_parser.log_parsing_history("test_PopRetrieve_single_line")

    def test_multi_line(self):
        snippet = """
            Anfang ```code block `` <- keine Ende-Zeichen ! ``` Ebde

            Absatz ohne ``` codeblock, aber
            das stellt sich erst am Ende herause...

            Mehrzeliger ```code block
            """
        tree = self.minilang_parser.parse(snippet)
        assert not tree.collect_errors()
        if WRITE_LOGS:
            tree.log("test_PopRetrieve_multi_line", '.cst')
            self.minilang_parser.log_parsing_history("test_PopRetrieve_multi_line")
#!/usr/bin/python3
"""test_syntaxtree.py - test of syntaxtree-module of DHParser
Author: Eckhart Arnold <arnold@badw.de>
Copyright 2017 Bavarian Academy of Sciences and Humanities
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import os
import re
import sys
sys.path.append(os.path.abspath('../../'))
from syntaxtree import Node, compact_sexpr
class DummyParser:
    """Minimal stand-in for a parser object: carries a name and, when
    called, matches nothing (returns no node and the unconsumed text)."""

    def __init__(self, name=''):
        self.name = name

    def __str__(self):
        # Fall back to the class name when no name was given.
        return self.name if self.name else self.__class__.__name__

    def __call__(self, text):
        return None, text
def from_sexpr(s):
    """Generates a tree of nodes from an S-expression.

    Each node is built with a DummyParser named after the expression's
    head symbol; the result is either the nested child nodes or the
    leaf's literal content.
    """
    def iter_blocks(text):
        # Yield each top-level "(...)" group of ``text`` in turn,
        # tracking parenthesis depth to find matching closers.
        text = text.strip()
        while text[0] != ')':
            assert text[0] == '(', text
            depth, pos = 1, 1
            while depth > 0:
                if text[pos] == '(':
                    depth += 1
                elif text[pos] == ')':
                    depth -= 1
                pos += 1
            yield text[:pos]
            text = text[pos:].strip()

    s = s.strip()
    assert s[0] == '(', s
    s = s[1:].strip()
    match = re.match(r'\w+', s)
    name = s[:match.end()]
    s = s[match.end():].strip()
    if s[0] == '(':
        # Nested expressions: recurse on every child block.
        result = tuple(from_sexpr(block) for block in iter_blocks(s))
    else:
        # Leaf: the remaining word is the node's string content.
        match = re.match(r'\w+', s)
        result = s[:match.end()]
        s = s[match.end():].strip()
        assert s[0] == ')', s
    return Node(DummyParser(name), result)
class TestSExpr:
    """Tests for S-expression handling."""

    def test_compact_sexpr(self):
        # Multi-line S-expression collapses to a single line.
        assert compact_sexpr("(a\n (b\n c\n )\n)\n") == "(a (b c))"

    def test_selftest_from_sexpr(self):
        # Round trip: parse an S-expression into a node tree and
        # serialize it back; the compact form must be unchanged.
        expr = '(a (b c) (d e) (f (g h)))'
        node_tree = from_sexpr(expr)
        assert compact_sexpr(node_tree.as_sexpr(prettyprint=False)) == expr
class TestNode:
"""
Tests for class Node
"""
def setup(self):