Commit cd1cbe44 authored by Eckhart Arnold's avatar Eckhart Arnold

removed old (broken) unit tests; started adding new ones...

parent c7b50f80
...@@ -29,7 +29,7 @@ except ImportError: ...@@ -29,7 +29,7 @@ except ImportError:
import re import re
from EBNFcompiler import EBNFGrammar, EBNFCompiler, EBNFTransTable, load_if_file, md5 from EBNFcompiler import EBNFGrammar, EBNFCompiler, EBNFTransTable, load_if_file, md5
from logging import LOGGING from logs import LOGGING
from parser import * from parser import *
from syntaxtree import * from syntaxtree import *
from version import __version__ from version import __version__
...@@ -114,11 +114,11 @@ def compile_python_object(python_src, obj_name_ending="Grammar"): ...@@ -114,11 +114,11 @@ def compile_python_object(python_src, obj_name_ending="Grammar"):
exec(code, namespace) # safety risk? exec(code, namespace) # safety risk?
for key in namespace.keys(): for key in namespace.keys():
if key.endswith(obj_name_ending): if key.endswith(obj_name_ending):
parser = namespace[key] obj = namespace[key]
break break
else: else:
parser = None obj = None
return parser return obj
def get_grammar_instance(grammar): def get_grammar_instance(grammar):
...@@ -196,6 +196,25 @@ def compileDSL(text_or_file, dsl_grammar, trans_table, compiler, ...@@ -196,6 +196,25 @@ def compileDSL(text_or_file, dsl_grammar, trans_table, compiler,
return result return result
def compileEBNF(ebnf_src, ebnf_grammar_obj=None):
    """Compiles an EBNF source file into a Grammar class.

    Args:
        ebnf_src(str): Either the file name of an EBNF grammar or
            the EBNF grammar itself as a string.
        ebnf_grammar_obj: An existing instance of the
            DHParser.EBNFcompiler.EBNFGrammar object. This can speed
            up compilation, because no new EBNFGrammar object needs to
            be instantiated.
    Returns:
        A Grammar class that can be instantiated for parsing a text
        which conforms to the language defined by ``ebnf_src``
    """
    # reuse the caller-supplied grammar object if one was given,
    # otherwise instantiate a fresh EBNF grammar
    ebnf_parser = ebnf_grammar_obj if ebnf_grammar_obj else EBNFGrammar()
    python_src = compileDSL(ebnf_src, ebnf_parser, EBNFTransTable, EBNFCompiler())
    # extract the generated "...Grammar" class from the compiled source
    return compile_python_object(python_src)
def run_compiler(source_file, compiler_suite="", extension=".xml"): def run_compiler(source_file, compiler_suite="", extension=".xml"):
"""Compiles the a source file with a given compiler and writes the """Compiles the a source file with a given compiler and writes the
result to a file. result to a file.
...@@ -216,9 +235,9 @@ def run_compiler(source_file, compiler_suite="", extension=".xml"): ...@@ -216,9 +235,9 @@ def run_compiler(source_file, compiler_suite="", extension=".xml"):
alls symbols in ``symbols`` (set or other container) from alls symbols in ``symbols`` (set or other container) from
python_module ``python_module``.""" python_module ``python_module``."""
symlist = list(symbols) symlist = list(symbols)
grouped = [symlist[i:i + 4] for i in range(0, len(symlist), 4)] grouped = [symlist[i:i + 3] for i in range(0, len(symlist), 3)]
return ("\nfrom " + python_module + " import " return ("\nfrom " + python_module + " import "
+ ', \\\n '.join(', '.join(g) for g in grouped) + '\n\n') + ', \\\n '.join(', '.join(g) for g in grouped))
filepath = os.path.normpath(source_file) filepath = os.path.normpath(source_file)
with open(source_file, encoding="utf-8") as f: with open(source_file, encoding="utf-8") as f:
...@@ -248,7 +267,8 @@ def run_compiler(source_file, compiler_suite="", extension=".xml"): ...@@ -248,7 +267,8 @@ def run_compiler(source_file, compiler_suite="", extension=".xml"):
intro, syms, scanner, parser, ast, compiler, outro = RX_SECTION_MARKER.split(source) intro, syms, scanner, parser, ast, compiler, outro = RX_SECTION_MARKER.split(source)
except (PermissionError, FileNotFoundError, IOError) as error: except (PermissionError, FileNotFoundError, IOError) as error:
intro, outro = '', '' intro, outro = '', ''
syms = import_block("PyDSL", PARSER_SYMBOLS | AST_SYMBOLS | {'CompilerBase'}) syms = 'import re\n' + import_block("DHParser.syntaxtree", AST_SYMBOLS)
syms += import_block("DHParser.parser", PARSER_SYMBOLS | {'CompilerBase'}) + '\n\n'
scanner = compiler.gen_scanner_skeleton() scanner = compiler.gen_scanner_skeleton()
ast = compiler.gen_AST_skeleton() ast = compiler.gen_AST_skeleton()
compiler = compiler.gen_compiler_skeleton() compiler = compiler.gen_compiler_skeleton()
......
#!/usr/bin/python3 #!/usr/bin/python3
"""logging.py - basic log file support for DHParser """logs.py - basic log file support for DHParser
Copyright 2016 by Eckhart Arnold (arnold@badw.de) Copyright 2016 by Eckhart Arnold (arnold@badw.de)
Bavarian Academy of Sciences and Humanities (badw.de) Bavarian Academy of Sciences and Humanities (badw.de)
...@@ -18,7 +18,7 @@ implied. See the License for the specific language governing ...@@ -18,7 +18,7 @@ implied. See the License for the specific language governing
permissions and limitations under the License. permissions and limitations under the License.
Module ``logging`` defines the global variable LOGGING which contains Module ``logs`` defines the global variable LOGGING which contains
the name of a directory where log files shall be placed. By setting the name of a directory where log files shall be placed. By setting
its value to the empty string "" logging can be turned off. its value to the empty string "" logging can be turned off.
......
...@@ -59,7 +59,7 @@ try: ...@@ -59,7 +59,7 @@ try:
except ImportError: except ImportError:
import re import re
from logging import LOGGING, LOGS_DIR from logs import LOGGING, LOGS_DIR
from syntaxtree import WHITESPACE_KEYWORD, TOKEN_KEYWORD, ZOMBIE_PARSER, Node, \ from syntaxtree import WHITESPACE_KEYWORD, TOKEN_KEYWORD, ZOMBIE_PARSER, Node, \
error_messages, ASTTransform error_messages, ASTTransform
...@@ -160,14 +160,14 @@ def add_parser_guard(parser_func): ...@@ -160,14 +160,14 @@ def add_parser_guard(parser_func):
parser.recursion_counter[location] += 1 parser.recursion_counter[location] += 1
grammar = parser.grammar grammar = parser.grammar
if grammar.track_history: if grammar.history_tracking:
grammar.call_stack.append(parser) grammar.call_stack.append(parser)
grammar.moving_forward = True grammar.moving_forward = True
# run original __call__ method # run original __call__ method
node, rest = parser_func(parser, text) node, rest = parser_func(parser, text)
if grammar.track_history: if grammar.history_tracking:
if grammar.moving_forward: # and result[0] == None if grammar.moving_forward: # and result[0] == None
grammar.moving_forward = False grammar.moving_forward = False
record = HistoryRecord(grammar.call_stack.copy(), node, len(rest)) record = HistoryRecord(grammar.call_stack.copy(), node, len(rest))
...@@ -213,13 +213,14 @@ class Parser(metaclass=ParserMetaClass): ...@@ -213,13 +213,14 @@ class Parser(metaclass=ParserMetaClass):
def __init__(self, name=None): def __init__(self, name=None):
assert name is None or isinstance(name, str), str(name) assert name is None or isinstance(name, str), str(name)
self.name = name or '' self.name = name or ''
self.grammar = None # center for global variables etc. self._grammar = None # center for global variables etc.
self.reset() self.reset()
def reset(self): def reset(self):
self.visited = dict() self.visited = dict()
self.recursion_counter = dict() self.recursion_counter = dict()
self.cycle_detection = set() self.cycle_detection = set()
return self
def __call__(self, text): def __call__(self, text):
return None, text # default behaviour: don't match return None, text # default behaviour: don't match
...@@ -282,9 +283,7 @@ class GrammarBase: ...@@ -282,9 +283,7 @@ class GrammarBase:
def __init__(self): def __init__(self):
self.all_parsers = set() self.all_parsers = set()
self.dirty_flag = False self.dirty_flag = False
self.track_history = LOGGING self.history_tracking = LOGGING
name = self.__class__.__name__
self.log_file_name = name[:-7] if name.lower().endswith('grammar') else name
self._reset() self._reset()
self._assign_parser_names() self._assign_parser_names()
self.root__ = copy.deepcopy(self.__class__.root__) self.root__ = copy.deepcopy(self.__class__.root__)
...@@ -359,7 +358,7 @@ class GrammarBase: ...@@ -359,7 +358,7 @@ class GrammarBase:
result.pos = 0 # calculate all positions result.pos = 0 # calculate all positions
return result return result
def log_parsing_history(self): def log_parsing_history(self, log_file_name=''):
"""Writes a log of the parsing history of the most recently parsed """Writes a log of the parsing history of the most recently parsed
document. document.
""" """
...@@ -367,10 +366,10 @@ class GrammarBase: ...@@ -367,10 +366,10 @@ class GrammarBase:
def prepare_line(record): def prepare_line(record):
excerpt = self.document.__getitem__(slice(*record.extent))[:25].replace('\n', '\\n') excerpt = self.document.__getitem__(slice(*record.extent))[:25].replace('\n', '\\n')
excerpt = "'%s'" % excerpt if len(excerpt) < 25 else "'%s...'" % excerpt excerpt = "'%s'" % excerpt if len(excerpt) < 25 else "'%s...'" % excerpt
return (record.stack, record.status, excerpt) return record.stack, record.status, excerpt
def write_log(history, log_name): def write_log(history, log_name):
path = os.path.join(LOGS_DIR(), self.log_file_name + log_name + "_parser.log") path = os.path.join(LOGS_DIR(), log_name + "_parser.log")
if history: if history:
with open(path, "w", encoding="utf-8") as f: with open(path, "w", encoding="utf-8") as f:
f.write("\n".join(history)) f.write("\n".join(history))
...@@ -379,6 +378,9 @@ class GrammarBase: ...@@ -379,6 +378,9 @@ class GrammarBase:
if LOGGING: if LOGGING:
assert self.history assert self.history
if not log_file_name:
name = self.__class__.__name__
log_file_name = name[:-7] if name.lower().endswith('grammar') else name
full_history, match_history, errors_only = [], [], [] full_history, match_history, errors_only = [], [], []
for record in self.history: for record in self.history:
line = "; ".join(prepare_line(record)) line = "; ".join(prepare_line(record))
...@@ -387,9 +389,9 @@ class GrammarBase: ...@@ -387,9 +389,9 @@ class GrammarBase:
match_history.append(line) match_history.append(line)
if record.node.errors: if record.node.errors:
errors_only.append(line) errors_only.append(line)
write_log(full_history, '_full') write_log(full_history, log_file_name + '_full')
write_log(match_history, '_match') write_log(match_history, log_file_name + '_match')
write_log(errors_only, '_errors') write_log(errors_only, log_file_name + '_errors')
...@@ -590,7 +592,7 @@ class Optional(UnaryOperator): ...@@ -590,7 +592,7 @@ class Optional(UnaryOperator):
"Nesting options would be redundant: %s(%s)" % \ "Nesting options would be redundant: %s(%s)" % \
(str(name), str(parser.name)) (str(name), str(parser.name))
assert not isinstance(parser, Required), \ assert not isinstance(parser, Required), \
"Nestion options with required elements is contradictory: " \ "Nesting options with required elements is contradictory: " \
"%s(%s)" % (str(name), str(parser.name)) "%s(%s)" % (str(name), str(parser.name))
def __call__(self, text): def __call__(self, text):
...@@ -899,12 +901,14 @@ def full_compilation(source, grammar_base, AST_transformations, compiler): ...@@ -899,12 +901,14 @@ def full_compilation(source, grammar_base, AST_transformations, compiler):
of failure, of failure,
2. A list of error messages, each of which is a tuple 2. A list of error messages, each of which is a tuple
(position: int, error: str) (position: int, error: str)
3. The root-node of the abstract syntax tree 3. The root-node of the abstract syntax tree
""" """
assert isinstance(compiler, CompilerBase) assert isinstance(compiler, CompilerBase)
syntax_tree = grammar_base.parse(source) syntax_tree = grammar_base.parse(source)
syntax_tree.log(grammar_base.log_file_name, ext='.cst') cname = grammar_base.__class__.__name__
log_file_name = cname[:-7] if cname.endswith('Grammar') else cname
syntax_tree.log(log_file_name, ext='.cst')
grammar_base.log_parsing_history() grammar_base.log_parsing_history()
assert syntax_tree.error_flag or str(syntax_tree) == source, str(syntax_tree) assert syntax_tree.error_flag or str(syntax_tree) == source, str(syntax_tree)
...@@ -914,7 +918,7 @@ def full_compilation(source, grammar_base, AST_transformations, compiler): ...@@ -914,7 +918,7 @@ def full_compilation(source, grammar_base, AST_transformations, compiler):
result = None result = None
else: else:
ASTTransform(syntax_tree, AST_transformations) ASTTransform(syntax_tree, AST_transformations)
syntax_tree.log(grammar_base.log_file_name, ext='.ast') syntax_tree.log(log_file_name, ext='.ast')
result = compiler.compile__(syntax_tree) result = compiler.compile__(syntax_tree)
errors = syntax_tree.collect_errors() errors = syntax_tree.collect_errors()
messages = error_messages(source, errors) messages = error_messages(source, errors)
......
...@@ -20,12 +20,16 @@ permissions and limitations under the License. ...@@ -20,12 +20,16 @@ permissions and limitations under the License.
""" """
import collections import collections
import itertools
import os import os
from functools import partial from functools import partial
try:
import regex as re
except ImportError:
import re
from typing import NamedTuple from typing import NamedTuple
from logging import LOGGING, LOGS_DIR from logs import LOGGING, LOGS_DIR
__all__ = ['WHITESPACE_KEYWORD', __all__ = ['WHITESPACE_KEYWORD',
...@@ -35,6 +39,7 @@ __all__ = ['WHITESPACE_KEYWORD', ...@@ -35,6 +39,7 @@ __all__ = ['WHITESPACE_KEYWORD',
'Error', 'Error',
'Node', 'Node',
'error_messages', 'error_messages',
'compact_sexpr',
'ASTTransform', 'ASTTransform',
'no_transformation', 'no_transformation',
'replace_by_single_child', 'replace_by_single_child',
...@@ -150,7 +155,7 @@ class Node: ...@@ -150,7 +155,7 @@ class Node:
def __str__(self): def __str__(self):
if self.children: if self.children:
return "".join([str(child) for child in self.result]) return "".join(str(child) for child in self.result)
return str(self.result) return str(self.result)
@property @property
...@@ -236,7 +241,7 @@ class Node: ...@@ -236,7 +241,7 @@ class Node:
return head + '\n'.join([tab + dataF(s) return head + '\n'.join([tab + dataF(s)
for s in str(self.result).split('\n')]) + tail for s in str(self.result).split('\n')]) + tail
def as_sexpr(self, src=None): def as_sexpr(self, src=None, prettyprint=True):
""" """
Returns content as S-expression, i.e. in lisp-like form. Returns content as S-expression, i.e. in lisp-like form.
...@@ -244,6 +249,8 @@ class Node: ...@@ -244,6 +249,8 @@ class Node:
src: The source text or `None`. In case the source text is src: The source text or `None`. In case the source text is
given the position of the element in the text will be given the position of the element in the text will be
reported as line and column. reported as line and column.
prettyprint(bool): True (default), if pretty printing
of leaf nodes shall be applied for better readability.
""" """
def opening(node): def opening(node):
...@@ -261,7 +268,8 @@ class Node: ...@@ -261,7 +268,8 @@ class Node:
else "'%s'" % s if s.find("'") < 0 \ else "'%s'" % s if s.find("'") < 0 \
else '"%s"' % s.replace('"', r'\"') else '"%s"' % s.replace('"', r'\"')
return self._tree_repr(' ', opening, lambda node: ')', pretty) return self._tree_repr(' ', opening, lambda node: ')',
pretty if prettyprint else lambda s: s)
def as_xml(self, src=None): def as_xml(self, src=None):
""" """
...@@ -318,33 +326,50 @@ class Node: ...@@ -318,33 +326,50 @@ class Node:
with open(os.path.join(LOGS_DIR(), st_file_name), "w", encoding="utf-8") as f: with open(os.path.join(LOGS_DIR(), st_file_name), "w", encoding="utf-8") as f:
f.write(self.as_sexpr()) f.write(self.as_sexpr())
def find(self, match_function):
    """Finds nodes in the tree that match a specific criterion.

    ``find`` is a generator that yields matching nodes. The tree is
    traversed pre-order, but the search does NOT descend below a
    matching node: once a node matches, it is yielded and its
    subtree is pruned from the search.

    Args:
        match_function (function): A function that takes a Node
            object as argument and returns True or False
    Yields:
        Node: all nodes for which ``match_function(node)`` returns
            True and whose ancestors did not already match
    """
    if match_function(self):
        yield self
    else:
        # only recurse while no ancestor has matched (pruning)
        for child in self.children:
            for nd in child.find(match_function):
                yield nd
def navigate(self, path): def navigate(self, path):
"""EXPERIMENTAL! NOT YET TESTED!!! """Yields the results of all descendant elements matched by
Returns the first descendant element matched by `path`, e.g. ``path``, e.g.
'd/s' returns 'l' from (d (s l)(e (r x1) (r x2)) 'd/s' yields 'l' from (d (s l)(e (r x1) (r x2))
'e/r' returns 'x2' 'e/r' yields 'x1', then 'x2'
'e' returns (r x1)(r x2) 'e' yields (r x1)(r x2)
Parameters: Parameters:
path (str): The path of the object, e.g. 'a/b/c' path (str): The path of the object, e.g. 'a/b/c'. The
components of ``path`` can be regular expressions
Returns: Returns:
The object at the path, either a string or a Node or The object at the path, either a string or a Node or
``None``, if the path did not match. ``None``, if the path did not match.
""" """
pl = path.strip('') def nav(node, pl):
assert pl[0] != '/', 'Path must noch start with "/"!' if pl:
nd = self return itertools.chain(nav(child, pl[1:]) for child in node.children
for p in pl: if re.match(pl[0], child.tag_name))
if isinstance(nd.result, str):
return p if (p == nd.result) and (p == pl[-1]) else None
for child in nd.result:
if str(child) == p:
nd = child
break
else: else:
return None return self.result,
return nd return nav(path.split('/'))
def error_messages(text, errors): def error_messages(text, errors):
...@@ -359,7 +384,15 @@ def error_messages(text, errors): ...@@ -359,7 +384,15 @@ def error_messages(text, errors):
for err in sorted(list(errors))) for err in sorted(list(errors)))
def compact_sexpr(s):
    """Returns S-expression ``s`` as a one-liner without unnecessary
    whitespace.

    All runs of whitespace (including newlines) are collapsed to a
    single space and any whitespace immediately before a closing
    bracket is removed.

    Args:
        s (str): an S-expression, possibly pretty-printed over
            several lines

    Returns:
        str: the compacted one-line S-expression

    Example:
        >>> compact_sexpr("(a  (b   c) )")
        '(a (b c))'
    """
    # raw strings for the regexes: '\\s' in an ordinary string
    # literal is an invalid escape sequence (deprecated since 3.6)
    return re.sub(r'\s(?=\))', '', re.sub(r'\s+', ' ', s)).strip()
######################################################################## ########################################################################
......
...@@ -6,17 +6,21 @@ ...@@ -6,17 +6,21 @@
# #
####################################################################### #######################################################################
import re
from PyDSL import ZeroOrMore, Capture, mixin_comment, OneOrMore, \
remove_comments, partial, Lookahead, remove_scanner_tokens, \ from DHParser.syntaxtree import remove_whitespace, no_transformation, replace_by_single_child, \
Lookbehind, flatten, NegativeLookbehind, remove_enclosing_delimiters, \ is_expendable, remove_children_if, TOKEN_KEYWORD, \
NegativeLookahead, remove_whitespace, is_whitespace, reduce_single_child, \ remove_brackets, partial, flatten, \
RE, is_scanner_token, Retrieve, remove_children_if, \ remove_expendables, WHITESPACE_KEYWORD, is_whitespace, \
Sequence, Token, CompilerBase, is_comment, \ remove_tokens, reduce_single_child
remove_expendables, remove_tokens, Alternative, is_expendable, \ from DHParser.parser import mixin_comment, Required, Pop, \
Optional, no_transformation, TOKEN_KEYWORD, RegExp, \ ZeroOrMore, Token, CompilerBase, \
replace_by_single_child, Required, GrammarBase, WHITESPACE_KEYWORD, \ Sequence, Retrieve, Lookahead, \
Forward, Pop GrammarBase, Optional, NegativeLookbehind, \
RegExp, Lookbehind, Capture, \
NegativeLookahead, Alternative, OneOrMore, \
Forward, RE
...@@ -45,9 +49,10 @@ class PopRetrieveGrammar(GrammarBase): ...@@ -45,9 +49,10 @@ class PopRetrieveGrammar(GrammarBase):
delimiter_sign = /`+/ delimiter_sign = /`+/
text = /[^`]+/ text = /[^`]+/
""" """
source_hash__ = "50f817c35d08825b20a95664a555d9b0" source_hash__ = "4a1025732f79bf6787d1f753cbec7fc3"
parser_initialization__ = "upon instatiation" parser_initialization__ = "upon instatiation"
wsp__ = mixin_comment(whitespace=r'\s*', comment=r'') COMMENT__ = r''
WSP__ = mixin_comment(whitespace=r'\s*', comment=r'')
wspL__ = '' wspL__ = ''
wspR__ = '' wspR__ = ''
text = RE('[^`]+') text = RE('[^`]+')
......
#!/usr/bin/python3 #!/usr/bin/python3
"""compile_MLW.py - simple utility script for compiling MLW.ebnf """compile_PopRetrieve.py - test of Pop and Retrieve operators
Author: Eckhart Arnold <arnold@badw.de> Author: Eckhart Arnold <arnold@badw.de>
...@@ -22,7 +23,7 @@ limitations under the License. ...@@ -22,7 +23,7 @@ limitations under the License.
import os import os
import sys import sys
sys.path.append(os.path.abspath('../../')) sys.path.append(os.path.abspath('../../'))
from ParserCombinators import run_compiler, source_changed from DSLsupport import run_compiler, source_changed
if (not os.path.exists('PopRetrieve_compiler.py') or if (not os.path.exists('PopRetrieve_compiler.py') or
source_changed('PopRetrieve.ebnf', 'PopRetrieve_compiler.py')): source_changed('PopRetrieve.ebnf', 'PopRetrieve_compiler.py')):
...@@ -40,4 +41,4 @@ if errors: ...@@ -40,4 +41,4 @@ if errors:
errors = run_compiler("PopRetrieveTest2.txt", 'PopRetrieve_compiler.py') errors = run_compiler("PopRetrieveTest2.txt", 'PopRetrieve_compiler.py')
if errors: if errors:
print(errors) print(errors)
sys.exit(1) sys.exit(1)
\ No newline at end of file
import os
class SelfTest:
    """Minimal sample test class used to exercise the test runner.

    Each ``test*`` method simply reports that it was executed;
    ``setup`` and ``teardown`` are the per-test hook methods that a
    test driver may invoke if present.
    """

    def setup(self):
        # per-test initialization hook
        print("setup")

    def teardown(self):
        # per-test cleanup hook
        print("teardown")

    def test1(self):
        print("test1")

    def test2(self):
        print("test2")
def run_tests(tests, namespace):
""" Runs selected tests.
Args:
tests: Either a string or a list of strings that contains the
names of test or test classes. Each test and, in the case
of a test class, all tests within the test class will be
run.
namespace: The namespace for running the test, usually
``globals()`` should be used.
"""
def instantiate(cls_name):
exec("obj = " + cls_name + "()", namespace)
obj = namespace["obj"]
if "setup" in dir(obj):
obj.setup()
return obj
if isinstance(tests, str):
tests = tests.split(" ")
for test in tests:
if test.find('.') >= 0:
cls_name, method_name = test.split('.')
obj = instantiate(cls_name)
print("Running " + cls_name + "." + method_name)
exec('obj.' + method_name + '()')
else:
obj = instantiate(test)
for name in dir(obj):
if name.lower().startswith("test"):