In January 2021 we will introduce a 10 GB quota for project repositories. Higher limits for individual projects will be available on request. Please see https://doku.lrz.de/display/PUBLIC/GitLab for more information.

Commit adc76f68 authored by di68kap's avatar di68kap

- Support functions for grammar- and ast-testing moved into a separate module "testing"

- example MLW extended by adding rudimentary testcases
parent 74b39df2
......@@ -24,8 +24,9 @@ from .syntaxtree import *
from .parsers import *
from .ebnf import *
from .dsl import *
# from .testing import *
from .versionnumber import __version__
__author__ = "Eckhart Arnold <arnold@badw.de>"
__copyright__ = "http://www.apache.org/licenses/LICENSE-2.0"
# __all__ = ['toolkit', 'syntaxtree', 'parsers', 'ebnf', 'dsl'] # flat namespace
# __all__ = ['toolkit', 'syntaxtree', 'parsers', 'ebnf', 'dsl', 'testing', 'versionnumber'] # flat namespace
......@@ -369,7 +369,7 @@ class EBNFCompiler(CompilerBase):
compiler += [COMPILER_FACTORY.format(NAME=self.grammar_name)]
return '\n'.join(compiler)
def assemble_parser(self, definitions):
def assemble_parser(self, definitions, root_node):
# fix capture of variables that have been defined before usage [sic!]
if self.variables:
......@@ -427,9 +427,11 @@ class EBNFCompiler(CompilerBase):
declarations += [symbol + '.set(' + statement + ')']
else:
declarations += [symbol + ' = ' + statement]
known_symbols = self.rules | self.RESERVED_SYMBOLS
for nd in self.symbol_nodes:
if nd.result not in self.rules:
if nd.result not in known_symbols:
nd.add_error("Missing production for symbol '%s'" % nd.result)
root_node.error_flag = True
if self.root and 'root__' not in self.rules:
declarations.append('root__ = ' + self.root)
declarations.append('')
......@@ -452,8 +454,9 @@ class EBNFCompiler(CompilerBase):
else:
assert nd.parser.name == "directive", nd.as_sexpr()
self._compile(nd)
node.error_flag |= nd.error_flag
return self.assemble_parser(definitions)
return self.assemble_parser(definitions, node)
def on_definition(self, node):
rule = node.result[0].result
......
......@@ -56,10 +56,8 @@ try:
except ImportError:
import re
from .toolkit import is_logging, log_dir, logfile_basename, escape_re, sane_parser_name, \
compact_sexpr
from .syntaxtree import WHITESPACE_PTYPE, TOKEN_PTYPE, ZOMBIE_PARSER, Node, \
mock_syntax_tree
from .toolkit import is_logging, log_dir, logfile_basename, escape_re, sane_parser_name
from .syntaxtree import WHITESPACE_PTYPE, TOKEN_PTYPE, ZOMBIE_PARSER, Node
from DHParser.toolkit import load_if_file, error_messages
__all__ = ['HistoryRecord',
......@@ -1087,43 +1085,8 @@ def compile_source(source, scanner, parser, transformer, compiler):
errors = syntax_tree.collect_errors()
if not errors:
result = compiler(syntax_tree)
errors = syntax_tree.collect_errors()
errors = syntax_tree.collect_errors() if syntax_tree.error_flag else []
messages = error_messages(source_text, errors)
return result, messages, syntax_tree
def test_grammar(test_suite, parser_factory, transformer_factory):
errata = []
parser = parser_factory()
transform = transformer_factory()
for parser_name, tests in test_suite.items():
assert set(tests.keys()).issubset({'match', 'fail', 'ast', 'cst', '__ast__', '__cst__'})
for test_name, test_code in tests['match'].items():
cst = parser(test_code, parser_name)
tests.setdefault('__cst__', {})[test_name] = cst
if cst.error_flag:
errata.append('Match test "%s" for parser "%s" failed:\n\tExpr.: %s\n\t%s' %
(test_name, parser_name, '\n\t'.join(test_code.split('\n')),
'\n\t'.join(error_messages(test_code, cst.collect_errors()))))
elif "cst" in tests and mock_syntax_tree(tests["cst"][test_name]) != cst:
errata.append('Concrete syntax tree test "%s" for parser "%s" failed:\n%s' %
(test_name, parser_name, cst.as_sexpr()))
elif "ast" in tests:
ast = copy.deepcopy(cst)
transform(ast)
tests.setdefault('__ast__', {})[test_name] = ast
compare = mock_syntax_tree(tests["ast"][test_name])
if compare != ast:
errata.append('Abstract syntax tree test "%s" for parser "%s" failed:'
'\n\tExpr.: %s\n\tExpected: %s\n\tReceived: %s'
% (test_name, parser_name, '\n\t'.join(test_code.split('\n')),
compact_sexpr(compare.as_sexpr()),
compact_sexpr(ast.as_sexpr())))
for test_name, test_code in tests['fail'].items():
cst = parser(test_code, parser_name)
if not cst.error_flag:
errata.append('Fail test "%s" for parser "%s" yields match instead of '
'expected failure!' % (test_name, parser_name))
return errata
......@@ -27,7 +27,7 @@ except ImportError:
import re
from typing import NamedTuple
from .toolkit import is_logging, log_dir, expand_table, line_col, smart_list
from .toolkit import log_dir, expand_table, line_col, smart_list
__all__ = ['WHITESPACE_PTYPE',
......@@ -35,7 +35,6 @@ __all__ = ['WHITESPACE_PTYPE',
'ZOMBIE_PARSER',
'Error',
'Node',
'mock_syntax_tree',
'key_parser_name',
'key_tag_name',
'traverse',
......@@ -339,21 +338,28 @@ class Node:
self.error_flag = True
return self
def propagate_error_flags(self):
    """Recursively propagates error flags set on child nodes to its
    parents. This can be used if errors are added to descendant
    nodes after syntaxtree construction, i.e. in the compile phase.
    """
    # Depth-first: every child pulls up the flags of its own subtree
    # first, so a single pass over the direct children suffices here.
    for node in self.children:
        node.propagate_error_flags()
    self.error_flag = self.error_flag or any(node.error_flag for node in self.children)
def collect_errors(self, clear_errors=False):
"""
Returns all errors of this node or any child node in the form
of a set of tuples (position, error_message), where position
is always relative to this node.
"""
errors = []
if self.error_flag:
errors = self.errors
if clear_errors:
self._errors = []
self.error_flag = False
if self.children:
for child in self.result:
errors.extend(child.collect_errors(clear_errors))
errors = self.errors
if clear_errors:
self._errors = []
self.error_flag = False
if self.children:
for child in self.result:
errors.extend(child.collect_errors(clear_errors))
return errors
def log(self, log_file_name):
......@@ -423,60 +429,6 @@ class Node:
return nav(path.split('/'))
def mock_syntax_tree(sexpr):
    """Generates a tree of nodes from an S-expression.

    Args:
        sexpr: A string containing an S-expression, e.g. "(a (b c))".

    Returns:
        Node: the root node of the generated syntax tree.

    Raises:
        ValueError: if `sexpr` is not a well-formed S-expression.

    Example:
        >>> mock_syntax_tree("(a (b c))").as_sexpr()
        (a
            (b
                "c"
            )
        )
    """
    def next_block(s):
        # Yields the top-level S-expressions contained in string `s`.
        s = s.strip()
        while s[0] != ')':
            if s[0] != '(':
                raise ValueError('"(" expected, not ' + s[:10])
            # Scan forward to the matching closing parenthesis.
            level = 1
            i = 1
            while level > 0:
                if s[i] == '(':
                    level += 1
                elif s[i] == ')':
                    level -= 1
                i += 1
            yield s[:i]
            s = s[i:].strip()

    sexpr = sexpr.strip()
    if sexpr[0] != '(':
        raise ValueError('"(" expected, not ' + sexpr[:10])
    sexpr = sexpr[1:].strip()
    # Raw string: '[\w:]+' in a plain literal is an invalid escape
    # sequence (SyntaxWarning in modern Python).
    m = re.match(r'[\w:]+', sexpr)
    name, class_name = (sexpr[:m.end()].split(':') + [''])[:2]
    sexpr = sexpr[m.end():].strip()
    if sexpr[0] == '(':
        result = tuple(mock_syntax_tree(block) for block in next_block(sexpr))
    else:
        lines = []
        while sexpr and sexpr[0] != ')':
            # Quoted content (triple, double or single quotes) is taken verbatim.
            for qm in ['"""', "'''", '"', "'"]:
                m = re.match(qm + r'.*?' + qm, sexpr)
                if m:
                    i = len(qm)
                    lines.append(sexpr[i:m.end() - i])
                    sexpr = sexpr[m.end():].strip()
                    break
            else:
                # Unquoted content runs up to the next closing parenthesis.
                m = re.match(r'(?:(?!\)).)*', sexpr)
                lines.append(sexpr[:m.end()])
                sexpr = sexpr[m.end():]
        result = "\n".join(lines)
    return Node(MockParser(name, ':' + class_name), result)
########################################################################
#
# syntax tree transformation functions
......
"""testing.py - test support for DHParser based grammars and compilers
Copyright 2016 by Eckhart Arnold (arnold@badw.de)
Bavarian Academy of Sciences and Humanities (badw.de)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied. See the License for the specific language governing
permissions and limitations under the License.
"""
import copy
import regex as re
from DHParser import Node, error_messages
from DHParser.syntaxtree import MockParser
from DHParser.toolkit import compact_sexpr
def mock_syntax_tree(sexpr):
    """Generates a tree of nodes from an S-expression.

    Args:
        sexpr: A string containing an S-expression, e.g. "(a (b c))".

    Returns:
        Node: the root node of the generated syntax tree.

    Raises:
        ValueError: if `sexpr` is not a well-formed S-expression.

    Example:
        >>> mock_syntax_tree("(a (b c))").as_sexpr()
        (a
            (b
                "c"
            )
        )
    """
    def next_block(s):
        # Yields the top-level S-expressions contained in string `s`.
        s = s.strip()
        while s[0] != ')':
            if s[0] != '(':
                raise ValueError('"(" expected, not ' + s[:10])
            # Scan forward to the matching closing parenthesis.
            level = 1
            i = 1
            while level > 0:
                if s[i] == '(':
                    level += 1
                elif s[i] == ')':
                    level -= 1
                i += 1
            yield s[:i]
            s = s[i:].strip()

    sexpr = sexpr.strip()
    if sexpr[0] != '(':
        raise ValueError('"(" expected, not ' + sexpr[:10])
    sexpr = sexpr[1:].strip()
    # Raw string: '[\w:]+' in a plain literal is an invalid escape
    # sequence (SyntaxWarning in modern Python).
    m = re.match(r'[\w:]+', sexpr)
    name, class_name = (sexpr[:m.end()].split(':') + [''])[:2]
    sexpr = sexpr[m.end():].strip()
    if sexpr[0] == '(':
        result = tuple(mock_syntax_tree(block) for block in next_block(sexpr))
    else:
        lines = []
        while sexpr and sexpr[0] != ')':
            # Quoted content (triple, double or single quotes) is taken verbatim.
            for qm in ['"""', "'''", '"', "'"]:
                m = re.match(qm + r'.*?' + qm, sexpr)
                if m:
                    i = len(qm)
                    lines.append(sexpr[i:m.end() - i])
                    sexpr = sexpr[m.end():].strip()
                    break
            else:
                # Unquoted content runs up to the next closing parenthesis.
                m = re.match(r'(?:(?!\)).)*', sexpr)
                lines.append(sexpr[:m.end()])
                sexpr = sexpr[m.end():]
        result = "\n".join(lines)
    return Node(MockParser(name, ':' + class_name), result)
def test_grammar(test_suite, parser_factory, transformer_factory):
"""Unit tests for a grammar-parser and ast transformations.
"""
errata = []
parser = parser_factory()
transform = transformer_factory()
for parser_name, tests in test_suite.items():
assert set(tests.keys()).issubset({'match', 'fail', 'ast', 'cst', '__ast__', '__cst__'})
for test_name, test_code in tests.get('match', dict()).items():
cst = parser(test_code, parser_name)
tests.setdefault('__cst__', {})[test_name] = cst
if cst.error_flag:
errata.append('Match test "%s" for parser "%s" failed:\n\tExpr.: %s\n\t%s' %
(test_name, parser_name, '\n\t'.join(test_code.split('\n')),
'\n\t'.join(error_messages(test_code, cst.collect_errors()))))
elif "cst" in tests and mock_syntax_tree(tests["cst"][test_name]) != cst:
errata.append('Concrete syntax tree test "%s" for parser "%s" failed:\n%s' %
(test_name, parser_name, cst.as_sexpr()))
elif "ast" in tests:
ast = copy.deepcopy(cst)
transform(ast)
tests.setdefault('__ast__', {})[test_name] = ast
compare = mock_syntax_tree(tests["ast"][test_name])
if compare != ast:
errata.append('Abstract syntax tree test "%s" for parser "%s" failed:'
'\n\tExpr.: %s\n\tExpected: %s\n\tReceived: %s'
% (test_name, parser_name, '\n\t'.join(test_code.split('\n')),
compact_sexpr(compare.as_sexpr()),
compact_sexpr(ast.as_sexpr())))
for test_name, test_code in tests.get('fail', dict()).items():
cst = parser(test_code, parser_name)
if not cst.error_flag:
errata.append('Fail test "%s" for parser "%s" yields match instead of '
'expected failure!' % (test_name, parser_name))
return errata
def runner(tests, namespace):
""" Runs all or some selected tests from a test suite. To run all
tests in a module, call ``runner("", globals())`` from within
that module.
Args:
tests: Either a string or a list of strings that contains the
names of test or test classes. Each test and, in the case
of a test class, all tests within the test class will be
run.
namespace: The namespace for running the test, usually
``globals()`` should be used.
Example:
class TestSomething()
def setup(self):
pass
def teardown(self):
pass
def test_something(self):
pass
if __name__ == "__main__":
from run import runner
runner("", globals())
"""
def instantiate(cls_name):
exec("obj = " + cls_name + "()", namespace)
obj = namespace["obj"]
if "setup" in dir(obj):
obj.setup()
return obj
if tests:
if isinstance(tests, str):
tests = tests.split(" ")
else:
# collect all test classes, in case no methods or classes have been passed explicitly
tests = []
for name in namespace.keys():
if name.lower().startswith('test') and inspect.isclass(namespace[name]):
tests.append(name)
obj = None
for test in tests:
try:
if test.find('.') >= 0:
cls_name, method_name = test.split('.')
obj = instantiate(cls_name)
print("Running " + cls_name + "." + method_name)
exec('obj.' + method_name + '()')
else:
obj = instantiate(test)
for name in dir(obj):
if name.lower().startswith("test"):
print("Running " + test + "." + name)
exec('obj.' + name + '()')
finally:
if "teardown" in dir(obj):
obj.teardown()
# EBNF-Syntax für MLW-Artikel
@ comment = /#.*(?:\n|$)/ # Kommentare beginnen mit '#' und reichen bis zum Zeilenende
@ whitespace = /[\t ]*/ # Zeilensprünge zählen nicht als Leerraum
@ literalws = both # Leerraum vor und nach Literalen wird automatisch entfernt
@ comment = /#.*(?:\n|$)/ # Kommentare beginnen mit '#' und reichen bis zum Zeilenende
@ whitespace = /[\t ]*/ # Zeilensprünge zählen nicht als Leerraum
@ literalws = right # Leerraum rechts von Literalen wird automatisch entfernt
##############################################################################
Artikel = [LEER]
§LemmaPosition [ArtikelKopf]
§BedeutungsPosition
§Autorinfo
[LEER] DATEI_ENDE
Artikel = [LZ]
§LemmaPosition
[ArtikelKopf]
§BedeutungsPosition
§Autorinfo
[LZ] DATEI_ENDE
#### LEMMA-POSITION ##########################################################
LemmaPosition = "LEMMA" §Lemma [LemmaVarianten] §GrammatikPosition
LemmaPosition = "LEMMA" [LZ] §HauptLemma [LemmaVarianten] §GrammatikPosition
Lemma = [klassisch] [gesichert] WORT_KLEIN [LEER]
klassisch = "*"
gesichert = "$"
HauptLemma = [klassisch] [gesichert] lemma
klassisch = "*"
gesichert = "$"
LemmaVarianten = "VARIANTEN" [LEER]
§LVariante { TRENNER LVariante }
[TRENNER LVZusatz] [TRENNER]
LemmaVarianten = { (LZ|TR) lemma }+
[ (LZ|TR) LemmaZusatz] [LZ]
LVariante = ~/(?:[a-z]|-)+/~ # Buchstabenfolge mit Trennzeichen "-"
LVZusatz = "ZUSATZ" zs_typ
zs_typ = "sim."
lemma = LAT_WORT_TEIL { ("|" | "-") LAT_WORT_TEIL }
LemmaZusatz = "ZUSATZ" lzs_typ
lzs_typ = "sim."
#### GRAMMATIK-POSITION ######################################################
## GRAMMATIK-POSITION ##
GrammatikPosition = "GRAMMATIK" [LEER] §wortart §TRENNER §Flexion [genus]
{GrammatikVariante} [TRENNER]
GrammatikPosition = "GRAMMATIK" [LZ] §wortart §TR §Flexion [genus]
{GrammatikVariante} [TR]
wortart = "nomen" | "n." |
"verb" | "v." |
"adverb" | "adv." |
"adjektiv" | "adj."
GrammatikVariante = TRENNER GVariante
GrammatikVariante = TR GVariante
GVariante = Flexionen [genus] ":" Beleg
Flexion = Flexion { "," §Flexion }
Flexionen = Flexion { "," §Flexion }
Flexion = /-?[a-z]+/~
genus = "maskulinum" | "m." |
......@@ -57,8 +57,8 @@ genus = "maskulinum" | "m." |
#### ARTIKEL-KOPF ############################################################
ArtikelKopf = SchreibweisenPosition
SchreibweisenPosition = "SCHREIBWEISE" [LEER] §SWTyp ":" [LEER]
§SWVariante { TRENNER SWVariante} [LEER]
SchreibweisenPosition = "SCHREIBWEISE" [LZ] §SWTyp ":" [LZ]
§SWVariante { TR SWVariante} [LZ]
SWTyp = "script." | "script. fat-"
SWVariante = Schreibweise ":" Beleg
Schreibweise = "vizreg-" | "festregel(a)" | "fezdregl(a)" | "fat-"
......@@ -70,40 +70,42 @@ VerweisZiel = ~/<\w+>/~
#### BEDEUTUNGS-POSITION #####################################################
BedeutungsPosition = { "BEDEUTUNG" [LEER] §Bedeutung }+
BedeutungsPosition = { "BEDEUTUNG" [LZ] §Bedeutung }+
Bedeutung = (Interpretamente | Bedeutungskategorie) [Belege]
Bedeutungskategorie = /(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+/~ [LEER]
Interpretamente = LateinischeBedeutung [LEER] §DeutscheBedeutung [LEER]
Bedeutungskategorie = /(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+/~ [LZ]
Interpretamente = LateinischeBedeutung [LZ] §DeutscheBedeutung [LZ]
LateinischeBedeutung = "LAT" /(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+/~
DeutscheBedeutung = "DEU" /(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+/~
Belege = "BELEGE" [LEER] { "*" EinBeleg }
EinBeleg = { !([LEER] ("*" | "BEDEUTUNG" | "AUTOR" | "NAME" | "ZUSATZ"))
Belege = "BELEGE" [LZ] { "*" EinBeleg }
EinBeleg = { !([LZ] ("*" | "BEDEUTUNG" | "AUTOR" | "NAME" | "ZUSATZ"))
/\s*.*\s*/ }+
[Zusatz]
Zusatz = "ZUSATZ" /\s*.*/ TRENNER
Zusatz = "ZUSATZ" /\s*.*/ TR
#### AUTOR/AUTORIN ###########################################################
Autorinfo = ("AUTORIN" | "AUTOR") Name
Name = WORT { WORT | NAMENS_ABKÜRZUNG }
Name = { NAME | NAMENS_ABKÜRZUNG }+
#### ATOMARE AUSDRÜCKE #######################################################
NAMENS_ABKÜRZUNG = /[A-ZÄÖÜÁÀ]\./
NAMENS_ABKÜRZUNG = /[A-ZÄÖÜÁÀÂÓÒÔÚÙÛ]\./~
NAME = /[A-ZÄÖÜÁÀÓÒÚÙÂÔÛ][a-zäöüßáàâóòôúùû]+/~
WORT = /[A-ZÄÖÜ]?[a-zäöüß]+/~
WORT_GROSS = /[A-ZÄÖÜ][a-zäöüß]+/~
WORT_KLEIN = /[a-zäöüß]+/~
DEU_WORT = /[A-ZÄÖÜ]?[a-zäöüß]+/~
DEU_GROSS = /[A-ZÄÖÜ][a-zäöüß]+/~
DEU_KLEIN = /[a-zäöüß]+/~
LAT_WORT = /[a-z]+/~
LAT_WORT_TEIL = /[a-z]+/
GROSSSCHRIFT = /[A-ZÄÖÜ]+/~
TRENNER = /\s*;\s*/ | { ZSPRUNG }+
ZSPRUNG = /\n/~
TR = /\s*;\s*/ | { NEUE_ZEILE }+ # Trenner
NEUE_ZEILE = /\n/~
LEER = /\s+/ # horizontaler und(!) vertikaler Leerraum
LZ = /\s+/ # Leerzeichen oder -zeilen
DATEI_ENDE = !/./
NIEMALS = /(?!.)/
This diff is collapsed.
#!/usr/bin/python3
"""recompile_grammar.py - recompiles any .ebnf files in the current
directory if necessary
Author: Eckhart Arnold <arnold@badw.de>
Copyright 2017 Bavarian Academy of Sciences and Humanities
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import os
from DHParser.ebnf import grammar_changed
from DHParser.dsl import compile_on_disk
def compile(name):
    """Recompiles the EBNF grammar file `name` if its generated
    compiler script is missing or out of date; any compilation
    errors are written to '<base>_errors.txt'.

    NOTE(review): this function shadows the builtin ``compile`` —
    renaming it would be cleaner, but callers may depend on the name.
    """
    base, ext = os.path.splitext(name)
    compiler_name = base + '_compiler.py'
    # Nothing to do when an up-to-date compiler script already exists.
    if os.path.exists(compiler_name) and not grammar_changed(compiler_name, name):
        return
    print("recompiling parser for: " + name)
    errors = compile_on_disk(name)
    if errors:
        print("Errors while compiling: " + name + '!')
        with open(base + '_errors.txt', 'w') as f:
            for e in errors:
                f.write(e)
                f.write('\n')
# Recompile every EBNF grammar file found in the current directory.
for entry in os.listdir():
    if os.path.isfile(entry) and entry.lower().endswith('.ebnf'):
        compile(entry)
#!/usr/bin/python3
"""compile_MLW.py - simple utility script for compiling MLW.ebnf
"""compile_MLW-entry.py - simple utility script for compiling a sample
MLW entry
Author: Eckhart Arnold <arnold@badw.de>
......
LEMMA *facitergula
LEMMA facitergula
VARIANTEN
fasc-itergula
......
#!/usr/bin/python3
"""test_MLW_grammar.py - test code for the MLW grammar
Author: Eckhart Arnold <arnold@badw.de>
Copyright 2017 Bavarian Academy of Sciences and Humanities
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import DHParser.testing
from DHParser import parsers
# from DHParser.dsl import load_compiler_suite
from MLW_compiler import get_MLW_grammar, get_MLW_transformer
MLW_TEST_CASES_LEMMA_POSITION = {
"lemma": {
"match": {
1: "facitergula",
2: "facitergul|a",
3: "fasc|itergula"
},
"fail": {
9: "duo vocabula"
}
},
"HauptLemma" : {