Starting from 2021-07-01, all LRZ GitLab users will be required to explicitly accept the GitLab Terms of Service. Please see the detailed information at https://doku.lrz.de/display/PUBLIC/GitLab and make sure that your projects conform to the requirements.

Commit ab1f1788 authored by Eckhart Arnold's avatar Eckhart Arnold
Browse files

- LaTeX more tests, Bug encountered (see TODO in class parsers.Grammar!)

parent d1967501
......@@ -326,8 +326,8 @@ class EBNFCompiler(Compiler):
self.directives = {'whitespace': self.WHITESPACE['horizontal'],
'comment': '',
'literalws': ['right'],
'tokens': set(), # alt. 'scanner_tokens'
'filter': dict()} # alt. 'retrieve_filter'
'tokens': set(), # alt. 'scanner_tokens'
'filter': dict()} # alt. 'filter'
@property
def result(self) -> str:
......@@ -416,7 +416,7 @@ class EBNFCompiler(Compiler):
declarations = declarations[:-1]
declarations.append('"""')
# add default functions for retrieve_filter filters of pop or retrieve operators
# add default functions for filter filters of pop or retrieve operators
# for symbol, fun in self.directives['filter']:
# declarations.append(symbol + '_filter = lambda value: value.replace("(", ")")'
......@@ -586,7 +586,7 @@ class EBNFCompiler(Compiler):
'and not a %s.') % (prefix, str(arg.parser)))
return str(arg.result)
if str(arg) in self.directives['filter']:
custom_args = ['retrieve_filter=%s' % self.directives['filter'][str(arg)]]
custom_args = ['filter=%s' % self.directives['filter'][str(arg)]]
self.variables.add(str(arg)) # cast(str, arg.result)
elif len(node.children) > 2:
......
......@@ -53,12 +53,11 @@ import abc
import copy
import os
from functools import partial
try:
import regex as re
except ImportError:
import re
from typing import Any, Callable, Collection, Dict, Iterator, List, Set, Tuple, Union
from typing import Any, Callable, Dict, Iterator, List, Set, Tuple, Union
from DHParser.toolkit import is_logging, log_dir, logfile_basename, escape_re, sane_parser_name
from DHParser.syntaxtree import WHITESPACE_PTYPE, TOKEN_PTYPE, ZOMBIE_PARSER, ParserBase, \
......@@ -342,7 +341,7 @@ class Grammar:
self.root__.apply(self._add_parser)
def __getitem__(self, key):
return self.__dict__[key]
return getattr(self, key)
def _reset(self):
# variables stored and recalled by Capture and Retrieve parsers
......@@ -357,6 +356,7 @@ class Grammar:
# also needed for call stack tracing
self.moving_forward = True
# TODO: Either make sure not to miss out unconnected parsers or raise an error! Actually, the EBNF-Compiler should keep track of this!
def _add_parser(self, parser: Parser) -> None:
"""Adds the copy of the classes parser object to this
particular instance of Grammar.
......@@ -389,7 +389,8 @@ class Grammar:
self.history_tracking = is_logging()
self.document = document
parser = self[start_parser] if isinstance(start_parser, str) else start_parser
assert parser.grammar == self, "Cannot run parsers from a differen grammar object!"
assert parser.grammar == self, "Cannot run parsers from a different grammar object!" \
" %s vs. %s" % (str(self), str(parser.grammar))
stitches = [] # type: List[Node]
rest = document
if not rest:
......@@ -721,7 +722,7 @@ class NaryOperator(Parser):
def __init__(self, *parsers: Parser, name: str = '') -> None:
super(NaryOperator, self).__init__(name)
# assert all([isinstance(parser, Parser) for parser in parsers]), str(parsers)
self.parsers = parsers # type: Collection ## [Parser]
self.parsers = parsers # type: Container ## [Parser]
def __deepcopy__(self, memo):
parsers = copy.deepcopy(self.parsers, memo)
......@@ -981,37 +982,37 @@ class Capture(UnaryOperator):
return None, text
def nop_filter(stack):
RetrieveFilter = Callable[[List[str]], str]
def nop_filter(stack: List[str]) -> str:
    """Default retrieve-filter: yield the topmost value of *stack* unchanged."""
    top = stack[-1]
    return top
def counterpart_filter(stack):
def counterpart_filter(stack: List[str]) -> str:
    """Return the topmost value of *stack* with every opening delimiter
    replaced by its counterpart: "(" -> ")", "[" -> "]", "{" -> "}",
    ">" -> "<".

    Intended as a retrieve-filter for Pop/Retrieve parsers where a captured
    opening delimiter must be matched by its closing counterpart.
    """
    # str.translate substitutes all four characters in a single pass. This is
    # equivalent to the previous chain of str.replace() calls, because none of
    # the replacement characters occurs among the characters being replaced.
    return stack[-1].translate(str.maketrans("([{>", ")]}<"))
def accumulating_filter(stack):
def accumulating_filter(stack: List[str]) -> str:
    """Concatenate every value on *stack*, oldest first, into one string."""
    accumulated = "".join(stack)
    return accumulated
RetrFilter = Callable[[List[str]], str]
class Retrieve(Parser):
def __init__(self, symbol: Parser, retrieve_filter: RetrFilter = None, name: str = '') -> None:
def __init__(self, symbol: Parser, filter: RetrieveFilter = None, name: str = '') -> None:
if not name:
name = symbol.name
super(Retrieve, self).__init__(name)
self.symbol = symbol
self.retrieve_filter = retrieve_filter if retrieve_filter else nop_filter
self.filter = filter if filter else nop_filter
def __deepcopy__(self, memo):
return self.__class__(self.symbol, self.retrieve_filter, self.name)
return self.__class__(self.symbol, self.filter, self.name)
def __call__(self, text: str) -> Tuple[Node, str]:
try:
stack = self.grammar.variables[self.symbol.name]
value = self.retrieve_filter(stack)
value = self.filter(stack)
self.pick_value(stack)
except (KeyError, IndexError):
return Node(self, '').add_error(dsl_error_msg(self,
......
......@@ -27,7 +27,7 @@ except ImportError:
import re
from DHParser import Node, error_messages
from DHParser.toolkit import compact_sexpr, is_logging, log_dir
from DHParser.toolkit import compact_sexpr, is_logging
from DHParser.syntaxtree import MockParser
from DHParser.ebnf import grammar_changed
from DHParser.dsl import compile_on_disk
......@@ -154,10 +154,10 @@ def unit_from_configfile(config_filename):
return unit
def unit_from_json(config_filename):
def unit_from_json(json_filename):
"""Reads a grammar unit test from a json file.
"""
with open(config_filename, 'r') as f:
with open(json_filename, 'r') as f:
unit = json.load(f)
for symbol in unit:
for stage in unit[symbol]:
......@@ -168,17 +168,16 @@ def unit_from_json(config_filename):
# TODO: add support for yaml, cson, toml
def unit_from_file(config_filename):
def unit_from_file(filename):
"""Reads a grammar unit test from a file. The format of the file is
determined by the ending of its name.
"""
fname = config_filename
if fname.endswith(".json"):
return unit_from_json(fname)
elif fname.endswith(".ini"):
return unit_from_configfile(fname)
if filename.endswith(".json"):
return unit_from_json(filename)
elif filename.endswith(".ini"):
return unit_from_configfile(filename)
else:
raise ValueError("Unknown unit test file type: " + fname[fname.rfind('.'):])
raise ValueError("Unknown unit test file type: " + filename[filename.rfind('.'):])
def report(test_unit):
......@@ -208,21 +207,27 @@ def report(test_unit):
return '\n'.join(report)
def grammar_unit(test_unit, parser_factory, transformer_factory):
def grammar_unit(test_unit, parser_factory, transformer_factory, verbose=False):
"""Unit tests for a grammar-parser and ast transformations.
"""
if isinstance(test_unit, str):
unit_name = os.path.basename(os.path.splitext(test_unit)[0])
unit_dir, unit_name = os.path.split(os.path.splitext(test_unit)[0])
test_unit = unit_from_file(test_unit)
else:
unit_name = str(id(test_unit))
if verbose:
print("\nUnit: " + unit_name)
errata = []
parser = parser_factory()
transform = transformer_factory()
for parser_name, tests in test_unit.items():
assert set(tests.keys()).issubset(UNIT_STAGES)
if verbose:
print(' Match-Tests for parser "' + parser_name + '"')
for test_name, test_code in tests.get('match', dict()).items():
if verbose:
infostr = ' match-test "' + test_name + '" ... '
errflag = len(errata)
cst = parser(test_code, parser_name)
tests.setdefault('__cst__', {})[test_name] = cst
if "ast" in tests or is_logging():
......@@ -246,32 +251,48 @@ def grammar_unit(test_unit, parser_factory, transformer_factory):
compact_sexpr(compare.as_sexpr()),
compact_sexpr(ast.as_sexpr())))
tests.setdefault('__err__', {})[test_name] = errata[-1]
if verbose:
print(infostr + "OK" if len(errata) == errflag else "FAIL")
if verbose:
print(' Fail-Tests for parser "' + parser_name + '"')
for test_name, test_code in tests.get('fail', dict()).items():
if verbose:
infostr = ' fail-test "' + test_name + '" ... '
errflag = len(errata)
cst = parser(test_code, parser_name)
if not cst.error_flag:
errata.append('Fail test "%s" for parser "%s" yields match instead of '
'expected failure!' % (test_name, parser_name))
tests.setdefault('__err__', {})[test_name] = errata[-1]
if verbose:
print(infostr + "OK" if len(errata) == errflag else "FAIL")
if is_logging():
with open(os.path.join(log_dir(), unit_name + '.report'), 'w') as f:
report_dir = os.path.join(unit_dir, "REPORT")
if not os.path.exists(report_dir):
os.mkdir(report_dir)
with open(os.path.join(report_dir, unit_name + '.report'), 'w') as f:
f.write(report(test_unit))
return errata
def grammar_suite(directory, parser_factory, transformer_factory, ignore_unknown_filetypes=False):
def grammar_suite(directory, parser_factory, transformer_factory, ignore_unknown_filetypes=False,
verbose=False):
"""Runs all grammar unit tests in a directory. A file is considered a test
unit, if it has the word "test" in its name.
"""
all_errors = collections.OrderedDict()
if verbose:
print("\nScanning test-directory: " + directory)
for filename in sorted(os.listdir(directory)):
if filename.lower().find("test") >= 0:
try:
print("Running grammar tests in: " + filename)
if verbose:
print("\nRunning grammar tests from: " + filename)
errata = grammar_unit(os.path.join(directory, filename),
parser_factory, transformer_factory)
parser_factory, transformer_factory, verbose)
if errata:
all_errors[filename] = errata
except ValueError as e:
......@@ -284,7 +305,7 @@ def grammar_suite(directory, parser_factory, transformer_factory, ignore_unknown
for error in all_errors[filename]:
error_report.append('\t' + '\n\t'.join(error.split('\n')))
if error_report:
return ('Test suite "%s" revealed some errors:\n' %directory) + '\n'.join(error_report)
return ('Test suite "%s" revealed some errors:\n' % directory) + '\n'.join(error_report)
return ''
......
......@@ -29,7 +29,7 @@ word_sequence = { TEXTCHUNK WSPC }+
blockcmd = "\subsection" | "\section" | "\chapter" | "\subsubsection"
| "\paragraph" | "\subparagraph" | "\begin{enumerate}"
| "\begin{itemize}" | "\begin{figure}"
| "\begin{itemize}" | "\item" | "\begin{figure}"
CMDNAME = /\\\w+/~
NAME = /\w+/~
......
......@@ -3,5 +3,16 @@
Professoren, Philister und Vieh; welche vier Stände doch nichts weniger
als streng geschieden sind. Der Viehstand ist der bedeutendste.
[fail:paragraph]
1 : \begin{enumerate}
2 : \item
3 : und Vieh; \paragraph
[match:sequence]
1 : Im allgemeinen werden die Bewohner Göttingens eingeteilt in Studenten,
Professoren, Philister und Vieh; welche vier Stände doch nichts weniger
als streng geschieden sind. Der Viehstand ist der bedeutendste.
Im allgemeinen werden die Bewohner Göttingens eingeteilt in Studenten,
Professoren, Philister und Vieh; welche vier Stände doch nichts weniger
als streng geschieden sind. Der Viehstand ist der bedeutendste.
\ No newline at end of file
#!/usr/bin/python3
"""test_grammar.py - runs the unit tests for the LaTeX grammar
"""tst_grammar.py - runs the unit tests for the LaTeX grammar
Author: Eckhart Arnold <arnold@badw.de>
......@@ -26,8 +26,8 @@ from DHParser import testing
from DHParser import toolkit
from LaTeXCompiler import get_grammar, get_transformer
with toolkit.logging():
error_report = testing.grammar_suite('grammar_tests', get_grammar, get_transformer)
with toolkit.logging(True):
error_report = testing.grammar_suite('grammar_tests', get_grammar, get_transformer, verbose=True)
assert not error_report, error_report
......@@ -120,25 +120,34 @@ class TestRegex:
class TestGrammar:
def test_pos_values_initialized(self):
# checks whether pos values in the parsing result and in the
# history record have been initialized
def setup(self):
grammar = r"""@whitespace = horizontal
haupt = textzeile LEERZEILE
textzeile = { WORT }+
WORT = /[^ \t]+/~
LEERZEILE = /\n[ \t]*(?=\n)/~
"""
result, messages, syntax_tree = compile_source(grammar, None, get_ebnf_grammar(),
get_ebnf_transformer(), get_ebnf_compiler("PosTest"))
assert result
self.pyparser, messages, syntax_tree = compile_source(grammar, None, get_ebnf_grammar(),
get_ebnf_transformer(), get_ebnf_compiler("PosTest"))
assert self.pyparser
assert not messages
def test_pos_values_initialized(self):
# checks whether pos values in the parsing result and in the
# history record have been initialized
with logging("LOGS"):
parser = compile_python_object(DHPARSER_IMPORTS + result, '\w+Grammar$')()
result = parser("no_file_name*")
parser = compile_python_object(DHPARSER_IMPORTS + self.pyparser, '\w+Grammar$')()
parser("no_file_name*")
for record in parser.history:
assert not record.node or record.node.pos >= 0
def test_select_parsing(self):
parser = compile_python_object(DHPARSER_IMPORTS + self.pyparser, '\w+Grammar$')()
parser("wort", "WORT")
parser("eine Zeile", "textzeile")
parser("kein Haupt", "haupt")
parser("so ist es richtig", "haupt")
if __name__ == "__main__":
from DHParser.testing import runner
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment