testing.py 19.4 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# testing.py - test support for DHParser based grammars and compilers
#
# Copyright 2016  by Eckhart Arnold (arnold@badw.de)
#                 Bavarian Academy of Sciences and Humanities (badw.de)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.  See the License for the specific language governing
# permissions and limitations under the License.
17

18
19
20
21
22
23
24
25
26
"""
Module ``testing`` contains support for unit-testing domain specific
languages. Tests for arbitrarily small components of the Grammar can
be written into test files with ini-file syntax in order to test
whether the parser matches or fails as expected. It can also be
tested whether it produces an expected concrete or abstract syntax tree.
Usually, however, unexpected failure to match a certain string is the
main cause of trouble when constructing a context free Grammar.
"""
27
28


29
import collections
30
# import configparser
31
import copy
Eckhart Arnold's avatar
Eckhart Arnold committed
32
import fnmatch
di68kap's avatar
di68kap committed
33
import inspect
34
import itertools
35
36
import json
import os
37
import sys
38

39
from DHParser.error import is_error, adjust_error_locations
40
from DHParser.log import is_logging, clear_logs, log_ST, log_parsing_history
41
from DHParser.parse import UnknownParserError
42
from DHParser.syntaxtree import Node, mock_syntax_tree, flatten_sxpr, ZOMBIE_PARSER
43
44
45
from DHParser.toolkit import re, typing

from typing import Tuple
46

47
__all__ = ('unit_from_configfile',
48
49
50
51
52
53
54
           'unit_from_json',
           'unit_from_file',
           'get_report',
           'grammar_unit',
           'grammar_suite',
           'runner')

55
# Names of the stages that are accepted as keys of the per-parser test
# dictionaries, e.g. as the ``stage`` part of a ``[stage:symbol]`` section
# header in a config file. The dunder-names are the keys under which
# ``grammar_unit`` stores the resulting syntax trees.
UNIT_STAGES = {'match*', 'match', 'fail', 'ast', 'cst', '__ast__', '__cst__'}
56

57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# def unit_from_configfile(config_filename):
#     """
#     Reads a grammar unit test from a config file.
#     """
#     cfg = configparser.ConfigParser(interpolation=None)
#     cfg.read(config_filename, encoding="utf8")
#     OD = collections.OrderedDict
#     unit = OD()
#     for section in cfg.sections():
#         symbol, stage = section.split(':')
#         if stage not in UNIT_STAGES:
#             if symbol in UNIT_STAGES:
#                 symbol, stage = stage, symbol
#             else:
#                 raise ValueError('Test stage %s not in: ' % (stage, str(UNIT_STAGES)))
#         for testkey, testcode in cfg[section].items():
#             if testcode[:3] + testcode[-3:] in {"''''''", '""""""'}:
#                 testcode = testcode[3:-3]
#                 # testcode = testcode.replace('\\#', '#')
#                 testcode = re.sub(r'(?<!\\)\\#', '#', testcode).replace('\\\\', '\\')
#             elif testcode[:1] + testcode[-1:] in {"''", '""'}:
#                 testcode = testcode[1:-1]
#             unit.setdefault(symbol, OD()).setdefault(stage, OD())[testkey] = testcode
#     # print(json.dumps(unit, sort_keys=True, indent=4))
#     return unit

83
# Regular expressions for the ini-like syntax of test files. All patterns
# are written as raw strings so that regex escapes like ``\s`` or ``\w`` are
# not (mis-)interpreted as - invalid - string escape sequences.

# Section header of the form ``[stage:symbol]``, e.g. ``[match:document]``.
RX_SECTION = re.compile(r'\s*\[(?P<stage>\w+\*?):(?P<symbol>\w+)\]')

# Value of an entry. Alternatives, in this order: triple double-quoted,
# triple single-quoted, double-quoted, single-quoted, or an unquoted value
# that may be continued on following lines indented by at least four spaces.
#
# The following does not work with pypy3, because pypy's re-engine does not
# support local flags, e.g. '(?s: )':
# RE_VALUE = '(?:"""((?s:.*?))""")|' + "(?:'''((?s:.*?))''')|" + \
#            '(?:"(.*?)")|' + "(?:'(.*?)')|" + '(.*(?:\n(?:\s*\n)*    .*)*)'
RE_VALUE = (r'(?:"""((?:.|\n)*?)""")|' r"(?:'''((?:.|\n)*?)''')|"
            r'(?:"(.*?)")|' r"(?:'(.*?)')|" r'(.*(?:\n(?:\s*\n)*    .*)*)')

# Entry of the form ``key: value``. Exactly one of the value groups matches.
RX_ENTRY = re.compile(r'\s*(\w+)\s*:\s*(?:{value})\s*'.format(value=RE_VALUE))

# A comment line: optional whitespace, ``#`` and the rest of the line,
# including the terminating linefeed.
RX_COMMENT = re.compile(r'\s*#.*\n')

93

94
def unit_from_configfile(config_filename):
    """ Reads grammar unit tests contained in a file in config file (.ini)
    syntax.

    The file is expected to consist of sections headed by ``[stage:symbol]``,
    each of which is followed by at least one ``key: value`` entry. Lines
    starting with ``#`` between sections and entries are skipped as comments.
    Tabs are replaced by four spaces before the file is parsed.

    Args:
        config_filename (str): A config file containing Grammar unit-tests

    Returns:
        A nested ordered dictionary representing the unit tests:
        ``unit[symbol][stage][testkey] = testcode``

    Raises:
        KeyError: if a section header names a stage not in ``UNIT_STAGES``.
        SyntaxError: if a section does not contain any entries or if the
            file could not be parsed up to its very end.
    """
    def eat_comments(txt, pos):
        """Returns the position following all comment lines that start
        at position ``pos`` of ``txt``."""
        m = RX_COMMENT.match(txt, pos)
        while m:
            pos = m.end()
            m = RX_COMMENT.match(txt, pos)
        return pos

    def dedent_continuation_lines(testcode):
        """Removes the common indentation of all lines but the first from
        a multi-line value."""
        lines = testcode.split('\n')
        if len(lines) <= 1:
            return testcode
        indent = sys.maxsize
        for line in lines[1:]:
            # Blank lines must not take part in determining the common
            # indentation: An empty line would otherwise force the
            # indentation to zero and thereby switch off dedenting.
            if line.strip():
                indent = min(indent, len(line) - len(line.lstrip()))
        return '\n'.join(lines[:1] + [line[indent:] for line in lines[1:]])

    with open(config_filename, 'r', encoding="utf-8") as f:
        cfg = f.read()
        cfg = cfg.replace('\t', '    ')

    OD = collections.OrderedDict
    unit = OD()

    pos = eat_comments(cfg, 0)
    section_match = RX_SECTION.match(cfg, pos)
    while section_match:
        d = section_match.groupdict()
        stage = d['stage']
        if stage not in UNIT_STAGES:
            raise KeyError('Unknown stage ' + stage + " ! must be one of: " + str(UNIT_STAGES))
        symbol = d['symbol']
        pos = eat_comments(cfg, section_match.end())

        entry_match = RX_ENTRY.match(cfg, pos)
        if entry_match is None:
            raise SyntaxError('No entries in section [%s:%s]' % (stage, symbol))
        while entry_match:
            # Only the key and that one of the alternative value-groups
            # that actually matched are not None.
            testkey, testcode = [group for group in entry_match.groups() if group is not None]
            testcode = dedent_continuation_lines(testcode)
            unit.setdefault(symbol, OD()).setdefault(stage, OD())[testkey] = testcode
            pos = eat_comments(cfg, entry_match.end())
            entry_match = RX_ENTRY.match(cfg, pos)

        section_match = RX_SECTION.match(cfg, pos)

    # If parsing stopped before the end of the file, the remainder is
    # neither a section, an entry nor a comment.
    if pos != len(cfg):
        raise SyntaxError('in file %s in line %i' % (config_filename, cfg[:pos].count('\n') + 1))

    return unit
151

152

153
def unit_from_json(json_filename):
    """
    Loads a grammar unit test from a json file and checks that every
    stage name found in it is one of ``UNIT_STAGES``.

    Args:
        json_filename (str): The name of a utf-8 encoded json file.

    Returns:
        The deserialized content of the json file.

    Raises:
        ValueError: if the file contains an unknown stage name.
    """
    with open(json_filename, 'r', encoding='utf8') as json_file:
        unit = json.load(json_file)
    # lazily walk through the stage names of all symbols, in file order
    stage_names = (stage for symbol in unit for stage in unit[symbol])
    for stage in stage_names:
        if stage in UNIT_STAGES:
            continue
        raise ValueError('Test stage %s not in: %s' % (stage, str(UNIT_STAGES)))
    return unit

165
# TODO: add support for yaml, cson, toml
166
167


168
def unit_from_file(filename):
    """
    Reads a grammar unit test from a file. The format of the file is
    determined by the ending of its name: ".json" files are read with
    ``unit_from_json``, ".ini" files with ``unit_from_configfile``.

    Args:
        filename (str): The name of the test file.

    Returns:
        The test unit as returned by the format-specific reader.

    Raises:
        ValueError: if the file name has neither of the known endings.
            The message always starts with "Unknown unit test file type".
        EnvironmentError: if, for any parser, the same test name is used
            for a match test as well as for a fail test.
    """
    if filename.endswith(".json"):
        test_unit = unit_from_json(filename)
    elif filename.endswith(".ini"):
        test_unit = unit_from_configfile(filename)
    else:
        # os.path.splitext, other than slicing from the last dot, also yields
        # a sensible result if the file name does not contain any dot or if
        # only one of its directories does.
        extension = os.path.splitext(filename)[1]
        if extension:
            raise ValueError("Unknown unit test file type: " + extension)
        raise ValueError('Unknown unit test file type: "%s" has no file extension'
                         % filename)

    # Check for ambiguous Test names
    errors = []
    for parser_name, tests in test_unit.items():
        m_names = set(tests.get('match', dict()).keys())
        f_names = set(tests.get('fail', dict()).keys())
        intersection = sorted(m_names & f_names)
        if intersection:
            errors.append("Same names %s assigned to match and fail test "
                          "of parser %s." % (str(intersection), parser_name))
    if errors:
        raise EnvironmentError("Error(s) in Testfile %s :\n" % filename
                               + '\n'.join(errors))

    return test_unit

196

197
198
199
200
201
202
203
204
def all_match_tests(tests):
    """Yields the (name, code)-pairs of all match tests in ``tests``:
    first those of the unmarked 'match'-stage, then those of the
    'match*'-stage, i.e. the match tests that have been marked with
    an asterisk for CST-output.
    """
    plain = tests.get('match', dict())
    starred = tests.get('match*', dict())
    return itertools.chain(plain.items(), starred.items())


205
def get_report(test_unit):
    """
    Renders the results of a grammar unit test as text. For every parser
    the report lists the match tests and then the fail tests, each with
    its source code and, if the test failed, with its error message.
    For match tests the abstract-syntax-tree (AST) is added, if present.

    The concrete syntax tree (CST) of a match test is added if there is
    no AST for this test or if the name of the test ends with an asterisk.

    The latter allows to switch the CST-output on and off for single tests
    while constructing or debugging AST-transformations, without bloating
    the report with the CSTs of all tests.
    """
    def indent(txt):
        return '\n'.join('    ' + line for line in txt.split('\n'))

    def test_section(kind, test_name, test_code, errors):
        # heading, source code and - if any - error message of one test
        heading = '%s-test "%s"' % (kind, test_name)
        chunks = ['\n%s\n%s\n' % (heading, '-' * len(heading)),
                  '### Test-code:',
                  indent(test_code)]
        error = errors.get(test_name, "")
        if error:
            chunks.extend(['\n### Error:', error])
        return chunks

    parts = []
    for parser_name, tests in test_unit.items():
        title = 'Test of parser: "%s"' % parser_name
        parts.append('\n\n%s\n%s\n' % (title, '=' * len(title)))
        errors = tests.get('__err__', {})
        asts = tests.get('__ast__', {})
        csts = tests.get('__cst__', {})

        for test_name, test_code in tests.get('match', dict()).items():
            parts.extend(test_section('Match', test_name, test_code, errors))
            ast = asts.get(test_name, None)
            cst = csts.get(test_name, None)
            if cst and (not ast or str(test_name).endswith('*')):
                parts.extend(['\n### CST', indent(cst.as_sxpr())])
            if ast:
                parts.extend(['\n### AST', indent(ast.as_sxpr())])

        for test_name, test_code in tests.get('fail', dict()).items():
            parts.extend(test_section('Fail', test_name, test_code, errors))

    return '\n'.join(parts)


256
def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, verbose=False):
    """
    Unit tests for a grammar-parser and ast transformations.

    Runs the 'match' and the 'fail' tests of all parsers in the test unit.
    The syntax trees of match tests are compared with the trees given in
    the stages 'cst' and 'ast', as far as these contain an entry with the
    name of the match test. The results are stored in the test unit itself
    under the keys '__cst__', '__ast__' and '__err__'.

    Args:
        test_unit: Either the name of a test file, which will be read with
            ``unit_from_file``, or an already loaded test unit, i.e. a
            dictionary mapping parser names to dictionaries of test stages.
        parser_factory: A callable without arguments that returns the
            parser. The parser is called as ``parser(test_code, parser_name)``.
        transformer_factory: A callable without arguments that returns the
            transformation which is applied in place to a copy of the
            concrete syntax tree.
        report (bool): If True, a report is written to the file
            "REPORT/<unit name>.md", relative to the current working directory.
        verbose (bool): If True, the progress is printed to stdout.

    Returns:
        A list of error messages, one for each test that failed.

    Raises:
        AssertionError: if a parser name is empty, if a test stage is
            unknown, or if there are 'ast'- or 'cst'-tests without a
            match test of the same name.
    """
    if isinstance(test_unit, str):
        _, unit_name = os.path.split(os.path.splitext(test_unit)[0])
        test_unit = unit_from_file(test_unit)
    else:
        unit_name = str(id(test_unit))
    if verbose:
        print("\nUnit: " + unit_name)
    errata = []
    parser = parser_factory()
    transform = transformer_factory()
    for parser_name, tests in test_unit.items():
        assert parser_name, "Missing parser name in test %s!" % unit_name
        # '__err__' is written by this very function. It must be accepted
        # here, or a test unit could not be run again after a test has failed.
        assert set(tests.keys()).issubset(UNIT_STAGES | {'__err__'})
        if verbose:
            print('  Match-Tests for parser "' + parser_name + '"')
        # A parser may have fail tests only, so 'match' can be missing.
        match_tests = set(tests.get('match', dict()).keys())
        if 'ast' in tests:
            ast_tests = set(tests['ast'].keys())
            if not ast_tests <= match_tests:
                raise AssertionError('AST-Tests %s lack corresponding match-tests!'
                                     % str(ast_tests - match_tests))
        if 'cst' in tests:
            cst_tests = set(tests['cst'].keys())
            if not cst_tests <= match_tests:
                raise AssertionError('CST-Tests %s lack corresponding match-tests!'
                                     % str(cst_tests - match_tests))
        for test_name, test_code in tests.get('match', dict()).items():
            if verbose:
                infostr = '    match-test "' + test_name + '" ... '
                errflag = len(errata)
            try:
                cst = parser(test_code, parser_name)
            except UnknownParserError as upe:
                # report an unknown parser as failure of the test rather
                # than aborting the whole test run
                cst = Node(ZOMBIE_PARSER, "").add_error(str(upe)).init_pos(0)
            log_ST(cst, "match_%s_%s.cst" % (parser_name, test_name))
            tests.setdefault('__cst__', {})[test_name] = cst
            if "ast" in tests or report:
                ast = copy.deepcopy(cst)
                transform(ast)
                tests.setdefault('__ast__', {})[test_name] = ast
                log_ST(ast, "match_%s_%s.ast" % (parser_name, test_name))
            if is_error(cst.error_flag):
                errors = adjust_error_locations(cst.collect_errors(), test_code)
                errata.append('Match test "%s" for parser "%s" failed:\n\tExpr.:  %s\n\n\t%s\n\n' %
                              (test_name, parser_name, '\n\t'.join(test_code.split('\n')),
                               '\n\t'.join(str(m).replace('\n', '\n\t\t') for m in errors)))
                tests.setdefault('__err__', {})[test_name] = errata[-1]
                # write parsing-history log only in case of failure!
                if is_logging():
                    log_parsing_history(parser, "match_%s_%s.log" % (parser_name, test_name))
            # Not every match test needs to have an expected CST or AST.
            # Therefore, the trees are compared only if there is an entry
            # with the name of this particular test.
            elif test_name in tests.get('cst', {}) \
                    and mock_syntax_tree(tests['cst'][test_name]) != cst:
                errata.append('Concrete syntax tree test "%s" for parser "%s" failed:\n%s' %
                              (test_name, parser_name, cst.as_sxpr()))
                tests.setdefault('__err__', {})[test_name] = errata[-1]
            elif test_name in tests.get('ast', {}):
                # 'ast' is bound here, because the 'ast'-stage exists
                compare = mock_syntax_tree(tests['ast'][test_name])
                if compare != ast:
                    errata.append('Abstract syntax tree test "%s" for parser "%s" failed:'
                                  '\n\tExpr.:     %s\n\tExpected:  %s\n\tReceived:  %s'
                                  % (test_name, parser_name, '\n\t'.join(test_code.split('\n')),
                                     flatten_sxpr(compare.as_sxpr()),
                                     flatten_sxpr(ast.as_sxpr())))
                    tests.setdefault('__err__', {})[test_name] = errata[-1]
            if verbose:
                print(infostr + ("OK" if len(errata) == errflag else "FAIL"))

        if verbose and 'fail' in tests:
            print('  Fail-Tests for parser "' + parser_name + '"')
        for test_name, test_code in tests.get('fail', dict()).items():
            if verbose:
                infostr = '    fail-test  "' + test_name + '" ... '
                errflag = len(errata)
            try:
                cst = parser(test_code, parser_name)
            except UnknownParserError as upe:
                cst = Node(ZOMBIE_PARSER, "").add_error(str(upe)).init_pos(0)
            if not is_error(cst.error_flag):
                errata.append('Fail test "%s" for parser "%s" yields match instead of '
                              'expected failure!' % (test_name, parser_name))
                tests.setdefault('__err__', {})[test_name] = errata[-1]
                # write parsing-history log only in case of test-failure
                if is_logging():
                    log_parsing_history(parser, "fail_%s_%s.log" % (parser_name, test_name))
            if verbose:
                print(infostr + ("OK" if len(errata) == errflag else "FAIL"))

    # write test-report
    if report:
        report_dir = "REPORT"
        os.makedirs(report_dir, exist_ok=True)
        with open(os.path.join(report_dir, unit_name + '.md'), 'w', encoding='utf8') as f:
            f.write(get_report(test_unit))

    return errata


Eckhart Arnold's avatar
Eckhart Arnold committed
360
361
362
363
def grammar_suite(directory, parser_factory, transformer_factory,
                  fn_patterns=['*test*'],
                  ignore_unknown_filetypes=False,
                  report=True, verbose=True):
    """
    Runs all grammar unit tests in a directory. A file is considered a test
    unit, if its name matches at least one of the patterns in ``fn_patterns``,
    by default, if it has the word "test" in its name.

    While the tests are running, ``directory`` is the current working
    directory. The previous working directory is restored afterwards,
    even if running the tests raises an error.

    Args:
        directory (str): The directory containing the test files.
        parser_factory: Passed through to ``grammar_unit``.
        transformer_factory: Passed through to ``grammar_unit``.
        fn_patterns: A single file name pattern or an iterable of file name
            patterns in the syntax of the ``fnmatch`` module. (The default
            list is never mutated.)
        ignore_unknown_filetypes (bool): If True, files for which a
            ValueError containing the word "Unknown" is raised are skipped.
        report (bool): Passed through to ``grammar_unit``.
        verbose (bool): If True, the progress is printed to stdout.

    Returns:
        An error report as string or the empty string, if no test failed.
    """
    # A string is iterable, too. Without checking for strings first, a single
    # pattern would be treated as a sequence of one-character patterns.
    if isinstance(fn_patterns, str) \
            or not isinstance(fn_patterns, collections.abc.Iterable):
        fn_patterns = [fn_patterns]
    all_errors = collections.OrderedDict()
    if verbose:
        print("\nScanning test-directory: " + directory)
    save_cwd = os.getcwd()
    os.chdir(directory)
    try:
        if is_logging():
            clear_logs()
        for filename in sorted(os.listdir()):
            if any(fnmatch.fnmatch(filename, pattern) for pattern in fn_patterns):
                try:
                    if verbose:
                        print("\nRunning grammar tests from: " + filename)
                    errata = grammar_unit(filename, parser_factory,
                                          transformer_factory, report, verbose)
                    if errata:
                        all_errors[filename] = errata
                except ValueError as e:
                    if not ignore_unknown_filetypes or str(e).find("Unknown") < 0:
                        raise
    finally:
        os.chdir(save_cwd)

    error_report = []
    err_N = 0
    for filename, errata in all_errors.items():
        error_report.append('Errors found by unit test "%s":\n' % filename)
        err_N += len(errata)
        for error in errata:
            error_report.append('\t' + '\n\t'.join(error.split('\n')))
    if error_report:
        return ('Test suite "%s" revealed %s error%s:\n\n'
                % (directory, err_N, 's' if err_N > 1 else '') + '\n'.join(error_report))
    if verbose:
        print("\nSUCCESS! All tests passed :-)\n")
    return ''


408
def runner(test_classes, namespace):
    """
    Runs all or some selected Python unit tests found in the
    namespace. To run all tests in a module, call
    ``runner("", globals())`` from within that module.

    Unit-Tests are either classes, the name of which starts with
    "Test" and methods, the name of which starts with "test" contained
    in such classes or functions, the name of which starts with "test".

    If an object has a method ``setup``, it is called right after the
    object has been created. If it has a method ``teardown``, it is called
    after the tests of the object have been run, even if a test failed.

    Args:
        test_classes: Either a string or a list of strings that contains the
            names of test or test classes. Each test and, in the case
            of a test class, all tests within the test class will be
            run. Within a string, names are separated by whitespace.
            A single test method can be selected with the notation
            "ClassName.method_name". If empty, all test classes and
            test functions of the namespace are run.
        namespace: The namespace for running the test, usually
            ``globals()`` should be used.

    Raises:
        NameError: if a name in ``test_classes`` is not in the namespace.

    Example:
        class TestSomething():
            def setup(self):
                pass
            def teardown(self):
                pass
            def test_something(self):
                pass

        if __name__ == "__main__":
            from DHParser.testing import runner
            runner("", globals())
    """
    def lookup(name):
        try:
            return namespace[name]
        except KeyError:
            raise NameError("name '%s' is not defined" % name) from None

    def instantiate(cls_name):
        obj = lookup(cls_name)()
        if "setup" in dir(obj):
            obj.setup()
        return obj

    # must exist also if test classes have been passed explicitly
    test_functions = []
    if test_classes:
        if isinstance(test_classes, str):
            test_classes = test_classes.split()
    else:
        # collect all test classes, in case no methods or classes have been passed explicitly
        test_classes = []
        for name in namespace.keys():
            if name.lower().startswith('test'):
                if inspect.isclass(namespace[name]):
                    test_classes.append(name)
                elif inspect.isfunction(namespace[name]):
                    test_functions.append(name)

    for test in test_classes:
        # Reset for every test, so that after a failed instantiation the
        # object of the previous test is not torn down a second time.
        obj = None
        try:
            if '.' in test:
                cls_name, method_name = test.split('.')
                obj = instantiate(cls_name)
                print("Running " + cls_name + "." + method_name)
                getattr(obj, method_name)()
            else:
                obj = instantiate(test)
                for name in dir(obj):
                    if name.lower().startswith("test"):
                        method = getattr(obj, name)
                        # skip data attributes that merely look like tests
                        if callable(method):
                            print("Running " + test + "." + name)
                            method()
        finally:
            if "teardown" in dir(obj):
                obj.teardown()

    for test in test_functions:
        lookup(test)()