Currently job artifacts in CI/CD pipelines on LRZ GitLab never expire. Starting from Wed 26.1.2022 the default expiration time will be 30 days (GitLab default). Currently existing artifacts in already completed jobs will not be affected by the change. The latest artifacts for all jobs in the latest successful pipelines will be kept. More information: https://gitlab.lrz.de/help/user/admin_area/settings/continuous_integration.html#default-artifacts-expiration

testing.py 20.7 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# testing.py - test support for DHParser based grammars and compilers
#
# Copyright 2016  by Eckhart Arnold (arnold@badw.de)
#                 Bavarian Academy of Sciences and Humanities (badw.de)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.  See the License for the specific language governing
# permissions and limitations under the License.
17

18
19
20
21
22
23
24
25
26
"""
Module ``testing`` contains support for unit-testing domain specific
languages. Tests for arbitrarily small components of the Grammar can
be written into test files with ini-file syntax in order to test
whether the parser matches or fails as expected. It can also be
tested whether it produces an expected concrete or abstract syntax tree.
Usually, however, unexpected failure to match a certain string is the
main cause of trouble when constructing a context free Grammar.
"""
27
28


29
import collections
30
# import configparser
31
import copy
Eckhart Arnold's avatar
Eckhart Arnold committed
32
import fnmatch
di68kap's avatar
di68kap committed
33
import inspect
34
import itertools
35
36
import json
import os
37
import sys
38

39
from DHParser.error import is_error, adjust_error_locations
40
from DHParser.log import is_logging, clear_logs, log_ST, log_parsing_history
41
from DHParser.parse import UnknownParserError
eckhart's avatar
eckhart committed
42
from DHParser.syntaxtree import Node, RootNode, parse_sxpr, flatten_sxpr, ZOMBIE_PARSER
43
44
45
from DHParser.toolkit import re, typing

from typing import Tuple
46

47
# Public names of this module, i.e. what ``from DHParser.testing import *``
# brings into scope.
__all__ = ('unit_from_configfile',
           'unit_from_json',
           'unit_from_file',
           'get_report',
           'grammar_unit',
           'grammar_suite',
           'reset_unit',
           'runner')

56
57
# Names of the test stages that may be specified for a parser in a test unit.
UNIT_STAGES = {'match*', 'match', 'fail', 'ast', 'cst'}
# Keys under which the results of a test run are stored next to the tests
# themselves: concrete syntax trees, abstract syntax trees, error reports and
# the messages collected from fail tests.
RESULT_STAGES = {'__cst__', '__ast__', '__err__', '__msg__'}
58

59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# def unit_from_configfile(config_filename):
#     """
#     Reads a grammar unit test from a config file.
#     """
#     cfg = configparser.ConfigParser(interpolation=None)
#     cfg.read(config_filename, encoding="utf8")
#     OD = collections.OrderedDict
#     unit = OD()
#     for section in cfg.sections():
#         symbol, stage = section.split(':')
#         if stage not in UNIT_STAGES:
#             if symbol in UNIT_STAGES:
#                 symbol, stage = stage, symbol
#             else:
#                 raise ValueError('Test stage %s not in: ' % (stage, str(UNIT_STAGES)))
#         for testkey, testcode in cfg[section].items():
#             if testcode[:3] + testcode[-3:] in {"''''''", '""""""'}:
#                 testcode = testcode[3:-3]
#                 # testcode = testcode.replace('\\#', '#')
#                 testcode = re.sub(r'(?<!\\)\\#', '#', testcode).replace('\\\\', '\\')
#             elif testcode[:1] + testcode[-1:] in {"''", '""'}:
#                 testcode = testcode[1:-1]
#             unit.setdefault(symbol, OD()).setdefault(stage, OD())[testkey] = testcode
#     # print(json.dumps(unit, sort_keys=True, indent=4))
#     return unit

85
# Header of a test section: ``[stage:symbol]``, optionally preceded by
# whitespace.
RX_SECTION = re.compile(r'\s*\[(?P<stage>\w+):(?P<symbol>\w+)\]')

# Value of an entry. Exactly one of the five capture groups takes part in a
# match, depending on how the value is written:
#   1. """triple double quoted""" (may span several lines)
#   2. '''triple single quoted''' (may span several lines)
#   3. "double quoted" (single line)
#   4. 'single quoted' (single line)
#   5. unquoted; continuation lines must be indented by four spaces and
#      may be separated by blank lines
RE_VALUE = (r'(?:"""((?:.|\n)*?)""")|'
            r"(?:'''((?:.|\n)*?)''')|"
            r'(?:"(.*?)")|'
            r"(?:'(.*?)')|"
            r'(.*(?:\n(?:\s*\n)*    .*)*)')
# the following does not work with pypy3, because pypy's re-engine does not
# support local flags, e.g. '(?s: )'
# RE_VALUE = '(?:"""((?s:.*?))""")|' + "(?:'''((?s:.*?))''')|" + \
#            '(?:"(.*?)")|' + "(?:'(.*?)')|" + '(.*(?:\n(?:\s*\n)*    .*)*)'

# A single entry ``name: value``; the name may carry a trailing asterisk.
RX_ENTRY = re.compile(r'\s*(\w+\*?)\s*:\s*(?:{value})\s*'.format(value=RE_VALUE))

# A comment line (``# ...``) including any whitespace in front of it.
RX_COMMENT = re.compile(r'\s*#.*\n')

95

96
def unit_from_configfile(config_filename):
    """ Reads grammar unit tests contained in a file in config file (.ini)
    syntax.

    The file is a sequence of sections. Each section starts with a header
    of the form ``[stage:symbol]`` and is followed by at least one entry
    of the form ``name: value``. Comment lines starting with ``#`` are
    skipped before and after sections and entries. Tabs are replaced
    by four spaces before parsing.

    Args:
        config_filename (str): A config file containing Grammar unit-tests

    Returns:
        A dictionary representing the unit tests. It is an ordered
        dictionary of the form ``{symbol: {stage: {name: value}}}``.

    Raises:
        KeyError: if a section header names a stage that is not
            contained in ``UNIT_STAGES``.
        SyntaxError: if a section does not contain any entry or if the
            file could not be parsed up to its end.
    """
    def eat_comments(txt, pos):
        """Returns the position behind all comment lines found at ``pos``."""
        m = RX_COMMENT.match(txt, pos)
        while m:
            pos = m.end()
            m = RX_COMMENT.match(txt, pos)
        return pos

    def dedent_continuation_lines(testcode):
        """Removes the common indentation of all lines but the first."""
        lines = testcode.split('\n')
        if len(lines) <= 1:
            return testcode
        tail = lines[1:]
        # Blank lines carry no indentation information. Taking them into
        # account would force the common indentation down to zero whenever
        # a value contains an empty line. They are only used as a fallback
        # if there is no other continuation line.
        relevant = [line for line in tail if line.strip()] or tail
        indent = min(len(line) - len(line.lstrip()) for line in relevant)
        return '\n'.join([lines[0]] + [line[indent:] for line in tail])

    with open(config_filename, 'r', encoding="utf-8") as f:
        cfg = f.read()
        cfg = cfg.replace('\t', '    ')

    OD = collections.OrderedDict
    unit = OD()

    pos = eat_comments(cfg, 0)
    section_match = RX_SECTION.match(cfg, pos)
    while section_match:
        d = section_match.groupdict()
        stage = d['stage']
        if stage not in UNIT_STAGES:
            raise KeyError('Unknown stage ' + stage + " ! must be one of: " + str(UNIT_STAGES))
        symbol = d['symbol']
        pos = eat_comments(cfg, section_match.end())

        entry_match = RX_ENTRY.match(cfg, pos)
        if entry_match is None:
            raise SyntaxError('No entries in section [%s:%s]' % (stage, symbol))
        while entry_match:
            # Apart from the name, exactly one of the alternative value
            # groups of the entry pattern is set.
            testkey, testcode = [group for group in entry_match.groups() if group is not None]
            testcode = dedent_continuation_lines(testcode)
            unit.setdefault(symbol, OD()).setdefault(stage, OD())[testkey] = testcode
            pos = eat_comments(cfg, entry_match.end())
            entry_match = RX_ENTRY.match(cfg, pos)

        section_match = RX_SECTION.match(cfg, pos)

    # Anything left over at this point is neither a section, an entry nor a comment.
    if pos != len(cfg):
        raise SyntaxError('in file %s in line %i' % (config_filename, cfg[:pos].count('\n') + 1))

    return unit
153

154

155
def unit_from_json(json_filename):
    """
    Reads grammar unit tests from a json file.

    Args:
        json_filename (str): The name of an utf-8 encoded json file.

    Returns:
        The object stored in the json file. Its top-level keys are
        treated as symbols and the keys one level below as test stages.

    Raises:
        ValueError: if a stage is not contained in ``UNIT_STAGES``.
    """
    with open(json_filename, 'r', encoding='utf8') as f:
        unit = json.load(f)
    for symbol in unit:
        for stage in unit[symbol]:
            if stage not in UNIT_STAGES:
                raise ValueError('Test stage %s not in: %s' % (stage, str(UNIT_STAGES)))
    return unit

167
# TODO: add support for yaml, cson, toml
168
169


170
def unit_from_file(filename):
    """
    Reads a grammar unit test from a file. The format of the file is
    determined by the ending of its name: ``.json`` files are read with
    ``unit_from_json()``, ``.ini`` files with ``unit_from_configfile()``.

    Args:
        filename (str): The name of the test file.

    Returns:
        The test unit read from the file.

    Raises:
        ValueError: if the file name ends neither with ".json" nor
            with ".ini". The message starts with "Unknown".
        EnvironmentError: if, for any parser, the same test name is used
            for a match test as well as for a fail test.
    """
    if filename.endswith(".json"):
        test_unit = unit_from_json(filename)
    elif filename.endswith(".ini"):
        test_unit = unit_from_configfile(filename)
    else:
        # Report the extension or, if the name has none, the whole file name.
        extension = os.path.splitext(filename)[1]
        raise ValueError("Unknown unit test file type: " + (extension or filename))

    # Check for ambiguous Test names
    errors = []
    for parser_name, tests in test_unit.items():
        m_names = set(tests.get('match', dict()).keys())
        f_names = set(tests.get('fail', dict()).keys())
        intersection = sorted(m_names & f_names)
        if intersection:
            errors.append("Same names %s assigned to match and fail test "
                          "of parser %s." % (str(intersection), parser_name))
    if errors:
        raise EnvironmentError("Error(s) in Testfile %s :\n" % filename
                               + '\n'.join(errors))

    return test_unit

198

199
200
201
202
203
204
205
206
def all_match_tests(tests):
    """Returns an iterator over the (name, code)-pairs of all match tests
    in ``tests``: first the unmarked match tests (stage 'match'), then
    the match tests of the stage marked with an asterisk ('match*').
    A missing stage simply contributes no tests.
    """
    unmarked = tests.get('match', dict())
    marked = tests.get('match*', dict())
    return itertools.chain(unmarked.items(), marked.items())


207
def get_report(test_unit):
    """
    Returns a text-report of the results of a grammar unit test. The report
    lists the source of all tests as well as the error messages, if a test
    failed or the abstract-syntax-tree (AST) in case of success.

    If an asterisk has been appended to the test name then the concrete syntax
    tree will also be added to the report in this particular case.

    The purpose of the latter is to help constructing and debugging
    of AST-Transformations. It is better to switch the CST-output on and off
    with the asterisk marker when needed than to output the CST for all tests
    which would unnecessarily bloat the test reports.

    Args:
        test_unit: A dictionary mapping parser names to their tests. The
            results are read from the keys '__err__', '__msg__',
            '__ast__' and '__cst__' of the tests of each parser.

    Returns:
        The report as a single string; an empty string if ``test_unit``
        is empty.
    """
    def indent(txt):
        """Indents every line of ``txt`` by four spaces."""
        return '    ' + txt.replace('\n', '\n    ')

    report = []
    for parser_name, tests in test_unit.items():
        heading = 'Test of parser: "%s"' % parser_name
        report.append('\n\n%s\n%s\n' % (heading, '=' * len(heading)))

        for test_name, test_code in tests.get('match', dict()).items():
            heading = 'Match-test "%s"' % test_name
            report.append('\n%s\n%s\n' % (heading, '-' * len(heading)))
            report.append('### Test-code:')
            report.append(indent(test_code))
            error = tests.get('__err__', {}).get(test_name, "")
            if error:
                report.append('\n### Error:')
                report.append(error)
            ast = tests.get('__ast__', {}).get(test_name, None)
            cst = tests.get('__cst__', {}).get(test_name, None)
            # The CST is shown if it has been requested with an asterisk
            # or if there is no AST that could be shown instead.
            if cst and (not ast or str(test_name).endswith('*')):
                report.append('\n### CST')
                report.append(indent(cst.as_sxpr()))
            if ast:
                report.append('\n### AST')
                report.append(indent(ast.as_sxpr()))

        for test_name, test_code in tests.get('fail', dict()).items():
            heading = 'Fail-test "%s"' % test_name
            report.append('\n%s\n%s\n' % (heading, '-' * len(heading)))
            report.append('### Test-code:')
            report.append(indent(test_code))
            messages = tests.get('__msg__', {}).get(test_name, "")
            if messages:
                report.append('\n### Messages:')
                report.append(messages)
            error = tests.get('__err__', {}).get(test_name, "")
            if error:
                report.append('\n### Error:')
                report.append(error)

    return '\n'.join(report)


262
def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, verbose=False):
    """
    Unit tests for a grammar-parser and ast transformations.

    For every parser in the test unit all match tests and then all fail
    tests are run. The results are stored in the tests of the respective
    parser under the keys '__cst__', '__ast__', '__err__' and '__msg__'.

    Args:
        test_unit: Either the name of a test file, which will be read with
            ``unit_from_file()``, or a dictionary that maps parser names
            to their tests.
        parser_factory: A callable without arguments that returns the
            parser. The parser is called with the test code and the name
            of the parser that shall be tested.
        transformer_factory: A callable without arguments that returns
            the transformation. The transformation is called with a deep
            copy of the concrete syntax tree.
        report: If True, the report generated by ``get_report()`` is
            written to the file "REPORT/<unit name>.md" relative to the
            current working directory.
        verbose: If True, the progress is printed to the console.

    Returns:
        A list of error messages, one for each test that did not yield
        the expected result. The list is empty if all tests passed.

    Raises:
        AssertionError: if a parser name is empty, if the test unit
            already contains results or unknown test types or if there
            are AST- or CST-tests without a corresponding match test.
    """
    if isinstance(test_unit, str):
        _, unit_name = os.path.split(os.path.splitext(test_unit)[0])
        test_unit = unit_from_file(test_unit)
    else:
        unit_name = 'unit_test_' + str(id(test_unit))
    if verbose:
        print("\nUnit: " + unit_name)
    errata = []
    parser = parser_factory()
    transform = transformer_factory()

    for parser_name, tests in test_unit.items():
        assert parser_name, "Missing parser name in test %s!" % unit_name
        assert not any(test_type in RESULT_STAGES for test_type in tests), \
            ("Test %s in %s already has results. Use reset_unit() before running again!"
             % (parser_name, unit_name))
        assert set(tests.keys()).issubset(UNIT_STAGES), \
            'Unknown test-types: %s ! Must be one of %s' \
            % (set(tests.keys()) - UNIT_STAGES, UNIT_STAGES)
        if verbose:
            print('  Match-Tests for parser "' + parser_name + '"')
        match_tests = set(tests['match'].keys()) if 'match' in tests else set()
        if 'ast' in tests:
            ast_tests = set(tests['ast'].keys())
            if not ast_tests <= match_tests:
                raise AssertionError('AST-Tests %s lack corresponding match-tests!'
                                     % str(ast_tests - match_tests))
        if 'cst' in tests:
            cst_tests = set(tests['cst'].keys())
            if not cst_tests <= match_tests:
                raise AssertionError('CST-Tests %s lack corresponding match-tests!'
                                     % str(cst_tests - match_tests))

        # run match tests

        for test_name, test_code in tests.get('match', dict()).items():
            if verbose:
                infostr = '    match-test "' + test_name + '" ... '
                errflag = len(errata)
            try:
                cst = parser(test_code, parser_name)
            except UnknownParserError as upe:
                # NOTE(review): the fail tests below wrap their placeholder
                # node in a RootNode before adding the error - confirm that
                # adding the error to the bare Node is still correct here.
                cst = Node(ZOMBIE_PARSER, "").add_error(str(upe)).init_pos(0)
            clean_test_name = str(test_name).replace('*', '')
            log_ST(cst, "match_%s_%s.cst" % (parser_name, clean_test_name))
            tests.setdefault('__cst__', {})[test_name] = cst
            if "ast" in tests or report:
                ast = copy.deepcopy(cst)
                transform(ast)
                tests.setdefault('__ast__', {})[test_name] = ast
                log_ST(ast, "match_%s_%s.ast" % (parser_name, clean_test_name))
            if is_error(cst.error_flag):
                errors = adjust_error_locations(cst.collect_errors(), test_code)
                errata.append('Match test "%s" for parser "%s" failed:\n\tExpr.:  %s\n\n\t%s\n\n' %
                              (test_name, parser_name, '\n\t'.join(test_code.split('\n')),
                               '\n\t'.join(str(m).replace('\n', '\n\t\t') for m in errors)))
                tests.setdefault('__err__', {})[test_name] = errata[-1]
                # write parsing-history log only in case of failure!
                if is_logging():
                    log_parsing_history(parser, "match_%s_%s.log" % (parser_name, clean_test_name))
            # Not every match test needs to have an expected CST or AST,
            # therefore the test name is looked up before comparing.
            elif ("cst" in tests and test_name in tests["cst"]
                  and parse_sxpr(tests["cst"][test_name]) != cst):
                errata.append('Concrete syntax tree test "%s" for parser "%s" failed:\n%s' %
                              (test_name, parser_name, cst.as_sxpr()))
                tests.setdefault('__err__', {})[test_name] = errata[-1]
            elif "ast" in tests and test_name in tests["ast"]:
                compare = parse_sxpr(tests["ast"][test_name])
                if compare != ast:
                    errata.append('Abstract syntax tree test "%s" for parser "%s" failed:'
                                  '\n\tExpr.:     %s\n\tExpected:  %s\n\tReceived:  %s'
                                  % (test_name, parser_name, '\n\t'.join(test_code.split('\n')),
                                     flatten_sxpr(compare.as_sxpr()),
                                     flatten_sxpr(ast.as_sxpr())))
                    tests.setdefault('__err__', {})[test_name] = errata[-1]
            if verbose:
                print(infostr + ("OK" if len(errata) == errflag else "FAIL"))

        if verbose and 'fail' in tests:
            print('  Fail-Tests for parser "' + parser_name + '"')

        # run fail tests

        for test_name, test_code in tests.get('fail', dict()).items():
            if verbose:
                infostr = '    fail-test  "' + test_name + '" ... '
                errflag = len(errata)
            try:
                cst = parser(test_code, parser_name)
            except UnknownParserError as upe:
                node = Node(ZOMBIE_PARSER, "").init_pos(0)
                cst = RootNode().swallow(node).add_error(node, str(upe))
            if not is_error(cst.error_flag):
                errata.append('Fail test "%s" for parser "%s" yields match instead of '
                              'expected failure!' % (test_name, parser_name))
                tests.setdefault('__err__', {})[test_name] = errata[-1]
                # write parsing-history log only in case of test-failure
                if is_logging():
                    log_parsing_history(parser, "fail_%s_%s.log" % (parser_name, test_name))
            if cst.error_flag:
                tests.setdefault('__msg__', {})[test_name] = \
                    "\n".join(str(e) for e in cst.collect_errors())
            if verbose:
                print(infostr + ("OK" if len(errata) == errflag else "FAIL"))

    # write test-report
    if report:
        report_dir = "REPORT"
        # exist_ok avoids a failure if the directory is created by someone
        # else between checking for its existence and creating it.
        os.makedirs(report_dir, exist_ok=True)
        with open(os.path.join(report_dir, unit_name + '.md'), 'w', encoding='utf8') as f:
            f.write(get_report(test_unit))

    return errata


383
def reset_unit(test_unit):
    """
    Resets the tests in ``test_unit`` by removing all results and error
    messages.

    Every key of the tests of a parser that is not contained in
    ``UNIT_STAGES`` is deleted in place. If such a key is not contained
    in ``RESULT_STAGES`` either, a notice is printed before it is deleted.

    Args:
        test_unit: A dictionary mapping parser names to their tests.
    """
    for parser, tests in test_unit.items():
        # Iterate over a copy of the keys, because keys are deleted on the way.
        for key in list(tests.keys()):
            if key not in UNIT_STAGES:
                if key not in RESULT_STAGES:
                    print('Removing unknown component %s from test %s' % (key, parser))
                del tests[key]



Eckhart Arnold's avatar
Eckhart Arnold committed
397
398
399
400
def grammar_suite(directory, parser_factory, transformer_factory,
                  fn_patterns=['*test*'],
                  ignore_unknown_filetypes=False,
                  report=True, verbose=True):
    """
    Runs all grammar unit tests in a directory. A file is considered a test
    unit, if its name matches at least one of the patterns in
    ``fn_patterns``, by default if it has the word "test" in its name.

    The working directory is changed to ``directory`` while the tests are
    running and restored afterwards, even if a test raises an error.

    Args:
        directory: The directory that contains the test files.
        parser_factory: Passed through to ``grammar_unit()``.
        transformer_factory: Passed through to ``grammar_unit()``.
        fn_patterns: A file name pattern as understood by
            ``fnmatch.fnmatch`` or an iterable of such patterns. The
            default value is never mutated.
        ignore_unknown_filetypes: If True, ValueErrors with the word
            "Unknown" in their message are ignored. All other
            ValueErrors are still raised.
        report: Passed through to ``grammar_unit()``.
        verbose: If True, the progress is printed to the console.

    Returns:
        An empty string, if no errors were found. Otherwise a report
        listing all errors grouped by test file.
    """
    # A single pattern given as string must be wrapped, too: strings are
    # iterables themselves and would be split into single characters.
    if isinstance(fn_patterns, str) or not isinstance(fn_patterns, collections.abc.Iterable):
        fn_patterns = [fn_patterns]
    # The patterns are checked once per file, so one-shot iterators
    # must be materialized first.
    patterns = tuple(fn_patterns)
    all_errors = collections.OrderedDict()
    if verbose:
        print("\nScanning test-directory: " + directory)
    save_cwd = os.getcwd()
    os.chdir(directory)
    try:
        if is_logging():
            clear_logs()
        for filename in sorted(os.listdir()):
            if any(fnmatch.fnmatch(filename, pattern) for pattern in patterns):
                try:
                    if verbose:
                        print("\nRunning grammar tests from: " + filename)
                    errata = grammar_unit(filename, parser_factory,
                                          transformer_factory, report, verbose)
                    if errata:
                        all_errors[filename] = errata
                except ValueError as e:
                    if not ignore_unknown_filetypes or str(e).find("Unknown") < 0:
                        raise
    finally:
        os.chdir(save_cwd)
    error_report = []
    err_N = 0
    if all_errors:
        for filename in all_errors:
            error_report.append('Errors found by unit test "%s":\n' % filename)
            err_N += len(all_errors[filename])
            for error in all_errors[filename]:
                error_report.append('\t' + '\n\t'.join(error.split('\n')))
    if error_report:
        # if verbose:
        #     print("\nFAILURE! %i error%s found!\n" % (err_N, 's' if err_N > 1 else ''))
        return ('Test suite "%s" revealed %s error%s:\n\n'
                % (directory, err_N, 's' if err_N > 1 else '') + '\n'.join(error_report))
    if verbose:
        print("\nSUCCESS! All tests passed :-)\n")
    return ''


445
def runner(test_classes, namespace):
    """
    Runs all or some selected Python unit tests found in the
    namespace. To run all tests in a module, call
    ``runner("", globals())`` from within that module.

    Unit-Tests are either classes, the name of which starts with
    "Test" and methods, the name of which starts with "test" contained
    in such classes or functions, the name of which starts with "test".

    For each test class a fresh instance is created. If the instance has
    a method ``setup``, it is called before the tests are run; if it has
    a method ``teardown``, it is called afterwards, even if a test failed.

    Args:
        test_classes: Either a string or a list of strings that contains
            the names of test or test classes. Within a string, names
            are separated by blanks. A single test method is written
            as ``ClassName.method_name``. Each test and, in the case
            of a test class, all tests within the test class will be
            run. If empty, all test classes and test functions found
            in the namespace are run.
        namespace: The namespace for running the test, usually
            ``globals()`` should be used.

    Example:
        class TestSomething():
            def setup(self):
                pass
            def teardown(self):
                pass
            def test_something(self):
                pass

        if __name__ == "__main__":
            from DHParser.testing import runner
            runner("", globals())
    """
    def instantiate(cls_name):
        """Instantiates the class ``cls_name`` from the namespace and
        calls the ``setup``-method of the new object, if there is one."""
        obj = namespace[cls_name]()
        if "setup" in dir(obj):
            obj.setup()
        return obj

    # Test functions are only collected if no tests have been selected
    # explicitly. The list must exist in either case, though.
    test_functions = []
    if test_classes:
        if isinstance(test_classes, str):
            test_classes = test_classes.split(" ")
    else:
        # collect all test classes, in case no methods or classes have been passed explicitly
        test_classes = []
        for name in namespace.keys():
            if name.lower().startswith('test'):
                if inspect.isclass(namespace[name]):
                    test_classes.append(name)
                elif inspect.isfunction(namespace[name]):
                    test_functions.append(name)

    for test in test_classes:
        # Reset for every test, so that a failing instantiation does not
        # lead to the teardown of the object of the previous test.
        obj = None
        try:
            if test.find('.') >= 0:
                cls_name, method_name = test.split('.')
                obj = instantiate(cls_name)
                print("Running " + cls_name + "." + method_name)
                getattr(obj, method_name)()
            else:
                obj = instantiate(test)
                for name in dir(obj):
                    if name.lower().startswith("test"):
                        print("Running " + test + "." + name)
                        getattr(obj, name)()
        finally:
            if "teardown" in dir(obj):
                obj.teardown()

    for test in test_functions:
        namespace[test]()