21.10.2021, 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

testing.py 21.6 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# testing.py - test support for DHParser based grammars and compilers
#
# Copyright 2016  by Eckhart Arnold (arnold@badw.de)
#                 Bavarian Academy of Sciences and Humanities (badw.de)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.  See the License for the specific language governing
# permissions and limitations under the License.
17

18
19
20
21
22
23
24
25
26
"""
Module ``testing`` contains support for unit-testing domain specific
languages. Tests for arbitrarily small components of the Grammar can
be written into test files with ini-file syntax in order to test
whether the parser matches or fails as expected. It can also be
tested whether it produces an expected concrete or abstract syntax tree.
Usually, however, unexpected failure to match a certain string is the
main cause of trouble when constructing a context free Grammar.
"""
27
28


29
import collections
30
# import configparser
31
import copy
Eckhart Arnold's avatar
Eckhart Arnold committed
32
import fnmatch
di68kap's avatar
di68kap committed
33
import inspect
34
import itertools
35
36
import json
import os
37
import sys
38

39
from DHParser.error import is_error, adjust_error_locations
40
from DHParser.log import is_logging, clear_logs, log_ST, log_parsing_history
41
from DHParser.parse import UnknownParserError
eckhart's avatar
eckhart committed
42
from DHParser.syntaxtree import Node, RootNode, parse_sxpr, flatten_sxpr, ZOMBIE_PARSER
43
44
45
from DHParser.toolkit import re, typing

from typing import Tuple
46

47
# Public API of this module (names exported by ``from testing import *``).
__all__ = ('unit_from_configfile',
           'unit_from_json',
           'unit_from_file',
           'get_report',
           'grammar_unit',
           'grammar_suite',
           'reset_unit',
           'runner')

56
57
# Test stages that may appear as keys in the test dictionary of a parser.
UNIT_STAGES = {'match*', 'match', 'fail', 'ast', 'cst'}
# Keys under which grammar_unit() stores its results. '__msg__' (the error
# messages collected for fail-tests) belongs here as well, otherwise
# reset_unit() reports it as an unknown component.
RESULT_STAGES = {'__cst__', '__ast__', '__err__', '__msg__'}
58

59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# def unit_from_configfile(config_filename):
#     """
#     Reads a grammar unit test from a config file.
#     """
#     cfg = configparser.ConfigParser(interpolation=None)
#     cfg.read(config_filename, encoding="utf8")
#     OD = collections.OrderedDict
#     unit = OD()
#     for section in cfg.sections():
#         symbol, stage = section.split(':')
#         if stage not in UNIT_STAGES:
#             if symbol in UNIT_STAGES:
#                 symbol, stage = stage, symbol
#             else:
#                 raise ValueError('Test stage %s not in: ' % (stage, str(UNIT_STAGES)))
#         for testkey, testcode in cfg[section].items():
#             if testcode[:3] + testcode[-3:] in {"''''''", '""""""'}:
#                 testcode = testcode[3:-3]
#                 # testcode = testcode.replace('\\#', '#')
#                 testcode = re.sub(r'(?<!\\)\\#', '#', testcode).replace('\\\\', '\\')
#             elif testcode[:1] + testcode[-1:] in {"''", '""'}:
#                 testcode = testcode[1:-1]
#             unit.setdefault(symbol, OD()).setdefault(stage, OD())[testkey] = testcode
#     # print(json.dumps(unit, sort_keys=True, indent=4))
#     return unit

85
# Regular expressions for the ini-like syntax of test files. All patterns are
# written as raw string literals so that regex escapes like \s, \w or \[ are
# not (mis-)interpreted as invalid string escape sequences by Python.

# Section heading, e.g. "[match:symbol]"
RX_SECTION = re.compile(r'\s*\[(?P<stage>\w+):(?P<symbol>\w+)\]')
# Value of an entry: either a triple-quoted (possibly multi-line) string,
# a single- or double-quoted one-line string, or unquoted text that may be
# continued on following lines which are indented by at least four spaces.
RE_VALUE = r'(?:"""((?:.|\n)*?)""")|' + r"(?:'''((?:.|\n)*?)''')|" + \
           r'(?:"(.*?)")|' + r"(?:'(.*?)')|" + r'(.*(?:\n(?:\s*\n)*    .*)*)'
# the following does not work with pypy3, because pypy's re-engine does not
# support local flags, e.g. '(?s: )'
# RE_VALUE = '(?:"""((?s:.*?))""")|' + "(?:'''((?s:.*?))''')|" + \
#            '(?:"(.*?)")|' + "(?:'(.*?)')|" + '(.*(?:\n(?:\s*\n)*    .*)*)'
# Entry of a section: "key: value", where the key may end with an asterisk
RX_ENTRY = re.compile(r'\s*(\w+\*?)\s*:\s*(?:{value})\s*'.format(value=RE_VALUE))
# Comment line, starting with "#" (including any preceding whitespace)
RX_COMMENT = re.compile(r'\s*#.*\n')

95

96
def unit_from_configfile(config_filename):
    """ Reads grammar unit tests contained in a file in config file (.ini)
    syntax.

    The file consists of sections headed by ``[stage:symbol]`` which contain
    one or more ``key: value`` entries. Lines starting with ``#`` are
    comments. Tabs are expanded to four spaces before parsing.

    Args:
        config_filename (str): A config file containing Grammar unit-tests

    Returns:
        A dictionary representing the unit tests. It maps each symbol to an
        ordered dictionary of stages, each of which maps the test keys to
        the test code.

    Raises:
        KeyError: if a section names a stage that is not in UNIT_STAGES.
        SyntaxError: if a section has no entries or if the file could not
            be parsed up to its end.
    """
    # TODO: issue a warning if the same match:xxx or fail:xxx block appears more than once

    def eat_comments(txt, pos):
        """Returns the position after all comment lines starting at pos."""
        m = RX_COMMENT.match(txt, pos)
        while m:
            pos = m.span()[1]
            m = RX_COMMENT.match(txt, pos)
        return pos

    with open(config_filename, 'r', encoding="utf-8") as f:
        cfg = f.read()
        cfg = cfg.replace('\t', '    ')

    OD = collections.OrderedDict
    unit = OD()

    pos = eat_comments(cfg, 0)
    section_match = RX_SECTION.match(cfg, pos)
    while section_match:
        d = section_match.groupdict()
        stage = d['stage']
        if stage not in UNIT_STAGES:
            raise KeyError('Unknown stage ' + stage + " ! must be one of: " + str(UNIT_STAGES))
        symbol = d['symbol']
        pos = eat_comments(cfg, section_match.span()[1])

        entry_match = RX_ENTRY.match(cfg, pos)
        if entry_match is None:
            raise SyntaxError('No entries in section [%s:%s]' % (stage, symbol))
        while entry_match:
            # exactly two groups are not None: the key and that alternative
            # of the value-pattern that actually matched
            testkey, testcode = [group for group in entry_match.groups() if group is not None]
            lines = testcode.split('\n')
            if len(lines) > 1:
                # remove the common indentation of all continuation lines
                indent = sys.maxsize
                for line in lines[1:]:
                    indent = min(indent, len(line) - len(line.lstrip()))
                for i in range(1, len(lines)):
                    lines[i] = lines[i][indent:]
                testcode = '\n'.join(lines)
            unit.setdefault(symbol, OD()).setdefault(stage, OD())[testkey] = testcode
            pos = eat_comments(cfg, entry_match.span()[1])
            entry_match = RX_ENTRY.match(cfg, pos)

        section_match = RX_SECTION.match(cfg, pos)

    # anything left over at this point could neither be read as a section
    # nor as an entry
    if pos != len(cfg):
        raise SyntaxError('in file %s in line %i' % (config_filename, cfg[:pos].count('\n') + 1))

    return unit
155

156

157
def unit_from_json(json_filename):
    """
    Reads grammar unit tests from a json file.

    Args:
        json_filename (str): Name of a utf-8 encoded json file. Its top-level
            object maps symbols to objects that are keyed by test stage.

    Returns:
        The dictionary that has been read from the json file.

    Raises:
        ValueError: if a stage name is not one of UNIT_STAGES.
    """
    with open(json_filename, 'r', encoding='utf8') as f:
        unit = json.load(f)
    for symbol in unit:
        for stage in unit[symbol]:
            if stage not in UNIT_STAGES:
                raise ValueError('Test stage %s not in: %s' % (stage, str(UNIT_STAGES)))
    return unit

169
# TODO: add support for yaml, cson, toml
170
171


172
def unit_from_file(filename):
    """
    Reads a grammar unit test from a file. The format of the file is
    determined by the ending of its name: ".json" files are read with
    ``unit_from_json()``, ".ini" files with ``unit_from_configfile()``.

    After reading, the names of the test categories are converted to
    lower case and it is checked that no test name is used for a match
    test as well as for a fail test of the same parser.

    Args:
        filename (str): The name of the test file.

    Returns:
        The dictionary representing the unit tests.

    Raises:
        ValueError: if the file name has neither of the known endings.
        EnvironmentError: if match and fail tests share a test name.
    """
    if filename.endswith(".json"):
        test_unit = unit_from_json(filename)
    elif filename.endswith(".ini"):
        test_unit = unit_from_configfile(filename)
    else:
        # splitext() yields an empty string if there is no extension at all
        raise ValueError("Unknown unit test file type: " + os.path.splitext(filename)[1])

    # Check for ambiguous Test names
    errors = []
    for parser_name, tests in test_unit.items():
        # normalize case for test category names; iterate over a copy of
        # the keys, because the dictionary is changed inside the loop
        for key in list(tests.keys()):
            new_key = key.lower()
            if new_key != key:
                tests[new_key] = tests.pop(key)

        m_names = set(tests.get('match', dict()).keys())
        f_names = set(tests.get('fail', dict()).keys())
        intersection = sorted(m_names & f_names)
        if intersection:
            errors.append("Same names %s assigned to match and fail test "
                          "of parser %s." % (str(intersection), parser_name))
    if errors:
        raise EnvironmentError("Error(s) in Testfile %s :\n" % filename
                               + '\n'.join(errors))

    return test_unit

208

209
210
211
212
213
214
215
216
def all_match_tests(tests):
    """Yields the (name, code)-pairs of all match tests in ``tests``:
    first those of the plain 'match' stage, then those of the 'match*'
    stage, i.e. the match tests marked with an asterisk for CST-output.
    A stage that is missing from ``tests`` contributes nothing.
    """
    groups = [tests.get(stage, dict()).items() for stage in ('match', 'match*')]
    return itertools.chain.from_iterable(groups)


217
def get_report(test_unit):
    """
    Returns a text-report of the results of a grammar unit test. The report
    lists the source of all tests as well as the error messages, if a test
    failed or the abstract-syntax-tree (AST) in case of success.

    If an asterisk has been appended to the test name then the concrete syntax
    tree will also be added to the report in this particular case.

    The purpose of the latter is to help constructing and debugging
    of AST-Transformations. It is better to switch the CST-output on and off
    with the asterisk marker when needed than to output the CST for all tests
    which would unnecessarily bloat the test reports.

    Args:
        test_unit: A dictionary mapping parser names to test dictionaries.
            Results are read from the keys '__err__', '__ast__', '__cst__'
            and '__msg__' of each test dictionary, if present.

    Returns:
        The report as a single string.
    """
    def indent(txt):
        """Indents every line of txt by four spaces."""
        lines = txt.split('\n')
        lines[0] = '    ' + lines[0]
        return "\n    ".join(lines)

    report = []
    for parser_name, tests in test_unit.items():
        heading = 'Test of parser: "%s"' % parser_name
        report.append('\n\n%s\n%s\n' % (heading, '=' * len(heading)))
        for test_name, test_code in tests.get('match', dict()).items():
            heading = 'Match-test "%s"' % test_name
            report.append('\n%s\n%s\n' % (heading, '-' * len(heading)))
            report.append('### Test-code:')
            report.append(indent(test_code))
            error = tests.get('__err__', {}).get(test_name, "")
            if error:
                report.append('\n### Error:')
                report.append(error)
            ast = tests.get('__ast__', {}).get(test_name, None)
            cst = tests.get('__cst__', {}).get(test_name, None)
            # the CST is only shown if requested by an asterisk or if
            # there is no AST that could be shown instead
            if cst and (not ast or str(test_name).endswith('*')):
                report.append('\n### CST')
                report.append(indent(cst.as_sxpr()))
            if ast:
                report.append('\n### AST')
                report.append(indent(ast.as_sxpr()))

        for test_name, test_code in tests.get('fail', dict()).items():
            heading = 'Fail-test "%s"' % test_name
            report.append('\n%s\n%s\n' % (heading, '-' * len(heading)))
            report.append('### Test-code:')
            report.append(indent(test_code))
            messages = tests.get('__msg__', {}).get(test_name, "")
            if messages:
                report.append('\n### Messages:')
                report.append(messages)
            error = tests.get('__err__', {}).get(test_name, "")
            if error:
                report.append('\n### Error:')
                report.append(error)

    return '\n'.join(report)


272
def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, verbose=False):
    """
    Unit tests for a grammar-parser and ast transformations.

    Args:
        test_unit: Either the name of a test file, which is then read with
            ``unit_from_file()``, or a dictionary that maps parser names
            to test dictionaries keyed by test stage (see UNIT_STAGES).
            The results are stored in the test dictionaries under the
            keys '__cst__', '__ast__', '__err__' and '__msg__'.
        parser_factory: A callable without arguments that returns the
            parser. The parser is called with the test code and the name
            of the parser that is to be tested.
        transformer_factory: A callable without arguments that returns the
            transformation, which is applied to a copy of the CST.
        report: If true, a report is written to the file
            "REPORT/<unit name>.md" below the current working directory.
        verbose: If true, the progress is printed to the console.

    Returns:
        A list of the error messages of all tests that failed.

    Raises:
        AssertionError: if a parser name is missing, if the test unit
            already contains results or unknown test types, or if an AST-
            or CST-test lacks the corresponding match test.
    """
    def clean_key(k):
        """Returns the key without asterisks; non-string keys as is."""
        try:
            return k.replace('*', '')
        except AttributeError:
            return k

    def get(tests, category, key):
        """Looks up a test by its key, with or without asterisk marker."""
        try:
            value = tests[category][key] if key in tests[category] \
                else tests[category][clean_key(key)]
        except KeyError:
            # test_name and parser_name are the loop variables of the
            # enclosing function at the time of the call
            raise AssertionError('%s-test %s for parser %s missing !?'
                                 % (category, test_name, parser_name))
        return value

    def unknown_parser_tree(upe):
        """Returns a tree that carries the UnknownParserError as error."""
        node = Node(ZOMBIE_PARSER, "").init_pos(0)
        return RootNode(node).new_error(node, str(upe))

    if isinstance(test_unit, str):
        _, unit_name = os.path.split(os.path.splitext(test_unit)[0])
        test_unit = unit_from_file(test_unit)
    else:
        unit_name = 'unit_test_' + str(id(test_unit))
    if verbose:
        print("\nUnit: " + unit_name)
    errata = []
    parser = parser_factory()
    transform = transformer_factory()

    for parser_name, tests in test_unit.items():
        # explicit raises instead of assert-statements, so that the checks
        # are not dropped when Python runs with optimizations (-O)
        if not parser_name:
            raise AssertionError("Missing parser name in test %s!" % unit_name)
        if any(test_type in RESULT_STAGES for test_type in tests):
            raise AssertionError(
                "Test %s in %s already has results. Use reset_unit() before running again!"
                % (parser_name, unit_name))
        if not set(tests.keys()).issubset(UNIT_STAGES):
            raise AssertionError('Unknown test-types: %s ! Must be one of %s'
                                 % (set(tests.keys()) - UNIT_STAGES, UNIT_STAGES))
        if verbose:
            print('  Match-Tests for parser "' + parser_name + '"')
        match_tests = set(tests['match'].keys()) if 'match' in tests else set()
        if 'ast' in tests:
            ast_tests = set(tests['ast'].keys())
            if not {clean_key(k) for k in ast_tests} <= {clean_key(k) for k in match_tests}:
                raise AssertionError('AST-Tests %s for parser %s lack corresponding match-tests!'
                                     % (str(ast_tests - match_tests), parser_name))
        if 'cst' in tests:
            cst_tests = set(tests['cst'].keys())
            if not {clean_key(k) for k in cst_tests} <= {clean_key(k) for k in match_tests}:
                raise AssertionError('CST-Tests %s lack corresponding match-tests!'
                                     % str(cst_tests - match_tests))

        # run match tests

        for test_name, test_code in tests.get('match', dict()).items():
            # number of errors before this test; used to find out whether
            # this particular test has added an error
            errflag = len(errata)
            if verbose:
                infostr = '    match-test "' + test_name + '" ... '
            try:
                cst = parser(test_code, parser_name)
            except UnknownParserError as upe:
                # there is no tree yet, so a fresh one must be built
                # (same as for the fail tests below)
                cst = unknown_parser_tree(upe)
            clean_test_name = str(test_name).replace('*', '')
            # log_ST(cst, "match_%s_%s.cst" % (parser_name, clean_test_name))
            tests.setdefault('__cst__', {})[test_name] = cst
            if "ast" in tests or report:
                ast = copy.deepcopy(cst)
                transform(ast)
                tests.setdefault('__ast__', {})[test_name] = ast
                # log_ST(ast, "match_%s_%s.ast" % (parser_name, clean_test_name))
            if is_error(cst.error_flag):
                errors = adjust_error_locations(cst.collect_errors(), test_code)
                errata.append('Match test "%s" for parser "%s" failed:\n\tExpr.:  %s\n\n\t%s\n\n' %
                              (test_name, parser_name, '\n\t'.join(test_code.split('\n')),
                               '\n\t'.join(str(m).replace('\n', '\n\t\t') for m in errors)))
                # write parsing-history log only in case of failure!
                if is_logging():
                    log_parsing_history(parser, "match_%s_%s.log" % (parser_name, clean_test_name))
            elif "cst" in tests and parse_sxpr(get(tests, "cst", test_name)) != cst:
                errata.append('Concrete syntax tree test "%s" for parser "%s" failed:\n%s' %
                              (test_name, parser_name, cst.as_sxpr()))
            elif "ast" in tests:
                compare = parse_sxpr(get(tests, "ast", test_name))
                if compare != ast:
                    errata.append('Abstract syntax tree test "%s" for parser "%s" failed:'
                                  '\n\tExpr.:     %s\n\tExpected:  %s\n\tReceived:  %s'
                                  % (test_name, parser_name, '\n\t'.join(test_code.split('\n')),
                                     flatten_sxpr(compare.as_sxpr()),
                                     flatten_sxpr(ast.as_sxpr())))
            # record an error only if it stems from this test; otherwise
            # the error of an earlier test would be attributed to all
            # following tests as well
            if len(errata) > errflag:
                tests.setdefault('__err__', {})[test_name] = errata[-1]
            if verbose:
                print(infostr + ("OK" if len(errata) == errflag else "FAIL"))

        if verbose and 'fail' in tests:
            print('  Fail-Tests for parser "' + parser_name + '"')

        # run fail tests

        for test_name, test_code in tests.get('fail', dict()).items():
            errflag = len(errata)
            if verbose:
                infostr = '    fail-test  "' + test_name + '" ... '
            try:
                cst = parser(test_code, parser_name)
            except UnknownParserError as upe:
                cst = unknown_parser_tree(upe)
            if not is_error(cst.error_flag):
                errata.append('Fail test "%s" for parser "%s" yields match instead of '
                              'expected failure!' % (test_name, parser_name))
                tests.setdefault('__err__', {})[test_name] = errata[-1]
                # write parsing-history log only in case of test-failure
                if is_logging():
                    log_parsing_history(parser, "fail_%s_%s.log" % (parser_name, test_name))
            if cst.error_flag:
                tests.setdefault('__msg__', {})[test_name] = \
                    "\n".join(str(e) for e in cst.collect_errors())
            if verbose:
                print(infostr + ("OK" if len(errata) == errflag else "FAIL"))

    # write test-report
    if report:
        report_dir = "REPORT"
        os.makedirs(report_dir, exist_ok=True)
        with open(os.path.join(report_dir, unit_name + '.md'), 'w', encoding='utf8') as f:
            f.write(get_report(test_unit))

    return errata


406
def reset_unit(test_unit):
    """
    Resets the tests in ``test_unit`` by removing all results and error
    messages.

    Every key of a test dictionary that is not one of UNIT_STAGES is
    deleted in place. If such a key is not one of RESULT_STAGES either,
    a notice is printed.

    Args:
        test_unit: A dictionary mapping parser names to test dictionaries.
    """
    for parser, tests in test_unit.items():
        # iterate over a copy of the keys, because entries are deleted
        for key in list(tests.keys()):
            if key not in UNIT_STAGES:
                if key not in RESULT_STAGES:
                    print('Removing unknown component %s from test %s' % (key, parser))
                del tests[key]



Eckhart Arnold's avatar
Eckhart Arnold committed
420
421
422
423
def grammar_suite(directory, parser_factory, transformer_factory,
                  fn_patterns=['*test*'],
                  ignore_unknown_filetypes=False,
                  report=True, verbose=True):
    """
    Runs all grammar unit tests in a directory. A file is considered a test
    unit, if its name matches one of the patterns in ``fn_patterns``, which
    by default means that it has the word "test" in its name.

    Args:
        directory: The directory that contains the test files. It is made
            the current working directory while the tests are running.
        parser_factory: Passed through to ``grammar_unit()``.
        transformer_factory: Passed through to ``grammar_unit()``.
        fn_patterns: A single file name pattern or an iterable of file name
            patterns in the syntax of the ``fnmatch`` module. (The default
            list is never modified.)
        ignore_unknown_filetypes: If true, files of an unknown file type
            are silently skipped rather than raising a ValueError.
        report: Passed through to ``grammar_unit()``.
        verbose: If true, the progress is printed to the console.

    Returns:
        A string describing all errors or the empty string, if all tests
        have passed.
    """
    # A string is iterable, too, but must be treated as one single pattern
    # rather than as a sequence of one-character patterns.
    if isinstance(fn_patterns, str) or not isinstance(fn_patterns, collections.abc.Iterable):
        fn_patterns = [fn_patterns]
    all_errors = collections.OrderedDict()
    if verbose:
        print("\nScanning test-directory: " + directory)
    save_cwd = os.getcwd()
    os.chdir(directory)
    try:
        if is_logging():
            clear_logs()
        for filename in sorted(os.listdir()):
            if any(fnmatch.fnmatch(filename, pattern) for pattern in fn_patterns):
                try:
                    if verbose:
                        print("\nRunning grammar tests from: " + filename)
                    errata = grammar_unit(filename, parser_factory,
                                          transformer_factory, report, verbose)
                    if errata:
                        all_errors[filename] = errata
                except ValueError as e:
                    if not ignore_unknown_filetypes or str(e).find("Unknown") < 0:
                        raise
    finally:
        # restore the working directory even if a test has raised an error
        os.chdir(save_cwd)
    error_report = []
    err_N = 0
    if all_errors:
        for filename in all_errors:
            error_report.append('Errors found by unit test "%s":\n' % filename)
            err_N += len(all_errors[filename])
            for error in all_errors[filename]:
                error_report.append('\t' + '\n\t'.join(error.split('\n')))
    if error_report:
        # if verbose:
        #     print("\nFAILURE! %i error%s found!\n" % (err_N, 's' if err_N > 1 else ''))
        return ('Test suite "%s" revealed %s error%s:\n\n'
                % (directory, err_N, 's' if err_N > 1 else '') + '\n'.join(error_report))
    if verbose:
        print("\nSUCCESS! All tests passed :-)\n")
    return ''


468
def runner(test_classes, namespace):
    """
    Runs all or some selected Python unit tests found in the
    namespace. To run all tests in a module, call
    ``runner("", globals())`` from within that module.

    Unit-Tests are either classes, the name of which starts with
    "Test" and methods, the name of which starts with "test" contained
    in such classes or functions, the name of which starts with "test".

    For each test class an object is instantiated. If it has a method
    ``setup``, this is called before the tests are run, and if it has a
    method ``teardown``, this is called afterwards, even if a test failed.

    Args:
        test_classes: Either a string or a list of strings that contains the
            names of test classes or of single test methods in the form
            "ClassName.method_name". In a string, names are separated by
            single blanks. All tests within a test class will be run. If
            empty, all test classes and test functions found in the
            namespace are run.
        namespace: The namespace for running the test, usually
            ``globals()`` should be used.

    Raises:
        NameError: if a test class cannot be found in the namespace.

    Example:
        class TestSomething()
            def setup(self):
                pass
            def teardown(self):
                pass
            def test_something(self):
                pass

        if __name__ == "__main__":
            from DHParser.testing import runner
            runner("", globals())
    """
    def instantiate(cls_name):
        """Creates the test object and calls its setup-method, if any."""
        try:
            cls = namespace[cls_name]
        except KeyError:
            raise NameError("name '%s' is not defined" % cls_name)
        obj = cls()
        if "setup" in dir(obj):
            obj.setup()
        return obj

    # must be defined also if the tests have been passed explicitly
    test_functions = []
    if test_classes:
        if isinstance(test_classes, str):
            test_classes = test_classes.split(" ")
    else:
        # collect all test classes, in case no methods or classes have been passed explicitly
        test_classes = []
        for name in list(namespace.keys()):
            if name.lower().startswith('test'):
                if inspect.isclass(namespace[name]):
                    test_classes.append(name)
                elif inspect.isfunction(namespace[name]):
                    test_functions.append(name)

    for test in test_classes:
        # reset for each test, so that a test object of an earlier iteration
        # is not torn down a second time, if the instantiation fails
        obj = None
        try:
            if test.find('.') >= 0:
                cls_name, method_name = test.split('.')
                obj = instantiate(cls_name)
                print("Running " + cls_name + "." + method_name)
                getattr(obj, method_name)()
            else:
                obj = instantiate(test)
                for name in dir(obj):
                    if name.lower().startswith("test"):
                        print("Running " + test + "." + name)
                        getattr(obj, name)()
        finally:
            if "teardown" in dir(obj):
                obj.teardown()

    for test in test_functions:
        namespace[test]()