testing.py 18.8 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# testing.py - test support for DHParser based grammars and compilers
#
# Copyright 2016  by Eckhart Arnold (arnold@badw.de)
#                 Bavarian Academy of Sciences and Humanities (badw.de)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.  See the License for the specific language governing
# permissions and limitations under the License.
17

18
19
20
21
22
23
24
25
26
"""
Module ``testing`` contains support for unit-testing domain specific
languages. Tests for arbitrarily small components of the Grammar can
be written into test files with ini-file syntax in order to test
whether the parser matches or fails as expected. It can also be
tested whether it produces an expected concrete or abstract syntax tree.
Usually, however, unexpected failure to match a certain string is the
main cause of trouble when constructing a context free Grammar.
"""
27
28


29
import collections
import collections.abc
# import configparser
import copy
import fnmatch
import inspect
import itertools
import json
import os
import sys

from DHParser.error import is_error, adjust_error_locations
from DHParser.log import is_logging, clear_logs, log_ST, log_parsing_history
from DHParser.parse import UnknownParserError
from DHParser.syntaxtree import Node, mock_syntax_tree, flatten_sxpr, ZOMBIE_PARSER
from DHParser.toolkit import re, typing

from typing import Tuple
46

47
# Names exported by ``from DHParser.testing import *``.
__all__ = ('unit_from_configfile',
           'unit_from_json',
           'unit_from_file',
           'get_report',
           'grammar_unit',
           'grammar_suite',
           'runner')

55
# Stage names that are accepted as the first part of a section header in a
# test file (checked by unit_from_configfile and unit_from_json) and as keys
# of the per-parser test dictionaries (asserted by grammar_unit).
# '__ast__' and '__cst__' are the keys under which grammar_unit stores the
# syntax trees it has produced.
UNIT_STAGES = {'match*', 'match', 'fail', 'ast', 'cst', '__ast__', '__cst__'}
56

57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# def unit_from_configfile(config_filename):
#     """
#     Reads a grammar unit test from a config file.
#     """
#     cfg = configparser.ConfigParser(interpolation=None)
#     cfg.read(config_filename, encoding="utf8")
#     OD = collections.OrderedDict
#     unit = OD()
#     for section in cfg.sections():
#         symbol, stage = section.split(':')
#         if stage not in UNIT_STAGES:
#             if symbol in UNIT_STAGES:
#                 symbol, stage = stage, symbol
#             else:
#                 raise ValueError('Test stage %s not in: ' % (stage, str(UNIT_STAGES)))
#         for testkey, testcode in cfg[section].items():
#             if testcode[:3] + testcode[-3:] in {"''''''", '""""""'}:
#                 testcode = testcode[3:-3]
#                 # testcode = testcode.replace('\\#', '#')
#                 testcode = re.sub(r'(?<!\\)\\#', '#', testcode).replace('\\\\', '\\')
#             elif testcode[:1] + testcode[-1:] in {"''", '""'}:
#                 testcode = testcode[1:-1]
#             unit.setdefault(symbol, OD()).setdefault(stage, OD())[testkey] = testcode
#     # print(json.dumps(unit, sort_keys=True, indent=4))
#     return unit

83
# Section header of a test file: "[stage:symbol]". The stage name may carry
# a trailing asterisk (e.g. "match*").
RX_SECTION = re.compile(r'\s*\[(?P<stage>\w+\*?):(?P<symbol>\w+)\]')

# Value of an entry. The alternatives are tried in this order:
#   1. text in triple double quotes (may span several lines)
#   2. text in triple single quotes (may span several lines)
#   3. text in double quotes on a single line
#   4. text in single quotes on a single line
#   5. unquoted text up to the end of the line, optionally continued on
#      following lines that are indented by at least four spaces; blank
#      lines between continuation lines are allowed
# Raw strings are used so that the backslash sequences reach the regular
# expression engine unchanged.
RE_VALUE = (r'(?:"""((?:.|\n)*?)""")|' + r"(?:'''((?:.|\n)*?)''')|"
            + r'(?:"(.*?)")|' + r"(?:'(.*?)')|" + r'(.*(?:\n(?:\s*\n)*    .*)*)')
# the following does not work with pypy3, because pypy's re-engine does not
# support local flags, e.g. '(?s: )'
# RE_VALUE = '(?:"""((?s:.*?))""")|' + "(?:'''((?s:.*?))''')|" + \
#            '(?:"(.*?)")|' + "(?:'(.*?)')|" + '(.*(?:\n(?:\s*\n)*    .*)*)'

# Entry of a section: "name: value"; surrounding whitespace is consumed.
RX_ENTRY = re.compile(r'\s*(\w+)\s*:\s*(?:{value})\s*'.format(value=RE_VALUE))

# A comment line, including any whitespace preceding the hash sign.
RX_COMMENT = re.compile(r'\s*#.*\n')

93

94
def unit_from_configfile(config_filename):
    """ Reads grammar unit tests contained in a file in config file (.ini)
    syntax.

    The file is a sequence of sections, each introduced by a header of the
    form ``[stage:symbol]`` and followed by one or more ``name: value``
    entries. Comment lines (starting with ``#``) between sections or
    entries are skipped. Tab characters are replaced by four spaces before
    parsing. The continuation lines of a multi-line value are dedented by
    their common indentation; blank lines are ignored when that common
    indentation is determined.

    Args:
        config_filename (str): A config file containing Grammar unit-tests

    Returns:
        An ordered dictionary representing the unit tests, nested as
        ``unit[symbol][stage][test_name] = test_code``.

    Raises:
        KeyError: if a section names a stage that is not in UNIT_STAGES.
        SyntaxError: if a section has no entries or if the file could not
            be parsed up to its end.
    """
    def eat_comments(txt, pos):
        """Returns the position after all comment lines starting at pos."""
        m = RX_COMMENT.match(txt, pos)
        while m:
            pos = m.end()
            m = RX_COMMENT.match(txt, pos)
        return pos

    def dedent_continuation_lines(testcode):
        """Removes the common indentation from all lines but the first."""
        lines = testcode.split('\n')
        if len(lines) > 1:
            # Blank lines must not take part in the computation: an empty
            # line (e.g. the one before a closing triple quote) would
            # otherwise force the common indentation down to zero.
            indent = min((len(line) - len(line.lstrip())
                          for line in lines[1:] if line.strip()), default=0)
            lines[1:] = [line[indent:] for line in lines[1:]]
        return '\n'.join(lines)

    with open(config_filename, 'r', encoding="utf-8") as f:
        cfg = f.read()
        cfg = cfg.replace('\t', '    ')

    OD = collections.OrderedDict
    unit = OD()

    pos = eat_comments(cfg, 0)
    section_match = RX_SECTION.match(cfg, pos)
    while section_match:
        d = section_match.groupdict()
        stage = d['stage']
        if stage not in UNIT_STAGES:
            raise KeyError('Unknown stage ' + stage + " ! must be one of: " + str(UNIT_STAGES))
        symbol = d['symbol']
        pos = eat_comments(cfg, section_match.end())

        entry_match = RX_ENTRY.match(cfg, pos)
        if entry_match is None:
            raise SyntaxError('No entries in section [%s:%s]' % (stage, symbol))
        while entry_match:
            # exactly two groups are set: the entry's name and that one of
            # the value alternatives which has matched
            testkey, testcode = [group for group in entry_match.groups() if group is not None]
            testcode = dedent_continuation_lines(testcode)
            unit.setdefault(symbol, OD()).setdefault(stage, OD())[testkey] = testcode
            pos = eat_comments(cfg, entry_match.end())
            entry_match = RX_ENTRY.match(cfg, pos)

        section_match = RX_SECTION.match(cfg, pos)

    # anything left over is neither a section, an entry nor a comment
    if pos != len(cfg):
        raise SyntaxError('in file %s in line %i' % (config_filename, cfg[:pos].count('\n') + 1))

    return unit
151

152

153
def unit_from_json(json_filename):
    """
    Reads grammar unit tests from a json file.

    The file is expected to contain a mapping of symbols to mappings of
    stages, i.e. ``unit[symbol][stage]``.

    Args:
        json_filename (str): name of an utf-8 encoded json file

    Returns:
        The data structure read from the json file.

    Raises:
        ValueError: if any stage name is not contained in UNIT_STAGES.
    """
    with open(json_filename, 'r', encoding='utf8') as f:
        unit = json.load(f)
    for symbol in unit:
        for stage in unit[symbol]:
            if stage not in UNIT_STAGES:
                raise ValueError('Test stage %s not in: %s' % (stage, str(UNIT_STAGES)))
    return unit

165
# TODO: add support for yaml, cson, toml
166
167


168
def unit_from_file(filename):
    """
    Reads a grammar unit test from a file. The format of the file is
    determined by the ending of its name: ".json" files are read with
    ``unit_from_json``, ".ini" files with ``unit_from_configfile``.

    After reading, the test names are checked for ambiguity: Within the
    tests of one parser the same name must not be used for a match test
    (stages 'match' and 'match*') and for a fail test.

    Args:
        filename (str): name of the test file

    Returns:
        The dictionary of unit tests as returned by the respective reader.

    Raises:
        ValueError: if the file name has neither of the known endings. The
            message starts with "Unknown unit test file type".
        EnvironmentError: if a test name is used for a match test as well
            as for a fail test of the same parser.
    """
    if filename.endswith(".json"):
        test_unit = unit_from_json(filename)
    elif filename.endswith(".ini"):
        test_unit = unit_from_configfile(filename)
    else:
        # splitext yields '' for names without extension, where slicing at
        # rfind('.') == -1 would wrongly report the last character
        extension = os.path.splitext(filename)[1]
        raise ValueError("Unknown unit test file type: " + (extension or "<no extension>"))

    # Check for ambiguous Test names
    errors = []
    for parser_name, tests in test_unit.items():
        # 'match*' tests are match tests, too
        m_names = set(tests.get('match', {})) | set(tests.get('match*', {}))
        f_names = set(tests.get('fail', {}))
        intersection = sorted(m_names & f_names)
        if intersection:
            errors.append("Same names %s assigned to match and fail test "
                          "of parser %s." % (str(intersection), parser_name))
    if errors:
        raise EnvironmentError("Error(s) in Testfile %s :\n" % filename
                               + '\n'.join(errors))

    return test_unit

196

197
198
199
200
201
202
203
204
def all_match_tests(tests):
    """Collects the match tests of one parser: first the entries of the
    plain 'match' stage, then those of the 'match*' stage, i.e. the stage
    that has been marked with an asterisk to request CST-output.

    Returns an iterator over (test name, test code) pairs.
    """
    plain_tests = tests.get('match', {})
    starred_tests = tests.get('match*', {})
    return itertools.chain(plain_tests.items(), starred_tests.items())


205
def get_report(test_unit):
    """
    Returns a text-report of the results of a grammar unit test. The report
    lists the source of all tests as well as the error messages, if a test
    failed or the abstract-syntax-tree (AST) in case of success.

    If a match test is contained in a section, the stage name of which has
    been marked with an asterisk (e.g. '[match*:identifier]'), then the
    concrete syntax tree will also be added to the report in this
    particular case. The concrete syntax tree is also reported, if no
    abstract syntax tree is available for a test.

    The purpose of the latter is to help constructing and debugging
    of AST-Transformations. It is better to switch the CST-output on and off
    with the asterisk marker when needed than to output the CST for all
    tests which would unnecessarily bloat the test reports.

    Args:
        test_unit: dictionary of unit tests. Results are read from the
            keys '__err__', '__ast__' and '__cst__' of each parser's
            tests.

    Returns:
        The report as a single string.
    """
    def indent(txt):
        """Indents every line of txt by four spaces."""
        return '\n'.join('    ' + line for line in txt.split('\n'))

    report = []

    def add_test_code(kind, test_name, test_code):
        """Appends the heading and the source code of a test."""
        heading = '%s-test "%s"' % (kind, test_name)
        report.append('\n%s\n%s\n' % (heading, '-' * len(heading)))
        report.append('### Test-code:')
        report.append(indent(test_code))

    def add_error(tests, test_name):
        """Appends the error message of a test, if there is any."""
        error = tests.get('__err__', {}).get(test_name, "")
        if error:
            report.append('\n### Error:')
            report.append(error)

    for parser_name, tests in test_unit.items():
        heading = 'Test of parser: "%s"' % parser_name
        report.append('\n\n%s\n%s\n' % (heading, '=' * len(heading)))
        cst_output = frozenset(tests.get('match*', {}))
        for test_name, test_code in all_match_tests(tests):
            add_test_code('Match', test_name, test_code)
            add_error(tests, test_name)
            ast = tests.get('__ast__', {}).get(test_name, None)
            cst = tests.get('__cst__', {}).get(test_name, None)
            if cst and (not ast or test_name in cst_output):
                report.append('\n### CST')
                report.append(indent(cst.as_sxpr()))
            if ast:
                report.append('\n### AST')
                report.append(indent(ast.as_sxpr()))
        for test_name, test_code in tests.get('fail', {}).items():
            add_test_code('Fail', test_name, test_code)
            add_error(tests, test_name)
    return '\n'.join(report)


258
def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, verbose=False):
    """
    Unit tests for a grammar-parser and ast transformations.

    Args:
        test_unit: Either the name of a test file, which is then read with
            ``unit_from_file``, or a dictionary of unit tests. The
            dictionary is updated in place: the results are stored under
            the keys '__cst__', '__ast__' and '__err__' of each parser's
            tests.
        parser_factory: called without arguments to obtain the parser.
            The parser is called as ``parser(test_code, parser_name)``.
        transformer_factory: called without arguments to obtain the
            transformation, which is applied to a deep copy of the
            concrete syntax tree. Its return value is ignored.
        report (bool): If True, the AST is generated for every match test
            and a report is written to the file "REPORT/<unit name>.md"
            relative to the current working directory.
        verbose (bool): If True, the progress is printed to stdout.

    Returns:
        A list of error messages (strings), one for each failed test. The
        list is empty, if all tests have passed.
    """
    if isinstance(test_unit, str):
        _, unit_name = os.path.split(os.path.splitext(test_unit)[0])
        test_unit = unit_from_file(test_unit)
    else:
        unit_name = str(id(test_unit))
    if verbose:
        print("\nUnit: " + unit_name)
    errata = []
    parser = parser_factory()
    transform = transformer_factory()

    def parse(test_code, parser_name):
        """Parses test_code; an unknown parser name yields an error node
        instead of an exception."""
        try:
            return parser(test_code, parser_name)
        except UnknownParserError as upe:
            return Node(ZOMBIE_PARSER, "").add_error(str(upe)).init_pos(0)

    for parser_name, tests in test_unit.items():
        assert parser_name, "Missing parser name in test %s!" % unit_name
        assert set(tests.keys()).issubset(UNIT_STAGES)
        if verbose:
            print('  Match-Tests for parser "' + parser_name + '"')
        for test_name, test_code in all_match_tests(tests):
            if verbose:
                infostr = '    match-test "' + test_name + '" ... '
                errflag = len(errata)
            cst = parse(test_code, parser_name)
            log_ST(cst, "match_%s_%s.cst" % (parser_name, test_name))
            tests.setdefault('__cst__', {})[test_name] = cst
            if "ast" in tests or report:
                ast = copy.deepcopy(cst)
                transform(ast)
                tests.setdefault('__ast__', {})[test_name] = ast
                log_ST(ast, "match_%s_%s.ast" % (parser_name, test_name))
            if is_error(cst.error_flag):
                errors = adjust_error_locations(cst.collect_errors(), test_code)
                errata.append('Match test "%s" for parser "%s" failed:\n\tExpr.:  %s\n\n\t%s\n\n' %
                              (test_name, parser_name, '\n\t'.join(test_code.split('\n')),
                               '\n\t'.join(str(m).replace('\n', '\n\t\t') for m in errors)))
                tests.setdefault('__err__', {})[test_name] = errata[-1]
                # write parsing-history log only in case of failure!
                if is_logging():
                    log_parsing_history(parser, "match_%s_%s.log" % (parser_name, test_name))
            # An expected tree need not have been given for every match
            # test, therefore look the test name up before comparing.
            elif (test_name in tests.get("cst", {})
                  and mock_syntax_tree(tests["cst"][test_name]) != cst):
                errata.append('Concrete syntax tree test "%s" for parser "%s" failed:\n%s' %
                              (test_name, parser_name, cst.as_sxpr()))
                tests.setdefault('__err__', {})[test_name] = errata[-1]
            elif test_name in tests.get("ast", {}):
                compare = mock_syntax_tree(tests["ast"][test_name])
                if compare != ast:
                    errata.append('Abstract syntax tree test "%s" for parser "%s" failed:'
                                  '\n\tExpr.:     %s\n\tExpected:  %s\n\tReceived:  %s'
                                  % (test_name, parser_name, '\n\t'.join(test_code.split('\n')),
                                     flatten_sxpr(compare.as_sxpr()),
                                     flatten_sxpr(ast.as_sxpr())))
                    tests.setdefault('__err__', {})[test_name] = errata[-1]
            if verbose:
                print(infostr + ("OK" if len(errata) == errflag else "FAIL"))

        if verbose and 'fail' in tests:
            print('  Fail-Tests for parser "' + parser_name + '"')
        for test_name, test_code in tests.get('fail', {}).items():
            if verbose:
                infostr = '    fail-test  "' + test_name + '" ... '
                errflag = len(errata)
            cst = parse(test_code, parser_name)
            if not is_error(cst.error_flag):
                errata.append('Fail test "%s" for parser "%s" yields match instead of '
                              'expected failure!' % (test_name, parser_name))
                tests.setdefault('__err__', {})[test_name] = errata[-1]
                # write parsing-history log only in case of test-failure
                if is_logging():
                    log_parsing_history(parser, "fail_%s_%s.log" % (parser_name, test_name))
            if verbose:
                print(infostr + ("OK" if len(errata) == errflag else "FAIL"))

    # write test-report
    if report:
        report_dir = "REPORT"
        # exist_ok avoids the race between checking for and creating the
        # directory
        os.makedirs(report_dir, exist_ok=True)
        with open(os.path.join(report_dir, unit_name + '.md'), 'w', encoding='utf8') as f:
            f.write(get_report(test_unit))

    return errata


Eckhart Arnold's avatar
Eckhart Arnold committed
348
349
350
351
def grammar_suite(directory, parser_factory, transformer_factory,
                  fn_patterns=['*test*'],
                  ignore_unknown_filetypes=False,
                  report=True, verbose=True):
    """
    Runs all grammar unit tests in a directory. A file is considered a test
    unit, if its name matches at least one of the patterns in
    ``fn_patterns``. By default, these are all files that have the word
    "test" in their name.

    While the tests are running, the current working directory is changed
    to ``directory``. It is restored afterwards, also if an exception is
    raised.

    Args:
        directory (str): the directory containing the test files
        parser_factory: passed on to ``grammar_unit``
        transformer_factory: passed on to ``grammar_unit``
        fn_patterns: a single file name pattern or an iterable of file
            name patterns as understood by ``fnmatch.fnmatch``. The
            default value is never mutated.
        ignore_unknown_filetypes (bool): If True, a ValueError whose
            message contains "Unknown" is ignored. ``unit_from_file``
            raises such an error for files of an unknown type.
        report (bool): passed on to ``grammar_unit``
        verbose (bool): If True, the progress is printed to stdout.

    Returns:
        An error report as string or the empty string, if no errors have
        been found.
    """
    # A str is itself an Iterable (of characters), so a single pattern that
    # has been passed as string must be recognized explicitly.
    if isinstance(fn_patterns, str) or not isinstance(fn_patterns, collections.abc.Iterable):
        fn_patterns = [fn_patterns]
    else:
        # the patterns are iterated once per file
        fn_patterns = list(fn_patterns)
    all_errors = collections.OrderedDict()
    if verbose:
        print("\nScanning test-directory: " + directory)
    save_cwd = os.getcwd()
    os.chdir(directory)
    try:
        if is_logging():
            clear_logs()
        for filename in sorted(os.listdir()):
            if any(fnmatch.fnmatch(filename, pattern) for pattern in fn_patterns):
                try:
                    if verbose:
                        print("\nRunning grammar tests from: " + filename)
                    errata = grammar_unit(filename, parser_factory,
                                          transformer_factory, report, verbose)
                    if errata:
                        all_errors[filename] = errata
                except ValueError as e:
                    if not ignore_unknown_filetypes or str(e).find("Unknown") < 0:
                        raise
    finally:
        os.chdir(save_cwd)

    error_report = []
    err_N = 0
    for filename, errata in all_errors.items():
        error_report.append('Errors found by unit test "%s":\n' % filename)
        err_N += len(errata)
        for error in errata:
            error_report.append('\t' + '\n\t'.join(error.split('\n')))
    if error_report:
        return ('Test suite "%s" revealed %s error%s:\n\n'
                % (directory, err_N, 's' if err_N > 1 else '') + '\n'.join(error_report))
    if verbose:
        print("\nSUCCESS! All tests passed :-)\n")
    return ''


396
def runner(test_classes, namespace):
    """
    Runs all or some selected Python unit tests found in the
    namespace. To run all tests in a module, call
    ``runner("", globals())`` from within that module.

    Unit-Tests are either classes, the name of which starts with
    "Test" and methods, the name of which starts with "test" contained
    in such classes or functions, the name of which starts with "test".

    For each selected test class one object is instantiated. If the object
    has a method ``setup``, it is called before the tests of that object
    are run; if it has a method ``teardown``, it is called afterwards,
    also if a test has raised an exception.

    Args:
        test_classes: Either a string or a list of strings that contains
            the names of test or test classes. Inside a string, the names
            are separated by whitespace. A single test method is
            addressed as "ClassName.method_name". Each test and, in the
            case of a test class, all tests within the test class will be
            run. If empty, all test classes and test functions found in
            the namespace are run.
        namespace: The namespace for running the test, usually
            ``globals()`` should be used.

    Example:
        class TestSomething:
            def setup(self):
                pass
            def teardown(self):
                pass
            def test_something(self):
                pass

        if __name__ == "__main__":
            from DHParser.testing import runner
            runner("", globals())
    """
    def instantiate(cls_name):
        """Creates the test object and runs its setup, if there is any."""
        obj = namespace[cls_name]()
        if "setup" in dir(obj):
            obj.setup()
        return obj

    # must also be defined, if the tests have been passed explicitly
    test_functions = []
    if test_classes:
        if isinstance(test_classes, str):
            test_classes = test_classes.split()
    else:
        # collect all test classes, in case no methods or classes have been passed explicitly
        test_classes = []
        for name in list(namespace):
            if name.lower().startswith('test'):
                if inspect.isclass(namespace[name]):
                    test_classes.append(name)
                elif inspect.isfunction(namespace[name]):
                    test_functions.append(name)

    for test in test_classes:
        # reset, so that a failed instantiation cannot lead to a second
        # teardown of the previous test object
        obj = None
        try:
            if test.find('.') >= 0:
                cls_name, method_name = test.split('.')
                obj = instantiate(cls_name)
                print("Running " + cls_name + "." + method_name)
                getattr(obj, method_name)()
            else:
                obj = instantiate(test)
                for name in dir(obj):
                    if name.lower().startswith("test"):
                        print("Running " + test + "." + name)
                        getattr(obj, name)()
        finally:
            if "teardown" in dir(obj):
                obj.teardown()

    for test in test_functions:
        namespace[test]()