testing.py 17.7 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# testing.py - test support for DHParser based grammars and compilers
#
# Copyright 2016  by Eckhart Arnold (arnold@badw.de)
#                 Bavarian Academy of Sciences and Humanities (badw.de)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.  See the License for the specific language governing
# permissions and limitations under the License.
17

18
19
20
21
22
23
24
25
26
"""
Module ``testing`` contains support for unit-testing domain specific
languages. Tests for arbitrarily small components of the Grammar can
be written into test files with ini-file syntax in order to test
whether the parser matches or fails as expected. It can also be
tested whether it produces an expected concrete or abstract syntax tree.
Usually, however, unexpected failure to match a certain string is the
main cause of trouble when constructing a context free Grammar.
"""
27
28


29
import collections
30
# import configparser
31
import copy
Eckhart Arnold's avatar
Eckhart Arnold committed
32
import fnmatch
di68kap's avatar
di68kap committed
33
import inspect
34
35
import json
import os
36
import sys
37

38
from DHParser.error import is_error, adjust_error_locations
39
from DHParser.log import is_logging, clear_logs, log_ST, log_parsing_history
40
from DHParser.parse import UnknownParserError
41
42
from DHParser.syntaxtree import Node, mock_syntax_tree, flatten_sxpr, ZOMBIE_PARSER
from DHParser.toolkit import re
43

44
__all__ = ('unit_from_configfile',
45
46
47
48
49
50
51
           'unit_from_json',
           'unit_from_file',
           'get_report',
           'grammar_unit',
           'grammar_suite',
           'runner')

52
# Names of the stages (sections) that may occur in a test unit. The readers
# in this module (unit_from_configfile, unit_from_json) reject any stage that
# is not listed here. '__ast__' and '__cst__' are the keys under which
# grammar_unit() stores the syntax trees it produced for the match tests.
UNIT_STAGES = {'match', 'fail', 'ast', 'cst', '__ast__', '__cst__'}
53

54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# def unit_from_configfile(config_filename):
#     """
#     Reads a grammar unit test from a config file.
#     """
#     cfg = configparser.ConfigParser(interpolation=None)
#     cfg.read(config_filename, encoding="utf8")
#     OD = collections.OrderedDict
#     unit = OD()
#     for section in cfg.sections():
#         symbol, stage = section.split(':')
#         if stage not in UNIT_STAGES:
#             if symbol in UNIT_STAGES:
#                 symbol, stage = stage, symbol
#             else:
#                 raise ValueError('Test stage %s not in: ' % (stage, str(UNIT_STAGES)))
#         for testkey, testcode in cfg[section].items():
#             if testcode[:3] + testcode[-3:] in {"''''''", '""""""'}:
#                 testcode = testcode[3:-3]
#                 # testcode = testcode.replace('\\#', '#')
#                 testcode = re.sub(r'(?<!\\)\\#', '#', testcode).replace('\\\\', '\\')
#             elif testcode[:1] + testcode[-1:] in {"''", '""'}:
#                 testcode = testcode[1:-1]
#             unit.setdefault(symbol, OD()).setdefault(stage, OD())[testkey] = testcode
#     # print(json.dumps(unit, sort_keys=True, indent=4))
#     return unit

# Regular expressions for reading test files in config file (.ini) syntax.
# All patterns are raw strings, so that regex escapes like ``\s`` or ``\[``
# are not (mis)read as string escapes by Python.

# A section heading: ``[stage:symbol]``
RX_SECTION = re.compile(r'\s*\[(?P<stage>\w+):(?P<symbol>\w+)\]')

# The value of an entry. Alternatives, in this order:
#   1. text enclosed in triple double quotes (may span several lines)
#   2. text enclosed in triple single quotes (may span several lines)
#   3. text enclosed in double quotes (single line)
#   4. text enclosed in single quotes (single line)
#   5. unquoted text; continuation lines must be indented by at least
#      four spaces and may be separated by blank lines
RE_VALUE = (r'(?:"""((?:.|\n)*?)""")|'
            r"(?:'''((?:.|\n)*?)''')|"
            r'(?:"(.*?)")|'
            r"(?:'(.*?)')|"
            r'(.*(?:\n(?:\s*\n)*    .*)*)')
# the following does not work with pypy3, because pypy's re-engine does not
# support local flags, e.g. '(?s: )'
# RE_VALUE = '(?:"""((?s:.*?))""")|' + "(?:'''((?s:.*?))''')|" + \
#            '(?:"(.*?)")|' + "(?:'(.*?)')|" + '(.*(?:\n(?:\s*\n)*    .*)*)'

# An entry: ``key: value``
RX_ENTRY = re.compile(r'\s*(\w+)\s*:\s*(?:{value})\s*'.format(value=RE_VALUE))

# A comment line: ``# ...``. The comment may be terminated by the end of the
# file instead of a line feed, so that a comment on the very last line of a
# file without trailing newline is skipped as well.
RX_COMMENT = re.compile(r'\s*#.*(?:\n|$)')

90

91
def unit_from_configfile(config_filename):
    """ Reads grammar unit tests contained in a file in config file (.ini)
    syntax.

    The file consists of sections headed by ``[stage:symbol]`` (see
    RX_SECTION), each of which contains one or more ``key: value`` entries
    (see RX_ENTRY). Lines starting with ``#`` are comments. Tab characters
    are replaced by four spaces before the file is parsed.

    Args:
        config_filename (str): A config file containing Grammar unit-tests

    Returns:
        A dictionary representing the unit tests. It is an ordered
        dictionary of the form ``unit[symbol][stage][testkey] = testcode``.

    Raises:
        KeyError: if a section names a stage that is not in UNIT_STAGES.
        SyntaxError: if a section does not contain any entry or if the
            file could not be parsed up to its end.
    """
    def eat_comments(txt, pos):
        # skip any number of consecutive comment lines starting at pos
        m = RX_COMMENT.match(txt, pos)
        while m:
            pos = m.span()[1]
            m = RX_COMMENT.match(txt, pos)
        return pos

    with open(config_filename, 'r', encoding="utf-8") as f:
        cfg = f.read()
        cfg = cfg.replace('\t', '    ')

    OD = collections.OrderedDict
    unit = OD()

    pos = eat_comments(cfg, 0)
    section_match = RX_SECTION.match(cfg, pos)
    while section_match:
        d = section_match.groupdict()
        stage = d['stage']
        if stage not in UNIT_STAGES:
            raise KeyError('Unknown stage ' + stage + " ! must be one of: " + str(UNIT_STAGES))
        symbol = d['symbol']
        pos = eat_comments(cfg, section_match.span()[1])

        entry_match = RX_ENTRY.match(cfg, pos)
        if entry_match is None:
            raise SyntaxError('No entries in section [%s:%s]' % (stage, symbol))
        while entry_match:
            # exactly one of the value-alternatives of RX_ENTRY has matched,
            # all other groups are None
            testkey, testcode = [group for group in entry_match.groups() if group is not None]
            lines = testcode.split('\n')
            if len(lines) > 1:
                # Remove the common indentation of the continuation lines.
                # Blank lines are ignored when determining the common
                # indentation, because otherwise a single empty line would
                # reduce it to zero and nothing would be dedented at all.
                indent = min((len(line) - len(line.lstrip())
                              for line in lines[1:] if line.strip()),
                             default=sys.maxsize)
                lines[1:] = [line[indent:] for line in lines[1:]]
                testcode = '\n'.join(lines)
            unit.setdefault(symbol, OD()).setdefault(stage, OD())[testkey] = testcode
            pos = eat_comments(cfg, entry_match.span()[1])
            entry_match = RX_ENTRY.match(cfg, pos)

        section_match = RX_SECTION.match(cfg, pos)

    # anything left over at this point is neither section, entry nor comment
    if pos != len(cfg):
        raise SyntaxError('in file %s in line %i' % (config_filename, cfg[:pos].count('\n') + 1))

    return unit
148

149

150
def unit_from_json(json_filename):
    """
    Loads the grammar unit tests stored in a json file.

    Args:
        json_filename (str): name of an utf-8 encoded json file that maps
            symbols to stages.

    Returns:
        The data read from the json file.

    Raises:
        ValueError: if any stage name is not contained in UNIT_STAGES.
    """
    with open(json_filename, 'r', encoding='utf8') as json_file:
        unit = json.load(json_file)
    for symbol in unit:
        for stage in unit[symbol]:
            if stage in UNIT_STAGES:
                continue
            raise ValueError('Test stage %s not in: %s' % (stage, str(UNIT_STAGES)))
    return unit

162
# TODO: add support for yaml, cson, toml
163
164


165
def unit_from_file(filename):
    """
    Reads a grammar unit test from a file. The format of the file is
    determined by the ending of its name.

    Args:
        filename (str): name of a test file ending with ".json" or ".ini".

    Returns:
        The test unit as returned by ``unit_from_json`` or
        ``unit_from_configfile`` respectively.

    Raises:
        ValueError: if the file name has neither of the known endings. The
            message always starts with "Unknown unit test file type: ".
        EnvironmentError: if the same test name has been assigned to a
            match test and to a fail test of the same parser.
    """
    if filename.endswith(".json"):
        test_unit = unit_from_json(filename)
    elif filename.endswith(".ini"):
        test_unit = unit_from_configfile(filename)
    else:
        # os.path.splitext only looks at the last path component, so that
        # names without any extension (or with a dot in a directory name
        # only) are not reported with a bogus "file type".
        extension = os.path.splitext(filename)[1] or '<no extension>'
        raise ValueError("Unknown unit test file type: " + extension)

    # Check for ambiguous Test names
    errors = []
    for parser_name, tests in test_unit.items():
        m_names = set(tests.get('match', {}))
        f_names = set(tests.get('fail', {}))
        intersection = sorted(m_names & f_names)
        if intersection:
            errors.append("Same names %s assigned to match and fail test "
                          "of parser %s." % (str(intersection), parser_name))
    if errors:
        raise EnvironmentError("Error(s) in Testfile %s :\n" % filename
                               + '\n'.join(errors))

    return test_unit

193

194
def get_report(test_unit):
    """
    Renders the results of a grammar unit test as text.

    For every parser in the test unit a heading is written, followed by
    one entry for each match test and each fail test. An entry shows the
    test code and, if present under '__err__', the error message. Entries
    of match tests additionally show the syntax tree stored under
    '__cst__' or '__ast__', if there is any.

    Args:
        test_unit: dictionary of the form ``test_unit[parser][stage][name]``

    Returns:
        The report as a single string.
    """
    def indented(text):
        return '    ' + text.replace('\n', '\n    ')

    def underlined(title, rule):
        return '%s\n%s\n' % (title, rule * len(title))

    def test_entry(title, code, message):
        entry = ['\n' + underlined(title, '-'), '### Test-code:', indented(code)]
        if message:
            entry += ['\n### Error:', message]
        return entry

    chunks = []
    for parser_name, tests in test_unit.items():
        chunks.append('\n\n' + underlined('Test of parser: "%s"' % parser_name, '='))

        for test_name, test_code in tests.get('match', dict()).items():
            message = tests.get('__err__', {}).get(test_name, "")
            chunks.extend(test_entry('Match-test "%s"' % test_name, test_code, message))
            ast = tests.get('__ast__', {}).get(test_name, None)
            cst = tests.get('__cst__', {}).get(test_name, None)
            # the CST is shown only if there is no AST that differs from it
            if cst and (not ast or cst == ast):
                chunks += ['\n### CST', indented(cst.as_sxpr())]
            elif ast:
                chunks += ['\n### AST', indented(ast.as_sxpr())]

        for test_name, test_code in tests.get('fail', dict()).items():
            message = tests.get('__err__', {}).get(test_name, "")
            chunks.extend(test_entry('Fail-test "%s"' % test_name, test_code, message))

    return '\n'.join(chunks)


235
def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, verbose=False):
    """
    Unit tests for a grammar-parser and ast transformations.

    Args:
        test_unit: Either the name of a test file (see ``unit_from_file``)
            or a dictionary of the form ``test_unit[parser][stage][name]``.
            The dictionary is updated in place: the trees produced by the
            match tests are stored under '__cst__' and '__ast__', error
            messages are stored under '__err__'.
        parser_factory: callable without arguments that returns the parser.
            The parser is called with the test code and the parser name.
        transformer_factory: callable without arguments that returns the
            function which transforms a concrete syntax tree in place.
        report (bool): if True, a report is written to the file
            "REPORT/<unit name>.md" below the current working directory.
        verbose (bool): if True, the progress is printed to the console.

    Returns:
        A list of error messages, one for each failed test. The list is
        empty if all tests have passed.
    """
    if isinstance(test_unit, str):
        _, unit_name = os.path.split(os.path.splitext(test_unit)[0])
        test_unit = unit_from_file(test_unit)
    else:
        unit_name = str(id(test_unit))
    if verbose:
        print("\nUnit: " + unit_name)
    errata = []
    parser = parser_factory()
    transform = transformer_factory()
    for parser_name, tests in test_unit.items():
        # '__err__' is written by this function itself. It must be tolerated
        # here, or running the same unit a second time would fail.
        assert set(tests.keys()).issubset(UNIT_STAGES | {'__err__'})

        if verbose:
            print('  Match-Tests for parser "' + parser_name + '"')
        for test_name, test_code in tests.get('match', dict()).items():
            if verbose:
                infostr = '    match-test "' + test_name + '" ... '
                errflag = len(errata)
            try:
                cst = parser(test_code, parser_name)
            except UnknownParserError as upe:
                cst = Node(ZOMBIE_PARSER, "").add_error(str(upe)).init_pos(0)
            log_ST(cst, "match_%s_%s.cst" % (parser_name, test_name))
            tests.setdefault('__cst__', {})[test_name] = cst
            if "ast" in tests or report:
                ast = copy.deepcopy(cst)
                transform(ast)
                tests.setdefault('__ast__', {})[test_name] = ast
                log_ST(ast, "match_%s_%s.ast" % (parser_name, test_name))
            if is_error(cst.error_flag):
                errors = adjust_error_locations(cst.collect_errors(), test_code)
                errata.append('Match test "%s" for parser "%s" failed:\n\tExpr.:  %s\n\n\t%s\n\n' %
                              (test_name, parser_name, '\n\t'.join(test_code.split('\n')),
                               '\n\t'.join(str(m).replace('\n', '\n\t\t') for m in errors)))
                tests.setdefault('__err__', {})[test_name] = errata[-1]
                # write parsing-history log only in case of failure!
                if is_logging():
                    log_parsing_history(parser, "match_%s_%s.log" % (parser_name, test_name))
            # Trees are compared only if an expected tree has been specified
            # for this particular test. Not every match test needs to have
            # a counterpart in the "cst" or "ast" stage.
            elif (test_name in tests.get("cst", {})
                  and mock_syntax_tree(tests["cst"][test_name]) != cst):
                errata.append('Concrete syntax tree test "%s" for parser "%s" failed:\n%s' %
                              (test_name, parser_name, cst.as_sxpr()))
                tests.setdefault('__err__', {})[test_name] = errata[-1]
            elif test_name in tests.get("ast", {}):
                # "ast" is in tests, therefore ast has been computed above
                compare = mock_syntax_tree(tests["ast"][test_name])
                if compare != ast:
                    errata.append('Abstract syntax tree test "%s" for parser "%s" failed:'
                                  '\n\tExpr.:     %s\n\tExpected:  %s\n\tReceived:  %s'
                                  % (test_name, parser_name, '\n\t'.join(test_code.split('\n')),
                                     flatten_sxpr(compare.as_sxpr()),
                                     flatten_sxpr(ast.as_sxpr())))
                    tests.setdefault('__err__', {})[test_name] = errata[-1]
            if verbose:
                print(infostr + ("OK" if len(errata) == errflag else "FAIL"))

        if verbose and 'fail' in tests:
            print('  Fail-Tests for parser "' + parser_name + '"')
        for test_name, test_code in tests.get('fail', dict()).items():
            if verbose:
                infostr = '    fail-test  "' + test_name + '" ... '
                errflag = len(errata)
            try:
                cst = parser(test_code, parser_name)
            except UnknownParserError as upe:
                cst = Node(ZOMBIE_PARSER, "").add_error(str(upe)).init_pos(0)
            if not is_error(cst.error_flag):
                errata.append('Fail test "%s" for parser "%s" yields match instead of '
                              'expected failure!' % (test_name, parser_name))
                tests.setdefault('__err__', {})[test_name] = errata[-1]
                # write parsing-history log only in case of test-failure
                if is_logging():
                    log_parsing_history(parser, "fail_%s_%s.log" % (parser_name, test_name))
            if verbose:
                print(infostr + ("OK" if len(errata) == errflag else "FAIL"))

    # write test-report
    if report:
        report_dir = "REPORT"
        os.makedirs(report_dir, exist_ok=True)
        with open(os.path.join(report_dir, unit_name + '.md'), 'w', encoding='utf8') as f:
            f.write(get_report(test_unit))

    return errata


Eckhart Arnold's avatar
Eckhart Arnold committed
324
325
326
327
def grammar_suite(directory, parser_factory, transformer_factory,
                  fn_patterns=('*test*',),
                  ignore_unknown_filetypes=False,
                  report=True, verbose=True):
    """
    Runs all grammar unit tests in a directory. A file is considered a test
    unit, if its name matches at least one of the file name patterns.

    Args:
        directory (str): the directory that contains the test files. It
            becomes the working directory while the tests are running.
        parser_factory: passed through to ``grammar_unit``
        transformer_factory: passed through to ``grammar_unit``
        fn_patterns: a single file name pattern or a collection of file
            name patterns in the syntax of the ``fnmatch`` module.
        ignore_unknown_filetypes (bool): if True, files that match one of
            the patterns but are of an unknown type are skipped silently.
        report (bool): passed through to ``grammar_unit``
        verbose (bool): if True, the progress is printed to the console.

    Returns:
        A string with a description of all errors that occurred, or the
        empty string if all tests have passed.
    """
    # A single string is a pattern, not a collection of one-letter patterns.
    # Any other iterable is copied to a list, because it will be iterated
    # over once for each file in the directory.
    if isinstance(fn_patterns, str):
        fn_patterns = [fn_patterns]
    else:
        try:
            fn_patterns = list(fn_patterns)
        except TypeError:
            fn_patterns = [fn_patterns]
    all_errors = collections.OrderedDict()
    if verbose:
        print("\nScanning test-directory: " + directory)
    save_cwd = os.getcwd()
    os.chdir(directory)
    try:
        if is_logging():
            clear_logs()
        for filename in sorted(os.listdir()):
            if any(fnmatch.fnmatch(filename, pattern) for pattern in fn_patterns):
                try:
                    if verbose:
                        print("\nRunning grammar tests from: " + filename)
                    errata = grammar_unit(filename, parser_factory,
                                          transformer_factory, report, verbose)
                    if errata:
                        all_errors[filename] = errata
                except ValueError as e:
                    # only the "Unknown unit test file type" error may be ignored
                    if not ignore_unknown_filetypes or str(e).find("Unknown") < 0:
                        raise
    finally:
        # restore the working directory, even if a test has raised an error
        os.chdir(save_cwd)
    error_report = []
    err_N = 0
    for filename, errata in all_errors.items():
        error_report.append('Errors found by unit test "%s":\n' % filename)
        err_N += len(errata)
        for error in errata:
            error_report.append('\t' + '\n\t'.join(error.split('\n')))
    if error_report:
        return ('Test suite "%s" revealed %s error%s:\n\n'
                % (directory, err_N, 's' if err_N > 1 else '') + '\n'.join(error_report))
    if verbose:
        print("\nSUCCESS! All tests passed :-)\n")
    return ''


372
def runner(test_classes, namespace):
    """
    Runs all or some selected Python unit tests found in the
    namespace. To run all tests in a module, call
    ``runner("", globals())`` from within that module.

    Unit-Tests are either classes, the name of which starts with
    "Test" and methods, the name of which starts with "test" contained
    in such classes or functions, the name of which starts with "test".

    Before the tests of a test class are run, the class is instantiated
    and - if it exists - the method ``setup`` of the new object is called.
    After the tests have been run, the method ``teardown`` is called, if
    it exists. All test classes are run before the test functions.

    Args:
        test_classes: Either a string or a list of strings that contains
            the names of tests or test classes. Within a string the names
            are separated by whitespace. A single method of a test class
            can be selected with ``ClassName.method_name``. Each test and,
            in the case of a test class, all tests within the test class
            will be run. If empty, all tests found in the namespace are run.
        namespace: The namespace for running the test, usually
            ``globals()`` should be used.

    Raises:
        NameError: if one of the given names is not defined in the namespace.

    Example:
        class TestSomething:
            def setup(self):
                pass
            def teardown(self):
                pass
            def test_something(self):
                pass

        if __name__ == "__main__":
            from DHParser.testing import runner
            runner("", globals())
    """
    def lookup(name):
        try:
            return namespace[name]
        except KeyError:
            raise NameError("name '%s' is not defined" % name) from None

    def instantiate(cls_name):
        obj = lookup(cls_name)()
        if hasattr(obj, "setup"):
            obj.setup()
        return obj

    if test_classes:
        if isinstance(test_classes, str):
            test_classes = test_classes.split()
        selected = list(test_classes)
        explicit = True
    else:
        # collect all tests, in case no methods or classes have been passed explicitly
        selected = [name for name in namespace if name.lower().startswith('test')]
        explicit = False

    class_tests = []
    function_tests = []
    for name in selected:
        if '.' in name:
            class_tests.append(name)
            continue
        candidate = lookup(name)
        if inspect.isfunction(candidate):
            function_tests.append(name)
        elif explicit or inspect.isclass(candidate):
            # names that have been collected automatically are run only if
            # they refer to a class or function; explicitly passed names
            # are never dropped silently
            class_tests.append(name)

    for test in class_tests:
        cls_name, _, method_name = test.partition('.')
        # reset obj for each test, so that a failure to instantiate a class
        # does not lead to the teardown of the object of the previous test
        obj = None
        try:
            obj = instantiate(cls_name)
            if method_name:
                print("Running " + cls_name + "." + method_name)
                getattr(obj, method_name)()
            else:
                for name in dir(obj):
                    if name.lower().startswith("test"):
                        method = getattr(obj, name)
                        if callable(method):
                            print("Running " + test + "." + name)
                            method()
        finally:
            if obj is not None and hasattr(obj, "teardown"):
                obj.teardown()

    for test in function_tests:
        lookup(test)()