test_testing.py 10.6 KB
Newer Older
1
2
#!/usr/bin/python3

"""test_testing.py - tests of the testing-module of DHParser

Author: Eckhart Arnold <arnold@badw.de>

Copyright 2017 Bavarian Academy of Sciences and Humanities

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

Eckhart Arnold's avatar
Eckhart Arnold committed
22
23
import os
import re
24
import sys
25
from functools import partial
26
27
28

sys.path.extend(['../', './'])

29
from DHParser.syntaxtree import parse_sxpr, flatten_sxpr, TOKEN_PTYPE
di68kap's avatar
di68kap committed
30
from DHParser.transform import traverse, remove_expendables, remove_empty, \
Eckhart Arnold's avatar
Eckhart Arnold committed
31
    replace_by_single_child, reduce_single_child, flatten
32
from DHParser.dsl import grammar_provider
33
from DHParser.testing import get_report, grammar_unit, unit_from_file, \
34
    reset_unit
di68kap's avatar
di68kap committed
35
36
from DHParser.log import logging

37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85

# Sample test configuration with comment lines, a [match:...] and a [fail:...]
# section, unquoted, single-quoted and double-quoted values, and one
# triple-quoted multi-line value with nested indentation (M4).
# TestTestfiles.setup writes it to 'configfile_test_1.ini'.
CFG_FILE_1 = '''
# a comment

[match:ParserA]
M1: test
M2: 'test'
M3: "test"
M4: """
    test
        proper multiline indent?
    """

# another comment

[fail:ParserA]
F1: test
'''

# Sample test configuration with numeric test names and triple-quoted values
# that span several lines or consist of whitespace only.
# TestTestfiles.setup writes it to 'configfile_test_2.ini'.
CFG_FILE_2 = '''
[match:LB]
1:  """
    """

[fail:LB]
10: """ """

[match:BedeutungsPosition]
M1: """
    BEDEUTUNG
    LAT pannus, faciale, sudarium
    DEU Gesichts-, Schweißtuch {usu liturg.: de re v. p. 32, 63}:"""
'''

# Sample test configuration with unquoted multi-line values (continuation
# lines are indented, and a value may contain an empty line), backslash
# sequences (hence the raw string) and a test name carrying an asterisk.
# TestTestfiles.setup writes it to 'configfile_test_3.ini'.
CFG_FILE_3 = r'''
[match:paragraph]
1 : Im allgemeinen werden die Bewohner Göttingens eingeteilt in Studenten,
    Professoren, Philister und Vieh; welche vier Stände doch nichts weniger
    als streng geschieden sind. Der Viehstand ist der bedeutendste.

2 : Paragraphs may contain {\em inline blocks} as well as \emph{inline commands}
    and also special \& characters.

[fail:paragraph]
20: Paragraphs are separated by gaps.

    Like this one.

21: \begin{enumerate}

[match:csttest]
M1*: """Trigger CST-output with an asterix!"""
'''


class TestTestfiles:
    """Tests reading unit-test definitions from config files with ``unit_from_file``."""

    def setup(self):
        """Write the three sample configurations to .ini files in the working directory."""
        samples = (('configfile_test_1.ini', CFG_FILE_1),
                   ('configfile_test_2.ini', CFG_FILE_2),
                   ('configfile_test_3.ini', CFG_FILE_3))
        for filename, content in samples:
            with open(filename, 'w', encoding="utf-8") as f:
                f.write(content)

    def teardown(self):
        """Delete the .ini files created by ``setup``."""
        for filename in ('configfile_test_1.ini',
                         'configfile_test_2.ini',
                         'configfile_test_3.ini'):
            os.remove(filename)

    def test_unit_from_config_file(self):
        """Check parsers, sections, test names and indentation of the parsed test units."""
        unit = unit_from_file('configfile_test_1.ini')
        assert list(unit.keys()) == ['ParserA']
        assert list(unit['ParserA'].keys()) == ['match', 'fail'], str(list(unit['ParserA'].keys()))
        assert list(unit['ParserA']['match'].keys()) == ['M1', 'M2', 'M3', 'M4']
        assert list(unit['ParserA']['fail'].keys()) == ['F1']
        # The nested line of M4 must keep its indentation relative to the
        # other lines of the multi-line value.
        testcase = unit['ParserA']['match']['M4']
        lines = testcase.split('\n')
        assert len(lines[2]) - len(lines[2].lstrip()) == 4

        unit = unit_from_file('configfile_test_2.ini')
        txt = unit['BedeutungsPosition']['match']['M1']
        # Iterate over the *lines* of the value: str.split() returns a new
        # list, so its result must be used. Iterating over the string itself
        # would walk single characters and never inspect a line start.
        for line in txt.split('\n'):
            # rstrip() turns whitespace-only lines into '' so they pass.
            assert line.rstrip()[0:1] != ' ', repr(line)

        # The third file only has to be readable without raising an error.
        unit = unit_from_file('configfile_test_3.ini')
123

124
125
126
127
128
129
130
131
132
133
134
135
136
137

# Left-recursive EBNF grammar for simple arithmetic formulae; TestGrammarTest
# builds its parser factory from this source.
ARITHMETIC_EBNF = """
    @ whitespace = linefeed
    formula = [ //~ ] expr
    expr = expr ("+"|"-") term | term
    term = term ("*"|"/") factor | factor
    factor = /[0-9]+/~
    # example:  "5 + 3 * 4"
    """


# AST transformations for the arithmetic grammar: each key maps to the list
# of transformation functions passed to `traverse` as its processing table.
# NOTE(review): "term, expr" presumably addresses both node names and "*"
# presumably serves as the fallback entry -- confirm against DHParser.transform.
ARITHMETIC_EBNF_transformation_table = {
    "formula": [remove_expendables],
    "term, expr": [replace_by_single_child, flatten],
    "factor": [remove_expendables, reduce_single_child],
    TOKEN_PTYPE: [remove_expendables, reduce_single_child],
    "*": [remove_expendables, replace_by_single_child],
}


# `traverse` with the processing table above already bound.
ARITHMETIC_EBNFTransform = partial(
    traverse,
    processing_table=ARITHMETIC_EBNF_transformation_table,
)


di68kap's avatar
di68kap committed
148
149
150
151
152
153
154
155
156
157
158
159
160
def clean_report():
    """Delete the unit-test reports in the 'REPORT' directory.

    Every entry of 'REPORT' (relative to the current working directory) whose
    name starts with ``unit_test_<digits>.md`` is removed. The directory
    itself is removed afterwards only if it held no other entries. Nothing
    happens if 'REPORT' does not exist.
    """
    if not os.path.exists('REPORT'):
        return
    foreign_entries = False
    for entry in os.listdir('REPORT'):
        if re.match(r'unit_test_\d+\.md', entry) is None:
            # Not a unit-test report: leave it alone and keep the directory.
            foreign_entries = True
            continue
        os.remove(os.path.join('REPORT', entry))
    if not foreign_entries:
        os.rmdir('REPORT')


161
162
163
class TestGrammarTest:
    """Tests of ``grammar_unit`` and ``get_report`` with the arithmetic grammar."""

    # Test suite whose match-, ast- and fail-tests are meant to be correct.
    # The section "no_match_tests_specified" names a parser that the
    # arithmetic grammar does not define.
    cases = {
        "factor": {
            "match": {
                1: "0",
                2: "314",
            },
            "fail": {
                3: "21F",
                4: "G123",
            },
        },
        "term": {
            "match": {
                '1*': "4 * 5",
                2: "20 / 4",
                3: "20 / 4 * 3",
            },
            "ast": {
                '1*': "(term (factor 4) (:Token *) (factor 5))",
                2: "(term (factor 20) (:Token /) (factor 4))",
                3: "(term (term (factor 20) (:Token /) (factor 4)) (:Token *) (factor 3))",
            },
            "fail": {
                4: "4 + 5",
                5: "20 / 4 - 3",
            },
        },
        "no_match_tests_specified": {
            "fail": {
                1: "+ 4 5",
            },
        },
    }

    # Test suite with three deliberately wrong expectations (marked below).
    failure_cases = {
        "term": {
            "match": {
                1: "4 + 5",     # error: this should fail
                2: "20 / 4",
                3: "20 / 4 * 3",
            },
            "ast": {
                1: "(term (factor 4) (:Token *) (factor 5))",
                2: "(term (factor 20) (:Token /) (factor 4))",
                3: "(term (term (factor 19) (:Token /) (factor 4)) (:Token *) (factor 3))",  # error 19 != 20
            },
            "fail": {
                4: "4 * 5",     # error: this should match
                5: "20 / 4 - 3",
            },
        },
    }

    def teardown(self):
        """Remove the report files written during the test."""
        clean_report()

    def test_testing_grammar(self):
        """Run both suites and check the error lists and the generated report.

        The report check formerly drafted as a separate ``test_get_report``
        is performed here directly after the first ``grammar_unit`` run.
        """
        def trans_fac():
            return ARITHMETIC_EBNFTransform

        parser_fac = grammar_provider(ARITHMETIC_EBNF)

        # `cases` refers to an unknown parser, which must be reported.
        errata = grammar_unit(self.cases, parser_fac, trans_fac)
        assert errata, "Unknown parser, but no error message!?"
        assert '### CST' in get_report(self.cases)

        # Exactly the three wrong expectations of `failure_cases` must show up.
        errata = grammar_unit(self.failure_cases, parser_fac, trans_fac)
        assert len(errata) == 3, str(errata)

    def test_fail_failtest(self):
        """Failure test should not pass if it failed because the parser is unknown."""
        # 'berm' is not a parser of the arithmetic grammar.
        fcases = {'berm': {'fail': self.failure_cases['term']['fail']}}

        def trans_fac():
            return ARITHMETIC_EBNFTransform

        errata = grammar_unit(fcases, grammar_provider(ARITHMETIC_EBNF), trans_fac)
        assert errata


di68kap's avatar
di68kap committed
250
251
252
253
254
255
class TestLookahead:
    """
    Testing of Expressions with trailing Lookahead-Parser.
    """
    EBNF = r"""
        document = { category | entry } { LF }
256
        category = { LF } sequence_of_letters { /:/ sequence_of_letters } /:/ §&(LF sequence_of_letters) 
di68kap's avatar
di68kap committed
257
258
259
260
261
262
263
264
265
        entry = { LF } sequence_of_letters !/:/
        sequence_of_letters = /[A-Za-z0-9 ]+/
        LF = / *\n/
    """

    cases = {
        "category": {
            "match": {
                1: """Mountains: big:
266
267
                          K2""",
                2: """Rivers:"""  # allowed because lookahaead failure occurs at end of file and is mandatory!
di68kap's avatar
di68kap committed
268
269
270
271
272
273
274
275
276
277
            },
            "fail": {
                6: """Mountains: big:"""
            }
        }
    }

    fail_cases = {
        "category": {
            "match": {
di68kap's avatar
di68kap committed
278
                1: """Mountains: b""",  # stop sign ":" is missing
di68kap's avatar
di68kap committed
279
                2: """Rivers: 
di68kap's avatar
di68kap committed
280
281
282
283
                         # not allowed""",
                2: """Mountains:        
                          K2
                      Rivers:"""  # lookahead only covers K2
di68kap's avatar
di68kap committed
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
            },
            "fail": {
                1: """Mountains: big:
                          K2"""
            }
        }
    }

    def setup(self):
        self.grammar_fac = grammar_provider(TestLookahead.EBNF)
        self.trans_fac = lambda : partial(traverse, processing_table={"*": [flatten, remove_empty]})

    def teardown(self):
        clean_report()

    def test_selftest(self):
        doc = """
            Mountains: big:
                Mount Everest
                K2
            Mountains: medium:
                Denali
                Alpomayo
            Rivers:
                Nile   
            """
        grammar = self.grammar_fac()
        cst = grammar(doc)
        assert not cst.error_flag
        # trans = self.trans_fac()
        # trans(cst)
        # print(cst.as_sxpr())

    def test_unit_lookahead(self):
        errata = grammar_unit(self.cases, self.grammar_fac, self.trans_fac)
eckhart's avatar
eckhart committed
319
        assert not errata, str(errata)
di68kap's avatar
di68kap committed
320
321
322
        errata = grammar_unit(self.fail_cases, self.grammar_fac, self.trans_fac)
        assert errata

323
324
325
326
327
328

class TestSExpr:
    """
    Tests for parsing and serializing S-expressions.
    """

    def test_compact_sexpr(self):
        """A multi-line, indented S-expression is flattened to a single line."""
        multiline = ("(a\n"
                     "    (b\n"
                     "        c\n"
                     "    )\n"
                     ")\n")
        assert flatten_sxpr(multiline) == "(a (b c))"

    def test_mock_syntax_tree(self):
        """Trees built from S-expressions serialize back as expected."""
        plain = '(a (b c) (d e) (f (g h)))'
        serialized = parse_sxpr(plain).as_sxpr()
        assert flatten_sxpr(serialized.replace('"', '')) == plain

        # test different quotation marks
        mixed_quotes = '''(a (b """c""" 'k' "l") (d e) (f (g h)))'''
        unquoted = '(a (b c k l) (d e) (f (g h)))'
        serialized = parse_sxpr(mixed_quotes).as_sxpr()
        assert flatten_sxpr(serialized.replace('"', '')) == unquoted

        # an S-expression with double-quoted leaves survives the round trip
        double_quoted = '(a (b "c" "k" "l") (d "e") (f (g "h")))'
        serialized = parse_sxpr(double_quoted).as_sxpr()
        assert flatten_sxpr(serialized) == double_quoted

        # unquoted leaf content comes back as one double-quoted string
        serialized = parse_sxpr(unquoted).as_sxpr()
        assert flatten_sxpr(serialized) == '(a (b "c k l") (d "e") (f (g "h")))'

    def test_mock_syntax_tree_with_classes(self):
        """Check the tag names of nodes written with a ':class' part."""
        tree = parse_sxpr('(a:class1 (b:class2 x) (:class3 y) (c z))')
        assert tree.tag_name == 'a'
        children = tree.result
        assert children[0].tag_name == 'b'
        assert children[1].tag_name == ':class3'
        assert children[2].tag_name == 'c'


if __name__ == "__main__":
    # When executed as a script, hand this module's globals to DHParser's
    # test runner.
    # NOTE(review): the empty first argument presumably selects all tests -- confirm.
    from DHParser.testing import runner
    runner("", globals())