The expiration time for new job artifacts in CI/CD pipelines is now 30 days (GitLab default). Previously generated artifacts in already completed jobs will not be affected by the change. The latest artifacts for all jobs in the latest successful pipelines will be kept. More information: https://gitlab.lrz.de/help/user/admin_area/settings/continuous_integration.html#default-artifacts-expiration

test_ebnf.py 27.3 KB
Newer Older
1
2
#!/usr/bin/python3

3
"""test_ebnf.py - tests of the ebnf module of DHParser 
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
                             

Author: Eckhart Arnold <arnold@badw.de>

Copyright 2017 Bavarian Academy of Sciences and Humanities

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

Eckhart Arnold's avatar
Eckhart Arnold committed
23
import sys
Eckhart Arnold's avatar
Eckhart Arnold committed
24
from multiprocessing import Pool
Eckhart Arnold's avatar
Eckhart Arnold committed
25

Eckhart Arnold's avatar
Eckhart Arnold committed
26
27
sys.path.extend(['../', './'])

Eckhart Arnold's avatar
Eckhart Arnold committed
28
from DHParser.toolkit import compile_python_object, get_config_value, set_config_value, re
29
from DHParser.preprocess import nil_preprocessor
30
from DHParser import compile_source
31
from DHParser.error import has_errors, Error
32
from DHParser.syntaxtree import WHITESPACE_PTYPE
eckhart's avatar
eckhart committed
33
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, EBNFTransform, \
34
35
    get_ebnf_compiler, compile_ebnf, DHPARSER_IMPORTS
from DHParser.dsl import CompilationError, compileDSL, grammar_provider
36
from DHParser.testing import grammar_unit
37
38


39
40
41
42
43
44
45
46
47
48
49
class TestDirectives:
    """Tests of the @whitespace-directive with its predefined values
    'linefeed', 'vertical' and 'horizontal'."""

    mini_language = """
        expression =  term  { ("+" | "-") term }
        term       =  factor  { ("*" | "/") factor }
        factor     =  constant | "("  expression  ")"
        constant   =  digit { digit } [ //~ ]
        digit      = /0/ | /1/ | /2/ | /3/ | /4/ | /5/ | /6/ | /7/ | /8/ | /9/ 
        """

    def test_whitespace_linefeed(self):
        grammar_src = "@ whitespace = linefeed\n" + self.mini_language
        parse = grammar_provider(grammar_src)()
        assert parse
        # plain horizontal whitespace is insignificant
        tree = parse("3 + 4 * 12")
        assert not tree.errors_sorted
        # a single linefeed still counts as insignificant whitespace
        tree = parse("3 + 4 \n * 12")
        assert not tree.errors_sorted
        # ...but an empty line (i.e. two linefeeds) does not
        tree = parse("3 + 4 \n \n * 12")
        assert tree.errors_sorted
        tree = parse("3 + 4 \n\n * 12")
        assert tree.errors_sorted

    def test_whitespace_vertical(self):
        grammar_src = "@ whitespace = vertical\n" + self.mini_language
        parse = grammar_provider(grammar_src)()
        assert parse
        # vertical whitespace swallows any number of line breaks
        for snippet in ("3 + 4 * 12", "3 + 4 \n * 12",
                        "3 + 4 \n \n * 12", "3 + 4 \n\n * 12"):
            assert not parse(snippet).errors_sorted

    def test_whitespace_horizontal(self):
        grammar_src = "@ whitespace = horizontal\n" + self.mini_language
        parse = grammar_provider(grammar_src)()
        assert parse
        assert not parse("3 + 4 * 12").errors_sorted
        # horizontal whitespace never crosses a line break
        assert parse("3 + 4 \n * 12").errors_sorted
85

86

Eckhart Arnold's avatar
Eckhart Arnold committed
87
88
89
90
91
92
93
94
95
96
97
98
class TestReservedSymbols:
    """The reserved symbols COMMENT__ and WSP_RE__ may be referred to in a
    grammar without being defined explicitly, because the @comment- and
    @whitespace-directives define them implicitly."""

    def test_comment_usage(self):
        ebnf = r"""
        @comment = /#.*(?:\n|$)/
        document = text [ COMMENT__ ]
        text = /[^#]+/
        """
        # must compile without complaints about the undefined COMMENT__
        grammar_provider(ebnf)()

    def test_whitespace(self):
        ebnf = r"""
        @whitespace = /\s*/
        document = WSP_RE__ { word WSP_RE__ }
        word = /\w+/ 
        """
        # must compile without complaints about the undefined WSP_RE__
        grammar_provider(ebnf)()

    def test_mixin(self):
        ebnf = r"""
        @comment = /#.*(?:\n|$)/
        @whitespace = /\s*/
        document = WSP_RE__ { word WSP_RE__ }
        word = /\w+/ 
        """
        parse = grammar_provider(ebnf)()
        tree = parse("test # kommentar")
        assert not tree.error_flag, str(tree.as_sxpr())


116
class TestEBNFParser:
    """Tests of the EBNF-parser itself, i.e. of the grammar-object with
    which EBNF-source-texts are parsed."""

    # test cases for the grammar_unit()-driven test in test_list()
    cases = {
        "list_": {
            "match": {
                1: "hund",
                2: "hund, katze,maus",
                3: "hund , katze"
            },
            "fail": {
                4: "123",
                5: '"literal"',
                6: "/regexp/"
            }
        }
    }

    def setup(self):
        # fresh EBNF-grammar-object for each test
        self.EBNF = get_ebnf_grammar()

    def test_RE(self):
        gr = get_ebnf_grammar()
        m = gr.regexp.parsers[0].regexp.match(r'/[\\\\]/ xxx /')
        rs = m.group()
        # the regular expression must end at the first unescaped '/',
        # i.e. before the ' xxx /' part.
        # BUG FIX: `rs` is a plain string; the old failure message called
        # the non-existent method str.group() and would itself have raised
        # an AttributeError instead of reporting the mismatch.
        assert rs.find('x') < 0, rs
        rx = re.compile(rs[1:-1])
        assert rx.match(r'\\')

    def test_literal(self):
        snippet = '"text" '
        result = self.EBNF(snippet, 'literal')
        assert not result.error_flag
        assert str(result) == snippet
        # the trailing blank must have been captured as whitespace
        assert result.select(lambda node: node.parser.ptype == WHITESPACE_PTYPE)

        result = self.EBNF('"text" ', 'literal')
        assert not result.error_flag
        result = self.EBNF(' "text"', 'literal')
        assert result.error_flag  # literals catch following, but not leading whitespace

    def test_plaintext(self):
        # back-ticked plain text literals are valid, too
        result = self.EBNF('`plain`', 'plaintext')
        assert not result.error_flag

    def test_list(self):
        grammar_unit(self.cases, get_ebnf_grammar, get_ebnf_transformer)


163

164
165
166
167
168
169
170
171
172
173
174
class TestParserNameOverwriteBug:
    def test_term_bug(self):
        grammar = get_ebnf_grammar()
        st = grammar('impossible = [§"an optional requirement"]')
        get_ebnf_transformer()(st)
        lang = """series = "A" "B" §"C" "D"
        """
        parser = get_ebnf_grammar()
        # BUG FIX: the freshly created `parser` was never used; the old
        # code parsed `lang` with the already-used `grammar`-object,
        # defeating the purpose of creating a fresh grammar-object.
        st = parser(lang)
        get_ebnf_transformer()(st)
        result = get_ebnf_compiler()(st)
        messages = st.errors_sorted
        assert not has_errors(messages), str(messages)

    def test_single_mandatory_bug(self):
        lang = """series = § /B/"""
        result, messages, ast = compile_ebnf(lang)
        # a single mandatory item must not be wrapped in a Required-parser
        assert result.find('Required') < 0
        parser = grammar_provider(lang)()
        st = parser('B')
        assert not st.error_flag


187

188
189
class TestSemanticValidation:
    """Tests whether semantic errors in an EBNF-grammar are detected by
    the AST-transformation of the parsed EBNF-source."""

    def check(self, minilang, bool_filter=lambda x: x):
        """Parses `minilang`, runs the EBNF-AST-transformation and asserts
        that `bool_filter` applied to the resulting error list is truthy."""
        syntax_tree = get_ebnf_grammar()(minilang)
        # the grammar source itself must be syntactically correct
        assert not syntax_tree.errors_sorted
        EBNFTransform()(syntax_tree)
        assert bool_filter(syntax_tree.errors_sorted)

    def test_illegal_nesting(self):
        self.check('impossible = { [ "an optional requirement" ] }')

    def test_illegal_nesting_option_required(self):
        self.check('impossible = [ §"an optional requirement" ]')

    def test_illegal_nesting_oneormore_option(self):
        self.check('impossible = { [ "no use"] }+')

    def test_legal_nesting(self):
        # here, no errors are expected, hence the inverted filter
        self.check('possible = { [ "+" ] "1" }', lambda x: not x)


class TestCompilerErrors:
    def test_error_propagation(self):
        ebnf = "@ literalws = wrongvalue  # testing error propagation\n"
        result, messages, st = compile_source(ebnf, None, get_ebnf_grammar(),
            get_ebnf_transformer(), get_ebnf_compiler('ErrorPropagationTest'))
        # the faulty directive-value must surface as a compilation message
        assert messages

    def test_undefined_symbols(self):
        """Use of undefined symbols should be reported.
        """
        ebnf = """syntax = { intermediary }
                  intermediary = "This symbol is " [ badly_spelled ] "!"
                  bedly_spilled = "wrong" """
        save = get_config_value('static_analysis')
        set_config_value('static_analysis', 'early')
        # ROBUSTNESS FIX: restore the configuration value even if the
        # compilation or the assertion fails, so that subsequent tests do
        # not unintentionally run with 'early' static analysis.
        try:
            result, messages, st = compile_source(ebnf, None, get_ebnf_grammar(),
                get_ebnf_transformer(), get_ebnf_compiler('UndefinedSymbols'))
            assert messages
        finally:
            set_config_value('static_analysis', save)

    def test_no_error(self):
        """But reserved symbols should not be reported as undefined.
        """
        ebnf = """nothing =  WSP_RE__ | COMMENT__\n"""
        result, messages, st = compile_source(ebnf, None, get_ebnf_grammar(),
            get_ebnf_transformer(), get_ebnf_compiler('UndefinedSymbols'))
        assert not bool(messages), messages

240

241
class TestSelfHosting:
    """Tests that DHParser's EBNF-grammar, written in its own EBNF-variant,
    can be compiled by the EBNF-compiler itself (self-hosting)."""

    # DHParser's EBNF-grammar, expressed in EBNF
    grammar = r"""
        # EBNF-Grammar in EBNF

        @ comment    = /#.*(?:\n|$)/                    # comments start with '#' and eat all chars up to and including '\n'
        @ whitespace = /\s*/                            # whitespace includes linefeed
        @ literalws  = right                            # trailing whitespace of literals will be ignored tacitly
        
        syntax     = [~//] { definition | directive } §EOF
        definition = symbol §"=" expression
        directive  = "@" §symbol "=" (regexp | literal | symbol) { "," (regexp | literal | symbol) }
        
        expression = term { "|" term }
        term       = { ["§"] factor }+                       # "§" means all following factors mandatory
        factor     = [flowmarker] [retrieveop] symbol !"="   # negative lookahead to be sure it's not a definition
                   | [flowmarker] literal
                   | [flowmarker] plaintext
                   | [flowmarker] regexp
                   | [flowmarker] whitespace
                   | [flowmarker] oneormore
                   | [flowmarker] group
                   | [flowmarker] unordered
                   | repetition
                   | option
        
        flowmarker = "!"  | "&"                         # '!' negative lookahead, '&' positive lookahead
                   | "-!" | "-&"                        # '-' negative lookbehind, '-&' positive lookbehind
        retrieveop = "::" | ":"                         # '::' pop, ':' retrieve
        
        group      = "(" §expression ")"
        unordered  = "<" §expression ">"                # elements of expression in arbitrary order
        oneormore  = "{" expression "}+"
        repetition = "{" §expression "}"
        option     = "[" §expression "]"
        
        symbol     = /(?!\d)\w+/~                       # e.g. expression, factor, parameter_list
        literal    = /"(?:[^"]|\\")*?"/~                # e.g. "(", '+', 'while'
                   | /'(?:[^']|\\')*?'/~                # whitespace following literals will be ignored tacitly.
        plaintext  = /`(?:[^"]|\\")*?`/~                # like literal but does not eat whitespace
        regexp     = /\/(?:\\\/|[^\/])*?\//~            # e.g. /\w+/, ~/#.*(?:\n|$)/~
        whitespace = /~/~                               # insignificant whitespace
        
        EOF = !/./
        """

    def test_self(self):
        # compile the EBNF-grammar with the EBNF-compiler itself
        compiler_name = "EBNF"
        compiler = get_ebnf_compiler(compiler_name, self.grammar)
        parser = get_ebnf_grammar()
        result, errors, syntax_tree = compile_source(self.grammar, None, parser,
                                            get_ebnf_transformer(), compiler)
        assert not errors, str(errors)
        # compile the grammar again using the result of the previous
        # compilation as parser
        compileDSL(self.grammar, nil_preprocessor, result, get_ebnf_transformer(), compiler)

    def multiprocessing_task(self):
        # worker function for test_multiprocessing(); returns the list of
        # compilation errors (empty, if self-compilation succeeded)
        compiler_name = "EBNF"
        compiler = get_ebnf_compiler(compiler_name, self.grammar)
        parser = get_ebnf_grammar()
        result, errors, syntax_tree = compile_source(self.grammar, None, parser,
                                            get_ebnf_transformer(), compiler)
        return errors

    def test_multiprocessing(self):
        # self-compilation must also work when run in several processes
        # in parallel
        with Pool() as pool:
            res = [pool.apply_async(self.multiprocessing_task, ()) for i in range(4)]
            errors = [r.get(timeout=10) for r in res]
        for i, e in enumerate(errors):
            assert not e, ("%i: " % i) + str(e)
311
312


313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
class TestBoundaryCases:
    """Boundary cases for the EBNF-compiler: empty and minimal grammars,
    unconnected rules."""

    def setup(self):
        self.gr = get_ebnf_grammar()
        self.tr = get_ebnf_transformer()
        self.cp = get_ebnf_compiler()

    def _compile(self, ebnf):
        """Parses, transforms and compiles `ebnf`; returns the result."""
        syntax_tree = self.gr(ebnf)
        self.tr(syntax_tree)
        return self.cp(syntax_tree)

    def test_empty_grammar(self):
        assert self._compile("")

    def test_single_statement_grammar(self):
        assert self._compile("i = /i/")

    def test_two_statement_grammar(self):
        assert self._compile("i = k {k}\nk = /k/")

    def test_unconnected_symbols(self):
        # `unconnected` is never reached from the root rule
        ebnf = """root = /.*/
                  unconnected = /.*/
        """
        result, messages, AST = compile_source(ebnf, nil_preprocessor,
                                               get_ebnf_grammar(),
                                               get_ebnf_transformer(),
                                               get_ebnf_compiler())
        # guard clauses instead of the former if/else with a dead branch
        assert messages, "EBNF compiler should warn about unconnected rules."
        assert not has_errors(messages), "Unconnected rules should result in a warning, " \
            "not an error: " + str(messages)
        grammar = compile_python_object(DHPARSER_IMPORTS + result,
                                        r'get_(?:\w+_)?grammar$')()

        assert grammar['root'], "Grammar objects should be subscriptable by parser names!"
        try:
            grammar['unconnected']
        except KeyError:
            assert False, "Grammar objects should be able to cope with unconnected parsers!"
        try:
            grammar['nonexistant']
            # BUG FIX: typos in the failure message ("shoul", "non-existant")
            assert False, "Grammar object should raise a KeyError if subscripted by " \
                          "a non-existent parser name!"
        except KeyError:
            pass
365
366
367
368
369
370
371


class TestSynonymDetection:
    """Tests that a definition of the form `a = b` (a synonym) does not
    overwrite the parser name of the symbol it refers to."""

    def test_synonym_detection(self):
        ebnf = """a = b
                  b = /b/
        """
        grammar = grammar_provider(ebnf)()
        # each parser keeps its own name, although a is a mere synonym of b
        for name in ('a', 'b'):
            assert grammar[name].pname == name, grammar[name].pname
        assert grammar('b').as_sxpr().count('b') == 2
376

377

378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
class TestFlowControlOperators:
    """Tests of lookbehind-operators and of error reporting for the
    mandatory-operator '§'."""

    def setup(self):
        self.t1 = """
        All work and no play 
        makes Jack a dull boy
        END
        """
        self.t2 = "All word and not play makes Jack a dull boy END\n"

    def test_lookbehind_indirect(self):
        lang = r"""
            document = ws sequence doc_end ws         
            sequence = { !end word ws }+
            doc_end  = -&SUCC_LB end        
            ws       = /\s*/
            end      = /END/
            word     = /\w+/
            SUCC_LB  = indirection
            indirection = /\s*?\n/
        """
        parse = grammar_provider(lang)()
        # 'END' preceded by a line feed: the lookbehind succeeds
        tree = parse(self.t1)
        assert not tree.error_flag, tree.as_sxpr()
        # this should fail, because 'END' is not preceded by a line feed
        tree = parse(self.t2)
        assert tree.error_flag, tree.as_sxpr()

    def test_required_error_reporting(self):
        """Tests whether failures to comply with the required operator '§'
        are correctly reported as such.
        """
        lang1 = r"nonsense == /\w+/~  # wrong_equal_sign"
        lang2 = "nonsense = [^{}%]+  # someone forgot the '/'-delimiters for regular expressions"
        for faulty_grammar in (lang1, lang2):
            try:
                grammar_provider(faulty_grammar)
                assert False, "Compilation error expected."
            except CompilationError:
                pass

422

423
424
425
426
427
428
429
430
431
432
433
434
435
class TestWhitespace:
    """Tests whitespace-handling of string-literals (which tacitly eat
    trailing whitespace) versus back-ticked plaintext-literals (which
    don't, unless followed by '~')."""

    def test_whitespace(self):
        tail = r"""
            WORD     =  /\w+/~
            EOF      =  !/./
        """
        # pairs of (document-rule, does a blank after "DOC" cause an error?)
        scenarios = (
            (r'document = "DOC" { WORD } EOF', False),   # literal eats trailing whitespace
            (r'document = `DOC`{ WORD } EOF', True),     # plaintext does not
            (r'document = `DOC`~ { WORD } EOF', False),  # ...unless '~' follows
        )
        for head, blank_fails in scenarios:
            parse = grammar_provider(head + tail)()
            assert bool(parse("DOC Wörter Wörter Wörter").error_flag) == blank_fails
            # without an intervening blank all three grammars succeed
            assert not parse("DOCWörter Wörter Wörter").error_flag


451
452
453
454
class TestAllSome:
    """Tests the unordered all-of parser < a b > and its some-of
    variant < a | b >."""

    def test_all(self):
        parse = grammar_provider('prefix = <"A" "B">')()
        # all elements must appear, but in arbitrary order
        assert parse('B A').content == 'B A'

    def test_some(self):
        parse = grammar_provider('prefix = <"A" | "B">')()
        # any non-empty subset of the elements matches, in any order
        assert parse('B A').content == 'B A'
        assert parse('B').content == 'B'
462

463

464
class TestErrorCustomization:
    """
    Customized Errors replace existing errors with alternative
    error codes and messages that are more helpful to the user.
    """
    def test_customized_mandatory_continuation(self):
        lang = """
            document = series | /.*/
            @series_error = "a user defined error message"
            series = "X" | head §"C" "D"
            head = "A" "B"
            """
        parser = grammar_provider(lang)()
        st = parser("X");  assert not st.error_flag
        st = parser("ABCD");  assert not st.error_flag
        st = parser("A_CD");  assert not st.error_flag
        st = parser("AB_D");  assert st.error_flag
        # the customized message replaces the stock mandatory-continuation text
        assert st.errors_sorted[0].code == Error.MANDATORY_CONTINUATION
        assert st.errors_sorted[0].message == "a user defined error message"
        # transitivity of mandatory-operator
        st = parser("ABC_");  assert st.error_flag
        assert st.errors_sorted[0].code == Error.MANDATORY_CONTINUATION
        assert st.errors_sorted[0].message == "a user defined error message"

    def test_customized_error_case_sensitive(self):
        # the symbol in @Series_error must be matched case-sensitively
        lang = """
            document = Series | /.*/
            @Series_error = "a user defined error message"
            Series = "X" | head §"C" "D"
            head = "A" "B"
            """
        parser = grammar_provider(lang)()
        st = parser("ABC_");  assert st.error_flag
        assert st.errors_sorted[0].code == Error.MANDATORY_CONTINUATION
        assert st.errors_sorted[0].message == "a user defined error message"

    def test_multiple_error_messages(self):
        # several conditional @series_error-directives (string- or
        # regexp-condition); the first matching condition selects the
        # message, the unconditional directive serves as fallback
        lang = r"""
            document = series | /.*/
            @series_error = '_', "the underscore is wrong in this place"
            @series_error = '*', "the asterix is wrong in this place"
            @series_error = /(?<=C)\w/, 'C cannot be followed by {0}'
            @series_error = /\w/, "wrong letter {0} in place of {1}"
            @series_error = "fallback error message"
            series = "X" | head §"C" "D"
            head = "A" "B"
            """
        parser = grammar_provider(lang)()
        st = parser("AB*D");  assert st.error_flag
        assert st.errors_sorted[0].code == Error.MANDATORY_CONTINUATION
        assert st.errors_sorted[0].message == "the asterix is wrong in this place"
        # transitivity of mandatory-operator
        st = parser("ABC_");  assert st.error_flag
        assert st.errors_sorted[0].code == Error.MANDATORY_CONTINUATION
        assert st.errors_sorted[0].message == "the underscore is wrong in this place"
        st = parser("ABiD");  assert st.error_flag
        assert st.errors_sorted[0].code == Error.MANDATORY_CONTINUATION
        assert st.errors_sorted[0].message.startswith('wrong letter')
        st = parser("AB+D");  assert st.error_flag
        assert st.errors_sorted[0].code == Error.MANDATORY_CONTINUATION
        assert st.errors_sorted[0].message == "fallback error message"
        st = parser("ABCi");  assert st.error_flag
        assert st.errors_sorted[0].code == Error.MANDATORY_CONTINUATION
        assert st.errors_sorted[0].message.startswith('C cannot be followed by')
528
529
530


class TestErrorCustomizationErrors:
    """Tests that faulty or ambiguous uses of the error-customization
    directives are themselves reported as errors or warnings."""

    def test_ambiguous_error_customization(self):
        # one @series_error-directive, but two '§'-loci in `series`
        lang = """
            document = series 
            @series_error = "ambiguous error message: does it apply to first or second '§'?"
            series = "A" § "B" "C" | "X" § "Y" "Z" 
            """
        try:
            parser = grammar_provider(lang)()
            # BUG FIX: typo "exptected" in the failure message corrected
            assert False, "CompilationError because of ambiguous error message expected!"
        except CompilationError as compilation_error:
            err = compilation_error.errors[0]
            assert err.code == Error.AMBIGUOUS_ERROR_HANDLING, str(compilation_error)

    def test_unsed_error_customization(self):
        # an @..._error-directive for a symbol that contains no '§'
        # should be reported as unused (a warning, not an error)
        lang = """
            document = series | other
            @other_error = "a user defined error message"
            series = "A" § "B" "C"
            other = "X" | "Y" | "Z"
            """
        result, messages, ast = compile_ebnf(lang)
        assert messages[0].code == Error.UNUSED_ERROR_HANDLING_WARNING

    def test_multiple_resume_definitions(self):
        # redefining @..._resume for the same symbol is an error
        lang = """
            document = series
            @series_resume = /B/, /C/, /D/, /E/, /F/, /G/
            @series_resume = /X/, /Y/
            series = "A" §"B" "C" "D" "E" "F" "G"
            """
        result, messages, ast = compile_ebnf(lang)
        assert messages[0].code == Error.REDEFINED_DIRECTIVE

    def test_multiple_skip_definitions(self):
        # redefining @..._skip for the same symbol is an error
        lang = """
            document = series
            @series_skip = /B/, /C/, /D/, /E/, /F/, /G/
            @series_skip = /X/, /Y/
            series = "A" §"B" "C" "D" "E" "F" "G"
            """
        result, messages, ast = compile_ebnf(lang)
        assert messages[0].code == Error.REDEFINED_DIRECTIVE

574
575
576

class TestCustomizedResumeParsing:
    """Tests the @..._resume-directive that tells the parser where to
    resume parsing after a mandatory-violation within a symbol; the
    resume-location may be given as string, regexp or symbol-reference."""

    def setup(self):
        lang = r"""
            @ alpha_resume = 'BETA', GAMMA_STR
            @ beta_resume = GAMMA_RE
            @ bac_resume = /GA\w+/
            document = alpha [beta] gamma "."
            alpha = "ALPHA" abc
                abc = §"a" "b" "c"
              beta = "BETA" (bac | bca)
                bac = "b" "a" §"c"
                bca = "b" "c" §"a"
              gamma = "GAMMA" §(cab | cba)
                cab = "c" "a" §"b"
                cba = "c" "b" §"a"
            GAMMA_RE = /GA\w+/
            GAMMA_STR = "GAMMA"
            """
        self.gr = grammar_provider(lang)()

    def test_several_resume_rules_innermost_rule_matching(self):
        gr = self.gr
        # error inside `bac`: resume via @bac_resume at /GA\w+/
        content = 'ALPHA abc BETA bad GAMMA cab .'
        cst = gr(content)
        # print(cst.as_sxpr())
        assert cst.error_flag
        assert cst.content == content
        assert cst.pick('alpha').content.startswith('ALPHA')
        # because of resuming, there should be only one error message
        assert len(cst.errors_sorted) == 1

        # additional error inside `abc`: two resumptions, two errors
        content = 'ALPHA acb BETA bad GAMMA cab .'
        cst = gr(content)
        # print(cst.as_sxpr())
        assert cst.error_flag
        assert cst.content == content
        assert cst.pick('alpha').content.startswith('ALPHA')
        # because of resuming, there should be exactly two error messages
        assert len(cst.errors_sorted) == 2

        # error inside `abc` only; the optional `beta` is simply absent
        content = 'ALPHA acb GAMMA cab .'
        cst = gr(content)
        # print(cst.as_sxpr())
        assert cst.error_flag
        assert cst.content == content
        assert cst.pick('alpha').content.startswith('ALPHA')
        # because of resuming, there should be only one error message
        assert len(cst.errors_sorted) == 1
623

624

625
626
627
class TestInSeriesResume:
    """Tests in-series resumption (@..._skip) after violations of the
    mandatory-operator '§' inside a series."""

    def setup(self):
        lang = """
            document = series
            @series_skip = /B/, /C/, /D/, /E/, /F/, /G/
            series = "A" §"B" "C" "D" "E" "F" "G"
            """
        self.gr = grammar_provider(lang)()

    def _errors_of(self, document):
        """Parses `document` and returns the sorted error list."""
        return self.gr(document).errors_sorted

    def test_garbage_in_series(self):
        assert not self.gr('ABCDEFG').error_flag
        errors = self._errors_of('AB XYZ CDEFG')
        assert len(errors) == 1 and errors[0].code == Error.MANDATORY_CONTINUATION
        errors = self._errors_of('AB XYZ CDE XYZ FG')
        assert len(errors) == 2 and all(err.code == Error.MANDATORY_CONTINUATION for err in errors)
        # fails to resume parsing
        errors = self._errors_of('AB XYZ CDE XNZ FG')
        assert len(errors) >= 1 and errors[0].code == Error.MANDATORY_CONTINUATION

    def test_series_gap(self):
        errors = self._errors_of('ABDEFG')
        assert len(errors) == 1 and errors[0].code == Error.MANDATORY_CONTINUATION
        # two missing, one wrong element added
        errors = self._errors_of('ABXEFG')
        assert len(errors) == 2 and all(err.code == Error.MANDATORY_CONTINUATION for err in errors)
        errors = self._errors_of('AB_DE_G')
        assert len(errors) == 2 and all(err.code == Error.MANDATORY_CONTINUATION for err in errors)

    def test_series_permutation(self):
        errors = self._errors_of('ABEDFG')
        assert len(errors) >= 1  # cannot really recover from permutation errors

663

664
class TestAllOfResume:
    """Tests resuming and skipping after failures inside an unordered
    all-of parser (< ... >)."""

    def setup(self):
        lang = """
            document = allof
            @ allof_error = '{} erwartet, {} gefunden :-('
            @ allof_skip = /A/, /B/, /C/, /D/, /E/, /F/, /G/
            allof = < "A" "B" § "C" "D" "E" "F" "G" >
        """
        self.gr = grammar_provider(lang)()

    def test_garbage_added(self):
        # the all-of parser matches its elements in any order
        st = self.gr('GFCBAED')
        assert not st.error_flag
        # inserted garbage triggers the customized error message and
        # parsing resumes via @allof_skip
        st = self.gr('GFCB XYZ AED')
        errors = st.errors_sorted
        assert errors[0].code == Error.MANDATORY_CONTINUATION
        assert str(errors[0]).find(':-(') >= 0


    def test_allof_resume_later(self):
        # here, resumption is defined on the enclosing `flow`-symbol
        lang = """
            document = flow "."
            @ flow_resume = '.'
            flow = allof | series
            @ allof_error = '{} erwartet, {} gefunden :-('
            allof = < "A" "B" § "C" "D" "E" "F" "G" >
            series = "E" "X" "Y" "Z"
        """
        gr = grammar_provider(lang)()
        st = gr('GFCBAED.')
        assert not st.error_flag
        st = gr('GFCBAED.')
        assert not st.error_flag
        st = gr('EXYZ.')
        assert not st.error_flag
        st = gr('EDXYZ.')
        assert st.error_flag
        # resuming at '.' (@flow_resume) yields a single error message
        assert len(st.errors_sorted) == 1
        st = gr('FCB_GAED.')
        assert len(st.errors_sorted) == 1

    def test_complex_resume_task(self):
        # resumption defined on several levels: flow, allof and series
        lang = """
            document = flow { flow } "."
            @ flow_resume = '.'
            flow = allof | series
            @ allof_error = '{} erwartet, {} gefunden :-('
            @ allof_resume = 'E', 'A'
            allof = < "A" "B" § "C" "D" "E" "F" "G" >
            @ series_resume = 'E', 'A'
            series = "E" "X" §"Y" "Z"
        """
        gr = grammar_provider(lang)()
        st = gr('GFCBAED.')
        assert not st.error_flag
        st = gr('GFCBAED.')
        assert not st.error_flag
        st = gr('EXYZ.')
        assert not st.error_flag
        st = gr('EDXYZ.')
        assert st.error_flag
        assert len(st.errors_sorted) == 1
        st = gr('FCB_GAED.')
        # resuming at 'E'/'A' leads to a second error message here
        assert len(st.errors_sorted) == 2
        st = gr('EXY EXYZ.')
        assert len(st.errors_sorted) == 1
731

732

733
class TestStaticAnalysis:
    def test_static_analysis(self):
        save = get_config_value('static_analysis')
        set_config_value('static_analysis', 'early')
        # ROBUSTNESS FIX: restore the configuration value even if the test
        # fails, so that later tests do not run with 'early' static analysis.
        try:
            # a loop over a parser that can match the empty string would
            # never terminate; static analysis should flag this
            minilang = """forever = { // } \n"""
            try:
                parser_class = grammar_provider(minilang)
            except CompilationError as error:
                assert all(e.code == Error.INFINITE_LOOP for e in error.errors)
        finally:
            set_config_value('static_analysis', save)
745

746

747
if __name__ == "__main__":
748
    from DHParser.testing import runner
749
    runner("", globals())