test_parse.py 28.8 KB
Newer Older
1
2
#!/usr/bin/python3

3
"""test_parse.py - tests of the parsers-module of DHParser
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22

Author: Eckhart Arnold <arnold@badw.de>

Copyright 2017 Bavarian Academy of Sciences and Humanities

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import sys
23
from functools import partial
24

Eckhart Arnold's avatar
Eckhart Arnold committed
25
sys.path.extend(['../', './'])
26

27
from DHParser.toolkit import compile_python_object
28
from DHParser.log import logging, is_logging, log_ST, log_parsing_history
29
from DHParser.error import Error
eckhart's avatar
eckhart committed
30
31
32
from DHParser.parse import Retrieve, Parser, Grammar, Forward, TKN, ZeroOrMore, RE, \
    RegExp, Lookbehind, NegativeLookahead, OneOrMore, Series, Alternative, AllOf, SomeOf, \
    UnknownParserError
33
from DHParser import compile_source
Eckhart Arnold's avatar
Eckhart Arnold committed
34
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
35
from DHParser.dsl import grammar_provider, DHPARSER_IMPORTS
36
37


eckhart's avatar
eckhart committed
38
39
40
41
42
43
44
45
46
47
class TestParserClass:
    def test_apply(self):
        """Applying the same visitor to the grammar repeatedly must always
        yield the identical traversal, i.e. ``apply`` must be idempotent
        with respect to the order and set of visited parsers."""
        minilang = """
            expr = expr ("+"|"-") term | term
            term = term ("*"|"/") factor | factor
            factor = /[0-9]+/~
            """
        gr = grammar_provider(minilang)()
        collected = []

        def visitor(p: Parser):
            collected.append(p.pname + p.ptype)

        snapshots = []
        for _ in range(3):
            collected.clear()
            gr.root__.apply(visitor)
            snapshots.append(", ".join(collected))
        # all three traversals must agree
        assert snapshots[0] == snapshots[1] == snapshots[2]


61
class TestInfiLoopsAndRecursion:
    def test_direct_left_recursion1(self):
        """A directly left-recursive grammar must parse without errors."""
        minilang = """
            expr = expr ("+"|"-") term | term
            term = term ("*"|"/") factor | factor
            factor = /[0-9]+/~
            """
        snippet = "9 + 8 + 7 + 6 + 5 + 3 * 4"
        parser = grammar_provider(minilang)()
        assert parser
        st = parser(snippet)
        assert not st.error_flag, str(st.errors())
        # the concrete syntax tree must reproduce the input verbatim
        assert snippet == str(st)
        if is_logging():
            log_ST(st, "test_LeftRecursion_direct.cst")
            log_parsing_history(parser, "test_LeftRecursion_direct")

    def test_direct_left_recursion2(self):
        """Left recursion hidden behind a simple alias rule must work, too."""
        minilang = """
            expr = ex
            ex   = expr ("+"|"-") term | term
            term = term ("*"|"/") factor | factor
            factor = /[0-9]+/~
            """
        snippet = "9 + 8 + 7 + 6 + 5 + 3 * 4"
        parser = grammar_provider(minilang)()
        assert parser
        st = parser(snippet)
        assert not st.error_flag, st.errors()
        assert snippet == str(st)

    def test_indirect_left_recursion1(self):
        """Indirect left recursion (Expr -> Product -> Expr ...) must be
        resolved by the parser."""
        minilang = """
            Expr    = //~ (Product | Sum | Value)
            Product = Expr { ('*' | '/') Expr }+
            Sum     = Expr { ('+' | '-') Expr }+
            Value   = /[0-9.]+/~ | '(' Expr ')'
            """
        parser = grammar_provider(minilang)()
        assert parser
        for snippet in ("8 * 4", "7 + 8 * 4", "9 + 8 * (4 + 3)"):
            st = parser(snippet)
            assert not st.error_flag, st.errors()
        # last snippet must round-trip through the tree
        assert snippet == str(st)
        if is_logging():
            log_ST(st, "test_LeftRecursion_indirect.cst")
            log_parsing_history(parser, "test_LeftRecursion_indirect")

    def test_infinite_loops(self):
        """A grammar that loops over a possibly empty match must be stopped
        and flagged with an error rather than running forever."""
        minilang = """not_forever = { // } \n"""
        parser = grammar_provider(minilang)()
        st = parser(" ")
        assert st.error_flag
        # print(st.as_sxpr())
        # print(st.errors())
123

124

Eckhart Arnold's avatar
Eckhart Arnold committed
125
class TestFlowControl:
    """Tests for lookahead/lookbehind flow-control parsers."""

    def setup(self):
        # t1 ends with "END" preceded by a line break -> lookbehind matches
        self.t1 = """
        All work and no play
        makes Jack a dull boy
        END
        """
        # t2 has "END" on the same line -> lookbehind must fail
        self.t2 = "All word and not play makes Jack a dull boy END\n"

    def test_lookbehind(self):
        # NOTE: regex patterns are raw strings to avoid invalid escape
        # sequence warnings ('\s', '\w' are not recognized string escapes)
        ws = RegExp(r'\s*')
        end = RegExp("END")
        doc_end = Lookbehind(RegExp(r'\s*?\n')) + end
        word = RegExp(r'\w+')
        sequence = OneOrMore(NegativeLookahead(end) + word + ws)
        document = ws + sequence + doc_end + ws

        parser = Grammar(document)
        cst = parser(self.t1)
        assert not cst.error_flag, cst.as_sxpr()
        cst = parser(self.t2)
        assert cst.error_flag, cst.as_sxpr()

    def test_lookbehind_indirect(self):
        class LookbehindTestGrammar(Grammar):
            parser_initialization__ = ["upon instantiation"]
            ws = RegExp(r'\s*')
            end = RegExp('END')
            SUCC_LB = RegExp(r'\s*?\n')
            doc_end = Series(Lookbehind(SUCC_LB), end)
            word = RegExp(r'\w+')
            sequence = OneOrMore(Series(NegativeLookahead(end), word, ws))
            document = Series(ws, sequence, doc_end, ws)
            root__ = document

        parser = LookbehindTestGrammar()
        cst = parser(self.t1)
        assert not cst.error_flag, cst.as_sxpr()
        cst = parser(self.t2)
        assert cst.error_flag, cst.as_sxpr()


167
168
169
170
171
172
173
class TestRegex:
    """Tests for multiline/verbose regular expressions and tokens in EBNF."""

    def test_multilineRegex(self):
        mlregex = r"""
        regex =  /\w+    # one or more alphabetical characters including the underscore
                  [+]    # followed by a plus sign
                  \w*    # possibly followed by more alpha chracters/
        """
        result, messages, syntax_tree = compile_source(mlregex, None, get_ebnf_grammar(),
                        get_ebnf_transformer(), get_ebnf_compiler('MultilineRegexTest'))
        assert result
        assert not messages, str(messages)
        parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
        node = parser('abc+def', parser.regex)
        assert not node.error_flag
        assert node.tag_name == "regex"
        assert str(node) == 'abc+def'

    def test_multilineRegex_wo_Comments(self):
        mlregex = r"""
        regex =  /\w+ 
                  [+]  
                  \w* /
        """
        result, messages, syntax_tree = compile_source(mlregex, None, get_ebnf_grammar(),
                        get_ebnf_transformer(), get_ebnf_compiler('MultilineRegexTest'))
        assert result
        assert not messages, str(messages)
        parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
        node = parser('abc+def', parser.regex)
        assert not node.error_flag
        assert node.tag_name == "regex"
        assert str(node) == 'abc+def'

    def test_ignore_case(self):
        # FIX: was misspelled "text_ignore_case", so test runners never
        # discovered or executed this test.
        mlregex = r"""
        @ ignorecase = True
        regex = /alpha/
        """
        result, messages, syntax_tree = compile_source(mlregex, None, get_ebnf_grammar(),
                        get_ebnf_transformer(), get_ebnf_compiler('MultilineRegexTest'))
        assert result
        assert not messages
        parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
        node, rest = parser.regex('Alpha')
        assert node
        assert not node.error_flag
        assert rest == ''
        assert node.tag_name == "regex"
        assert str(node) == 'Alpha'

        mlregex = r"""
        @ ignorecase = False
        regex = /alpha/
        """
        result, messages, syntax_tree = compile_source(mlregex, None, get_ebnf_grammar(),
                        get_ebnf_transformer(), get_ebnf_compiler('MultilineRegexTest'))
        assert result
        assert not messages
        parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
        node, rest = parser.regex('Alpha')
        assert node.error_flag

    def test_token(self):
        tokenlang = r"""
            @whitespace = linefeed
            lang        = "" begin_token {/\w+/ ""} end_token
            begin_token = "\begin{document}"
            end_token   = "\end{document}"
            """
        testdoc = r"""
            \begin{document}
            test
            \end{document}
            """
        result, messages, syntax_tree = compile_source(tokenlang, None, get_ebnf_grammar(),
                                    get_ebnf_transformer(), get_ebnf_compiler("TokenTest"))
        assert result
        assert not messages, str(messages)
        parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
        result = parser(testdoc)
        # log_parsing_history(parser, "test.log")
        assert not result.error_flag

251

252
class TestGrammar:
    """Tests of basic Grammar features: pos-initialization, partial parsing,
    and direct subclassing of Grammar."""

    def setup(self):
        grammar = r"""@whitespace = horizontal
        haupt        = textzeile LEERZEILE
        textzeile    = { WORT }+
        WORT         = /[^ \t]+/~
        LEERZEILE    = /\n[ \t]*(?=\n)/~
        """
        self.pyparser, messages, syntax_tree = compile_source(grammar, None, get_ebnf_grammar(),
                                                              get_ebnf_transformer(), get_ebnf_compiler("PosTest"))
        assert self.pyparser
        assert not messages

    def test_pos_values_initialized(self):
        # checks whether pos values in the parsing result and in the
        # history record have been initialized
        with logging("LOGS"):
            # raw string avoids the invalid '\w' escape warning
            grammar = compile_python_object(DHPARSER_IMPORTS + self.pyparser, r'\w+Grammar$')()
            grammar("no_file_name*")
        for record in grammar.history__:
            assert not record.node or record.node.pos >= 0

    def test_select_parsing(self):
        grammar = compile_python_object(DHPARSER_IMPORTS + self.pyparser, r'\w+Grammar$')()
        grammar("wort", "WORT")
        grammar("eine Zeile", "textzeile")
        grammar("kein Haupt", "haupt")
        grammar("so ist es richtig", "haupt")

    def test_grammar_subclassing(self):
        class Arithmetic(Grammar):
            r'''
            expression =  term  { ("+" | "-") term }
            term       =  factor  { ("*" | "/") factor }
            factor     =  INTEGER | "("  expression  ")"
            INTEGER    =  /\d+/~
            '''
            expression = Forward()
            INTEGER = RE(r'\d+')
            factor = INTEGER | TKN("(") + expression + TKN(")")
            term = factor + ZeroOrMore((TKN("*") | TKN("/")) + factor)
            expression.set(term + ZeroOrMore((TKN("+") | TKN("-")) + term))
            root__ = expression

        grammar = Arithmetic()
        CST = grammar('3+4')
        assert not CST.error_flag, CST.as_sxpr()

300

301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
class TestSeries:
    def test_non_mandatory(self):
        """Without the mandatory marker a failed series simply backtracks."""
        lang = """
        document = series | /.*/
        series = "A" "B" "C" "D"
        """
        parser = grammar_provider(lang)()
        for snippet in ("ABCD", "A_CD", "AB_D"):
            st = parser(snippet)
            assert not st.error_flag

    def test_mandatory(self):
        """Test for the §-operator. The Series-parser should raise an
        error for any non-match that occurs after the mandatory-operator.
        """
        lang = """
        document = series | /.*/
        series = "A" "B" §"C" "D"
        """
        parser = grammar_provider(lang)()
        st = parser("ABCD")
        assert not st.error_flag
        st = parser("A_CD")
        assert not st.error_flag
        st = parser("AB_D")
        assert st.error_flag
        # print(st.errors())
        assert st.errors()[0].code == Error.MANDATORY_CONTINUATION
        # transitivity of mandatory-operator
        st = parser("ABC_")
        assert st.error_flag
        assert st.errors()[0].code == Error.MANDATORY_CONTINUATION

    def test_series_composition(self):
        """Composing series must preserve the mandatory-position relative
        to the start of each component series."""
        TA, TB, TC, TD, TE = (TKN(b) for b in "ABCDE")
        s1 = Series(TA, TB, TC, mandatory=2)
        s2 = Series(TD, TE)

        combined = Alternative(s1 + s2, RegExp('.*'))
        parser = Grammar(combined)
        st = parser("ABCDE")
        assert not st.error_flag
        st = parser("A_CDE")
        assert not st.error_flag
        st = parser("AB_DE")
        assert st.error_flag
        assert st.errors()[0].code == Error.MANDATORY_CONTINUATION
        st = parser("ABC_E")
        assert st.error_flag
        assert st.errors()[0].code == Error.MANDATORY_CONTINUATION

        combined = Alternative(s2 + s1, RegExp('.*'))
        parser = Grammar(combined)
        for snippet in ("DEABC", "_EABC", "D_ABC", "DE_BC", "DEA_C"):
            st = parser(snippet)
            assert not st.error_flag
        st = parser("DEAB_")
        assert st.error_flag
        assert st.errors()[0].code == Error.MANDATORY_CONTINUATION

    # def test_boundary_cases(self):
    #     lang = """
    #     document = series | §!single | /.*/
    #     series = "A" "B" §"C" "D"
    #     single = "E"
    #     """
    #     parser_class = grammar_provider(lang)
    #     parser = parser_class()
    #     print(parser.python_src__)
    #     print(parser_class.python_src__)
eckhart's avatar
eckhart committed
367

368

369
370
371
class TestAllOfSomeOf:
    def test_allOf_order(self):
        """Test that parsers of an AllOf-list can match in arbitrary order."""
        prefixes = AllOf(TKN("A"), TKN("B"))
        assert Grammar(prefixes)('A B').content == 'A B'
        assert Grammar(prefixes)('B A').content == 'B A'
        # alternative form: a single Series wrapped in AllOf
        prefixes = AllOf(Series(TKN("B"), TKN("A")))
        assert Grammar(prefixes)('A B').content == 'A B'

    def test_allOf_completeness(self):
        """Test that an error is raised if not all parsers of an AllOf-list
        match."""
        prefixes = AllOf(TKN("A"), TKN("B"))
        assert Grammar(prefixes)('B').error_flag

    def test_allOf_redundance(self):
        """Test that one and the same parser may be listed several times
        and must then be matched just as many times."""
        prefixes = AllOf(TKN("A"), TKN("B"), TKN("A"))
        for permutation in ('A A B', 'A B A', 'B A A'):
            assert Grammar(prefixes)(permutation).content == permutation
        # 'B' appears twice but is listed only once -> error
        assert Grammar(prefixes)('A B B').error_flag

    def test_someOf_order(self):
        """Test that parsers of a SomeOf-list can match in arbitrary order."""
        prefixes = SomeOf(TKN("A"), TKN("B"))
        assert Grammar(prefixes)('A B').content == 'A B'
        assert Grammar(prefixes)('B A').content == 'B A'
        # alternative form: SomeOf over an Alternative
        prefixes = SomeOf(Alternative(TKN("B"), TKN("A")))
        assert Grammar(prefixes)('A B').content == 'A B'
        # unlike AllOf, SomeOf also accepts a partial match
        assert Grammar(prefixes)('B').content == 'B'

    def test_someOf_redundance(self):
        """Test that one and the same parser may be listed several times
        and must then be matched just as many times."""
        prefixes = SomeOf(TKN("A"), TKN("B"), TKN("A"))
        for permutation in ('A A B', 'A B A', 'B A A'):
            assert Grammar(prefixes)(permutation).content == permutation
        assert Grammar(prefixes)('A B B').error_flag


414
415
416
417
class TestPopRetrieve:
    """Tests of the variable-storing parsers Capture, Retrieve and Pop.

    NOTE: The grammar strings are raw strings now, because they contain
    regex escapes like ``\{`` and ``\w`` which are invalid string escape
    sequences in ordinary string literals (DeprecationWarning).
    """

    mini_language = r"""
        document       = { text | codeblock }
        codeblock      = delimiter { text | (!:delimiter delimiter_sign) } ::delimiter
        delimiter      = delimiter_sign  # never use delimiter between capture and pop except for retrival!
        delimiter_sign = /`+/
        text           = /[^`]+/
        """
    mini_lang2 = r"""
        @braces_filter=counterpart
        document       = { text | codeblock }
        codeblock      = braces { text | opening_braces | (!:braces closing_braces) } ::braces
        braces         = opening_braces
        opening_braces = /\{+/
        closing_braces = /\}+/
        text           = /[^{}]+/
        """
    mini_lang3 = r"""
        document       = { text | env }
        env            = (specialtag | opentag) text [closespecial | closetag]
        opentag        = "<" name ">"
        specialtag     = "<" /ABC/ !name ">"
        closetag       = close_slash | close_star
        close_slash    = "<" ::name "/>"
        close_star     = "<" ::name "*>"
        closespecial   = "<" /ABC/~ ">"
        name           = /\w+/~
        text           = /[^<>]+/
        """

    def setup(self):
        self.minilang_parser = grammar_provider(self.mini_language)()
        self.minilang_parser2 = grammar_provider(self.mini_lang2)()
        self.minilang_parser3 = grammar_provider(self.mini_lang3)()

    @staticmethod
    def opening_delimiter(node, name):
        # True for nodes that represent the capturing (opening) delimiter
        return node.tag_name == name # and not isinstance(node.parser, Retrieve)

    @staticmethod
    def closing_delimiter(node):
        # True for nodes produced by Pop or Retrieve (closing delimiter)
        return node.tag_name in {':Pop', ':Retrieve'}
        # return isinstance(node.parser, Retrieve)

    def test_compile_mini_language(self):
        assert self.minilang_parser
        assert self.minilang_parser2
        assert self.minilang_parser3

    def test_stackhandling(self):
        ambigous_opening = "<ABCnormal> normal tag <ABCnormal*>"
        syntax_tree = self.minilang_parser3(ambigous_opening)
        assert not syntax_tree.error_flag, str(syntax_tree.errors())

        ambigous_opening = "<ABCnormal> normal tag <ABCnormal/>"
        syntax_tree = self.minilang_parser3(ambigous_opening)
        assert not syntax_tree.error_flag, str(syntax_tree.errors())

        forgot_closing_tag = "<em> where is the closing tag?"
        syntax_tree = self.minilang_parser3(forgot_closing_tag)
        assert syntax_tree.error_flag, str(syntax_tree.errors())

        proper = "<em> has closing tag <em/>"
        syntax_tree = self.minilang_parser3(proper)
        assert not syntax_tree.error_flag, str(syntax_tree.errors())

        proper = "<em> has closing tag <em*>"
        syntax_tree = self.minilang_parser3(proper)
        assert not syntax_tree.error_flag, str(syntax_tree.errors())

    def test_cache_neutrality(self):
        """Test that packrat-caching does not interfere with the variable-
        changing parsers: Capture and Retrieve."""
        lang = r"""
            text = opening closing
            opening = (unmarked_package | marked_package)
            closing = ::variable
            unmarked_package = package "."
            marked_package = package "*" "."
            package = "(" variable ")"
            variable = /\w+/~
            """
        case = "(secret)*. secret"
        gr = grammar_provider(lang)()
        st = gr(case)
        assert not st.error_flag, str(st.errors())

    def test_single_line(self):
        teststr = "Anfang ```code block `` <- keine Ende-Zeichen ! ``` Ende"
        syntax_tree = self.minilang_parser(teststr)
        assert not syntax_tree.errors()
        delim = str(next(syntax_tree.select(partial(self.opening_delimiter, name="delimiter"))))
        pop = str(next(syntax_tree.select(self.closing_delimiter)))
        assert delim == pop
        if is_logging():
            log_ST(syntax_tree, "test_PopRetrieve_single_line.cst")

    def test_multi_line(self):
        teststr = """
            Anfang ```code block `` <- keine Ende-Zeichen ! ``` Ebde

            Absatz ohne ``` codeblock, aber
            das stellt sich erst am Ende herause...

            Mehrzeliger ```code block
            """
        syntax_tree = self.minilang_parser(teststr)
        assert not syntax_tree.errors()
        delim = str(next(syntax_tree.select(partial(self.opening_delimiter, name="delimiter"))))
        pop = str(next(syntax_tree.select(self.closing_delimiter)))
        assert delim == pop
        if is_logging():
            log_ST(syntax_tree, "test_PopRetrieve_multi_line.cst")

    def test_single_line_complement(self):
        teststr = "Anfang {{{code block }} <- keine Ende-Zeichen ! }}} Ende"
        syntax_tree = self.minilang_parser2(teststr)
        assert not syntax_tree.errors()
        delim = str(next(syntax_tree.select(partial(self.opening_delimiter, name="braces"))))
        pop = str(next(syntax_tree.select(self.closing_delimiter)))
        # the braces_filter matches the counterpart, not the same string
        assert len(delim) == len(pop) and delim != pop
        if is_logging():
            log_ST(syntax_tree, "test_PopRetrieve_single_line.cst")

    def test_multi_line_complement(self):
        teststr = """
            Anfang {{{code block {{ <- keine Ende-Zeichen ! }}} Ende

            Absatz ohne {{{ codeblock, aber
            das stellt sich erst am Ende heraus...

            Mehrzeliger }}}code block
            """
        syntax_tree = self.minilang_parser2(teststr)
        assert not syntax_tree.errors()
        delim = str(next(syntax_tree.select(partial(self.opening_delimiter, name="braces"))))
        pop = str(next(syntax_tree.select(self.closing_delimiter)))
        assert len(delim) == len(pop) and delim != pop
        if is_logging():
            log_ST(syntax_tree, "test_PopRetrieve_multi_line.cst")
554
555


556
class TestWhitespaceHandling:
    """Token definitions ("...") absorb trailing whitespace, plain regular
    expressions (/.../) do not."""

    minilang = """
        doc = A B
        A = "A"
        B = "B"
        Rdoc = ar br
        ar = /A/
        br = /B/
        """

    def setup(self):
        self.gr = grammar_provider(self.minilang)()

    def test_token_whitespace(self):
        # tokens tolerate intervening whitespace
        st = self.gr("AB", 'doc')
        assert not st.error_flag
        st = self.gr("A B", 'doc')
        assert not st.error_flag

    def test_regexp_whitespace(self):
        # bare regexps do not tolerate intervening whitespace
        st = self.gr("AB", 'Rdoc')
        assert not st.error_flag
        st = self.gr("A B", 'Rdoc')
        assert st.error_flag
di68kap's avatar
di68kap committed
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599


class TestErrorReporting:
    """Mandatory-errors inside a lookahead must not propagate to the
    surrounding context."""

    grammar = """
        root      = series alpha | anything
        series    = subseries &alpha 
        subseries = alpha §beta
        alpha     = /[a-z]+/
        beta      = /[A-Z]+/
        anything  = /.*/
        """

    def setup(self):
        self.parser = grammar_provider(self.grammar)()

    def test_error_propagation(self):
        matching_code = "halloB"
        fallback_code = "XYZ"
        failing_code = "hallo "
        cst = self.parser(matching_code)
        assert not cst.error_flag, str(cst.errors())
        cst = self.parser(fallback_code)
        assert not cst.error_flag
        cst = self.parser(failing_code)
        assert cst.error_flag
605
606


di68kap's avatar
di68kap committed
607
608
609
610
611
612
613
class TestBorderlineCases:
    def test_not_matching(self):
        """Non-matching input yields PARSER_DID_NOT_MATCH, even for empty
        input."""
        minilang = """parser = /X/"""
        gr = grammar_provider(minilang)()
        cst = gr('X', 'parser')
        assert not cst.error_flag
        for document in (' ', ''):
            cst = gr(document, 'parser')
            assert cst.error_flag and cst.errors()[0].code == Error.PARSER_DID_NOT_MATCH

    def test_matching(self):
        """A partial match yields PARSER_STOPPED_BEFORE_END; matching the
        empty string is not an error."""
        minilang = """parser = /.?/"""
        gr = grammar_provider(minilang)()
        cst = gr(' ', 'parser')
        assert not cst.error_flag
        cst = gr('  ', 'parser')
        assert cst.error_flag and cst.errors()[0].code == Error.PARSER_STOPPED_BEFORE_END
        cst = gr('', 'parser')
        assert not cst.error_flag


Eckhart Arnold's avatar
Eckhart Arnold committed
629
class TestReentryAfterError:
    """Tests of the resume-rules mechanism for error recovery."""

    def setup(self):
        lang = """
        document = alpha [beta] gamma "."
          alpha = "ALPHA" abc
            abc = §"a" "b" "c"
          beta = "BETA" (bac | bca)
            bac = "b" "a" §"c"
            bca = "b" "c" §"a"
          gamma = "GAMMA" §(cab | cba)
            cab = "c" "a" §"b"
            cba = "c" "b" §"a"
        """
        self.gr = grammar_provider(lang)()

    def test_no_resume_rules(self):
        gr = self.gr
        gr.resume_rules = dict()
        content = 'ALPHA acb BETA bac GAMMA cab .'
        cst = gr(content)
        # print(cst.as_sxpr())
        assert cst.error_flag
        assert cst.content == content
        assert cst.pick('alpha').content.startswith('ALPHA')

    def test_no_resume_rules_partial_parsing(self):
        gr = self.gr
        gr.resume_rules = dict()
        content = 'ALPHA acb'
        cst = gr(content, 'alpha')
        # print(cst.as_sxpr())
        assert cst.error_flag
        assert cst.content == content
        assert cst.pick('alpha').content.startswith('ALPHA')

    def test_simple_resume_rule(self):
        gr = self.gr
        gr.resume_rules = dict()
        gr.resume_rules__['alpha'] = ['BETA']
        content = 'ALPHA acb BETA bac GAMMA cab .'
        cst = gr(content)
        # print(cst.as_sxpr())
        assert cst.error_flag
        assert cst.content == content
        assert cst.pick('alpha').content.startswith('ALPHA')
        # because of resuming, there should be only one error message
        assert len(cst.errors()) == 1

    def test_failing_resume_rule(self):
        gr = self.gr
        gr.resume_rules = dict()
        gr.resume_rules__['alpha'] = ['XXX']
        content = 'ALPHA acb BETA bac GAMMA cab .'
        cst = gr(content)
        # print(cst.as_sxpr())
        assert cst.error_flag
        assert cst.content == content
        # assert cst.pick('alpha').content.startswith('ALPHA')

    def test_severl_reentry_points(self):
        gr = self.gr
        gr.resume_rules = dict()
        gr.resume_rules__['alpha'] = ['BETA', 'GAMMA']
        content = 'ALPHA acb BETA bac GAMMA cab .'
        cst = gr(content)
        # print(cst.as_sxpr())
        assert cst.error_flag
        assert cst.content == content
        assert cst.pick('alpha').content.startswith('ALPHA')
        # because of resuming, there should be only one error message
        assert len(cst.errors()) == 1

    def test_several_reentry_points_second_point_matching(self):
        gr = self.gr
        gr.resume_rules = dict()
        gr.resume_rules__['alpha'] = ['BETA', 'GAMMA']
        content = 'ALPHA acb GAMMA cab .'
        cst = gr(content)
        # print(cst.as_sxpr())
        assert cst.error_flag
        assert cst.content == content
        assert cst.pick('alpha').content.startswith('ALPHA')
        # because of resuming, there should be only one error message
        assert len(cst.errors()) == 1

    def test_several_resume_rules_innermost_rule_matching(self):
        gr = self.gr
        gr.resume_rules = dict()
        gr.resume_rules__['alpha'] = ['BETA', 'GAMMA']
        gr.resume_rules__['beta'] = ['GAMMA']
        gr.resume_rules__['bac'] = ['GAMMA']
        content = 'ALPHA abc BETA bad GAMMA cab .'
        cst = gr(content)
        # print(cst.as_sxpr())
        assert cst.error_flag
        assert cst.content == content
        assert cst.pick('alpha').content.startswith('ALPHA')
        # because of resuming, there should be only one error message
        assert len(cst.errors()) == 1

        # multiple failures
        content = 'ALPHA acb BETA bad GAMMA cab .'
        cst = gr(content)
        # print(cst.as_sxpr())
        assert cst.error_flag
        assert cst.content == content
        assert cst.pick('alpha').content.startswith('ALPHA')
        # two independent failures -> two error messages
        assert len(cst.errors()) == 2
731

732

733
734
735
736
737
738
739
740
741
742
class TestConfiguredErrorMessages:
    def test_(self):
        """A malformed @..._error directive yields MALFORMED_ERROR_STRING
        in addition to the triggering MANDATORY_CONTINUATION error."""
        lang = """
            document = series | /.*/
            @series_error = "a badly configured error message {5}"
            series = "X" | head §"C" "D"
            head = "A" "B"
            """
        parser = grammar_provider(lang)()
        st = parser("AB_D")
        assert st.error_flag
        assert st.errors()[0].code == Error.MALFORMED_ERROR_STRING
        assert st.errors()[1].code == Error.MANDATORY_CONTINUATION
        # print(st.errors())
746
747


748
749
750
751
752
753
754
755
756
757
class TestUnknownParserError:
    def test_unknown_parser_error(self):
        """Requesting a non-existent start parser must raise
        UnknownParserError."""
        gr = Grammar()
        raised = False
        try:
            gr("", "NonExistantParser")
        except UnknownParserError:
            raised = True
        assert raised, "UnknownParserError expected!"


758
759
760
761
762
763
class TestEarlyTokenWhitespaceDrop:
    """Tests of the @drop-directive that removes token- and whitespace-nodes
    already during parsing."""

    def setup(self):
        # raw string: the grammar contains the regex escape '\d', which is
        # an invalid escape sequence in an ordinary string literal
        self.lang = r"""
            @ drop = token, whitespace
            expression = term  { ("+" | "-") term}
            term       = factor  { ("*"|"/") factor}
            factor     = number | variable | "("  expression  ")" 
                       | constant | fixed
            variable   = /[a-z]/~
            number     = /\d+/~
            constant   = "A" | "B"
            fixed      = "X"   
            """
        self.gr = grammar_provider(self.lang)()

    def test_drop(self):
        cst = self.gr('4 + 3 * 5')
        # print(cst.as_sxpr())
        assert not cst.pick(':Token')
        assert not cst.pick(':Whitespace')
        cst = self.gr('A + B')
        try:
            _ = next(cst.select(lambda node: node.content == 'A'))
            assert False, "Tokens in compound expressions should be dropped!"
        except StopIteration:
            pass
        # tokens that carry content on their own ("X") must be kept
        cst = self.gr('X * y')
        assert next(cst.select(lambda node: node.content == 'X'))
786

787

788
if __name__ == "__main__":
    # Run every Test*-class in this module through DHParser's test runner.
    from DHParser.testing import runner
    with logging(False):
        runner("", globals())