#!/usr/bin/python3

3
"""test_parse.py - tests of the parsers-module of DHParser
Author: Eckhart Arnold <arnold@badw.de>

Copyright 2017 Bavarian Academy of Sciences and Humanities

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import sys
from functools import partial

# Make the DHParser package importable when the tests are run from the
# repository root or from within the test directory.
sys.path.extend(['../', './'])

from DHParser.configuration import get_config_value, set_config_value
from DHParser.toolkit import compile_python_object
from DHParser.log import is_logging, log_ST, log_parsing_history
from DHParser.error import Error, is_error
from DHParser.parse import ParserError, Parser, Grammar, Forward, TKN, ZeroOrMore, RE, \
    RegExp, Lookbehind, NegativeLookahead, OneOrMore, Series, Alternative, AllOf, SomeOf, \
    UnknownParserError, MetaParser, GrammarError, EMPTY_NODE
from DHParser import compile_source
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler, DHPARSER_IMPORTS
from DHParser.dsl import grammar_provider, CompilationError
from DHParser.syntaxtree import Node
from DHParser.stringview import StringView


class TestParserError:
    """Tests for the ParserError exception."""

    def test_parser_error_str(self):
        # The string representation of a ParserError should mention both
        # the text being parsed and the tag name of the node involved.
        pe = ParserError(Node('TAG', 'test').with_pos(0), StringView('Beispiel'), None, True)
        assert str(pe).find('Beispiel') >= 0 and str(pe).find('TAG') >= 0


class TestParserClass:
    """Tests of generic behavior of the Parser base class."""

    def test_apply(self):
        # `apply` should visit every parser of the grammar, and repeated
        # applications should visit the same parsers in the same order.
        minilang = """
            expr = expr ("+"|"-") term | term
            term = term ("*"|"/") factor | factor
            factor = /[0-9]+/~
            """
        gr = grammar_provider(minilang)()
        l = []
        def visitor(p: Parser):
            # closure reads the *current* binding of `l`, so rebinding
            # `l` below starts a fresh collection run
            l.append(p.pname + p.ptype)
        gr.root__.apply(visitor)
        s1 = ", ".join(l)
        l = []
        gr.root__.apply(visitor)
        s2 = ", ".join(l)
        l = []
        gr.root__.apply(visitor)
        s3 = ", ".join(l)
        assert s1 == s2 == s3


class TestInfiLoopsAndRecursion:
    """Tests for left-recursive grammars and automatic breaking of
    infinite parser loops."""

    def test_direct_left_recursion1(self):
        # `expr` refers to itself as its first element: direct left recursion
        minilang = """
            expr = expr ("+"|"-") term | term
            term = term ("*"|"/") factor | factor
            factor = /[0-9]+/~
            """
        snippet = "9 + 8 + 7 + 6 + 5 + 3 * 4"
        parser = grammar_provider(minilang)()
        assert parser
        syntax_tree = parser(snippet)
        assert not is_error(syntax_tree.error_flag), str(syntax_tree.errors_sorted)
        assert snippet == syntax_tree.content, str(syntax_tree)
        if is_logging():
            log_ST(syntax_tree, "test_LeftRecursion_direct.cst")
            log_parsing_history(parser, "test_LeftRecursion_direct")

    def test_direct_left_recursion2(self):
        # same as above, but with one level of indirection through `ex`
        minilang = """
            expr = ex
            ex   = expr ("+"|"-") term | term
            term = term ("*"|"/") factor | factor
            factor = /[0-9]+/~
            """
        snippet = "9 + 8 + 7 + 6 + 5 + 3 * 4"
        parser = grammar_provider(minilang)()
        assert parser
        syntax_tree = parser(snippet)
        assert not is_error(syntax_tree.error_flag), syntax_tree.errors_sorted
        assert snippet == syntax_tree.content

    def test_indirect_left_recursion1(self):
        # Expr -> Product -> Expr etc.: indirect left recursion
        minilang = """
            Expr    = //~ (Product | Sum | Value)
            Product = Expr { ('*' | '/') Expr }+
            Sum     = Expr { ('+' | '-') Expr }+
            Value   = /[0-9.]+/~ | '(' §Expr ')'
            """
        parser = grammar_provider(minilang)()
        assert parser
        snippet = "8 * 4"
        syntax_tree = parser(snippet)
        assert not is_error(syntax_tree.error_flag), syntax_tree.errors_sorted
        snippet = "7 + 8 * 4"
        syntax_tree = parser(snippet)
        assert not is_error(syntax_tree.error_flag), syntax_tree.errors_sorted
        snippet = "9 + 8 * (4 + 3)"
        syntax_tree = parser(snippet)
        assert not is_error(syntax_tree.error_flag), syntax_tree.errors_sorted
        assert snippet == syntax_tree.content
        if is_logging():
            log_ST(syntax_tree, "test_LeftRecursion_indirect.cst")
            log_parsing_history(parser, "test_LeftRecursion_indirect")

    # # BEWARE: EXPERIMENTAL TEST can be long running
    # def test_indirect_left_recursion2(self):
    #     arithmetic_syntax = """
    #         expression     = addition | subtraction
    #         addition       = (expression | term) "+" (expression | term)
    #         subtraction    = (expression | term) "-" (expression | term)
    #         term           = multiplication | division
    #         multiplication = (term | factor) "*" (term | factor)
    #         division       = (term | factor) "/" (term | factor)
    #         factor         = [SIGN] ( NUMBER | VARIABLE | group ) { VARIABLE | group }
    #         group          = "(" expression ")"
    #         SIGN           = /[+-]/
    #         NUMBER         = /(?:0|(?:[1-9]\d*))(?:\.\d+)?/~
    #         VARIABLE       = /[A-Za-z]/~
    #         """
    #     arithmetic = grammar_provider(arithmetic_syntax)()
    #     arithmetic.left_recursion_depth__ = 2
    #     assert arithmetic
    #     syntax_tree = arithmetic("(a + b) * (a - b)")
    #     assert syntax_tree.errors

    def test_break_inifnite_loop_ZeroOrMore(self):
        forever = ZeroOrMore(RegExp(''))
        result = Grammar(forever)('')  # infinite loops will automatically be broken
        assert repr(result) == "Node(:EMPTY__, )", repr(result)

    def test_break_inifnite_loop_OneOrMore(self):
        forever = OneOrMore(RegExp(''))
        result = Grammar(forever)('')  # infinite loops will automatically be broken
        assert repr(result) == "Node(:EMPTY__, )", str(result)

    # def test_infinite_loops(self):
    #     minilang = """forever = { // } \n"""
    #     try:
    #         parser_class = grammar_provider(minilang)
    #     except CompilationError as error:
    #         assert all(e.code == Error.INFINITE_LOOP for e in error.errors)
    #     save = get_config_value('static_analysis')
    #     set_config_value('static_analysis', 'late')
    #     provider = grammar_provider(minilang)
    #     try:
    #         parser = provider()
    #     except GrammarError as error:
    #         assert error.errors[0][2].code == Error.INFINITE_LOOP
    #     set_config_value('static_analysis', 'none')
    #     parser = provider()
    #     snippet = " "
    #     syntax_tree = parser(snippet)
    #     assert any(e.code == Error.INFINITE_LOOP for e in syntax_tree.errors)
    #     res = parser.static_analysis()
    #     assert res and res[0][2].code == Error.INFINITE_LOOP
    #     minilang = """not_forever = { / / } \n"""
    #     parser = grammar_provider(minilang)()
    #     res = parser.static_analysis()
    #     assert not res
    #     set_config_value('static_analysis', save)


class TestFlowControl:
    """Tests for flow-control parsers (lookahead / lookbehind)."""

    def setup(self):
        self.t1 = """
        All work and no play
        makes Jack a dull boy
        END
        """
        self.t2 = "All word and not play makes Jack a dull boy END\n"

    def test_lookbehind(self):
        # "END" only counts as document end if preceded by a line break
        ws = RegExp(r'\s*')
        end = RegExp("END")
        doc_end = Lookbehind(RegExp('\\s*?\\n')) + end
        word = RegExp(r'\w+')
        sequence = OneOrMore(NegativeLookahead(end) + word + ws)
        document = ws + sequence + doc_end + ws

        parser = Grammar(document)
        cst = parser(self.t1)
        assert not cst.error_flag, cst.as_sxpr()
        cst = parser(self.t2)
        assert cst.error_flag, cst.as_sxpr()

    def test_lookbehind_indirect(self):
        # same grammar, but defined declaratively via Grammar-subclassing
        class LookbehindTestGrammar(Grammar):
            parser_initialization__ = ["upon instantiation"]
            ws = RegExp(r'\s*')
            end = RegExp('END')
            SUCC_LB = RegExp('\\s*?\\n')
            doc_end = Series(Lookbehind(SUCC_LB), end)
            word = RegExp(r'\w+')
            sequence = OneOrMore(Series(NegativeLookahead(end), word, ws))
            document = Series(ws, sequence, doc_end, ws)
            root__ = document

        parser = LookbehindTestGrammar()
        cst = parser(self.t1)
        assert not cst.error_flag, cst.as_sxpr()
        cst = parser(self.t2)
        assert cst.error_flag, cst.as_sxpr()


class TestRegex:
    """Tests of regular-expression and token parsing in EBNF grammars."""

    def test_multilineRegex(self):
        mlregex = r"""
        regex =  /\w+    # one or more alphabetical characters including the underscore
                  [+]    # followed by a plus sign
                  \w*    # possibly followed by more alpha chracters/
        """
        result, messages, syntax_tree = compile_source(mlregex, None, get_ebnf_grammar(),
                        get_ebnf_transformer(), get_ebnf_compiler('MultilineRegexTest'))
        assert result
        assert not messages, str(messages)
        parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
        node = parser('abc+def', parser.regex)
        assert not node.error_flag
        assert node.tag_name == "regex"
        assert str(node) == 'abc+def'

    def test_multilineRegex_wo_Comments(self):
        mlregex = r"""
        regex =  /\w+
                  [+]
                  \w* /
        """
        result, messages, syntax_tree = compile_source(mlregex, None, get_ebnf_grammar(),
                        get_ebnf_transformer(), get_ebnf_compiler('MultilineRegexTest'))
        assert result
        assert not messages, str(messages)
        parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
        node = parser('abc+def', parser.regex)
        assert not node.error_flag
        assert node.tag_name == "regex"
        assert str(node) == 'abc+def'

    # NOTE(review): name starts with 'text_' instead of 'test_', so standard
    # test runners will not pick this method up -- looks like a typo; confirm
    # intent before renaming.
    def text_ignore_case(self):
        mlregex = r"""
        @ ignorecase = True
        regex = /alpha/
        """
        result, messages, syntax_tree = compile_source(mlregex, None, get_ebnf_grammar(),
                        get_ebnf_transformer(), get_ebnf_compiler('MultilineRegexTest'))
        assert result
        assert not messages
        parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
        node, rest = parser.regex('Alpha')
        assert node
        assert not node.error_flag
        assert rest == ''
        assert node.tag_name == "regex"
        assert str(node) == 'Alpha'

        mlregex = r"""
        @ ignorecase = False
        regex = /alpha/
        """
        result, messages, syntax_tree = compile_source(mlregex, None, get_ebnf_grammar(),
                        get_ebnf_transformer(), get_ebnf_compiler('MultilineRegexTest'))
        assert result
        assert not messages
        parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
        node, rest = parser.regex('Alpha')
        assert node.error_flag

    def test_token(self):
        tokenlang = r"""
            @whitespace = linefeed
            lang        = "" begin_token {/\w+/ ""} end_token
            begin_token = "\begin{document}"
            end_token   = "\end{document}"
            """
        testdoc = r"""
            \begin{document}
            test
            \end{document}
            """
        result, messages, syntax_tree = compile_source(
            tokenlang, None, get_ebnf_grammar(), get_ebnf_transformer(),
            get_ebnf_compiler("TokenTest"))
        assert result
        assert not messages, str(messages)
        parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
        result = parser(testdoc)
        # log_parsing_history(parser, "test.log")
        assert not result.error_flag


class TestGrammar:
    """Tests of the Grammar class itself (positions, partial parsing,
    subclassing)."""

    def setup(self):
        grammar = r"""@whitespace = horizontal
        haupt        = textzeile LEERZEILE
        textzeile    = { WORT }+
        WORT         = /[^ \t]+/~
        LEERZEILE    = /\n[ \t]*(?=\n)/~
        """
        self.pyparser, messages, syntax_tree = compile_source(grammar, None, get_ebnf_grammar(),
                                                              get_ebnf_transformer(), get_ebnf_compiler("PosTest"))
        assert self.pyparser
        assert not messages

    def test_pos_values_initialized(self):
        # checks whether pos values in the parsing result and in the
        # history record have been initialized
        grammar = compile_python_object(DHPARSER_IMPORTS + self.pyparser, r'\w+Grammar$')()
        grammar("no_file_name*")
        for record in grammar.history__:
            assert not record.node or record.node.pos >= 0

    def test_select_parsing(self):
        # a grammar object can be called with a start symbol other than root
        grammar = compile_python_object(DHPARSER_IMPORTS + self.pyparser, r'\w+Grammar$')()
        grammar("wort", "WORT")
        grammar("eine Zeile", "textzeile")
        grammar("kein Haupt", "haupt")
        grammar("so ist es richtig", "haupt")

    def test_grammar_subclassing(self):
        # grammars can also be written by hand as Grammar subclasses
        class Arithmetic(Grammar):
            r'''
            expression =  term  { ("+" | "-") term }
            term       =  factor  { ("*" | "/") factor }
            factor     =  INTEGER | "("  expression  ")"
            INTEGER    =  /\d+/~
            '''
            expression = Forward()
            INTEGER = RE('\\d+')
            factor = INTEGER | TKN("(") + expression + TKN(")")
            term = factor + ZeroOrMore((TKN("*") | TKN("/")) + factor)
            expression.set(term + ZeroOrMore((TKN("+") | TKN("-")) + term))
            root__ = expression

        grammar = Arithmetic()
        CST = grammar('3+4')
        assert not CST.error_flag, CST.as_sxpr()


class TestSeries:
    """Tests of the Series parser, in particular the mandatory-operator §."""

    def test_non_mandatory(self):
        # without §, a failing series simply falls through to the alternative
        lang = """
        document = series | /.*/
        series = "A" "B" "C" "D"
        """
        parser = grammar_provider(lang)()
        st = parser("ABCD")
        assert not st.error_flag
        st = parser("A_CD")
        assert not st.error_flag
        st = parser("AB_D")
        assert not st.error_flag

    def test_mandatory(self):
        """Test for the §-operator. The Series-parser should raise an
        error for any non-match that occurs after the mandatory-operator.
        """
        lang = """
        document = series | /.*/
        series = "A" "B" §"C" "D"
        """
        parser = grammar_provider(lang)()
        st = parser("ABCD");  assert not st.error_flag
        st = parser("A_CD");  assert not st.error_flag
        st = parser("AB_D");  assert st.error_flag
        assert st.errors_sorted[0].code == Error.MANDATORY_CONTINUATION
        # transitivity of mandatory-operator
        st = parser("ABC_");  assert st.error_flag
        assert st.errors_sorted[0].code == Error.MANDATORY_CONTINUATION

    def test_series_composition(self):
        # composing series with + must preserve the mandatory marker
        TA, TB, TC, TD, TE = (TKN(b) for b in "ABCDE")
        s1 = Series(TA, TB, TC, mandatory=2)
        s2 = Series(TD, TE)

        combined = Alternative(s1 + s2, RegExp('.*'))
        parser = Grammar(combined)
        st = parser("ABCDE");  assert not st.error_flag
        st = parser("A_CDE");  assert not st.error_flag
        st = parser("AB_DE");  assert st.error_flag
        assert st.errors_sorted[0].code == Error.MANDATORY_CONTINUATION
        st = parser("ABC_E");  assert st.error_flag
        assert st.errors_sorted[0].code == Error.MANDATORY_CONTINUATION

        combined = Alternative(s2 + s1, RegExp('.*'))
        parser = Grammar(combined)
        st = parser("DEABC");  assert not st.error_flag
        st = parser("_EABC");  assert not st.error_flag
        st = parser("D_ABC");  assert not st.error_flag
        st = parser("DE_BC");  assert not st.error_flag
        st = parser("DEA_C");  assert not st.error_flag
        st = parser("DEAB_");  assert st.error_flag
        assert st.errors_sorted[0].code == Error.MANDATORY_CONTINUATION

    # def test_boundary_cases(self):
    #     lang = """
    #     document = series | §!single | /.*/
    #     series = "A" "B" §"C" "D"
    #     single = "E"
    #     """
    #     parser_class = grammar_provider(lang)
    #     parser = parser_class()
    #     print(parser.python_src__)
    #     print(parser_class.python_src__)


class TestAllOfSomeOf:
    """Tests of the unordered-sequence parsers AllOf and SomeOf."""

    def test_allOf_order(self):
        """Test that parsers of an AllOf-List can match in arbitrary order."""
        prefixes = AllOf(TKN("A"), TKN("B"))
        assert Grammar(prefixes)('A B').content == 'A B'
        assert Grammar(prefixes)('B A').content == 'B A'
        # alternative form: wrapping a Series
        prefixes = AllOf(Series(TKN("B"), TKN("A")))
        assert Grammar(prefixes)('A B').content == 'A B'

    def test_allOf_completeness(self):
        """Test that an error is raised if not all parsers of an AllOf-List
        match."""
        prefixes = AllOf(TKN("A"), TKN("B"))
        assert Grammar(prefixes)('B').error_flag

    def test_allOf_redundance(self):
        """Test that one and the same parser may be listed several times
        and must be matched several times accordingly."""
        prefixes = AllOf(TKN("A"), TKN("B"), TKN("A"))
        assert Grammar(prefixes)('A A B').content == 'A A B'
        assert Grammar(prefixes)('A B A').content == 'A B A'
        assert Grammar(prefixes)('B A A').content == 'B A A'
        assert Grammar(prefixes)('A B B').error_flag

    def test_someOf_order(self):
        """Test that parsers of a SomeOf-List can match in arbitrary order."""
        prefixes = SomeOf(TKN("A"), TKN("B"))
        assert Grammar(prefixes)('A B').content == 'A B'
        assert Grammar(prefixes)('B A').content == 'B A'
        # alternative form: wrapping an Alternative
        prefixes = SomeOf(Alternative(TKN("B"), TKN("A")))
        assert Grammar(prefixes)('A B').content == 'A B'
        assert Grammar(prefixes)('B').content == 'B'

    def test_someOf_redundance(self):
        """Test that one and the same parser may be listed several times
        and must be matched several times accordingly."""
        prefixes = SomeOf(TKN("A"), TKN("B"), TKN("A"))
        assert Grammar(prefixes)('A A B').content == 'A A B'
        assert Grammar(prefixes)('A B A').content == 'A B A'
        assert Grammar(prefixes)('B A A').content == 'B A A'
        assert Grammar(prefixes)('A B B').error_flag


class TestPopRetrieve:
    """Tests of the stack-based Capture/Retrieve/Pop parsers."""

    mini_language = r"""
        document       = { text | codeblock }
        codeblock      = delimiter { text | (!:delimiter delimiter_sign) } ::delimiter
        delimiter      = delimiter_sign  # never use delimiter between capture and pop except for retrival!
        delimiter_sign = /`+/
        text           = /[^`]+/
        """
    mini_lang2 = r"""
        @braces_filter=counterpart
        document       = { text | codeblock }
        codeblock      = braces { text | opening_braces | (!:braces closing_braces) } ::braces
        braces         = opening_braces
        opening_braces = /\{+/
        closing_braces = /\}+/
        text           = /[^{}]+/
        """
    mini_lang3 = r"""
        document       = { text | env }
        env            = (specialtag | opentag) text [closespecial | closetag]
        opentag        = "<" name ">"
        specialtag     = "<" /ABC/ !name ">"
        closetag       = close_slash | close_star
        close_slash    = "<" ::name "/>"
        close_star     = "<" ::name "*>"
        closespecial   = "<" /ABC/~ ">"
        name           = /\w+/~
        text           = /[^<>]+/
        """

    def setup(self):
        self.minilang_parser = grammar_provider(self.mini_language)()
        self.minilang_parser2 = grammar_provider(self.mini_lang2)()
        self.minilang_parser3 = grammar_provider(self.mini_lang3)()

    @staticmethod
    def opening_delimiter(node, name):
        # selects the capture node of the given name
        return node.tag_name == name # and not isinstance(node.parser, Retrieve)

    @staticmethod
    def closing_delimiter(node):
        # selects the matching pop/retrieve node
        return node.tag_name in {':Pop', ':Retrieve'}
        # return isinstance(node.parser, Retrieve)

    def test_compile_mini_language(self):
        assert self.minilang_parser
        assert self.minilang_parser2
        assert self.minilang_parser3

    def test_stackhandling(self):
        ambigous_opening = "<ABCnormal> normal tag <ABCnormal*>"
        syntax_tree = self.minilang_parser3(ambigous_opening)
        assert not syntax_tree.error_flag, str(syntax_tree.errors_sorted)

        ambigous_opening = "<ABCnormal> normal tag <ABCnormal/>"
        syntax_tree = self.minilang_parser3(ambigous_opening)
        assert not syntax_tree.error_flag, str(syntax_tree.errors_sorted)

        forgot_closing_tag = "<em> where is the closing tag?"
        syntax_tree = self.minilang_parser3(forgot_closing_tag)
        assert syntax_tree.error_flag, str(syntax_tree.errors_sorted)

        proper = "<em> has closing tag <em/>"
        syntax_tree = self.minilang_parser3(proper)
        assert not syntax_tree.error_flag, str(syntax_tree.errors_sorted)

        proper = "<em> has closing tag <em*>"
        syntax_tree = self.minilang_parser3(proper)
        assert not syntax_tree.error_flag, str(syntax_tree.errors_sorted)

    def test_cache_neutrality(self):
        """Test that packrat-caching does not interfere with the variable-
        changing parsers: Capture and Retrieve."""
        lang = r"""
            text = opening closing
            opening = (unmarked_package | marked_package)
            closing = ::variable
            unmarked_package = package "."
            marked_package = package "*" "."
            package = "(" variable ")"
            variable = /\w+/~
            """
        case = "(secret)*. secret"
        gr = grammar_provider(lang)()
        st = gr(case)
        assert not st.error_flag, str(st.errors_sorted)

    def test_single_line(self):
        teststr = "Anfang ```code block `` <- keine Ende-Zeichen ! ``` Ende"
        syntax_tree = self.minilang_parser(teststr)
        assert not syntax_tree.errors_sorted
        delim = str(next(syntax_tree.select_if(partial(self.opening_delimiter, name="delimiter"))))
        pop = str(next(syntax_tree.select_if(self.closing_delimiter)))
        assert delim == pop
        if is_logging():
            log_ST(syntax_tree, "test_PopRetrieve_single_line.cst")

    def test_multi_line(self):
        teststr = """
            Anfang ```code block `` <- keine Ende-Zeichen ! ``` Ebde

            Absatz ohne ``` codeblock, aber
            das stellt sich erst am Ende herause...

            Mehrzeliger ```code block
            """
        syntax_tree = self.minilang_parser(teststr)
        assert not syntax_tree.errors_sorted
        delim = str(next(syntax_tree.select_if(partial(self.opening_delimiter, name="delimiter"))))
        pop = str(next(syntax_tree.select_if(self.closing_delimiter)))
        assert delim == pop
        if is_logging():
            log_ST(syntax_tree, "test_PopRetrieve_multi_line.cst")

    def test_single_line_complement(self):
        # with @braces_filter=counterpart, the popped value is the
        # complementary (closing) brace, not a literal repetition
        teststr = "Anfang {{{code block }} <- keine Ende-Zeichen ! }}} Ende"
        syntax_tree = self.minilang_parser2(teststr)
        assert not syntax_tree.errors_sorted
        delim = str(next(syntax_tree.select_if(partial(self.opening_delimiter, name="braces"))))
        pop = str(next(syntax_tree.select_if(self.closing_delimiter)))
        assert len(delim) == len(pop) and delim != pop
        if is_logging():
            log_ST(syntax_tree, "test_PopRetrieve_single_line.cst")

    def test_multi_line_complement(self):
        teststr = """
            Anfang {{{code block {{ <- keine Ende-Zeichen ! }}} Ende

            Absatz ohne {{{ codeblock, aber
            das stellt sich erst am Ende heraus...

            Mehrzeliger }}}code block
            """
        syntax_tree = self.minilang_parser2(teststr)
        assert not syntax_tree.errors_sorted
        delim = str(next(syntax_tree.select_if(partial(self.opening_delimiter, name="braces"))))
        pop = str(next(syntax_tree.select_if(self.closing_delimiter)))
        assert len(delim) == len(pop) and delim != pop
        if is_logging():
            log_ST(syntax_tree, "test_PopRetrieve_multi_line.cst")


class TestWhitespaceHandling:
    """Tests that insignificant whitespace is allowed after tokens but
    not after plain regular expressions."""

    minilang = """
        doc = A B
        A = "A"
        B = "B"
        Rdoc = ar br
        ar = /A/
        br = /B/
        """

    def setup(self):
        self.gr = grammar_provider(self.minilang)()

    def test_token_whitespace(self):
        st = self.gr("AB", 'doc')
        assert not st.error_flag
        st = self.gr("A B", 'doc')
        assert not st.error_flag

    def test_regexp_whitespace(self):
        st = self.gr("AB", 'Rdoc')
        assert not st.error_flag
        st = self.gr("A B", 'Rdoc')
        assert st.error_flag


class TestErrorReporting:
    """Tests that parsing errors are reported (and not falsely reported)."""

    grammar = """
        root      = series alpha | anything
        series    = subseries &alpha
        subseries = alpha §beta
        alpha     = /[a-z]+/
        beta      = /[A-Z]+/
        anything  = /.*/
        """

    def setup(self):
        self.parser = grammar_provider(self.grammar)()

    def test_error_propagation(self):
        testcode1 = "halloB"
        testcode2 = "XYZ"
        testcode3 = "hallo "
        cst = self.parser(testcode1)
        assert not cst.error_flag, str(cst.errors_sorted)
        cst = self.parser(testcode2)
        assert not cst.error_flag
        cst = self.parser(testcode3)
        assert cst.error_flag


class TestBorderlineCases:
    """Tests of minimal grammars and empty input."""

    def test_not_matching(self):
        minilang = """parser = /X/\n"""
        gr = grammar_provider(minilang)()
        cst = gr('X', 'parser')
        assert not cst.error_flag
        cst = gr(' ', 'parser')
        assert cst.error_flag and cst.errors_sorted[0].code == Error.PARSER_DID_NOT_MATCH
        cst = gr('', 'parser')
        assert cst.error_flag and cst.errors_sorted[0].code == Error.PARSER_DID_NOT_MATCH

    def test_matching(self):
        minilang = """parser = /.?/"""
        gr = grammar_provider(minilang)()
        cst = gr(' ', 'parser')
        assert not cst.error_flag
        cst = gr('  ', 'parser')
        assert cst.error_flag and cst.errors_sorted[0].code == Error.PARSER_STOPPED_BEFORE_END
        cst = gr('', 'parser')
        assert not cst.error_flag


class TestReentryAfterError:
    """Tests the re-entry (resume) mechanism after a mandatory-item
    (§) failure, configured via the grammar's resume rules."""

    def setup(self):
        lang = """
        document = alpha [beta] gamma "."
          alpha = "ALPHA" abc
            abc = §"a" "b" "c"
          beta = "BETA" (bac | bca)
            bac = "b" "a" §"c"
            bca = "b" "c" §"a"
          gamma = "GAMMA" §(cab | cba)
            cab = "c" "a" §"b"
            cba = "c" "b" §"a"
        """
        self.gr = grammar_provider(lang)()

    def _expect_full_tree(self, cst, content):
        # Parsing failed somewhere, but the tree must still cover the
        # complete input and contain the 'alpha' branch.
        assert cst.error_flag
        assert cst.content == content
        assert cst.pick('alpha').content.startswith('ALPHA')

    def test_no_resume_rules(self):
        gr = self.gr
        # NOTE(review): this assigns `resume_rules`, whereas the rules are
        # installed below via `resume_rules__` -- confirm both names refer
        # to the same mapping on Grammar instances.
        gr.resume_rules = dict()
        content = 'ALPHA acb BETA bac GAMMA cab .'
        self._expect_full_tree(gr(content), content)

    def test_no_resume_rules_partial_parsing(self):
        gr = self.gr
        gr.resume_rules = dict()
        content = 'ALPHA acb'
        cst = gr(content, 'alpha')
        self._expect_full_tree(cst, content)

    def test_simple_resume_rule(self):
        gr = self.gr
        gr.resume_rules = dict()
        gr.resume_rules__['alpha'] = ['BETA']
        content = 'ALPHA acb BETA bac GAMMA cab .'
        cst = gr(content)
        self._expect_full_tree(cst, content)
        # because of resuming, there should be only one error message
        assert len(cst.errors_sorted) == 1

    def test_failing_resume_rule(self):
        gr = self.gr
        gr.resume_rules = dict()
        gr.resume_rules__['alpha'] = ['XXX']
        content = 'ALPHA acb BETA bac GAMMA cab .'
        cst = gr(content)
        assert cst.error_flag
        assert cst.content == content
        # with a re-entry point that never matches, the 'alpha' node is
        # not guaranteed to be recoverable from the tree:
        # assert cst.pick('alpha').content.startswith('ALPHA')

    def test_severl_reentry_points(self):
        gr = self.gr
        gr.resume_rules = dict()
        gr.resume_rules__['alpha'] = ['BETA', 'GAMMA']
        content = 'ALPHA acb BETA bac GAMMA cab .'
        cst = gr(content)
        self._expect_full_tree(cst, content)
        # because of resuming, there should be only one error message
        assert len(cst.errors_sorted) == 1

    def test_several_reentry_points_second_point_matching(self):
        gr = self.gr
        gr.resume_rules = dict()
        gr.resume_rules__['alpha'] = ['BETA', 'GAMMA']
        content = 'ALPHA acb GAMMA cab .'
        cst = gr(content)
        self._expect_full_tree(cst, content)
        # because of resuming, there should be only one error message
        assert len(cst.errors_sorted) == 1

    def test_several_resume_rules_innermost_rule_matching(self):
        gr = self.gr
        gr.resume_rules = dict()
        gr.resume_rules__['alpha'] = ['BETA', 'GAMMA']
        gr.resume_rules__['beta'] = ['GAMMA']
        gr.resume_rules__['bac'] = ['GAMMA']
        content = 'ALPHA abc BETA bad GAMMA cab .'
        cst = gr(content)
        self._expect_full_tree(cst, content)
        # because of resuming, there should be only one error message
        assert len(cst.errors_sorted) == 1
        # multiple failures
        content = 'ALPHA acb BETA bad GAMMA cab .'
        cst = gr(content)
        self._expect_full_tree(cst, content)
        # one error message per resumed failure
        assert len(cst.errors_sorted) == 2
778

779

780
class TestConfiguredErrorMessages:
    """A malformed @..._error directive must itself be reported as an
    error, in addition to the error it was meant to customize."""

    def test_configured_error_message(self):
        lang = """
            document = series | /.*/
            @series_error = "a badly configured error message {5}"
            series = "X" | head §"C" "D"
            head = "A" "B"
            """
        st = grammar_provider(lang)()("AB_D")
        assert st.error_flag
        codes = [err.code for err in st.errors_sorted]
        # first the broken error string is flagged, then the actual failure
        assert codes[0] == Error.MALFORMED_ERROR_STRING
        assert codes[1] == Error.MANDATORY_CONTINUATION
792
793


794
795
796
797
798
799
800
801
802
803
class TestUnknownParserError:
    """Calling a grammar with a non-existing start symbol must raise
    UnknownParserError."""

    def test_unknown_parser_error(self):
        grammar = Grammar()
        try:
            grammar("", "NonExistantParser")
        except UnknownParserError:
            return  # expected outcome
        assert False, "UnknownParserError expected!"


804
805
class TestEarlyTokenWhitespaceDrop:
    """With '@ drop = token, whitespace', anonymous tokens and whitespace
    must be removed from the tree already during parsing."""

    def setup(self):
        self.lang = r"""
            @ drop = token, whitespace
            expression = term  { ("+" | "-") term}
            term       = factor  { ("*"|"/") factor}
            factor     = number | variable | "("  expression  ")"
                       | constant | fixed
            variable   = /[a-z]/~
            number     = /\d+/~
            constant   = "A" | "B"
            fixed      = "X"
            """
        self.gr = grammar_provider(self.lang)()

    def test_drop(self):
        def find(tree, text):
            return tree.select_if(lambda nd: nd.content == text)

        tree = self.gr('4 + 3 * 5')
        # neither anonymous tokens nor whitespace may survive parsing
        assert not tree.pick(':Token')
        assert not tree.pick(':Whitespace')
        # tokens inside a compound expression ('constant') are dropped ...
        tree = self.gr('A + B')
        try:
            next(find(tree, 'A'))
            assert False, "Tokens in compound expressions should be dropped!"
        except StopIteration:
            pass
        # ... but the content of named single-token rules ('fixed') is kept
        tree = self.gr('X * y')
        assert next(find(tree, 'X'))
831

832

Eckhart Arnold's avatar
Eckhart Arnold committed
833
class TestMetaParser:
    """Unit-tests for MetaParser's normalization of return values
    (wrapping, flattening and the EMPTY_NODE shortcut)."""

    def setup(self):
        self.mp = MetaParser()
        self.mp.grammar = Grammar()  # override placeholder warning
        self.mp.pname = "named"
        self.mp.tag_name = self.mp.pname

    def test_return_value(self):
        # Run with tree-flattening enabled.  Restore the previous setting in
        # a finally-clause, so a failing assertion cannot leak the changed
        # configuration into other tests (the original restored it only on
        # the success path).
        save = get_config_value('flatten_tree_while_parsing')
        set_config_value('flatten_tree_while_parsing', True)
        try:
            # named parser, tagged non-empty node -> wrapped in 'named'
            nd = self.mp._return_value(Node('tagged', 'non-empty'))
            assert nd.tag_name == 'named', nd.as_sxpr()
            assert len(nd.children) == 1
            assert nd.children[0].tag_name == 'tagged'
            assert nd.children[0].result == "non-empty"
            # named parser, tagged empty node -> wrapped, emptiness preserved
            nd = self.mp._return_value(Node('tagged', ''))
            assert nd.tag_name == 'named', nd.as_sxpr()
            assert len(nd.children) == 1
            assert nd.children[0].tag_name == 'tagged'
            assert not nd.children[0].result
            # named parser, anonymous node -> flattened into 'named'
            nd = self.mp._return_value(Node(':anonymous', 'content'))
            assert nd.tag_name == 'named', nd.as_sxpr()
            assert not nd.children
            assert nd.result == 'content'
            nd = self.mp._return_value(Node(':anonymous', ''))
            assert nd.tag_name == 'named', nd.as_sxpr()
            assert not nd.children
            assert not nd.content
            # named parser swallows EMPTY_NODE into an empty 'named' node
            nd = self.mp._return_value(EMPTY_NODE)
            assert nd.tag_name == 'named' and not nd.children, nd.as_sxpr()
            # unnamed parser: nodes pass through unchanged
            self.mp.pname = ''
            self.mp.tag_name = ':unnamed'
            nd = self.mp._return_value(Node('tagged', 'non-empty'))
            assert nd.tag_name == 'tagged', nd.as_sxpr()
            assert len(nd.children) == 0
            assert nd.content == 'non-empty'
            nd = self.mp._return_value(Node('tagged', ''))
            assert nd.tag_name == 'tagged', nd.as_sxpr()
            assert len(nd.children) == 0
            assert not nd.content
            nd = self.mp._return_value(Node(':anonymous', 'content'))
            assert nd.tag_name == ':anonymous', nd.as_sxpr()
            assert not nd.children
            assert nd.result == 'content'
            nd = self.mp._return_value(Node('', ''))
            assert nd.tag_name == '', nd.as_sxpr()
            assert not nd.children
            assert not nd.content
            # None and EMPTY_NODE short-circuit to EMPTY_NODE
            assert self.mp._return_value(None) == EMPTY_NODE
            assert self.mp._return_value(EMPTY_NODE) == EMPTY_NODE
        finally:
            set_config_value('flatten_tree_while_parsing', save)

    def test_return_values(self):
        self.mp.pname = "named"
        self.mp.tag_name = self.mp.pname
        rv = self.mp._return_values((Node('tag', 'content'), EMPTY_NODE))
        # EMPTY_NODE entries must not survive in the returned tuple
        assert rv[-1].tag_name != EMPTY_NODE.tag_name, rv[-1].tag_name

    def test_in_context(self):
        # NOTE: raw string, because the grammar contains regex escapes
        # (\d, \.) that are invalid escape sequences in a plain string
        # literal (DeprecationWarning now, SyntaxError in future Python).
        minilang = r"""
            term       = factor  { (DIV|MUL) factor}
            factor     = NUMBER | VARIABLE
            MUL        = "*" | &factor
            DIV        = "/"
            NUMBER     = /(?:0|(?:[1-9]\d*))(?:\.\d+)?/~
            VARIABLE   = /[A-Za-z]/~
            """
        gr = grammar_provider(minilang)()
        cst = gr("2x")
        assert bool(cst.pick('MUL')), "Named empty nodes should not be dropped!!!"
Eckhart Arnold's avatar
Eckhart Arnold committed
903

eckhart's avatar
eckhart committed
904

905
if __name__ == "__main__":
    # Discover and run all Test* classes of this module with DHParser's
    # own test runner (no pytest required).
    from DHParser.testing import runner
    runner("", globals())