#!/usr/bin/python3

"""test_parse.py - tests of the parsers-module of DHParser


Author: Eckhart Arnold <arnold@badw.de>

Copyright 2017 Bavarian Academy of Sciences and Humanities

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import sys
23
from functools import partial
24

Eckhart Arnold's avatar
Eckhart Arnold committed
25
sys.path.extend(['../', './'])
26

27
from DHParser.toolkit import compile_python_object, get_config_value, set_config_value
28
from DHParser.log import logging, is_logging, log_ST, log_parsing_history
29
from DHParser.error import Error, is_error
30
from DHParser.parse import ParserError, Parser, Grammar, Forward, TKN, ZeroOrMore, RE, \
eckhart's avatar
eckhart committed
31
    RegExp, Lookbehind, NegativeLookahead, OneOrMore, Series, Alternative, AllOf, SomeOf, \
32
    UnknownParserError, MetaParser, GrammarError, EMPTY_NODE
33
from DHParser import compile_source
34
35
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler, DHPARSER_IMPORTS
from DHParser.dsl import grammar_provider, CompilationError
Eckhart Arnold's avatar
Eckhart Arnold committed
36
from DHParser.syntaxtree import Node
37
from DHParser.stringview import StringView
38
39


40
41
class TestParserError:
    def test_parser_error_str(self):
42
        pe = ParserError(Node('TAG', 'test').with_pos(0), StringView('Beispiel'), None, True)
43
44
        assert str(pe).find('Beispiel') >= 0 and str(pe).find('TAG') >= 0

eckhart's avatar
eckhart committed
45

eckhart's avatar
eckhart committed
46
47
48
49
50
51
52
53
54
55
class TestParserClass:
    def test_apply(self):
        minilang ="""
            expr = expr ("+"|"-") term | term
            term = term ("*"|"/") factor | factor
            factor = /[0-9]+/~
            """
        gr = grammar_provider(minilang)()
        l = []
        def visitor(p: Parser):
di68kap's avatar
di68kap committed
56
            l.append(p.pname + p.ptype)
eckhart's avatar
eckhart committed
57
58
59
60
61
62
63
64
65
66
67
        gr.root__.apply(visitor)
        s1 = ", ".join(l)
        l = []
        gr.root__.apply(visitor)
        s2 = ", ".join(l)
        l = []
        gr.root__.apply(visitor)
        s3 = ", ".join(l)
        assert s1 == s2 == s3


68
class TestInfiLoopsAndRecursion:
69
    def test_direct_left_recursion1(self):
eckhart's avatar
eckhart committed
70
        minilang = """
71
72
73
74
            expr = expr ("+"|"-") term | term
            term = term ("*"|"/") factor | factor
            factor = /[0-9]+/~
            """
75
        snippet = "9 + 8 + 7 + 6 + 5 + 3 * 4"
76
        parser = grammar_provider(minilang)()
77
        assert parser
Eckhart Arnold's avatar
Eckhart Arnold committed
78
        syntax_tree = parser(snippet)
79
80
        assert not is_error(syntax_tree.error_flag), str(syntax_tree.errors_sorted)
        assert snippet == syntax_tree.content, str(syntax_tree)
Eckhart Arnold's avatar
Eckhart Arnold committed
81
        if is_logging():
82
            log_ST(syntax_tree, "test_LeftRecursion_direct.cst")
83
            log_parsing_history(parser, "test_LeftRecursion_direct")
84

85
    def test_direct_left_recursion2(self):
86
87
88
89
90
91
        minilang = """
            expr = ex
            ex   = expr ("+"|"-") term | term
            term = term ("*"|"/") factor | factor
            factor = /[0-9]+/~
            """
92
        snippet = "9 + 8 + 7 + 6 + 5 + 3 * 4"
93
        parser = grammar_provider(minilang)()
94
        assert parser
95
96
        with logging():
            syntax_tree = parser(snippet)
97
98
        assert not is_error(syntax_tree.error_flag), syntax_tree.errors_sorted
        assert snippet == syntax_tree.content
99

100
    def test_indirect_left_recursion1(self):
101
102
        minilang = """
            Expr    = //~ (Product | Sum | Value)
103
104
            Product = Expr { ('*' | '/') Expr }+
            Sum     = Expr { ('+' | '-') Expr }+
105
            Value   = /[0-9.]+/~ | '(' §Expr ')'
106
            """
107
        parser = grammar_provider(minilang)()
108
        assert parser
109
        snippet = "8 * 4"
110
        syntax_tree = parser(snippet)
111
        assert not is_error(syntax_tree.error_flag), syntax_tree.errors_sorted
112
113
        snippet = "7 + 8 * 4"
        syntax_tree = parser(snippet)
114
        assert not is_error(syntax_tree.error_flag), syntax_tree.errors_sorted
115
116
        snippet = "9 + 8 * (4 + 3)"
        syntax_tree = parser(snippet)
117
118
        assert not is_error(syntax_tree.error_flag), syntax_tree.errors_sorted
        assert snippet == syntax_tree.content
119
        if is_logging():
120
            log_ST(syntax_tree, "test_LeftRecursion_indirect.cst")
121
            log_parsing_history(parser, "test_LeftRecursion_indirect")
122

123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
    # # BEWARE: EXPERIMENTAL TEST can be long running
    # def test_indirect_left_recursion2(self):
    #     arithmetic_syntax = """
    #         expression     = addition | subtraction
    #         addition       = (expression | term) "+" (expression | term)
    #         subtraction    = (expression | term) "-" (expression | term)
    #         term           = multiplication | division
    #         multiplication = (term | factor) "*" (term | factor)
    #         division       = (term | factor) "/" (term | factor)
    #         factor         = [SIGN] ( NUMBER | VARIABLE | group ) { VARIABLE | group }
    #         group          = "(" expression ")"
    #         SIGN           = /[+-]/
    #         NUMBER         = /(?:0|(?:[1-9]\d*))(?:\.\d+)?/~
    #         VARIABLE       = /[A-Za-z]/~
    #         """
    #     arithmetic = grammar_provider(arithmetic_syntax)()
    #     arithmetic.left_recursion_depth__ = 2
    #     assert arithmetic
    #     syntax_tree = arithmetic("(a + b) * (a - b)")
    #     assert syntax_tree.errors
143

144
145
146
147
148
149
150
151
152
153
    def test_break_inifnite_loop_ZeroOrMore(self):
        forever = ZeroOrMore(RegExp(''))
        result = Grammar(forever)('')  # infinite loops will automatically be broken
        assert repr(result) == "Node(:EMPTY__, )", repr(result)

    def test_break_inifnite_loop_OneOrMore(self):
        forever = OneOrMore(RegExp(''))
        result = Grammar(forever)('')  # infinite loops will automatically be broken
        assert repr(result) == "Node(:EMPTY__, )", str(result)

154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
    # def test_infinite_loops(self):
    #     minilang = """forever = { // } \n"""
    #     try:
    #         parser_class = grammar_provider(minilang)
    #     except CompilationError as error:
    #         assert all(e.code == Error.INFINITE_LOOP for e in error.errors)
    #     save = get_config_value('static_analysis')
    #     set_config_value('static_analysis', 'late')
    #     provider = grammar_provider(minilang)
    #     try:
    #         parser = provider()
    #     except GrammarError as error:
    #         assert error.errors[0][2].code == Error.INFINITE_LOOP
    #     set_config_value('static_analysis', 'none')
    #     parser = provider()
    #     snippet = " "
    #     syntax_tree = parser(snippet)
    #     assert any(e.code == Error.INFINITE_LOOP for e in syntax_tree.errors)
    #     res = parser.static_analysis()
    #     assert res and res[0][2].code == Error.INFINITE_LOOP
    #     minilang = """not_forever = { / / } \n"""
    #     parser = grammar_provider(minilang)()
    #     res = parser.static_analysis()
    #     assert not res
    #     set_config_value('static_analysis', save)
179

eckhart's avatar
eckhart committed
180

Eckhart Arnold's avatar
Eckhart Arnold committed
181
class TestFlowControl:
182
183
    def setup(self):
        self.t1 = """
eckhart's avatar
eckhart committed
184
        All work and no play
185
186
187
188
189
        makes Jack a dull boy
        END
        """
        self.t2 = "All word and not play makes Jack a dull boy END\n"

Eckhart Arnold's avatar
Eckhart Arnold committed
190
    def test_lookbehind(self):
191
        ws = RegExp(r'\s*')
Eckhart Arnold's avatar
Eckhart Arnold committed
192
        end = RegExp("END")
193
        doc_end = Lookbehind(RegExp('\\s*?\\n')) + end
194
        word = RegExp(r'\w+')
Eckhart Arnold's avatar
Eckhart Arnold committed
195
196
197
198
        sequence = OneOrMore(NegativeLookahead(end) + word + ws)
        document = ws + sequence + doc_end + ws

        parser = Grammar(document)
199
200
201
202
203
204
205
        cst = parser(self.t1)
        assert not cst.error_flag, cst.as_sxpr()
        cst = parser(self.t2)
        assert cst.error_flag, cst.as_sxpr()

    def test_lookbehind_indirect(self):
        class LookbehindTestGrammar(Grammar):
eckhart's avatar
eckhart committed
206
            parser_initialization__ = ["upon instantiation"]
207
            ws = RegExp(r'\s*')
208
            end = RegExp('END')
209
            SUCC_LB = RegExp('\\s*?\\n')
210
            doc_end = Series(Lookbehind(SUCC_LB), end)
211
            word = RegExp(r'\w+')
212
213
214
215
216
217
            sequence = OneOrMore(Series(NegativeLookahead(end), word, ws))
            document = Series(ws, sequence, doc_end, ws)
            root__ = document

        parser = LookbehindTestGrammar()
        cst = parser(self.t1)
Eckhart Arnold's avatar
Eckhart Arnold committed
218
        assert not cst.error_flag, cst.as_sxpr()
219
        cst = parser(self.t2)
Eckhart Arnold's avatar
Eckhart Arnold committed
220
221
222
        assert cst.error_flag, cst.as_sxpr()


223
224
225
226
227
228
229
class TestRegex:
    def test_multilineRegex(self):
        mlregex = r"""
        regex =  /\w+    # one or more alphabetical characters including the underscore
                  [+]    # followed by a plus sign
                  \w*    # possibly followed by more alpha chracters/
        """
Eckhart Arnold's avatar
Eckhart Arnold committed
230
231
        result, messages, syntax_tree = compile_source(mlregex, None, get_ebnf_grammar(),
                        get_ebnf_transformer(), get_ebnf_compiler('MultilineRegexTest'))
232
        assert result
233
        assert not messages, str(messages)
234
        parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
eckhart's avatar
eckhart committed
235
236
        node = parser('abc+def', parser.regex)
        assert not node.error_flag
237
        assert node.tag_name == "regex"
238
239
        assert str(node) == 'abc+def'

240
241
    def test_multilineRegex_wo_Comments(self):
        mlregex = r"""
eckhart's avatar
eckhart committed
242
243
        regex =  /\w+
                  [+]
244
245
246
247
248
249
                  \w* /
        """
        result, messages, syntax_tree = compile_source(mlregex, None, get_ebnf_grammar(),
                        get_ebnf_transformer(), get_ebnf_compiler('MultilineRegexTest'))
        assert result
        assert not messages, str(messages)
250
        parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
251
252
        node = parser('abc+def', parser.regex)
        assert not node.error_flag
253
        assert node.tag_name == "regex"
254
255
        assert str(node) == 'abc+def'

256
257
258
259
260
261
262
263
264
    def text_ignore_case(self):
        mlregex = r"""
        @ ignorecase = True
        regex = /alpha/
        """
        result, messages, syntax_tree = compile_source(mlregex, None, get_ebnf_grammar(),
                        get_ebnf_transformer(), get_ebnf_compiler('MultilineRegexTest'))
        assert result
        assert not messages
265
        parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
266
267
268
269
        node, rest = parser.regex('Alpha')
        assert node
        assert not node.error_flag
        assert rest == ''
270
        assert node.tag_name == "regex"
271
272
273
274
275
276
277
278
279
280
        assert str(node) == 'Alpha'

        mlregex = r"""
        @ ignorecase = False
        regex = /alpha/
        """
        result, messages, syntax_tree = compile_source(mlregex, None, get_ebnf_grammar(),
                        get_ebnf_transformer(), get_ebnf_compiler('MultilineRegexTest'))
        assert result
        assert not messages
281
        parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
282
283
284
285
        node, rest = parser.regex('Alpha')
        assert node.error_flag


286
287
288
289
290
291
292
293
294
295
296
297
    def test_token(self):
        tokenlang = r"""
            @whitespace = linefeed
            lang        = "" begin_token {/\w+/ ""} end_token
            begin_token = "\begin{document}"
            end_token   = "\end{document}"
            """
        testdoc = r"""
            \begin{document}
            test
            \end{document}
            """
eckhart's avatar
eckhart committed
298
299
300
        result, messages, syntax_tree = compile_source(
            tokenlang, None, get_ebnf_grammar(), get_ebnf_transformer(),
            get_ebnf_compiler("TokenTest"))
301
        assert result
eckhart's avatar
eckhart committed
302
        assert not messages, str(messages)
303
        parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
Eckhart Arnold's avatar
Eckhart Arnold committed
304
        result = parser(testdoc)
305
        # log_parsing_history(parser, "test.log")
306
307
        assert not result.error_flag

308

309
class TestGrammar:
310
    def setup(self):
311
312
313
314
315
316
        grammar = r"""@whitespace = horizontal
        haupt        = textzeile LEERZEILE
        textzeile    = { WORT }+
        WORT         = /[^ \t]+/~
        LEERZEILE    = /\n[ \t]*(?=\n)/~
        """
317
318
319
        self.pyparser, messages, syntax_tree = compile_source(grammar, None, get_ebnf_grammar(),
                                                              get_ebnf_transformer(), get_ebnf_compiler("PosTest"))
        assert self.pyparser
320
        assert not messages
321
322
323
324

    def test_pos_values_initialized(self):
        # checks whether pos values in the parsing result and in the
        # history record have been initialized
325
        with logging("LOGS"):
326
            grammar = compile_python_object(DHPARSER_IMPORTS + self.pyparser, r'\w+Grammar$')()
327
328
            grammar("no_file_name*")
        for record in grammar.history__:
329
330
            assert not record.node or record.node.pos >= 0

331
    def test_select_parsing(self):
332
        grammar = compile_python_object(DHPARSER_IMPORTS + self.pyparser, r'\w+Grammar$')()
333
334
335
336
        grammar("wort", "WORT")
        grammar("eine Zeile", "textzeile")
        grammar("kein Haupt", "haupt")
        grammar("so ist es richtig", "haupt")
337

di68kap's avatar
di68kap committed
338
339
    def test_grammar_subclassing(self):
        class Arithmetic(Grammar):
340
            r'''
di68kap's avatar
di68kap committed
341
342
343
344
345
346
            expression =  term  { ("+" | "-") term }
            term       =  factor  { ("*" | "/") factor }
            factor     =  INTEGER | "("  expression  ")"
            INTEGER    =  /\d+/~
            '''
            expression = Forward()
347
348
349
350
            INTEGER = RE('\\d+')
            factor = INTEGER | TKN("(") + expression + TKN(")")
            term = factor + ZeroOrMore((TKN("*") | TKN("/")) + factor)
            expression.set(term + ZeroOrMore((TKN("+") | TKN("-")) + term))
di68kap's avatar
di68kap committed
351
352
353
354
355
356
            root__ = expression

        grammar = Arithmetic()
        CST = grammar('3+4')
        assert not CST.error_flag, CST.as_sxpr()

357

358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
class TestSeries:
    def test_non_mandatory(self):
        lang = """
        document = series | /.*/
        series = "A" "B" "C" "D"
        """
        parser = grammar_provider(lang)()
        st = parser("ABCD");
        assert not st.error_flag
        st = parser("A_CD");
        assert not st.error_flag
        st = parser("AB_D");
        assert not st.error_flag

    def test_mandatory(self):
        """Test for the §-operator. The Series-parser should raise an
        error for any non-match that occurs after the mandatory-operator.
        """
        lang = """
        document = series | /.*/
        series = "A" "B" §"C" "D"
        """
        parser = grammar_provider(lang)()
381
382
383
        st = parser("ABCD");  assert not st.error_flag
        st = parser("A_CD");  assert not st.error_flag
        st = parser("AB_D");  assert st.error_flag
384
        assert st.errors_sorted[0].code == Error.MANDATORY_CONTINUATION
385
        # transitivity of mandatory-operator
386
        st = parser("ABC_");  assert st.error_flag
387
        assert st.errors_sorted[0].code == Error.MANDATORY_CONTINUATION
388
389

    def test_series_composition(self):
390
        TA, TB, TC, TD, TE = (TKN(b) for b in "ABCDE")
391
392
393
394
395
        s1 = Series(TA, TB, TC, mandatory=2)
        s2 = Series(TD, TE)

        combined = Alternative(s1 + s2, RegExp('.*'))
        parser = Grammar(combined)
396
397
398
        st = parser("ABCDE");  assert not st.error_flag
        st = parser("A_CDE");  assert not st.error_flag
        st = parser("AB_DE");  assert st.error_flag
399
        assert st.errors_sorted[0].code == Error.MANDATORY_CONTINUATION
400
        st = parser("ABC_E");  assert st.error_flag
401
        assert st.errors_sorted[0].code == Error.MANDATORY_CONTINUATION
402
403
404

        combined = Alternative(s2 + s1, RegExp('.*'))
        parser = Grammar(combined)
405
406
407
408
409
410
        st = parser("DEABC");  assert not st.error_flag
        st = parser("_EABC");  assert not st.error_flag
        st = parser("D_ABC");  assert not st.error_flag
        st = parser("DE_BC");  assert not st.error_flag
        st = parser("DEA_C");  assert not st.error_flag
        st = parser("DEAB_");  assert st.error_flag
411
        assert st.errors_sorted[0].code == Error.MANDATORY_CONTINUATION
412

eckhart's avatar
eckhart committed
413
414
415
416
417
418
419
420
421
422
    # def test_boundary_cases(self):
    #     lang = """
    #     document = series | §!single | /.*/
    #     series = "A" "B" §"C" "D"
    #     single = "E"
    #     """
    #     parser_class = grammar_provider(lang)
    #     parser = parser_class()
    #     print(parser.python_src__)
    #     print(parser_class.python_src__)
eckhart's avatar
eckhart committed
423

424

425
426
427
class TestAllOfSomeOf:
    def test_allOf_order(self):
        """Test that parsers of an AllOf-List can match in arbitrary order."""
428
        prefixes = AllOf(TKN("A"), TKN("B"))
429
430
        assert Grammar(prefixes)('A B').content == 'A B'
        assert Grammar(prefixes)('B A').content == 'B A'
431
        # aternative Form
432
        prefixes = AllOf(Series(TKN("B"), TKN("A")))
433
        assert Grammar(prefixes)('A B').content == 'A B'
434
435
436
437

    def test_allOf_completeness(self):
        """Test that an error is raised if not  all parsers of an AllOf-List
        match."""
438
        prefixes = AllOf(TKN("A"), TKN("B"))
439
440
441
442
443
        assert Grammar(prefixes)('B').error_flag

    def test_allOf_redundance(self):
        """Test that one and the same parser may be listed several times
        and must be matched several times accordingly."""
444
        prefixes = AllOf(TKN("A"), TKN("B"), TKN("A"))
445
446
447
        assert Grammar(prefixes)('A A B').content == 'A A B'
        assert Grammar(prefixes)('A B A').content == 'A B A'
        assert Grammar(prefixes)('B A A').content == 'B A A'
448
449
450
451
        assert Grammar(prefixes)('A B B').error_flag

    def test_someOf_order(self):
        """Test that parsers of an AllOf-List can match in arbitrary order."""
452
        prefixes = SomeOf(TKN("A"), TKN("B"))
453
454
        assert Grammar(prefixes)('A B').content == 'A B'
        assert Grammar(prefixes)('B A').content == 'B A'
455
        # aternative Form
456
        prefixes = SomeOf(Alternative(TKN("B"), TKN("A")))
457
458
        assert Grammar(prefixes)('A B').content == 'A B'
        assert Grammar(prefixes)('B').content == 'B'
459
460
461
462

    def test_someOf_redundance(self):
        """Test that one and the same parser may be listed several times
        and must be matched several times accordingly."""
463
        prefixes = SomeOf(TKN("A"), TKN("B"), TKN("A"))
464
465
466
        assert Grammar(prefixes)('A A B').content == 'A A B'
        assert Grammar(prefixes)('A B A').content == 'A B A'
        assert Grammar(prefixes)('B A A').content == 'B A A'
467
468
469
        assert Grammar(prefixes)('A B B').error_flag


470
class TestPopRetrieve:
471
    mini_language = r"""
472
473
        document       = { text | codeblock }
        codeblock      = delimiter { text | (!:delimiter delimiter_sign) } ::delimiter
474
        delimiter      = delimiter_sign  # never use delimiter between capture and pop except for retrival!
475
        delimiter_sign = /`+/
eckhart's avatar
eckhart committed
476
        text           = /[^`]+/
477
        """
478
    mini_lang2 = r"""
479
480
481
482
483
484
485
486
        @braces_filter=counterpart
        document       = { text | codeblock }
        codeblock      = braces { text | opening_braces | (!:braces closing_braces) } ::braces
        braces         = opening_braces
        opening_braces = /\{+/
        closing_braces = /\}+/
        text           = /[^{}]+/
        """
487
    mini_lang3 = r"""
488
489
490
491
        document       = { text | env }
        env            = (specialtag | opentag) text [closespecial | closetag]
        opentag        = "<" name ">"
        specialtag     = "<" /ABC/ !name ">"
eckhart's avatar
eckhart committed
492
        closetag       = close_slash | close_star
493
494
495
496
497
498
499
500
        close_slash    = "<" ::name "/>"
        close_star     = "<" ::name "*>"
        closespecial   = "<" /ABC/~ ">"
        name           = /\w+/~
        text           = /[^<>]+/
        """

    def setup(self):
501
502
503
        self.minilang_parser = grammar_provider(self.mini_language)()
        self.minilang_parser2 = grammar_provider(self.mini_lang2)()
        self.minilang_parser3 = grammar_provider(self.mini_lang3)()
504
505
506

    @staticmethod
    def opening_delimiter(node, name):
507
        return node.tag_name == name # and not isinstance(node.parser, Retrieve)
508
509
510

    @staticmethod
    def closing_delimiter(node):
511
512
        return node.tag_name in {':Pop', ':Retrieve'}
        # return isinstance(node.parser, Retrieve)
513
514
515
516
517
518
519
520
521

    def test_compile_mini_language(self):
        assert self.minilang_parser
        assert self.minilang_parser2
        assert self.minilang_parser3

    def test_stackhandling(self):
        ambigous_opening = "<ABCnormal> normal tag <ABCnormal*>"
        syntax_tree = self.minilang_parser3(ambigous_opening)
522
        assert not syntax_tree.error_flag, str(syntax_tree.errors_sorted)
523
524
525

        ambigous_opening = "<ABCnormal> normal tag <ABCnormal/>"
        syntax_tree = self.minilang_parser3(ambigous_opening)
526
        assert not syntax_tree.error_flag, str(syntax_tree.errors_sorted)
527
528
529

        forgot_closing_tag = "<em> where is the closing tag?"
        syntax_tree = self.minilang_parser3(forgot_closing_tag)
530
        assert syntax_tree.error_flag, str(syntax_tree.errors_sorted)
531
532
533

        proper = "<em> has closing tag <em/>"
        syntax_tree = self.minilang_parser3(proper)
534
        assert not syntax_tree.error_flag, str(syntax_tree.errors_sorted)
535
536
537

        proper = "<em> has closing tag <em*>"
        syntax_tree = self.minilang_parser3(proper)
538
        assert not syntax_tree.error_flag, str(syntax_tree.errors_sorted)
539

540
    def test_cache_neutrality(self):
541
542
        """Test that packrat-caching does not interfere with the variable-
        changing parsers: Capture and Retrieve."""
543
        lang = r"""
544
545
546
547
548
549
550
551
552
            text = opening closing
            opening = (unmarked_package | marked_package)
            closing = ::variable
            unmarked_package = package "."
            marked_package = package "*" "."
            package = "(" variable ")"
            variable = /\w+/~
            """
        case = "(secret)*. secret"
553
        gr = grammar_provider(lang)()
554
        st = gr(case)
555
        assert not st.error_flag, str(st.errors_sorted)
556
557
558
559

    def test_single_line(self):
        teststr = "Anfang ```code block `` <- keine Ende-Zeichen ! ``` Ende"
        syntax_tree = self.minilang_parser(teststr)
560
        assert not syntax_tree.errors_sorted
561
562
        delim = str(next(syntax_tree.select_if(partial(self.opening_delimiter, name="delimiter"))))
        pop = str(next(syntax_tree.select_if(self.closing_delimiter)))
563
564
        assert delim == pop
        if is_logging():
565
            log_ST(syntax_tree, "test_PopRetrieve_single_line.cst")
566
567
568
569
570
571
572
573
574
575
576

    def test_multi_line(self):
        teststr = """
            Anfang ```code block `` <- keine Ende-Zeichen ! ``` Ebde

            Absatz ohne ``` codeblock, aber
            das stellt sich erst am Ende herause...

            Mehrzeliger ```code block
            """
        syntax_tree = self.minilang_parser(teststr)
577
        assert not syntax_tree.errors_sorted
578
579
        delim = str(next(syntax_tree.select_if(partial(self.opening_delimiter, name="delimiter"))))
        pop = str(next(syntax_tree.select_if(self.closing_delimiter)))
580
581
        assert delim == pop
        if is_logging():
582
            log_ST(syntax_tree, "test_PopRetrieve_multi_line.cst")
583
584
585
586

    def test_single_line_complement(self):
        teststr = "Anfang {{{code block }} <- keine Ende-Zeichen ! }}} Ende"
        syntax_tree = self.minilang_parser2(teststr)
587
        assert not syntax_tree.errors_sorted
588
589
        delim = str(next(syntax_tree.select_if(partial(self.opening_delimiter, name="braces"))))
        pop = str(next(syntax_tree.select_if(self.closing_delimiter)))
590
591
        assert len(delim) == len(pop) and delim != pop
        if is_logging():
592
            log_ST(syntax_tree, "test_PopRetrieve_single_line.cst")
593
594
595
596
597
598
599
600
601
602
603

    def test_multi_line_complement(self):
        teststr = """
            Anfang {{{code block {{ <- keine Ende-Zeichen ! }}} Ende

            Absatz ohne {{{ codeblock, aber
            das stellt sich erst am Ende heraus...

            Mehrzeliger }}}code block
            """
        syntax_tree = self.minilang_parser2(teststr)
604
        assert not syntax_tree.errors_sorted
605
606
        delim = str(next(syntax_tree.select_if(partial(self.opening_delimiter, name="braces"))))
        pop = str(next(syntax_tree.select_if(self.closing_delimiter)))
607
608
        assert len(delim) == len(pop) and delim != pop
        if is_logging():
609
            log_ST(syntax_tree, "test_PopRetrieve_multi_line.cst")
610
611


612
class TestWhitespaceHandling:
Eckhart Arnold's avatar
Eckhart Arnold committed
613
    minilang = """
614
615
616
617
618
619
620
621
622
        doc = A B
        A = "A"
        B = "B"
        Rdoc = ar br
        ar = /A/
        br = /B/
        """

    def setup(self):
623
        self.gr = grammar_provider(self.minilang)()
624
625
626
627
628
629
630
631
632
633
634
635

    def test_token_whitespace(self):
        st = self.gr("AB", 'doc')
        assert not st.error_flag
        st = self.gr("A B", 'doc')
        assert not st.error_flag

    def test_regexp_whitespace(self):
        st = self.gr("AB", 'Rdoc')
        assert not st.error_flag
        st = self.gr("A B", 'Rdoc')
        assert st.error_flag
di68kap's avatar
di68kap committed
636
637
638
639
640


class TestErrorReporting:
    grammar = """
        root      = series alpha | anything
eckhart's avatar
eckhart committed
641
        series    = subseries &alpha
di68kap's avatar
di68kap committed
642
643
644
645
646
647
648
649
650
651
652
653
654
655
        subseries = alpha §beta
        alpha     = /[a-z]+/
        beta      = /[A-Z]+/
        anything  = /.*/
        """

    def setup(self):
        self.parser = grammar_provider(self.grammar)()

    def test_error_propagation(self):
        testcode1 = "halloB"
        testcode2 = "XYZ"
        testcode3 = "hallo "
        cst = self.parser(testcode1)
656
        assert not cst.error_flag, str(cst.errors_sorted)
di68kap's avatar
di68kap committed
657
658
659
660
        cst = self.parser(testcode2)
        assert not cst.error_flag
        cst = self.parser(testcode3)
        assert cst.error_flag
661
662


di68kap's avatar
di68kap committed
663
664
class TestBorderlineCases:
    def test_not_matching(self):
665
        minilang = """parser = /X/\n"""
di68kap's avatar
di68kap committed
666
667
668
669
        gr = grammar_provider(minilang)()
        cst = gr('X', 'parser')
        assert not cst.error_flag
        cst = gr(' ', 'parser')
670
        assert cst.error_flag and cst.errors_sorted[0].code == Error.PARSER_DID_NOT_MATCH
di68kap's avatar
di68kap committed
671
        cst = gr('', 'parser')
672
        assert cst.error_flag and cst.errors_sorted[0].code == Error.PARSER_DID_NOT_MATCH
di68kap's avatar
di68kap committed
673
674
675
676
677
678
679

    def test_matching(self):
        minilang = """parser = /.?/"""
        gr = grammar_provider(minilang)()
        cst = gr(' ', 'parser')
        assert not cst.error_flag
        cst = gr('  ', 'parser')
680
        assert cst.error_flag and cst.errors_sorted[0].code == Error.PARSER_STOPPED_BEFORE_END
di68kap's avatar
di68kap committed
681
682
683
684
        cst = gr('', 'parser')
        assert not cst.error_flag


Eckhart Arnold's avatar
Eckhart Arnold committed
685
class TestReentryAfterError:
    """Tests the error-resumption mechanism: after a mandatory (§) item
    fails, parsing can be resumed at a configured reentry point so that
    only one error is reported per failure instead of a cascade."""

    def setup(self):
        lang = """
        document = alpha [beta] gamma "."
          alpha = "ALPHA" abc
            abc = §"a" "b" "c"
          beta = "BETA" (bac | bca)
            bac = "b" "a" §"c"
            bca = "b" "c" §"a"
          gamma = "GAMMA" §(cab | cba)
            cab = "c" "a" §"b"
            cba = "c" "b" §"a"
        """
        self.gr = grammar_provider(lang)()

    # NOTE(review): tests clear `gr.resume_rules` but then populate
    # `gr.resume_rules__`; presumably Grammar.__setattr__ maps the former
    # onto the latter — confirm against DHParser.parse.Grammar.

    def test_no_resume_rules(self):
        gr = self.gr;  gr.resume_rules = dict()
        content = 'ALPHA acb BETA bac GAMMA cab .'
        cst = gr(content)
        assert cst.error_flag
        assert cst.content == content
        assert cst.pick('alpha').content.startswith('ALPHA')

    def test_no_resume_rules_partial_parsing(self):
        gr = self.gr;  gr.resume_rules = dict()
        content = 'ALPHA acb'
        # Parse starting from the 'alpha' rule instead of the root.
        cst = gr(content, 'alpha')
        assert cst.error_flag
        assert cst.content == content
        assert cst.pick('alpha').content.startswith('ALPHA')

    def test_simple_resume_rule(self):
        gr = self.gr;  gr.resume_rules = dict()
        gr.resume_rules__['alpha'] = ['BETA']
        content = 'ALPHA acb BETA bac GAMMA cab .'
        cst = gr(content)
        assert cst.error_flag
        assert cst.content == content
        assert cst.pick('alpha').content.startswith('ALPHA')
        # because of resuming, there should be only one error message
        assert len(cst.errors_sorted) == 1

    def test_failing_resume_rule(self):
        gr = self.gr;  gr.resume_rules = dict()
        # 'XXX' never occurs in the input, so resumption cannot succeed.
        gr.resume_rules__['alpha'] = ['XXX']
        content = 'ALPHA acb BETA bac GAMMA cab .'
        cst = gr(content)
        assert cst.error_flag
        assert cst.content == content
        # When resumption fails, no 'alpha' node is guaranteed to exist:
        # assert cst.pick('alpha').content.startswith('ALPHA')

    def test_several_reentry_points(self):
        gr = self.gr;  gr.resume_rules = dict()
        gr.resume_rules__['alpha'] = ['BETA', 'GAMMA']
        content = 'ALPHA acb BETA bac GAMMA cab .'
        cst = gr(content)
        assert cst.error_flag
        assert cst.content == content
        assert cst.pick('alpha').content.startswith('ALPHA')
        # because of resuming, there should be only one error message
        assert len(cst.errors_sorted) == 1

    def test_several_reentry_points_second_point_matching(self):
        gr = self.gr;  gr.resume_rules = dict()
        gr.resume_rules__['alpha'] = ['BETA', 'GAMMA']
        # 'BETA' is absent, so resumption must fall through to 'GAMMA'.
        content = 'ALPHA acb GAMMA cab .'
        cst = gr(content)
        assert cst.error_flag
        assert cst.content == content
        assert cst.pick('alpha').content.startswith('ALPHA')
        # because of resuming, there should be only one error message
        assert len(cst.errors_sorted) == 1

    def test_several_resume_rules_innermost_rule_matching(self):
        gr = self.gr;  gr.resume_rules = dict()
        gr.resume_rules__['alpha'] = ['BETA', 'GAMMA']
        gr.resume_rules__['beta'] = ['GAMMA']
        gr.resume_rules__['bac'] = ['GAMMA']
        content = 'ALPHA abc BETA bad GAMMA cab .'
        cst = gr(content)
        assert cst.error_flag
        assert cst.content == content
        assert cst.pick('alpha').content.startswith('ALPHA')
        # because of resuming, there should be only one error message
        assert len(cst.errors_sorted) == 1
        # multiple failures: two independent errors, each resumed once
        content = 'ALPHA acb BETA bad GAMMA cab .'
        cst = gr(content)
        assert cst.error_flag
        assert cst.content == content
        assert cst.pick('alpha').content.startswith('ALPHA')
        # because of resuming, there should be only one error message per failure
        assert len(cst.errors_sorted) == 2
class TestConfiguredErrorMessages:
    """Tests user-configured error messages (@..._error directives),
    including the reporting of malformed message templates."""

    def test_configured_error_message(self):
        # The template "{5}" is deliberately invalid: a custom error string
        # may not contain arbitrary positional format fields.
        lang = """
            document = series | /.*/
            @series_error = "a badly configured error message {5}"
            series = "X" | head §"C" "D"
            head = "A" "B"
            """
        parser = grammar_provider(lang)()
        st = parser("AB_D");  assert st.error_flag
        # First the malformed template itself is flagged, then the
        # mandatory-continuation error that triggered the message.
        assert st.errors_sorted[0].code == Error.MALFORMED_ERROR_STRING
        assert st.errors_sorted[1].code == Error.MANDATORY_CONTINUATION
class TestUnknownParserError:
    """Tests that requesting a non-existent start parser raises
    UnknownParserError rather than failing silently."""

    def test_unknown_parser_error(self):
        gr = Grammar()
        try:
            gr("", "NonExistantParser")
            assert False, "UnknownParserError expected!"
        except UnknownParserError:
            pass
class TestEarlyTokenWhitespaceDrop:
    """Tests the '@ drop = token, whitespace' directive: anonymous token
    and whitespace nodes are removed from the tree already during parsing,
    while named nodes (constant, fixed, ...) are kept."""

    def setup(self):
        self.lang = r"""
            @ drop = token, whitespace
            expression = term  { ("+" | "-") term}
            term       = factor  { ("*"|"/") factor}
            factor     = number | variable | "("  expression  ")"
                       | constant | fixed
            variable   = /[a-z]/~
            number     = /\d+/~
            constant   = "A" | "B"
            fixed      = "X"
            """
        self.gr = grammar_provider(self.lang)()

    def test_drop(self):
        # No anonymous :Token or :Whitespace nodes may survive parsing.
        cst = self.gr('4 + 3 * 5')
        assert not cst.pick(':Token')
        assert not cst.pick(':Whitespace')
        # Tokens inside a compound definition (constant = "A" | "B") are
        # dropped, so no node carries the literal content 'A'.
        cst = self.gr('A + B')
        try:
            _ = next(cst.select_if(lambda node: node.content == 'A'))
            assert False, "Tokens in compound expressions should be dropped!"
        except StopIteration:
            pass
        # A token that makes up a named parser on its own (fixed = "X")
        # must be retained.
        cst = self.gr('X * y')
        assert next(cst.select_if(lambda node: node.content == 'X'))
class TestMetaParser:
    """Tests MetaParser's node-packaging helpers (_return_value,
    _return_values) for named and anonymous parsers, with tree-flattening
    enabled."""

    def setup(self):
        self.mp = MetaParser()
        self.mp.grammar = Grammar()  # override placeholder warning
        self.mp.pname = "named"
        self.mp.tag_name = self.mp.pname

    def test_return_value(self):
        # Temporarily force flattening so anonymous children are merged.
        save = get_config_value('flatten_tree_while_parsing')
        set_config_value('flatten_tree_while_parsing', True)

        # Named parser: a tagged child is wrapped under the parser's name.
        nd = self.mp._return_value(Node('tagged', 'non-empty'))
        assert nd.tag_name == 'named', nd.as_sxpr()
        assert len(nd.children) == 1
        assert nd.children[0].tag_name == 'tagged'
        assert nd.children[0].result == "non-empty"
        nd = self.mp._return_value(Node('tagged', ''))
        assert nd.tag_name == 'named', nd.as_sxpr()
        assert len(nd.children) == 1
        assert nd.children[0].tag_name == 'tagged'
        assert not nd.children[0].result
        # Anonymous children are flattened into the named node.
        nd = self.mp._return_value(Node(':anonymous', 'content'))
        assert nd.tag_name == 'named', nd.as_sxpr()
        assert not nd.children
        assert nd.result == 'content'
        nd = self.mp._return_value(Node(':anonymous', ''))
        assert nd.tag_name == 'named', nd.as_sxpr()
        assert not nd.children
        assert not nd.content
        nd = self.mp._return_value(EMPTY_NODE)
        assert nd.tag_name == 'named' and not nd.children, nd.as_sxpr()

        # Unnamed (anonymous) parser: nodes pass through unwrapped.
        self.mp.pname = ''
        self.mp.tag_name = ':unnamed'
        nd = self.mp._return_value(Node('tagged', 'non-empty'))
        assert nd.tag_name == 'tagged', nd.as_sxpr()
        assert len(nd.children) == 0
        assert nd.content == 'non-empty'
        nd = self.mp._return_value(Node('tagged', ''))
        assert nd.tag_name == 'tagged', nd.as_sxpr()
        assert len(nd.children) == 0
        assert not nd.content
        nd = self.mp._return_value(Node(':anonymous', 'content'))
        assert nd.tag_name == ':anonymous', nd.as_sxpr()
        assert not nd.children
        assert nd.result == 'content'
        nd = self.mp._return_value(Node('', ''))
        assert nd.tag_name == '', nd.as_sxpr()
        assert not nd.children
        assert not nd.content
        assert self.mp._return_value(None) == EMPTY_NODE
        assert self.mp._return_value(EMPTY_NODE) == EMPTY_NODE

        # Restore the configuration so other tests are unaffected.
        set_config_value('flatten_tree_while_parsing', save)

    def test_return_values(self):
        self.mp.pname = "named"
        self.mp.tag_name = self.mp.pname
        # EMPTY_NODE entries must not survive in the packaged result tuple.
        rv = self.mp._return_values((Node('tag', 'content'), EMPTY_NODE))
        assert rv[-1].tag_name != EMPTY_NODE.tag_name, rv[-1].tag_name

    def test_in_context(self):
        # Raw string: the grammar contains regex escapes such as \d.
        minilang = r"""
            term       = factor  { (DIV|MUL) factor}
            factor     = NUMBER | VARIABLE
            MUL        = "*" | &factor
            DIV        = "/"
            NUMBER     = /(?:0|(?:[1-9]\d*))(?:\.\d+)?/~
            VARIABLE   = /[A-Za-z]/~
            """
        gr = grammar_provider(minilang)()
        # "2x" => implicit multiplication: MUL matches empty via &factor.
        cst = gr("2x")
        assert bool(cst.pick('MUL')), "Named empty nodes should not be dropped!!!"
if __name__ == "__main__":
    # Run all Test* classes in this module via DHParser's own test runner,
    # with logging disabled.
    from DHParser.testing import runner
    with logging(False):
        runner("", globals())