test_parsers.py 4.29 KB
Newer Older
1
2
#!/usr/bin/python3

3
"""test_parsers.py - tests of the parsers-module of DHParser

Author: Eckhart Arnold <arnold@badw.de>

Copyright 2017 Bavarian Academy of Sciences and Humanities

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

Eckhart Arnold's avatar
Eckhart Arnold committed
22
from functools import partial
23
import sys
24

Eckhart Arnold's avatar
Eckhart Arnold committed
25
sys.path.extend(['../', './'])
26

di68kap's avatar
di68kap committed
27
from DHParser import parsers
Eckhart Arnold's avatar
Eckhart Arnold committed
28
from DHParser.toolkit import is_logging, compile_python_object
29
from DHParser.syntaxtree import no_operation, traverse, remove_expendables, \
30
    replace_by_single_child, reduce_single_child, flatten, TOKEN_PTYPE
di68kap's avatar
di68kap committed
31
from DHParser.parsers import compile_source
Eckhart Arnold's avatar
Eckhart Arnold committed
32
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
33
from DHParser.dsl import parser_factory, DHPARSER_IMPORTS
34
35


Eckhart Arnold's avatar
Eckhart Arnold committed
36
37
38
39
40
41
42
43
44
# Small arithmetic grammar in EBNF notation. The rules `expr` and `term` are
# directly left-recursive: the first alternative of each rule starts with the
# rule itself. It is the grammar fed to `parser_factory` in
# `test_direct_left_recursion`.
# NOTE(review): the text is passed verbatim to the EBNF compiler, so its
# whitespace and the `@ whitespace` directive should not be reformatted
# without checking the DHParser EBNF syntax.
ARITHMETIC_EBNF = """
    @ whitespace = linefeed
    formula = [ //~ ] expr
    expr = expr ("+"|"-") term | term
    term = term ("*"|"/") factor | factor
    factor = /[0-9]+/~
    # example:  "5 + 3 * 4"
    """

45

Eckhart Arnold's avatar
Eckhart Arnold committed
46
47
# AST transformations for the arithmetic DSL grammar: each key maps to the
# list of transformation functions registered for it.
# NOTE(review): presumably the key "term, expr" registers the same list for
# both rule names and "*" is the fallback for all other nodes -- confirm
# against the documentation of `traverse`.
ARITHMETIC_EBNF_transformation_table = {
    "formula": [remove_expendables],
    "term, expr": [replace_by_single_child, flatten],
    "factor": [remove_expendables, reduce_single_child],
    TOKEN_PTYPE: [remove_expendables, reduce_single_child],
    "*": [remove_expendables, replace_by_single_child],
}

# `traverse` with the table above pre-bound as its `processing_table` argument.
ARITHMETIC_EBNFTransform = partial(
    traverse, processing_table=ARITHMETIC_EBNF_transformation_table)


59
class TestInfiLoopsAndRecursion:
    """Tests for left-recursive grammars and for repetitions that consume nothing."""

    def test_direct_left_recursion(self):
        """Parse "5 + 3 * 4" with the directly left-recursive ARITHMETIC_EBNF.

        The parse must yield no errors, and the string form of the resulting
        tree must equal the input snippet.
        """
        minilang = ARITHMETIC_EBNF
        snippet = "5 + 3 * 4"
        parser = parser_factory(minilang)()
        assert parser
        syntax_tree = parser(snippet)
        assert not syntax_tree.collect_errors()
        assert snippet == str(syntax_tree)
        if is_logging():
            # Only written when logging is switched on.
            syntax_tree.log("test_LeftRecursion_direct.cst")

    def test_indirect_left_recursion(self):
        """Placeholder for an indirect left-recursion test (not implemented)."""
        # TODO: add a grammar with indirect left recursion; until then this
        # test passes without checking anything.
        pass

    def test_inifinite_loops(self):
        """Parse with a grammar that repeats an empty regular expression.

        `{ // }` repeats a regex that matches the empty string; the test
        expects the resulting tree to have its `error_flag` set.
        """
        minilang = """not_forever = { // } \n"""
        snippet = " "
        parser = parser_factory(minilang)()
        syntax_tree = parser(snippet)
        assert syntax_tree.error_flag

83

84
85
86
87
88
89
90
class TestRegex:
    """Tests for regular expressions and token literals in EBNF grammars."""

    def test_multilineRegex(self):
        """Compile a grammar whose regex spans several commented lines.

        The grammar source must compile without messages, and the generated
        `regex` parser must match 'abc+def' completely.
        """
        mlregex = r"""
        regex =  /\w+    # one or more alphabetical characters including the underscore
                  [+]    # followed by a plus sign
                  \w*    # possibly followed by more alpha chracters/
        """
        result, messages, _ = compile_source(
            mlregex, None, get_ebnf_grammar(), get_ebnf_transformer(),
            get_ebnf_compiler('MultilineRegexTest'))
        assert result
        assert not messages
        # Raw string: '\w' in a plain string literal is an invalid escape
        # sequence and triggers a warning on current Python versions.
        parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
        node, rest = parser.regex('abc+def')
        assert rest == ''
        assert node.parser.name == "regex"
        assert str(node) == 'abc+def'

    def test_token(self):
        """Compile a grammar with backslash-containing tokens and parse a document.

        The token literals "\\begin{document}" and "\\end{document}" contain
        backslashes; the grammar must compile without messages and the test
        document must parse without the error flag being set.
        """
        tokenlang = r"""
            @whitespace = linefeed
            lang        = "" begin_token {/\w+/ ""} end_token
            begin_token = "\begin{document}"
            end_token   = "\end{document}"
            """
        testdoc = r"""
            \begin{document}
            test
            \end{document}
            """
        source, messages, _ = compile_source(
            tokenlang, None, get_ebnf_grammar(), get_ebnf_transformer(),
            get_ebnf_compiler("TokenTest"))
        assert source
        assert not messages
        # Raw string for the regex pattern, see test_multilineRegex.
        parser = compile_python_object(DHPARSER_IMPORTS + source, r'\w+Grammar$')()
        # Separate name for the parse result instead of rebinding the
        # generated source code variable.
        parsed = parser(testdoc)
        assert not parsed.error_flag

122

123
if __name__ == "__main__":
    # Imported here so that the test runner is only needed when the module is
    # executed as a script.
    from DHParser.testing import runner

    # NOTE(review): presumably an empty first argument means "run every test
    # found in the given namespace" -- confirm against DHParser.testing.runner.
    runner("", globals())