#!/usr/bin/python3

"""test_parsers.py - tests of the parsers-module of DHParser

Author: Eckhart Arnold <arnold@badw.de>

Copyright 2017 Bavarian Academy of Sciences and Humanities

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

Eckhart Arnold's avatar
Eckhart Arnold committed
22
from functools import partial
import sys

# Extend the module search path with the parent and the current directory
# so that the DHParser imports below can be resolved; this must therefore
# run before those imports.
sys.path.extend(['../', './'])

from DHParser import parsers
from DHParser.toolkit import is_logging, compile_python_object
from DHParser.syntaxtree import no_operation, traverse, remove_expendables, \
    replace_by_single_child, reduce_single_child, flatten, TOKEN_PTYPE
from DHParser.parsers import compile_source
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
from DHParser.dsl import parser_factory, DHPARSER_IMPORTS
33
34


Eckhart Arnold's avatar
Eckhart Arnold committed
35
36
37
38
39
40
41
42
43
# EBNF source of a small arithmetic grammar used as a fixture by the tests
# in this module. Note that `expr` and `term` refer to themselves as their
# first element, i.e. both rules are directly left-recursive.
ARITHMETIC_EBNF = """
    @ whitespace = linefeed
    formula = [ //~ ] expr
    expr = expr ("+"|"-") term | term
    term = term ("*"|"/") factor | factor
    factor = /[0-9]+/~
    # example:  "5 + 3 * 4"
    """

44

Eckhart Arnold's avatar
Eckhart Arnold committed
45
46
# AST transformations for the arithmetic DSL grammar: each key maps to the
# list of transformation functions that is passed to `traverse` as part of
# its `processing_table`.
# NOTE(review): "term, expr" presumably addresses both rules at once and "*"
# presumably acts as the catch-all entry -- confirm against DHParser.syntaxtree.
ARITHMETIC_EBNF_transformation_table = {
    "formula": [remove_expendables],
    "term, expr": [replace_by_single_child, flatten],
    "factor": [remove_expendables, reduce_single_child],
    TOKEN_PTYPE: [remove_expendables, reduce_single_child],
    "*": [remove_expendables, replace_by_single_child],
}

54

Eckhart Arnold's avatar
Eckhart Arnold committed
55
56
57
# `traverse` with ARITHMETIC_EBNF_transformation_table pre-bound as its
# `processing_table` keyword argument; callers supply the remaining arguments.
ARITHMETIC_EBNFTransform = partial(
    traverse,
    processing_table=ARITHMETIC_EBNF_transformation_table,
)


58
59
60
61
62
63
64
65
class TestGrammarTest:
    cases = {
        "factor": {
            "match": {
                1: "0",
                2: "314",
            },
            "fail": {
66
67
68
69
70
71
72
73
74
75
76
                3: "21F",
                4: "G123"
            }
        },
        "term": {
            "match": {
                1: "4 * 5",
                2: "20 / 4",
                3: "20 / 4 * 3"
            },
            "ast": {
77
78
79
                1: "(term (factor 4) (:Token *) (factor 5))",
                2: "(term (factor 20) (:Token /) (factor 4))",
                3: "(term (term (factor 20) (:Token /) (factor 4)) (:Token *) (factor 3))"
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
            },
            "fail": {
                4: "4 + 5",
                5: "20 / 4 - 3"
            }
        }
    }

    failure_cases = {
        "term": {
            "match": {
                1: "4 + 5",     # error: this should fail
                2: "20 / 4",
                3: "20 / 4 * 3"
            },
            "ast": {
96
97
98
                1: "(term (factor 4) (:Token *) (factor 5))",
                2: "(term (factor 20) (:Token /) (factor 4))",
                3: "(term (term (factor 19) (:Token /) (factor 4)) (:Token *) (factor 3))"  # error 19 != 20
99
100
101
102
            },
            "fail": {
                4: "4 * 5",     # error: this should match
                5: "20 / 4 - 3"
103
104
105
106
            }
        }
    }

di68kap's avatar
di68kap committed
107
    def test_testing_grammar(self):
108
109
        parser_fac = parser_factory(ARITHMETIC_EBNF)
        trans_fac = lambda : ARITHMETIC_EBNFTransform
di68kap's avatar
di68kap committed
110
        errata = parsers.test_grammar(self.cases, parser_fac, trans_fac)
111
        assert not errata, str(errata)
di68kap's avatar
di68kap committed
112
        errata = parsers.test_grammar(self.failure_cases, parser_fac, trans_fac)
113
114
115
        # for e in errata:
        #     print(e)
        assert len(errata) == 3
116

117
class TestInfiLoopsAndRecursion:
118
    def test_direct_left_recursion(self):
Eckhart Arnold's avatar
Eckhart Arnold committed
119
        minilang = ARITHMETIC_EBNF
120
        snippet = "5 + 3 * 4"
121
        parser = parser_factory(minilang)()
122
        assert parser
Eckhart Arnold's avatar
Eckhart Arnold committed
123
        syntax_tree = parser(snippet)
124
        assert not syntax_tree.collect_errors()
125
        assert snippet == str(syntax_tree)
Eckhart Arnold's avatar
Eckhart Arnold committed
126
127
        if is_logging():
            syntax_tree.log("test_LeftRecursion_direct.cst")
Eckhart Arnold's avatar
Eckhart Arnold committed
128
            # self.minilang_parser1.log_parsing_history("test_LeftRecursion_direct")
129
130
131
132

    def test_indirect_left_recursion(self):
        pass

133
134
135
    def test_inifinite_loops(self):
        minilang = """not_forever = { // } \n"""
        snippet = " "
136
        parser = parser_factory(minilang)()
Eckhart Arnold's avatar
Eckhart Arnold committed
137
        syntax_tree = parser(snippet)
138
139
140
        assert syntax_tree.error_flag
        # print(syntax_tree.collect_errors())

141

142
143
144
145
146
147
148
class TestRegex:
    def test_multilineRegex(self):
        mlregex = r"""
        regex =  /\w+    # one or more alphabetical characters including the underscore
                  [+]    # followed by a plus sign
                  \w*    # possibly followed by more alpha chracters/
        """
Eckhart Arnold's avatar
Eckhart Arnold committed
149
150
        result, messages, syntax_tree = compile_source(mlregex, None, get_ebnf_grammar(),
                        get_ebnf_transformer(), get_ebnf_compiler('MultilineRegexTest'))
151
        assert result
152
153
154
155
156
157
158
        assert not messages
        parser = compile_python_object(DHPARSER_IMPORTS + result, '\w+Grammar$')()
        node, rest = parser.regex('abc+def')
        assert rest == ''
        assert node.parser.name == "regex"
        assert str(node) == 'abc+def'

159
160
161
162
163
164
165
166
167
168
169
170
    def test_token(self):
        tokenlang = r"""
            @whitespace = linefeed
            lang        = "" begin_token {/\w+/ ""} end_token
            begin_token = "\begin{document}"
            end_token   = "\end{document}"
            """
        testdoc = r"""
            \begin{document}
            test
            \end{document}
            """
Eckhart Arnold's avatar
Eckhart Arnold committed
171
172
        result, messages, syntax_tree = compile_source(tokenlang, None, get_ebnf_grammar(),
                                    get_ebnf_transformer(), get_ebnf_compiler("TokenTest"))
173
174
175
        assert result
        assert not messages
        parser = compile_python_object(DHPARSER_IMPORTS + result, '\w+Grammar$')()
Eckhart Arnold's avatar
Eckhart Arnold committed
176
        result = parser(testdoc)
177
178
179
        # parser.log_parsing_history("test.log")
        assert not result.error_flag

180

181
if __name__ == "__main__":
    # `run` is a project-local helper module, imported only when this file
    # is executed as a script.
    # NOTE(review): presumably `runner` discovers and runs the test classes
    # found in the passed namespace -- confirm against run.py.
    from run import runner
    runner("", globals())