test_parsers.py 6.01 KB
Newer Older
1 2
#!/usr/bin/python3

"""test_parsers.py - tests of the parsers-module of DHParser

Author: Eckhart Arnold <arnold@badw.de>

Copyright 2017 Bavarian Academy of Sciences and Humanities

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

Eckhart Arnold's avatar
Eckhart Arnold committed
22
from functools import partial
23
import sys
Eckhart Arnold's avatar
Eckhart Arnold committed
24
sys.path.extend(['../', './'])
25

di68kap's avatar
di68kap committed
26
from DHParser import parsers
Eckhart Arnold's avatar
Eckhart Arnold committed
27
from DHParser.toolkit import is_logging, compile_python_object
28
from DHParser.syntaxtree import no_operation, traverse, remove_expendables, \
29
    replace_by_single_child, reduce_single_child, flatten, TOKEN_PTYPE
di68kap's avatar
di68kap committed
30
from DHParser.parsers import compile_source
Eckhart Arnold's avatar
Eckhart Arnold committed
31
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
32
from DHParser.dsl import parser_factory, DHPARSER_IMPORTS
33 34


Eckhart Arnold's avatar
Eckhart Arnold committed
35 36 37 38 39 40 41 42 43
# EBNF grammar for simple arithmetic formulae, shared as a fixture by the
# tests in this module. Note that `expr` and `term` are directly
# left-recursive: the first alternative of each rule starts with the rule
# itself.
ARITHMETIC_EBNF = """
    @ whitespace = linefeed
    formula = [ //~ ] expr
    expr = expr ("+"|"-") term | term
    term = term ("*"|"/") factor | factor
    factor = /[0-9]+/~
    # example:  "5 + 3 * 4"
    """

44

Eckhart Arnold's avatar
Eckhart Arnold committed
45 46
# Processing table handed to `traverse`: each key selects nodes and maps them
# to the ordered list of transformations to apply.
# NOTE(review): the "term, expr" key presumably addresses both parser names
# at once and the "" key presumably acts as the fallback entry - confirm
# against DHParser.syntaxtree.traverse.
ARITHMETIC_EBNF_transformation_table = {
    # AST Transformations for the DSL-grammar
    "formula": [remove_expendables],
    "term, expr": [replace_by_single_child, flatten],
    "factor": [remove_expendables, reduce_single_child],
    TOKEN_PTYPE: [remove_expendables, reduce_single_child],
    "": [remove_expendables, replace_by_single_child],
}


# `traverse` with the arithmetic processing table pre-bound, so callers only
# need to supply the tree.
ARITHMETIC_EBNFTransform = partial(
    traverse, processing_table=ARITHMETIC_EBNF_transformation_table)


58 59 60 61 62 63 64 65
class TestGrammarTest:
    """Exercise ``parsers.test_grammar`` with the arithmetic grammar.

    ``cases`` contains test cases that are all expected to hold, so
    ``test_grammar`` should report no errata for them. ``failure_cases``
    repeats the "term" cases with three deliberately planted mistakes
    (each marked by an ``error`` comment), so exactly three errata are
    expected for it.
    """

    cases = {
        "factor": {
            "match": {
                1: "0",
                2: "314",
            },
            "fail": {
                3: "21F",
                4: "G123"
            }
        },
        "term": {
            "match": {
                1: "4 * 5",
                2: "20 / 4",
                3: "20 / 4 * 3"
            },
            "ast": {
                1: "(term (factor 4) (:Token *) (factor 5))",
                2: "(term (factor 20) (:Token /) (factor 4))",
                3: "(term (term (factor 20) (:Token /) (factor 4)) (:Token *) (factor 3))"
            },
            "fail": {
                4: "4 + 5",
                5: "20 / 4 - 3"
            }
        }
    }

    failure_cases = {
        "term": {
            "match": {
                1: "4 + 5",     # error: this should fail
                2: "20 / 4",
                3: "20 / 4 * 3"
            },
            "ast": {
                1: "(term (factor 4) (:Token *) (factor 5))",
                2: "(term (factor 20) (:Token /) (factor 4))",
                3: "(term (term (factor 19) (:Token /) (factor 4)) (:Token *) (factor 3))"  # error 19 != 20
            },
            "fail": {
                4: "4 * 5",     # error: this should match
                5: "20 / 4 - 3"
            }
        }
    }

    def test_testing_grammar(self):
        """Run both case tables through ``parsers.test_grammar``.

        The correct table must yield no errata; the table with the three
        planted mistakes must yield exactly three.
        """
        parser_fac = parser_factory(ARITHMETIC_EBNF)

        def trans_fac():
            # Factory returning the transformer, as expected by test_grammar.
            return ARITHMETIC_EBNFTransform

        errata = parsers.test_grammar(self.cases, parser_fac, trans_fac)
        assert not errata, str(errata)

        errata = parsers.test_grammar(self.failure_cases, parser_fac, trans_fac)
        # for e in errata:
        #     print(e)
        assert len(errata) == 3
116

117
class TestInfiLoopsAndRecursion:
    """Tests that left-recursive and potentially non-terminating grammars
    are handled by the generated parsers."""

    def test_direct_left_recursion(self):
        """Parse "5 + 3 * 4" with the directly left-recursive arithmetic
        grammar and check that the tree is error-free and reproduces the
        input verbatim."""
        minilang = ARITHMETIC_EBNF
        snippet = "5 + 3 * 4"
        parser = parser_factory(minilang)()
        assert parser
        syntax_tree = parser(snippet)
        assert not syntax_tree.collect_errors()
        assert snippet == str(syntax_tree)
        if is_logging():
            syntax_tree.log("test_LeftRecursion_direct.cst")
            # self.minilang_parser1.log_parsing_history("test_LeftRecursion_direct")

    def test_indirect_left_recursion(self):
        """Placeholder: no test for indirect left recursion is implemented."""
        pass

    def test_inifinite_loops(self):
        """A repetition around an empty regex (``{ // }``) must produce a
        tree with its error flag set."""
        minilang = """not_forever = { // } \n"""
        snippet = " "
        parser = parser_factory(minilang)()
        syntax_tree = parser(snippet)
        assert syntax_tree.error_flag
        # print(syntax_tree.collect_errors())

141

142 143 144 145 146 147 148
class TestRegex:
    """Tests for regular-expression and token handling in grammars compiled
    from EBNF source."""

    def test_multilineRegex(self):
        """Compile a grammar whose regex spans several lines and contains
        inline comments, then check that it matches 'abc+def' completely."""
        mlregex = r"""
        regex =  /\w+    # one or more alphabetical characters including the underscore
                  [+]    # followed by a plus sign
                  \w*    # possibly followed by more alpha chracters/
        """
        # The third return value of compile_source is not needed here.
        result, messages, _ = compile_source(mlregex, None, get_ebnf_grammar(),
                        get_ebnf_transformer(), get_ebnf_compiler('MultilineRegexTest'))
        assert result
        assert not messages
        # Raw string: '\w' is not a valid escape sequence in a plain literal.
        parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
        node, rest = parser.regex('abc+def')
        assert rest == ''
        assert node.parser.name == "regex"
        assert str(node) == 'abc+def'

    def test_token(self):
        """Compile a grammar whose token literals contain backslashes
        (``\\begin{document}`` / ``\\end{document}``) and check that a
        matching document parses without errors."""
        tokenlang = r"""
            @whitespace = linefeed
            lang        = "" begin_token {/\w+/ ""} end_token
            begin_token = "\begin{document}"
            end_token   = "\end{document}"
            """
        testdoc = r"""
            \begin{document}
            test
            \end{document}
            """
        # The third return value of compile_source is not needed here.
        result, messages, _ = compile_source(tokenlang, None, get_ebnf_grammar(),
                                    get_ebnf_transformer(), get_ebnf_compiler("TokenTest"))
        assert result
        assert not messages
        # Raw string: '\w' is not a valid escape sequence in a plain literal.
        parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
        syntax_tree = parser(testdoc)
        # parser.log_parsing_history("test.log")
        assert not syntax_tree.error_flag

180

181
if __name__ == "__main__":
    # When executed as a script, hand this module's namespace to the
    # project's own test runner (imported lazily so that merely importing
    # this module does not require `run`).
    from run import runner
    runner("", globals())