Starting from 2021-07-01, all LRZ GitLab users will be required to explicitly accept the GitLab Terms of Service. Please see the detailed information at https://doku.lrz.de/display/PUBLIC/GitLab and make sure that your projects conform to the requirements.

test_syntaxtree.py 11 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
#!/usr/bin/python3

"""test_syntaxtree.py - test of syntaxtree-module of DHParser 
                             
Author: Eckhart Arnold <arnold@badw.de>

Copyright 2017 Bavarian Academy of Sciences and Humanities

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

22
import copy
23
import sys
Eckhart Arnold's avatar
Eckhart Arnold committed
24 25
sys.path.extend(['../', './'])

26
from DHParser.syntaxtree import Node, RootNode, parse_sxpr, parse_xml, flatten_sxpr, flatten_xml, TOKEN_PTYPE
Eckhart Arnold's avatar
Eckhart Arnold committed
27
from DHParser.transform import traverse, reduce_single_child, \
28
    replace_by_single_child, flatten, remove_expendables
29
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
30
from DHParser.dsl import grammar_provider
31

32

33 34
class TestParseSxpression:
    """Tests for parse_sxpr(), the S-expression reader."""

    def test_parse_s_expression(self):
        """Check two well-formed inputs and one input that is not a tree.

        The well-formed inputs are compared via their flattened
        S-expression serialization; the malformed input is expected to
        make parse_sxpr() raise a ValueError.
        """
        flat = flatten_sxpr(parse_sxpr('(a (b c))').as_sxpr())
        assert flat == '(a (b "c"))', flat

        # Line breaks inside the content show up as separate quoted strings.
        flat = flatten_sxpr(parse_sxpr('(a i\nj\nk)').as_sxpr())
        assert flat == '(a "i" "j" "k")', flat

        try:
            parse_sxpr('a b c')
        except ValueError:
            pass
        else:
            assert False, "parse_sxpr() should raise a ValueError " \
                          "if argument is not a tree!"

46 47 48 49 50 51 52
class TestParseXML:
    """Tests for parse_xml() and the XML serialization of trees."""

    def test_roundtrip(self):
        """Serialize a tree to XML, parse it back and serialize again."""
        original = parse_sxpr('(a (b c) (d (e f) (h i)))')
        flat_xml = flatten_xml(original.as_xml())
        assert flat_xml == '<a><b>c</b><d><e>f</e><h>i</h></d></a>'

        reparsed = parse_xml(flat_xml)
        assert flat_xml == flatten_xml(reparsed.as_xml())

    def test_plaintext_handling(self):
        """Check how text between tags is represented after parsing."""
        # Text next to a child element becomes a :PlainText node.
        mixed = parse_xml('<a>alpha <b>beta</b> gamma</a>')
        expected_sxpr = '(a (:PlainText "alpha ") (b "beta") (:PlainText " gamma"))'
        assert flatten_sxpr(mixed.as_sxpr()) == expected_sxpr

        # Whitespace-only text around elements does not appear in the output.
        padded = parse_xml(' <a>   <b>beta</b>   </a> ')
        assert flatten_xml(padded.as_xml()) == '<a><b>beta</b></a>'

62

63 64 65 66 67 68
class TestNode:
    """
    Tests for class Node
    """

    def setup(self):
        """Create the two fixture trees the tests below read from ``self``."""
        # NOTE(review): this is the nose-style hook name; recent pytest
        # versions look for ``setup_method`` -- confirm which runner is used.
        self.unique_nodes_sexpr = '(a (b c) (d e) (f (g h)))'
        self.recurring_nodes_sexpr = '(a (b x) (c (d e) (b y)))'
        self.unique_tree = parse_sxpr(self.unique_nodes_sexpr)
        self.recurr_tree = parse_sxpr(self.recurring_nodes_sexpr)

    def test_str(self):
        """str() of a tree is expected to be the concatenated leaf content."""
        for tree, text in ((self.unique_tree, "ceh"), (self.recurr_tree, "xey")):
            assert str(tree) == text

    def test_select_subnodes(self):
        """An always-true match function with include_root visits every node."""
        visited = self.unique_tree.select(lambda nd: True, include_root=True)
        tag_string = ''.join(node.tag_name for node in visited)
        assert tag_string == "abdfg", tag_string

    def test_find(self):
        """select() with content-based and tag-based match functions."""
        def is_leaf_e(nd):
            return not nd.children and nd.result == "e"

        hits = list(self.unique_tree.select(is_leaf_e))
        assert len(hits) == 1
        assert hits[0].result == 'e'

        hits = list(self.recurr_tree.select(lambda nd: nd.tag_name == 'b'))
        assert len(hits) == 2
        assert hits[0].result == 'x' and hits[1].result == 'y'

    def test_equality1(self):
        """Equality and inequality of fixture trees and freshly parsed trees."""
        unique, recurring = self.unique_tree, self.recurr_tree
        assert unique == unique
        assert recurring != unique
        assert parse_sxpr('(a (b c))') != parse_sxpr('(a (b d))')
        assert parse_sxpr('(a (b c))') == parse_sxpr('(a (b c))')

    def test_equality2(self):
        """A parsed and transformed tree equals the same tree built by hand."""
        grammar_src = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
        transformation_table = {
            "term": [replace_by_single_child, flatten],
            "factor": [remove_expendables, reduce_single_child],
            TOKEN_PTYPE: [remove_expendables, reduce_single_child],
            "?": [remove_expendables, replace_by_single_child],
        }
        parse = grammar_provider(grammar_src)()
        syntax_tree = parse("20 / 4 * 3")
        traverse(syntax_tree, transformation_table)
        expected = parse_sxpr(
            "(term (term (factor 20) (:Token /) (factor 4)) (:Token *) (factor 3))")
        assert syntax_tree == expected, syntax_tree.as_sxpr()

    def test_copy(self):
        """Changing a deep copy must not change the tree it was copied from."""
        clone = copy.deepcopy(self.unique_tree)
        assert clone == self.unique_tree
        # Guard: the mutation below only proves something if it is a change.
        assert clone.result[0].result != "epsilon"
        clone.result[0].result = "epsilon"
        assert clone != self.unique_tree

    def test_copy2(self):
        """Deep copies must transform and compile like the original tree.

        Checks that Node.__deepcopy__ goes sufficiently deep for
        AST-transformation and compiling to perform correctly after copy.
        """
        ebnf_source = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
        grammar = get_ebnf_grammar()
        ast_transform = get_ebnf_transformer()
        ebnf_compiler = get_ebnf_compiler()
        cst = grammar(ebnf_source)

        # Copy of the untransformed tree: transform it, then compile it.
        first_copy = copy.deepcopy(cst)
        ast_transform(first_copy)
        result_1 = ebnf_compiler(first_copy)

        # Copy of the tree that was already transformed and compiled.
        second_copy = copy.deepcopy(first_copy)
        result_2 = ebnf_compiler(second_copy)
        assert result_1 == result_2

        # Another fresh copy of the untransformed tree.
        third_copy = copy.deepcopy(cst)
        ast_transform(third_copy)
        result_3 = ebnf_compiler(third_copy)
        assert result_3 == result_2

        # Finally the original tree itself.
        ast_transform(cst)
        result_4 = ebnf_compiler(cst)
        assert result_4 == result_3

    def test_len_and_pos(self):
        """Test len() of nodes and the positions assigned by init_pos()."""
        left = Node(None, "123")
        assert len(left) == 3, "Expected Node.len == 3, got %i" % len(left)
        right = Node(None, "456")
        assert len(right) == 3, "Expected Node.len == 3, got %i" % len(right)
        parent = Node(None, (left, right))
        assert len(parent) == 6, "Expected Node.len == 6, got %i" % len(parent)

        parent.init_pos(0)
        assert parent.pos == 0, "Expected Node.pos == 0, got %i" % parent.pos
        assert left.pos == 0, "Expected Node.pos == 0, got %i" % left.pos
        assert right.pos == 3, "Expected Node.pos == 3, got %i" % right.pos

di68kap's avatar
di68kap committed
150 151 152

class TestRootNode:
    """Tests for class RootNode."""

    def test_error_handling(self):
        """Errors attached via a RootNode are flagged and reported in order.

        Two errors are registered in reverse order of the nodes they
        belong to ("error C" for the second child first, then "error B"
        for the first child). After the tree has been swallowed by the
        root node, the collected errors must list "error B" before
        "error C".
        """
        tree = parse_sxpr('(A (B D) (C E))')
        tree.init_pos(0)
        root = RootNode()
        root.new_error(tree.children[1], "error C")
        root.new_error(tree.children[0], "error B")
        root.swallow(tree)
        assert root.error_flag
        errors = root.collect_errors()
        # Collecting the errors is not supposed to reset the flag.
        assert root.error_flag
        # assert errors == root.collect_errors(True)
        # assert not root.error_flag and not root.collect_errors()
        error_str = "\n".join(str(e) for e in errors)
        # Search for the messages that were actually registered and require
        # both to be present: str.find() returns -1 for a missing substring,
        # which would let a bare "<" comparison pass without checking anything.
        # NOTE(review): assumes collect_errors() orders errors by position
        # and that str(error) contains the message -- confirm.
        pos_b = error_str.find("error B")
        pos_c = error_str.find("error C")
        assert 0 <= pos_b < pos_c, error_str


168
class TestNodeFind:
    """Test the select-functions of class Node.
    """

    def test_find(self):
        """select() with a tag-name match function, with and without root."""
        def has_tag(tag_name):
            # Build a match function for one particular tag name.
            return lambda node: node.tag_name == tag_name

        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        matches = list(tree.select(has_tag("X")))
        assert len(matches) == 2, len(matches)
        first, second = matches[0], matches[1]
        assert str(first) == 'd', str(first)
        assert str(second) == 'F', str(second)
        assert first == parse_sxpr('(X (c d))')
        assert second == parse_sxpr('(X F)')

        # The root node is only a candidate when include_root is True.
        is_root_tag = has_tag('a')
        assert list(tree.select(is_root_tag, include_root=True))
        assert not list(tree.select(is_root_tag, include_root=False))

    def test_getitem(self):
        """Index access to children, and select_by_tag() on the same tree."""
        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        assert tree[0] == parse_sxpr('(b X)')
        assert tree[2] == parse_sxpr('(e (X F))')

        # The tree has three children, so index 3 is out of range.
        try:
            tree[3]
        except IndexError:
            pass
        else:
            assert False, "IndexError expected!"

        x_nodes = list(tree.select_by_tag('X', False))
        assert x_nodes[0] == parse_sxpr('(X (c d))')
        assert x_nodes[1] == parse_sxpr('(X F)')

    def test_contains(self):
        """The ``in`` operator versus select_by_tag() on the same tree."""
        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')

        # The root's own tag is not "in" the tree; select_by_tag() finds it
        # only when the root is included.
        assert 'a' not in tree
        assert any(tree.select_by_tag('a', True))
        assert not any(tree.select_by_tag('a', False))

        for child_tag in ('b', 'X', 'e'):
            assert child_tag in tree

        # 'c' occurs only below a child: not "in" the tree, but selectable.
        assert 'c' not in tree
        assert any(tree.select_by_tag('c', False))
211 212


213
class TestSerialization:
    """Tests for the S-expression and XML serialization of trees."""

    def test_sxpr_roundtrip(self):
        """Placeholder; contains no assertions yet."""
        pass

    def test_sexpr_attributes(self):
        """Attributes set on a node serialize like attributes parsed from text."""
        tree = parse_sxpr('(A "B")')
        tree.attributes['attr'] = "value"
        tree2 = parse_sxpr('(A `(attr "value") "B")')
        assert tree.as_sxpr() == tree2.as_sxpr()
        tree.attributes['attr2'] = "value2"
        tree3 = parse_sxpr('(A `(attr "value") `(attr2 "value2") "B")')
        assert tree.as_sxpr() == tree3.as_sxpr()

    def test_sexpr(self):
        """Check the exact multi-line layout produced by as_sxpr()."""
        tree = parse_sxpr('(A (B "C") (D "E"))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    "C"\n  )\n  (D\n    "E"\n  )\n)', s
        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    (C\n      "D"\n    )\n    (E\n      "F"\n    )' \
            '\n  )\n  (G\n    "H"\n  )\n)', s
        # Content containing line breaks is written as one string per line.
        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    (C\n      "D"\n      "X"\n    )' \
            '\n    (E\n      "F"\n    )\n  )\n  (G\n    " H "\n    " Y "\n  )\n)', s

    def test_compact_representation(self):
        """Check the exact layout produced by as_sxpr(compact=True)."""
        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
        compact = tree.as_sxpr(compact=True)
        assert compact == 'A\n  B\n    C "D"\n    E "F"\n  G "H"', compact
        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        compact = tree.as_sxpr(compact=True)
        assert compact == 'A\n  B\n    C\n      "D"\n      "X"\n    E "F"' \
            '\n  G\n    " H "\n    " Y "', compact

    def test_xml_inlining(self):
        """Check as_xml() output with and without inlined tags."""
        tree = parse_sxpr('(A (B "C") (D "E"))')

        xml = tree.as_xml(inline_tags={'A'})
        assert xml == "<A><B>C</B><D>E</D></A>", xml

        # Bind the result first so that a failure reports the value that was
        # actually compared, not the one left over from the call above.
        xml = tree.as_xml()
        assert xml == "<A>\n  <B>C</B>\n  <D>E</D>\n</A>", xml

        # With xml:space="preserve" set, the expected output is on one line.
        tree.attributes['xml:space'] = 'preserve'
        xml = tree.as_xml()
        assert xml == '<A xml:space="preserve"><B>C</B><D>E</D></A>', xml

        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')

        xml = tree.as_xml(inline_tags={'B'})
        assert xml == "<A>\n  <B><C>D</C><E>F</E></B>\n  <G>H</G>\n</A>", xml
        xml = tree.as_xml(inline_tags={'A'})
        assert xml == "<A><B><C>D</C><E>F</E></B><G>H</G></A>", xml

        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        xml = tree.as_xml()
        assert xml == '<A>\n  <B>\n    <C>\n      D\n      X\n    </C>\n    ' \
            '<E>F</E>\n  </B>\n  <G>\n     H \n     Y \n  </G>\n</A>', xml
        xml = tree.as_xml(inline_tags={'A'})
        assert xml == '<A><B><C>D\nX</C><E>F</E></B><G> H \n Y </G></A>', xml

    # def test_xml2(self):
    #     tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
    #     print(tree.as_xml())
    #     print(tree.as_xml(inline_tags={'A'}))
278

di68kap's avatar
di68kap committed
279

280
if __name__ == "__main__":
    # When run as a script, pass this module's global namespace to
    # DHParser's own test runner.
    # NOTE(review): the empty first argument presumably selects all tests
    # found in that namespace -- confirm against DHParser.testing.runner.
    from DHParser.testing import runner
    runner("", globals())