#!/usr/bin/python3

"""test_syntaxtree.py - test of syntaxtree-module of DHParser 
                             
Author: Eckhart Arnold <arnold@badw.de>

Copyright 2017 Bavarian Academy of Sciences and Humanities

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import copy
import sys

# Add the parent and the current directory to the module search path before
# the DHParser imports below (presumably so that the tests can be run from a
# source checkout without installing the package -- verify against the
# project layout).
sys.path.extend(['../', './'])

from DHParser.syntaxtree import Node, RootNode, parse_sxpr, parse_xml, flatten_sxpr, flatten_xml, \
    MockParser
from DHParser.transform import traverse, reduce_single_child, \
    replace_by_single_child, flatten, remove_expendables
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
from DHParser.dsl import grammar_provider


class TestParseSxpression:
    """Tests for parse_sxpr(), the S-expression reader."""

    def test_parse_s_expression(self):
        """Well-formed S-expressions are parsed; anything else raises ValueError."""
        tree = parse_sxpr('(a (b c))')
        flat = flatten_sxpr(tree.as_sxpr())
        assert flat == '(a (b "c"))', flat

        # Content that spans several lines is serialized as one string per line.
        tree = parse_sxpr('(a i\nj\nk)')
        flat = flatten_sxpr(tree.as_sxpr())
        assert flat == '(a "i" "j" "k")', flat

        # Input that is not enclosed in parentheses is not a tree.
        try:
            parse_sxpr('a b c')
        except ValueError:
            pass
        else:
            raise AssertionError("parse_sxpr() should raise a ValueError "
                                 "if argument is not a tree!")

class TestParseXML:
    """Tests for parse_xml() and the XML serialization of trees."""

    def test_roundtrip(self):
        """A tree serialized as XML and parsed again yields the same XML."""
        tree = parse_sxpr('(a (b c) (d (e f) (h i)))')
        xml = tree.as_xml()
        fxml = flatten_xml(xml)
        assert fxml == '<a><b>c</b><d><e>f</e><h>i</h></d></a>', fxml
        tree2 = parse_xml(fxml)
        fxml2 = flatten_xml(tree2.as_xml())
        assert fxml == fxml2, fxml2

    def test_plaintext_handling(self):
        """Text between child elements ends up in ':Token' nodes."""
        tree = parse_xml('<a>alpha <b>beta</b> gamma</a>')
        flat = flatten_sxpr(tree.as_sxpr())
        assert flat == '(a (:Token "alpha ") (b "beta") (:Token " gamma"))', flat

        # Blanks between the tags are kept as ':Token' nodes; the blanks
        # around the outermost element do not show up in the result.
        tree = parse_xml(' <a>  <b>beta</b>  </a> ')
        flat = flatten_xml(tree.as_xml())
        assert flat == '<a><:Token>  </:Token><b>beta</b><:Token>  </:Token></a>', flat
        xml = tree.as_xml(inline_tags={'a'}, omit_tags={':Token'})
        assert xml == '<a>  <b>beta</b>  </a>', xml

        # Whitespace between the tags that contains line breaks is expected
        # to be dropped altogether.
        tree = parse_xml(' <a>\n  <b>beta</b>\n</a> ')
        xml = tree.as_xml(inline_tags={'a'})
        assert xml == '<a><b>beta</b></a>', xml

    def test_flatten_xml(self):
        """flatten_xml() removes the line breaks and indentation between tags."""
        tree = parse_xml('<alpha>\n  <beta>gamma</beta>\n</alpha>')
        flat_xml = flatten_xml(tree.as_xml())
        assert flat_xml == '<alpha><beta>gamma</beta></alpha>', flat_xml

class TestNode:
    """
    Tests for class Node
    """
    def setup(self):
        """Build the two fixture trees that the tests of this class share.

        NOTE(review): presumably called before each test by the test runner
        (xunit-style ``setup``) -- confirm for the runner in use.
        """
        self.unique_nodes_sexpr = '(a (b c) (d e) (f (g h)))'
        self.unique_tree = parse_sxpr(self.unique_nodes_sexpr)
        self.recurring_nodes_sexpr = '(a (b x) (c (d e) (b y)))'
        self.recurr_tree = parse_sxpr(self.recurring_nodes_sexpr)

    def test_str(self):
        """str(node) is the concatenated content of the leaves."""
        assert str(self.unique_tree) == "ceh"
        assert str(self.recurr_tree) == "xey"

    def test_select_subnodes(self):
        """select() with an always-true match function visits every node,
        parents before their children."""
        tags = [node.tag_name
                for node in self.unique_tree.select(lambda nd: True, include_root=True)]
        assert ''.join(tags) == "abdfg", ''.join(tags)

    def test_find(self):
        """select() yields exactly the nodes accepted by the match function."""
        found = list(self.unique_tree.select(lambda nd: not nd.children and nd.result == "e"))
        assert len(found) == 1
        assert found[0].result == 'e'

        found = list(self.recurr_tree.select(lambda nd: nd.tag_name == 'b'))
        assert len(found) == 2
        assert found[0].result == 'x' and found[1].result == 'y'

    def test_equality1(self):
        """Trees compare equal if and only if structure and content agree."""
        assert self.unique_tree == self.unique_tree
        assert self.recurr_tree != self.unique_tree
        assert parse_sxpr('(a (b c))') != parse_sxpr('(a (b d))')
        assert parse_sxpr('(a (b c))') == parse_sxpr('(a (b c))')

    def test_equality2(self):
        """A parsed and transformed tree equals the same tree read from an
        S-expression."""
        ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
        att = {"term": [remove_expendables, replace_by_single_child, flatten],
               "factor": [remove_expendables, reduce_single_child],
               "*": [remove_expendables, replace_by_single_child]}
        parser = grammar_provider(ebnf)()
        tree = parser("20 / 4 * 3")
        traverse(tree, att)
        compare_tree = parse_sxpr("(term (term (factor 20) (:Token /) (factor 4)) (:Token *) (factor 3))")
        assert tree == compare_tree, tree.as_sxpr()

    def test_copy(self):
        """A deep copy is equal to, but independent of, the original."""
        cpy = copy.deepcopy(self.unique_tree)
        assert cpy == self.unique_tree
        assert cpy.result[0].result != "epsilon"  # just to make sure...
        # Changing the copy must not affect the original tree.
        cpy.result[0].result = "epsilon"
        assert cpy != self.unique_tree

    def test_copy2(self):
        """Deep copies can be transformed and compiled like the original."""
        # test if Node.__deepcopy__ goes sufficiently deep for ast-
        # transformation and compiling to perform correctly after copy
        ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
        parser = get_ebnf_grammar()
        transform = get_ebnf_transformer()
        compiler = get_ebnf_compiler()
        tree = parser(ebnf)

        # Copy of the untransformed tree versus a copy of the transformed copy.
        tree_copy = copy.deepcopy(tree)
        transform(tree_copy)
        res1 = compiler(tree_copy)
        t2 = copy.deepcopy(tree_copy)
        res2 = compiler(t2)
        assert res1 == res2

        # A second, fresh copy of the original must give the same result ...
        tree_copy = copy.deepcopy(tree)
        transform(tree_copy)
        res3 = compiler(tree_copy)
        assert res3 == res2

        # ... and so must the original itself.
        transform(tree)
        res4 = compiler(tree)
        assert res4 == res3

    def test_len_and_pos(self):
        """Test len-property of Node."""
        nd1 = Node(None, "123")
        assert len(nd1) == 3, "Expected Node.len == 3, got %i" % len(nd1)
        nd2 = Node(None, "456")
        assert len(nd2) == 3, "Expected Node.len == 3, got %i" % len(nd2)
        nd = Node(None, (nd1, nd2))
        assert len(nd) == 6, "Expected Node.len == 6, got %i" % len(nd)

        # init_pos() on the parent also assigns the positions of the children.
        nd.init_pos(0)
        assert nd.pos == 0, "Expected Node.pos == 0, got %i" % nd.pos
        assert nd1.pos == 0, "Expected Node.pos == 0, got %i" % nd1.pos
        assert nd2.pos == 3, "Expected Node.pos == 3, got %i" % nd2.pos

    def test_xml_sanitizer(self):
        """Serialize content that contains XML special characters."""
        node = Node(MockParser('tag'), '<&>')
        # NOTE(review): smoke test only -- the output is printed but never
        # checked; consider asserting the expected escaped form.
        print(node.as_xml())

class TestRootNode:
    """Tests for class RootNode."""

    def test_error_handling(self):
        """Errors added via new_error() are flagged and can be collected."""
        tree = parse_sxpr('(A (B D) (C E))')
        tree.init_pos(0)
        root = RootNode()
        # The error for the second child is registered before the error for
        # the first child.
        root.new_error(tree.children[1], "error C")
        root.new_error(tree.children[0], "error B")
        root.swallow(tree)
        assert root.error_flag
        errors = root.collect_errors()
        # The error flag is expected to remain set after collecting.
        assert root.error_flag
        # assert errors == root.collect_errors(True)
        # assert not root.error_flag and not root.collect_errors()
        error_str = "\n".join(str(e) for e in errors)
        # NOTE(review): neither message passed to new_error() contains "A";
        # unless str(e) adds one, find("A") is -1 and this check passes
        # trivially. Presumably the order of "B" and "C" was meant to be
        # compared -- confirm before changing.
        assert error_str.find("A") < error_str.find("B")


class TestNodeFind:
    """Test the select-functions of class Node.
    """

    def test_find(self):
        """select() finds all nodes accepted by the match function."""
        def has_tag(tag_name):
            """Return a match function accepting nodes named `tag_name`."""
            def match(node):
                return node.tag_name == tag_name
            return match

        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        matches = list(tree.select(has_tag("X")))
        # The "X" in '(b X)' is content, not a tag name, so only two nodes match.
        assert len(matches) == 2, len(matches)
        assert str(matches[0]) == 'd', str(matches[0])
        assert str(matches[1]) == 'F', str(matches[1])
        assert matches[0] == parse_sxpr('(X (c d))')
        assert matches[1] == parse_sxpr('(X F)')

        # The root node itself is only matched if include_root is set.
        assert list(tree.select(has_tag('a'), include_root=True))
        assert not list(tree.select(has_tag('a'), include_root=False))

    def test_getitem(self):
        """Integer indices address the immediate children of a node."""
        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        assert tree[0] == parse_sxpr('(b X)')
        assert tree[2] == parse_sxpr('(e (X F))')
        try:
            tree[3]
        except IndexError:
            pass
        else:
            raise AssertionError("IndexError expected!")

        matches = list(tree.select_by_tag('X', False))
        assert matches[0] == parse_sxpr('(X (c d))')
        assert matches[1] == parse_sxpr('(X F)')

    def test_contains(self):
        """`in` checks the tag names of the immediate children only."""
        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        # 'a' is the root itself, not one of its children.
        assert 'a' not in tree
        assert any(tree.select_by_tag('a', True))
        assert not any(tree.select_by_tag('a', False))
        assert 'b' in tree
        assert 'X' in tree
        assert 'e' in tree
        # 'c' is a grandchild: select_by_tag() finds it, `in` does not.
        assert 'c' not in tree
        assert any(tree.select_by_tag('c', False))


class TestSerialization:
    """Tests for the S-expression and XML serialization of trees."""

    def test_sxpr_roundtrip(self):
        """Placeholder: no round-trip test has been written yet."""
        pass

    def test_sexpr_attributes(self):
        """Attributes set on a node serialize like attributes read from an
        S-expression."""
        tree = parse_sxpr('(A "B")')
        tree.attr['attr'] = "value"
        tree2 = parse_sxpr('(A `(attr "value") "B")')
        assert tree.as_sxpr() == tree2.as_sxpr()
        tree.attr['attr2'] = "value2"
        tree3 = parse_sxpr('(A `(attr "value") `(attr2 "value2") "B")')
        assert tree.as_sxpr() == tree3.as_sxpr()

    def test_sexpr(self):
        """as_sxpr() pretty-prints with one node or content line per line."""
        tree = parse_sxpr('(A (B "C") (D "E"))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    "C"\n  )\n  (D\n    "E"\n  )\n)', s

        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    (C\n      "D"\n    )\n    (E\n      "F"\n    )' \
            '\n  )\n  (G\n    "H"\n  )\n)', s

        # Multi-line content is split into one quoted string per line.
        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    (C\n      "D"\n      "X"\n    )' \
            '\n    (E\n      "F"\n    )\n  )\n  (G\n    " H "\n    " Y "\n  )\n)', s

    def test_compact_representation(self):
        """as_sxpr(compact=True) yields an indented form without brackets."""
        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
        compact = tree.as_sxpr(compact=True)
        assert compact == 'A\n  B\n    C "D"\n    E "F"\n  G "H"', compact

        # Multi-line content is put on separate lines below its tag.
        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        compact = tree.as_sxpr(compact=True)
        assert compact == 'A\n  B\n    C\n      "D"\n      "X"\n    E "F"' \
            '\n  G\n    " H "\n    " Y "', compact

    def test_xml_inlining(self):
        """Tags listed in inline_tags are serialized on a single line."""
        tree = parse_sxpr('(A (B "C") (D "E"))')

        xml = tree.as_xml(inline_tags={'A'})
        assert xml == "<A><B>C</B><D>E</D></A>", xml

        # Re-assign xml so that a failure reports the value actually compared.
        xml = tree.as_xml()
        assert xml == "<A>\n  <B>C</B>\n  <D>E</D>\n</A>", xml

        # An xml:space="preserve" attribute is expected to have the same
        # effect as inlining the tag.
        tree.attr['xml:space'] = 'preserve'
        xml = tree.as_xml()
        assert xml == '<A xml:space="preserve"><B>C</B><D>E</D></A>', xml

        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')

        xml = tree.as_xml(inline_tags={'B'})
        assert xml == "<A>\n  <B><C>D</C><E>F</E></B>\n  <G>H</G>\n</A>", xml
        xml = tree.as_xml(inline_tags={'A'})
        assert xml == "<A><B><C>D</C><E>F</E></B><G>H</G></A>", xml

        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        xml = tree.as_xml()
        assert xml == '<A>\n  <B>\n    <C>\n      D\n      X\n    </C>\n    ' \
            '<E>F</E>\n  </B>\n  <G>\n     H \n     Y \n  </G>\n</A>', xml
        xml = tree.as_xml(inline_tags={'A'})
        assert xml == '<A><B><C>D\nX</C><E>F</E></B><G> H \n Y </G></A>', xml

    # def test_xml2(self):
    #     tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
    #     print(tree.as_xml())
    #     print(tree.as_xml(inline_tags={'A'}))


if __name__ == "__main__":
    # When executed as a script, hand this module's namespace to DHParser's
    # own test runner. NOTE(review): the empty first argument presumably
    # means "run all tests" -- confirm against DHParser.testing.runner.
    from DHParser.testing import runner
    runner("", globals())