11.3.2021, 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

test_syntaxtree.py 12.9 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
#!/usr/bin/python3

"""test_syntaxtree.py - test of syntaxtree-module of DHParser 
                             
Author: Eckhart Arnold <arnold@badw.de>

Copyright 2017 Bavarian Academy of Sciences and Humanities

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

22
import copy
23
import sys
Eckhart Arnold's avatar
Eckhart Arnold committed
24 25
sys.path.extend(['../', './'])

26 27
from DHParser.syntaxtree import Node, RootNode, parse_sxpr, parse_xml, flatten_sxpr, \
    flatten_xml, ZOMBIE_TAG
Eckhart Arnold's avatar
Eckhart Arnold committed
28
from DHParser.transform import traverse, reduce_single_child, \
29
    replace_by_single_child, flatten, remove_expendables
30
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
31
from DHParser.dsl import grammar_provider
32
from DHParser.error import Error
33

34

35 36
class TestParseSxpression:
    """Tests for parsing S-expressions with parse_sxpr()."""

    def test_parse_s_expression(self):
        """Parse well-formed S-expressions and reject a string that is no tree."""
        flat = flatten_sxpr(parse_sxpr('(a (b c))').as_sxpr())
        assert flat == '(a (b "c"))', flat

        # Content that is spread over several lines ends up as separate strings.
        flat = flatten_sxpr(parse_sxpr('(a i\nj\nk)').as_sxpr())
        assert flat == '(a "i" "j" "k")', flat

        # Keep the try-body minimal and raise explicitly instead of using
        # `assert False`, which would be stripped when running with -O.
        try:
            parse_sxpr('a b c')
        except ValueError:
            pass
        else:
            raise AssertionError("parse_sxpr() should raise a ValueError "
                                 "if argument is not a tree!")

48 49 50 51 52 53 54
class TestParseXML:
    """Tests for parse_xml() and the XML serialization of trees."""

    def test_roundtrip(self):
        """Serialize a tree as XML, parse it back and serialize it again."""
        tree = parse_sxpr('(a (b c) (d (e f) (h i)))')
        fxml = flatten_xml(tree.as_xml())
        assert fxml == '<a><b>c</b><d><e>f</e><h>i</h></d></a>', fxml

        tree2 = parse_xml(fxml)
        fxml2 = flatten_xml(tree2.as_xml())
        assert fxml == fxml2, fxml2

    def test_plaintext_handling(self):
        """Check how text between child elements is represented."""
        tree = parse_xml('<a>alpha <b>beta</b> gamma</a>')
        flat_sxpr = flatten_sxpr(tree.as_sxpr())
        assert flat_sxpr == '(a (:Token "alpha ") (b "beta") (:Token " gamma"))', flat_sxpr

        # Whitespace-only text between elements on a single line is kept ...
        tree = parse_xml(' <a>  <b>beta</b>  </a> ')
        flat_xml = flatten_xml(tree.as_xml())
        assert flat_xml == '<a><:Token>  </:Token><b>beta</b><:Token>  </:Token></a>', flat_xml
        xml = tree.as_xml(inline_tags={'a'}, omit_tags={':Token'})
        assert xml == '<a>  <b>beta</b>  </a>', xml

        # ... whereas here, with line breaks in the input, none shows up in the output.
        tree = parse_xml(' <a>\n  <b>beta</b>\n</a> ')
        xml = tree.as_xml(inline_tags={'a'})
        assert xml == '<a><b>beta</b></a>', xml

    def test_flatten_xml(self):
        """flatten_xml() removes line breaks and indentation between tags."""
        tree = parse_xml('<alpha>\n  <beta>gamma</beta>\n</alpha>')
        flat_xml = flatten_xml(tree.as_xml())
        assert flat_xml == '<alpha><beta>gamma</beta></alpha>', flat_xml

72

73 74 75 76 77 78
class TestNode:
    """Tests for class Node."""

    def setup(self):
        """Build the two fixture trees that the test methods share.

        ``unique_tree`` has no repeated tag names, ``recurr_tree`` contains
        the tag ``b`` twice.
        """
        self.unique_nodes_sexpr = '(a (b c) (d e) (f (g h)))'
        self.unique_tree = parse_sxpr(self.unique_nodes_sexpr)
        self.recurring_nodes_sexpr = '(a (b x) (c (d e) (b y)))'
        self.recurr_tree = parse_sxpr(self.recurring_nodes_sexpr)

    def setup_method(self, method=None):
        """pytest hook that delegates to setup().

        Recent pytest versions no longer call the nose-style ``setup()``
        method, so without this hook the fixture attributes would be missing.
        Running setup() twice (older pytest calls both) is harmless, because
        it only rebinds the fixture attributes.
        """
        self.setup()

    @staticmethod
    def _assert_only_ids_differ(first, second):
        """Assert that two compilation results differ at most in digits.

        The results are compared position by position; identical results
        (an empty difference) are accepted as well.
        """
        diff = ''.join(a for a, b in zip(first, second) if a != b)
        assert not diff or diff.isnumeric(), diff

    def test_deepcopy(self):
        """A deep copy of a RootNode is equal to, but independent of, the original."""
        sxpr = '(a (b c) (d (e f) (h i)))'
        tree = RootNode(parse_sxpr(sxpr))
        tree_copy = copy.deepcopy(tree)

        assert tree == tree_copy
        assert tree.as_sxpr() == parse_sxpr(sxpr).as_sxpr()
        assert tree_copy.as_sxpr() == parse_sxpr(sxpr).as_sxpr()

        # An error added to the original must not show up in the copy.
        tree.add_error(tree, Error('Test Error', 0))
        assert not tree_copy.all_errors
        assert tree.as_sxpr() != parse_sxpr(sxpr).as_sxpr()
        assert tree_copy.as_sxpr() == parse_sxpr(sxpr).as_sxpr()

        # Changing a result in the original must not affect the copy either.
        tree['d'].result = "x"
        assert tree != tree_copy
        assert tree_copy == parse_sxpr(sxpr)
        assert tree == parse_sxpr('(a (b c) (d x))')

        # Unlike ==, comparing the serialized form also checks for equality
        # of errors, so the tree carrying the error still differs here.
        assert parse_sxpr('(a (b c) (d x))').as_sxpr() != tree.as_sxpr()

    def test_str(self):
        """str() of a tree yields the concatenated content of its leaves."""
        assert str(self.unique_tree) == "ceh"
        assert str(self.recurr_tree) == "xey"

    def test_select_subnodes(self):
        """select() with include_root=True visits the root and all descendants."""
        tags = [node.tag_name
                for node in self.unique_tree.select(lambda nd: True, include_root=True)]
        joined = ''.join(tags)
        assert joined == "abdfg", joined

    def test_find(self):
        """select() finds nodes by content as well as by tag name."""
        found = list(self.unique_tree.select(lambda nd: not nd.children and nd.result == "e"))
        assert len(found) == 1
        assert found[0].result == 'e'

        found = list(self.recurr_tree.select(lambda nd: nd.tag_name == 'b'))
        assert len(found) == 2
        assert found[0].result == 'x' and found[1].result == 'y'

    def test_equality1(self):
        """Trees compare equal if and only if structure and content agree."""
        assert self.unique_tree == self.unique_tree
        assert self.recurr_tree != self.unique_tree
        assert parse_sxpr('(a (b c))') != parse_sxpr('(a (b d))')
        assert parse_sxpr('(a (b c))') == parse_sxpr('(a (b c))')

    def test_equality2(self):
        """A parsed and transformed tree equals the same tree built from an S-expression."""
        ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
        att = {"term": [remove_expendables, replace_by_single_child, flatten],
               "factor": [remove_expendables, reduce_single_child],
               "*": [remove_expendables, replace_by_single_child]}
        parser = grammar_provider(ebnf)()
        tree = parser("20 / 4 * 3")
        traverse(tree, att)
        compare_tree = parse_sxpr(
            "(term (term (factor 20) (:Token /) (factor 4)) (:Token *) (factor 3))")
        assert tree == compare_tree, tree.as_sxpr()

    def test_copy(self):
        """Modifying a deep copy leaves the original tree untouched."""
        cpy = copy.deepcopy(self.unique_tree)
        assert cpy == self.unique_tree
        assert cpy.result[0].result != "epsilon"  # just to make sure...
        cpy.result[0].result = "epsilon"
        assert cpy != self.unique_tree

    def test_copy2(self):
        """Deep copies can be transformed and compiled just like the original.

        Tests whether Node.__deepcopy__ goes sufficiently deep for the
        AST-transformation and compilation to perform correctly after copying.
        """
        ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
        parser = get_ebnf_grammar()
        transform = get_ebnf_transformer()
        compiler = get_ebnf_compiler()
        tree = parser(ebnf)

        # copy -> transform -> compile, then compile a copy of the transformed copy
        tree_copy = copy.deepcopy(tree)
        transform(tree_copy)
        res1 = compiler(tree_copy)
        t2 = copy.deepcopy(tree_copy)
        res2 = compiler(t2)
        self._assert_only_ids_differ(res1, res2)  # differences should only be ID-Numbers

        # a fresh copy of the untransformed tree
        tree_copy = copy.deepcopy(tree)
        transform(tree_copy)
        res3 = compiler(tree_copy)
        self._assert_only_ids_differ(res2, res3)  # differences should only be ID-Numbers

        # finally the original tree itself
        transform(tree)
        res4 = compiler(tree)
        self._assert_only_ids_differ(res3, res4)  # differences should only be ID-Numbers

    def test_len_and_pos(self):
        """Test the length of nodes and the positions assigned by init_pos()."""
        nd1 = Node(ZOMBIE_TAG, "123")
        assert len(nd1) == 3, "Expected Node.len == 3, got %i" % len(nd1)
        nd2 = Node(ZOMBIE_TAG, "456")
        assert len(nd2) == 3, "Expected Node.len == 3, got %i" % len(nd2)
        nd = Node(ZOMBIE_TAG, (nd1, nd2))
        assert len(nd) == 6, "Expected Node.len == 6, got %i" % len(nd)

        # The second child is expected to start where the first one ends.
        nd.init_pos(0)
        assert nd.pos == 0, "Expected Node.pos == 0, got %i" % nd.pos
        assert nd1.pos == 0, "Expected Node.pos == 0, got %i" % nd1.pos
        assert nd2.pos == 3, "Expected Node.pos == 3, got %i" % nd2.pos

    def test_xml_sanitizer(self):
        """Characters with a special meaning in XML are escaped by as_xml()."""
        node = Node('tag', '<&>')
        assert node.as_xml() == '<tag>&lt;&amp;&gt;</tag>'
188

di68kap's avatar
di68kap committed
189 190 191

class TestRootNode:
    """Tests for class RootNode."""

    def test_error_handling(self):
        """Errors registered on a RootNode set its error flag and are collected.

        The errors are deliberately registered in reverse order of the
        positions of their nodes (first for child C, then for child B).
        """
        tree = parse_sxpr('(A (B D) (C E))')
        tree.init_pos(0)
        root = RootNode()
        root.new_error(tree.children[1], "error C")
        root.new_error(tree.children[0], "error B")
        root.swallow(tree)
        assert root.error_flag

        errors = root.collect_errors()
        # Collecting the errors is not supposed to clear the error flag.
        assert root.error_flag

        error_str = "\n".join(str(e) for e in errors)
        # NOTE(review): assumes collect_errors() returns the errors ordered by
        # position, so that "error B" is listed before "error C" -- confirm
        # against DHParser.syntaxtree. The previous check compared the index
        # of "A", which does not occur in either message, with that of "B"
        # and therefore could not detect a wrong order.
        assert 0 <= error_str.find("error B") < error_str.find("error C"), error_str


207
class TestNodeFind:
    """Test the select-functions of class Node."""

    def test_find(self):
        """select() yields all matching descendants in document order."""
        def match_tag_name(node, tag_name):
            return node.tag_name == tag_name

        def matchf(node):
            return match_tag_name(node, "X")

        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        matches = list(tree.select(matchf))
        assert len(matches) == 2, len(matches)
        assert str(matches[0]) == 'd', str(matches[0])
        assert str(matches[1]) == 'F', str(matches[1])
        assert matches[0] == parse_sxpr('(X (c d))')
        assert matches[1] == parse_sxpr('(X F)')

        # The root itself is only matched if include_root is set.
        def matchf2(node):
            return match_tag_name(node, 'a')

        assert list(tree.select(matchf2, include_root=True))
        assert not list(tree.select(matchf2, include_root=False))

    def test_getitem(self):
        """Integer indices address the children; a bad index raises IndexError."""
        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        assert tree[0] == parse_sxpr('(b X)')
        assert tree[2] == parse_sxpr('(e (X F))')

        # Raise explicitly rather than via `assert False`, which would be
        # stripped when running with -O.
        try:
            tree[3]
        except IndexError:
            pass
        else:
            raise AssertionError("IndexError expected!")

        matches = list(tree.select_by_tag('X', False))
        assert matches[0] == parse_sxpr('(X (c d))')
        assert matches[1] == parse_sxpr('(X F)')

    def test_contains(self):
        """`in` checks the tag names of the immediate children only."""
        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')

        # The root's own tag name is not "contained" in the tree ...
        assert 'a' not in tree
        assert any(tree.select_by_tag('a', True))
        assert not any(tree.select_by_tag('a', False))

        # ... while the tag names of its children are.
        assert 'b' in tree
        assert 'X' in tree
        assert 'e' in tree

        # A grandchild is found by select_by_tag(), but not by `in`.
        assert 'c' not in tree
        assert any(tree.select_by_tag('c', False))
250 251


252
class TestSerialization:
    """Tests for the S-expression and XML serialization of trees."""

    def test_sxpr_roundtrip(self):
        """Placeholder; this test has not been implemented."""
        pass

    def test_sexpr_attributes(self):
        """Attributes set on a node match those given in the S-expression source."""
        tree = parse_sxpr('(A "B")')
        tree.attr['attr'] = "value"
        tree2 = parse_sxpr('(A `(attr "value") "B")')
        assert tree.as_sxpr() == tree2.as_sxpr()

        tree.attr['attr2'] = "value2"
        tree3 = parse_sxpr('(A `(attr "value") `(attr2 "value2") "B")')
        assert tree.as_sxpr() == tree3.as_sxpr()

    def test_sexpr(self):
        """Check the layout of the default (indented) S-expression output."""
        tree = parse_sxpr('(A (B "C") (D "E"))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    "C"\n  )\n  (D\n    "E"\n  )\n)', s

        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
        s = tree.as_sxpr()
        expected = ('(A\n  (B\n    (C\n      "D"\n    )\n    (E\n      "F"\n    )'
                    '\n  )\n  (G\n    "H"\n  )\n)')
        assert s == expected, s

        # Multi-line content is written as one quoted string per line.
        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        s = tree.as_sxpr()
        expected = ('(A\n  (B\n    (C\n      "D"\n      "X"\n    )'
                    '\n    (E\n      "F"\n    )\n  )\n  (G\n    " H "\n    " Y "\n  )\n)')
        assert s == expected, s

    def test_compact_representation(self):
        """Check the layout of the compact S-expression output."""
        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
        compact = tree.as_sxpr(compact=True)
        assert compact == 'A\n  B\n    C "D"\n    E "F"\n  G "H"', compact

        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        compact = tree.as_sxpr(compact=True)
        expected = ('A\n  B\n    C\n      "D"\n      "X"\n    E "F"'
                    '\n  G\n    " H "\n    " Y "')
        assert compact == expected, compact

    def test_xml_inlining(self):
        """Check the XML output with and without inlined tags."""
        tree = parse_sxpr('(A (B "C") (D "E"))')

        xml = tree.as_xml(inline_tags={'A'})
        assert xml == "<A><B>C</B><D>E</D></A>", xml

        # Bind the result first, so that a failure reports the output that
        # was actually compared rather than the one of the previous call.
        xml = tree.as_xml()
        assert xml == "<A>\n  <B>C</B>\n  <D>E</D>\n</A>", xml

        # With xml:space="preserve" set, the output is expected on one line.
        tree.attr['xml:space'] = 'preserve'
        xml = tree.as_xml()
        assert xml == '<A xml:space="preserve"><B>C</B><D>E</D></A>', xml

        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')

        xml = tree.as_xml(inline_tags={'B'})
        assert xml == "<A>\n  <B><C>D</C><E>F</E></B>\n  <G>H</G>\n</A>", xml
        xml = tree.as_xml(inline_tags={'A'})
        assert xml == "<A><B><C>D</C><E>F</E></B><G>H</G></A>", xml

        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        xml = tree.as_xml()
        expected = ('<A>\n  <B>\n    <C>\n      D\n      X\n    </C>\n    '
                    '<E>F</E>\n  </B>\n  <G>\n     H \n     Y \n  </G>\n</A>')
        assert xml == expected, xml
        xml = tree.as_xml(inline_tags={'A'})
        assert xml == '<A><B><C>D\nX</C><E>F</E></B><G> H \n Y </G></A>', xml

    # def test_xml2(self):
    #     tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
    #     print(tree.as_xml())
    #     print(tree.as_xml(inline_tags={'A'}))
318

di68kap's avatar
di68kap committed
319

320
if __name__ == "__main__":
321
    from DHParser.testing import runner
322
    runner("", globals())