test_syntaxtree.py 12.9 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
#!/usr/bin/python3

"""test_syntaxtree.py - test of syntaxtree-module of DHParser 
                             
Author: Eckhart Arnold <arnold@badw.de>

Copyright 2017 Bavarian Academy of Sciences and Humanities

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

22
import copy
23
import sys
Eckhart Arnold's avatar
Eckhart Arnold committed
24 25
sys.path.extend(['../', './'])

di68kap's avatar
di68kap committed
26
from DHParser.syntaxtree import Node, RootNode, parse_sxpr, parse_xml, flatten_sxpr, flatten_xml
Eckhart Arnold's avatar
Eckhart Arnold committed
27
from DHParser.transform import traverse, reduce_single_child, \
28
    replace_by_single_child, flatten, remove_expendables
29
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
30
from DHParser.dsl import grammar_provider
31
from DHParser.error import Error
32

33

34 35
class TestParseSxpression:
    """Tests for reading S-expressions with parse_sxpr()."""

    def test_parse_s_expression(self):
        """Parse valid S-expressions and reject input that is not a tree."""
        flat = flatten_sxpr(parse_sxpr('(a (b c))').as_sxpr())
        assert flat == '(a (b "c"))', flat

        # content that is spread over several lines is serialized as several strings
        flat = flatten_sxpr(parse_sxpr('(a i\nj\nk)').as_sxpr())
        assert flat == '(a "i" "j" "k")', flat

        rejected = False
        try:
            parse_sxpr('a b c')
        except ValueError:
            rejected = True
        assert rejected, "parse_sxpr() should raise a ValueError " \
                         "if argument is not a tree!"

47 48 49 50 51 52 53
class TestParseXML:
    """Tests for parse_xml() and the XML-serialization of trees."""

    def test_roundtrip(self):
        """Serialize a tree as XML, parse that XML and serialize it again."""
        original = parse_sxpr('(a (b c) (d (e f) (h i)))')
        fxml = flatten_xml(original.as_xml())
        assert fxml == '<a><b>c</b><d><e>f</e><h>i</h></d></a>'

        reparsed = parse_xml(fxml)
        assert fxml == flatten_xml(reparsed.as_xml())

    def test_plaintext_handling(self):
        """Check how text and whitespace between tags is represented."""
        mixed = parse_xml('<a>alpha <b>beta</b> gamma</a>')
        sxpr = flatten_sxpr(mixed.as_sxpr())
        assert sxpr == '(a (:Token "alpha ") (b "beta") (:Token " gamma"))'

        # whitespace on a single line shows up as ":Token"-nodes
        spaced = parse_xml(' <a>  <b>beta</b>  </a> ')
        flat = flatten_xml(spaced.as_xml())
        assert flat == '<a><:Token>  </:Token><b>beta</b><:Token>  </:Token></a>'
        inlined = spaced.as_xml(inline_tags={'a'}, omit_tags={':Token'})
        assert inlined == '<a>  <b>beta</b>  </a>'

        # the same document, but with line breaks between the tags
        multiline = parse_xml(' <a>\n  <b>beta</b>\n</a> ')
        inlined = multiline.as_xml(inline_tags={'a'})
        assert inlined == '<a><b>beta</b></a>'

    def test_flatten_xml(self):
        """flatten_xml() should strip line breaks and indentation between tags."""
        parsed = parse_xml('<alpha>\n  <beta>gamma</beta>\n</alpha>')
        flat_xml = flatten_xml(parsed.as_xml())
        assert flat_xml == '<alpha><beta>gamma</beta></alpha>', flat_xml

71

72 73 74 75 76 77
class TestNode:
    """
    Tests for class Node
    """

    def setup(self):
        """Create the two fixture trees that the tests of this class read.

        NOTE(review): presumably called by the test runner before each test
        (nose-style ``setup`` hook) -- confirm before renaming this method.
        """
        self.unique_nodes_sexpr = '(a (b c) (d e) (f (g h)))'
        self.unique_tree = parse_sxpr(self.unique_nodes_sexpr)
        self.recurring_nodes_sexpr = '(a (b x) (c (d e) (b y)))'
        self.recurr_tree = parse_sxpr(self.recurring_nodes_sexpr)

    @staticmethod
    def _assert_only_id_differences(first, second):
        """Assert that `first` and `second` differ in numeric characters only.

        Both results are compared position by position (up to the length of
        the shorter one). Identical results are accepted as well.
        """
        diff = ''.join([a for a, b in zip(first, second) if a != b])
        # ''.isnumeric() is False, so an empty difference must be allowed
        # explicitly; otherwise identical results would fail the check.
        assert not diff or diff.isnumeric(), diff

    def test_deepcopy(self):
        """A deep copy must be equal to, but independent of, its original."""
        sxpr = '(a (b c) (d (e f) (h i)))'
        tree = RootNode(parse_sxpr(sxpr))
        tree_copy = copy.deepcopy(tree)

        assert tree == tree_copy
        assert tree.as_sxpr() == parse_sxpr(sxpr).as_sxpr()
        assert tree_copy.as_sxpr() == parse_sxpr(sxpr).as_sxpr()

        # an error added to the original must not show up in the copy
        tree.add_error(tree, Error('Test Error', 0))
        assert not tree_copy.all_errors
        assert tree.as_sxpr() != parse_sxpr(sxpr).as_sxpr()
        assert tree_copy.as_sxpr() == parse_sxpr(sxpr).as_sxpr()

        # changing the content of the original must not change the copy
        tree['d'].result = "x"
        assert tree != tree_copy
        assert tree_copy == parse_sxpr(sxpr)
        assert tree == parse_sxpr('(a (b c) (d x))')

        # this also checks for errors equality...
        assert parse_sxpr('(a (b c) (d x))').as_sxpr() != tree.as_sxpr()

    def test_str(self):
        """str() of a tree yields the concatenated content of its leaves."""
        assert str(self.unique_tree) == "ceh"
        assert str(self.recurr_tree) == "xey"

    def test_select_subnodes(self):
        """select() with an always-true match function yields all nodes."""
        tags = [node.tag_name
                for node in self.unique_tree.select(lambda nd: True, include_root=True)]
        assert ''.join(tags) == "abdfg", ''.join(tags)

    def test_find(self):
        """select() finds nodes by content as well as by tag name."""
        found = list(self.unique_tree.select(lambda nd: not nd.children and nd.result == "e"))
        assert len(found) == 1
        assert found[0].result == 'e'
        found = list(self.recurr_tree.select(lambda nd: nd.tag_name == 'b'))
        assert len(found) == 2
        assert found[0].result == 'x' and found[1].result == 'y'

    def test_equality1(self):
        """Trees compare equal if and only if they were parsed from equal input."""
        assert self.unique_tree == self.unique_tree
        assert self.recurr_tree != self.unique_tree
        assert parse_sxpr('(a (b c))') != parse_sxpr('(a (b d))')
        assert parse_sxpr('(a (b c))') == parse_sxpr('(a (b c))')

    def test_equality2(self):
        """A parsed and transformed tree equals the tree read from an S-expression."""
        ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
        att = {"term": [remove_expendables, replace_by_single_child, flatten],
               "factor": [remove_expendables, reduce_single_child],
               "*": [remove_expendables, replace_by_single_child]}
        parser = grammar_provider(ebnf)()
        tree = parser("20 / 4 * 3")
        traverse(tree, att)
        compare_tree = parse_sxpr("(term (term (factor 20) (:Token /) (factor 4)) (:Token *) (factor 3))")
        assert tree == compare_tree, tree.as_sxpr()

    def test_copy(self):
        """Changing a child of a deep copy makes the copy unequal to the original."""
        cpy = copy.deepcopy(self.unique_tree)
        assert cpy == self.unique_tree
        assert cpy.result[0].result != "epsilon"  # just to make sure...
        cpy.result[0].result = "epsilon"
        assert cpy != self.unique_tree

    def test_copy2(self):
        """Deep copies must compile to the same result as the original tree."""
        # test if Node.__deepcopy__ goes sufficiently deep for ast-
        # transformation and compiling to perform correctly after copy
        ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
        parser = get_ebnf_grammar()
        transform = get_ebnf_transformer()
        compiler = get_ebnf_compiler()
        tree = parser(ebnf)

        # copy before the transformation, copy again after the transformation
        tree_copy = copy.deepcopy(tree)
        transform(tree_copy)
        res1 = compiler(tree_copy)
        t2 = copy.deepcopy(tree_copy)
        res2 = compiler(t2)
        self._assert_only_id_differences(res1, res2)

        # a second, fresh copy of the untransformed tree
        tree_copy = copy.deepcopy(tree)
        transform(tree_copy)
        res3 = compiler(tree_copy)
        self._assert_only_id_differences(res2, res3)

        # finally the original tree itself
        transform(tree)
        res4 = compiler(tree)
        self._assert_only_id_differences(res3, res4)

    def test_len_and_pos(self):
        """Test len() of nodes and the positions assigned by init_pos()."""
        nd1 = Node(None, "123")
        assert len(nd1) == 3, "Expected Node.len == 3, got %i" % len(nd1)
        nd2 = Node(None, "456")
        assert len(nd2) == 3, "Expected Node.len == 3, got %i" % len(nd2)
        nd = Node(None, (nd1, nd2))
        assert len(nd) == 6, "Expected Node.len == 6, got %i" % len(nd)
        nd.init_pos(0)
        assert nd.pos == 0, "Expected Node.pos == 0, got %i" % nd.pos
        assert nd1.pos == 0, "Expected Node.pos == 0, got %i" % nd1.pos
        assert nd2.pos == 3, "Expected Node.pos == 3, got %i" % nd2.pos

    def test_xml_sanitizer(self):
        """Characters with a special meaning in XML are escaped by as_xml()."""
        node = Node('tag', '<&>')
        assert node.as_xml() == '<tag>&lt;&amp;&gt;</tag>'
187

di68kap's avatar
di68kap committed
188 189 190

class TestRootNode:
    """Tests for the error bookkeeping of class RootNode."""

    def test_error_handling(self):
        """Errors registered on a RootNode survive swallowing a tree."""
        tree = parse_sxpr('(A (B D) (C E))')
        tree.init_pos(0)
        children = tree.children

        root = RootNode()
        # register the errors in reverse order of the nodes they belong to
        root.new_error(children[1], "error C")
        root.new_error(children[0], "error B")
        root.swallow(tree)
        assert root.error_flag

        collected = root.collect_errors()
        # collecting the errors must leave the error flag untouched
        assert root.error_flag
        # assert errors == root.collect_errors(True)
        # assert not root.error_flag and not root.collect_errors()

        error_str = "\n".join(map(str, collected))
        # NOTE(review): the messages registered above are "error C" and
        # "error B"; unless str(Error) adds an "A", find("A") yields -1 and
        # this assertion holds regardless of the order -- verify the intent.
        assert error_str.find("A") < error_str.find("B")


206
class TestNodeFind:
    """Test the select-functions of class Node.
    """

    def test_find(self):
        """select() yields the matching nodes in document order."""
        def has_tag(tag_name):
            """Return a match function for nodes with the given tag name."""
            def matches_tag(node):
                return node.tag_name == tag_name
            return matches_tag

        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        matches = list(tree.select(has_tag("X")))
        assert len(matches) == 2, len(matches)
        first, second = matches
        assert str(first) == 'd', str(first)
        assert str(second) == 'F', str(second)
        assert first == parse_sxpr('(X (c d))')
        assert second == parse_sxpr('(X F)')

        # the root node is matched only if include_root is set
        is_root_tag = has_tag('a')
        assert list(tree.select(is_root_tag, include_root=True))
        assert not list(tree.select(is_root_tag, include_root=False))

    def test_getitem(self):
        """Children can be accessed by index; a bad index raises IndexError."""
        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        assert tree[0] == parse_sxpr('(b X)')
        assert tree[2] == parse_sxpr('(e (X F))')

        out_of_range = False
        try:
            tree[3]
        except IndexError:
            out_of_range = True
        assert out_of_range, "IndexError expected!"

        matches = list(tree.select_by_tag('X', False))
        assert matches[0] == parse_sxpr('(X (c d))')
        assert matches[1] == parse_sxpr('(X F)')

    def test_contains(self):
        """`in` tests the tag names of the immediate children only."""
        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')

        # the root's own tag name does not count as being contained
        assert 'a' not in tree
        assert any(tree.select_by_tag('a', True))
        assert not any(tree.select_by_tag('a', False))

        for child_tag in ('b', 'X', 'e'):
            assert child_tag in tree

        # 'c' occurs further down in the tree, but not as an immediate child
        assert 'c' not in tree
        assert any(tree.select_by_tag('c', False))
249 250


251
class TestSerialization:
    """Tests for the S-expression- and XML-serialization of trees."""

    def test_sxpr_roundtrip(self):
        """Placeholder: this test has not been implemented yet."""
        pass

    def test_sexpr_attributes(self):
        """Attributes set on a node equal attributes read from an S-expression."""
        tree = parse_sxpr('(A "B")')
        tree.attr['attr'] = "value"
        tree2 = parse_sxpr('(A `(attr "value") "B")')
        assert tree.as_sxpr() == tree2.as_sxpr()
        tree.attr['attr2'] = "value2"
        tree3 = parse_sxpr('(A `(attr "value") `(attr2 "value2") "B")')
        assert tree.as_sxpr() == tree3.as_sxpr()

    def test_sexpr(self):
        """Check the indented multi-line layout produced by as_sxpr()."""
        tree = parse_sxpr('(A (B "C") (D "E"))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    "C"\n  )\n  (D\n    "E"\n  )\n)', s
        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    (C\n      "D"\n    )\n    (E\n      "F"\n    )' \
            '\n  )\n  (G\n    "H"\n  )\n)', s
        # multi-line content is written as one string per line
        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    (C\n      "D"\n      "X"\n    )' \
            '\n    (E\n      "F"\n    )\n  )\n  (G\n    " H "\n    " Y "\n  )\n)', s

    def test_compact_representation(self):
        """Check the bracket-free layout produced by as_sxpr(compact=True)."""
        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
        compact = tree.as_sxpr(compact=True)
        assert compact == 'A\n  B\n    C "D"\n    E "F"\n  G "H"', compact
        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        compact = tree.as_sxpr(compact=True)
        assert compact == 'A\n  B\n    C\n      "D"\n      "X"\n    E "F"' \
            '\n  G\n    " H "\n    " Y "', compact

    def test_xml_inlining(self):
        """Check as_xml() with and without inline tags and xml:space."""
        tree = parse_sxpr('(A (B "C") (D "E"))')

        xml = tree.as_xml(inline_tags={'A'})
        assert xml == "<A><B>C</B><D>E</D></A>", xml

        # re-assign xml, so that a failure reports the value actually compared
        xml = tree.as_xml()
        assert xml == "<A>\n  <B>C</B>\n  <D>E</D>\n</A>", xml

        tree.attr['xml:space'] = 'preserve'
        xml = tree.as_xml()
        assert xml == '<A xml:space="preserve"><B>C</B><D>E</D></A>', xml

        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')

        xml = tree.as_xml(inline_tags={'B'})
        assert xml == "<A>\n  <B><C>D</C><E>F</E></B>\n  <G>H</G>\n</A>", xml
        xml = tree.as_xml(inline_tags={'A'})
        assert xml == "<A><B><C>D</C><E>F</E></B><G>H</G></A>", xml

        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        xml = tree.as_xml()
        assert xml == '<A>\n  <B>\n    <C>\n      D\n      X\n    </C>\n    ' \
            '<E>F</E>\n  </B>\n  <G>\n     H \n     Y \n  </G>\n</A>', xml
        xml = tree.as_xml(inline_tags={'A'})
        assert xml == '<A><B><C>D\nX</C><E>F</E></B><G> H \n Y </G></A>', xml

    # def test_xml2(self):
    #     tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
    #     print(tree.as_xml())
    #     print(tree.as_xml(inline_tags={'A'}))
317

di68kap's avatar
di68kap committed
318

319
if __name__ == "__main__":
    # When executed as a script, hand this module's namespace to DHParser's
    # test runner; the empty string is passed as the `tests` argument.
    from DHParser.testing import runner

    runner("", globals())