#!/usr/bin/python3

"""test_syntaxtree.py - test of syntaxtree-module of DHParser 
                             
Author: Eckhart Arnold <arnold@badw.de>

Copyright 2017 Bavarian Academy of Sciences and Humanities

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

22
import copy
import sys

# The search path is extended *before* the DHParser imports below --
# presumably so that the package is found when this script is started from
# the project root or from the tests directory (TODO confirm).
sys.path.extend(['../', './'])

from DHParser.syntaxtree import (Node, RootNode, parse_sxpr, parse_xml,
                                 flatten_sxpr, flatten_xml, TOKEN_PTYPE)
from DHParser.transform import traverse, reduce_single_child, \
    replace_by_single_child, flatten, remove_expendables
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
from DHParser.dsl import grammar_provider
31

32

33 34
class TestParseSxpression:
    """Tests for reading S-expressions with parse_sxpr()."""

    def test_parse_s_expression(self):
        """Check the serialization of parsed S-expressions and the
        ValueError for an argument that is not a tree."""
        flat = flatten_sxpr(parse_sxpr('(a (b c))').as_sxpr())
        assert flat == '(a (b "c"))', flat

        # content that is spread over several lines is expected to come back
        # as one quoted string per line
        flat = flatten_sxpr(parse_sxpr('(a i\nj\nk)').as_sxpr())
        assert flat == '(a "i" "j" "k")', flat

        # Only the call that is supposed to raise lives inside the try-body;
        # the failure for a missing exception is reported in the else-branch.
        try:
            parse_sxpr('a b c')
        except ValueError:
            pass
        else:
            assert False, "parse_sxpr() should raise a ValueError " \
                          "if argument is not a tree!"

46 47 48 49 50 51 52
class TestParseXML:
    """Tests for parse_xml() and for the XML-serialization of trees."""

    def test_roundtrip(self):
        """A tree serialized as XML and parsed again must serialize to the
        same flattened XML."""
        tree = parse_sxpr('(a (b c) (d (e f) (h i)))')
        fxml = flatten_xml(tree.as_xml())
        assert fxml == '<a><b>c</b><d><e>f</e><h>i</h></d></a>', fxml
        tree2 = parse_xml(fxml)
        fxml2 = flatten_xml(tree2.as_xml())
        assert fxml == fxml2, fxml2

    def test_plaintext_handling(self):
        """Check how text between child elements (mixed content) is
        represented in the tree and in the serialized XML."""
        # plain text next to child elements is expected in ":Token"-nodes
        tree = parse_xml('<a>alpha <b>beta</b> gamma</a>')
        sxpr = flatten_sxpr(tree.as_sxpr())
        assert sxpr == '(a (:Token "alpha ") (b "beta") (:Token " gamma"))', sxpr

        # whitespace inside <a> is kept, whitespace around it is not
        tree = parse_xml(' <a>  <b>beta</b>  </a> ')
        xml = flatten_xml(tree.as_xml())
        assert xml == '<a><:Token>  </:Token><b>beta</b><:Token>  </:Token></a>', xml
        xml = tree.as_xml(inline_tags={'a'}, omit_tags={':Token'})
        assert xml == '<a>  <b>beta</b>  </a>', xml

        # whitespace that contains line breaks does not appear in the output
        tree = parse_xml(' <a>\n  <b>beta</b>\n</a> ')
        xml = tree.as_xml(inline_tags={'a'})
        assert xml == '<a><b>beta</b></a>', xml

    def test_flatten_xml(self):
        """flatten_xml() must yield XML without line breaks and indentation."""
        tree = parse_xml('<alpha>\n  <beta>gamma</beta>\n</alpha>')
        flat_xml = flatten_xml(tree.as_xml())
        assert flat_xml == '<alpha><beta>gamma</beta></alpha>', flat_xml

70

71 72 73 74 75 76
class TestNode:
    """
    Tests for class Node
    """
    def setup(self):
        """Build the two fixture trees that the tests of this class read:
        one with unique tag names and one where tag 'b' occurs twice."""
        self.unique_nodes_sexpr = '(a (b c) (d e) (f (g h)))'
        self.unique_tree = parse_sxpr(self.unique_nodes_sexpr)
        self.recurring_nodes_sexpr = '(a (b x) (c (d e) (b y)))'
        self.recurr_tree = parse_sxpr(self.recurring_nodes_sexpr)

    def setup_method(self, method=None):
        """xunit-style hook for pytest; delegates to setup().

        Recent pytest releases do not call a nose-style ``setup()`` any more.
        ``setup()`` itself is kept under its old name for callers that invoke
        it directly. Running it twice is harmless, because it only rebuilds
        the fixture trees.
        """
        self.setup()

    def test_str(self):
        """str() of a tree is the concatenation of its leaf contents."""
        assert str(self.unique_tree) == "ceh"
        assert str(self.recurr_tree) == "xey"

    def test_select_subnodes(self):
        """select() with a match-all predicate and include_root=True must
        yield the root first and then the descendants in document order."""
        tags = [node.tag_name
                for node in self.unique_tree.select(lambda nd: True, include_root=True)]
        joined = ''.join(tags)
        assert joined == "abdfg", joined

    def test_find(self):
        """select() must yield exactly the nodes matching the predicate."""
        # a leaf, identified by its content
        found = list(self.unique_tree.select(lambda nd: not nd.children and nd.result == "e"))
        assert len(found) == 1
        assert found[0].result == 'e'
        # all nodes with a recurring tag name, in document order
        found = list(self.recurr_tree.select(lambda nd: nd.tag_name == 'b'))
        assert len(found) == 2
        assert found[0].result == 'x' and found[1].result == 'y'

    def test_equality1(self):
        """Trees compare equal if and only if structure and content agree."""
        assert self.unique_tree == self.unique_tree
        assert self.recurr_tree != self.unique_tree
        assert parse_sxpr('(a (b c))') != parse_sxpr('(a (b d))')
        assert parse_sxpr('(a (b c))') == parse_sxpr('(a (b c))')

    def test_equality2(self):
        """A parsed and transformed tree must equal the same tree read from
        an S-expression."""
        ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
        # transformation table passed to traverse()
        att = {"term": [remove_expendables, replace_by_single_child, flatten],
               "factor": [remove_expendables, reduce_single_child],
               "*": [remove_expendables, replace_by_single_child]}
        parser = grammar_provider(ebnf)()
        tree = parser("20 / 4 * 3")
        traverse(tree, att)
        compare_tree = parse_sxpr(
            "(term (term (factor 20) (:Token /) (factor 4)) (:Token *) (factor 3))")
        assert tree == compare_tree, tree.as_sxpr()

    def test_copy(self):
        """A deep copy equals its original, but must not share its nodes."""
        cpy = copy.deepcopy(self.unique_tree)
        assert cpy == self.unique_tree
        assert cpy.result[0].result != "epsilon"  # just to make sure...
        # changing the copy must leave the original untouched
        cpy.result[0].result = "epsilon"
        assert cpy != self.unique_tree

    def test_copy2(self):
        """Transforming and compiling deep copies of a syntax tree must
        yield the same results as working on the original tree."""
        # test if Node.__deepcopy__ goes sufficiently deep for ast-
        # transformation and compiling to perform correctly after copy
        ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
        parser = get_ebnf_grammar()
        transform = get_ebnf_transformer()
        compiler = get_ebnf_compiler()
        tree = parser(ebnf)

        # copy taken before the transformation
        tree_copy = copy.deepcopy(tree)
        transform(tree_copy)
        res1 = compiler(tree_copy)

        # copy taken after the transformation (and after compiling once)
        t2 = copy.deepcopy(tree_copy)
        res2 = compiler(t2)
        assert res1 == res2

        # a second, fresh copy of the untransformed tree
        tree_copy = copy.deepcopy(tree)
        transform(tree_copy)
        res3 = compiler(tree_copy)
        assert res3 == res2

        # finally, the original tree itself
        transform(tree)
        res4 = compiler(tree)
        assert res4 == res3

    def test_len_and_pos(self):
        """Test the length of nodes and the positions set by init_pos()."""
        nd1 = Node(None, "123")
        assert len(nd1) == 3, "Expected Node.len == 3, got %i" % len(nd1)
        nd2 = Node(None, "456")
        assert len(nd2) == 3, "Expected Node.len == 3, got %i" % len(nd2)
        # the length of a branch node is the sum of its children's lengths
        nd = Node(None, (nd1, nd2))
        assert len(nd) == 6, "Expected Node.len == 6, got %i" % len(nd)
        nd.init_pos(0)
        assert nd.pos == 0, "Expected Node.pos == 0, got %i" % nd.pos
        assert nd1.pos == 0, "Expected Node.pos == 0, got %i" % nd1.pos
        assert nd2.pos == 3, "Expected Node.pos == 3, got %i" % nd2.pos

di68kap's avatar
di68kap committed
157 158 159

class TestRootNode:
    """Tests for the error bookkeeping of class RootNode."""

    def test_error_handling(self):
        """Errors registered on a RootNode must set its error flag and be
        returned by collect_errors() after the root has swallowed a tree."""
        tree = parse_sxpr('(A (B D) (C E))')
        tree.init_pos(0)
        root = RootNode()
        # the error for the second child is registered before the error
        # for the first child
        root.new_error(tree.children[1], "error C")
        root.new_error(tree.children[0], "error B")
        root.swallow(tree)
        assert root.error_flag
        errors = root.collect_errors()
        # collecting the errors is expected to leave the error flag set
        assert root.error_flag
        # assert errors == root.collect_errors(True)
        # assert not root.error_flag and not root.collect_errors()
        error_str = "\n".join(str(e) for e in errors)
        # NOTE(review): neither error message contains an "A", so unless
        # str(error) adds one, find("A") is -1 and this only verifies that
        # "B" occurs at all. Presumably the order of "error B" and "error C"
        # was meant to be checked -- confirm against collect_errors().
        assert error_str.find("A") < error_str.find("B"), error_str


175
class TestNodeFind:
    """Test the select-functions of class Node.
    """

    def test_find(self):
        """select() must yield all matching nodes below the root in document
        order; the root itself only if include_root is set."""
        def match_tag_name(node, tag_name):
            return node.tag_name == tag_name

        def matches_x(node):
            return match_tag_name(node, "X")

        def matches_a(node):
            return match_tag_name(node, 'a')

        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        matches = list(tree.select(matches_x))
        assert len(matches) == 2, len(matches)
        assert str(matches[0]) == 'd', str(matches[0])
        assert str(matches[1]) == 'F', str(matches[1])
        assert matches[0] == parse_sxpr('(X (c d))')
        assert matches[1] == parse_sxpr('(X F)')
        # check the effect of the include_root-flag: only with
        # include_root=True can the root node 'a' be among the matches
        assert list(tree.select(matches_a, include_root=True))
        assert not list(tree.select(matches_a, include_root=False))

    def test_getitem(self):
        """Integer indices address the children of a node; an index beyond
        the last child must raise an IndexError."""
        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        assert tree[0] == parse_sxpr('(b X)')
        assert tree[2] == parse_sxpr('(e (X F))')
        try:
            tree[3]
            assert False, "IndexError expected!"
        except IndexError:
            pass
        matches = list(tree.select_by_tag('X', False))
        assert matches[0] == parse_sxpr('(X (c d))')
        assert matches[1] == parse_sxpr('(X F)')

    def test_contains(self):
        """The in-operator is expected to be true for the tag names of the
        immediate children ('b', 'X', 'e'), but neither for the node's own
        tag name ('a') nor for tags that only occur deeper down ('c')."""
        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        assert 'a' not in tree
        assert any(tree.select_by_tag('a', True))
        assert not any(tree.select_by_tag('a', False))
        assert 'b' in tree
        assert 'X' in tree
        assert 'e' in tree
        # 'c' is found by select_by_tag(), though not by the in-operator
        assert 'c' not in tree
        assert any(tree.select_by_tag('c', False))
218 219


220
class TestSerialization:
    """Tests for the serialization of trees as S-expressions and as XML."""

    def test_sxpr_roundtrip(self):
        # TODO: not implemented yet; this test passes without checking anything
        pass

    def test_sexpr_attributes(self):
        """Attributes set via Node.attr must serialize exactly like
        attributes that were read from an S-expression."""
        tree = parse_sxpr('(A "B")')
        tree.attr['attr'] = "value"
        tree2 = parse_sxpr('(A `(attr "value") "B")')
        assert tree.as_sxpr() == tree2.as_sxpr()
        tree.attr['attr2'] = "value2"
        tree3 = parse_sxpr('(A `(attr "value") `(attr2 "value2") "B")')
        assert tree.as_sxpr() == tree3.as_sxpr()

    def test_sexpr(self):
        """Check the layout of the (non-flattened) S-expression output."""
        tree = parse_sxpr('(A (B "C") (D "E"))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    "C"\n  )\n  (D\n    "E"\n  )\n)', s
        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    (C\n      "D"\n    )\n    (E\n      "F"\n    )' \
            '\n  )\n  (G\n    "H"\n  )\n)', s
        # content with line breaks: one quoted string per line is expected
        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    (C\n      "D"\n      "X"\n    )' \
            '\n    (E\n      "F"\n    )\n  )\n  (G\n    " H "\n    " Y "\n  )\n)', s

    def test_compact_representation(self):
        """Check the indentation-based output of as_sxpr(compact=True)."""
        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
        compact = tree.as_sxpr(compact=True)
        assert compact == 'A\n  B\n    C "D"\n    E "F"\n  G "H"', compact
        # content with line breaks is expected below the tag, one line each
        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        compact = tree.as_sxpr(compact=True)
        assert compact == 'A\n  B\n    C\n      "D"\n      "X"\n    E "F"' \
            '\n  G\n    " H "\n    " Y "', compact

    def test_xml_inlining(self):
        """Check the XML output with and without inline_tags and with the
        attribute xml:space="preserve"."""
        tree = parse_sxpr('(A (B "C") (D "E"))')

        xml = tree.as_xml(inline_tags={'A'})
        assert xml == "<A><B>C</B><D>E</D></A>", xml

        # The result is bound to xml before asserting, so that a failure
        # reports the string that was actually compared.
        xml = tree.as_xml()
        assert xml == "<A>\n  <B>C</B>\n  <D>E</D>\n</A>", xml

        # with xml:space="preserve" the same output as with inlining of 'A'
        # is expected
        tree.attr['xml:space'] = 'preserve'
        xml = tree.as_xml()
        assert xml == '<A xml:space="preserve"><B>C</B><D>E</D></A>', xml

        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')

        # inlining an inner tag leaves the layout of the outer tag alone
        xml = tree.as_xml(inline_tags={'B'})
        assert xml == "<A>\n  <B><C>D</C><E>F</E></B>\n  <G>H</G>\n</A>", xml
        xml = tree.as_xml(inline_tags={'A'})
        assert xml == "<A><B><C>D</C><E>F</E></B><G>H</G></A>", xml

        # content with line breaks
        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        xml = tree.as_xml()
        assert xml == '<A>\n  <B>\n    <C>\n      D\n      X\n    </C>\n    ' \
            '<E>F</E>\n  </B>\n  <G>\n     H \n     Y \n  </G>\n</A>', xml
        xml = tree.as_xml(inline_tags={'A'})
        assert xml == '<A><B><C>D\nX</C><E>F</E></B><G> H \n Y </G></A>', xml

    # def test_xml2(self):
    #     tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
    #     print(tree.as_xml())
    #     print(tree.as_xml(inline_tags={'A'}))
285

di68kap's avatar
di68kap committed
286

287
if __name__ == "__main__":
    # The runner is imported locally, so it is only required when this file
    # is executed as a script.
    from DHParser.testing import runner
    # Presumably the empty string selects all tests that are found in this
    # module's namespace -- confirm against DHParser.testing.runner.
    runner("", globals())