#!/usr/bin/python3

"""test_syntaxtree.py - test of syntaxtree-module of DHParser 
                             
Author: Eckhart Arnold <arnold@badw.de>

Copyright 2017 Bavarian Academy of Sciences and Humanities

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

22
import copy
23
import sys
Eckhart Arnold's avatar
Eckhart Arnold committed
24
25
sys.path.extend(['../', './'])

26
from DHParser.syntaxtree import Node, RootNode, parse_sxpr, parse_xml, flatten_sxpr, flatten_xml, TOKEN_PTYPE
Eckhart Arnold's avatar
Eckhart Arnold committed
27
from DHParser.transform import traverse, reduce_single_child, \
28
    replace_by_single_child, flatten, remove_expendables
29
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
30
from DHParser.dsl import grammar_provider
31

32

33
34
class TestParseSxpression:
    """Checks for parse_sxpr() on well-formed and malformed input."""

    def test_parse_s_expression(self):
        """Parse simple S-expressions and reject input that is not a tree."""
        nested = parse_sxpr('(a (b c))')
        flat = flatten_sxpr(nested.as_sxpr())
        assert flat == '(a (b "c"))', flat

        # Line breaks between leaf items yield separate string items.
        multiline = parse_sxpr('(a i\nj\nk)')
        flat = flatten_sxpr(multiline.as_sxpr())
        assert flat == '(a "i" "j" "k")', flat

        # Input without enclosing brackets must be refused.
        try:
            parse_sxpr('a b c')
        except ValueError:
            pass
        else:
            assert False, "parse_sxpr() should raise a ValueError " \
                          "if argument is not a tree!"

46
47
48
49
50
51
52
class TestParseXML:
    """Checks for parse_xml() and the XML serialization of trees."""

    def test_roundtrip(self):
        """Serialize a tree as XML, parse it back and compare the flat XML."""
        source_tree = parse_sxpr('(a (b c) (d (e f) (h i)))')
        flat = flatten_xml(source_tree.as_xml())
        assert flat == '<a><b>c</b><d><e>f</e><h>i</h></d></a>'
        reparsed = parse_xml(flat)
        assert flat == flatten_xml(reparsed.as_xml())

    def test_plaintext_handling(self):
        """Check mixed content and whitespace-only text between tags."""
        mixed = parse_xml('<a>alpha <b>beta</b> gamma</a>')
        expected_sxpr = '(a (:Token "alpha ") (b "beta") (:Token " gamma"))'
        assert flatten_sxpr(mixed.as_sxpr()) == expected_sxpr

        padded = parse_xml(' <a>   <b>beta</b>   </a> ')
        assert flatten_xml(padded.as_xml()) == '<a><b>beta</b></a>'

    def test_flatten_xml(self):
        """Check that flatten_xml() removes line breaks and indentation."""
        indented = parse_xml('<alpha>\n  <beta>gamma</beta>\n</alpha>')
        one_line = flatten_xml(indented.as_xml())
        assert one_line == '<alpha><beta>gamma</beta></alpha>', one_line

67

68
69
70
71
72
73
class TestNode:
    """
    Tests for class Node
    """
    def setup(self):
        """Build the two fixture trees used by the tests of this class."""
        self.unique_nodes_sexpr = '(a (b c) (d e) (f (g h)))'
        self.recurring_nodes_sexpr = '(a (b x) (c (d e) (b y)))'
        self.unique_tree = parse_sxpr(self.unique_nodes_sexpr)
        self.recurr_tree = parse_sxpr(self.recurring_nodes_sexpr)

    def test_str(self):
        """str() of a tree is expected to be the concatenated leaf content."""
        assert str(self.unique_tree) == "ceh"
        assert str(self.recurr_tree) == "xey"

    def test_select_subnodes(self):
        """Select every node, root included, and check the visiting order."""
        selected = self.unique_tree.select(lambda _: True, include_root=True)
        visited = ''.join(node.tag_name for node in selected)
        assert visited == "abdfg", visited

    def test_find(self):
        """Select nodes by leaf content and by tag name."""
        def is_leaf_e(nd):
            return not nd.children and nd.result == "e"

        def is_b(nd):
            return nd.tag_name == 'b'

        leaves = list(self.unique_tree.select(is_leaf_e))
        assert len(leaves) == 1
        assert leaves[0].result == 'e'

        b_nodes = list(self.recurr_tree.select(is_b))
        assert len(b_nodes) == 2
        assert b_nodes[0].result == 'x' and b_nodes[1].result == 'y'

    def test_equality1(self):
        """Compare the fixture trees and freshly parsed trees for equality."""
        assert self.unique_tree == self.unique_tree
        assert self.recurr_tree != self.unique_tree

        reference = parse_sxpr('(a (b c))')
        assert reference != parse_sxpr('(a (b d))')
        assert reference == parse_sxpr('(a (b c))')

    def test_equality2(self):
        """Compare a parsed and transformed tree with a hand-written one."""
        ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
        transformation_table = {
            "term": [remove_expendables, replace_by_single_child, flatten],
            "factor": [remove_expendables, reduce_single_child],
            "*": [remove_expendables, replace_by_single_child],
        }
        grammar = grammar_provider(ebnf)()
        tree = grammar("20 / 4 * 3")
        traverse(tree, transformation_table)
        expected = parse_sxpr(
            "(term (term (factor 20) (:Token /) (factor 4)) (:Token *) (factor 3))")
        assert tree == expected, tree.as_sxpr()

    def test_copy(self):
        """A deep copy equals the original until the copy is modified."""
        clone = copy.deepcopy(self.unique_tree)
        assert clone == self.unique_tree
        # Guard: the value written below must not already be present.
        assert clone.result[0].result != "epsilon"
        clone.result[0].result = "epsilon"
        assert clone != self.unique_tree

    def test_copy2(self):
        """Check that deep copies transform and compile like the original.

        Tests whether Node.__deepcopy__ goes sufficiently deep for
        AST-transformation and compiling to perform correctly after a copy.
        """
        ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
        parser = get_ebnf_grammar()
        transform = get_ebnf_transformer()
        compiler = get_ebnf_compiler()
        tree = parser(ebnf)

        # First copy: transform and compile it.
        first_copy = copy.deepcopy(tree)
        transform(first_copy)
        res1 = compiler(first_copy)

        # Copy of the already transformed (and compiled) copy.
        copy_of_copy = copy.deepcopy(first_copy)
        res2 = compiler(copy_of_copy)
        assert res1 == res2

        # Second, independent copy of the untouched original.
        second_copy = copy.deepcopy(tree)
        transform(second_copy)
        res3 = compiler(second_copy)
        assert res3 == res2

        # Finally the original itself.
        transform(tree)
        res4 = compiler(tree)
        assert res4 == res3

    def test_len_and_pos(self):
        """Check len() of nodes and the positions set by init_pos()."""
        nd1 = Node(None, "123")
        nd2 = Node(None, "456")
        assert len(nd1) == 3, "Expected Node.len == 3, got %i" % len(nd1)
        assert len(nd2) == 3, "Expected Node.len == 3, got %i" % len(nd2)

        nd = Node(None, (nd1, nd2))
        assert len(nd) == 6, "Expected Node.len == 6, got %i" % len(nd)

        nd.init_pos(0)
        assert nd.pos == 0, "Expected Node.pos == 0, got %i" % nd.pos
        assert nd1.pos == 0, "Expected Node.pos == 0, got %i" % nd1.pos
        assert nd2.pos == 3, "Expected Node.pos == 3, got %i" % nd2.pos

di68kap's avatar
di68kap committed
154
155
156

class TestRootNode:
    """Checks for error bookkeeping on RootNode."""

    def test_error_handling(self):
        """Attach errors to two child nodes and collect them from the root."""
        tree = parse_sxpr('(A (B D) (C E))')
        tree.init_pos(0)
        root = RootNode()

        # Errors are deliberately registered for the second child first.
        for child_index, message in ((1, "error C"), (0, "error B")):
            root.new_error(tree.children[child_index], message)
        root.swallow(tree)
        assert root.error_flag

        errors = root.collect_errors()
        # The flag must still be set after the errors have been collected.
        assert root.error_flag
        # Disabled checks, kept for reference:
        # assert errors == root.collect_errors(True)
        # assert not root.error_flag and not root.collect_errors()

        # NOTE(review): neither message added above contains "A"; unless the
        # string form of an error contributes one, find("A") is -1 and this
        # comparison holds trivially. Possibly the order of "B" and "C" was
        # meant to be checked -- confirm.
        error_str = "\n".join(str(err) for err in errors)
        assert error_str.find("A") < error_str.find("B")


172
class TestNodeFind:
    """Test the select-functions of class Node.
    """

    def test_find(self):
        """Select nodes through a match function, with and without the root."""
        def has_tag(tag_name):
            # Build a match function for nodes carrying the given tag name.
            def match(node):
                return node.tag_name == tag_name
            return match

        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        x_nodes = list(tree.select(has_tag("X")))
        assert len(x_nodes) == 2, len(x_nodes)
        assert str(x_nodes[0]) == 'd', str(x_nodes[0])
        assert str(x_nodes[1]) == 'F', str(x_nodes[1])
        assert x_nodes[0] == parse_sxpr('(X (c d))')
        assert x_nodes[1] == parse_sxpr('(X F)')

        # The root node is only matched when include_root is set.
        is_root_tag = has_tag('a')
        assert list(tree.select(is_root_tag, include_root=True))
        assert not list(tree.select(is_root_tag, include_root=False))

    def test_getitem(self):
        """Index children by position and select nodes by tag name."""
        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        assert tree[0] == parse_sxpr('(b X)')
        assert tree[2] == parse_sxpr('(e (X F))')

        # The tree has three children, so index 3 is out of range.
        try:
            tree[3]
        except IndexError:
            pass
        else:
            assert False, "IndexError expected!"

        x_nodes = list(tree.select_by_tag('X', False))
        assert x_nodes[0] == parse_sxpr('(X (c d))')
        assert x_nodes[1] == parse_sxpr('(X F)')

    def test_contains(self):
        """Check the `in` operator against select_by_tag()."""
        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')

        # The root's own tag is not "in" the tree ...
        assert 'a' not in tree
        # ... but select_by_tag() finds it if the root is included.
        assert any(tree.select_by_tag('a', True))
        assert not any(tree.select_by_tag('a', False))

        for child_tag in ('b', 'X', 'e'):
            assert child_tag in tree

        # 'c' only occurs further down: not "in" the tree, yet selectable.
        assert 'c' not in tree
        assert any(tree.select_by_tag('c', False))
215
216


217
class TestSerialization:
    """Checks for the S-expression and XML serialization of trees."""

    def test_sxpr_roundtrip(self):
        """Placeholder; no round-trip check is implemented yet."""
        pass

    def test_sexpr_attributes(self):
        """Attributes set via `attr` serialize like attributes parsed from
        an S-expression."""
        tree = parse_sxpr('(A "B")')
        tree.attr['attr'] = "value"
        tree2 = parse_sxpr('(A `(attr "value") "B")')
        assert tree.as_sxpr() == tree2.as_sxpr()
        tree.attr['attr2'] = "value2"
        tree3 = parse_sxpr('(A `(attr "value") `(attr2 "value2") "B")')
        assert tree.as_sxpr() == tree3.as_sxpr()

    def test_sexpr(self):
        """Check the indented multi-line S-expression output."""
        tree = parse_sxpr('(A (B "C") (D "E"))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    "C"\n  )\n  (D\n    "E"\n  )\n)', s
        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    (C\n      "D"\n    )\n    (E\n      "F"\n    )' \
            '\n  )\n  (G\n    "H"\n  )\n)', s
        # Leaf content containing line breaks is written as one string per line.
        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    (C\n      "D"\n      "X"\n    )' \
            '\n    (E\n      "F"\n    )\n  )\n  (G\n    " H "\n    " Y "\n  )\n)', s

    def test_compact_representation(self):
        """Check the output of as_sxpr(compact=True)."""
        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
        compact = tree.as_sxpr(compact=True)
        assert compact == 'A\n  B\n    C "D"\n    E "F"\n  G "H"', compact
        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        compact = tree.as_sxpr(compact=True)
        assert compact == 'A\n  B\n    C\n      "D"\n      "X"\n    E "F"' \
            '\n  G\n    " H "\n    " Y "', compact

    def test_xml_inlining(self):
        """Check as_xml() with and without `inline_tags` and with an
        `xml:space="preserve"` attribute."""
        tree = parse_sxpr('(A (B "C") (D "E"))')

        xml = tree.as_xml(inline_tags={'A'})
        assert xml == "<A><B>C</B><D>E</D></A>", xml

        # Bind the result first, so that a failure reports the value that was
        # actually compared and not the stale output of the previous call.
        xml = tree.as_xml()
        assert xml == "<A>\n  <B>C</B>\n  <D>E</D>\n</A>", xml

        tree.attr['xml:space'] = 'preserve'
        xml = tree.as_xml()
        assert xml == '<A xml:space="preserve"><B>C</B><D>E</D></A>', xml

        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')

        xml = tree.as_xml(inline_tags={'B'})
        assert xml == "<A>\n  <B><C>D</C><E>F</E></B>\n  <G>H</G>\n</A>", xml
        xml = tree.as_xml(inline_tags={'A'})
        assert xml == "<A><B><C>D</C><E>F</E></B><G>H</G></A>", xml

        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        xml = tree.as_xml()
        assert xml == '<A>\n  <B>\n    <C>\n      D\n      X\n    </C>\n    ' \
            '<E>F</E>\n  </B>\n  <G>\n     H \n     Y \n  </G>\n</A>', xml
        xml = tree.as_xml(inline_tags={'A'})
        assert xml == '<A><B><C>D\nX</C><E>F</E></B><G> H \n Y </G></A>', xml

    # def test_xml2(self):
    #     tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
    #     print(tree.as_xml())
    #     print(tree.as_xml(inline_tags={'A'}))
282

di68kap's avatar
di68kap committed
283

284
if __name__ == "__main__":
    # Imported here so that the runner is only loaded when this file is
    # executed as a script.
    from DHParser.testing import runner

    runner("", globals())