2.12.2021, 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

test_syntaxtree.py 13.3 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
#!/usr/bin/python3

"""test_syntaxtree.py - test of syntaxtree-module of DHParser 
                             
Author: Eckhart Arnold <arnold@badw.de>

Copyright 2017 Bavarian Academy of Sciences and Humanities

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

22
import copy
23
import sys
Eckhart Arnold's avatar
Eckhart Arnold committed
24
25
sys.path.extend(['../', './'])

26
27
from DHParser.syntaxtree import Node, RootNode, parse_sxpr, parse_xml, flatten_sxpr, \
    flatten_xml, ZOMBIE_TAG
Eckhart Arnold's avatar
Eckhart Arnold committed
28
from DHParser.transform import traverse, reduce_single_child, \
29
    replace_by_single_child, flatten, remove_expendables
30
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
31
from DHParser.dsl import grammar_provider
32
from DHParser.error import Error
33
from DHParser.parse import RE, Grammar
34

35

36
37
class TestParseSxpression:
    """Tests for reading S-expressions with parse_sxpr()."""

    def test_parse_s_expression(self):
        """Parse two well-formed S-expressions and reject a non-tree string."""
        flat = flatten_sxpr(parse_sxpr('(a (b c))').as_sxpr())
        assert flat == '(a (b "c"))', flat

        # Line breaks inside the content yield one quoted string per line.
        flat = flatten_sxpr(parse_sxpr('(a i\nj\nk)').as_sxpr())
        assert flat == '(a "i" "j" "k")', flat

        try:
            parse_sxpr('a b c')
        except ValueError:
            pass
        else:
            assert False, "parse_sxpr() should raise a ValueError " \
                          "if argument is not a tree!"

49
50
51
52
53
54
55
class TestParseXML:
    """Tests for parse_xml(), flatten_xml() and the XML serialization of trees."""

    def test_roundtrip(self):
        """Serializing to XML, parsing that XML and serializing again is stable."""
        original = parse_sxpr('(a (b c) (d (e f) (h i)))')
        flat = flatten_xml(original.as_xml())
        assert flat == '<a><b>c</b><d><e>f</e><h>i</h></d></a>'
        reparsed = parse_xml(flat)
        assert flat == flatten_xml(reparsed.as_xml())

    def test_plaintext_handling(self):
        """Check how text between child elements is represented and serialized."""
        mixed = parse_xml('<a>alpha <b>beta</b> gamma</a>')
        expected_sxpr = '(a (:Token "alpha ") (b "beta") (:Token " gamma"))'
        assert flatten_sxpr(mixed.as_sxpr()) == expected_sxpr

        padded = parse_xml(' <a>  <b>beta</b>  </a> ')
        expected_flat = '<a><:Token>  </:Token><b>beta</b><:Token>  </:Token></a>'
        assert flatten_xml(padded.as_xml()) == expected_flat
        inlined = padded.as_xml(inline_tags={'a'}, omit_tags={':Token'})
        assert inlined == '<a>  <b>beta</b>  </a>'

        multiline = parse_xml(' <a>\n  <b>beta</b>\n</a> ')
        assert multiline.as_xml(inline_tags={'a'}) == '<a><b>beta</b></a>'

    def test_flatten_xml(self):
        """flatten_xml() removes the line breaks and indentation of the XML."""
        parsed = parse_xml('<alpha>\n  <beta>gamma</beta>\n</alpha>')
        flat_xml = flatten_xml(parsed.as_xml())
        assert flat_xml == '<alpha><beta>gamma</beta></alpha>', flat_xml

73

74
75
76
77
78
79
class TestNode:
    """
    Tests for class Node
    """
    def setup(self):
        """Build the two fixture trees that several tests read from ``self``."""
        self.unique_nodes_sexpr = '(a (b c) (d e) (f (g h)))'
        self.unique_tree = parse_sxpr(self.unique_nodes_sexpr)
        self.recurring_nodes_sexpr = '(a (b x) (c (d e) (b y)))'
        self.recurr_tree = parse_sxpr(self.recurring_nodes_sexpr)

    def setup_method(self, method=None):
        """pytest per-test hook; delegates to :meth:`setup`.

        pytest no longer calls a plain nose-style ``setup()`` method (support
        was removed in pytest 8), which would leave ``self.unique_tree`` and
        ``self.recurr_tree`` undefined. ``setup()`` itself is kept, because
        other runners may still call it by that name; running it twice is
        harmless since it only reassigns the fixture attributes.
        """
        self.setup()

    def test_deepcopy(self):
        """A deep copy is equal to, but independent of, the original tree."""
        source = '(a (b c) (d (e f) (h i)))'
        tree = RootNode(parse_sxpr(source))
        tree.with_pos(0)
        tree_copy = copy.deepcopy(tree)

        assert tree.equals(tree_copy)
        assert tree.as_sxpr() == parse_sxpr(source).as_sxpr()
        assert tree_copy.as_sxpr() == parse_sxpr(source).as_sxpr()

        # An error added to the original must not show up in the copy.
        tree.add_error(tree, Error('Test Error', 0))
        assert not tree_copy.errors
        assert tree.as_sxpr() != parse_sxpr(source).as_sxpr()
        assert tree_copy.as_sxpr() == parse_sxpr(source).as_sxpr()

        # Changing a result in the original must not change the copy either.
        tree['d'].result = "x"
        assert not tree.equals(tree_copy)
        assert tree_copy.equals(parse_sxpr(source))
        # print(tree.as_sxpr())
        # print(parse_sxpr('(a (b c) (d x))').as_sxpr())
        assert tree.equals(parse_sxpr('(a (b c) (d x))'))

        # this also checks for errors equality...
        assert parse_sxpr('(a (b c) (d x))').as_sxpr() != tree.as_sxpr()

    def test_str(self):
        """str() of a tree is the concatenation of its leaf contents."""
        assert str(self.unique_tree) == "ceh"
        assert str(self.recurr_tree) == "xey"

    def test_select_subnodes(self):
        """select() with an always-true predicate visits every node, root first."""
        tags = [node.tag_name
                for node in self.unique_tree.select(lambda nd: True, include_root=True)]
        joined = ''.join(tags)
        assert joined == "abdfg", joined

    def test_find(self):
        """select() yields exactly the nodes that satisfy the predicate."""
        found = list(self.unique_tree.select(lambda nd: not nd.children and nd.result == "e"))
        assert len(found) == 1
        assert found[0].result == 'e'
        found = list(self.recurr_tree.select(lambda nd: nd.tag_name == 'b'))
        assert len(found) == 2
        assert found[0].result == 'x' and found[1].result == 'y'

    def test_equality1(self):
        """equals() compares both the structure and the content of trees."""
        assert self.unique_tree.equals(self.unique_tree)
        assert not self.recurr_tree.equals(self.unique_tree)
        assert not parse_sxpr('(a (b c))').equals(parse_sxpr('(a (b d))'))
        assert parse_sxpr('(a (b c))').equals(parse_sxpr('(a (b c))'))

    def test_equality2(self):
        """A parsed and transformed tree equals the same tree given as S-expression."""
        ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
        att = {"term": [remove_expendables, replace_by_single_child, flatten],
               "factor": [remove_expendables, reduce_single_child],
               "*": [remove_expendables, replace_by_single_child]}
        parser = grammar_provider(ebnf)()
        tree = parser("20 / 4 * 3")
        traverse(tree, att)
        compare_tree = parse_sxpr(
            "(term (term (factor 20) (:Token /) (factor 4)) (:Token *) (factor 3))")
        assert tree.equals(compare_tree), tree.as_sxpr()

    def test_copy(self):
        """Modifying a deep copy makes it unequal to the original tree."""
        cpy = copy.deepcopy(self.unique_tree)
        assert cpy.equals(self.unique_tree)
        assert cpy.result[0].result != "epsilon"  # just to make sure...
        cpy.result[0].result = "epsilon"
        assert not cpy.equals(self.unique_tree)

    def test_copy2(self):
        """Compiling deep copies gives the same result up to numeric characters."""
        # test if Node.__deepcopy__ goes sufficiently deep for ast-
        # transformation and compiling to perform correctly after copy

        def differing_chars(first, second):
            """Return the characters of `first` that differ from `second`
            at the same index (compared up to the shorter length)."""
            return ''.join([a for a, b in zip(first, second) if a != b])

        ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
        parser = get_ebnf_grammar()
        transform = get_ebnf_transformer()
        compiler = get_ebnf_compiler()
        tree = parser(ebnf)

        tree_copy = copy.deepcopy(tree)
        transform(tree_copy)
        res1 = compiler(tree_copy)
        t2 = copy.deepcopy(tree_copy)
        res2 = compiler(t2)
        # Note: str.isnumeric() is False for an empty string, so each of the
        # following checks also requires that at least one character differs.
        assert differing_chars(res1, res2).isnumeric()  # differences should only be ID-Numbers

        tree_copy = copy.deepcopy(tree)
        transform(tree_copy)
        res3 = compiler(tree_copy)
        assert differing_chars(res2, res3).isnumeric()  # differences should only be ID-Numbers

        transform(tree)
        res4 = compiler(tree)
        assert differing_chars(res3, res4).isnumeric()  # differences should only be ID-Numbers

    def test_len_and_pos(self):
        """Test len-property of Node and the positions assigned by with_pos()."""
        nd1 = Node(ZOMBIE_TAG, "123")
        assert len(nd1) == 3, "Expected Node.len == 3, got %i" % len(nd1)
        nd2 = Node(ZOMBIE_TAG, "456")
        assert len(nd2) == 3, "Expected Node.len == 3, got %i" % len(nd2)
        nd = Node(ZOMBIE_TAG, (nd1, nd2))
        assert len(nd) == 6, "Expected Node.len == 6, got %i" % len(nd)
        nd.with_pos(0)
        assert nd.pos == 0, "Expected Node.pos == 0, got %i" % nd.pos
        assert nd1.pos == 0, "Expected Node.pos == 0, got %i" % nd1.pos
        assert nd2.pos == 3, "Expected Node.pos == 3, got %i" % nd2.pos

    def test_xml_sanitizer(self):
        """'<', '&' and '>' in the content are escaped in the XML output."""
        node = Node('tag', '<&>')
        assert node.as_xml() == '<tag>&lt;&amp;&gt;</tag>'
190

di68kap's avatar
di68kap committed
191
192
193

class TestRootNode:
    """Tests for the error handling and error reporting of RootNode."""

    def test_error_handling(self):
        """Errors registered before swallow() are kept and reported in order."""
        tree = parse_sxpr('(A (B D) (C E))')
        tree.with_pos(0)
        root = RootNode()
        # Register the errors in reverse document order on purpose.
        root.new_error(tree.children[1], "error C")
        root.new_error(tree.children[0], "error B")
        root.swallow(tree)
        assert root.error_flag
        errors = root.errors_sorted
        # Reading the sorted errors must not reset the error flag.
        assert root.error_flag
        # assert errors == root.errors(True)
        # assert not root.error_flag and not root.errors()
        error_str = "\n".join(str(e) for e in errors)
        # The previous check compared find("A") with find("B"); neither
        # message contains an "A", so it could pass without saying anything
        # about the order. Compare the two messages that were registered.
        # NOTE(review): assumes errors_sorted orders by node position and that
        # str(error) contains the error message - confirm against DHParser.
        pos_b = error_str.find("error B")
        pos_c = error_str.find("error C")
        assert 0 <= pos_b < pos_c, error_str

    def test_error_reporting(self):
        """An unparsed remainder is reported inline in the string of the result."""
        # Raw strings: '\d' and '\.' are invalid escape sequences in ordinary
        # string literals and trigger a warning on current Python versions.
        number = RE(r'\d+') | RE(r'\d+') + RE(r'\.') + RE(r'\d+')
        result = str(Grammar(number)("3.1416"))
        assert result == '3 <<< Error on ".141" | Parser stopped before end! trying to recover... >>> ', \
            str(result)

di68kap's avatar
di68kap committed
214

215
class TestNodeFind:
    """Test the select-functions of class Node.
    """

    def test_find(self):
        """select() with a tag-name predicate, with and without the root."""
        def has_tag(tag_name):
            # Build a predicate that matches nodes carrying `tag_name`.
            def predicate(node):
                return node.tag_name == tag_name
            return predicate

        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        matches = list(tree.select(has_tag("X")))
        assert len(matches) == 2, len(matches)
        first, second = matches
        assert str(first) == 'd', str(first)
        assert str(second) == 'F', str(second)
        assert first.equals(parse_sxpr('(X (c d))'))
        assert second.equals(parse_sxpr('(X F)'))

        # The root node is only matched if include_root is set.
        is_root_tag = has_tag('a')
        assert list(tree.select(is_root_tag, include_root=True))
        assert not list(tree.select(is_root_tag, include_root=False))

    def test_getitem(self):
        """Integer indices address children; an index out of range raises IndexError."""
        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        assert tree[0].equals(parse_sxpr('(b X)'))
        assert tree[2].equals(parse_sxpr('(e (X F))'))
        try:
            tree[3]
        except IndexError:
            pass
        else:
            assert False, "IndexError expected!"
        x_nodes = list(tree.select_by_tag('X', False))
        assert x_nodes[0].equals(parse_sxpr('(X (c d))'))
        assert x_nodes[1].equals(parse_sxpr('(X F)'))

    def test_contains(self):
        """`in` only looks at direct children; select_by_tag() searches deeper."""
        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')

        # The root's own tag is not "in" the tree, but select_by_tag() finds
        # it when the root is included.
        assert 'a' not in tree
        assert any(tree.select_by_tag('a', True))
        assert not any(tree.select_by_tag('a', False))

        assert 'b' in tree
        assert 'X' in tree
        assert 'e' in tree

        # 'c' is a grandchild: not "in" the tree, yet found by select_by_tag().
        assert 'c' not in tree
        assert any(tree.select_by_tag('c', False))


260
class TestSerialization:
    """Tests for the S-expression and XML serialization of syntax trees."""

    def test_sxpr_roundtrip(self):
        """Placeholder; no round-trip check is implemented yet."""
        pass

    def test_sexpr_attributes(self):
        """Attributes set on a node equal attributes given in the S-expression."""
        tree = parse_sxpr('(A "B")')
        tree.attr['attr'] = "value"
        tree2 = parse_sxpr('(A `(attr "value") "B")')
        assert tree.as_sxpr() == tree2.as_sxpr()
        tree.attr['attr2'] = "value2"
        tree3 = parse_sxpr('(A `(attr "value") `(attr2 "value2") "B")')
        assert tree.as_sxpr() == tree3.as_sxpr()

    def test_sexpr(self):
        """Check the indented multi-line layout produced by as_sxpr()."""
        tree = parse_sxpr('(A (B "C") (D "E"))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    "C"\n  )\n  (D\n    "E"\n  )\n)', s
        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    (C\n      "D"\n    )\n    (E\n      "F"\n    )' \
            '\n  )\n  (G\n    "H"\n  )\n)', s
        # Content that contains line breaks is written as one string per line.
        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    (C\n      "D"\n      "X"\n    )' \
            '\n    (E\n      "F"\n    )\n  )\n  (G\n    " H "\n    " Y "\n  )\n)', s

    def test_compact_representation(self):
        """Check the bracket-free layout produced by as_sxpr(compact=True)."""
        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
        compact = tree.as_sxpr(compact=True)
        assert compact == 'A\n  B\n    C "D"\n    E "F"\n  G "H"', compact
        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        compact = tree.as_sxpr(compact=True)
        assert compact == 'A\n  B\n    C\n      "D"\n      "X"\n    E "F"' \
            '\n  G\n    " H "\n    " Y "', compact

    def test_xml_inlining(self):
        """Check as_xml() with inline_tags and with an xml:space attribute."""
        tree = parse_sxpr('(A (B "C") (D "E"))')

        xml = tree.as_xml(inline_tags={'A'})
        assert xml == "<A><B>C</B><D>E</D></A>", xml

        # Assign before asserting, so that a failure reports the value that
        # was actually compared rather than the previous serialization.
        xml = tree.as_xml()
        assert xml == "<A>\n  <B>C</B>\n  <D>E</D>\n</A>", xml

        tree.attr['xml:space'] = 'preserve'
        # print(tree.attr)
        xml = tree.as_xml()
        assert xml == '<A xml:space="preserve"><B>C</B><D>E</D></A>', xml

        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')

        xml = tree.as_xml(inline_tags={'B'})
        assert xml == "<A>\n  <B><C>D</C><E>F</E></B>\n  <G>H</G>\n</A>", xml
        xml = tree.as_xml(inline_tags={'A'})
        assert xml == "<A><B><C>D</C><E>F</E></B><G>H</G></A>", xml

        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        xml = tree.as_xml()
        assert xml == '<A>\n  <B>\n    <C>\n      D\n      X\n    </C>\n    ' \
            '<E>F</E>\n  </B>\n  <G>\n     H \n     Y \n  </G>\n</A>', xml
        xml = tree.as_xml(inline_tags={'A'})
        assert xml == '<A><B><C>D\nX</C><E>F</E></B><G> H \n Y </G></A>', xml

    # def test_xml2(self):
    #     tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
    #     print(tree.as_xml())
    #     print(tree.as_xml(inline_tags={'A'}))
326

di68kap's avatar
di68kap committed
327

328
if __name__ == "__main__":
    # When executed as a script, hand this module's namespace to DHParser's
    # own test runner; the import is kept local to the script entry point.
    from DHParser.testing import runner

    runner("", globals())