test_syntaxtree.py 14.3 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
#!/usr/bin/python3

"""test_syntaxtree.py - test of syntaxtree-module of DHParser 
                             
Author: Eckhart Arnold <arnold@badw.de>

Copyright 2017 Bavarian Academy of Sciences and Humanities

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

22
import copy
import json
import sys

# Extend the module search path with the parent and the current directory
# before DHParser is imported (presumably so that the package is found
# without being installed -- confirm against the project layout).
sys.path.extend(['../', './'])

from DHParser.syntaxtree import Node, RootNode, parse_sxpr, parse_xml, flatten_sxpr, \
    flatten_xml, ZOMBIE_TAG
from DHParser.transform import traverse, reduce_single_child, \
    replace_by_single_child, flatten, remove_empty, remove_whitespace
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
from DHParser.dsl import grammar_provider
from DHParser.error import Error
from DHParser.parse import RE, Grammar
35

36

37 38
class TestParseSxpression:
    """Tests for reading S-expressions with parse_sxpr()."""

    def test_parse_s_expression(self):
        """Parse S-expressions, serialize them again and compare the output.

        Also checks that input which is not a tree is rejected with a
        ValueError.
        """
        tree = parse_sxpr('(a (b c))')
        flat = flatten_sxpr(tree.as_sxpr())
        assert flat == '(a (b "c"))', flat

        # line breaks inside the content yield one quoted string per line
        tree = parse_sxpr('(a i\nj\nk)')
        flat = flatten_sxpr(tree.as_sxpr())
        assert flat == '(a "i" "j" "k")', flat

        # Only the call that is expected to raise lives inside the try-body;
        # the failure report for "nothing was raised" goes to the else-branch.
        try:
            parse_sxpr('a b c')
        except ValueError:
            pass
        else:
            assert False, "parse_sxpr() should raise a ValueError " \
                          "if argument is not a tree!"

50 51 52 53 54 55 56
class TestParseXML:
    """Tests for parse_xml() and the XML serialization of node trees."""

    def test_roundtrip(self):
        """Serialize a tree as XML, parse that XML and serialize it again."""
        tree = parse_sxpr('(a (b c) (d (e f) (h i)))')
        xml = tree.as_xml()
        fxml = flatten_xml(xml)
        assert fxml == '<a><b>c</b><d><e>f</e><h>i</h></d></a>'
        tree2 = parse_xml(fxml)
        assert fxml == flatten_xml(tree2.as_xml())

    def test_plaintext_handling(self):
        """Check how text and whitespace between child elements are kept."""
        # text next to a child element ends up in ":Token" nodes
        tree = parse_xml('<a>alpha <b>beta</b> gamma</a>')
        assert flatten_sxpr(tree.as_sxpr()) == \
               '(a (:Token "alpha ") (b "beta") (:Token " gamma"))'

        # blanks on the same line as the tags are kept as ":Token" nodes
        tree = parse_xml(' <a>  <b>beta</b>  </a> ')
        assert flatten_xml(tree.as_xml()) == '<a><:Token>  </:Token><b>beta</b><:Token>  </:Token></a>'
        assert tree.as_xml(inline_tags={'a'}, omit_tags={':Token'}) == '<a>  <b>beta</b>  </a>'

        # line breaks plus indentation between the tags leave no trace
        tree = parse_xml(' <a>\n  <b>beta</b>\n</a> ')
        assert tree.as_xml(inline_tags={'a'}) == '<a><b>beta</b></a>'

    def test_flatten_xml(self):
        """flatten_xml() removes line breaks and indentation between tags."""
        tree = parse_xml('<alpha>\n  <beta>gamma</beta>\n</alpha>')
        flat_xml = flatten_xml(tree.as_xml())
        assert flat_xml == '<alpha><beta>gamma</beta></alpha>', flat_xml

74

75
class TestParseJSON:
    """Round-trip tests for the JSON serialization of node trees."""

    def setup(self):
        """Build a tree with non-ASCII content and two attributes on node 'd'."""
        self.tree = parse_sxpr('(a (b ä) (d (e ö) (h ü)))')
        d = self.tree.pick('d')
        d.attr['name'] = "James Bond"
        d.attr['id'] = '007'

    def test_json_obj_roundtrip(self):
        """Convert the tree to a JSON object and back; both trees must be equal."""
        json_obj_tree = self.tree.to_json_obj()
        tree_copy = Node.from_json_obj(json_obj_tree)
        assert tree_copy.equals(self.tree)

    def test_json_rountrip(self):
        """Serialize the tree as JSON text and rebuild it, in two formats."""
        # compact output with escaped non-ASCII characters
        s = self.tree.as_json(indent=None, ensure_ascii=True)
        tree_copy = Node.from_json_obj(json.loads(s))
        assert tree_copy.equals(self.tree)

        # indented output with literal non-ASCII characters; the result of
        # this second round trip was previously never checked
        s = self.tree.as_json(indent=2, ensure_ascii=False)
        tree_copy = Node.from_json_obj(json.loads(s))
        assert tree_copy.equals(self.tree)
93 94


95 96 97 98 99 100
class TestNode:
    """
    Tests for class Node
    """

    def setup(self):
        """Create one tree with unique tag names and one with a repeated tag."""
        self.unique_nodes_sexpr = '(a (b c) (d e) (f (g h)))'
        self.unique_tree = parse_sxpr(self.unique_nodes_sexpr)
        self.recurring_nodes_sexpr = '(a (b x) (c (d e) (b y)))'
        self.recurr_tree = parse_sxpr(self.recurring_nodes_sexpr)

    def test_content_property(self):
        """The content of a tree reflects later changes of its nodes' results."""
        tree = RootNode(parse_sxpr('(a (b c) (d e))'))
        content = tree.content
        b = tree.pick('b')
        d = tree.pick('d')
        b.result = "recently "
        d.result = "changed"
        # the string read before the change keeps its old value
        assert content != tree.content
        assert content == 'ce'
        assert tree.content == 'recently changed'

    def test_deepcopy(self):
        """A deep copy is equal to, but independent of, the original tree."""
        sxpr = '(a (b c) (d (e f) (h i)))'
        tree = RootNode(parse_sxpr(sxpr))
        tree.with_pos(0)
        tree_copy = copy.deepcopy(tree)

        assert tree.equals(tree_copy)
        assert tree.as_sxpr() == parse_sxpr(sxpr).as_sxpr()
        assert tree_copy.as_sxpr() == parse_sxpr(sxpr).as_sxpr()

        # an error added to the original must not show up in the copy
        tree.add_error(tree, Error('Test Error', 0))
        assert not tree_copy.errors
        assert tree.as_sxpr() != parse_sxpr(sxpr).as_sxpr()
        assert tree_copy.as_sxpr() == parse_sxpr(sxpr).as_sxpr()

        # a changed result in the original must not show up in the copy
        tree['d'].result = "x"
        assert not tree.equals(tree_copy)
        assert tree_copy.equals(parse_sxpr(sxpr))
        assert tree.equals(parse_sxpr('(a (b c) (d x))'))

        # this also checks for errors equality...
        assert parse_sxpr('(a (b c) (d x))').as_sxpr() != tree.as_sxpr()

    def test_str(self):
        """str() of a tree is the concatenated content of its leaves."""
        assert str(self.unique_tree) == "ceh"
        assert str(self.recurr_tree) == "xey"

    def test_select_subnodes(self):
        """select() with include_root=True yields the root and all descendants."""
        tags = [node.tag_name
                for node in self.unique_tree.select(lambda nd: True, include_root=True)]
        assert ''.join(tags) == "abdfg", ''.join(tags)

    def test_find(self):
        """select() returns exactly the nodes that satisfy the match function."""
        found = list(self.unique_tree.select(lambda nd: not nd.children and nd.result == "e"))
        assert len(found) == 1
        assert found[0].result == 'e'
        found = list(self.recurr_tree.select(lambda nd: nd.tag_name == 'b'))
        assert len(found) == 2
        assert found[0].result == 'x' and found[1].result == 'y'

    def test_equality1(self):
        """equals() compares trees by structure and content."""
        assert self.unique_tree.equals(self.unique_tree)
        assert not self.recurr_tree.equals(self.unique_tree)
        assert not parse_sxpr('(a (b c))').equals(parse_sxpr('(a (b d))'))
        assert parse_sxpr('(a (b c))').equals(parse_sxpr('(a (b c))'))

    def test_equality2(self):
        """A parsed and transformed tree equals the same tree read from an S-expression."""
        ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
        att = {"term": [remove_empty, remove_whitespace, replace_by_single_child, flatten],
               "factor": [remove_empty, remove_whitespace, reduce_single_child],
               "*": [remove_empty, remove_whitespace, replace_by_single_child]}
        parser = grammar_provider(ebnf)()
        tree = parser("20 / 4 * 3")
        traverse(tree, att)
        compare_tree = parse_sxpr("(term (term (factor 20) (:Token /) (factor 4)) (:Token *) (factor 3))")
        assert tree.equals(compare_tree), tree.as_sxpr()

    def test_copy(self):
        """Changing a deep copy makes it unequal to the original."""
        cpy = copy.deepcopy(self.unique_tree)
        assert cpy.equals(self.unique_tree)
        assert cpy.result[0].result != "epsilon"  # just to make sure...
        cpy.result[0].result = "epsilon"
        assert not cpy.equals(self.unique_tree)

    def test_copy2(self):
        """Test if Node.__deepcopy__ goes sufficiently deep for ast-
        transformation and compiling to perform correctly after copy.
        """
        def differing_chars(first, second):
            # characters of `first` at the positions where `second` differs
            return ''.join(a for a, b in zip(first, second) if a != b)

        ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
        parser = get_ebnf_grammar()
        transform = get_ebnf_transformer()
        compiler = get_ebnf_compiler()
        tree = parser(ebnf)

        # compile a transformed copy and a copy of that transformed copy
        tree_copy = copy.deepcopy(tree)
        transform(tree_copy)
        res1 = compiler(tree_copy)
        t2 = copy.deepcopy(tree_copy)
        res2 = compiler(t2)
        assert differing_chars(res1, res2).isnumeric()  # differences should only be ID-Numbers

        # compile a second, freshly transformed copy of the untouched tree
        tree_copy = copy.deepcopy(tree)
        transform(tree_copy)
        res3 = compiler(tree_copy)
        assert differing_chars(res2, res3).isnumeric()  # differences should only be ID-Numbers

        # finally transform and compile the original tree itself
        transform(tree)
        res4 = compiler(tree)
        assert differing_chars(res3, res4).isnumeric()  # differences should only be ID-Numbers

    def test_len_and_pos(self):
        """Test len-property of Node."""
        nd1 = Node(ZOMBIE_TAG, "123")
        assert len(nd1) == 3, "Expected Node.len == 3, got %i" % len(nd1)
        nd2 = Node(ZOMBIE_TAG, "456")
        assert len(nd2) == 3, "Expected Node.len == 3, got %i" % len(nd2)
        nd = Node(ZOMBIE_TAG, (nd1, nd2))
        assert len(nd) == 6, "Expected Node.len == 6, got %i" % len(nd)
        # assigning a position to the parent also positions the children
        nd.with_pos(0)
        assert nd.pos == 0, "Expected Node.pos == 0, got %i" % nd.pos
        assert nd1.pos == 0, "Expected Node.pos == 0, got %i" % nd1.pos
        assert nd2.pos == 3, "Expected Node.pos == 3, got %i" % nd2.pos

    def test_xml_sanitizer(self):
        """as_xml() escapes the characters <, & and > in the content."""
        node = Node('tag', '<&>')
        assert node.as_xml() == '<tag>&lt;&amp;&gt;</tag>'
220

di68kap's avatar
di68kap committed
221 222 223

class TestRootNode:
    """Tests for the error bookkeeping of class RootNode."""

    def test_error_handling(self):
        """Errors added before swallow() are still reported by the root node."""
        tree = parse_sxpr('(A (B D) (C E))')
        tree.with_pos(0)
        root = RootNode()
        # the errors are registered in reverse order of the nodes they refer to
        root.new_error(tree.children[1], "error C")
        root.new_error(tree.children[0], "error B")
        root.swallow(tree)
        assert root.error_flag
        errors = root.errors_sorted
        # reading the sorted errors does not reset the error flag
        assert root.error_flag
        # assert errors == root.errors(True)
        # assert not root.error_flag and not root.errors()
        error_str = "\n".join(str(e) for e in errors)
        # NOTE(review): the messages are "error C" and "error B". Unless
        # str(e) itself contains an upper-case "A", find("A") is -1 and this
        # check cannot fail; presumably "B" before "C" was intended -- confirm
        # against Error.__str__ and errors_sorted before tightening it.
        assert error_str.find("A") < error_str.find("B")

    def test_error_reporting(self):
        """A parser that stops early embeds the error message in str(tree)."""
        number = RE(r'\d+') | RE(r'\d+') + RE(r'\.') + RE(r'\d+')
        result = str(Grammar(number)("3.1416"))
        assert result == '3 <<< Error on ".141" | Parser stopped before end! trying to recover... >>> ', \
            result

di68kap's avatar
di68kap committed
244

245
class TestNodeFind:
    """Test the select-functions of class Node.
    """

    def test_find(self):
        """select() finds nested matches and honours include_root."""
        def match_tag_name(node, tag_name):
            return node.tag_name == tag_name

        def is_x(node):
            return match_tag_name(node, "X")

        def is_a(node):
            return match_tag_name(node, 'a')

        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        matches = list(tree.select(is_x))
        assert len(matches) == 2, len(matches)
        assert str(matches[0]) == 'd', str(matches[0])
        assert str(matches[1]) == 'F', str(matches[1])
        assert matches[0].equals(parse_sxpr('(X (c d))'))
        assert matches[1].equals(parse_sxpr('(X F)'))
        # the root node is matched only if include_root is True
        assert list(tree.select(is_a, include_root=True))
        assert not list(tree.select(is_a, include_root=False))

    def test_getitem(self):
        """Integer indices address children; an index past the end raises IndexError."""
        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        assert tree[0].equals(parse_sxpr('(b X)'))
        assert tree[2].equals(parse_sxpr('(e (X F))'))
        # Only the lookup that is expected to raise lives inside the try-body.
        try:
            tree[3]
        except IndexError:
            pass
        else:
            assert False, "IndexError expected!"
        matches = list(tree.select_by_tag('X', False))
        assert matches[0].equals(parse_sxpr('(X (c d))'))
        assert matches[1].equals(parse_sxpr('(X F)'))

    def test_contains(self):
        """`in` tests the immediate children only; select_by_tag() searches deeper."""
        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        # the root's own tag is not "in" the tree ...
        assert 'a' not in tree
        # ... but is found by select_by_tag() if the root is included
        assert any(tree.select_by_tag('a', True))
        assert not any(tree.select_by_tag('a', False))
        assert 'b' in tree
        assert 'X' in tree
        assert 'e' in tree
        # 'c' is a grandchild: not "in" the tree, yet found by select_by_tag()
        assert 'c' not in tree
        assert any(tree.select_by_tag('c', False))
288 289


290
class TestSerialization:
    """Tests for the S-expression and XML output of node trees."""

    def test_sxpr_roundtrip(self):
        """Placeholder: no round-trip test is implemented here yet."""
        pass

    def test_sexpr_attributes(self):
        """Attributes set in code and attributes read from an S-expression serialize alike."""
        tree = parse_sxpr('(A "B")')
        tree.attr['attr'] = "value"
        tree2 = parse_sxpr('(A `(attr "value") "B")')
        assert tree.as_sxpr() == tree2.as_sxpr()
        tree.attr['attr2'] = "value2"
        tree3 = parse_sxpr('(A `(attr "value") `(attr2 "value2") "B")')
        assert tree.as_sxpr() == tree3.as_sxpr()

    def test_sexpr(self):
        """Check the indented multi-line layout produced by as_sxpr()."""
        tree = parse_sxpr('(A (B "C") (D "E"))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    "C"\n  )\n  (D\n    "E"\n  )\n)', s
        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    (C\n      "D"\n    )\n    (E\n      "F"\n    )' \
            '\n  )\n  (G\n    "H"\n  )\n)', s
        # content with line breaks is written as one quoted string per line
        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    (C\n      "D"\n      "X"\n    )' \
            '\n    (E\n      "F"\n    )\n  )\n  (G\n    " H "\n    " Y "\n  )\n)', s

    def test_compact_representation(self):
        """Check the bracket-free output of as_sxpr(compact=True)."""
        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
        compact = tree.as_sxpr(compact=True)
        assert compact == 'A\n  B\n    C "D"\n    E "F"\n  G "H"', compact
        # multi-line content is moved to separate, indented lines
        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        compact = tree.as_sxpr(compact=True)
        assert compact == 'A\n  B\n    C\n      "D"\n      "X"\n    E "F"' \
            '\n  G\n    " H "\n    " Y "', compact

    def test_xml_inlining(self):
        """Check as_xml() with and without inline_tags and with xml:space="preserve"."""
        tree = parse_sxpr('(A (B "C") (D "E"))')

        xml = tree.as_xml(inline_tags={'A'})
        assert xml == "<A><B>C</B><D>E</D></A>", xml

        # bind the result first, so that a failure reports the string that
        # was actually compared and not the one from the previous call
        xml = tree.as_xml()
        assert xml == "<A>\n  <B>C</B>\n  <D>E</D>\n</A>", xml

        # with xml:space="preserve" the element is written on a single line
        tree.attr['xml:space'] = 'preserve'
        xml = tree.as_xml()
        assert xml == '<A xml:space="preserve"><B>C</B><D>E</D></A>', xml

        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')

        xml = tree.as_xml(inline_tags={'B'})
        assert xml == "<A>\n  <B><C>D</C><E>F</E></B>\n  <G>H</G>\n</A>", xml
        xml = tree.as_xml(inline_tags={'A'})
        assert xml == "<A><B><C>D</C><E>F</E></B><G>H</G></A>", xml

        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        xml = tree.as_xml()
        assert xml == '<A>\n  <B>\n    <C>\n      D\n      X\n    </C>\n    ' \
            '<E>F</E>\n  </B>\n  <G>\n     H \n     Y \n  </G>\n</A>', xml
        xml = tree.as_xml(inline_tags={'A'})
        assert xml == '<A><B><C>D\nX</C><E>F</E></B><G> H \n Y </G></A>', xml

    # def test_xml2(self):
    #     tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
    #     print(tree.as_xml())
    #     print(tree.as_xml(inline_tags={'A'}))
355

di68kap's avatar
di68kap committed
356

357
if __name__ == "__main__":
    # Hand this module's namespace to DHParser's own test runner; the empty
    # first argument presumably selects all tests -- confirm against
    # DHParser.testing.runner.
    from DHParser.testing import runner
    runner("", globals())