11.3.2021, 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

test_syntaxtree.py 13.9 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
#!/usr/bin/python3

"""test_syntaxtree.py - test of syntaxtree-module of DHParser 
                             
Author: Eckhart Arnold <arnold@badw.de>

Copyright 2017 Bavarian Academy of Sciences and Humanities

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

22
import copy
23
import sys
Eckhart Arnold's avatar
Eckhart Arnold committed
24 25
sys.path.extend(['../', './'])

26 27
from DHParser.syntaxtree import Node, RootNode, parse_sxpr, parse_xml, flatten_sxpr, \
    flatten_xml, ZOMBIE_TAG
Eckhart Arnold's avatar
Eckhart Arnold committed
28
from DHParser.transform import traverse, reduce_single_child, \
Eckhart Arnold's avatar
Eckhart Arnold committed
29
    replace_by_single_child, flatten, remove_empty, remove_whitespace
30
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
31
from DHParser.dsl import grammar_provider
32
from DHParser.error import Error
33
from DHParser.parse import RE, Grammar
34

35

36 37
class TestParseSxpression:
    """Tests for turning S-expression strings into trees."""

    def test_parse_s_expression(self):
        """Parse two well-formed S-expressions and reject a non-tree string."""
        flat = flatten_sxpr(parse_sxpr('(a (b c))').as_sxpr())
        assert flat == '(a (b "c"))', flat

        # Content spread over several lines is serialized as separate strings.
        flat = flatten_sxpr(parse_sxpr('(a i\nj\nk)').as_sxpr())
        assert flat == '(a "i" "j" "k")', flat

        try:
            parse_sxpr('a b c')
        except ValueError:
            pass
        else:
            assert False, "parse_sxpr() should raise a ValueError " \
                          "if argument is not a tree!"

49 50 51 52 53 54 55
class TestParseXML:
    """Tests for reading XML into a tree and serializing it again."""

    def test_roundtrip(self):
        """Serialize a tree as XML, parse that XML, and serialize once more."""
        source_tree = parse_sxpr('(a (b c) (d (e f) (h i)))')
        flat = flatten_xml(source_tree.as_xml())
        assert flat == '<a><b>c</b><d><e>f</e><h>i</h></d></a>'
        reparsed = parse_xml(flat)
        assert flat == flatten_xml(reparsed.as_xml())

    def test_plaintext_handling(self):
        """Text standing next to child elements shows up as ':Token' nodes."""
        mixed = parse_xml('<a>alpha <b>beta</b> gamma</a>')
        expected_sxpr = '(a (:Token "alpha ") (b "beta") (:Token " gamma"))'
        assert flatten_sxpr(mixed.as_sxpr()) == expected_sxpr

        padded = parse_xml(' <a>  <b>beta</b>  </a> ')
        expected_xml = '<a><:Token>  </:Token><b>beta</b><:Token>  </:Token></a>'
        assert flatten_xml(padded.as_xml()) == expected_xml
        inlined = padded.as_xml(inline_tags={'a'}, omit_tags={':Token'})
        assert inlined == '<a>  <b>beta</b>  </a>'

        multiline = parse_xml(' <a>\n  <b>beta</b>\n</a> ')
        assert multiline.as_xml(inline_tags={'a'}) == '<a><b>beta</b></a>'

    def test_flatten_xml(self):
        """flatten_xml() removes the line breaks and indentation of as_xml()."""
        parsed = parse_xml('<alpha>\n  <beta>gamma</beta>\n</alpha>')
        flat_xml = flatten_xml(parsed.as_xml())
        assert flat_xml == '<alpha><beta>gamma</beta></alpha>', flat_xml

73

74 75 76 77 78 79 80 81 82 83 84
class TestParseJSON:
    """Tests for the JSON-object serialization of trees."""

    def test_roundtrip(self):
        """A tree carrying attributes must equal its copy rebuilt from JSON."""
        original = parse_sxpr('(a (b c) (d (e f) (h i)))')
        d_node = original.pick('d')
        d_node.attr['name'] = "James Bond"
        d_node.attr['id'] = '007'

        rebuilt = Node.from_json_obj(original.to_json_obj())
        assert rebuilt.equals(original)


85 86 87 88 89 90
class TestNode:
    """
    Tests for class Node
    """

    def setup(self):
        """Create the two fixture trees that the tests below read from ``self``.

        NOTE(review): presumably invoked under this name by DHParser's own
        test runner -- confirm before renaming.
        """
        self.unique_nodes_sexpr = '(a (b c) (d e) (f (g h)))'
        self.unique_tree = parse_sxpr(self.unique_nodes_sexpr)
        self.recurring_nodes_sexpr = '(a (b x) (c (d e) (b y)))'
        self.recurr_tree = parse_sxpr(self.recurring_nodes_sexpr)

    def setup_method(self, method=None):
        """Per-test hook for pytest; simply delegates to ``setup``.

        pytest dropped its support for nose-style ``setup`` methods in
        version 8, so without this hook ``self.unique_tree`` and
        ``self.recurr_tree`` would not exist when the tests run under pytest.
        """
        self.setup()

    def test_content_property(self):
        """``content`` must reflect results that were changed after a first read."""
        tree = RootNode(parse_sxpr('(a (b c) (d e))'))
        content = tree.content
        b = tree.pick('b')
        d = tree.pick('d')
        b.result = "recently "
        d.result = "changed"
        assert content != tree.content
        assert content == 'ce'
        assert tree.content == 'recently changed'

    def test_deepcopy(self):
        """A deep copy must be equal at first and independent afterwards."""
        sxpr = '(a (b c) (d (e f) (h i)))'
        tree = RootNode(parse_sxpr(sxpr))
        tree.with_pos(0)
        tree_copy = copy.deepcopy(tree)

        assert tree.equals(tree_copy)
        assert tree.as_sxpr() == parse_sxpr(sxpr).as_sxpr()
        assert tree_copy.as_sxpr() == parse_sxpr(sxpr).as_sxpr()

        # An error added to the original must not leak into the copy.
        tree.add_error(tree, Error('Test Error', 0))
        assert not tree_copy.errors
        assert tree.as_sxpr() != parse_sxpr(sxpr).as_sxpr()
        assert tree_copy.as_sxpr() == parse_sxpr(sxpr).as_sxpr()

        # Neither must a changed result.
        tree['d'].result = "x"
        assert not tree.equals(tree_copy)
        assert tree_copy.equals(parse_sxpr(sxpr))
        assert tree.equals(parse_sxpr('(a (b c) (d x))'))

        # this also checks for errors equality...
        assert parse_sxpr('(a (b c) (d x))').as_sxpr() != tree.as_sxpr()

    def test_str(self):
        """str() of a tree is the concatenated content of its leaves."""
        assert str(self.unique_tree) == "ceh"
        assert str(self.recurr_tree) == "xey"

    def test_select_subnodes(self):
        """select() with an always-true match function yields root and all descendants."""
        tags = [node.tag_name
                for node in self.unique_tree.select(lambda nd: True, include_root=True)]
        assert ''.join(tags) == "abdfg", ''.join(tags)

    def test_find(self):
        """select() finds leaves by result and recurring nodes by tag name."""
        found = list(self.unique_tree.select(lambda nd: not nd.children and nd.result == "e"))
        assert len(found) == 1
        assert found[0].result == 'e'
        found = list(self.recurr_tree.select(lambda nd: nd.tag_name == 'b'))
        assert len(found) == 2
        assert found[0].result == 'x' and found[1].result == 'y'

    def test_equality1(self):
        """equals() compares trees by structure and content."""
        assert self.unique_tree.equals(self.unique_tree)
        assert not self.recurr_tree.equals(self.unique_tree)
        assert not parse_sxpr('(a (b c))').equals(parse_sxpr('(a (b d))'))
        assert parse_sxpr('(a (b c))').equals(parse_sxpr('(a (b c))'))

    def test_equality2(self):
        """A parsed and transformed tree equals the same tree given as S-expression."""
        ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
        att = {"term": [remove_empty, remove_whitespace, replace_by_single_child, flatten],
               "factor": [remove_empty, remove_whitespace, reduce_single_child],
               "*": [remove_empty, remove_whitespace, replace_by_single_child]}
        parser = grammar_provider(ebnf)()
        tree = parser("20 / 4 * 3")
        traverse(tree, att)
        compare_tree = parse_sxpr("(term (term (factor 20) (:Token /) (factor 4)) (:Token *) (factor 3))")
        assert tree.equals(compare_tree), tree.as_sxpr()

    def test_copy(self):
        """Changing a child of a deep copy must not affect the original tree."""
        cpy = copy.deepcopy(self.unique_tree)
        assert cpy.equals(self.unique_tree)
        assert cpy.result[0].result != "epsilon"  # just to make sure...
        cpy.result[0].result = "epsilon"
        assert not cpy.equals(self.unique_tree)

    def test_copy2(self):
        """Deep copies must transform and compile like the tree they were made from."""
        # test if Node.__deepcopy__ goes sufficiently deep for ast-
        # transformation and compiling to perform correctly after copy
        ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
        parser = get_ebnf_grammar()
        transform = get_ebnf_transformer()
        compiler = get_ebnf_compiler()
        tree = parser(ebnf)
        tree_copy = copy.deepcopy(tree)
        transform(tree_copy)
        res1 = compiler(tree_copy)
        t2 = copy.deepcopy(tree_copy)
        res2 = compiler(t2)
        # NOTE(review): ''.isnumeric() is False, so each of the three checks
        # below also fails when the two results are identical; the test relies
        # on the compiled outputs always differing in some ID digits.
        diff = ''.join([a for a, b in zip(res1, res2) if a != b])
        assert diff.isnumeric()  # differences should only be ID-Numbers
        tree_copy = copy.deepcopy(tree)
        transform(tree_copy)
        res3 = compiler(tree_copy)
        diff = ''.join([a for a, b in zip(res2, res3) if a != b])
        assert diff.isnumeric()  # differences should only be ID-Numbers
        transform(tree)
        res4 = compiler(tree)
        diff = ''.join([a for a, b in zip(res3, res4) if a != b])
        assert diff.isnumeric()  # differences should only be ID-Numbers

    def test_len_and_pos(self):
        """Test len-property of Node."""
        nd1 = Node(ZOMBIE_TAG, "123")
        assert len(nd1) == 3, "Expected Node.len == 3, got %i" % len(nd1)
        nd2 = Node(ZOMBIE_TAG, "456")
        assert len(nd2) == 3, "Expected Node.len == 3, got %i" % len(nd2)
        nd = Node(ZOMBIE_TAG, (nd1, nd2))
        assert len(nd) == 6, "Expected Node.len == 6, got %i" % len(nd)
        # Assigning position 0 to the parent also positions both children.
        nd.with_pos(0)
        assert nd.pos == 0, "Expected Node.pos == 0, got %i" % nd.pos
        assert nd1.pos == 0, "Expected Node.pos == 0, got %i" % nd1.pos
        assert nd2.pos == 3, "Expected Node.pos == 3, got %i" % nd2.pos

    def test_xml_sanitizer(self):
        """Characters that are special in XML must be escaped by as_xml()."""
        node = Node('tag', '<&>')
        assert node.as_xml() == '<tag>&lt;&amp;&gt;</tag>'
210

di68kap's avatar
di68kap committed
211 212 213

class TestRootNode:
    """Tests for error bookkeeping and error reporting of RootNode."""

    def test_error_handling(self):
        """Errors attached out of order must come back ordered from errors_sorted."""
        tree = parse_sxpr('(A (B D) (C E))')
        tree.with_pos(0)
        root = RootNode()
        # Deliberately register the error for the later node (C) first.
        root.new_error(tree.children[1], "error C")
        root.new_error(tree.children[0], "error B")
        root.swallow(tree)
        assert root.error_flag
        errors = root.errors_sorted
        # Reading the sorted errors must leave the error flag set.
        assert root.error_flag
        # assert errors == root.errors(True)
        # assert not root.error_flag and not root.errors()
        error_str = "\n".join(str(e) for e in errors)
        # The former check compared find("A") with find("B"); no message
        # contains an "A", so it could not detect a wrong order. Compare the
        # two messages that were actually added instead.
        # NOTE(review): assumes errors_sorted orders by node position and that
        # str(error) contains the message text -- confirm against DHParser.error.
        b_at = error_str.find("error B")
        c_at = error_str.find("error C")
        assert 0 <= b_at < c_at, error_str

    def test_error_reporting(self):
        """A parser stopping before the end of the input leaves an error marker in str()."""
        number = RE(r'\d+') | RE(r'\d+') + RE(r'\.') + RE(r'\d+')
        result = str(Grammar(number)("3.1416"))
        assert result == '3 <<< Error on ".141" | Parser stopped before end! trying to recover... >>> ', \
            result

di68kap's avatar
di68kap committed
234

235
class TestNodeFind:
    """Test the select-functions of class Node.
    """

    def test_find(self):
        """Select nodes by tag name, once with and once without the root."""
        def tag_matcher(wanted):
            # Build a match function that accepts nodes named `wanted`.
            return lambda node: node.tag_name == wanted

        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        hits = list(tree.select(tag_matcher("X")))
        assert len(hits) == 2, len(hits)
        first, second = hits
        assert str(first) == 'd', str(first)
        assert str(second) == 'F', str(second)
        assert first.equals(parse_sxpr('(X (c d))'))
        assert second.equals(parse_sxpr('(X F)'))

        # The root node 'a' is only reported if include_root is set.
        is_root_tag = tag_matcher('a')
        assert list(tree.select(is_root_tag, include_root=True))
        assert not list(tree.select(is_root_tag, include_root=False))

    def test_getitem(self):
        """Integer indices address the children; select_by_tag finds all 'X' nodes."""
        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        assert tree[0].equals(parse_sxpr('(b X)'))
        assert tree[2].equals(parse_sxpr('(e (X F))'))
        try:
            tree[3]
        except IndexError:
            pass
        else:
            assert False, "IndexError expected!"

        x_nodes = list(tree.select_by_tag('X', False))
        assert x_nodes[0].equals(parse_sxpr('(X (c d))'))
        assert x_nodes[1].equals(parse_sxpr('(X F)'))

    def test_contains(self):
        """`in` looks at the immediate children only; select_by_tag searches deeper."""
        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')

        # The root's own tag is not "contained" in the tree ...
        assert 'a' not in tree
        # ... and is found by select_by_tag only when the root is included.
        assert any(tree.select_by_tag('a', True))
        assert not any(tree.select_by_tag('a', False))

        for child_tag in ('b', 'X', 'e'):
            assert child_tag in tree

        # 'c' is a grandchild: not contained, but reachable through selection.
        assert 'c' not in tree
        assert any(tree.select_by_tag('c', False))
278 279


280
class TestSerialization:
    """Tests for the S-expression and XML serialization of trees."""

    def test_sxpr_roundtrip(self):
        """Placeholder; no checks are implemented here."""
        pass

    def test_sexpr_attributes(self):
        """Attributes set on a node serialize like attributes read from an S-expression."""
        tree = parse_sxpr('(A "B")')
        tree.attr['attr'] = "value"
        tree2 = parse_sxpr('(A `(attr "value") "B")')
        assert tree.as_sxpr() == tree2.as_sxpr()
        tree.attr['attr2'] = "value2"
        tree3 = parse_sxpr('(A `(attr "value") `(attr2 "value2") "B")')
        assert tree.as_sxpr() == tree3.as_sxpr()

    def test_sexpr(self):
        """as_sxpr() indents by nesting depth and puts each content line on its own row."""
        tree = parse_sxpr('(A (B "C") (D "E"))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    "C"\n  )\n  (D\n    "E"\n  )\n)', s
        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    (C\n      "D"\n    )\n    (E\n      "F"\n    )' \
            '\n  )\n  (G\n    "H"\n  )\n)', s
        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    (C\n      "D"\n      "X"\n    )' \
            '\n    (E\n      "F"\n    )\n  )\n  (G\n    " H "\n    " Y "\n  )\n)', s

    def test_compact_representation(self):
        """as_sxpr(compact=True) drops the brackets and keeps one-line leaves on one row."""
        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
        compact = tree.as_sxpr(compact=True)
        assert compact == 'A\n  B\n    C "D"\n    E "F"\n  G "H"', compact
        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        compact = tree.as_sxpr(compact=True)
        assert compact == 'A\n  B\n    C\n      "D"\n      "X"\n    E "F"' \
            '\n  G\n    " H "\n    " Y "', compact

    def test_xml_inlining(self):
        """as_xml() writes inline tags and xml:space="preserve" elements on one line."""
        tree = parse_sxpr('(A (B "C") (D "E"))')

        xml = tree.as_xml(inline_tags={'A'})
        assert xml == "<A><B>C</B><D>E</D></A>", xml

        # Bind the result first so that a failure reports the value that was
        # actually compared, not the stale inline rendering from above.
        xml = tree.as_xml()
        assert xml == "<A>\n  <B>C</B>\n  <D>E</D>\n</A>", xml

        tree.attr['xml:space'] = 'preserve'
        xml = tree.as_xml()
        assert xml == '<A xml:space="preserve"><B>C</B><D>E</D></A>', xml

        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')

        xml = tree.as_xml(inline_tags={'B'})
        assert xml == "<A>\n  <B><C>D</C><E>F</E></B>\n  <G>H</G>\n</A>", xml
        xml = tree.as_xml(inline_tags={'A'})
        assert xml == "<A><B><C>D</C><E>F</E></B><G>H</G></A>", xml

        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        xml = tree.as_xml()
        assert xml == '<A>\n  <B>\n    <C>\n      D\n      X\n    </C>\n    ' \
            '<E>F</E>\n  </B>\n  <G>\n     H \n     Y \n  </G>\n</A>', xml
        xml = tree.as_xml(inline_tags={'A'})
        assert xml == '<A><B><C>D\nX</C><E>F</E></B><G> H \n Y </G></A>', xml

    # def test_xml2(self):
    #     tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
    #     print(tree.as_xml())
    #     print(tree.as_xml(inline_tags={'A'}))
345

di68kap's avatar
di68kap committed
346

347
if __name__ == "__main__":
    # Run as a script: pass this module's global namespace, together with an
    # empty string as first argument, to the runner from DHParser.testing.
    from DHParser.testing import runner
    runner("", globals())