# compile.py - Syntax driven compilation support for DHParser
#
# Copyright 2016  by Eckhart Arnold (arnold@badw.de)
#                 Bavarian Academy of Sciences and Humanities (badw.de)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.  See the License for the specific language governing
# permissions and limitations under the License.

"""
Module ``compile`` contains a skeleton class for syntax
driven compilation support. Class ``Compiler`` can serve as base
class for a compiler. Compiler objects
are callable an receive the Abstract syntax tree (AST)
as argument and yield whatever output the compiler produces. In
most Digital Humanities applications this will be
XML-code. However, it can also be anything else, like binary
code or, as in the case of DHParser's EBNF-compiler, Python
source code.

Function ``compile_source`` invokes all stages of the compilation
eckhart's avatar
eckhart committed
30
process, i.e. pre-processing, parsing, CST to AST-transformation
eckhart's avatar
mend  
eckhart committed
31
32
33
34
35
36
and compilation.

See module ``ebnf`` for a sample of the implementation of a
compiler object.
"""

import copy
import re

from DHParser.preprocess import strip_tokens, with_source_mapping, PreprocessorFunc
from DHParser.syntaxtree import Node, RootNode, ZOMBIE_ROOTNODE, StrictResultType
from DHParser.transform import TransformationFunc
from DHParser.parse import Grammar
from DHParser.error import adjust_error_locations, is_error, Error
from DHParser.log import log_parsing_history, log_ST, is_logging, logfile_basename
# NOTE(review): `typing` is pulled in via DHParser.toolkit right before the
# plain `typing` import below - presumably a compatibility shim; keep this
# order until that has been confirmed.
from DHParser.toolkit import typing, sane_parser_name, load_if_file
from typing import Any, Optional, Tuple, List, Callable


# Public API of this module.
__all__ = ('CompilerError', 'Compiler', 'compile_source')


class CompilerError(Exception):
    """
    Exception raised when an error of the compiler itself is detected.

    Compiler errors are not to be confused with errors in the source
    code to be compiled, which do not raise Exceptions but are merely
    reported as an error.
    """


class Compiler:
    """
    Class Compiler is the abstract base class for compilers. Compiler
    objects are callable and take the root node of the abstract
    syntax tree (AST) as argument and return the compiled code in a
    format chosen by the compiler itself.

    Subclasses implementing a compiler must define `on_XXX()`-methods
    for each node name that can occur in the AST where 'XXX' is the
    node's name (for unnamed nodes it is the node's ptype without the
    leading colon ':').

    These compiler methods take the node on which they are run as
    argument. Other than in the AST transformation, which runs depth-first,
    compiler methods are called forward moving starting with the root
    node, and they are responsible for compiling the child nodes
    themselves. This should be done by invoking the `compile(node)`-
    method which will pick the right `on_XXX`-method. It is not
    recommended to call the `on_XXX`-methods directly.

    Attributes:
        tree:  The root node that was passed to the most recent call of
                the compiler object (``ZOMBIE_ROOTNODE`` after a reset).
                Errors detected by ``compile()`` are added to this tree.

        context:  A list of parent nodes that ends with the currently
                compiled node.

        _dirty_flag:  A flag indicating that the compiler has already been
                called at least once and that therefore all compilation
                variables must be reset when it is called again.
    """

    def __init__(self):
        self._reset()

    def _reset(self):
        """Sets all compilation variables back to their initial values."""
        self.tree = ZOMBIE_ROOTNODE   # type: RootNode
        self.context = []  # type: List[Node]
        self._dirty_flag = False

    def __call__(self, root: RootNode) -> Any:
        """
        Compiles the abstract syntax tree with the root node `root` and
        returns the compiled code. It is up to subclasses implementing
        the compiler to determine the format of the returned data.
        (This very much depends on the kind and purpose of the
        implemented compiler.)
        """
        # A compiler object that has been used before starts from a clean
        # state, so that nothing leaks over from the previous run.
        if self._dirty_flag:
            self._reset()
        self._dirty_flag = True
        self.tree = root
        result = self.compile(root)
        return result

    # @staticmethod
    # def propagate_error_flags(node: Node, lazy: bool = True) -> None:
    #     # See test_parser.TestCompilerClass.test_propagate_error()..
    #     """Propagates error flags from children to parent nodes to make sure
    #     that the parent's error flag is always greater or equal the maximum
    #     of the children's error flags."""
    #     if not lazy or node.error_flag < Error.HIGHEST:
    #         for child in node.children:
    #             Compiler.propagate_error_flags(child)
    #             node.error_flag = max(node.error_flag, child.error_flag)
    #             if lazy and node.error_flag >= Error.HIGHEST:
    #                 return

    @staticmethod
    def method_name(node_name: str) -> str:
        """
        Returns the method name for `node_name`, e.g.::

            >>> Compiler.method_name('expression')
            'on_expression'
        """
        assert re.match(r'\w+$', node_name)
        return 'on_' + node_name

    def compile_children(self, node: Node) -> StrictResultType:
        """Compiles all children of the given node and returns the tuple
        of the compiled children or the node's (potentially empty) result
        in case the node does not have any children.
        """
        if node.children:
            return tuple(self.compile(child) for child in node.children)
        else:
            return node.result

    def fallback_compiler(self, node: Node) -> Any:
        """This is a generic compiler function which will be called on
        all those node types for which no compiler method `on_XXX` has
        been defined.

        If the node has children, they are compiled and the tuple of the
        compilation results is stored as the node's new result. The node
        itself is returned in either case."""
        if node.children:
            result = tuple(self.compile(nd) for nd in node.children)
            node.result = result
        return node

    def compile(self, node: Node) -> Any:
        """
        Calls the compilation method for the given node and returns the
        result of the compilation.

        The method's name is derived from the node's tag name (without
        the leading colon ':', if there is one) by adding the prefix
        ``on_``. If no such method exists, ``fallback_compiler`` is
        called instead.

        Note that ``compile`` does not call any compilation functions
        for the parsers of the sub nodes by itself. Rather, this should
        be done within the compilation methods.

        Returns:
            The result of the compilation method, or ``None`` if the
            node's name is a reserved name. In the latter case an error
            is added to ``self.tree``.

        Raises:
            CompilerError: if the compilation method returned ``None``.
        """
        elem = node.tag_name
        if elem.startswith(':'):
            elem = elem[1:]
        if not sane_parser_name(elem):
            self.tree.new_error(node, "Reserved name '%s' not allowed as parser "
                                "name! " % elem + "(Any name starting with "
                                "'_' or '__' or ending with '__' is reserved.)")
            return None
        try:
            compiler = self.__getattribute__(self.method_name(elem))
        except AttributeError:
            compiler = self.fallback_compiler
        self.context.append(node)
        try:
            result = compiler(node)
        finally:
            # Always restore the context stack, so that it stays consistent
            # even if the compilation method raised an exception that is
            # caught further up the call chain.
            self.context.pop()
        if result is None:
            raise CompilerError('Method on_%s returned `None` instead of a '
                                'valid compilation result!' % elem)
        # # the following statement makes sure that the error_flag
        # # is propagated early on. Otherwise it is redundant, because
        # # the __call__ method globally propagates the node's error_flag
        # # later anyway. So, maybe it could be removed here.
        # for child in node.children:
        #     node.error_flag = node.error_flag or child.error_flag
        return result


def compile_source(source: str,
                   preprocessor: Optional[PreprocessorFunc],  # str -> str
                   parser: Grammar,  # str -> Node (concrete syntax tree (CST))
                   transformer: TransformationFunc,  # Node (CST) -> Node (abstract ST (AST))
                   compiler: Compiler,  # Node (AST) -> Any
                   preserve_ast: bool = False) -> Tuple[Optional[Any], List[Error], Optional[Node]]:
    """
    Compiles a source in four stages:
    1. Pre-Processing (if needed)
    2. Parsing
    3. AST-transformation
    4. Compiling.

    The AST-transformation is only invoked if parsing did not yield
    any errors, and the compilation stage is only invoked if the
    tree is still free of errors after the AST-transformation.

    Args:
        source (str): The input text for compilation or the name of a
            file containing the input text.
        preprocessor (function):  text -> text. A preprocessor function
            or None, if no preprocessor is needed.
        parser (function):  A parsing function or grammar class
        transformer (function):  A transformation function that takes
            the root-node of the concrete syntax tree as an argument and
            transforms it (in place) into an abstract syntax tree.
        compiler (function): A compiler function or compiler class
            instance
        preserve_ast (bool): Preserves the AST-tree.

    Returns (tuple):
        The result of the compilation as a 3-tuple
        (result, errors, abstract syntax tree). In detail:
        1. The result as returned by the compiler or ``None`` in case of failure
        2. A list of error or warning messages
        3. A deep copy of the root-node of the abstract syntax tree, taken
           right before the compilation stage, if `preserve_ast` is True
           and the compilation stage was reached, or `None` otherwise.
    """
    ast = None
    original_text = load_if_file(source)
    log_file_name = logfile_basename(source, compiler)
    if preprocessor is None:
        source_text = original_text

        def source_mapping(i):
            # without a preprocessor, positions map onto themselves
            return i
    else:
        source_text, source_mapping = with_source_mapping(preprocessor(original_text))
    syntax_tree = parser(source_text)  # type: RootNode
    if is_logging():
        log_ST(syntax_tree, log_file_name + '.cst')
        log_parsing_history(parser, log_file_name)

    assert is_error(syntax_tree.error_flag) or str(syntax_tree) == strip_tokens(source_text)
    # only compile if there were no syntax errors, for otherwise it is
    # likely that error list gets littered with compile error messages
    result = None
    # efl = syntax_tree.error_flag
    # messages = syntax_tree.collect_errors(clear_errors=True)
    if not is_error(syntax_tree.error_flag):
        transformer(syntax_tree)
        # efl = max(efl, syntax_tree.error_flag)
        # messages.extend(syntax_tree.collect_errors(clear_errors=True))
        if is_logging():
            log_ST(syntax_tree, log_file_name + '.ast')
        if not is_error(syntax_tree.error_flag):
            if preserve_ast:
                # copy the tree before the compiler gets its hands on it
                ast = copy.deepcopy(syntax_tree)
            result = compiler(syntax_tree)
        # print(syntax_tree.as_sxpr())
        # messages.extend(syntax_tree.collect_errors())
        # syntax_tree.error_flag = max(syntax_tree.error_flag, efl)

    messages = syntax_tree.collect_errors()
    # the error list is adjusted in place; it is passed the original text
    # and the source mapping
    adjust_error_locations(messages, original_text, source_mapping)
    return result, messages, ast


# TODO: Verify compiler against grammar, i.e. make sure that for all on_X()-methods, `X` is the name of a parser