Starting from 2021-07-01, all LRZ GitLab users will be required to explicitly accept the GitLab Terms of Service. Please see the detailed information at https://doku.lrz.de/display/PUBLIC/GitLab and make sure that your projects conform to the requirements.

compile.py 14.4 KB
Newer Older
eckhart's avatar
mend  
eckhart committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
# compile.py - Syntax driven compilation support for DHParser
#
# Copyright 2016  by Eckhart Arnold (arnold@badw.de)
#                 Bavarian Academy of Sciences and Humanities (badw.de)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.  See the License for the specific language governing
# permissions and limitations under the License.

"""
Module ``compile`` contains a skeleton class for syntax
driven compilation support. Class ``Compiler`` can serve as base
class for a compiler. Compiler objects
are callable and receive the abstract syntax tree (AST)
as argument and yield whatever output the compiler produces. In
most Digital Humanities applications this will be
XML-code. However, it can also be anything else, like binary
code or, as in the case of DHParser's EBNF-compiler, Python
source code.

Function ``compile_source`` invokes all stages of the compilation
eckhart's avatar
eckhart committed
30
process, i.e. pre-processing, parsing, CST to AST-transformation
eckhart's avatar
mend  
eckhart committed
31 32 33 34 35 36
and compilation.

See module ``ebnf`` for a sample of the implementation of a
compiler object.
"""

eckhart's avatar
eckhart committed
37
import copy
38
from typing import Any, Optional, Tuple, List, cast
eckhart's avatar
mend  
eckhart committed
39

eckhart's avatar
eckhart committed
40
from DHParser.preprocess import with_source_mapping, PreprocessorFunc, SourceMapFunc
41
from DHParser.syntaxtree import Node, RootNode, ZOMBIE_TAG, StrictResultType
eckhart's avatar
mend  
eckhart committed
42 43
from DHParser.transform import TransformationFunc
from DHParser.parse import Grammar
44
from DHParser.error import adjust_error_locations, is_error, is_fatal, Error
eckhart's avatar
mend  
eckhart committed
45
from DHParser.log import log_parsing_history, log_ST, is_logging, logfile_basename
46
from DHParser.toolkit import load_if_file
eckhart's avatar
mend  
eckhart committed
47 48


49 50 51 52 53 54
# Public API of this module, i.e. the names exported by a star-import.
__all__ = ('CompilerError',
           'Compiler',
           'compile_source',
           'visitor_name',
           'TreeProcessor',
           'process_tree')
eckhart's avatar
eckhart committed
55 56 57


class CompilerError(Exception):
    """
    Exception raised when an error of the compiler itself is detected.

    Compiler errors are not to be confused with errors in the source
    code to be compiled, which do not raise exceptions but are merely
    reported as errors.
    """


eckhart's avatar
eckhart committed
67 68 69 70 71 72 73 74 75 76 77
def visitor_name(node_name: str) -> str:
    """
    Maps a node name onto the name of the compiler method that
    handles nodes of that name, e.g.::

        >>> visitor_name('expression')
        'on_expression'
    """
    # The node name is deliberately not validated here
    # (e.g. against r'\w+$'); it is prefixed as is.
    prefix = 'on_'
    return prefix + node_name


78 79 80
# Module-level placeholder root node: ``Compiler.reset()`` assigns it to
# ``Compiler.tree`` until ``Compiler.__call__()`` stores the real tree.
ROOTNODE_PLACEHOLDER = RootNode()


eckhart's avatar
mend  
eckhart committed
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103
class Compiler:
    """
    Class Compiler is the abstract base class for compilers. Compiler
    objects are callable and take the root node of the abstract
    syntax tree (AST) as argument and return the compiled code in a
    format chosen by the compiler itself.

    Subclasses implementing a compiler must define `on_XXX()`-methods
    for each node name that can occur in the AST where 'XXX' is the
    node's name (for unnamed nodes it is the node's ptype without the
    leading colon ':').

    These compiler methods take the node on which they are run as
    argument. Other than in the AST transformation, which runs depth-first,
    compiler methods are called forward moving starting with the root
    node, and they are responsible for compiling the child nodes
    themselves. This should be done by invoking the `compile(node)`-
    method which will pick the right `on_XXX`-method. It is not
    recommended to call the `on_XXX`-methods directly.

    Attributes:
        context:  A list of parent nodes that ends with the currently
                compiled node.
        tree:  The root of the abstract syntax tree.

        _None_check:  If True (the default), `compile()` raises a
                `CompilerError` whenever a compilation method returns
                `None`.
        _dirty_flag:  A flag indicating that the compiler has already been
                called at least once and that therefore all compilation
                variables must be reset when it is called again.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Re-initializes all compilation variables. Called by the
        constructor and by `__call__()` before every run but the first."""
        # self.source = ''
        self.tree = ROOTNODE_PLACEHOLDER   # type: RootNode
        self.context = []  # type: List[Node]
        self._None_check = True  # type: bool
        self._dirty_flag = False

    def __call__(self, root: RootNode) -> Any:
        """
        Compiles the abstract syntax tree with the root node `root` and
        returns the compiled code. It is up to subclasses implementing
        the compiler to determine the format of the returned data.
        (This very much depends on the kind and purpose of the
        implemented compiler.)
        """
        assert root.tag_name != ZOMBIE_TAG
        if self._dirty_flag:
            # the compiler has been used before: discard the old state
            self.reset()
        self._dirty_flag = True
        self.tree = root
        # self.source = source  # type: str
        result = self.compile(root)
        return result

    # Obsolete, because never used...
    # def compile_children(self, node: Node) -> StrictResultType:
    #     """Compiles all children of the given node and returns the tuple
    #     of the compiled children or the node's (potentially empty) result
    #     in case the node does not have any children.
    #     """
    #     if node.children:
    #         return tuple(self.compile(child) for child in node.children)
    #     else:
    #         return node.result

    def fallback_compiler(self, node: Node) -> Any:
        """This is a generic compiler function which will be called on
        all those node types for which no compiler method `on_XXX` has
        been defined.

        If the node has children, its result is replaced by the tuple of
        the compilation results of its children. The node itself is
        returned.
        """
        if node.children:
            node.result = tuple(self.compile(nd) for nd in node.children)
        return node

    def compile(self, node: Node) -> Any:
        """
        Calls the compilation method for the given node and returns the
        result of the compilation.

        The method's name is derived from the node's tag name (without
        the leading colon, if there is one) by adding the prefix ``on_``.
        If no such method exists, `fallback_compiler()` is used.

        Note that ``compile`` does not call any compilation functions
        for the sub nodes by itself. Rather, this should be done within
        the compilation methods.

        Raises:
            CompilerError: if the compilation method returned `None`
                while `self._None_check` is True.
        """
        elem = node.tag_name
        if elem.startswith(':'):
            elem = elem[1:]
        try:
            compiler = self.__getattribute__(visitor_name(elem))
        except AttributeError:
            compiler = self.fallback_compiler
        self.context.append(node)
        result = compiler(node)
        # Not wrapped in try/finally on purpose: if the compilation method
        # raises, the context still ends with the failing node, which
        # compile_source() and process_tree() read to locate the error.
        self.context.pop()
        if result is None and self._None_check:
            raise CompilerError(
                'Method on_%s returned `None` instead of a valid compilation '
                'result! Turn this check off by adding '
                '"self._None_check = False" to the reset()-Method of your '
                'compiler class, in case on_%s actually SHOULD return None.'
                % (elem, elem))
        return result
eckhart's avatar
mend  
eckhart committed
188 189 190 191 192


def compile_source(source: str,
                   preprocessor: Optional[PreprocessorFunc],  # str -> str
                   parser: Grammar,  # str -> Node (concrete syntax tree (CST))
                   transformer: TransformationFunc,  # Node (CST) -> Node (abstract ST (AST))
                   compiler: Compiler,  # Node (AST) -> Any
                   preserve_ast: bool = False) -> Tuple[Optional[Any], List[Error], Optional[Node]]:
    """
    Compiles a source in four stages:
    1. Pre-Processing (if needed)
    2. Parsing
    3. AST-transformation
    4. Compiling.

    The later stages AST-transformation, compilation will only be invoked if
    no fatal errors occurred in any of the earlier stages of the processing
    pipeline. If non-fatal errors occurred earlier, Python exceptions raised
    by the later stages are caught and reported as errors; otherwise they
    are passed through.

    Args:
        source (str): The input text for compilation or the name of a
            file containing the input text.
        preprocessor (function):  text -> text. A preprocessor function
            or None, if no preprocessor is needed.
        parser (function):  A parsing function or grammar class
        transformer (function):  A transformation function that takes
            the root-node of the concrete syntax tree as an argument and
            transforms it (in place) into an abstract syntax tree.
        compiler (function): A compiler function or compiler class
            instance
        preserve_ast (bool): If True, a deep copy of the AST is made
            before the compilation stage and returned as the third
            element of the result tuple.

    Returns (tuple):
        The result of the compilation as a 3-tuple
        (result, errors, abstract syntax tree). In detail:
        1. The result as returned by the compiler or ``None`` in case of failure
        2. A list of error or warning messages
        3. The root-node of the abstract syntax tree if `preserve_ast` is True
           or `None` otherwise.
    """
    ast = None  # type: Optional[Node]
    original_text = load_if_file(source)  # type: str
    log_file_name = logfile_basename(source, compiler)  # type: str

    # preprocessing

    if preprocessor is None:
        source_text = original_text  # type: str
        source_mapping = lambda i: i  # type: SourceMapFunc
    else:
        source_text, source_mapping = with_source_mapping(preprocessor(original_text))

    # parsing

    syntax_tree = parser(source_text)  # type: RootNode
    if is_logging():
        log_ST(syntax_tree, log_file_name + '.cst')
        log_parsing_history(parser, log_file_name)

    # assert is_error(syntax_tree.error_flag) or str(syntax_tree) == strip_tokens(source_text), \
    #     str(syntax_tree) # Only valid if neither tokens or whitespace are dropped early

    result = None
    if not is_fatal(syntax_tree.error_flag):

        # AST-transformation

        if is_error(syntax_tree.error_flag):
            # catch Python exception, because if an error has occurred
            # earlier, the syntax tree might not look like expected,
            # which could (fatally) break AST transformations.
            try:
                transformer(syntax_tree)
            except Exception as e:
                syntax_tree.new_error(syntax_tree,
                                      "AST-Transformation failed due to earlier parser errors. "
                                      "Crash Message: " + str(e), Error.AST_TRANSFORM_CRASH)
        else:
            transformer(syntax_tree)

        if is_logging():
            log_ST(syntax_tree, log_file_name + '.ast')

        if not is_fatal(syntax_tree.error_flag):
            if preserve_ast:
                # copy, because the tree is handed on to the compiler next
                ast = copy.deepcopy(syntax_tree)

            # Compilation

            if is_error(syntax_tree.error_flag):
                # assume Python crashes are merely a consequence of earlier
                # errors, so let's catch them
                try:
                    result = compiler(syntax_tree)
                except Exception as e:
                    # attach the error to the node that was being compiled
                    # when the exception occurred, if it is known
                    node = compiler.context[-1] if compiler.context else syntax_tree
                    syntax_tree.new_error(
                        node, "Compilation failed, most likely, due to errors earlier "
                        "in the processing pipeline. Crash Message: " + str(e),
                        Error.COMPILER_CRASH)
            else:
                # assume Python crashes are programming mistakes, so let
                # the exceptions through
                result = compiler(syntax_tree)

    messages = syntax_tree.errors_sorted  # type: List[Error]
    # map error positions back to positions in the original source text
    adjust_error_locations(messages, original_text, source_mapping)
    return result, messages, ast
di68kap's avatar
di68kap committed
296 297


298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355
class TreeProcessor(Compiler):
    """A special kind of Compiler class that takes a tree as input (just
    like `Compiler`) but always yields a tree as result.

    TreeProcessor is meant for digital-humanities applications, where
    domain specific languages often describe data structures that are
    themselves, most of the time, tree structures which can be serialized
    as XML or HTML. Typically, such trees pass through several processing
    stages in sequence that - as long as no fatal errors occur on the
    way - end with an HTML-preview or a preprint-XML.

    Tree-processors are most suitably invoked with the `process_tree()`-
    function, which makes sure that a tree-processor is only invoked if
    no fatal errors have occurred in any of the earlier stages.
    """
    def __call__(self, root: RootNode) -> RootNode:
        """Runs the compilation on `root` and returns the resulting tree,
        asserting that the result is a `RootNode`."""
        processed = super().__call__(root)
        assert isinstance(processed, RootNode)
        return cast(RootNode, processed)


def process_tree(tp: TreeProcessor, tree: RootNode) -> RootNode:
    """Process a tree with the tree-processor `tp` only if no fatal error
    has occurred so far. Process, but catch any Python exceptions, in case
    any normal errors have occurred earlier in the processing pipeline.
    Don't catch Python-exceptions if no errors have occurred earlier.

    This behaviour is based on the assumption that given any non-fatal
    errors have occurred earlier, the tree passed through the pipeline
    might not be in a state that is expected by the later stages, thus if
    an exception occurs it is not really to be considered a programming
    error. Processing stages should be written with possible errors
    occurring in earlier stages in mind, though. However, because it could
    be difficult to provide for all possible kinds of badly structured
    trees resulting from errors, exceptions occurring on code processing
    potentially faulty trees will be dealt with gracefully.

    Args:
        tp:  The tree-processor to apply.
        tree:  The root node of the tree to be processed.

    Returns:
        The processed tree. If `tree` already carries a fatal error, or
        if processing crashed after earlier non-fatal errors, the tree
        that was passed in is returned (in the latter case with an
        additional error added to it).
    """
    assert isinstance(tp, TreeProcessor)
    if is_fatal(tree.error_flag):
        # Nothing to process, but still hand the tree back, so that callers
        # which write `tree = process_tree(tp, tree)` never receive `None`.
        return tree
    if is_error(tree.error_flag):
        # assume Python crashes are merely a consequence of earlier
        # errors, so let's catch them
        try:
            tree = tp(tree)
        except Exception as e:
            # attach the error to the node that was being processed when
            # the exception occurred, if it is known
            node = tp.context[-1] if tp.context else tree
            tree.new_error(
                node, "Tree-processing failed, most likely, due to errors earlier "
                      "in the processing pipeline. Crash Message: " + str(e),
                Error.TREE_PROCESSING_CRASH)
    else:
        # assume Python crashes are programming mistakes, so let
        # the exceptions through
        tree = tp(tree)
    assert isinstance(tree, RootNode)
    return tree


di68kap's avatar
di68kap committed
356
# TODO: Verify compiler against grammar, i.e. make sure that for all on_X()-methods, `X` is the name of a parser
357
# TODO: AST validation against an ASDSL-Specification