Commit 3004d9b7 authored by eckhart

- moved compilation support to a separate module "compile.py"

parent 3f663703
......@@ -18,6 +18,7 @@ implied. See the License for the specific language governing
permissions and limitations under the License.
"""
from .compile import *
from .dsl import *
from .ebnf import *
# Flat namespace for the DHParser Package. Is this a good idea...?
......
......@@ -30,9 +30,10 @@ from DHParser.ebnf import EBNFCompiler, grammar_changed, \
PreprocessorFactoryFunc, ParserFactoryFunc, TransformerFactoryFunc, CompilerFactoryFunc
from DHParser.error import Error, is_error, has_errors, only_errors
from DHParser.log import logging
from DHParser.parse import Grammar, Compiler, compile_source
from DHParser.parse import Grammar
from DHParser import Compiler, compile_source, TransformationFunc
from DHParser.preprocess import nil_preprocessor, PreprocessorFunc
from DHParser.syntaxtree import Node, TransformationFunc
from DHParser.syntaxtree import Node
from DHParser.toolkit import load_if_file, is_python_code, compile_python_object, \
re
......
......@@ -31,10 +31,10 @@ from typing import Callable, Dict, List, Set, Tuple
from DHParser.error import Error
from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, RE, \
NegativeLookahead, Alternative, Series, Option, OneOrMore, ZeroOrMore, Token, \
Compiler
NegativeLookahead, Alternative, Series, Option, OneOrMore, ZeroOrMore, Token
from DHParser import Compiler, TransformationFunc
from DHParser.preprocess import nil_preprocessor, PreprocessorFunc
from DHParser.syntaxtree import Node, TransformationFunc, WHITESPACE_PTYPE, TOKEN_PTYPE
from DHParser.syntaxtree import Node, WHITESPACE_PTYPE, TOKEN_PTYPE
from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name, re, expand_table
from DHParser.transform import traverse, remove_brackets, \
reduce_single_child, replace_by_single_child, remove_expendables, \
......
# parse.py - parser combinators for for DHParser
# parse.py - parser combinators for DHParser
#
# Copyright 2016 by Eckhart Arnold (arnold@badw.de)
# Bavarian Academy of Sciences and Humanities (badw.de)
......@@ -18,8 +18,8 @@
"""
Module ``parse`` contains the python classes and functions for
DHParser's packrat parser. It's central class is the
``Grammar``-class, which is the base class for any contrete
DHParser's packrat-parser. Its central class is the
``Grammar``-class, which is the base class for any concrete
Grammar. Grammar-objects are callable and parsing is done by
calling a Grammar-object with a source text as argument.
......@@ -27,35 +27,20 @@ The different parsing functions are callable descendants of class
``Parser``. Usually, they are organized in a tree and defined
within the namespace of a grammar-class. See ``ebnf.EBNFGrammar``
for an example.
Module ``parse`` furthermore contains the base class for a
compiler as well as a generic compiler function. Compiler
objects are also callable; they receive the abstract syntax tree
(AST) as argument and yield whatever output the compiler
produces. In most Digital Humanities applications this will be
XML-code. However, it can also be anything else, like binary
code or, as in the case of DHParser's ebnf-compiler, Python
source code.
See module ``ebnf`` for a sample of the implementation of a
compiler object.
"""
import copy
import os
from typing import Any, Callable, cast, Dict, List, Set, Tuple, Union, Optional
from DHParser.error import Error, is_error, linebreaks, adjust_error_locations
from DHParser.log import is_logging, logfile_basename, HistoryRecord, log_ST, \
log_parsing_history
from DHParser.preprocess import BEGIN_TOKEN, END_TOKEN, RX_TOKEN_NAME, \
PreprocessorFunc, with_source_mapping, strip_tokens
from DHParser.error import Error, linebreaks
from DHParser.log import is_logging, HistoryRecord
from DHParser.preprocess import BEGIN_TOKEN, END_TOKEN, RX_TOKEN_NAME
from DHParser.stringview import StringView, EMPTY_STRING_VIEW
from DHParser.syntaxtree import Node, TransformationFunc, ParserBase, WHITESPACE_PTYPE, \
from DHParser.syntaxtree import Node, ParserBase, WHITESPACE_PTYPE, \
TOKEN_PTYPE, ZOMBIE_PARSER
from DHParser.toolkit import sane_parser_name, \
escape_control_characters, load_if_file, re
from DHParser.toolkit import sane_parser_name, escape_control_characters, re, typing
from typing import Callable, cast, Dict, List, Set, Tuple, Union, Optional
__all__ = ('Parser',
'UnknownParserError',
......@@ -86,9 +71,7 @@ __all__ = ('Parser',
'Capture',
'Retrieve',
'Pop',
'Forward',
'Compiler',
'compile_source')
'Forward')
########################################################################
......@@ -1900,211 +1883,4 @@ class Forward(Parser):
return False
#######################################################################
#
# Syntax driven compilation support
#
#######################################################################
class Compiler:
"""
Class Compiler is the abstract base class for compilers. Compiler
objects are callable and take the root node of the abstract
syntax tree (AST) as argument and return the compiled code in a
format chosen by the compiler itself.
Subclasses implementing a compiler must define `on_XXX()`-methods
for each node name that can occur in the AST, where 'XXX' is the
node's name (for unnamed nodes it is the node's ptype without the
leading colon ':').
These compiler methods take the node on which they are run as
argument. Unlike the AST transformation, which runs depth-first,
compiler methods are called moving forward from the root node,
and they are responsible for compiling the child nodes
themselves. This should be done by invoking the `compile(node)`-
method, which will pick the right `on_XXX`-method. It is not
recommended to call the `on_XXX`-methods directly.
Attributes:
context: A list of parent nodes that ends with the currently
compiled node.
grammar_name: The name of the grammar this compiler is related to
grammar_source: The source code of the grammar this compiler is
related to.
_dirty_flag: A flag indicating that the compiler has already been
called at least once and that therefore all compilation
variables must be reset when it is called again.
"""
def __init__(self, grammar_name="", grammar_source=""):
self._reset()
self.set_grammar_name(grammar_name, grammar_source)
def _reset(self):
self.context = [] # type: List[Node]
self._dirty_flag = False
def __call__(self, node: Node) -> Any:
"""
Compiles the abstract syntax tree with the root node `node` and
returns the compiled code. It is up to subclasses implementing
the compiler to determine the format of the returned data.
(This very much depends on the kind and purpose of the
implemented compiler.)
"""
if self._dirty_flag:
self._reset()
self._dirty_flag = True
result = self.compile(node)
self.propagate_error_flags(node, lazy=True)
return result
def set_grammar_name(self, grammar_name="", grammar_source=""):
"""
Changes the grammar's name and the grammar's source.
The grammar name and the source text of the grammar are
metadata about the grammar that do not affect the compilation
process. Classes inheriting from `Compiler` can use this
information to name and annotate their output.
"""
assert grammar_name == "" or re.match(r'\w+\Z', grammar_name)
if not grammar_name and re.fullmatch(r'[\w/:\\]+', grammar_source):
grammar_name = os.path.splitext(os.path.basename(grammar_source))[0]
self.grammar_name = grammar_name
self.grammar_source = load_if_file(grammar_source)
@staticmethod
def propagate_error_flags(node: Node, lazy: bool = True) -> None:
# See test_parser.TestCompilerClass.test_propagate_error()..
if not lazy or node.error_flag < Error.HIGHEST:
for child in node.children:
Compiler.propagate_error_flags(child)
node.error_flag = max(node.error_flag, child.error_flag)
if lazy and node.error_flag >= Error.HIGHEST:
return
@staticmethod
def method_name(node_name: str) -> str:
"""Returns the method name for `node_name`, e.g.::
>>> Compiler.method_name('expression')
'on_expression'
"""
return 'on_' + node_name
def fallback_compiler(self, node: Node) -> Any:
"""This is a generic compiler function which will be called on
all those node types for which no compiler method `on_XXX` has
been defined."""
if node.children:
result = tuple(self.compile(nd) for nd in node.children)
node.result = result
return node
def compile(self, node: Node) -> Any:
"""
Calls the compilation method for the given node and returns the
result of the compilation.
The method's name is derived from either the node's parser
name or, if the parser is anonymous, the node's parser's class
name by adding the prefix ``on_``.
Note that ``compile`` does not call any compilation functions
for the parsers of the sub nodes by itself. Rather, this should
be done within the compilation methods.
"""
elem = node.parser.name or node.parser.ptype[1:]
if not sane_parser_name(elem):
node.add_error("Reserved name '%s' not allowed as parser "
"name! " % elem + "(Any name starting with "
"'_' or '__' or ending with '__' is reserved.)")
return None
else:
try:
compiler = self.__getattribute__(self.method_name(elem))
except AttributeError:
compiler = self.fallback_compiler
self.context.append(node)
result = compiler(node)
self.context.pop()
if result is None:
raise ValueError('%s failed to return a valid compilation result!' % str(compiler))
# # the following statement makes sure that the error_flag
# # is propagated early on. Otherwise it is redundant, because
# # the __call__ method globally propagates the node's error_flag
# # later anyway. So, maybe it could be removed here.
# for child in node.children:
# node.error_flag = node.error_flag or child.error_flag
return result
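# Editorial sketch, not part of the original module: a Compiler subclass
# following the `on_XXX`-convention described in the class docstring
# above. The node names 'document' and 'word' are purely hypothetical
# placeholders for names produced by a concrete grammar.
#
# class ToyXMLCompiler(Compiler):
#     def on_document(self, node: Node) -> str:
#         # compile the children explicitly; `compile` dispatches to the
#         # matching `on_XXX`-method or to `fallback_compiler`
#         return ('<document>'
#                 + ''.join(self.compile(child) for child in node.children)
#                 + '</document>')
#
#     def on_word(self, node: Node) -> str:
#         return '<w>' + str(node) + '</w>'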
def compile_source(source: str,
preprocessor: Optional[PreprocessorFunc], # str -> str
parser: Grammar, # str -> Node (concrete syntax tree (CST))
transformer: TransformationFunc, # Node -> Node (abstract syntax tree (AST))
compiler: Compiler) -> Tuple[Any, List[Error], Node]: # Node (AST) -> Any
"""
Compiles a source in four stages:
1. Preprocessing (if needed)
2. Parsing
3. AST-transformation
4. Compiling.
The compilation stage is only invoked if no errors occurred in
either of the two previous stages.
Args:
source (str): The input text for compilation or the name of a
file containing the input text.
preprocessor (function): text -> text. A preprocessor function
or None, if no preprocessor is needed.
parser (function): A parsing function or grammar class
transformer (function): A transformation function that takes
the root-node of the concrete syntax tree as an argument and
transforms it (in place) into an abstract syntax tree.
compiler (function): A compiler function or compiler class
instance
Returns (tuple):
The result of the compilation as a 3-tuple
(result, errors, abstract syntax tree). In detail:
1. The result as returned by the compiler or ``None`` in case of failure
2. A list of error or warning messages
3. The root-node of the abstract syntax tree
"""
original_text = load_if_file(source)
log_file_name = logfile_basename(source, compiler)
if preprocessor is None:
source_text = original_text
source_mapping = lambda i: i
else:
source_text, source_mapping = with_source_mapping(preprocessor(original_text))
syntax_tree = parser(source_text)
if is_logging():
log_ST(syntax_tree, log_file_name + '.cst')
log_parsing_history(parser, log_file_name)
assert is_error(syntax_tree.error_flag) or str(syntax_tree) == strip_tokens(source_text)
# only compile if there were no syntax errors, for otherwise it is
# likely that the error list gets littered with compile error messages
result = None
efl = syntax_tree.error_flag
messages = syntax_tree.collect_errors(clear_errors=True)
if not is_error(efl):
transformer(syntax_tree)
efl = max(efl, syntax_tree.error_flag)
messages.extend(syntax_tree.collect_errors(clear_errors=True))
if is_logging():
log_ST(syntax_tree, log_file_name + '.ast')
if not is_error(syntax_tree.error_flag):
result = compiler(syntax_tree)
# print(syntax_tree.as_sxpr())
messages.extend(syntax_tree.collect_errors())
syntax_tree.error_flag = max(syntax_tree.error_flag, efl)
adjust_error_locations(messages, original_text, source_mapping)
return result, messages, syntax_tree
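# Usage sketch (editorial addition, not part of the original module):
# the four stages listed in the docstring above are wired together by
# handing the parts of a grammar package to compile_source. The
# `get_ebnf_*` factories from module ``ebnf`` serve here only as an
# existing, convenient example; the EBNF snippet is illustrative.
#
# from DHParser.error import has_errors
# from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, \
#     get_ebnf_compiler
#
# result, messages, ast = compile_source(
#     r'document = /\w+/',        # source text or name of a source file
#     None,                       # no preprocessor needed
#     get_ebnf_grammar(),         # parser: str -> concrete syntax tree
#     get_ebnf_transformer(),     # transformer: CST -> AST (in place)
#     get_ebnf_compiler())        # compiler: AST -> Python source code
# if has_errors(messages):
#     print('\n'.join(str(m) for m in messages))
# else:
#     print(result)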
......@@ -26,12 +26,11 @@ parser classes are defined in the ``parse`` module.
import collections.abc
import copy
from functools import partial
from DHParser.error import Error, linebreaks, line_col
from DHParser.stringview import StringView
from DHParser.toolkit import re, typing
from typing import Any, Callable, cast, Iterator, List, Union, Tuple, Hashable, Optional
from DHParser.toolkit import re
from typing import Callable, cast, Iterator, List, Union, Tuple, Optional
__all__ = ('ParserBase',
......@@ -42,8 +41,7 @@ __all__ = ('ParserBase',
'ZOMBIE_PARSER',
'Node',
'mock_syntax_tree',
'flatten_sxpr',
'TransformationFunc')
'flatten_sxpr')
#######################################################################
......@@ -745,10 +743,6 @@ def mock_syntax_tree(sxpr):
node._pos = 0
return node
TransformationFunc = Union[Callable[[Node], Any], partial]
# if __name__ == "__main__":
# st = mock_syntax_tree("(alpha (beta (gamma i\nj\nk) (delta y)) (epsilon z))")
# print(st.as_sxpr())
......
......@@ -38,6 +38,7 @@ from typing import AbstractSet, Any, ByteString, Callable, cast, Container, Dict
__all__ = ('TransformationDict',
'TransformationProc',
'TransformationFunc',
'ConditionFunc',
'KeyFunc',
'transformation_factory',
......@@ -96,6 +97,7 @@ __all__ = ('TransformationDict',
TransformationProc = Callable[[List[Node]], None]
TransformationDict = Dict[str, Sequence[Callable]]
TransformationFunc = Union[Callable[[Node], Any], partial]
ProcessingTableType = Dict[str, Union[Sequence[Callable], TransformationDict]]
ConditionFunc = Callable # Callable[[List[Node]], bool]
KeyFunc = Callable[[Node], str]
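# Editorial sketch, not part of the module: a processing table that fits
# ProcessingTableType maps tag names to sequences of transformations.
# The tag names 'expression' and 'term' are hypothetical placeholders;
# the transformations used are existing functions of this module.
#
# example_table = {
#     'expression': [remove_whitespace, reduce_single_child],
#     'term': [replace_by_single_child],
# }
# # traverse(ast_root, example_table) applies the listed callables
# # depth-first to every node whose tag name matches a key.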
......@@ -846,3 +848,4 @@ def forbid(context: List[Node], child_tags: AbstractSet[str]):
if child.tag_name in child_tags:
node.add_error('Element "%s" cannot be nested inside "%s".' %
(child.parser.name, node.parser.name))
......@@ -25,7 +25,7 @@ import sys
from DHParser.dsl import compileDSL, compile_on_disk
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
from DHParser.parse import compile_source
from DHParser import compile_source
from DHParser.log import logging
EBNF_TEMPLATE = r"""-grammar
......
......@@ -63,9 +63,9 @@ Main Modules Reference
The core of DHParser are the modules containing the functionality
for the parsing and compiling process. The modules ``preprocess``,
``parse`` and ``transform`` represent particular stages of the
``parse``, ``transform`` and ``compile`` represent particular stages of the
parsing/compiling process, while ``syntaxtree`` and ``error`` define
clases for syntax trees and parser/compiler errors, respectively.
classes for syntax trees and parser/compiler errors, respectively.
Module ``preprocess``
---------------------
......@@ -91,6 +91,12 @@ Module ``transform``
.. automodule:: transform
:members:
Module ``compile``
------------------
.. automodule:: compile
:members:
Module ``error``
----------------
......
......@@ -23,13 +23,14 @@ from DHParser import is_filename, load_if_file, \
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture, \
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
last_value, counterpart, accumulate, PreprocessorFunc, \
Node, TransformationFunc, TransformationDict, TRUE_CONDITION, \
Node, TransformationDict, TRUE_CONDITION, \
traverse, remove_children_if, merge_children, is_anonymous, \
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \
is_empty, is_expendable, collapse, replace_content, remove_nodes, remove_content, remove_brackets, replace_parser, \
keep_children, is_one_of, has_content, apply_if, remove_first, remove_last, \
WHITESPACE_PTYPE, TOKEN_PTYPE
from DHParser.transform import TransformationFunc
from DHParser.log import logging
......
......@@ -24,7 +24,8 @@ import os
import sys
sys.path.extend(['../', './'])
from DHParser.parse import Grammar, Compiler
from DHParser.parse import Grammar
from DHParser import Compiler
from DHParser.error import is_error
from DHParser.dsl import compile_on_disk, run_compiler, compileEBNF, grammar_provider, \
load_compiler_suite
......
......@@ -27,7 +27,7 @@ sys.path.extend(['../', './'])
from DHParser.toolkit import compile_python_object, re
from DHParser.preprocess import nil_preprocessor
from DHParser.parse import compile_source
from DHParser import compile_source
from DHParser.error import has_errors
from DHParser.syntaxtree import WHITESPACE_PTYPE
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, EBNFTransform, get_ebnf_compiler
......
......@@ -26,12 +26,10 @@ sys.path.extend(['../', './'])
from DHParser.toolkit import compile_python_object
from DHParser.log import logging, is_logging, log_ST
from DHParser.stringview import StringView
from DHParser.error import Error
from DHParser.syntaxtree import mock_syntax_tree
from DHParser.parse import compile_source, Retrieve, Grammar, Forward, Token, ZeroOrMore, RE, \
RegExp, Lookbehind, NegativeLookahead, OneOrMore, Series, Alternative, AllOf, SomeOf, Compiler, \
UnknownParserError
from DHParser.parse import Retrieve, Grammar, Forward, Token, ZeroOrMore, RE, \
RegExp, Lookbehind, NegativeLookahead, OneOrMore, Series, Alternative, AllOf, SomeOf, UnknownParserError
from DHParser import compile_source
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
from DHParser.dsl import grammar_provider, DHPARSER_IMPORTS
......@@ -548,22 +546,6 @@ class TestBorderlineCases:
assert not cst.error_flag
class TestCompilerClass:
def test_error_propagations(self):
tree = mock_syntax_tree('(A (B 1) (C (D (E 2) (F 3))))')
A = tree
B = next(tree.find(lambda node: str(node) == "1"))
D = next(tree.find(lambda node: node.parser.name == "D"))
F = next(tree.find(lambda node: str(node) == "3"))
B.add_error("Error in child node")
F.add_error("Error in child's child node")
Compiler.propagate_error_flags(tree, lazy=True)
assert A.error_flag
assert not D.error_flag
Compiler.propagate_error_flags(tree, lazy=False)
assert D.error_flag
class TestUnknownParserError:
def test_unknown_parser_error(self):
gr = Grammar()
......
......@@ -25,7 +25,7 @@ limitations under the License.
from functools import partial
from DHParser.dsl import grammar_provider
from DHParser.parse import compile_source
from DHParser import compile_source
from DHParser.preprocess import make_token, tokenized_to_original_mapping, source_map, \
BEGIN_TOKEN, END_TOKEN, TOKEN_DELIMITER, SourceMapFunc, SourceMap, chain_preprocessors, \
strip_tokens
......