Commit 139d6128 authored by di68kap

- some major refactorings

parent 1e552471
@@ -22,17 +22,17 @@ compilation of domain specific languages based on an EBNF-grammar.
"""
from functools import partial
import collections
import os
try:
import regex as re
except ImportError:
import re
from EBNFcompiler import *
from toolkit import *
from parsercombinators import *
from syntaxtree import *
from version import __version__
from EBNFcompiler import EBNFGrammar, EBNF_ASTPipeline, EBNFCompiler
from toolkit import IS_LOGGING, load_if_file, is_python_code, md5, compile_python_object
from parsercombinators import GrammarBase, CompilerBase, full_compilation, nil_scanner
from syntaxtree import Node
__all__ = ['GrammarError',
@@ -58,7 +58,7 @@ SCANNER_SECTION = "SCANNER SECTION - Can be edited. Changes will be preserved."
PARSER_SECTION = "PARSER SECTION - Don't edit! CHANGES WILL BE OVERWRITTEN!"
AST_SECTION = "AST SECTION - Can be edited. Changes will be preserved."
COMPILER_SECTION = "COMPILER SECTION - Can be edited. Changes will be preserved."
END_SECTIONS_MARKER = "END OF PYDSL-SECTIONS"
END_SECTIONS_MARKER = "END OF DHPARSER-SECTIONS"
class GrammarError(Exception):
@@ -86,22 +86,22 @@ class CompilationError(Exception):
return self.error_messages
def compile_python_object(python_src, obj_name_ending="Grammar"):
"""Compiles the python source code and returns the object the name of which
ends with `obj_name_ending`.
"""
code = compile(python_src, '<string>', 'exec')
module_vars = globals()
allowed_symbols = PARSER_SYMBOLS | AST_SYMBOLS | COMPILER_SYMBOLS
namespace = {k: module_vars[k] for k in allowed_symbols}
exec(code, namespace) # safety risk?
for key in namespace.keys():
if key.endswith(obj_name_ending):
obj = namespace[key]
break
else:
obj = None
return obj
DHPARSER_IMPORTS = """
from functools import partial
try:
import regex as re
except ImportError:
import re
from parsercombinators import GrammarBase, CompilerBase, nil_scanner, \\
Lookbehind, Lookahead, Alternative, Pop, Required, Token, \\
Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Sequence, RE, Capture, \\
ZeroOrMore, Forward, NegativeLookahead, mixin_comment
from syntaxtree import Node, remove_enclosing_delimiters, remove_children_if, \\
reduce_single_child, replace_by_single_child, remove_whitespace, TOKEN_KEYWORD, \\
no_operation, remove_expendables, remove_tokens, flatten, WHITESPACE_KEYWORD, \\
is_whitespace, is_expendable
"""
def get_grammar_instance(grammar):
@@ -117,10 +117,10 @@ def get_grammar_instance(grammar):
parser_py, errors, AST = grammar_src, '', None
else:
parser_py, errors, AST = full_compilation(grammar_src,
EBNFGrammar(), EBNFTransTable, EBNFCompiler())
EBNFGrammar(), EBNF_ASTPipeline, EBNFCompiler())
if errors:
raise GrammarError(errors, grammar_src)
parser_root = compile_python_object(parser_py, 'Grammar')()
parser_root = compile_python_object(DHPARSER_IMPORTS + parser_py, '\w*Grammar$')()
else:
# assume that dsl_grammar is a ParserHQ-object or Grammar class
grammar_src = ''
@@ -146,35 +146,34 @@ def load_compiler_suite(compiler_suite):
except ValueError as error:
raise ValueError('File "' + compiler_suite + '" seems to be corrupted. '
'Please delete or repair the file manually.')
scanner = compile_python_object(scanner_py, 'Scanner')
ast = compile_python_object(ast_py, 'TransTable')
compiler = compile_python_object(compiler_py, 'Compiler')
scanner = compile_python_object(DHPARSER_IMPORTS + scanner_py, '\w*Scanner$')
ast = compile_python_object(DHPARSER_IMPORTS + ast_py, '\w*Pipeline$')
compiler = compile_python_object(DHPARSER_IMPORTS + compiler_py, '\w*Compiler$')
else:
# assume source is an ebnf grammar
parser_py, errors, AST = full_compilation(
source, EBNFGrammar(), EBNFTransTable, EBNFCompiler())
source, EBNFGrammar(), EBNF_ASTPipeline, EBNFCompiler())
if errors:
raise GrammarError(errors, source)
scanner = nil_scanner
ast = EBNFTransTable
ast = EBNF_ASTPipeline
compiler = EBNFCompiler()
parser = compile_python_object(parser_py, 'Grammar')()
parser = compile_python_object(DHPARSER_IMPORTS + parser_py, '\w*Grammar$')()
return scanner, parser, ast, compiler
def compileDSL(text_or_file, dsl_grammar, trans_table, compiler,
def compileDSL(text_or_file, dsl_grammar, ast_pipeline, compiler,
scanner=nil_scanner):
"""Compiles a text in a domain specific language (DSL) with an
EBNF-specified grammar. Returns the compiled text.
"""
assert isinstance(text_or_file, str)
assert isinstance(compiler, CompilerBase)
assert isinstance(trans_table, dict)
assert isinstance(ast_pipeline, collections.abc.Sequence) or isinstance(ast_pipeline, dict)
parser_root, grammar_src = get_grammar_instance(dsl_grammar)
src = scanner(load_if_file(text_or_file))
result, errors, AST = full_compilation(src, parser_root, trans_table,
compiler)
result, errors, AST = full_compilation(src, parser_root, ast_pipeline, compiler)
if errors: raise CompilationError(errors, src, grammar_src, AST)
return result
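A minimal usage sketch (illustration only; ``ebnf_source`` stands for a
hypothetical grammar string or file name): feeding an EBNF source through
compileDSL yields generated Python parser code, which is exactly what
compileEBNF() below does internally:

    parser_py = compileDSL(ebnf_source, EBNFGrammar(), EBNF_ASTPipeline, EBNFCompiler())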
@@ -194,8 +193,8 @@ def compileEBNF(ebnf_src, ebnf_grammar_obj=None):
which conforms to the language defined by ``ebnf_src``
"""
grammar = ebnf_grammar_obj or EBNFGrammar()
grammar_src = compileDSL(ebnf_src, grammar, EBNFTransTable, EBNFCompiler())
return compile_python_object(grammar_src)
grammar_src = compileDSL(ebnf_src, grammar, EBNF_ASTPipeline, EBNFCompiler())
return compile_python_object(DHPARSER_IMPORTS + grammar_src, '\w*Grammar$')
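For illustration, compileEBNF returns the generated Grammar class itself, which
still has to be instantiated before parsing (a hypothetical one-rule grammar,
assuming regex literals between slashes as in the PopRetrieve example below):

    grammar_class = compileEBNF('document = { /\w+/ }')
    parser = grammar_class()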
def run_compiler(source_file, compiler_suite="", extension=".xml"):
@@ -212,16 +211,6 @@ def run_compiler(source_file, compiler_suite="", extension=".xml"):
returns a list of error messages or an empty list if no errors
occurred.
"""
def import_block(python_module, symbols):
"""Generates an Python-``import`` statement that imports all
alls symbols in ``symbols`` (set or other container) from
python_module ``python_module``."""
symlist = list(symbols)
grouped = [symlist[i:i + 3] for i in range(0, len(symlist), 3)]
return ("\nfrom " + python_module + " import "
+ ', \\\n '.join(', '.join(g) for g in grouped))
filepath = os.path.normpath(source_file)
with open(source_file, encoding="utf-8") as f:
source = f.read()
@@ -232,14 +221,14 @@ def run_compiler(source_file, compiler_suite="", extension=".xml"):
else:
scanner = nil_scanner
parser = EBNFGrammar()
trans = EBNFTransTable
trans = EBNF_ASTPipeline
compiler = EBNFCompiler(os.path.basename(rootname), source)
result, errors, ast = full_compilation(scanner(source), parser,
trans, compiler)
if errors:
return errors
elif trans == EBNFTransTable: # either an EBNF- or no compiler suite given
elif trans == EBNF_ASTPipeline: # either an EBNF- or no compiler suite given
f = None
global SECTION_MARKER, RX_SECTION_MARKER, SCANNER_SECTION, PARSER_SECTION, \
@@ -250,8 +239,7 @@ def run_compiler(source_file, compiler_suite="", extension=".xml"):
intro, syms, scanner, parser, ast, compiler, outro = RX_SECTION_MARKER.split(source)
except (PermissionError, FileNotFoundError, IOError) as error:
intro, outro = '', ''
syms = 'import re\n' + import_block("DHParser.syntaxtree", AST_SYMBOLS)
syms += import_block("DHParser.parser", PARSER_SYMBOLS | {'CompilerBase'}) + '\n\n'
syms = DHPARSER_IMPORTS
scanner = compiler.gen_scanner_skeleton()
ast = compiler.gen_AST_skeleton()
compiler = compiler.gen_compiler_skeleton()
@@ -29,8 +29,10 @@ except ImportError:
from toolkit import load_if_file, escape_re, md5, sane_parser_name
from parsercombinators import GrammarBase, mixin_comment, Forward, RE, NegativeLookahead, \
Alternative, Sequence, Optional, Required, OneOrMore, ZeroOrMore, Token, CompilerBase
from syntaxtree import *
from version import __version__
from syntaxtree import Node, remove_enclosing_delimiters, reduce_single_child, \
replace_by_single_child, TOKEN_KEYWORD, remove_expendables, remove_tokens, flatten, \
WHITESPACE_KEYWORD
from __init__ import __version__
__all__ = ['EBNFGrammar',
@@ -111,7 +113,7 @@ class EBNFGrammar(GrammarBase):
root__ = syntax
EBNFTransTable = {
EBNF_ASTTransform = {
# AST Transformations for EBNF-grammar
"syntax":
remove_expendables,
@@ -137,6 +139,9 @@ EBNFTransTable = {
}
EBNF_ASTPipeline = [EBNF_ASTTransform]
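EBNF_ASTPipeline is a sequence of processing tables that full_compilation (see
parsercombinators.py below) applies to the tree in order. Further passes could
be appended later, e.g. (EBNF_SemanticChecks is hypothetical):

    EBNF_ASTPipeline = [EBNF_ASTTransform, EBNF_SemanticChecks]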
class EBNFCompilerError(Exception):
"""Error raised by `EBNFCompiler` class. (Not compilation errors
in the strict sense, see `CompilationError` below)"""
@@ -190,12 +195,14 @@ class EBNFCompiler(CompilerBase):
if not self.definition_names:
raise EBNFCompilerError('Compiler has not been run before calling '
'"gen_AST_Skeleton()"!')
transtable = [self.grammar_name + 'TransTable = {',
tt_name = self.grammar_name + '_ASTTransform'
pl_name = self.grammar_name + '_ASTPipeline'
transtable = [tt_name + ' = {',
' # AST Transformations for the ' +
self.grammar_name + '-grammar']
for name in self.definition_names:
transtable.append(' "' + name + '": no_transformation,')
transtable += [' "": no_transformation', '}', '']
transtable.append(' "' + name + '": no_operation,')
transtable += [' "": no_operation', '}', '', pl_name + ' = [%s]' % tt_name]
return '\n'.join(transtable)
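For a grammar named PopRetrieve the regenerated skeleton thus takes the shape
seen in the test suite further below:

    PopRetrieve_ASTTransform = {
        # AST Transformations for the PopRetrieve-grammar
        "document": no_operation,
        "": no_operation
    }

    PopRetrieve_ASTPipeline = [PopRetrieve_ASTTransform]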
def gen_compiler_skeleton(self):
@@ -582,7 +582,7 @@ def remove_brackets(node):
AST_SYMBOLS = {'replace_by_single_child', 'reduce_single_child',
'no_transformation', 'remove_children_if',
'no_operation', 'remove_children_if',
'is_whitespace', 'is_expendable', 'remove_whitespace',
# 'remove_scanner_tokens', 'is_scanner_token',
'remove_expendables', 'flatten', 'remove_tokens',
@@ -1608,8 +1608,8 @@ class EBNFCompiler(CompilerBase):
' # AST Transformations for the ' +
self.grammar_name + '-grammar']
for name in self.definition_names:
transtable.append(' "' + name + '": no_transformation,')
transtable += [' "": no_transformation', '}', '']
transtable.append(' "' + name + '": no_operation,')
transtable += [' "": no_operation', '}', '']
return '\n'.join(transtable)
def gen_compiler_skeleton(self):
#!/usr/bin/python3
"""version.py - Version number of DHParser
"""__init__.py - package definition module for DHParser
Copyright 2016 by Eckhart Arnold (arnold@badw.de)
Bavarian Academy of Sciences and Humanities (badw.de)
@@ -21,3 +21,4 @@ permissions and limitations under the License.
import os
__version__ = '0.5.4' + '_dev' + str(os.stat(__file__).st_mtime)
__all__ = ['toolkit', 'syntaxtree', 'parsercombinators', 'EBNFcompiler', 'DSLsupport']
@@ -23,7 +23,7 @@ import sys
from functools import partial
from DSLsupport import compileDSL, run_compiler
from EBNFcompiler import EBNFGrammar, EBNFTransTable, EBNFCompiler
from EBNFcompiler import EBNFGrammar, EBNF_ASTPipeline, EBNFCompiler
from parsercombinators import full_compilation
@@ -35,13 +35,13 @@ def selftest(file_name):
compiler = EBNFCompiler(compiler_name, grammar)
parser = EBNFGrammar()
result, errors, syntax_tree = full_compilation(grammar,
parser, EBNFTransTable, compiler)
parser, EBNF_ASTPipeline, compiler)
print(result)
if errors:
print(errors)
sys.exit(1)
else:
result = compileDSL(grammar, result, EBNFTransTable, compiler)
result = compileDSL(grammar, result, EBNF_ASTPipeline, compiler)
print(result)
return result
@@ -649,7 +649,7 @@ def remove_enclosing_delimiters(node):
AST_SYMBOLS = {'replace_by_single_child', 'reduce_single_child',
'no_transformation', 'remove_children_if', 'is_whitespace',
'no_operation', 'remove_children_if', 'is_whitespace',
'is_comment', 'is_scanner_token', 'is_expendable',
'remove_whitespace', 'remove_comments',
'remove_scanner_tokens', 'remove_expendables', 'flatten',
@@ -1634,8 +1634,8 @@ class EBNFCompiler(CompilerBase):
' # AST Transformations for the ' +
self.grammar_name + '-grammar']
for name in self.definition_names:
transtable.append(' "' + name + '": no_transformation,')
transtable += [' "": no_transformation', '}', '']
transtable.append(' "' + name + '": no_operation,')
transtable += [' "": no_operation', '}', '']
return '\n'.join(transtable)
def gen_compiler_skeleton(self):
@@ -59,7 +59,7 @@ try:
except ImportError:
import re
from toolkit import IS_LOGGING, LOGS_DIR, escape_re, sane_parser_name
from toolkit import IS_LOGGING, LOGS_DIR, escape_re, sane_parser_name, sequence
from syntaxtree import WHITESPACE_KEYWORD, TOKEN_KEYWORD, ZOMBIE_PARSER, Node, \
error_messages, traverse
@@ -94,10 +94,8 @@ __all__ = ['HistoryRecord',
'Retrieve',
'Pop',
'Forward',
'PARSER_SYMBOLS',
'CompilerBase',
'full_compilation',
'COMPILER_SYMBOLS']
'full_compilation']
LEFT_RECURSION_DEPTH = 10 # because of Python's recursion depth limit, this
@@ -827,14 +825,6 @@ class Forward(Parser):
self.parser.apply(func)
PARSER_SYMBOLS = {'RegExp', 'mixin_comment', 'RE', 'Token', 'Required',
'Lookahead', 'NegativeLookahead', 'Optional',
'Lookbehind', 'NegativeLookbehind',
'ZeroOrMore', 'Sequence', 'Alternative', 'Forward',
'OneOrMore', 'GrammarBase', 'Capture', 'Retrieve',
'Pop'}
#######################################################################
#
# Syntax driven compilation support
@@ -856,7 +846,7 @@ class CompilerBase:
return compiler(node)
def full_compilation(source, grammar_base, AST_transformations, compiler):
def full_compilation(source, grammar_base, AST_pipeline, compiler):
"""Compiles a source in three stages:
1. Parsing
2. AST-transformation
@@ -867,9 +857,10 @@ def full_compilation(source, grammar_base, AST_transformations, compiler):
Parameters:
source (str): The input text for compilation
grammar_base (GrammarBase): The GrammarBase object
AST_transformations (dict): The transformation-table that
assigns AST transformation functions to parser names (see
function ``syntaxtree.traverse``)
AST_pipeline (dict or list of dicts): A syntax-tree processing
table or a sequence of processing tables. The first of
these tables usually contains the transformations for
turning the concrete syntax tree into the abstract syntax tree.
compiler (object): An instance of a class derived from
``CompilerBase`` with a suitable method for every parser
name or class.
@@ -897,7 +888,8 @@ def full_compilation(source, grammar_base, AST_transformations, compiler):
if syntax_tree.error_flag:
result = None
else:
traverse(syntax_tree, AST_transformations)
for processing_table in sequence(AST_pipeline):
traverse(syntax_tree, processing_table)
syntax_tree.log(log_file_name, ext='.ast')
result = compiler.compile__(syntax_tree)
errors = syntax_tree.collect_errors()
@@ -905,4 +897,3 @@ def full_compilation(source, grammar_base, AST_transformations, compiler):
return result, messages, syntax_tree
COMPILER_SYMBOLS = {'CompilerBase', 'Node', 're'}
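The ``sequence`` helper now imported from ``toolkit`` does not appear in this
diff. Presumably it normalizes a single processing table (or callback) into a
one-element sequence, roughly along these lines (an assumption, not the
project's actual code):

    def sequence(arg):
        # assumed behavior: wrap a lone item in a list, pass real sequences through
        return arg if isinstance(arg, (list, tuple)) else [arg]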
@@ -40,7 +40,7 @@ __all__ = ['WHITESPACE_KEYWORD',
'error_messages',
'compact_sexpr',
'traverse',
'no_transformation',
'no_operation',
'replace_by_single_child',
'reduce_single_child',
'is_whitespace',
@@ -52,8 +52,7 @@ __all__ = ['WHITESPACE_KEYWORD',
'remove_expendables',
'remove_tokens',
'flatten',
'remove_enclosing_delimiters',
'AST_SYMBOLS']
'remove_enclosing_delimiters']
class ZombieParser:
@@ -391,7 +390,7 @@ WHITESPACE_KEYWORD = 'WSP__'
TOKEN_KEYWORD = 'TOKEN__'
def traverse(node, calltable):
def traverse(root_node, processing_table):
"""Traverses the snytax tree starting with the given ``node`` depth
first and applies the sequences of callback functions registered
in the ``calltable``-dictionary.
@@ -400,10 +399,10 @@ def traverse(node, calltable):
into an abstract tree (AST) or the semantic analysis of the AST.
Args:
node (Node): The root-node of the syntax tree to be traversed
calltable (dict): parser.name -> sequence of functions that
will be applied to the current node in order. This
dictionary is interpreted as a ``compact_table``. See
root_node (Node): The root-node of the syntax tree to be traversed
processing_table (dict): parser.name -> sequence of functions that
will be applied to matching nodes in order. This dictionary
is interpreted as a ``compact_table``. See
``toolkit.expand_table`` or ``EBNFcompiler.EBNF_ASTTransform``
Example:
@@ -411,24 +410,24 @@ def traverse(node, calltable):
"factor, flowmarker, retrieveop": replace_by_single_child }
traverse(node, table)
"""
# normalize calltable entries by turning single values into lists
# normalize processing_table entries by turning single values into lists
# with a single value
table = {name: sequence(call) for name, call in list(calltable.items())}
table = {name: sequence(call) for name, call in list(processing_table.items())}
table = expand_table(table)
def traverse_recursive(nd):
if nd.children:
for child in nd.result:
def traverse_recursive(node):
if node.children:
for child in node.result:
traverse_recursive(child)
sequence = table.get(nd.parser.name,
table.get('~', [])) + table.get('*', [])
sequence = table.get(node.parser.name,
table.get('~', [])) + table.get('*', [])
for call in sequence:
call(nd)
call(node)
traverse_recursive(node)
traverse_recursive(root_node)
def no_transformation(node):
def no_operation(node):
pass
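Judging from the lookups in traverse_recursive above, '~' acts as a fallback
entry for parsers without a table entry of their own, while '*' is applied to
every node in addition. A hypothetical table combining both:

    table = {"*": flatten,
             "~": remove_expendables,
             "expression": reduce_single_child}
    traverse(syntax_tree, table)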
@@ -551,10 +550,3 @@ def remove_enclosing_delimiters(node):
node.result = node.result[1:-1]
AST_SYMBOLS = {'replace_by_single_child', 'reduce_single_child',
'no_transformation', 'remove_children_if',
'is_whitespace', 'is_expendable', 'remove_whitespace',
# 'remove_scanner_tokens', 'is_scanner_token',
'remove_expendables', 'flatten', 'remove_tokens',
'remove_enclosing_delimiters',
'TOKEN_KEYWORD', 'WHITESPACE_KEYWORD', 'partial'}
@@ -6,22 +6,20 @@
#
#######################################################################
import re
from DHParser.syntaxtree import remove_whitespace, no_transformation, replace_by_single_child, \
is_expendable, remove_children_if, TOKEN_KEYWORD, \
remove_brackets, partial, flatten, \
remove_expendables, WHITESPACE_KEYWORD, is_whitespace, \
remove_tokens, reduce_single_child
from DHParser.parser import mixin_comment, Required, Pop, \
ZeroOrMore, Token, CompilerBase, \
Sequence, Retrieve, Lookahead, \
GrammarBase, Optional, NegativeLookbehind, \
RegExp, Lookbehind, Capture, \
NegativeLookahead, Alternative, OneOrMore, \
Forward, RE
from functools import partial
try:
import regex as re
except ImportError:
import re
from parsercombinators import GrammarBase, CompilerBase, nil_scanner, \
Lookbehind, Lookahead, Alternative, Pop, Required, Token, \
Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Sequence, RE, Capture, \
ZeroOrMore, Forward, NegativeLookahead, mixin_comment
from syntaxtree import Node, remove_enclosing_delimiters, remove_children_if, \
reduce_single_child, replace_by_single_child, remove_whitespace, TOKEN_KEYWORD, \
no_operation, remove_expendables, remove_tokens, flatten, WHITESPACE_KEYWORD, \
is_whitespace, is_expendable
#######################################################################
@@ -49,14 +47,14 @@ class PopRetrieveGrammar(GrammarBase):
delimiter_sign = /`+/
text = /[^`]+/
"""
source_hash__ = "4a1025732f79bf6787d1f753cbec7fc3"
source_hash__ = "48a3fd5a35aeaa7ce1729e09c65594b0"
parser_initialization__ = "upon instatiation"
COMMENT__ = r''
WSP__ = mixin_comment(whitespace=r'\s*', comment=r'')
WSP__ = mixin_comment(whitespace=r'[ ]*', comment=r'')
wspL__ = ''
wspR__ = ''
text = RE('[^`]+')
delimiter_sign = RE('`+')
wspR__ = WSP__
text = RE('[^`]+', wR='')
delimiter_sign = RE('`+', wR='')
delimiter = Capture(delimiter_sign, "delimiter")
codeblock = Sequence(delimiter, ZeroOrMore(Alternative(text, Sequence(NegativeLookahead(Retrieve(delimiter)), delimiter_sign))), Pop(delimiter))
document = ZeroOrMore(Alternative(text, codeblock))
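The grammar captures a backtick run of arbitrary length as the delimiter and
only pops it when the identical run reappears, so shorter runs may occur inside
the code block. A document it should accept (illustration):

    plain text ``code with a single ` inside`` more plain text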
@@ -69,16 +67,17 @@
#
#######################################################################
PopRetrieveTransTable = {
PopRetrieve_ASTTransform = {
# AST Transformations for the PopRetrieve-grammar
"document": no_transformation,
"codeblock": no_transformation,
"delimiter": no_transformation,
"delimiter_sign": no_transformation,
"text": no_transformation,
"": no_transformation
"document": no_operation,
"codeblock": no_operation,
"delimiter": no_operation,
"delimiter_sign": no_operation,
"text": no_operation,
"": no_operation
}
PopRetrieve_ASTPipeline = [PopRetrieve_ASTTransform]
#######################################################################
#
@@ -35,6 +35,10 @@ already exists.
import collections
import hashlib
import os
try:
import regex as re
except ImportError:
import re
__all__ = ['logging_on',
@@ -193,4 +197,20 @@ def sane_parser_name(name):
"""Checks whether given name is an acceptable parser name. Parser names
must not begin or end with a double underscore '__'!
"""
return name and name[:2] != '__' and name[-2:] != '__'
\ No newline at end of file
return name and name[:2] != '__' and name[-2:] != '__'
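A quick illustration (not part of the commit):

    sane_parser_name('document')   # truthy
    sane_parser_name('__wsp__')    # falsy: leading or trailing '__' is reserved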
def compile_python_object(python_src, catch_obj_regex):
"""Compiles the python source code and returns the object the name of which
ends is matched by ``catch_obj_regex``.
"""
if isinstance(catch_obj_regex, str):
catch_obj_regex = re.compile(catch_obj_regex)
code = compile(python_src, '<string>', 'exec')
namespace = {}
exec(code, namespace) # safety risk?
matches = [key for key in namespace.keys() if catch_obj_regex.match(key)]
if len(matches) > 1:
raise AssertionError("Ambigous matches for %s : %s" %
(str(catch_obj_regex), str(matches)))
return namespace[matches[0]] if matches else None
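A usage sketch (hypothetical source string). Because the regex is anchored on
the trailing name with ``$``, helper names in the compiled namespace do not
collide:

    src = 'class MyGrammar:\n    pass\n'
    MyGrammar = compile_python_object(src, '\w*Grammar$')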