Commit 48220a99 authored by di68kap's avatar di68kap

- Refactoring: AST-Transformation must now be passed as a function to...

- Refactoring: The AST transformation must now be passed as a function to full_compilation(), compileDSL() etc. This allows more flexibility
for custom validation of the syntax tree before, during, or after AST transformation.
parent 93fc373b
......@@ -29,7 +29,7 @@ try:
except ImportError:
import re
from .ebnf import EBNFGrammar, EBNF_ASTPipeline, EBNFCompiler, grammar_changed
from .ebnf import EBNFGrammar, EBNFTransform, EBNFCompiler, grammar_changed
from .toolkit import load_if_file, is_python_code, compile_python_object
from .parsers import GrammarBase, CompilerBase, full_compilation, nil_scanner
from .syntaxtree import Node
......@@ -139,7 +139,7 @@ def get_grammar_instance(grammar):
parser_py, errors, AST = grammar_src, '', None
else:
parser_py, errors, AST = full_compilation(grammar_src, None,
EBNFGrammar(), EBNF_ASTPipeline, EBNFCompiler())
EBNFGrammar(), EBNFTransform, EBNFCompiler())
if errors:
raise GrammarError('\n\n'.join(errors), grammar_src)
parser_root = compile_python_object(DHPARSER_IMPORTS + parser_py, '\w*Grammar$')()
......@@ -154,7 +154,7 @@ def get_grammar_instance(grammar):
return parser_root, grammar_src
def compileDSL(text_or_file, dsl_grammar, ast_pipeline, compiler,
def compileDSL(text_or_file, dsl_grammar, ast_transformation, compiler,
scanner=nil_scanner):
"""Compiles a text in a domain specific language (DSL) with an
EBNF-specified grammar. Returns the compiled text or raises a
......@@ -165,10 +165,10 @@ def compileDSL(text_or_file, dsl_grammar, ast_pipeline, compiler,
"""
assert isinstance(text_or_file, str)
assert isinstance(compiler, CompilerBase)
assert isinstance(ast_pipeline, collections.abc.Sequence) or isinstance(ast_pipeline, dict)
parser_root, grammar_src = get_grammar_instance(dsl_grammar)
src = load_if_file(text_or_file)
result, errors, AST = full_compilation(src, scanner, parser_root, ast_pipeline, compiler)
result, errors, AST = full_compilation(src, scanner, parser_root,
ast_transformation, compiler)
if errors: raise CompilationError(errors, src, grammar_src, AST)
return result
......@@ -196,7 +196,7 @@ def compileEBNF(ebnf_src, ebnf_grammar_obj=None, source_only=False):
which conforms to the language defined by ``ebnf_src``.
"""
grammar = ebnf_grammar_obj or EBNFGrammar()
grammar_src = compileDSL(ebnf_src, grammar, EBNF_ASTPipeline, EBNFCompiler())
grammar_src = compileDSL(ebnf_src, grammar, EBNFTransform, EBNFCompiler())
return grammar_src if source_only else \
compile_python_object(DHPARSER_IMPORTS + grammar_src, '\w*Grammar$')
......@@ -221,11 +221,11 @@ def load_compiler_suite(compiler_suite):
else:
# assume source is an ebnf grammar
parser_py, errors, AST = full_compilation(
source, None, EBNFGrammar(), EBNF_ASTPipeline, EBNFCompiler())
source, None, EBNFGrammar(), EBNFTransform, EBNFCompiler())
if errors:
raise GrammarError('\n\n'.join(errors), source)
scanner = nil_scanner
ast = EBNF_ASTPipeline
ast = EBNFTransform
compiler = EBNFCompiler()
parser = compile_python_object(DHPARSER_IMPORTS + parser_py, '\w*Grammar$')()
......@@ -282,13 +282,13 @@ def run_compiler(source_file, compiler_suite="", extension=".xml"):
else:
scanner = nil_scanner
parser = EBNFGrammar()
trans = EBNF_ASTPipeline
trans = EBNFTransform
compiler1 = EBNFCompiler(compiler_name, source_file)
result, errors, ast = full_compilation(source_file, scanner, parser, trans, compiler1)
if errors:
return errors
elif trans == EBNF_ASTPipeline: # either an EBNF- or no compiler suite given
elif trans == EBNFTransform: # either an EBNF- or no compiler suite given
f = None
global SECTION_MARKER, RX_SECTION_MARKER, SCANNER_SECTION, PARSER_SECTION, \
......
......@@ -32,7 +32,7 @@ from .toolkit import load_if_file, escape_re, md5, sane_parser_name
from .parsers import GrammarBase, mixin_comment, Forward, RE, NegativeLookahead, \
Alternative, Sequence, Optional, Required, OneOrMore, ZeroOrMore, Token, CompilerBase, \
Capture, Retrieve
from .syntaxtree import Node, remove_enclosing_delimiters, reduce_single_child, \
from .syntaxtree import Node, traverse, remove_enclosing_delimiters, reduce_single_child, \
replace_by_single_child, TOKEN_KEYWORD, remove_expendables, remove_tokens, flatten, \
forbid, assert_content, WHITESPACE_KEYWORD
......@@ -124,7 +124,7 @@ class EBNFGrammar(GrammarBase):
#TODO: Add Capture and Retrieve Validation: A variable mustn't be captured twice before retrieval?!?
EBNF_ASTTransform = {
EBNF_transformation_table = {
# AST Transformations for EBNF-grammar
"syntax":
remove_expendables,
......@@ -152,7 +152,7 @@ EBNF_ASTTransform = {
}
EBNF_AST_validation = {
EBNF_validation_table = {
# Semantic validation on the AST
"repetition, option, oneormore":
[partial(forbid, child_tags=['repetition', 'option', 'oneormore']),
......@@ -160,7 +160,9 @@ EBNF_AST_validation = {
}
EBNF_ASTPipeline = [EBNF_ASTTransform, EBNF_AST_validation]
def EBNFTransform(syntax_tree):
    """Turn the concrete syntax tree of an EBNF grammar into an abstract
    syntax tree (in place) and then run semantic validation on it.

    Args:
        syntax_tree (Node): root node of the concrete syntax tree;
            transformed destructively.
    """
    # First pass: CST -> AST transformations; second pass: validation checks.
    for table in (EBNF_transformation_table, EBNF_validation_table):
        traverse(syntax_tree, table)
class EBNFCompilerError(Exception):
......
......@@ -964,7 +964,7 @@ class CompilerBase:
return result
def full_compilation(source, scanner, parser, AST_pipeline, compiler):
def full_compilation(source, scanner, parser, transform, compiler):
"""Compiles a source in four stages:
1. Scanning (if needed)
2. Parsing
......@@ -979,10 +979,9 @@ def full_compilation(source, scanner, parser, AST_pipeline, compiler):
scanner (function): text -> text. A scanner function or None,
if no scanner is needed.
parser (GrammarBase): The GrammarBase object
AST_pipeline (dict or list of dicts): A syntax-tree processing
table or a sequence of processing tables. The first of
these table usually contains the transformations for
turning the concrete into the abstract syntax tree.
transform (function): A transformation function that takes
the root-node of the concrete syntax tree as an argument and
transforms it (in place) into an abstract syntax tree.
compiler (object): An instance of a class derived from
``CompilerBase`` with a suitable method for every parser
name or class.
......@@ -1014,8 +1013,7 @@ def full_compilation(source, scanner, parser, AST_pipeline, compiler):
result = None
errors = syntax_tree.collect_errors()
else:
for processing_table in smart_list(AST_pipeline):
traverse(syntax_tree, processing_table)
transform(syntax_tree)
syntax_tree.log(log_file_name, ext='.ast')
errors = syntax_tree.collect_errors()
if not errors:
......
......@@ -25,7 +25,7 @@ import sys
from functools import partial
from DHParser.dsl import compileDSL, run_compiler
from DHParser.ebnf import EBNFGrammar, EBNF_ASTPipeline, EBNFCompiler
from DHParser.ebnf import EBNFGrammar, EBNFTransform, EBNFCompiler
from DHParser.parsers import full_compilation
......@@ -37,13 +37,15 @@ def selftest(file_name):
compiler = EBNFCompiler(compiler_name, grammar)
parser = EBNFGrammar()
result, errors, syntax_tree = full_compilation(grammar, None, parser,
EBNF_ASTPipeline, compiler)
EBNFTransform, compiler)
print(result)
if errors:
print('\n\n'.join(errors))
sys.exit(1)
else:
result = compileDSL(grammar, result, EBNF_ASTPipeline, compiler)
# compile the grammar again using the result of the previous
# compilation as parser
result = compileDSL(grammar, result, EBNFTransform, compiler)
print(result)
return result
......
#!/usr/bin/python
#######################################################################
#
# SYMBOLS SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
import sys
from functools import partial

try:
    import regex as re
except ImportError:
    import re

from DHParser.toolkit import load_if_file
from DHParser.parsers import GrammarBase, CompilerBase, nil_scanner, \
    Lookbehind, Lookahead, Alternative, Pop, Required, Token, \
    Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Sequence, RE, Capture, \
    ZeroOrMore, Forward, NegativeLookahead, mixin_comment, full_compilation
from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, \
    remove_children_if, reduce_single_child, replace_by_single_child, \
    remove_whitespace, TOKEN_KEYWORD, no_operation, remove_expendables, \
    remove_tokens, flatten, WHITESPACE_KEYWORD, is_whitespace, is_expendable
#######################################################################
#
# SCANNER SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
def LaTeXScanner(text):
    """Identity scanner: LaTeX sources need no preprocessing here, so the
    input text is returned unchanged.
    """
    return text
#######################################################################
#
# PARSER SECTION - Don't edit! CHANGES WILL BE OVERWRITTEN!
#
#######################################################################
# Generated parser class. NOTE(review): indentation was stripped somewhere in
# this dump; only comments are added here, the code itself is left untouched.
class LaTeXGrammar(GrammarBase):
r"""Parser for a LaTeX source file, with this grammar:
# latex Grammar
@ whitespace = /[ \t]*\n?(?!\s*\n)[ \t]*/
@ comment = /%.*(?:\n|$)/
genericenv = beginenv sequence endenv
beginenv = "\begin" §( "{" name "}" )
endenv = "\end" §( "{" ::name "}" )
name = ~/\w+/
genericcmd = command [ config ] block
command = /\\\w+/
config = "[" cfgtext §"]"
sequence = { partext | parblock }
parblock = "{" { partext | parblock } §"}"
block = "{" { text | block } §"}"
partext = text | par
text = cfgtext | brackets
cfgtext = chunk | wspc | escaped
escaped = /\\[%$&]/
brackets = /[\[\]]/ # left and right square brackets: [ ]
chunk = /[^\\%$&\{\}\[\]\s\n]+/ # some piece of text excluding whitespace,
# linefeed and special characters
wspc = /[ \t]*\n?(?!\s*\n)[ \t]*/ # whitespace, including at most one linefeed
lf = /[ \t]*\n(?!\s*\n)/ # a linefeed, but not an empty line (i.e. par)
par = /\s*\n\s*\n/ # at least one empty line, i.e.
# [whitespace] linefeed [whitespace] linefeed
"""
# Forward declarations for the two recursively defined parsers.
block = Forward()
parblock = Forward()
# Hash of the grammar source — presumably used (e.g. by grammar_changed) to
# detect when this generated parser is out of date; TODO confirm.
source_hash__ = "9f01a5f7c1df86e103f920fda0339d14"
# NOTE(review): the "instatiation" typo is in the generated runtime string
# and therefore deliberately left unchanged here.
parser_initialization__ = "upon instatiation"
COMMENT__ = r'%.*(?:\n|$)'
# Whitespace pattern with the comment pattern mixed in.
WSP__ = mixin_comment(whitespace=r'[ \t]*\n?(?!\s*\n)[ \t]*', comment=r'%.*(?:\n|$)')
wspL__ = ''
wspR__ = WSP__
# Parser objects, one per grammar rule above, built bottom-up so that every
# parser only references parsers already defined (or forward-declared).
par = RE('\\s*\\n\\s*\\n', wR='')
lf = RE('[ \\t]*\\n(?!\\s*\\n)', wR='')
wspc = RE('[ \\t]*\\n?(?!\\s*\\n)[ \\t]*', wR='')
chunk = RE('[^\\\\%$&\\{\\}\\[\\]\\s\\n]+', wR='')
brackets = RE('[\\[\\]]', wR='')
escaped = RE('\\\\[%$&]', wR='')
cfgtext = Alternative(chunk, wspc, escaped)
text = Alternative(cfgtext, brackets)
partext = Alternative(text, par)
block.set(Sequence(Token("{"), ZeroOrMore(Alternative(text, block)), Required(Token("}"))))
parblock.set(Sequence(Token("{"), ZeroOrMore(Alternative(partext, parblock)), Required(Token("}"))))
sequence = ZeroOrMore(Alternative(partext, parblock))
config = Sequence(Token("["), cfgtext, Required(Token("]")))
command = RE('\\\\\\w+', wR='')
genericcmd = Sequence(command, Optional(config), block)
# "name" is captured so that endenv can Pop() it and match the same name.
name = Capture(RE('\\w+', wR='', wL=WSP__), "name")
endenv = Sequence(Token("\\end"), Required(Sequence(Token("{"), Pop(name), Token("}"))))
beginenv = Sequence(Token("\\begin"), Required(Sequence(Token("{"), name, Token("}"))))
genericenv = Sequence(beginenv, sequence, endenv)
# Entry point of the grammar.
root__ = genericenv
#######################################################################
#
# AST SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
LaTeX_ASTTransform = {
    # AST transformations for the LaTeX grammar: maps each node (parser)
    # name to the transformation applied by `traverse`. All entries are
    # still `no_operation` stubs, i.e. the tree is left unchanged.
    "genericenv": no_operation,
    "beginenv": no_operation,
    "endenv": no_operation,
    "name": no_operation,
    "genericcmd": no_operation,
    "command": no_operation,
    "config": no_operation,
    "sequence": no_operation,
    "parblock": no_operation,
    "block": no_operation,
    "partext": no_operation,
    "text": no_operation,
    "cfgtext": no_operation,
    "escaped": no_operation,
    "brackets": no_operation,
    "chunk": no_operation,
    "wspc": no_operation,
    "lf": no_operation,
    "par": no_operation,
    "": no_operation,  # fallback entry for anonymous nodes
}


def LaTeXTransform(syntax_tree):
    """Transform ``syntax_tree`` (in place) from a concrete into an
    abstract syntax tree.

    Added for consistency with the refactored ``full_compilation``, which
    now expects the AST transformation to be passed as a *function*
    (cf. ``EBNFTransform``) rather than as a sequence of processing tables.

    Args:
        syntax_tree (Node): root node of the concrete syntax tree.
    """
    traverse(syntax_tree, LaTeX_ASTTransform)


# Deprecated: old pipeline-style interface (a list of processing tables);
# kept so that existing callers remain functional.
LaTeX_ASTPipeline = [LaTeX_ASTTransform]
#######################################################################
#
# COMPILER SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
class LaTeXCompiler(CompilerBase):
    """Compiler for the abstract syntax tree of a LaTeX source file.

    All node handlers are still stubs: ``genericenv`` returns its node
    unchanged; every other handler does nothing (returns None).
    """

    def __init__(self, grammar_name="LaTeX"):
        super(LaTeXCompiler, self).__init__()
        # Grammar name must be a plain identifier-like word.
        assert re.match('\w+\Z', grammar_name)

    def genericenv(self, node):
        return node

    def beginenv(self, node):
        pass

    def endenv(self, node):
        pass

    def name(self, node):
        pass

    def genericcmd(self, node):
        pass

    def command(self, node):
        pass

    def config(self, node):
        pass

    def sequence(self, node):
        pass

    def parblock(self, node):
        pass

    def block(self, node):
        pass

    def partext(self, node):
        pass

    def text(self, node):
        pass

    def cfgtext(self, node):
        pass

    def escaped(self, node):
        pass

    def brackets(self, node):
        pass

    def chunk(self, node):
        pass

    def wspc(self, node):
        pass

    def lf(self, node):
        pass

    def par(self, node):
        pass
#######################################################################
#
# END OF DHPARSER-SECTIONS
#
#######################################################################
def compile_LaTeX(source):
    """Compiles ``source`` and returns (result, errors, ast).

    Bug fix: the refactored ``full_compilation`` expects its fourth
    argument to be a transformation *function* that is called as
    ``transform(syntax_tree)``, but this module still passed the old
    pipeline-style list ``LaTeX_ASTPipeline``, which is not callable and
    would raise a TypeError during AST transformation. A local adapter
    function applies every processing table of the legacy pipeline.

    Args:
        source (str): the LaTeX source text (or file) to compile.

    Returns:
        tuple: (result, errors, ast) as produced by ``full_compilation``.
    """
    def transform(syntax_tree):
        # Apply each processing table of the (legacy) pipeline in order.
        for processing_table in LaTeX_ASTPipeline:
            traverse(syntax_tree, processing_table)

    return full_compilation(source, LaTeXScanner,
                            LaTeXGrammar(), transform, LaTeXCompiler())
if __name__ == "__main__":
    # Command-line entry point: compile the file named on the command line.
    if len(sys.argv) <= 1:
        print("Usage: LaTeX_compiler.py [FILENAME]")
    else:
        result, errors, ast = compile_LaTeX(sys.argv[1])
        if errors:
            for error in errors:
                print(error)
            sys.exit(1)
        else:
            print(result)
<document:ZeroOrMore>
<:Alternative>
<text:RE>
<:RegExp>
<document>
<Alternative>
<text>
<RegExp>
Anfang
</:RegExp>
</text:RE>
</:Alternative>
<:Alternative>
<codeblock:Sequence>
<delimiter:Capture>
<delimiter_sign:RE>
<:RegExp>
</RegExp>
</text>
</Alternative>
<Alternative>
<codeblock>
<delimiter>
<delimiter_sign>
<RegExp>
```
</:RegExp>
</delimiter_sign:RE>
</delimiter:Capture>
<:ZeroOrMore>
<:Alternative>
<text:RE>
<:RegExp>
</RegExp>
</delimiter_sign>
</delimiter>
<ZeroOrMore>
<Alternative>
<text>
<RegExp>
code block
</:RegExp>
</text:RE>
</:Alternative>
<:Alternative>
<:Sequence>
<delimiter_sign:RE>
<:RegExp>
</RegExp>
</text>
</Alternative>
<Alternative>
<Sequence>
<delimiter_sign>
<RegExp>
``
</:RegExp>
</delimiter_sign:RE>
</:Sequence>
</:Alternative>
<:Alternative>
<text:RE>
<:RegExp>
</RegExp>
</delimiter_sign>
</Sequence>
</Alternative>
<Alternative>
<text>
<RegExp>
<- keine Ende-Zeichen !
</:RegExp>
</text:RE>
</:Alternative>
</:ZeroOrMore>
<delimiter:Pop>
</RegExp>
</text>
</Alternative>
</ZeroOrMore>
<delimiter>
```
</delimiter:Pop>
</codeblock:Sequence>
</:Alternative>
<:Alternative>
<text:RE>
<:RegExp>
</delimiter>
</codeblock>
</Alternative>
<Alternative>
<text>
<RegExp>
Ende
</:RegExp>
</text:RE>
</:Alternative>
</document:ZeroOrMore>
\ No newline at end of file
</RegExp>
</text>
</Alternative>
</document>
\ No newline at end of file
<document:ZeroOrMore>
<:Alternative>
<text:RE>
<:RegExp>
<document>
<Alternative>
<text>
<RegExp>
Anfang
</:RegExp>
</text:RE>
</:Alternative>
<:Alternative>
<codeblock:Sequence>
<delimiter:Capture>
<delimiter_sign:RE>
<:RegExp>
</RegExp>
</text>
</Alternative>
<Alternative>
<codeblock>
<delimiter>
<delimiter_sign>
<RegExp>
```
</:RegExp>
</delimiter_sign:RE>
</delimiter:Capture>
<:ZeroOrMore>
<:Alternative>
<text:RE>
<:RegExp>
</RegExp>
</delimiter_sign>
</delimiter>
<ZeroOrMore>
<Alternative>
<text>
<RegExp>
code block
</:RegExp>
</text:RE>
</:Alternative>
<:Alternative>
<:Sequence>
<delimiter_sign:RE>
<:RegExp>
</RegExp>
</text>
</Alternative>
<Alternative>
<Sequence>
<delimiter_sign>
<RegExp>
``
</:RegExp>
</delimiter_sign:RE>
</:Sequence>
</:Alternative>
<:Alternative>
<text:RE>
<:RegExp>
</RegExp>
</delimiter_sign>
</Sequence>
</Alternative>
<Alternative>
<text>
<RegExp>
<- keine Ende-Zeichen !
</:RegExp>
</text:RE>
</:Alternative>
</:ZeroOrMore>
<delimiter:Pop>
</RegExp>
</text>
</Alternative>
</ZeroOrMore>
<delimiter>
```
</delimiter:Pop>
</codeblock:Sequence>
</:Alternative>
<:Alternative>
<text:RE>
<:RegExp>
</delimiter>
</codeblock>
</Alternative>
<Alternative>
<text>
<RegExp>
Ende
Absatz ohne
</:RegExp>
</text:RE>
</:Alternative>
<:Alternative>
<codeblock:Sequence>
<delimiter:Capture>
<delimiter_sign:RE>
<:RegExp>
</RegExp>