Commit c7b50f80 authored by Eckhart Arnold

- various refactorings

parent 4b26772d
DSLsupport.py

@@ -21,8 +21,8 @@ Module ``DSLsupport`` contains various functions to support the
 compilation of domain specific languages based on an EBNF-grammar.
 """
+from functools import partial
 import os
 try:
     import regex as re
 except ImportError:
@@ -30,11 +30,19 @@ except ImportError:
 from EBNFcompiler import EBNFGrammar, EBNFCompiler, EBNFTransTable, load_if_file, md5
 from logging import LOGGING
-from parser import PARSER_SYMBOLS, COMPILER_SYMBOLS, GrammarBase, CompilerBase, \
-    full_compilation, nil_scanner
-from syntaxtree import AST_SYMBOLS, Node
+from parser import *
+from syntaxtree import *
 from version import __version__

+__all__ = ['GrammarError',
+           'CompilationError',
+           'load_compiler_suite',
+           'compileDSL',
+           'run_compiler',
+           'source_changed']

 SECTION_MARKER = """\n
 #######################################################################
 #
@@ -142,6 +150,9 @@ def get_grammar_instance(grammar):

 def load_compiler_suite(compiler_suite):
+    """Extracts a compiler suite from a file or string ``compiler_suite``
+    and returns it as a tuple (scanner, parser, ast, compiler).
+    """
     global RX_SECTION_MARKER
     assert isinstance(compiler_suite, str)
     source = load_if_file(compiler_suite)
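The new docstring pins down the return value; usage would look roughly like this (the file name is purely illustrative):

```python
# Hypothetical usage sketch: unpack the four stages of a previously
# generated compiler suite (the file name below is made up).
scanner, parser, ast_trans, compiler = load_compiler_suite('my_dsl_compiler.py')
```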
@@ -189,24 +200,24 @@ def run_compiler(source_file, compiler_suite="", extension=".xml"):
     """Compiles a source file with a given compiler and writes the
     result to a file.

     If no ``compiler_suite`` is given, it is assumed that the source
     file is an EBNF grammar. In this case the result will be a Python
     script containing a parser for that grammar as well as the
     skeletons for a scanner, AST transformation table, and compiler.
     If the Python script already exists, only the parser name in the
     script will be updated. (For this to work, the different names
     need to be delimited by section marker blocks.) ``run_compiler()``
     returns a list of error messages or an empty list if no errors
     occurred.
     """
-    def import_block(module, symbols):
+    def import_block(python_module, symbols):
         """Generates a Python ``import`` statement that imports all
         symbols in ``symbols`` (set or other container) from
-        module ``module``."""
+        module ``python_module``."""
         symlist = list(symbols)
         grouped = [symlist[i:i + 4] for i in range(0, len(symlist), 4)]
-        return ("\nfrom " + module + " import "
+        return ("\nfrom " + python_module + " import "
                 + ', \\\n    '.join(', '.join(g) for g in grouped) + '\n\n')

     filepath = os.path.normpath(source_file)
...
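To make the helper's effect concrete: called with hypothetical symbol names, ``import_block()`` emits an import statement wrapped four symbols per line (since ``symbols`` is usually a set, the real ordering may vary):

```python
print(import_block('parser', ['RE', 'Token', 'Sequence', 'Alternative', 'Forward']))
# prints:
#
# from parser import RE, Token, Sequence, Alternative, \
#     Forward
```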
EBNFcompiler.py

@@ -18,29 +18,27 @@ implied. See the License for the specific language governing
 permissions and limitations under the License.
 """
-import collections
+# import collections
 import hashlib
 import keyword
 from functools import partial
 try:
     import regex as re
 except ImportError:
     import re
-from parser import mixin_comment, RE, Token, Required, NegativeLookahead, Optional, ZeroOrMore, \
-    Sequence, Alternative, Forward, OneOrMore, GrammarBase, CompilerBase, escape_re, \
-    sane_parser_name
-from syntaxtree import replace_by_single_child, reduce_single_child, remove_expendables, \
-    flatten, remove_tokens, remove_brackets, TOKEN_KEYWORD, WHITESPACE_KEYWORD, Node
+from parser import *
+from syntaxtree import *
 from version import __version__

-########################################################################
-#
-# EBNF-Grammar-Compiler
-#
-########################################################################
+__all__ = ['EBNFGrammar',
+           'EBNFTransTable',
+           'load_if_file',
+           'EBNFCompilerError',
+           # 'Scanner',
+           'md5',
+           'EBNFCompiler']

 class EBNFGrammar(GrammarBase):

@@ -159,8 +157,8 @@ class EBNFCompilerError(Exception):
     pass

-Scanner = collections.namedtuple('Scanner',
-                                 'symbol instantiation_call cls_name cls')
+# Scanner = collections.namedtuple('Scanner',
+#                                  'symbol instantiation_call cls_name cls')

 def md5(*txt):
@@ -254,10 +252,10 @@ class EBNFCompiler(CompilerBase):
                             (definitions[1], definitions[0]))
         self.definition_names = [defn[0] for defn in definitions]
-        definitions.append(('wspR__', WHITESPACE_KEYWORD \
+        definitions.append(('wspR__', WHITESPACE_KEYWORD
                             if 'right' in self.directives['literalws'] else "''"))
-        definitions.append(('wspL__', WHITESPACE_KEYWORD \
+        definitions.append(('wspL__', WHITESPACE_KEYWORD
                             if 'left' in self.directives['literalws'] else "''"))
         definitions.append((WHITESPACE_KEYWORD,
                             ("mixin_comment(whitespace="
                              "r'{whitespace}', comment=r'{comment}')").
@@ -346,7 +344,7 @@ class EBNFCompiler(CompilerBase):
             errmsg = EBNFCompiler.AST_ERROR + " (" + str(error) + ")\n" + node.as_sexpr()
             node.add_error(errmsg)
             rule, defn = rule + ':error', '"' + errmsg + '"'
-        return (rule, defn)
+        return rule, defn

     @staticmethod
     def _check_rx(node, rx):
@@ -377,7 +375,7 @@ class EBNFCompiler(CompilerBase):
         elif key == 'literalws':
             value = {item.lower() for item in self.compile__(node.result[1])}
             if (len(value - {'left', 'right', 'both', 'none'}) > 0
-                or ('none' in value and len(value) > 1)):
+                    or ('none' in value and len(value) > 1)):
                 node.add_error('Directive "literalws" allows the values '
                                '`left`, `right`, `both` or `none`, '
                                'but not `%s`' % ", ".join(value))
@@ -473,7 +471,7 @@ class EBNFCompiler(CompilerBase):
         elif 'left' in self.directives['literalws']:
             name = ["wL=''"] + name
         if rx[-2:] == '/~':
-            if not 'right' in self.directives['literalws']:
+            if 'right' not in self.directives['literalws']:
                 name = ['wR=' + WHITESPACE_KEYWORD] + name
             rx = rx[:-1]
         elif 'right' in self.directives['literalws']:
...
@@ -69,7 +69,7 @@ if __name__ == "__main__":
     if len(sys.argv) > 1:
         _errors = run_compiler(sys.argv[1],
                                sys.argv[2] if len(sys.argv) > 2 else "")
-        if (_errors):
+        if _errors:
             print(_errors)
             sys.exit(1)
     else:
...
logging.py

@@ -30,6 +30,10 @@ already exists.
 import os

+__all__ = ['LOGGING', 'LOGS_DIR']
+
 LOGGING: str = "LOGS"  # LOGGING = "" turns logging off!
...
parser.py

@@ -16,22 +16,94 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 implied. See the License for the specific language governing
 permissions and limitations under the License.
+
+Module ``parser.py`` contains a number of classes that together
+make up parser combinators for left-recursive grammars. For each
+element of the extended Backus-Naur-Form as well as for regular
+expression tokens a class is defined. The set of classes can be used
+to define a parser for (ambiguous) left-recursive grammars.
+
+References and Acknowledgements:
+
+Dominikus Herzberg: Objekt-orientierte Parser-Kombinatoren in Python,
+Blog-Post, September 18th, 2008 on denkspuren. gedanken, ideen,
+anregungen und links rund um informatik-themen, URL:
+http://denkspuren.blogspot.de/2008/09/objekt-orientierte-parser-kombinatoren.html
+
+Dominikus Herzberg: Eine einfache Grammatik für LaTeX, Blog-Post,
+September 18th, 2008 on denkspuren. gedanken, ideen, anregungen und
+links rund um informatik-themen, URL:
+http://denkspuren.blogspot.de/2008/09/eine-einfache-grammatik-fr-latex.html
+
+Dominikus Herzberg: Uniform Syntax, Blog-Post, February 27th, 2007 on
+denkspuren. gedanken, ideen, anregungen und links rund um
+informatik-themen, URL:
+http://denkspuren.blogspot.de/2007/02/uniform-syntax.html
+
+Richard A. Frost, Rahmatullah Hafiz and Paul Callaghan: Parser
+Combinators for Ambiguous Left-Recursive Grammars, in: P. Hudak and
+D.S. Warren (Eds.): PADL 2008, LNCS 4902, pp. 167–181, Springer-Verlag
+Berlin Heidelberg 2008.
+
+Juancarlo Añez: grako, a PEG parser generator in Python,
+https://bitbucket.org/apalala/grako
 """

 import copy
 import os
 try:
     import regex as re
 except ImportError:
     import re
 from logging import LOGGING, LOGS_DIR
-from syntaxtree import WHITESPACE_KEYWORD, TOKEN_KEYWORD, ZOMBIE_PARSER, Node, error_messages, \
-    ASTTransform
+from syntaxtree import WHITESPACE_KEYWORD, TOKEN_KEYWORD, ZOMBIE_PARSER, Node, \
+    error_messages, ASTTransform
+__all__ = ['HistoryRecord',
+           'Parser',
+           'GrammarBase',
+           'RX_SCANNER_TOKEN',
+           'BEGIN_SCANNER_TOKEN',
+           'END_SCANNER_TOKEN',
+           'make_token',
+           'nil_scanner',
+           'ScannerToken',
+           'RegExp',
+           'RE',
+           'escape_re',
+           'Token',
+           'mixin_comment',
+           'UnaryOperator',
+           'NaryOperator',
+           'Optional',
+           'ZeroOrMore',
+           'OneOrMore',
+           'Sequence',
+           'Alternative',
+           'FlowOperator',
+           'Required',
+           'Lookahead',
+           'NegativeLookahead',
+           'Lookbehind',
+           'NegativeLookbehind',
+           'Capture',
+           'Retrieve',
+           'Pop',
+           'Forward',
+           'PARSER_SYMBOLS',
+           'sane_parser_name',
+           'CompilerBase',
+           'full_compilation',
+           'COMPILER_SYMBOLS']
 LEFT_RECURSION_DEPTH = 10  # because of Python's recursion depth limit, this
                            # value ought not to be set too high
 MAX_DROPOUTS = 25  # stop trying to recover parsing after so many errors
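The combinator idea that the new docstring describes can be illustrated with a deliberately minimal sketch. The classes below are generic stand-ins, not the actual API of ``parser.py`` (whose classes additionally handle left recursion, memoization, and syntax-tree construction):

```python
# Minimal generic parser-combinator sketch (illustration only).
# A parser is a callable mapping text -> (match or None, remaining text).

class Token:
    """Matches a literal string at the beginning of the text."""
    def __init__(self, string):
        self.string = string
    def __call__(self, text):
        if text.startswith(self.string):
            return self.string, text[len(self.string):]
        return None, text

class Sequence:
    """Matches all of its parsers, one after another."""
    def __init__(self, *parsers):
        self.parsers = parsers
    def __call__(self, text):
        matched, rest = '', text
        for parser in self.parsers:
            m, rest = parser(rest)
            if m is None:
                return None, text      # the whole sequence fails
            matched += m
        return matched, rest

class Alternative:
    """Matches the first of its parsers that succeeds (PEG-style)."""
    def __init__(self, *parsers):
        self.parsers = parsers
    def __call__(self, text):
        for parser in self.parsers:
            m, rest = parser(text)
            if m is not None:
                return m, rest
        return None, text

# "ab" or "cd":
parser = Alternative(Sequence(Token("a"), Token("b")),
                     Sequence(Token("c"), Token("d")))
assert parser("cdx") == ("cd", "x")
```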
@@ -169,7 +241,7 @@ class Parser(metaclass=ParserMetaClass):

     def apply(self, func):
         """Applies function `func(parser)` recursively to this parser and all
-        descendendants of the tree of parsers. The same function can never
+        descendants of the tree of parsers. The same function can never
         be applied twice between calls of the ``reset()``-method!
         """
         if func in self.cycle_detection:
@@ -320,6 +392,7 @@ class GrammarBase:
             write_log(errors_only, '_errors')

+
 ########################################################################
 #
 # Token and Regular Expression parser classes (i.e. leaf classes)
@@ -327,7 +400,6 @@ class GrammarBase:
 #
 ########################################################################

 RX_SCANNER_TOKEN = re.compile('\w+')
 BEGIN_SCANNER_TOKEN = '\x1b'
 END_SCANNER_TOKEN = '\x1c'
@@ -347,7 +419,8 @@ def make_token(token, argument=''):
     return BEGIN_SCANNER_TOKEN + token + argument + END_SCANNER_TOKEN

-nil_scanner = lambda text: text
+def nil_scanner(text):
+    return text

 class ScannerToken(Parser):
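For orientation, the sentinel constants defined a few hunks above make the behavior of ``make_token()`` and the new ``nil_scanner()`` easy to state concretely:

```python
# Illustration (assumes the definitions above): make_token() brackets the
# token name plus an optional argument between the two sentinel characters,
# which ScannerToken later recognizes; nil_scanner() is the identity scanner.
assert make_token('INDENT', '4') == '\x1bINDENT4\x1c'
assert nil_scanner('any source text') == 'any source text'
```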
@@ -355,7 +428,7 @@ class ScannerToken(Parser):
         assert isinstance(scanner_token, str) and scanner_token and \
             scanner_token.isupper()
         assert RX_SCANNER_TOKEN.match(scanner_token)
-        super(ScannerToken, self).__init__(scanner_token, name=TOKEN_KEYWORD)
+        super(ScannerToken, self).__init__(scanner_token)

     def __call__(self, text):
         if text[0:1] == BEGIN_SCANNER_TOKEN:
@@ -400,8 +473,7 @@ class RegExp(Parser):
         duplicate.regexp = self.regexp
         duplicate.grammar = self.grammar
         duplicate.visited = copy.deepcopy(self.visited, memo)
-        duplicate.recursion_counter = copy.deepcopy(self.recursion_counter,
-                                                    memo)
+        duplicate.recursion_counter = copy.deepcopy(self.recursion_counter, memo)
         return duplicate

     def __call__(self, text):
...
syntaxtree.py

@@ -27,6 +27,31 @@ from typing import NamedTuple
 from logging import LOGGING, LOGS_DIR

+__all__ = ['WHITESPACE_KEYWORD',
+           'TOKEN_KEYWORD',
+           'line_col',
+           'ZOMBIE_PARSER',
+           'Error',
+           'Node',
+           'error_messages',
+           'ASTTransform',
+           'no_transformation',
+           'replace_by_single_child',
+           'reduce_single_child',
+           'is_whitespace',
+           'is_empty',
+           'is_expendable',
+           'is_token',
+           'remove_children_if',
+           'remove_whitespace',
+           'remove_expendables',
+           'remove_tokens',
+           'flatten',
+           'remove_brackets',
+           'AST_SYMBOLS']
+
 WHITESPACE_KEYWORD = 'WSP__'
 TOKEN_KEYWORD = 'TOKEN__'
@@ -373,10 +398,8 @@ def ASTTransform(node, transtable):
     """
     # normalize transformation entries by turning single transformations
     # into lists with a single item
-    table = {name: transformation
-             if isinstance(transformation, collections.abc.Sequence)
-             else [transformation]
-             for name, transformation in list(transtable.items())}
+    table = {name: transformation if isinstance(transformation, collections.abc.Sequence)
+             else [transformation] for name, transformation in list(transtable.items())}
     table = expand_table(table)

     def recursive_ASTTransform(nd):
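To see what the normalization does, consider a hypothetical transformation table (the callables are names from this module's ``__all__``; the node names are made up):

```python
# Single transformations are wrapped in a one-element list, so that the
# rest of ASTTransform() can uniformly iterate over lists of callables.
transtable = {'expr': flatten,                       # a bare callable ...
              'term': [flatten, remove_brackets]}    # ... or already a list
# After the dict comprehension above, the table reads:
#   {'expr': [flatten], 'term': [flatten, remove_brackets]}
```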
@@ -456,7 +479,7 @@ def is_expendable(node):
     return is_empty(node) or is_whitespace(node)  # or is_scanner_token(node)

-def is_token(node, token_set={}):
+def is_token(node, token_set=frozenset()):
     return node.parser.name == TOKEN_KEYWORD and (not token_set or node.result in token_set)
@@ -472,7 +495,7 @@ remove_whitespace = partial(remove_children_if, condition=is_whitespace)
 remove_expendables = partial(remove_children_if, condition=is_expendable)

-def remove_tokens(node, tokens=set()):
+def remove_tokens(node, tokens=frozenset()):
     """Removes any among a particular set of tokens from the immediate
     descendants of a node. If ``tokens`` is the empty set, all tokens
     are removed.
...
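The change from mutable defaults (``{}``, ``set()``) to ``frozenset()`` in the two hunks above sidesteps a classic Python pitfall: default argument values are evaluated once, at function definition time, so a mutable default is shared across all calls. A self-contained sketch of the failure mode:

```python
# Why frozenset() is the safer default value:
def risky(item, seen=set()):        # ONE set object, shared by all calls
    seen.add(item)
    return seen

def safe(item, seen=frozenset()):   # immutable, cannot leak state
    return seen | {item}            # builds a new set instead of mutating

print(risky('a'))   # {'a'}
print(risky('b'))   # {'a', 'b'}  <-- 'a' leaked from the previous call
print(safe('a'))    # frozenset({'a'})
print(safe('b'))    # frozenset({'b'})
```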