Commit c7b50f80 authored by Eckhart Arnold's avatar Eckhart Arnold

- various refactorings

parent 4b26772d
......@@ -21,8 +21,8 @@ Module ``DSLsupport`` contains various functions to support the
compilation of domain specific languages based on an EBNF-grammar.
"""
from functools import partial
import os
try:
import regex as re
except ImportError:
......@@ -30,11 +30,19 @@ except ImportError:
from EBNFcompiler import EBNFGrammar, EBNFCompiler, EBNFTransTable, load_if_file, md5
from logging import LOGGING
from parser import PARSER_SYMBOLS, COMPILER_SYMBOLS, GrammarBase, CompilerBase, \
full_compilation, nil_scanner
from syntaxtree import AST_SYMBOLS, Node
from parser import *
from syntaxtree import *
from version import __version__
__all__ = ['GrammarError',
'CompilationError',
'load_compiler_suite',
'compileDSL',
'run_compiler',
'source_changed']
SECTION_MARKER = """\n
#######################################################################
#
......@@ -142,6 +150,9 @@ def get_grammar_instance(grammar):
def load_compiler_suite(compiler_suite):
    """Extracts a compiler suite from file or string ``compiler_suite``
and returns it as a tuple (scanner, parser, ast, compiler).
"""
global RX_SECTION_MARKER
assert isinstance(compiler_suite, str)
source = load_if_file(compiler_suite)
......@@ -189,24 +200,24 @@ def run_compiler(source_file, compiler_suite="", extension=".xml"):
    """Compiles a source file with a given compiler and writes the
result to a file.
If no ``compiler_suite`` is given it is assumed that the source
file is an EBNF grammar. In this case the result will be a Python
script containing a parser for that grammar as well as the
skeletons for a scanner, AST transformation table, and compiler.
If the Python script already exists only the parser name in the
script will be updated. (For this to work, the different names
need to be delimited section marker blocks.). `run_compiler()`
returns a list of error messages or an empty list if no errors
occurred.
"""
If no ``compiler_suite`` is given it is assumed that the source
file is an EBNF grammar. In this case the result will be a Python
script containing a parser for that grammar as well as the
skeletons for a scanner, AST transformation table, and compiler.
If the Python script already exists only the parser name in the
script will be updated. (For this to work, the different names
need to be delimited section marker blocks.). `run_compiler()`
returns a list of error messages or an empty list if no errors
occurred.
"""
def import_block(module, symbols):
def import_block(python_module, symbols):
    """Generates a Python ``import`` statement that imports all
    symbols in ``symbols`` (set or other container) from
module ``module``."""
python_module ``python_module``."""
symlist = list(symbols)
grouped = [symlist[i:i + 4] for i in range(0, len(symlist), 4)]
return ("\nfrom " + module + " import "
return ("\nfrom " + python_module + " import "
+ ', \\\n '.join(', '.join(g) for g in grouped) + '\n\n')
filepath = os.path.normpath(source_file)
......
......@@ -18,29 +18,27 @@ implied. See the License for the specific language governing
permissions and limitations under the License.
"""
import collections
# import collections
import hashlib
import keyword
from functools import partial
try:
import regex as re
except ImportError:
import re
from parser import mixin_comment, RE, Token, Required, NegativeLookahead, Optional, ZeroOrMore, \
Sequence, Alternative, Forward, OneOrMore, GrammarBase, CompilerBase, escape_re, \
sane_parser_name
from syntaxtree import replace_by_single_child, reduce_single_child, remove_expendables, \
flatten, remove_tokens, remove_brackets, TOKEN_KEYWORD, WHITESPACE_KEYWORD, Node
from parser import *
from syntaxtree import *
from version import __version__
########################################################################
#
# EBNF-Grammar-Compiler
#
########################################################################
__all__ = ['EBNFGrammar',
'EBNFTransTable',
'load_if_file',
'EBNFCompilerError',
# 'Scanner',
'md5',
'EBNFCompiler']
class EBNFGrammar(GrammarBase):
......@@ -159,8 +157,8 @@ class EBNFCompilerError(Exception):
pass
Scanner = collections.namedtuple('Scanner',
'symbol instantiation_call cls_name cls')
# Scanner = collections.namedtuple('Scanner',
# 'symbol instantiation_call cls_name cls')
def md5(*txt):
......@@ -254,10 +252,10 @@ class EBNFCompiler(CompilerBase):
(definitions[1], definitions[0]))
self.definition_names = [defn[0] for defn in definitions]
definitions.append(('wspR__', WHITESPACE_KEYWORD \
if 'right' in self.directives['literalws'] else "''"))
definitions.append(('wspL__', WHITESPACE_KEYWORD \
if 'left' in self.directives['literalws'] else "''"))
definitions.append(('wspR__', WHITESPACE_KEYWORD
if 'right' in self.directives['literalws'] else "''"))
definitions.append(('wspL__', WHITESPACE_KEYWORD
if 'left' in self.directives['literalws'] else "''"))
definitions.append((WHITESPACE_KEYWORD,
("mixin_comment(whitespace="
"r'{whitespace}', comment=r'{comment}')").
......@@ -346,7 +344,7 @@ class EBNFCompiler(CompilerBase):
errmsg = EBNFCompiler.AST_ERROR + " (" + str(error) + ")\n" + node.as_sexpr()
node.add_error(errmsg)
rule, defn = rule + ':error', '"' + errmsg + '"'
return (rule, defn)
return rule, defn
@staticmethod
def _check_rx(node, rx):
......@@ -377,7 +375,7 @@ class EBNFCompiler(CompilerBase):
elif key == 'literalws':
value = {item.lower() for item in self.compile__(node.result[1])}
if (len(value - {'left', 'right', 'both', 'none'}) > 0
or ('none' in value and len(value) > 1)):
or ('none' in value and len(value) > 1)):
node.add_error('Directive "literalws" allows the values '
'`left`, `right`, `both` or `none`, '
'but not `%s`' % ", ".join(value))
......@@ -473,7 +471,7 @@ class EBNFCompiler(CompilerBase):
elif 'left' in self.directives['literalws']:
name = ["wL=''"] + name
if rx[-2:] == '/~':
if not 'right' in self.directives['literalws']:
if 'right' not in self.directives['literalws']:
name = ['wR=' + WHITESPACE_KEYWORD] + name
rx = rx[:-1]
elif 'right' in self.directives['literalws']:
......
......@@ -69,7 +69,7 @@ if __name__ == "__main__":
if len(sys.argv) > 1:
_errors = run_compiler(sys.argv[1],
sys.argv[2] if len(sys.argv) > 2 else "")
if (_errors):
if _errors:
print(_errors)
sys.exit(1)
else:
......
......@@ -30,6 +30,10 @@ already exists.
import os
__all__ = ['LOGGING', 'LOGS_DIR']
LOGGING: str = "LOGS" # LOGGING = "" turns logging off!
......
......@@ -16,22 +16,94 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied. See the License for the specific language governing
permissions and limitations under the License.
Module ``parsers.py`` contains a number of classes that together
make up parser combinators for left-recursive grammars. For each
element of the extended Backus-Naur form as well as for a regular
expression token a class is defined. The set of classes can be used to
define a parser for (ambiguous) left-recursive grammars.
References and Acknowledgements:
Dominikus Herzberg: Objekt-orientierte Parser-Kombinatoren in Python,
Blog-Post, September, 18th 2008 on denkspuren. gedanken, ideen,
anregungen und links rund um informatik-themen, URL:
http://denkspuren.blogspot.de/2008/09/objekt-orientierte-parser-kombinatoren.html
Dominikus Herzberg: Eine einfache Grammatik für LaTeX, Blog-Post,
September, 18th 2008 on denkspuren. gedanken, ideen, anregungen und
links rund um informatik-themen, URL:
http://denkspuren.blogspot.de/2008/09/eine-einfache-grammatik-fr-latex.html
Dominikus Herzberg: Uniform Syntax, Blog-Post, February, 27th 2007 on
denkspuren. gedanken, ideen, anregungen und links rund um
informatik-themen, URL:
http://denkspuren.blogspot.de/2007/02/uniform-syntax.html
Richard A. Frost, Rahmatullah Hafiz and Paul Callaghan: Parser
Combinators for Ambiguous Left-Recursive Grammars, in: P. Hudak and
D.S. Warren (Eds.): PADL 2008, LNCS 4902, pp. 167–181, Springer-Verlag
Berlin Heidelberg 2008.
Juancarlo Añez: grako, a PEG parser generator in Python,
https://bitbucket.org/apalala/grako
"""
import copy
import os
try:
import regex as re
except ImportError:
import re
from logging import LOGGING, LOGS_DIR
from syntaxtree import WHITESPACE_KEYWORD, TOKEN_KEYWORD, ZOMBIE_PARSER, Node, error_messages, \
ASTTransform
from syntaxtree import WHITESPACE_KEYWORD, TOKEN_KEYWORD, ZOMBIE_PARSER, Node, \
error_messages, ASTTransform
__all__ = ['HistoryRecord',
'Parser',
'GrammarBase',
'RX_SCANNER_TOKEN',
'BEGIN_SCANNER_TOKEN',
'END_SCANNER_TOKEN',
'make_token',
'nil_scanner',
'ScannerToken',
'RegExp',
'RE',
'escape_re',
'Token',
'mixin_comment',
'UnaryOperator',
'NaryOperator',
'Optional',
'ZeroOrMore',
'OneOrMore',
'Sequence',
'Alternative',
'FlowOperator',
'Required',
'Lookahead',
'NegativeLookahead',
'Lookbehind',
'NegativeLookbehind',
'Capture',
'Retrieve',
'Pop',
'Forward',
'PARSER_SYMBOLS',
'sane_parser_name',
'CompilerBase',
'full_compilation',
'COMPILER_SYMBOLS']
LEFT_RECURSION_DEPTH = 10 # because of pythons recursion depth limit, this
# value ought not to be set too high
# value ought not to be set too high
MAX_DROPOUTS = 25 # stop trying to recover parsing after so many errors
......@@ -169,7 +241,7 @@ class Parser(metaclass=ParserMetaClass):
def apply(self, func):
"""Applies function `func(parser)` recursively to this parser and all
descendendants of the tree of parsers. The same function can never
descendants of the tree of parsers. The same function can never
be applied twice between calls of the ``reset()``-method!
"""
if func in self.cycle_detection:
......@@ -320,6 +392,7 @@ class GrammarBase:
write_log(errors_only, '_errors')
########################################################################
#
# Token and Regular Expression parser classes (i.e. leaf classes)
......@@ -327,7 +400,6 @@ class GrammarBase:
########################################################################
RX_SCANNER_TOKEN = re.compile('\w+')
BEGIN_SCANNER_TOKEN = '\x1b'
END_SCANNER_TOKEN = '\x1c'
......@@ -347,7 +419,8 @@ def make_token(token, argument=''):
return BEGIN_SCANNER_TOKEN + token + argument + END_SCANNER_TOKEN
nil_scanner = lambda text: text
def nil_scanner(text):
    """No-op scanner: hands ``text`` back to the caller unchanged."""
    return text
class ScannerToken(Parser):
......@@ -355,7 +428,7 @@ class ScannerToken(Parser):
assert isinstance(scanner_token, str) and scanner_token and \
scanner_token.isupper()
assert RX_SCANNER_TOKEN.match(scanner_token)
super(ScannerToken, self).__init__(scanner_token, name=TOKEN_KEYWORD)
super(ScannerToken, self).__init__(scanner_token)
def __call__(self, text):
if text[0:1] == BEGIN_SCANNER_TOKEN:
......@@ -400,8 +473,7 @@ class RegExp(Parser):
duplicate.regexp = self.regexp
duplicate.grammar = self.grammar
duplicate.visited = copy.deepcopy(self.visited, memo)
duplicate.recursion_counter = copy.deepcopy(self.recursion_counter,
memo)
duplicate.recursion_counter = copy.deepcopy(self.recursion_counter, memo)
return duplicate
def __call__(self, text):
......
......@@ -27,6 +27,31 @@ from typing import NamedTuple
from logging import LOGGING, LOGS_DIR
__all__ = ['WHITESPACE_KEYWORD',
'TOKEN_KEYWORD',
'line_col',
'ZOMBIE_PARSER',
'Error',
'Node',
'error_messages',
'ASTTransform',
'no_transformation',
'replace_by_single_child',
'reduce_single_child',
'is_whitespace',
'is_empty',
'is_expendable',
'is_token',
'remove_children_if',
'remove_whitespace',
'remove_expendables',
'remove_tokens',
'flatten',
'remove_brackets',
'AST_SYMBOLS']
WHITESPACE_KEYWORD = 'WSP__'
TOKEN_KEYWORD = 'TOKEN__'
......@@ -373,10 +398,8 @@ def ASTTransform(node, transtable):
"""
# normalize transformation entries by turning single transformations
# into lists with a single item
table = {name: transformation
if isinstance(transformation, collections.abc.Sequence)
else [transformation]
for name, transformation in list(transtable.items())}
table = {name: transformation if isinstance(transformation, collections.abc.Sequence)
else [transformation] for name, transformation in list(transtable.items())}
table = expand_table(table)
def recursive_ASTTransform(nd):
......@@ -456,7 +479,7 @@ def is_expendable(node):
return is_empty(node) or is_whitespace(node) # or is_scanner_token(node)
def is_token(node, token_set={}):
def is_token(node, token_set=frozenset()):
    """Checks whether ``node`` is a token node; if ``token_set`` is
    non-empty, the node's result must additionally be one of its members."""
    if node.parser.name != TOKEN_KEYWORD:
        return False
    return not token_set or node.result in token_set
......@@ -472,7 +495,7 @@ remove_whitespace = partial(remove_children_if, condition=is_whitespace)
remove_expendables = partial(remove_children_if, condition=is_expendable)
def remove_tokens(node, tokens=set()):
def remove_tokens(node, tokens=frozenset()):
    """Removes any of a particular set of tokens from the immediate
descendants of a node. If ``tokens`` is the empty set, all tokens
are removed.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment