Commit ae99c15b authored by Eckhart Arnold

- added (primitive) name mangling to EBNFCompiler to avoid name conflicts...

- added (primitive) name mangling to EBNFCompiler to avoid name conflicts with names used in the grammar definition
parent 88ad74ab
...@@ -163,7 +163,6 @@ def compileDSL(text_or_file, scanner, dsl_grammar, ast_transformation, compiler) ...@@ -163,7 +163,6 @@ def compileDSL(text_or_file, scanner, dsl_grammar, ast_transformation, compiler)
CompilationError if any errors occured during compilation CompilationError if any errors occured during compilation
""" """
assert isinstance(text_or_file, str) assert isinstance(text_or_file, str)
assert isinstance(dsl_grammar, GrammarBase)
assert isinstance(compiler, CompilerBase) assert isinstance(compiler, CompilerBase)
parser_root, grammar_src = get_grammar_instance(dsl_grammar) parser_root, grammar_src = get_grammar_instance(dsl_grammar)
src = load_if_file(text_or_file) src = load_if_file(text_or_file)
......
...@@ -18,10 +18,9 @@ implied. See the License for the specific language governing ...@@ -18,10 +18,9 @@ implied. See the License for the specific language governing
permissions and limitations under the License. permissions and limitations under the License.
""" """
# import collections
import keyword
from functools import partial from functools import partial
import keyword
import os
try: try:
import regex as re import regex as re
except ImportError: except ImportError:
...@@ -187,11 +186,9 @@ class EBNFCompiler(CompilerBase): ...@@ -187,11 +186,9 @@ class EBNFCompiler(CompilerBase):
'linefeed': r'[ \t]*\n?(?!\s*\n)[ \t]*', 'linefeed': r'[ \t]*\n?(?!\s*\n)[ \t]*',
'vertical': r'\s*'} 'vertical': r'\s*'}
def __init__(self, grammar_name="", grammar_source=""):
    """Set up the compiler for one grammar.

    Args:
        grammar_name: Name of the grammar; forwarded to
            ``set_grammar_name``.
        grammar_source: Grammar source, forwarded to
            ``set_grammar_name``, which stores it after passing it
            through ``load_if_file``.
    """
    super(EBNFCompiler, self).__init__()
    # Name and source handling lives in set_grammar_name so that it is
    # not duplicated in the constructor.
    self.set_grammar_name(grammar_name, grammar_source)
    self._reset()
def _reset(self): def _reset(self):
...@@ -207,6 +204,13 @@ class EBNFCompiler(CompilerBase): ...@@ -207,6 +204,13 @@ class EBNFCompiler(CompilerBase):
'tokens': set(), # alt. 'scanner_tokens' 'tokens': set(), # alt. 'scanner_tokens'
'counterpart': set()} # alt. 'retrieve_counterpart' 'counterpart': set()} # alt. 'retrieve_counterpart'
def set_grammar_name(self, grammar_name, grammar_source):
    """Store the grammar's name and source on the compiler.

    Args:
        grammar_name: Either the empty string or a name consisting of
            word characters only.
        grammar_source: The grammar source. It is stored as
            ``self.grammar_source`` after being passed through
            ``load_if_file``.

    If ``grammar_name`` is empty and ``grammar_source`` consists only of
    word characters, slashes, colons and backslashes, the name is taken
    from the base name of ``grammar_source`` (without extension).
    """
    # Raw string: '\w' and '\Z' are regex escapes, not string escapes.
    assert grammar_name == "" or re.match(r'\w+\Z', grammar_name)
    # NOTE(review): the character class below does not contain '.', so a
    # path with a file extension never matches and the splitext() call has
    # nothing to strip -- confirm whether '.' should be allowed here.
    if not grammar_name and re.fullmatch(r'[\w/:\\]+', grammar_source):
        grammar_name = os.path.splitext(os.path.basename(grammar_source))[0]
    self.grammar_name = grammar_name
    self.grammar_source = load_if_file(grammar_source)
def gen_scanner_skeleton(self):
    """Return Python source for a scanner stub that returns its input.

    The generated function is named ``<grammar_name>Scanner``.
    """
    name = self.grammar_name + "Scanner"
    # NOTE(review): the indentation inside this template is reproduced as
    # shown; the page rendering may have collapsed a wider indent.
    return "def %s(text):\n return text\n" % name
...@@ -241,10 +245,10 @@ class EBNFCompiler(CompilerBase): ...@@ -241,10 +245,10 @@ class EBNFCompiler(CompilerBase):
" assert re.match('\w+\Z', grammar_name)", ''] " assert re.match('\w+\Z', grammar_name)", '']
for name in self.definition_names: for name in self.definition_names:
if name == self.root: if name == self.root:
compiler += [' def ' + name + '(self, node):', compiler += [' def ' + name + '__(self, node):',
' return node', ''] ' return node', '']
else: else:
compiler += [' def ' + name + '(self, node):', compiler += [' def ' + name + '__(self, node):',
' pass', ''] ' pass', '']
return '\n'.join(compiler) return '\n'.join(compiler)
...@@ -273,13 +277,13 @@ class EBNFCompiler(CompilerBase): ...@@ -273,13 +277,13 @@ class EBNFCompiler(CompilerBase):
'Grammar(GrammarBase):', 'Grammar(GrammarBase):',
'r"""Parser for ' + article + self.grammar_name + 'r"""Parser for ' + article + self.grammar_name +
' source file' + ' source file' +
(', with this grammar:' if self.source_text else '.')] (', with this grammar:' if self.grammar_source else '.')]
definitions.append(('parser_initialization__', '"upon instatiation"')) definitions.append(('parser_initialization__', '"upon instatiation"'))
if self.source_text: if self.grammar_source:
definitions.append(('source_hash__', definitions.append(('source_hash__',
'"%s"' % md5(self.source_text, __version__))) '"%s"' % md5(self.grammar_source, __version__)))
declarations.append('') declarations.append('')
declarations += [line for line in self.source_text.split('\n')] declarations += [line for line in self.grammar_source.split('\n')]
while declarations[-1].strip() == '': while declarations[-1].strip() == '':
declarations = declarations[:-1] declarations = declarations[:-1]
declarations.append('"""') declarations.append('"""')
...@@ -310,7 +314,7 @@ class EBNFCompiler(CompilerBase): ...@@ -310,7 +314,7 @@ class EBNFCompiler(CompilerBase):
declarations.append('') declarations.append('')
return '\n '.join(declarations) return '\n '.join(declarations)
def syntax(self, node): def syntax__(self, node):
self._reset() self._reset()
definitions = [] definitions = []
...@@ -322,14 +326,14 @@ class EBNFCompiler(CompilerBase): ...@@ -322,14 +326,14 @@ class EBNFCompiler(CompilerBase):
# compile definitions and directives and collect definitions # compile definitions and directives and collect definitions
for nd in node.result: for nd in node.result:
if nd.parser.name == "definition": if nd.parser.name == "definition":
definitions.append(self.compile__(nd)) definitions.append(self._compile(nd))
else: else:
assert nd.parser.name == "directive", nd.as_sexpr() assert nd.parser.name == "directive", nd.as_sexpr()
self.compile__(nd) self._compile(nd)
return self.gen_parser(definitions) return self.gen_parser(definitions)
def definition(self, node): def definition__(self, node):
rule = node.result[0].result rule = node.result[0].result
if rule in self.rules: if rule in self.rules:
node.add_error('A rule with name "%s" has already been defined.' % rule) node.add_error('A rule with name "%s" has already been defined.' % rule)
...@@ -346,7 +350,7 @@ class EBNFCompiler(CompilerBase): ...@@ -346,7 +350,7 @@ class EBNFCompiler(CompilerBase):
% rule + '(This may change in the furute.)') % rule + '(This may change in the furute.)')
try: try:
self.rules.add(rule) self.rules.add(rule)
defn = self.compile__(node.result[1]) defn = self._compile(node.result[1])
if rule in self.variables: if rule in self.variables:
defn = 'Capture(%s)' % defn defn = 'Capture(%s)' % defn
self.variables.remove(rule) self.variables.remove(rule)
...@@ -370,7 +374,7 @@ class EBNFCompiler(CompilerBase): ...@@ -370,7 +374,7 @@ class EBNFCompiler(CompilerBase):
(repr(rx), str(re_error))) (repr(rx), str(re_error)))
return rx return rx
def directive(self, node): def directive__(self, node):
key = node.result[0].result.lower() key = node.result[0].result.lower()
assert key not in self.directives['tokens'] assert key not in self.directives['tokens']
if key in {'comment', 'whitespace'}: if key in {'comment', 'whitespace'}:
...@@ -378,7 +382,7 @@ class EBNFCompiler(CompilerBase): ...@@ -378,7 +382,7 @@ class EBNFCompiler(CompilerBase):
if len(node.result[1].result) != 1: if len(node.result[1].result) != 1:
node.add_error('Directive "%s" must have one, but not %i values.' % node.add_error('Directive "%s" must have one, but not %i values.' %
(key, len(node.result[1]))) (key, len(node.result[1])))
value = self.compile__(node.result[1]).pop() value = self._compile(node.result[1]).pop()
if key == 'whitespace' and value in EBNFCompiler.WHITESPACE: if key == 'whitespace' and value in EBNFCompiler.WHITESPACE:
value = EBNFCompiler.WHITESPACE[value] # replace whitespace-name by regex value = EBNFCompiler.WHITESPACE[value] # replace whitespace-name by regex
else: else:
...@@ -398,7 +402,7 @@ class EBNFCompiler(CompilerBase): ...@@ -398,7 +402,7 @@ class EBNFCompiler(CompilerBase):
self.directives[key] = value self.directives[key] = value
elif key == 'literalws': elif key == 'literalws':
value = {item.lower() for item in self.compile__(node.result[1])} value = {item.lower() for item in self._compile(node.result[1])}
if (len(value - {'left', 'right', 'both', 'none'}) > 0 if (len(value - {'left', 'right', 'both', 'none'}) > 0
or ('none' in value and len(value) > 1)): or ('none' in value and len(value) > 1)):
node.add_error('Directive "literalws" allows the values ' node.add_error('Directive "literalws" allows the values '
...@@ -409,10 +413,10 @@ class EBNFCompiler(CompilerBase): ...@@ -409,10 +413,10 @@ class EBNFCompiler(CompilerBase):
self.directives[key] = list(ws) self.directives[key] = list(ws)
elif key in {'tokens', 'scanner_tokens'}: elif key in {'tokens', 'scanner_tokens'}:
self.directives['tokens'] |= self.compile__(node.result[1]) self.directives['tokens'] |= self._compile(node.result[1])
elif key in {'counterpart', 'retrieve_counterpart'}: elif key in {'counterpart', 'retrieve_counterpart'}:
self.directives['counterpart'] |= self.compile__(node.result[1]) self.directives['counterpart'] |= self._compile(node.result[1])
else: else:
node.add_error('Unknown directive %s ! (Known ones are %s .)' % node.add_error('Unknown directive %s ! (Known ones are %s .)' %
...@@ -424,16 +428,16 @@ class EBNFCompiler(CompilerBase): ...@@ -424,16 +428,16 @@ class EBNFCompiler(CompilerBase):
"""Compiles any non-terminal, where `parser_class` indicates the Parser class """Compiles any non-terminal, where `parser_class` indicates the Parser class
name for the particular non-terminal. name for the particular non-terminal.
""" """
arguments = [self.compile__(r) for r in node.result] + custom_args arguments = [self._compile(r) for r in node.result] + custom_args
return parser_class + '(' + ', '.join(arguments) + ')' return parser_class + '(' + ', '.join(arguments) + ')'
def expression__(self, node):
    """Compile an ``expression`` node via ``non_terminal`` as 'Alternative'.

    The trailing ``__`` is the name mangling that keeps compilation
    methods apart from names used in the grammar definition.
    """
    return self.non_terminal(node, 'Alternative')
def term__(self, node):
    """Compile a ``term`` node via ``non_terminal`` as 'Sequence'."""
    return self.non_terminal(node, 'Sequence')
def factor(self, node): def factor__(self, node):
assert isinstance(node.parser, Sequence), node.as_sexpr() # these assert statements can be removed assert isinstance(node.parser, Sequence), node.as_sexpr() # these assert statements can be removed
assert node.children assert node.children
assert len(node.result) >= 2, node.as_sexpr() assert len(node.result) >= 2, node.as_sexpr()
...@@ -467,23 +471,23 @@ class EBNFCompiler(CompilerBase): ...@@ -467,23 +471,23 @@ class EBNFCompiler(CompilerBase):
except KeyError: except KeyError:
node.add_error('Unknown prefix "%s".' % prefix) node.add_error('Unknown prefix "%s".' % prefix)
def option__(self, node):
    """Compile an ``option`` node via ``non_terminal`` as 'Optional'."""
    return self.non_terminal(node, 'Optional')
def repetition__(self, node):
    """Compile a ``repetition`` node via ``non_terminal`` as 'ZeroOrMore'."""
    return self.non_terminal(node, 'ZeroOrMore')
def oneormore__(self, node):
    """Compile a ``oneormore`` node via ``non_terminal`` as 'OneOrMore'."""
    return self.non_terminal(node, 'OneOrMore')
def regexchain__(self, node):
    """Placeholder for ``regexchain`` nodes.

    Raises:
        EBNFCompilerError: Always, because regex chains are not yet
            implemented.
    """
    raise EBNFCompilerError("Not yet implemented!")
def group__(self, node):
    """Reject ``group`` nodes.

    Raises:
        EBNFCompilerError: Always; reaching this method means a group
            node was not removed before compilation.
    """
    raise EBNFCompilerError("Group nodes should have been eliminated by "
                            "AST transformation!")
def symbol(self, node): def symbol__(self, node):
if node.result in self.directives['tokens']: if node.result in self.directives['tokens']:
return 'ScannerToken("' + node.result + '")' return 'ScannerToken("' + node.result + '")'
else: else:
...@@ -492,10 +496,10 @@ class EBNFCompiler(CompilerBase): ...@@ -492,10 +496,10 @@ class EBNFCompiler(CompilerBase):
self.recursive.add(node.result) self.recursive.add(node.result)
return node.result return node.result
def literal__(self, node):
    """Compile a ``literal`` node into the source text ``Token(<literal>)``.

    ``node.result`` is inserted as is, with every backslash doubled.
    """
    # Presumably the doubling keeps backslashes intact once the returned
    # text is written out as Python source -- confirm against gen_parser.
    # Open question from the original author: would
    # 'Token(' + ', '.join([node.result]) + ')' be the better form?
    return 'Token(' + node.result.replace('\\', r'\\') + ')'
def regexp(self, node): def regexp__(self, node):
rx = node.result rx = node.result
name = [] name = []
if rx[:2] == '~/': if rx[:2] == '~/':
...@@ -519,7 +523,7 @@ class EBNFCompiler(CompilerBase): ...@@ -519,7 +523,7 @@ class EBNFCompiler(CompilerBase):
return '"' + errmsg + '"' return '"' + errmsg + '"'
return 'RE(' + ', '.join([arg] + name) + ')' return 'RE(' + ', '.join([arg] + name) + ')'
def list___(self, node):
    """Compile a ``list_`` node into a set of its stripped item strings.

    The method name is the parser name ``list_`` plus the ``__`` suffix
    that is appended to all compilation method names.
    """
    assert node.children
    return {item.result.strip() for item in node.result}
......
...@@ -954,21 +954,35 @@ class CompilerBase: ...@@ -954,21 +954,35 @@ class CompilerBase:
def _reset(self): def _reset(self):
pass pass
def compile_AST(self, node):
    """Compile the abstract syntax tree with the root ``node``.

    On the first call only ``dirty_flag`` is set. On every later call
    the compiler is reset via ``_reset`` before compiling.

    Returns:
        Whatever ``_compile`` returns for ``node``.
    """
    if self.dirty_flag:
        self._reset()
    else:
        self.dirty_flag = True
    return self._compile(node)
def _compile(self, node):
"""Calls the compilation method for the given node and returns
the result of the compilation.
The method's name is dreived from either the node's parser
name or, if the parser is anonymous, the node's parser's class
name by appending two underscores '__'.
Note that ``_compile`` does not call any compilation functions
for the parsers of the sub nodes by itself. Rather, this should
be done within the compilation methods.
"""
elem = node.parser.name or node.parser.__class__.__name__
if not sane_parser_name(elem): if not sane_parser_name(elem):
node.add_error("Must not use reserved name '%s' as parser " node.add_error("Reserved name '%s' not allowed as parser "
"name! " % elem + "(Any name starting with " "name! " % elem + "(Any name starting with "
"'_' or '__' or ending with '__' is reserved.)") "'_' or '__' or ending with '__' is reserved.)")
return None return None
else: else:
compiler = self.__getattribute__(elem) # TODO Add support for python keyword attributes compiler = self.__getattribute__(elem + '__')
result = compiler(node) result = compiler(node)
for child in node.children: for child in node.children:
node.error_flag |= child.error_flag node.error_flag |= child.error_flag
...@@ -1027,7 +1041,7 @@ def full_compilation(source, scanner, parser, transform, compiler): ...@@ -1027,7 +1041,7 @@ def full_compilation(source, scanner, parser, transform, compiler):
syntax_tree.log(log_file_name, ext='.ast') syntax_tree.log(log_file_name, ext='.ast')
errors = syntax_tree.collect_errors() errors = syntax_tree.collect_errors()
if not errors: if not errors:
result = compiler.compile__(syntax_tree) result = compiler.compile_AST(syntax_tree)
errors = syntax_tree.collect_errors() errors = syntax_tree.collect_errors()
messages = error_messages(source_text, errors) messages = error_messages(source_text, errors)
return result, messages, syntax_tree return result, messages, syntax_tree
......
...@@ -45,6 +45,7 @@ def selftest(file_name): ...@@ -45,6 +45,7 @@ def selftest(file_name):
else: else:
# compile the grammar again using the result of the previous # compile the grammar again using the result of the previous
# compilation as parser # compilation as parser
print(type(result))
result = compileDSL(grammar, nil_scanner, result, EBNFTransform, compiler) result = compileDSL(grammar, nil_scanner, result, EBNFTransform, compiler)
print(result) print(result)
return result return result
......
...@@ -62,7 +62,7 @@ CONTINUATION = "CONTINUATION" ...@@ -62,7 +62,7 @@ CONTINUATION = "CONTINUATION"
def continuation(regexp, line, unless): def continuation(regexp, line, unless):
m = regexp.match(line) m = regexp.match(line)
if m: if m:
content = m.group() content = m.group__()
if content: if content:
return not unless, make_token(CONTINUATION, content), line[m.end():] return not unless, make_token(CONTINUATION, content), line[m.end():]
return not unless, '', line return not unless, '', line
...@@ -89,7 +89,7 @@ def paragraph_cont(line, blockargs): ...@@ -89,7 +89,7 @@ def paragraph_cont(line, blockargs):
def newblock_if(regexp, blocktype, line):
    """Emit a block-begin token if ``regexp`` matches the start of ``line``.

    Args:
        regexp: Compiled regular expression, applied with ``match``.
        blocktype: Name appended to ``BEGIN_PREFIX`` for the token.
        line: The text to examine.

    Returns:
        ``(token, rest_of_line, match_end)`` on a match, otherwise
        ``('', line, 0)``.
    """
    m = regexp.match(line)
    if m:
        # Match objects provide group(), not group__(); the latter raised
        # AttributeError on every match.
        return make_token(BEGIN_PREFIX + blocktype, m.group()), line[m.end():], m.end()
    return '', line, 0
......
...@@ -64,7 +64,7 @@ IGNORE = "IGNORE" ...@@ -64,7 +64,7 @@ IGNORE = "IGNORE"
def continuation(regexp, line, unless): def continuation(regexp, line, unless):
m = regexp.match(line) m = regexp.match(line)
if m: if m:
content = m.group() content = m.group__()
if content: if content:
return not unless, make_special(IGNORE, content), line[m.end():] return not unless, make_special(IGNORE, content), line[m.end():]
return not unless, '', line return not unless, '', line
...@@ -91,7 +91,7 @@ def paragraph_cont(line, blockargs): ...@@ -91,7 +91,7 @@ def paragraph_cont(line, blockargs):
def newblock_if(regexp, blocktype, line):
    """Emit a block-begin special if ``regexp`` matches the start of ``line``.

    Args:
        regexp: Compiled regular expression, applied with ``match``.
        blocktype: Name appended to ``BEGIN_PREFIX`` for the special.
        line: The text to examine.

    Returns:
        ``(special, rest_of_line, match_end)`` on a match, otherwise
        ``('', line, 0)``.
    """
    m = regexp.match(line)
    if m:
        # Match objects provide group(), not group__(); the latter raised
        # AttributeError on every match.
        return make_special(BEGIN_PREFIX + blocktype, m.group()), line[m.end():], m.end()
    return '', line, 0
......
...@@ -24,9 +24,9 @@ from functools import partial ...@@ -24,9 +24,9 @@ from functools import partial
import os import os
import sys import sys
sys.path.append(os.path.abspath('../../')) sys.path.append(os.path.abspath('../../'))
from DHParser.parsers import full_compilation, Retrieve, WHITESPACE_KEYWORD from DHParser.parsers import full_compilation, Retrieve, WHITESPACE_KEYWORD, nil_scanner
from DHParser.ebnf import EBNFGrammar, EBNFTransform, EBNFCompiler from DHParser.ebnf import EBNFGrammar, EBNFTransform, EBNFCompiler
from DHParser.dsl import compileEBNF from DHParser.dsl import compileEBNF, compileDSL
WRITE_LOGS = True WRITE_LOGS = True
...@@ -213,6 +213,64 @@ class TestCompilerErrors: ...@@ -213,6 +213,64 @@ class TestCompilerErrors:
assert messages assert messages
class TestSelfHosting:
    """Checks that the EBNF compiler can process its own grammar."""

    def test_self(self):
        """Compile the EBNF grammar, then compile it again with the result.

        The first pass uses ``EBNFGrammar`` as parser and must finish
        without errors. The second pass hands the result of the first
        pass to ``compileDSL`` as the grammar.
        """
        grammar = r"""
            # EBNF-Grammar in EBNF
            @ comment = /#.*(?:\n|$)/ # comments start with '#' and eat all chars up to and including '\n'
            @ whitespace = /\s*/ # whitespace includes linefeed
            @ literalws = right # trailing whitespace of literals will be ignored tacitly
            syntax = [~//] { definition | directive } §EOF
            definition = symbol §"=" expression
            directive = "@" §symbol §"=" ( regexp | literal | list_ )
            expression = term { "|" term }
            term = { factor }+
            factor = [flowmarker] [retrieveop] symbol !"=" # negative lookahead to be sure it's not a definition
            | [flowmarker] literal
            | [flowmarker] regexp
            | [flowmarker] group
            | [flowmarker] regexchain
            | [flowmarker] oneormore
            | repetition
            | option
            flowmarker = "!" | "&" | "§" | # '!' negative lookahead, '&' positive lookahead, '§' required
            "-!" | "-&" # '-' negative lookbehind, '-&' positive lookbehind
            retrieveop = "::" | ":" # '::' pop, ':' retrieve
            group = "(" expression §")"
            regexchain = ">" expression §"<" # compiles "expression" into a singular regular expression
            oneormore = "{" expression "}+"
            repetition = "{" expression §"}"
            option = "[" expression §"]"
            link = regexp | symbol | literal # semantic restriction: symbol must evaluate to a regexp or chain
            symbol = /(?!\d)\w+/~ # e.g. expression, factor, parameter_list
            literal = /"(?:[^"]|\\")*?"/~ # e.g. "(", '+', 'while'
            | /'(?:[^']|\\')*?'/~ # whitespace following literals will be ignored tacitly.
            regexp = /~?\/(?:[^\/]|(?<=\\)\/)*\/~?/~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
            # '~' is a whitespace-marker, if present leading or trailing
            # whitespace of a regular expression will be ignored tacitly.
            list_ = /\w+/~ { "," /\w+/~ } # comma separated list of symbols, e.g. BEGIN_LIST, END_LIST,
            # BEGIN_QUOTE, END_QUOTE ; see CommonMark/markdown.py for an exmaple
            EOF = !/./
            """
        compiler_name = "EBNF"
        compiler = EBNFCompiler(compiler_name, grammar)
        parser = EBNFGrammar()
        result, errors, syntax_tree = full_compilation(grammar, None, parser,
                                                       EBNFTransform, compiler)
        assert not errors, str(errors)
        # compile the grammar again using the result of the previous
        # compilation as parser
        compileDSL(grammar, nil_scanner, result, EBNFTransform, compiler)
if __name__ == "__main__":
    from run import runner
    # The test-name argument is now empty instead of "TestPopRetrieve";
    # presumably this runs every test in the module -- confirm against
    # run.runner.
    runner("", globals())
...@@ -42,7 +42,6 @@ class TestInfiLoopsAndRecursion: ...@@ -42,7 +42,6 @@ class TestInfiLoopsAndRecursion:
# example: "5 + 3 * 4" # example: "5 + 3 * 4"
""" """
snippet = "5 + 3 * 4" snippet = "5 + 3 * 4"
print(compileEBNF(minilang, source_only=True))
parser = compileEBNF(minilang)() parser = compileEBNF(minilang)()
assert parser assert parser
syntax_tree = parser.parse(snippet) syntax_tree = parser.parse(snippet)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment