Commit ae99c15b authored by Eckhart Arnold

- added (primitive) name mangling to EBNFCompiler to avoid name conflicts...

- added (primitive) name mangling to EBNFCompiler to avoid name conflicts with names used in the grammar definition
parent 88ad74ab
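
In essence, the commit changes how the compiler looks up the compilation method for a node: instead of using the rule name verbatim, it appends two underscores, so rule names defined in the grammar can no longer shadow attributes or methods that the compiler object already has. A minimal sketch of the kind of clash this avoids; the rule name "directives" is only an illustration (EBNFCompiler happens to keep a dict of that name internally), and neither class is taken from the commit:

# Sketch, not code from the commit: why bare rule names can clash with
# the compiler's own attributes, and how the '__' suffix avoids it.

class BeforeMangling:
    def __init__(self):
        self.directives = {}                      # internal state of the compiler

    def compile_node(self, rule_name, node):
        # a grammar rule named "directives" resolves to the dict above,
        # which is not callable -> TypeError at compile time
        return getattr(self, rule_name)(node)

class AfterMangling:
    def __init__(self):
        self.directives = {}                      # internal state, untouched

    def directives__(self, node):                 # mangled method name, no clash
        return "compiled directive"

    def compile_node(self, rule_name, node):
        return getattr(self, rule_name + '__')(node)
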
......@@ -163,7 +163,6 @@ def compileDSL(text_or_file, scanner, dsl_grammar, ast_transformation, compiler)
CompilationError if any errors occurred during compilation
"""
assert isinstance(text_or_file, str)
assert isinstance(dsl_grammar, GrammarBase)
assert isinstance(compiler, CompilerBase)
parser_root, grammar_src = get_grammar_instance(dsl_grammar)
src = load_if_file(text_or_file)
......
......@@ -18,10 +18,9 @@ implied. See the License for the specific language governing
permissions and limitations under the License.
"""
# import collections
import keyword
from functools import partial
import keyword
import os
try:
import regex as re
except ImportError:
......@@ -187,11 +186,9 @@ class EBNFCompiler(CompilerBase):
'linefeed': r'[ \t]*\n?(?!\s*\n)[ \t]*',
'vertical': r'\s*'}
def __init__(self, grammar_name="", source_text=""):
def __init__(self, grammar_name="", grammar_source=""):
super(EBNFCompiler, self).__init__()
assert grammar_name == "" or re.match('\w+\Z', grammar_name)
self.grammar_name = grammar_name
self.source_text = load_if_file(source_text)
self.set_grammar_name(grammar_name, grammar_source)
self._reset()
def _reset(self):
......@@ -207,6 +204,13 @@ class EBNFCompiler(CompilerBase):
'tokens': set(), # alt. 'scanner_tokens'
'counterpart': set()} # alt. 'retrieve_counterpart'
def set_grammar_name(self, grammar_name, grammar_source):
assert grammar_name == "" or re.match('\w+\Z', grammar_name)
if not grammar_name and re.fullmatch(r'[\w/:\\]+', grammar_source):
grammar_name = os.path.splitext(os.path.basename(grammar_source))[0]
self.grammar_name = grammar_name
self.grammar_source = load_if_file(grammar_source)
def gen_scanner_skeleton(self):
name = self.grammar_name + "Scanner"
return "def %s(text):\n return text\n" % name
......@@ -241,10 +245,10 @@ class EBNFCompiler(CompilerBase):
" assert re.match('\w+\Z', grammar_name)", '']
for name in self.definition_names:
if name == self.root:
compiler += [' def ' + name + '(self, node):',
compiler += [' def ' + name + '__(self, node):',
' return node', '']
else:
compiler += [' def ' + name + '(self, node):',
compiler += [' def ' + name + '__(self, node):',
' pass', '']
return '\n'.join(compiler)
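
Correspondingly, the generated compiler skeleton now emits methods with the same '__' suffix, so skeletons produced by this generator keep working under the renamed dispatch. For a grammar named "Arithmetic" with root rule "expression" and a further rule "term" (names chosen purely for illustration), the emitted methods would look roughly like this:

# Roughly the emitted skeleton (class header and __init__ abridged/assumed;
# the real output is written into a module that imports CompilerBase and re):
class ArithmeticCompiler(CompilerBase):
    ...
    def expression__(self, node):    # root rule returns the node unchanged
        return node

    def term__(self, node):          # every other rule starts as an empty stub
        pass
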
......@@ -273,13 +277,13 @@ class EBNFCompiler(CompilerBase):
'Grammar(GrammarBase):',
'r"""Parser for ' + article + self.grammar_name +
' source file' +
(', with this grammar:' if self.source_text else '.')]
(', with this grammar:' if self.grammar_source else '.')]
definitions.append(('parser_initialization__', '"upon instantiation"'))
if self.source_text:
if self.grammar_source:
definitions.append(('source_hash__',
'"%s"' % md5(self.source_text, __version__)))
'"%s"' % md5(self.grammar_source, __version__)))
declarations.append('')
declarations += [line for line in self.source_text.split('\n')]
declarations += [line for line in self.grammar_source.split('\n')]
while declarations[-1].strip() == '':
declarations = declarations[:-1]
declarations.append('"""')
......@@ -310,7 +314,7 @@ class EBNFCompiler(CompilerBase):
declarations.append('')
return '\n '.join(declarations)
def syntax(self, node):
def syntax__(self, node):
self._reset()
definitions = []
......@@ -322,14 +326,14 @@ class EBNFCompiler(CompilerBase):
# compile definitions and directives and collect definitions
for nd in node.result:
if nd.parser.name == "definition":
definitions.append(self.compile__(nd))
definitions.append(self._compile(nd))
else:
assert nd.parser.name == "directive", nd.as_sexpr()
self.compile__(nd)
self._compile(nd)
return self.gen_parser(definitions)
def definition(self, node):
def definition__(self, node):
rule = node.result[0].result
if rule in self.rules:
node.add_error('A rule with name "%s" has already been defined.' % rule)
......@@ -346,7 +350,7 @@ class EBNFCompiler(CompilerBase):
% rule + '(This may change in the future.)')
try:
self.rules.add(rule)
defn = self.compile__(node.result[1])
defn = self._compile(node.result[1])
if rule in self.variables:
defn = 'Capture(%s)' % defn
self.variables.remove(rule)
......@@ -370,7 +374,7 @@ class EBNFCompiler(CompilerBase):
(repr(rx), str(re_error)))
return rx
def directive(self, node):
def directive__(self, node):
key = node.result[0].result.lower()
assert key not in self.directives['tokens']
if key in {'comment', 'whitespace'}:
......@@ -378,7 +382,7 @@ class EBNFCompiler(CompilerBase):
if len(node.result[1].result) != 1:
node.add_error('Directive "%s" must have one, but not %i values.' %
(key, len(node.result[1])))
value = self.compile__(node.result[1]).pop()
value = self._compile(node.result[1]).pop()
if key == 'whitespace' and value in EBNFCompiler.WHITESPACE:
value = EBNFCompiler.WHITESPACE[value] # replace whitespace-name by regex
else:
......@@ -398,7 +402,7 @@ class EBNFCompiler(CompilerBase):
self.directives[key] = value
elif key == 'literalws':
value = {item.lower() for item in self.compile__(node.result[1])}
value = {item.lower() for item in self._compile(node.result[1])}
if (len(value - {'left', 'right', 'both', 'none'}) > 0
or ('none' in value and len(value) > 1)):
node.add_error('Directive "literalws" allows the values '
......@@ -409,10 +413,10 @@ class EBNFCompiler(CompilerBase):
self.directives[key] = list(ws)
elif key in {'tokens', 'scanner_tokens'}:
self.directives['tokens'] |= self.compile__(node.result[1])
self.directives['tokens'] |= self._compile(node.result[1])
elif key in {'counterpart', 'retrieve_counterpart'}:
self.directives['counterpart'] |= self.compile__(node.result[1])
self.directives['counterpart'] |= self._compile(node.result[1])
else:
node.add_error('Unknown directive %s ! (Known ones are %s .)' %
......@@ -424,16 +428,16 @@ class EBNFCompiler(CompilerBase):
"""Compiles any non-terminal, where `parser_class` indicates the Parser class
name for the particular non-terminal.
"""
arguments = [self.compile__(r) for r in node.result] + custom_args
arguments = [self._compile(r) for r in node.result] + custom_args
return parser_class + '(' + ', '.join(arguments) + ')'
def expression(self, node):
def expression__(self, node):
return self.non_terminal(node, 'Alternative')
def term(self, node):
def term__(self, node):
return self.non_terminal(node, 'Sequence')
def factor(self, node):
def factor__(self, node):
assert isinstance(node.parser, Sequence), node.as_sexpr() # these assert statements can be removed
assert node.children
assert len(node.result) >= 2, node.as_sexpr()
......@@ -467,23 +471,23 @@ class EBNFCompiler(CompilerBase):
except KeyError:
node.add_error('Unknown prefix "%s".' % prefix)
def option(self, node):
def option__(self, node):
return self.non_terminal(node, 'Optional')
def repetition(self, node):
def repetition__(self, node):
return self.non_terminal(node, 'ZeroOrMore')
def oneormore(self, node):
def oneormore__(self, node):
return self.non_terminal(node, 'OneOrMore')
def regexchain(self, node):
def regexchain__(self, node):
raise EBNFCompilerError("Not yet implemented!")
def group(self, node):
def group__(self, node):
raise EBNFCompilerError("Group nodes should have been eliminated by "
"AST transformation!")
def symbol(self, node):
def symbol__(self, node):
if node.result in self.directives['tokens']:
return 'ScannerToken("' + node.result + '")'
else:
......@@ -492,10 +496,10 @@ class EBNFCompiler(CompilerBase):
self.recursive.add(node.result)
return node.result
def literal(self, node):
def literal__(self, node):
return 'Token(' + node.result.replace('\\', r'\\') + ')' # return 'Token(' + ', '.join([node.result]) + ')' ?
def regexp(self, node):
def regexp__(self, node):
rx = node.result
name = []
if rx[:2] == '~/':
......@@ -519,7 +523,7 @@ class EBNFCompiler(CompilerBase):
return '"' + errmsg + '"'
return 'RE(' + ', '.join([arg] + name) + ')'
def list_(self, node):
def list___(self, node):
assert node.children
return set(item.result.strip() for item in node.result)
......
......@@ -954,21 +954,35 @@ class CompilerBase:
def _reset(self):
pass
def compile__(self, node):
# if self.dirty_flag:
# self._reset()
# else:
# self.dirty_flag = True
comp, cls = node.parser.name, node.parser.__class__.__name__
elem = comp or cls
def compile_AST(self, node):
"""Compiles the abstract syntax tree with the root ``node``.
"""
if self.dirty_flag:
self._reset()
else:
self.dirty_flag = True
return self._compile(node)
def _compile(self, node):
"""Calls the compilation method for the given node and returns
the result of the compilation.
The method's name is derived from either the node's parser
name or, if the parser is anonymous, the node's parser's class
name by appending two underscores '__'.
Note that ``_compile`` does not call any compilation functions
for the parsers of the sub nodes by itself. Rather, this should
be done within the compilation methods.
"""
elem = node.parser.name or node.parser.__class__.__name__
if not sane_parser_name(elem):
node.add_error("Must not use reserved name '%s' as parser "
node.add_error("Reserved name '%s' not allowed as parser "
"name! " % elem + "(Any name starting with "
"'_' or '__' or ending with '__' is reserved.)")
return None
else:
compiler = self.__getattribute__(elem) # TODO Add support for python keyword attributes
compiler = self.__getattribute__(elem + '__')
result = compiler(node)
for child in node.children:
node.error_flag |= child.error_flag
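
Taken together, compile_AST() is now the public entry point (it handles the reset-on-reuse bookkeeping via dirty_flag), while _compile() performs the per-node dispatch: it takes the node's parser name, or the parser's class name for anonymous parsers, rejects reserved names, and calls the method obtained by appending '__'. A condensed, self-contained sketch of that mechanism (not the original class; the sane_parser_name check is inlined):

class SketchCompilerBase:
    def __init__(self):
        self.dirty_flag = False

    def _reset(self):
        pass

    def compile_AST(self, root):
        # reset state if this compiler instance has been used before
        if self.dirty_flag:
            self._reset()
        else:
            self.dirty_flag = True
        return self._compile(root)

    def _compile(self, node):
        elem = node.parser.name or node.parser.__class__.__name__
        # names starting with '_' or ending with '__' are reserved
        if elem.startswith('_') or elem.endswith('__'):
            node.add_error("Reserved name '%s' not allowed as parser name!" % elem)
            return None
        # name mangling: rule "definition" dispatches to method "definition__"
        return getattr(self, elem + '__')(node)
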
......@@ -1027,7 +1041,7 @@ def full_compilation(source, scanner, parser, transform, compiler):
syntax_tree.log(log_file_name, ext='.ast')
errors = syntax_tree.collect_errors()
if not errors:
result = compiler.compile__(syntax_tree)
result = compiler.compile_AST(syntax_tree)
errors = syntax_tree.collect_errors()
messages = error_messages(source_text, errors)
return result, messages, syntax_tree
......
......@@ -45,6 +45,7 @@ def selftest(file_name):
else:
# compile the grammar again using the result of the previous
# compilation as parser
print(type(result))
result = compileDSL(grammar, nil_scanner, result, EBNFTransform, compiler)
print(result)
return result
......
......@@ -62,7 +62,7 @@ CONTINUATION = "CONTINUATION"
def continuation(regexp, line, unless):
m = regexp.match(line)
if m:
content = m.group()
content = m.group__()
if content:
return not unless, make_token(CONTINUATION, content), line[m.end():]
return not unless, '', line
......@@ -89,7 +89,7 @@ def paragraph_cont(line, blockargs):
def newblock_if(regexp, blocktype, line):
m = regexp.match(line)
if m:
return make_token(BEGIN_PREFIX + blocktype, m.group()), line[m.end():], m.end()
return make_token(BEGIN_PREFIX + blocktype, m.group__()), line[m.end():], m.end()
return '', line, 0
......
......@@ -64,7 +64,7 @@ IGNORE = "IGNORE"
def continuation(regexp, line, unless):
m = regexp.match(line)
if m:
content = m.group()
content = m.group__()
if content:
return not unless, make_special(IGNORE, content), line[m.end():]
return not unless, '', line
......@@ -91,7 +91,7 @@ def paragraph_cont(line, blockargs):
def newblock_if(regexp, blocktype, line):
m = regexp.match(line)
if m:
return make_special(BEGIN_PREFIX + blocktype, m.group()), line[m.end():], m.end()
return make_special(BEGIN_PREFIX + blocktype, m.group__()), line[m.end():], m.end()
return '', line, 0
......
......@@ -24,9 +24,9 @@ from functools import partial
import os
import sys
sys.path.append(os.path.abspath('../../'))
from DHParser.parsers import full_compilation, Retrieve, WHITESPACE_KEYWORD
from DHParser.parsers import full_compilation, Retrieve, WHITESPACE_KEYWORD, nil_scanner
from DHParser.ebnf import EBNFGrammar, EBNFTransform, EBNFCompiler
from DHParser.dsl import compileEBNF
from DHParser.dsl import compileEBNF, compileDSL
WRITE_LOGS = True
......@@ -213,6 +213,64 @@ class TestCompilerErrors:
assert messages
class TestSelfHosting:
def test_self(self):
grammar = r"""
# EBNF-Grammar in EBNF
@ comment = /#.*(?:\n|$)/ # comments start with '#' and eat all chars up to and including '\n'
@ whitespace = /\s*/ # whitespace includes linefeed
@ literalws = right # trailing whitespace of literals will be ignored tacitly
syntax = [~//] { definition | directive } §EOF
definition = symbol §"=" expression
directive = "@" §symbol §"=" ( regexp | literal | list_ )
expression = term { "|" term }
term = { factor }+
factor = [flowmarker] [retrieveop] symbol !"=" # negative lookahead to be sure it's not a definition
| [flowmarker] literal
| [flowmarker] regexp
| [flowmarker] group
| [flowmarker] regexchain
| [flowmarker] oneormore
| repetition
| option
flowmarker = "!" | "&" | "§" | # '!' negative lookahead, '&' positive lookahead, '§' required
"-!" | "-&" # '-' negative lookbehind, '-&' positive lookbehind
retrieveop = "::" | ":" # '::' pop, ':' retrieve
group = "(" expression §")"
regexchain = ">" expression §"<" # compiles "expression" into a singular regular expression
oneormore = "{" expression "}+"
repetition = "{" expression §"}"
option = "[" expression §"]"
link = regexp | symbol | literal # semantic restriction: symbol must evaluate to a regexp or chain
symbol = /(?!\d)\w+/~ # e.g. expression, factor, parameter_list
literal = /"(?:[^"]|\\")*?"/~ # e.g. "(", '+', 'while'
| /'(?:[^']|\\')*?'/~ # whitespace following literals will be ignored tacitly.
regexp = /~?\/(?:[^\/]|(?<=\\)\/)*\/~?/~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
# '~' is a whitespace-marker, if present leading or trailing
# whitespace of a regular expression will be ignored tacitly.
list_ = /\w+/~ { "," /\w+/~ } # comma separated list of symbols, e.g. BEGIN_LIST, END_LIST,
# BEGIN_QUOTE, END_QUOTE ; see CommonMark/markdown.py for an example
EOF = !/./
"""
compiler_name = "EBNF"
compiler = EBNFCompiler(compiler_name, grammar)
parser = EBNFGrammar()
result, errors, syntax_tree = full_compilation(grammar, None, parser,
EBNFTransform, compiler)
assert not errors, str(errors)
# compile the grammar again using the result of the previous
# compilation as parser
compileDSL(grammar, nil_scanner, result, EBNFTransform, compiler)
if __name__ == "__main__":
from run import runner
runner("TestPopRetrieve", globals())
runner("", globals())
......@@ -42,7 +42,6 @@ class TestInfiLoopsAndRecursion:
# example: "5 + 3 * 4"
"""
snippet = "5 + 3 * 4"
print(compileEBNF(minilang, source_only=True))
parser = compileEBNF(minilang)()
assert parser
syntax_tree = parser.parse(snippet)
......