In January 2021 we will introduce a 10 GB quota for project repositories. Higher limits for individual projects will be available on request. Please see https://doku.lrz.de/display/PUBLIC/GitLab for more information.

Commit 761284f8 authored by Eckhart Arnold's avatar Eckhart Arnold

major refactorings

parent 980fd4a2
......@@ -27,7 +27,8 @@ try:
except ImportError:
import re
from .ebnf import EBNFGrammar, EBNFTransform, EBNFCompiler, grammar_changed
from .ebnf import EBNFGrammar, EBNFTransform, EBNFCompiler, grammar_changed, \
get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
from .toolkit import logging, load_if_file, is_python_code, compile_python_object
from .parsers import GrammarBase, CompilerBase, compile_source, nil_scanner
from .syntaxtree import Node
......@@ -104,19 +105,55 @@ from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, \\
'''
DHPARSER_SCANNER = '''
def get_{NAME}_scanner():
return {NAME}Scanner
'''
DHPARSER_GRAMMAR = '''
def get_{NAME}_grammar():
global thread_local_{NAME}_grammar_singleton
try:
grammar = thread_local_{NAME}_grammar_singleton
return grammar
except NameError:
thread_local_{NAME}_grammar_singleton = {NAME}Grammar()
return thread_local_{NAME}_grammar_singleton
'''
DHPARSER_TRANSFORMER = '''
def get_{NAME}_transformer():
return {NAME}Transform
'''
DHPARSER_COMPILER = '''
def get_{NAME}_compiler(grammar_name="{NAME}", grammar_source=""):
global thread_local_{NAME}_compiler_singleton
try:
compiler = thread_local_{NAME}_compiler_singleton
compiler.set_grammar_name(grammar_name, grammar_source)
return compiler
except NameError:
thread_local_{NAME}_compiler_singleton = {NAME}Compiler(grammar_name, grammar_source)
return thread_local_{NAME}_compiler_singleton
'''
DHPARSER_MAIN = '''
def compile_{NAME}(source):
"""Compiles ``source`` and returns (result, errors, ast).
"""
with logging("LOGS"):
grammar = {NAME}Grammar()
compiler = {NAME}Compiler()
compiler = get_{NAME}_compiler()
cname = compiler.__class__.__name__
log_file_name = os.path.basename(os.path.splitext(source)[0]) \\
if is_filename(source) < 0 else cname[:cname.find('.')] + '_out'
result = compile_source(source, {NAME}Scanner, grammar.parse,
{NAME}Transform, compiler.compile_ast)
grammar.log_parsing_history(log_file_name)
result = compile_source(source, get_{NAME}_scanner(),
get_{NAME}_grammar(),
get_{NAME}_transformer(), compiler)
return result
......@@ -148,7 +185,7 @@ def grammar_instance(grammar_representation):
else:
with logging(False):
parser_py, errors, AST = compile_source(grammar_src, None,
EBNFGrammar(), EBNFTransform, EBNFCompiler())
get_ebnf_grammar(), get_ebnf_transformer(), get_ebnf_compiler())
if errors:
raise GrammarError('\n\n'.join(errors), grammar_src)
parser_root = compile_python_object(DHPARSER_IMPORTS + parser_py, '\w*Grammar$')()
......@@ -215,6 +252,9 @@ def compileEBNF(ebnf_src, ebnf_grammar_obj=None, source_only=False):
def load_compiler_suite(compiler_suite):
"""Extracts a compiler suite from file or string ``compiler_suite``
and returns it as a tuple (scanner, parser, ast, compiler).
Returns:
4-tuple (scanner function, parser class, ast transformer function, compiler class)
"""
global RX_SECTION_MARKER
assert isinstance(compiler_suite, str)
......@@ -231,8 +271,9 @@ def load_compiler_suite(compiler_suite):
compiler = compile_python_object(imports + compiler_py, '\w*Compiler$')
else:
# assume source is an ebnf grammar
parser_py, errors, AST = compile_source(source, None, EBNFGrammar(),
EBNFTransform, EBNFCompiler())
with logging(False):
parser_py, errors, AST = compile_source(source, None,
get_ebnf_grammar(), get_ebnf_transformer(), get_ebnf_compiler())
if errors:
raise GrammarError('\n\n'.join(errors), source)
scanner = nil_scanner
......@@ -243,7 +284,7 @@ def load_compiler_suite(compiler_suite):
return scanner, parser, ast, compiler
def suite_outdated(compiler_suite, grammar_source):
def is_outdated(compiler_suite, grammar_source):
"""Returns ``True`` if the ``compiler_suite`` needs to be updated.
An update is needed, if either the grammar in the compiler suite
......@@ -333,9 +374,9 @@ def compile_on_disk(source_file, compiler_suite="", extension=".xml"):
compiler1 = cclass()
else:
scanner = nil_scanner
parser = EBNFGrammar()
trans = EBNFTransform
compiler1 = EBNFCompiler(compiler_name, source_file)
parser = get_ebnf_grammar()
trans = get_ebnf_transformer()
compiler1 = get_ebnf_compiler(compiler_name, source_file)
result, errors, ast = compile_source(source_file, scanner, parser, trans, compiler1)
if errors:
return errors
......@@ -362,15 +403,18 @@ def compile_on_disk(source_file, compiler_suite="", extension=".xml"):
if RX_WHITESPACE.fullmatch(intro):
intro = '#!/usr/bin/python'
if RX_WHITESPACE.fullmatch(outro):
outro = DHPARSER_COMPILER.format(NAME=compiler_name)
outro = DHPARSER_MAIN.format(NAME=compiler_name)
if RX_WHITESPACE.fullmatch(imports):
imports = DHPARSER_IMPORTS
if RX_WHITESPACE.fullmatch(scanner):
scanner = compiler1.gen_scanner_skeleton()
scanner = compiler1.gen_scanner_skeleton() + \
DHPARSER_SCANNER.format(NAME=compiler_name)
if RX_WHITESPACE.fullmatch(ast):
ast = compiler1.gen_AST_skeleton()
ast = compiler1.gen_AST_skeleton() + \
DHPARSER_TRANSFORMER.format(NAME=compiler_name)
if RX_WHITESPACE.fullmatch(compiler):
compiler = compiler1.gen_compiler_skeleton()
compiler = compiler1.gen_compiler_skeleton() + \
DHPARSER_COMPILER.format(NAME=compiler_name)
try:
f = open(rootname + '_compiler.py', 'w', encoding="utf-8")
......@@ -380,7 +424,7 @@ def compile_on_disk(source_file, compiler_suite="", extension=".xml"):
f.write(SECTION_MARKER.format(marker=SCANNER_SECTION))
f.write(scanner)
f.write(SECTION_MARKER.format(marker=PARSER_SECTION))
f.write(result)
f.write(result); f.write(DHPARSER_GRAMMAR.format(NAME=compiler_name))
f.write(SECTION_MARKER.format(marker=AST_SECTION))
f.write(ast)
f.write(SECTION_MARKER.format(marker=COMPILER_SECTION))
......@@ -409,4 +453,3 @@ def compile_on_disk(source_file, compiler_suite="", extension=".xml"):
if f: f.close()
return []
......@@ -25,7 +25,7 @@ except ImportError:
import re
from .toolkit import load_if_file, escape_re, md5, sane_parser_name
from .parsers import GrammarBase, mixin_comment, Forward, RE, NegativeLookahead, \
from .parsers import GrammarBase, mixin_comment, nil_scanner, Forward, RE, NegativeLookahead, \
Alternative, Sequence, Optional, Required, OneOrMore, ZeroOrMore, Token, CompilerBase, \
Capture, Retrieve
from .syntaxtree import Node, traverse, remove_enclosing_delimiters, reduce_single_child, \
......@@ -41,6 +41,26 @@ __all__ = ['EBNFGrammar',
'grammar_changed']
########################################################################
#
# EBNF scanning
#
########################################################################
def get_ebnf_scanner():
    """Returns the scanner for EBNF sources: ``nil_scanner``, i.e. a
    pass-through, because EBNF sources need no scanning stage."""
    return nil_scanner
########################################################################
#
# EBNF parsing
#
########################################################################
class EBNFGrammar(GrammarBase):
r"""Parser for an EBNF source file, with this grammar:
......@@ -116,6 +136,54 @@ class EBNFGrammar(GrammarBase):
root__ = syntax
def grammar_changed(grammar_class, grammar_source):
    """Returns ``True`` if ``grammar_class`` does not reflect the latest
    changes of ``grammar_source``.

    Parameters:
        grammar_class: the parser class representing the grammar,
            or the file name of a compiler suite containing the grammar
        grammar_source: file name or string representation of the
            EBNF code of the grammar

    Returns (bool):
        True, if the source text of the grammar is different from the
        source from which the grammar class was generated
    """
    grammar = load_if_file(grammar_source)
    chksum = md5(grammar, __version__)
    if isinstance(grammar_class, str):
        # grammar_class is the name of a compiler-suite file: scan its
        # source code for the stored hash instead of importing it.
        with open(grammar_class, 'r', encoding='utf8') as f:
            pycode = f.read()
        # raw strings: '\(' and '\w' are invalid escape sequences in
        # ordinary string literals (DeprecationWarning in Python 3.6+)
        m = re.search(r'class \w*\(GrammarBase\)', pycode)
        if m:
            # look for the recorded source hash after the class header
            m = re.search(r' source_hash__ *= *"([a-z0-9]*)"',
                          pycode[m.span()[1]:])
            return not (m and m.groups() and m.groups()[-1] == chksum)
        else:
            # no grammar class found at all => treat as outdated
            return True
    else:
        return chksum != grammar_class.source_hash__
def get_ebnf_grammar():
    """Returns a lazily created singleton instance of ``EBNFGrammar``.

    The instance is cached in a module-level global and reused on
    subsequent calls.
    """
    global thread_local_ebnf_grammar_singleton
    try:
        return thread_local_ebnf_grammar_singleton
    except NameError:
        # first call: create and cache the grammar object
        thread_local_ebnf_grammar_singleton = EBNFGrammar()
        return thread_local_ebnf_grammar_singleton
########################################################################
#
# EBNF concrete to abstract syntax tree transformation and validation
#
########################################################################
# TODO: Add Capture and Retrieve validation: a variable mustn't be captured twice before retrieval?!?
EBNF_transformation_table = {
......@@ -159,6 +227,17 @@ def EBNFTransform(syntax_tree):
traverse(syntax_tree, processing_table)
def get_ebnf_transformer():
    """Returns the AST-transformation function for EBNF syntax trees."""
    return EBNFTransform
########################################################################
#
# EBNF abstract syntax tree to Python parser compilation
#
########################################################################
class EBNFCompilerError(Exception):
"""Error raised by `EBNFCompiler` class. (Not compilation errors
in the strict sense, see `CompilationError` below)"""
......@@ -182,8 +261,7 @@ class EBNFCompiler(CompilerBase):
'vertical': r'\s*'}
def __init__(self, grammar_name="", grammar_source=""):
super(EBNFCompiler, self).__init__()
self.set_grammar_name(grammar_name, grammar_source)
super(EBNFCompiler, self).__init__(grammar_name, grammar_source)
self._reset()
def _reset(self):
......@@ -199,13 +277,6 @@ class EBNFCompiler(CompilerBase):
'tokens': set(), # alt. 'scanner_tokens'
'counterpart': set()} # alt. 'retrieve_counterpart'
def set_grammar_name(self, grammar_name, grammar_source):
assert grammar_name == "" or re.match('\w+\Z', grammar_name)
if not grammar_name and re.fullmatch(r'[\w/:\\]+', grammar_source):
grammar_name = os.path.splitext(os.path.basename(grammar_source))[0]
self.grammar_name = grammar_name
self.grammar_source = load_if_file(grammar_source)
def gen_scanner_skeleton(self):
name = self.grammar_name + "Scanner"
return "def %s(text):\n return text\n" % name
......@@ -234,9 +305,9 @@ class EBNFCompiler(CompilerBase):
self.grammar_name + ' source file.',
' """', '',
' def __init__(self, grammar_name="' +
self.grammar_name + '"):',
self.grammar_name + '", grammar_source=""):',
' super(' + self.grammar_name +
'Compiler, self).__init__()',
'Compiler, self).__init__(grammar_name, grammar_source)',
" assert re.match('\w+\Z', grammar_name)", '']
for name in self.definition_names:
method_name = CompilerBase.derive_method_name(name)
......@@ -523,32 +594,12 @@ class EBNFCompiler(CompilerBase):
return set(item.result.strip() for item in node.result)
def grammar_changed(grammar_class, grammar_source):
"""Returns ``True`` if ``grammar_class`` does not reflect the latest
changes of ``grammar_source``
Parameters:
grammar_class: the parser class representing the grammar
or the file name of a compiler suite containing the grammar
grammar_source: File name or string representation of the
EBNF code of the grammar
Returns (bool):
True, if the source text of the grammar is different from the
source from which the grammar class was generated
"""
grammar = load_if_file(grammar_source)
chksum = md5(grammar, __version__)
if isinstance(grammar_class, str):
# grammar_class = load_compiler_suite(grammar_class)[1]
with open(grammar_class, 'r', encoding='utf8') as f:
pycode = f.read()
m = re.search('class \w*\(GrammarBase\)', pycode)
if m:
m = re.search(' source_hash__ *= *"([a-z0-9]*)"',
pycode[m.span()[1]:])
return not (m and m.groups() and m.groups()[-1] == chksum)
else:
return True
else:
return chksum != grammar_class.source_hash__
def get_ebnf_compiler(grammar_name="", grammar_source=""):
    """Returns a lazily created singleton ``EBNFCompiler``, re-targeted
    to the given ``grammar_name`` and ``grammar_source``.
    """
    global thread_local_ebnf_compiler_singleton
    try:
        compiler = thread_local_ebnf_compiler_singleton
    except NameError:
        # first call: create and cache the compiler object
        compiler = EBNFCompiler(grammar_name, grammar_source)
        thread_local_ebnf_compiler_singleton = compiler
    else:
        # reuse the cached compiler, but point it at the new grammar
        compiler.set_grammar_name(grammar_name, grammar_source)
    return compiler
......@@ -337,7 +337,7 @@ class GrammarBase:
Returns:
Node: The root node ot the parse tree.
"""
assert isinstance(document, str)
assert isinstance(document, str), type(document)
if self.root__ is None:
raise NotImplementedError()
if self.dirty_flag:
......@@ -391,21 +391,20 @@ class GrammarBase:
elif os.path.exists(path):
os.remove(path)
if is_logging():
if not log_file_name:
name = self.__class__.__name__
log_file_name = name[:-7] if name.lower().endswith('grammar') else name
full_history, match_history, errors_only = [], [], []
for record in self.history:
line = "; ".join(prepare_line(record))
full_history.append(line)
if record.node and record.node.parser.name != WHITESPACE_KEYWORD:
match_history.append(line)
if record.node.errors:
errors_only.append(line)
write_log(full_history, log_file_name + '_full')
write_log(match_history, log_file_name + '_match')
write_log(errors_only, log_file_name + '_errors')
if not log_file_name:
name = self.__class__.__name__
log_file_name = name[:-7] if name.lower().endswith('grammar') else name
full_history, match_history, errors_only = [], [], []
for record in self.history:
line = "; ".join(prepare_line(record))
full_history.append(line)
if record.node and record.node.parser.name != WHITESPACE_KEYWORD:
match_history.append(line)
if record.node.errors:
errors_only.append(line)
write_log(full_history, log_file_name + '_full')
write_log(match_history, log_file_name + '_match')
write_log(errors_only, log_file_name + '_errors')
def dsl_error_msg(parser, error_str):
......@@ -959,8 +958,9 @@ class Forward(Parser):
class CompilerBase:
def __init__(self):
def __init__(self, grammar_name="", grammar_source=""):
self.dirty_flag = False
self.set_grammar_name(grammar_name, grammar_source)
def _reset(self):
pass
......@@ -978,6 +978,13 @@ class CompilerBase:
self.dirty_flag = True
return self._compile(node)
def set_grammar_name(self, grammar_name, grammar_source):
    """Sets the compiler's grammar name and grammar source.

    If ``grammar_name`` is empty and ``grammar_source`` looks like a
    file path, the name is derived from the path's base name.
    ``grammar_source`` is loaded from disk if it is a file name.
    """
    # raw string: '\w' is an invalid escape sequence in an ordinary
    # string literal (DeprecationWarning in Python 3.6+)
    assert grammar_name == "" or re.match(r'\w+\Z', grammar_name)
    if not grammar_name and re.fullmatch(r'[\w/:\\]+', grammar_source):
        grammar_name = os.path.splitext(os.path.basename(grammar_source))[0]
    self.grammar_name = grammar_name
    self.grammar_source = load_if_file(grammar_source)
@staticmethod
def derive_method_name(node_name):
"""Returns the method name for ``node_name``, e.g.
......@@ -1012,7 +1019,7 @@ class CompilerBase:
return result
def compile_source(source, scan, parse, transform, compile_ast):
def compile_source(source, scanner, parser, transformer, compiler):
"""Compiles a source in four stages:
1. Scanning (if needed)
2. Parsing
......@@ -1024,13 +1031,13 @@ def compile_source(source, scan, parse, transform, compile_ast):
Args:
source (str): The input text for compilation or a the name of a
file containing the input text.
scan (function): text -> text. A scanner function or None,
scanner (function): text -> text. A scanner function or None,
if no scanner is needed.
parse (function): A parsing function or grammar class
transform (function): A transformation function that takes
parser (function): A parsing function or grammar class
transformer (function): A transformation function that takes
the root-node of the concrete syntax tree as an argument and
transforms it (in place) into an abstract syntax tree.
compile_ast (function): A compiler function or compiler class
compiler (function): A compiler function or compiler class
instance
Returns (tuple):
......@@ -1042,16 +1049,16 @@ def compile_source(source, scan, parse, transform, compile_ast):
3. The root-node of the abstract syntax tree
"""
source_text = load_if_file(source)
log_file_name = logfile_basename(source, compile_ast)
if scan is not None:
source_text = scan(source_text)
syntax_tree = parse(source_text)
log_file_name = logfile_basename(source, compiler)
if scanner is not None:
source_text = scanner(source_text)
syntax_tree = parser(source_text)
if is_logging():
syntax_tree.log(log_file_name, ext='.cst')
syntax_tree.log(log_file_name + '.cst')
try:
parse.log_parsing_history(log_file_name)
parser.log_parsing_history(log_file_name)
except AttributeError:
# this is a hack in case a parse function or method was
# this is a hack in case a parser function or method was
# passed instead of a grammar class instance
for nd in syntax_tree.find(lambda nd: bool(nd.parser)):
nd.parser.grammar.log_parsing_history(log_file_name)
......@@ -1064,11 +1071,11 @@ def compile_source(source, scan, parse, transform, compile_ast):
result = None
errors = syntax_tree.collect_errors()
else:
transform(syntax_tree)
if is_logging(): syntax_tree.log(log_file_name, ext='.ast')
transformer(syntax_tree)
if is_logging(): syntax_tree.log(log_file_name + '.ast')
errors = syntax_tree.collect_errors()
if not errors:
result = compile_ast(syntax_tree)
result = compiler(syntax_tree)
errors = syntax_tree.collect_errors()
messages = error_messages(source_text, errors)
return result, messages, syntax_tree
......
......@@ -338,11 +338,10 @@ class Node:
errors.extend(child.collect_errors(clear_errors))
return errors
def log(self, log_file_name, ext):
if is_logging():
st_file_name = log_file_name + ext
with open(os.path.join(log_dir(), st_file_name), "w", encoding="utf-8") as f:
f.write(self.as_sexpr())
def log(self, log_file_name):
    """Writes this node's S-expression representation to the file
    ``log_file_name`` inside the log directory."""
    path = os.path.join(log_dir(), log_file_name)
    with open(path, "w", encoding="utf-8") as f:
        f.write(self.as_sexpr())
def find(self, match_function):
"""Finds nodes in the tree that match a specific criterion.
......
......@@ -103,6 +103,7 @@ def logging(dirname="LOGS"):
turn logging of
"""
global LOGGING
if dirname == True: dirname = "LOGS" # be fail tolerant here...
try:
save = LOGGING
except NameError:
......
......@@ -24,8 +24,9 @@ import os
import sys
from functools import partial
from DHParser.toolkit import logging
from DHParser.dsl import compileDSL, compile_on_disk
from DHParser.ebnf import EBNFGrammar, EBNFTransform, EBNFCompiler
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
from DHParser.parsers import compile_source, nil_scanner
......@@ -34,10 +35,11 @@ def selftest(file_name):
with open('examples/' + file_name, encoding="utf-8") as f:
grammar = f.read()
compiler_name = os.path.basename(os.path.splitext(file_name)[0])
compiler = EBNFCompiler(compiler_name, grammar)
parser = EBNFGrammar()
parser = get_ebnf_grammar()
transformer = get_ebnf_transformer()
compiler = get_ebnf_compiler(compiler_name, grammar)
result, errors, syntax_tree = compile_source(grammar, None, parser,
EBNFTransform, compiler)
transformer, compiler)
print(result)
if errors:
print('\n\n'.join(errors))
......@@ -46,7 +48,7 @@ def selftest(file_name):
# compile the grammar again using the result of the previous
# compilation as parser
print(type(result))
result = compileDSL(grammar, nil_scanner, result, EBNFTransform, compiler)
result = compileDSL(grammar, nil_scanner, result, transformer, compiler)
print(result)
return result
......@@ -80,4 +82,5 @@ if __name__ == "__main__":
else:
# run self test
# selftest('EBNF/EBNF.ebnf')
profile(partial(selftest, file_name='EBNF/EBNF.ebnf'))
with logging(False):
profile(partial(selftest, file_name='EBNF/EBNF.ebnf'))
......@@ -23,10 +23,10 @@ limitations under the License.
import os
import sys
sys.path.append(os.path.abspath('../../'))
from DHParser.dsl import compile_on_disk, suite_outdated
from DHParser.dsl import compile_on_disk, is_outdated
if (not os.path.exists('PopRetrieve_compiler.py') or
suite_outdated('PopRetrieve_compiler.py', 'PopRetrieve.ebnf')):
is_outdated('PopRetrieve_compiler.py', 'PopRetrieve.ebnf')):
print("recompiling PopRetrieve parser")
errors = compile_on_disk("PopRetrieve.ebnf")
if errors:
......@@ -68,7 +68,7 @@ if errors:
if (not os.path.exists('PopRetrieveComplement_compiler.py') or
suite_outdated('PopRetrieveComplement_compiler.py', 'PopRetrieveComplement.ebnf')):
is_outdated('PopRetrieveComplement_compiler.py', 'PopRetrieveComplement.ebnf')):
print("recompiling PopRetrieveComplement parser")
errors = compile_on_disk("PopRetrieveComplement.ebnf")
if errors:
......
......@@ -20,6 +20,9 @@ See the License for the specific language governing permissions and
limitations under the License.
"""
import sys
sys.path.extend(['../', './'])
from DHParser.toolkit import *
from DHParser.syntaxtree import *
from DHParser.parsers import *
......
......@@ -22,6 +22,7 @@ limitations under the License.
import os
import sys
sys.path.extend(['../', './'])
from DHParser.dsl import compile_on_disk, run_compiler
......@@ -48,6 +49,7 @@ class TestCompilerGeneration:
for name in (self.grammar_name, self.compiler_name, self.text_name, self.result_name):
if os.path.exists(name):
os.remove(name)
pass
def test_compiling_functions(self):
# test if cutting and reassembling of compiler suite works:
......
This diff is collapsed.
......@@ -19,17 +19,14 @@ See the License for the specific language governing permissions and
limitations under the License.
"""
import os
import sys
sys.path.extend(['../', './'])
sys.path.append(os.path.abspath('../../'))
from DHParser.toolkit import compile_python_object
from DHParser.toolkit import is_logging, compile_python_object
from DHParser.parsers import compile_source
from DHParser.ebnf import EBNFGrammar, EBNFTransform, EBNFCompiler
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
from DHParser.dsl import compileEBNF, DHPARSER_IMPORTS
WRITE_LOGS = True
class TestInfiLoopsAndRecursion:
def test_direct_left_recursion(self):
......@@ -47,8 +44,8 @@ class TestInfiLoopsAndRecursion:
syntax_tree = parser(snippet)
assert not syntax_tree.collect_errors()
assert snippet == str(syntax_tree)
if WRITE_LOGS:
syntax_tree.log("test_LeftRecursion_direct", '.cst')
if is_logging():
syntax_tree.log("test_LeftRecursion_direct.cst")
# self.minilang_parser1.log_parsing_history("test_LeftRecursion_direct")
def test_indirect_left_recursion(self):
......@@ -70,8 +67,8 @@ class TestRegex:
[+] # followed by a plus sign
\w* # possibly followed by more alpha chracters/
"""
result, messages, syntax_tree = compile_source(mlregex, None,
EBNFGrammar(), EBNFTransform, EBNFCompiler('MultilineRegexTest'))
result, messages, syntax_tree = compile_source(mlregex, None, get_ebnf_grammar(),
get_ebnf_transformer(), get_ebnf_compiler('MultilineRegexTest'))
assert result
assert not messages
parser = compile_python_object(DHPARSER_IMPORTS + result, '\w+Grammar$')()
......@@ -92,8 +89,8 @@ class TestRegex:
test
\end{document}
"""
result, messages, syntax_tree = compile_source(tokenlang, None, EBNFGrammar(),
EBNFTransform, EBNFCompiler("TokenTest"))
result, messages, syntax_tree = compile_source(tokenlang, None, get_ebnf_grammar(),
get_ebnf_transformer(), get_ebnf_compiler("TokenTest"))
assert result
assert not messages
parser = compile_python_object(DHPARSER_IMPORTS + result, '\w+Grammar$')()
......@@ -104,4 +101,4 @@ class TestRegex:
if __name__ == "__main__":
from run import runner
runner("TestInfiLoopsAndRecursion", globals())