The expiration time for new job artifacts in CI/CD pipelines is now 30 days (GitLab default). Previously generated artifacts in already completed jobs will not be affected by the change. The latest artifacts for all jobs in the latest successful pipelines will be kept. More information: https://gitlab.lrz.de/help/user/admin_area/settings/continuous_integration.html#default-artifacts-expiration

Commit 761284f8 authored by Eckhart Arnold
Browse files

major refactorings

parent 980fd4a2
......@@ -27,7 +27,8 @@ try:
except ImportError:
import re
from .ebnf import EBNFGrammar, EBNFTransform, EBNFCompiler, grammar_changed
from .ebnf import EBNFGrammar, EBNFTransform, EBNFCompiler, grammar_changed, \
get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
from .toolkit import logging, load_if_file, is_python_code, compile_python_object
from .parsers import GrammarBase, CompilerBase, compile_source, nil_scanner
from .syntaxtree import Node
......@@ -104,19 +105,55 @@ from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, \\
'''
DHPARSER_SCANNER = '''
def get_{NAME}_scanner():
return {NAME}Scanner
'''
DHPARSER_GRAMMAR = '''
def get_{NAME}_grammar():
global thread_local_{NAME}_grammar_singleton
try:
grammar = thread_local_{NAME}_grammar_singleton
return grammar
except NameError:
thread_local_{NAME}_grammar_singleton = {NAME}Grammar()
return thread_local_{NAME}_grammar_singleton
'''
DHPARSER_TRANSFORMER = '''
def get_{NAME}_transformer():
return {NAME}Transform
'''
DHPARSER_COMPILER = '''
def get_{NAME}_compiler(grammar_name="{NAME}", grammar_source=""):
global thread_local_{NAME}_compiler_singleton
try:
compiler = thread_local_{NAME}_compiler_singleton
compiler.set_grammar_name(grammar_name, grammar_source)
return compiler
except NameError:
thread_local_{NAME}_compiler_singleton = {NAME}Compiler(grammar_name, grammar_source)
return thread_local_{NAME}_compiler_singleton
'''
DHPARSER_MAIN = '''
def compile_{NAME}(source):
"""Compiles ``source`` and returns (result, errors, ast).
"""
with logging("LOGS"):
grammar = {NAME}Grammar()
compiler = {NAME}Compiler()
compiler = get_{NAME}_compiler()
cname = compiler.__class__.__name__
log_file_name = os.path.basename(os.path.splitext(source)[0]) \\
if is_filename(source) < 0 else cname[:cname.find('.')] + '_out'
result = compile_source(source, {NAME}Scanner, grammar.parse,
{NAME}Transform, compiler.compile_ast)
grammar.log_parsing_history(log_file_name)
result = compile_source(source, get_{NAME}_scanner(),
get_{NAME}_grammar(),
get_{NAME}_transformer(), compiler)
return result
......@@ -148,7 +185,7 @@ def grammar_instance(grammar_representation):
else:
with logging(False):
parser_py, errors, AST = compile_source(grammar_src, None,
EBNFGrammar(), EBNFTransform, EBNFCompiler())
get_ebnf_grammar(), get_ebnf_transformer(), get_ebnf_compiler())
if errors:
raise GrammarError('\n\n'.join(errors), grammar_src)
parser_root = compile_python_object(DHPARSER_IMPORTS + parser_py, '\w*Grammar$')()
......@@ -215,6 +252,9 @@ def compileEBNF(ebnf_src, ebnf_grammar_obj=None, source_only=False):
def load_compiler_suite(compiler_suite):
"""Extracts a compiler suite from file or string ``compiler suite``
and returns it as a tuple (scanner, parser, ast, compiler).
Returns:
4-tuple (scanner function, parser class, ast transformer function, compiler class)
"""
global RX_SECTION_MARKER
assert isinstance(compiler_suite, str)
......@@ -231,8 +271,9 @@ def load_compiler_suite(compiler_suite):
compiler = compile_python_object(imports + compiler_py, '\w*Compiler$')
else:
# assume source is an ebnf grammar
parser_py, errors, AST = compile_source(source, None, EBNFGrammar(),
EBNFTransform, EBNFCompiler())
with logging(False):
parser_py, errors, AST = compile_source(source, None,
get_ebnf_grammar(), get_ebnf_transformer(), get_ebnf_compiler())
if errors:
raise GrammarError('\n\n'.join(errors), source)
scanner = nil_scanner
......@@ -243,7 +284,7 @@ def load_compiler_suite(compiler_suite):
return scanner, parser, ast, compiler
def suite_outdated(compiler_suite, grammar_source):
def is_outdated(compiler_suite, grammar_source):
"""Returns ``True`` if the ``compile_suite`` needs to be updated.
An update is needed, if either the grammar in the compiler suite
......@@ -333,9 +374,9 @@ def compile_on_disk(source_file, compiler_suite="", extension=".xml"):
compiler1 = cclass()
else:
scanner = nil_scanner
parser = EBNFGrammar()
trans = EBNFTransform
compiler1 = EBNFCompiler(compiler_name, source_file)
parser = get_ebnf_grammar()
trans = get_ebnf_transformer()
compiler1 = get_ebnf_compiler(compiler_name, source_file)
result, errors, ast = compile_source(source_file, scanner, parser, trans, compiler1)
if errors:
return errors
......@@ -362,15 +403,18 @@ def compile_on_disk(source_file, compiler_suite="", extension=".xml"):
if RX_WHITESPACE.fullmatch(intro):
intro = '#!/usr/bin/python'
if RX_WHITESPACE.fullmatch(outro):
outro = DHPARSER_COMPILER.format(NAME=compiler_name)
outro = DHPARSER_MAIN.format(NAME=compiler_name)
if RX_WHITESPACE.fullmatch(imports):
imports = DHPARSER_IMPORTS
if RX_WHITESPACE.fullmatch(scanner):
scanner = compiler1.gen_scanner_skeleton()
scanner = compiler1.gen_scanner_skeleton() + \
DHPARSER_SCANNER.format(NAME=compiler_name)
if RX_WHITESPACE.fullmatch(ast):
ast = compiler1.gen_AST_skeleton()
ast = compiler1.gen_AST_skeleton() + \
DHPARSER_TRANSFORMER.format(NAME=compiler_name)
if RX_WHITESPACE.fullmatch(compiler):
compiler = compiler1.gen_compiler_skeleton()
compiler = compiler1.gen_compiler_skeleton() + \
DHPARSER_COMPILER.format(NAME=compiler_name)
try:
f = open(rootname + '_compiler.py', 'w', encoding="utf-8")
......@@ -380,7 +424,7 @@ def compile_on_disk(source_file, compiler_suite="", extension=".xml"):
f.write(SECTION_MARKER.format(marker=SCANNER_SECTION))
f.write(scanner)
f.write(SECTION_MARKER.format(marker=PARSER_SECTION))
f.write(result)
f.write(result); f.write(DHPARSER_GRAMMAR.format(NAME=compiler_name))
f.write(SECTION_MARKER.format(marker=AST_SECTION))
f.write(ast)
f.write(SECTION_MARKER.format(marker=COMPILER_SECTION))
......@@ -409,4 +453,3 @@ def compile_on_disk(source_file, compiler_suite="", extension=".xml"):
if f: f.close()
return []
......@@ -25,7 +25,7 @@ except ImportError:
import re
from .toolkit import load_if_file, escape_re, md5, sane_parser_name
from .parsers import GrammarBase, mixin_comment, Forward, RE, NegativeLookahead, \
from .parsers import GrammarBase, mixin_comment, nil_scanner, Forward, RE, NegativeLookahead, \
Alternative, Sequence, Optional, Required, OneOrMore, ZeroOrMore, Token, CompilerBase, \
Capture, Retrieve
from .syntaxtree import Node, traverse, remove_enclosing_delimiters, reduce_single_child, \
......@@ -41,6 +41,26 @@ __all__ = ['EBNFGrammar',
'grammar_changed']
########################################################################
#
# EBNF scanning
#
########################################################################
def get_ebnf_scanner():
    """Returns the scanner function for EBNF source code.

    EBNF sources need no preprocessing, so this is ``nil_scanner``
    (presumably a pass-through scanner -- see parsers module).
    """
    return nil_scanner
########################################################################
#
# EBNF parsing
#
########################################################################
class EBNFGrammar(GrammarBase):
r"""Parser for an EBNF source file, with this grammar:
......@@ -116,6 +136,54 @@ class EBNFGrammar(GrammarBase):
root__ = syntax
def grammar_changed(grammar_class, grammar_source):
    """Returns ``True`` if ``grammar_class`` does not reflect the latest
    changes of ``grammar_source``.

    Parameters:
        grammar_class: the parser class representing the grammar
            or the file name of a compiler suite containing the grammar
        grammar_source: File name or string representation of the
            EBNF code of the grammar

    Returns (bool):
        True, if the source text of the grammar is different from the
        source from which the grammar class was generated
    """
    grammar = load_if_file(grammar_source)
    chksum = md5(grammar, __version__)
    if isinstance(grammar_class, str):
        # grammar_class is the file name of a compiler suite: compare the
        # checksum stored in its source code with the current checksum.
        # grammar_class = load_compiler_suite(grammar_class)[1]
        with open(grammar_class, 'r', encoding='utf8') as f:
            pycode = f.read()
        # Raw strings here: '\w' and '\(' are invalid escape sequences in
        # ordinary string literals (DeprecationWarning in modern Python).
        m = re.search(r'class \w*\(GrammarBase\)', pycode)
        if m:
            # Look for the stored hash only AFTER the class header, so a
            # stray "source_hash__" elsewhere cannot produce a false match.
            m = re.search(r' source_hash__ *= *"([a-z0-9]*)"',
                          pycode[m.span()[1]:])
            return not (m and m.groups() and m.groups()[-1] == chksum)
        else:
            # No grammar class found in the file: treat as outdated.
            return True
    else:
        return chksum != grammar_class.source_hash__
def get_ebnf_grammar():
    """Returns a singleton ``EBNFGrammar`` instance, creating it lazily
    on the first call.

    NOTE(review): despite the ``thread_local_`` name, the singleton is a
    plain module-level global shared across all threads -- confirm whether
    ``threading.local`` storage was intended.
    """
    global thread_local_ebnf_grammar_singleton
    try:
        # Reuse the previously created grammar instance if one exists.
        grammar = thread_local_ebnf_grammar_singleton
        return grammar
    except NameError:
        # First call: the global is not yet bound; create and memoize it.
        thread_local_ebnf_grammar_singleton = EBNFGrammar()
        return thread_local_ebnf_grammar_singleton
########################################################################
#
# EBNF concrete to abstract syntax tree transformation and validation
#
########################################################################
# TODO: Add Capture and Retrieve validation: a variable mustn't be captured twice before retrieval?!?
EBNF_transformation_table = {
......@@ -159,6 +227,17 @@ def EBNFTransform(syntax_tree):
traverse(syntax_tree, processing_table)
def get_ebnf_transformer():
    """Returns the transformation function that turns a concrete EBNF
    syntax tree into an abstract syntax tree (in place)."""
    return EBNFTransform
########################################################################
#
# EBNF abstract syntax tree to Python parser compilation
#
########################################################################
class EBNFCompilerError(Exception):
"""Error raised by `EBNFCompiler` class. (Not compilation errors
in the strict sense, see `CompilationError` below)"""
......@@ -182,8 +261,7 @@ class EBNFCompiler(CompilerBase):
'vertical': r'\s*'}
def __init__(self, grammar_name="", grammar_source=""):
super(EBNFCompiler, self).__init__()
self.set_grammar_name(grammar_name, grammar_source)
super(EBNFCompiler, self).__init__(grammar_name, grammar_source)
self._reset()
def _reset(self):
......@@ -199,13 +277,6 @@ class EBNFCompiler(CompilerBase):
'tokens': set(), # alt. 'scanner_tokens'
'counterpart': set()} # alt. 'retrieve_counterpart'
def set_grammar_name(self, grammar_name, grammar_source):
assert grammar_name == "" or re.match('\w+\Z', grammar_name)
if not grammar_name and re.fullmatch(r'[\w/:\\]+', grammar_source):
grammar_name = os.path.splitext(os.path.basename(grammar_source))[0]
self.grammar_name = grammar_name
self.grammar_source = load_if_file(grammar_source)
def gen_scanner_skeleton(self):
name = self.grammar_name + "Scanner"
return "def %s(text):\n return text\n" % name
......@@ -234,9 +305,9 @@ class EBNFCompiler(CompilerBase):
self.grammar_name + ' source file.',
' """', '',
' def __init__(self, grammar_name="' +
self.grammar_name + '"):',
self.grammar_name + '", grammar_source=""):',
' super(' + self.grammar_name +
'Compiler, self).__init__()',
'Compiler, self).__init__(grammar_name, grammar_source)',
" assert re.match('\w+\Z', grammar_name)", '']
for name in self.definition_names:
method_name = CompilerBase.derive_method_name(name)
......@@ -523,32 +594,12 @@ class EBNFCompiler(CompilerBase):
return set(item.result.strip() for item in node.result)
def grammar_changed(grammar_class, grammar_source):
"""Returns ``True`` if ``grammar_class`` does not reflect the latest
changes of ``grammar_source``
Parameters:
grammar_class: the parser class representing the grammar
or the file name of a compiler suite containing the grammar
grammar_source: File name or string representation of the
EBNF code of the grammar
Returns (bool):
True, if the source text of the grammar is different from the
source from which the grammar class was generated
"""
grammar = load_if_file(grammar_source)
chksum = md5(grammar, __version__)
if isinstance(grammar_class, str):
# grammar_class = load_compiler_suite(grammar_class)[1]
with open(grammar_class, 'r', encoding='utf8') as f:
pycode = f.read()
m = re.search('class \w*\(GrammarBase\)', pycode)
if m:
m = re.search(' source_hash__ *= *"([a-z0-9]*)"',
pycode[m.span()[1]:])
return not (m and m.groups() and m.groups()[-1] == chksum)
else:
return True
else:
return chksum != grammar_class.source_hash__
def get_ebnf_compiler(grammar_name="", grammar_source=""):
    """Returns a singleton ``EBNFCompiler`` instance, re-targeted to the
    given grammar name and source on every call.

    NOTE(review): despite the ``thread_local_`` name, the singleton is a
    plain module-level global shared across all threads -- confirm whether
    ``threading.local`` storage was intended.
    """
    global thread_local_ebnf_compiler_singleton
    try:
        # Reuse the existing compiler, but update its grammar name/source
        # so the singleton can serve successive compilations.
        compiler = thread_local_ebnf_compiler_singleton
        compiler.set_grammar_name(grammar_name, grammar_source)
        return compiler
    except NameError:
        # First call: the global is not yet bound; create and memoize it.
        thread_local_ebnf_compiler_singleton = EBNFCompiler(grammar_name, grammar_source)
        return thread_local_ebnf_compiler_singleton
......@@ -337,7 +337,7 @@ class GrammarBase:
Returns:
Node: The root node ot the parse tree.
"""
assert isinstance(document, str)
assert isinstance(document, str), type(document)
if self.root__ is None:
raise NotImplementedError()
if self.dirty_flag:
......@@ -391,21 +391,20 @@ class GrammarBase:
elif os.path.exists(path):
os.remove(path)
if is_logging():
if not log_file_name:
name = self.__class__.__name__
log_file_name = name[:-7] if name.lower().endswith('grammar') else name
full_history, match_history, errors_only = [], [], []
for record in self.history:
line = "; ".join(prepare_line(record))
full_history.append(line)
if record.node and record.node.parser.name != WHITESPACE_KEYWORD:
match_history.append(line)
if record.node.errors:
errors_only.append(line)
write_log(full_history, log_file_name + '_full')
write_log(match_history, log_file_name + '_match')
write_log(errors_only, log_file_name + '_errors')
if not log_file_name:
name = self.__class__.__name__
log_file_name = name[:-7] if name.lower().endswith('grammar') else name
full_history, match_history, errors_only = [], [], []
for record in self.history:
line = "; ".join(prepare_line(record))
full_history.append(line)
if record.node and record.node.parser.name != WHITESPACE_KEYWORD:
match_history.append(line)
if record.node.errors:
errors_only.append(line)
write_log(full_history, log_file_name + '_full')
write_log(match_history, log_file_name + '_match')
write_log(errors_only, log_file_name + '_errors')
def dsl_error_msg(parser, error_str):
......@@ -959,8 +958,9 @@ class Forward(Parser):
class CompilerBase:
def __init__(self):
def __init__(self, grammar_name="", grammar_source=""):
self.dirty_flag = False
self.set_grammar_name(grammar_name, grammar_source)
def _reset(self):
pass
......@@ -978,6 +978,13 @@ class CompilerBase:
self.dirty_flag = True
return self._compile(node)
def set_grammar_name(self, grammar_name, grammar_source):
    """Sets the compiler's grammar name and grammar source text.

    If ``grammar_name`` is empty and ``grammar_source`` looks like a file
    path (word characters, slashes, colons, backslashes only), the name is
    derived from the path's base name without its extension.
    """
    # Raw strings here: '\w' and '\Z' are invalid escape sequences in
    # ordinary string literals (DeprecationWarning in modern Python).
    assert grammar_name == "" or re.match(r'\w+\Z', grammar_name)
    if not grammar_name and re.fullmatch(r'[\w/:\\]+', grammar_source):
        grammar_name = os.path.splitext(os.path.basename(grammar_source))[0]
    self.grammar_name = grammar_name
    # load_if_file: grammar_source may be a file name or the source itself.
    self.grammar_source = load_if_file(grammar_source)
@staticmethod
def derive_method_name(node_name):
"""Returns the method name for ``node_name``, e.g.
......@@ -1012,7 +1019,7 @@ class CompilerBase:
return result
def compile_source(source, scan, parse, transform, compile_ast):
def compile_source(source, scanner, parser, transformer, compiler):
"""Compiles a source in four stages:
1. Scanning (if needed)
2. Parsing
......@@ -1024,13 +1031,13 @@ def compile_source(source, scan, parse, transform, compile_ast):
Args:
source (str): The input text for compilation or a the name of a
file containing the input text.
scan (function): text -> text. A scanner function or None,
scanner (function): text -> text. A scanner function or None,
if no scanner is needed.
parse (function): A parsing function or grammar class
transform (function): A transformation function that takes
parser (function): A parsing function or grammar class
transformer (function): A transformation function that takes
the root-node of the concrete syntax tree as an argument and
transforms it (in place) into an abstract syntax tree.
compile_ast (function): A compiler function or compiler class
compiler (function): A compiler function or compiler class
instance
Returns (tuple):
......@@ -1042,16 +1049,16 @@ def compile_source(source, scan, parse, transform, compile_ast):
3. The root-node of the abstract syntax tree
"""
source_text = load_if_file(source)
log_file_name = logfile_basename(source, compile_ast)
if scan is not None:
source_text = scan(source_text)
syntax_tree = parse(source_text)
log_file_name = logfile_basename(source, compiler)
if scanner is not None:
source_text = scanner(source_text)
syntax_tree = parser(source_text)
if is_logging():
syntax_tree.log(log_file_name, ext='.cst')
syntax_tree.log(log_file_name + '.cst')
try:
parse.log_parsing_history(log_file_name)
parser.log_parsing_history(log_file_name)
except AttributeError:
# this is a hack in case a parse function or method was
# this is a hack in case a parser function or method was
# passed instead of a grammar class instance
for nd in syntax_tree.find(lambda nd: bool(nd.parser)):
nd.parser.grammar.log_parsing_history(log_file_name)
......@@ -1064,11 +1071,11 @@ def compile_source(source, scan, parse, transform, compile_ast):
result = None
errors = syntax_tree.collect_errors()
else:
transform(syntax_tree)
if is_logging(): syntax_tree.log(log_file_name, ext='.ast')
transformer(syntax_tree)
if is_logging(): syntax_tree.log(log_file_name + '.ast')
errors = syntax_tree.collect_errors()
if not errors:
result = compile_ast(syntax_tree)
result = compiler(syntax_tree)
errors = syntax_tree.collect_errors()
messages = error_messages(source_text, errors)
return result, messages, syntax_tree
......
......@@ -338,11 +338,10 @@ class Node:
errors.extend(child.collect_errors(clear_errors))
return errors
def log(self, log_file_name, ext):
if is_logging():
st_file_name = log_file_name + ext
with open(os.path.join(log_dir(), st_file_name), "w", encoding="utf-8") as f:
f.write(self.as_sexpr())
def log(self, log_file_name):
    """Writes this node's S-expression representation to the file
    ``log_file_name`` inside the log directory.

    NOTE(review): there is no is_logging() guard here -- confirm that
    all callers check the logging state before calling this method.
    """
    st_file_name = log_file_name
    with open(os.path.join(log_dir(), st_file_name), "w", encoding="utf-8") as f:
        f.write(self.as_sexpr())
def find(self, match_function):
"""Finds nodes in the tree that match a specific criterion.
......
......@@ -103,6 +103,7 @@ def logging(dirname="LOGS"):
turn logging off
"""
global LOGGING
if dirname == True: dirname = "LOGS" # be fail tolerant here...
try:
save = LOGGING
except NameError:
......
......@@ -24,8 +24,9 @@ import os
import sys
from functools import partial
from DHParser.toolkit import logging
from DHParser.dsl import compileDSL, compile_on_disk
from DHParser.ebnf import EBNFGrammar, EBNFTransform, EBNFCompiler
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
from DHParser.parsers import compile_source, nil_scanner
......@@ -34,10 +35,11 @@ def selftest(file_name):
with open('examples/' + file_name, encoding="utf-8") as f:
grammar = f.read()
compiler_name = os.path.basename(os.path.splitext(file_name)[0])
compiler = EBNFCompiler(compiler_name, grammar)
parser = EBNFGrammar()
parser = get_ebnf_grammar()
transformer = get_ebnf_transformer()
compiler = get_ebnf_compiler(compiler_name, grammar)
result, errors, syntax_tree = compile_source(grammar, None, parser,
EBNFTransform, compiler)
transformer, compiler)
print(result)
if errors:
print('\n\n'.join(errors))
......@@ -46,7 +48,7 @@ def selftest(file_name):
# compile the grammar again using the result of the previous
# compilation as parser
print(type(result))
result = compileDSL(grammar, nil_scanner, result, EBNFTransform, compiler)
result = compileDSL(grammar, nil_scanner, result, transformer, compiler)
print(result)
return result
......@@ -80,4 +82,5 @@ if __name__ == "__main__":
else:
# run self test
# selftest('EBNF/EBNF.ebnf')
profile(partial(selftest, file_name='EBNF/EBNF.ebnf'))
with logging(False):
profile(partial(selftest, file_name='EBNF/EBNF.ebnf'))
......@@ -23,10 +23,10 @@ limitations under the License.
import os
import sys
sys.path.append(os.path.abspath('../../'))
from DHParser.dsl import compile_on_disk, suite_outdated
from DHParser.dsl import compile_on_disk, is_outdated
if (not os.path.exists('PopRetrieve_compiler.py') or
suite_outdated('PopRetrieve_compiler.py', 'PopRetrieve.ebnf')):
is_outdated('PopRetrieve_compiler.py', 'PopRetrieve.ebnf')):
print("recompiling PopRetrieve parser")
errors = compile_on_disk("PopRetrieve.ebnf")
if errors:
......@@ -68,7 +68,7 @@ if errors:
if (not os.path.exists('PopRetrieveComplement_compiler.py') or
suite_outdated('PopRetrieveComplement_compiler.py', 'PopRetrieveComplement.ebnf')):
is_outdated('PopRetrieveComplement_compiler.py', 'PopRetrieveComplement.ebnf')):
print("recompiling PopRetrieveComplement parser")
errors = compile_on_disk("PopRetrieveComplement.ebnf")
if errors:
......
......@@ -20,6 +20,9 @@ See the License for the specific language governing permissions and
limitations under the License.
"""
import sys
sys.path.extend(['../', './'])
from DHParser.toolkit import *
from DHParser.syntaxtree import *
from DHParser.parsers import *
......
......@@ -22,6 +22,7 @@ limitations under the License.
import os
import sys
sys.path.extend(['../', './'])
from DHParser.dsl import compile_on_disk, run_compiler
......@@ -48,6 +49,7 @@ class TestCompilerGeneration:
for name in (self.grammar_name, self.compiler_name, self.text_name, self.result_name):
if os.path.exists(name):
os.remove(name)
pass
def test_compiling_functions(self):
# test if cutting and reassembling of compiler suite works:
......
......@@ -21,17 +21,16 @@ limitations under the License.
"""
from functools import partial
import os
from multiprocessing import Pool
import sys
sys.path.append(os.path.abspath('../../'))
sys.path.extend(['../', './'])
from DHParser.toolkit import is_logging
from DHParser.parsers import compile_source, Retrieve, WHITESPACE_KEYWORD, nil_scanner
from DHParser.ebnf import EBNFGrammar, EBNFTransform, EBNFCompiler
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, EBNFTransform, get_ebnf_compiler
from DHParser.dsl import compileEBNF, compileDSL
WRITE_LOGS = True
class TestDirectives:
mini_language = """
expression = term { ("+" | "-") term }
......@@ -98,7 +97,7 @@ class TestEBNFParser:
def setup(self):
self.EBNF = EBNFGrammar()
self.EBNF = get_ebnf_grammar()
def test_literal(self):
snippet = '"literal" '
......@@ -154,8 +153,8 @@ class TestPopRetrieve: