From c74091ca50cf9164b256ce914d8cbfd31019aae1 Mon Sep 17 00:00:00 2001 From: Eckhart Arnold Date: Sat, 10 Jun 2017 13:33:00 +0200 Subject: [PATCH] - added type annotations for better documentation and mypy type checks --- DHParser/dsl.py | 66 +++++---- DHParser/ebnf.py | 188 +++++++++++++------------ DHParser/parsers.py | 184 +++++++++++++----------- DHParser/syntaxtree.py | 89 +++++++----- DHParser/toolkit.py | 26 ++-- OLDSTUFF/ParserCombinators_obsolete.py | 8 +- examples/MLW/OLDSTUFF/MLW_compiler.py | 4 +- examples/Tutorial/LyrikCompiler.py | 92 ++++++------ test/test_dsl.py | 4 +- 9 files changed, 358 insertions(+), 303 deletions(-) diff --git a/DHParser/dsl.py b/DHParser/dsl.py index 8ae9d6e..b20cdcd 100644 --- a/DHParser/dsl.py +++ b/DHParser/dsl.py @@ -20,17 +20,18 @@ compilation of domain specific languages based on an EBNF-grammar. """ import os - try: import regex as re except ImportError: import re +from typing import Any, Tuple, cast -from .ebnf import EBNFTransformer, grammar_changed, \ - get_ebnf_scanner, get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler -from .toolkit import logging, load_if_file, is_python_code, compile_python_object -from .parsers import Grammar, CompilerBase, compile_source, nil_scanner -from .syntaxtree import Node +from DHParser.ebnf import EBNFTransformer, EBNFCompiler, grammar_changed, \ + get_ebnf_scanner, get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler, \ + ScannerFactoryFunc, ParserFactoryFunc, TransformerFactoryFunc, CompilerFactoryFunc +from DHParser.toolkit import logging, load_if_file, is_python_code, compile_python_object +from DHParser.parsers import Grammar, Compiler, compile_source, nil_scanner, ScannerFunc +from DHParser.syntaxtree import Node, TransformerFunc __all__ = ['GrammarError', @@ -71,7 +72,7 @@ try: except ImportError: import re from DHParser.toolkit import logging, is_filename, load_if_file -from DHParser.parsers import Grammar, CompilerBase, nil_scanner, \\ +from DHParser.parsers import Grammar, Compiler, nil_scanner, \\ Lookbehind, Lookahead, Alternative, Pop, Required, Token, \\ Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Sequence, RE, Capture, \\ ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \\ @@ -137,7 +138,7 @@ class CompilationError(Exception): return '\n'.join(self.error_messages) -def grammar_instance(grammar_representation): +def grammar_instance(grammar_representation) -> Tuple[Grammar, str]: """Returns a grammar object and the source code of the grammar, from the given `grammar`-data which can be either a file name, ebnf-code, python-code, a Grammar-derived grammar class or an instance of @@ -167,7 +168,11 @@ def grammar_instance(grammar_representation): return parser_root, grammar_src -def compileDSL(text_or_file, scanner, dsl_grammar, ast_transformation, compiler): +def compileDSL(text_or_file: str, + scanner: ScannerFunc, + dsl_grammar: Grammar, + ast_transformation: TransformerFunc, + compiler: Compiler) -> Any: """Compiles a text in a domain specific language (DSL) with an EBNF-specified grammar. Returns the compiled text or raises a compilation error. 
@@ -176,10 +181,10 @@ def compileDSL(text_or_file, scanner, dsl_grammar, ast_transformation, compiler)
         CompilationError if any errors occurred during compilation
     """
     assert isinstance(text_or_file, str)
-    assert isinstance(compiler, CompilerBase)
+    assert isinstance(compiler, Compiler)
 
-    parser_root, grammar_src = grammar_instance(dsl_grammar)
-    result, errors, AST = compile_source(text_or_file, scanner, parser_root,
+    parser, grammar_src = grammar_instance(dsl_grammar)
+    result, errors, AST = compile_source(text_or_file, scanner, parser,
                                          ast_transformation, compiler)
     if errors:
         src = load_if_file(text_or_file)
@@ -187,7 +192,7 @@ def compileDSL(text_or_file, scanner, dsl_grammar, ast_transformation, compiler)
     return result
 
 
-def raw_compileEBNF(ebnf_src, branding="DSL"):
+def raw_compileEBNF(ebnf_src: str, branding="DSL") -> EBNFCompiler:
     """Compiles an EBNF grammar file and returns the compiler object
     that was used and which can now be queried for the result as well
     as skeleton code for scanner, transformer and compiler objects.
@@ -208,7 +213,7 @@ def raw_compileEBNF(ebnf_src, branding="DSL"):
     return compiler
 
 
-def compileEBNF(ebnf_src, branding="DSL"):
+def compileEBNF(ebnf_src: str, branding="DSL") -> str:
     """Compiles an EBNF source file and returns the source code of a
     compiler suite with skeletons for scanner, transformer and
     compiler.
@@ -234,7 +239,7 @@ def compileEBNF(ebnf_src, branding="DSL"):
     return '\n'.join(src)
 
 
-def parser_factory(ebnf_src, branding="DSL"):
+def parser_factory(ebnf_src: str, branding="DSL") -> Grammar:
    """Compiles an EBNF grammar and returns a grammar-parser factory
    function for that grammar.
 
@@ -253,7 +258,8 @@ def parser_factory(ebnf_src, branding="DSL"):
     return compile_python_object(DHPARSER_IMPORTS + grammar_src, 'get_(?:\w+_)?grammar$')
 
 
-def load_compiler_suite(compiler_suite):
+def load_compiler_suite(compiler_suite: str) -> \
+        Tuple[ScannerFactoryFunc, ParserFactoryFunc, TransformerFactoryFunc, CompilerFactoryFunc]:
     """Extracts a compiler suite from file or string ``compiler_suite``
     and returns it as a tuple (scanner, parser, ast, compiler).
 
@@ -282,13 +288,14 @@ def load_compiler_suite(compiler_suite):
         if errors:
             raise GrammarError('\n\n'.join(errors), source)
         scanner = get_ebnf_scanner
+        parser = get_ebnf_grammar
         ast = get_ebnf_transformer
         compiler = compile_python_object(imports + compiler_py, 'get_(?:\w+_)?compiler$')
 
     return scanner, parser, ast, compiler
 
 
-def is_outdated(compiler_suite, grammar_source):
+def is_outdated(compiler_suite: str, grammar_source: str) -> bool:
     """Returns ``True`` if the ``compiler_suite`` needs to be updated.
 
     An update is needed if either the grammar in the compiler suite
@@ -313,7 +320,7 @@ def is_outdated(compiler_suite, grammar_source):
         return True
 
 
-def run_compiler(text_or_file, compiler_suite):
+def run_compiler(text_or_file: str, compiler_suite: str) -> Any:
     """Compiles a source with a given compiler suite.
 
     Args:
@@ -336,7 +343,7 @@ def run_compiler(text_or_file, compiler_suite):
     return compileDSL(text_or_file, scanner(), parser(), ast(), compiler())
 
 
-def compile_on_disk(source_file, compiler_suite="", extension=".xml"):
+def compile_on_disk(source_file: str, compiler_suite="", extension=".xml"):
     """Compiles a source file with a given compiler and writes the
     result to a file.
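For orientation, the typed dsl.py API above composes as follows. This is a
minimal usage sketch, assuming only the names exported by this patch; the
arithmetic grammar and the input string are invented for illustration and are
not part of DHParser:

    from DHParser.dsl import parser_factory

    # Hypothetical toy grammar; the first definition becomes the root parser.
    arithmetic_ebnf = '''
        expression = term { ("+" | "-") term }
        term       = factor { ("*" | "/") factor }
        factor     = /[0-9]+/~ | "(" expression ")"
        '''

    # parser_factory() compiles the EBNF source and returns a factory
    # function (a ParserFactoryFunc) that yields thread-local Grammar objects.
    get_grammar = parser_factory(arithmetic_ebnf, branding="Arithmetic")
    syntax_tree = get_grammar()("2 * (3 + 4)")   # Grammar.__call__ -> Node
    print(syntax_tree.as_sexpr())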
@@ -373,18 +380,20 @@ def compile_on_disk(source_file, compiler_suite="", extension=".xml"): rootname = os.path.splitext(filepath)[0] compiler_name = os.path.basename(rootname) if compiler_suite: - scanner, parser, trans, cfactory = load_compiler_suite(compiler_suite) + sfactory, pfactory, tfactory, cfactory = load_compiler_suite(compiler_suite) else: - scanner = get_ebnf_scanner - parser = get_ebnf_grammar - trans = get_ebnf_transformer + sfactory = get_ebnf_scanner + pfactory = get_ebnf_grammar + tfactory = get_ebnf_transformer cfactory = get_ebnf_compiler - compiler1 = cfactory(compiler_name, source_file) - result, errors, ast = compile_source(source_file, scanner(), parser(), trans(), compiler1) + compiler1 = cfactory() + compiler1.set_grammar_name(compiler_name, source_file) + result, errors, ast = compile_source(source_file, sfactory(), pfactory(), tfactory(), compiler1) if errors: return errors elif cfactory == get_ebnf_compiler: # trans == get_ebnf_transformer or trans == EBNFTransformer: # either an EBNF- or no compiler suite given + ebnf_compiler = cast(EBNFCompiler, compiler1) global SECTION_MARKER, RX_SECTION_MARKER, SCANNER_SECTION, PARSER_SECTION, \ AST_SECTION, COMPILER_SECTION, END_SECTIONS_MARKER, RX_WHITESPACE, \ DHPARSER_MAIN, DHPARSER_IMPORTS @@ -412,11 +421,11 @@ def compile_on_disk(source_file, compiler_suite="", extension=".xml"): if RX_WHITESPACE.fullmatch(imports): imports = DHPARSER_IMPORTS if RX_WHITESPACE.fullmatch(scanner): - scanner = compiler1.gen_scanner_skeleton() + scanner = ebnf_compiler.gen_scanner_skeleton() if RX_WHITESPACE.fullmatch(ast): - ast = compiler1.gen_transformer_skeleton() + ast = ebnf_compiler.gen_transformer_skeleton() if RX_WHITESPACE.fullmatch(compiler): - compiler = compiler1.gen_compiler_skeleton() + compiler = ebnf_compiler.gen_compiler_skeleton() try: f = open(rootname + 'Compiler.py', 'w', encoding="utf-8") @@ -441,6 +450,7 @@ def compile_on_disk(source_file, compiler_suite="", extension=".xml"): if f: f.close() else: + f = None try: f = open(rootname + extension, 'w', encoding="utf-8") if isinstance(result, Node): diff --git a/DHParser/ebnf.py b/DHParser/ebnf.py index aad8feb..418f7d7 100644 --- a/DHParser/ebnf.py +++ b/DHParser/ebnf.py @@ -18,19 +18,20 @@ permissions and limitations under the License. 
import keyword from functools import partial - try: import regex as re except ImportError: import re +from typing import Callable, cast, List, Set, Tuple -from .toolkit import load_if_file, escape_re, md5, sane_parser_name -from .parsers import Grammar, mixin_comment, nil_scanner, Forward, RE, NegativeLookahead, \ - Alternative, Sequence, Optional, Required, OneOrMore, ZeroOrMore, Token, CompilerBase -from .syntaxtree import Node, traverse, remove_enclosing_delimiters, reduce_single_child, \ +from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name +from DHParser.parsers import Grammar, mixin_comment, nil_scanner, Forward, RE, NegativeLookahead, \ + Alternative, Sequence, Optional, Required, OneOrMore, ZeroOrMore, Token, Compiler, \ + ScannerFunc +from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, reduce_single_child, \ replace_by_single_child, TOKEN_PTYPE, remove_expendables, remove_tokens, flatten, \ - forbid, assert_content, WHITESPACE_PTYPE, key_tag_name -from .versionnumber import __version__ + forbid, assert_content, WHITESPACE_PTYPE, key_tag_name, TransformerFunc +from DHParser.versionnumber import __version__ __all__ = ['get_ebnf_scanner', @@ -41,7 +42,11 @@ __all__ = ['get_ebnf_scanner', 'EBNFTransformer', 'EBNFCompilerError', 'EBNFCompiler', - 'grammar_changed'] + 'grammar_changed', + 'ScannerFactoryFunc', + 'ParserFactoryFunc', + 'TransformerFactoryFunc', + 'CompilerFactoryFunc'] ######################################################################## @@ -51,7 +56,7 @@ __all__ = ['get_ebnf_scanner', ######################################################################## -def get_ebnf_scanner(): +def get_ebnf_scanner() -> ScannerFunc: return nil_scanner @@ -137,7 +142,7 @@ class EBNFGrammar(Grammar): root__ = syntax -def grammar_changed(grammar_class, grammar_source): +def grammar_changed(grammar_class, grammar_source: str) -> bool: """Returns ``True`` if ``grammar_class`` does not reflect the latest changes of ``grammar_source`` @@ -168,7 +173,7 @@ def grammar_changed(grammar_class, grammar_source): return chksum != grammar_class.source_hash__ -def get_ebnf_grammar(): +def get_ebnf_grammar() -> EBNFGrammar: global thread_local_ebnf_grammar_singleton try: grammar = thread_local_ebnf_grammar_singleton @@ -223,13 +228,13 @@ EBNF_validation_table = { } -def EBNFTransformer(syntax_tree): +def EBNFTransformer(syntax_tree: Node): for processing_table, key_func in [(EBNF_transformation_table, key_tag_name), (EBNF_validation_table, key_tag_name)]: traverse(syntax_tree, processing_table, key_func) -def get_ebnf_transformer(): +def get_ebnf_transformer() -> TransformerFunc: return EBNFTransformer @@ -239,6 +244,13 @@ def get_ebnf_transformer(): # ######################################################################## + +ScannerFactoryFunc = Callable[[], ScannerFunc] +ParserFactoryFunc = Callable[[], Grammar] +TransformerFactoryFunc = Callable[[], TransformerFunc] +CompilerFactoryFunc = Callable[[], Compiler] + + SCANNER_FACTORY = ''' def get_scanner(): return {NAME}Scanner @@ -283,7 +295,7 @@ class EBNFCompilerError(Exception): pass -class EBNFCompiler(CompilerBase): +class EBNFCompiler(Compiler): """Generates a Parser from an abstract syntax tree of a grammar specified in EBNF-Notation. 
""" @@ -305,13 +317,13 @@ class EBNFCompiler(CompilerBase): self._reset() def _reset(self): - self._result = None - self.rules = set() - self.variables = set() - self.symbol_nodes = [] - self.definition_names = [] - self.recursive = set() - self.root = "" + self._result = '' # type: str + self.rules = set() # type: Set[str] + self.variables = set() # type: Set[str] + self.symbol_nodes = [] # type: List[Node] + self.definition_names = [] # type: List[str] + self.recursive = set() # type: Set[str] + self.root = "" # type: str self.directives = {'whitespace': self.WHITESPACE['horizontal'], 'comment': '', 'literalws': ['right'], @@ -319,15 +331,15 @@ class EBNFCompiler(CompilerBase): 'filter': dict()} # alt. 'retrieve_filter' @property - def result(self): + def result(self) -> str: return self._result - def gen_scanner_skeleton(self): + def gen_scanner_skeleton(self) -> str: name = self.grammar_name + "Scanner" return "def %s(text):\n return text\n" % name \ + SCANNER_FACTORY.format(NAME=self.grammar_name) - def gen_transformer_skeleton(self): + def gen_transformer_skeleton(self) -> str: if not self.definition_names: raise EBNFCompilerError('Compiler must be run before calling ' '"gen_transformer_Skeleton()"!') @@ -343,11 +355,11 @@ class EBNFCompiler(CompilerBase): transtable += [TRANSFORMER_FACTORY.format(NAME=self.grammar_name)] return '\n'.join(transtable) - def gen_compiler_skeleton(self): + def gen_compiler_skeleton(self) -> str: if not self.definition_names: raise EBNFCompilerError('Compiler has not been run before calling ' '"gen_Compiler_Skeleton()"!') - compiler = ['class ' + self.grammar_name + 'Compiler(CompilerBase):', + compiler = ['class ' + self.grammar_name + 'Compiler(Compiler):', ' """Compiler for the abstract-syntax-tree of a ' + self.grammar_name + ' source file.', ' """', '', @@ -357,23 +369,23 @@ class EBNFCompiler(CompilerBase): 'Compiler, self).__init__(grammar_name, grammar_source)', " assert re.match('\w+\Z', grammar_name)", ''] for name in self.definition_names: - method_name = CompilerBase.derive_method_name(name) + method_name = Compiler.derive_method_name(name) if name == self.root: - compiler += [' def ' + method_name + '(self, node):', + compiler += [' def ' + method_name + '(self, node: Node) -> str:', ' return node', ''] else: - compiler += [' def ' + method_name + '(self, node):', + compiler += [' def ' + method_name + '(self, node: Node) -> str:', ' pass', ''] compiler += [COMPILER_FACTORY.format(NAME=self.grammar_name)] return '\n'.join(compiler) - def assemble_parser(self, definitions, root_node): + def assemble_parser(self, definitions: List[Tuple[str, str]], root_node: Node) -> str: # fix capture of variables that have been defined before usage [sic!] 
        if self.variables:
             for i in range(len(definitions)):
                 if definitions[i][0] in self.variables:
-                    definitions[i] = (definitions[i][0], 'Capture(%s)' % definitions[1])
+                    definitions[i] = (definitions[i][0], 'Capture(%s)' % definitions[i][1])
 
         self.definition_names = [defn[0] for defn in definitions]
         definitions.append(('wspR__', self.WHITESPACE_KEYWORD
@@ -434,27 +446,27 @@ class EBNFCompiler(CompilerBase):
                + GRAMMAR_FACTORY.format(NAME=self.grammar_name)
         return self._result
 
-    def on_syntax(self, node):
+    def on_syntax(self, node: Node) -> str:
         self._reset()
         definitions = []
 
         # drop the wrapping sequence node
-        if len(node.children) == 1 and not node.result[0].parser.name:
-            node = node.result[0]
+        if len(node.children) == 1 and not node.children[0].parser.name:
+            node = node.children[0]
 
         # compile definitions and directives and collect definitions
-        for nd in node.result:
+        for nd in node.children:
             if nd.parser.name == "definition":
                 definitions.append(self._compile(nd))
             else:
                 assert nd.parser.name == "directive", nd.as_sexpr()
                 self._compile(nd)
-                node.error_flag |= nd.error_flag
+                node.error_flag = node.error_flag or nd.error_flag
 
         return self.assemble_parser(definitions, node)
 
-    def on_definition(self, node):
-        rule = node.result[0].result
+    def on_definition(self, node: Node) -> Tuple[str, str]:
+        rule = cast(str, node.children[0].result)
         if rule in self.rules:
             node.add_error('A rule with name "%s" has already been defined.' % rule)
         elif rule in EBNFCompiler.RESERVED_SYMBOLS:
@@ -470,7 +482,7 @@ class EBNFCompiler(CompilerBase):
                            % rule + '(This may change in the future.)')
         try:
             self.rules.add(rule)
-            defn = self._compile(node.result[1])
+            defn = self._compile(node.children[1])
             if rule in self.variables:
                 defn = 'Capture(%s)' % defn
                 self.variables.remove(rule)
@@ -481,7 +493,7 @@ class EBNFCompiler(CompilerBase):
         return rule, defn
 
     @staticmethod
-    def _check_rx(node, rx):
+    def _check_rx(node: Node, rx: str) -> str:
         """Checks whether the string `rx` represents a valid regular
         expression. Makes sure that multiline regular expressions are
         prepended by the multiline-flag. Returns the regular expression string.
@@ -494,22 +506,22 @@ class EBNFCompiler(CompilerBase):
                            (repr(rx), str(re_error)))
         return rx
 
-    def on_directive(self, node):
-        key = node.result[0].result.lower()
+    def on_directive(self, node: Node) -> str:
+        key = cast(str, node.children[0].result).lower()
         assert key not in self.directives['tokens']
         if key in {'comment', 'whitespace'}:
-            if node.result[1].parser.name == "list_":
-                if len(node.result[1].result) != 1:
+            if node.children[1].parser.name == "list_":
+                if len(node.children[1].result) != 1:
                     node.add_error('Directive "%s" must have one, but not %i values.' %
-                                   (key, len(node.result[1])))
-                value = self._compile(node.result[1]).pop()
+                                   (key, len(node.children[1].result)))
+                value = self._compile(node.children[1]).pop()
                 if key == 'whitespace' and value in EBNFCompiler.WHITESPACE:
                     value = EBNFCompiler.WHITESPACE[value]  # replace whitespace-name by regex
                 else:
                     node.add_error('Value "%s" not allowed for directive "%s".' % (value, key))
             else:
-                value = node.result[1].result.strip("~")
-                if value != node.result[1].result:
+                value = cast(str, node.children[1].result).strip("~")
+                if value != cast(str, node.children[1].result):
                     node.add_error("Whitespace marker '~' not allowed in definition of "
                                    "%s regular expression."
                                   % key)
                 if value[0] + value[-1] in {'""', "''"}:
@@ -522,7 +534,7 @@ class EBNFCompiler(CompilerBase):
                 self.directives[key] = value
 
         elif key == 'literalws':
-            value = {item.lower() for item in self._compile(node.result[1])}
+            value = {item.lower() for item in self._compile(node.children[1])}
             if (len(value - {'left', 'right', 'both', 'none'}) > 0
                     or ('none' in value and len(value) > 1)):
                 node.add_error('Directive "literalws" allows the values '
@@ -533,10 +545,10 @@ class EBNFCompiler(CompilerBase):
             self.directives[key] = list(ws)
 
         elif key in {'tokens', 'scanner_tokens'}:
-            self.directives['tokens'] |= self._compile(node.result[1])
+            self.directives['tokens'] |= self._compile(node.children[1])
 
         elif key.endswith('_filter'):
-            filter_set = self._compile(node.result[1])
+            filter_set = self._compile(node.children[1])
             if not isinstance(filter_set, set) or len(filter_set) != 1:
                 node.add_error('Directive "%s" accepts exactly one symbol, not %s'
                                % (key, str(filter_set)))
@@ -548,82 +560,84 @@ class EBNFCompiler(CompilerBase):
                            ', '.join(list(self.directives.keys()))))
         return ""
 
-    def non_terminal(self, node, parser_class, custom_args=[]):
+    def non_terminal(self, node: Node, parser_class: str, custom_args: List[str]=[]) -> str:
         """Compiles any non-terminal, where `parser_class` indicates the Parser class
         name for the particular non-terminal.
         """
-        arguments = [self._compile(r) for r in node.result] + custom_args
+        arguments = [self._compile(r) for r in node.children] + custom_args
         return parser_class + '(' + ', '.join(arguments) + ')'
 
-    def on_expression(self, node):
+    def on_expression(self, node) -> str:
         return self.non_terminal(node, 'Alternative')
 
-    def on_term(self, node):
+    def on_term(self, node) -> str:
         return self.non_terminal(node, 'Sequence')
 
-    def on_factor(self, node):
+    def on_factor(self, node: Node) -> str:
         assert node.children
-        assert len(node.result) >= 2, node.as_sexpr()
-        prefix = node.result[0].result
-        custom_args = []
+        assert len(node.children) >= 2, node.as_sexpr()
+        prefix = cast(str, node.children[0].result)
+        custom_args = []  # type: List[str]
 
         if prefix in {'::', ':'}:
-            assert len(node.result) == 2
-            arg = node.result[-1]
+            assert len(node.children) == 2
+            arg = node.children[-1]
             if arg.parser.name != 'symbol':
                 node.add_error(('Retrieve Operator "%s" requires a symbol, '
                                 'and not a %s.') % (prefix, str(arg.parser)))
                 return str(arg.result)
             if str(arg) in self.directives['filter']:
                 custom_args = ['retrieve_filter=%s' % self.directives['filter'][str(arg)]]
-            self.variables.add(arg.result)
+            self.variables.add(cast(str, arg.result))
 
-        elif len(node.result) > 2:
+        elif len(node.children) > 2:
             # shift = (Node(node.parser, node.result[1].result),)
             # node.result[1].result = shift + node.result[2:]
-            node.result[1].result = (Node(node.result[1].parser, node.result[1].result),) \
-                                    + node.result[2:]
-            node.result[1].parser = node.parser
-            node.result = (node.result[0], node.result[1])
+            node.children[1].result = (Node(node.children[1].parser, node.children[1].result),) \
+                                      + node.children[2:]
+            node.children[1].parser = node.parser
+            node.result = (node.children[0], node.children[1])
 
-        node.result = node.result[1:]
+        node.result = node.children[1:]
         try:
             parser_class = self.PREFIX_TABLE[prefix]
             return self.non_terminal(node, parser_class, custom_args)
         except KeyError:
             node.add_error('Unknown prefix "%s".'
% prefix) + return "" - def on_option(self, node): + def on_option(self, node) -> str: return self.non_terminal(node, 'Optional') - def on_repetition(self, node): + def on_repetition(self, node) -> str: return self.non_terminal(node, 'ZeroOrMore') - def on_oneormore(self, node): + def on_oneormore(self, node) -> str: return self.non_terminal(node, 'OneOrMore') - def on_regexchain(self, node): + def on_regexchain(self, node) -> str: raise EBNFCompilerError("Not yet implemented!") - def on_group(self, node): + def on_group(self, node) -> str: raise EBNFCompilerError("Group nodes should have been eliminated by " "AST transformation!") - def on_symbol(self, node): - if node.result in self.directives['tokens']: - return 'ScannerToken("' + node.result + '")' + def on_symbol(self, node: Node) -> str: + result = cast(str, node.result) + if result in self.directives['tokens']: + return 'ScannerToken("' + result + '")' else: self.symbol_nodes.append(node) - if node.result in self.rules: - self.recursive.add(node.result) - return node.result + if result in self.rules: + self.recursive.add(result) + return result - def on_literal(self, node): - return 'Token(' + node.result.replace('\\', r'\\') + ')' # return 'Token(' + ', '.join([node.result]) + ')' ? + def on_literal(self, node) -> str: + return 'Token(' + cast(str, node.result).replace('\\', r'\\') + ')' # return 'Token(' + ', '.join([node.result]) + ')' ? - def on_regexp(self, node): - rx = node.result - name = [] + def on_regexp(self, node: Node) -> str: + rx = cast(str, node.result) + name = [] # type: List[str] if rx[:2] == '~/': if not 'left' in self.directives['literalws']: name = ['wL=' + self.WHITESPACE_KEYWORD] + name @@ -645,12 +659,12 @@ class EBNFCompiler(CompilerBase): return '"' + errmsg + '"' return 'RE(' + ', '.join([arg] + name) + ')' - def on_list_(self, node): + def on_list_(self, node) -> Set[str]: assert node.children - return set(item.result.strip() for item in node.result) + return set(item.result.strip() for item in node.children) -def get_ebnf_compiler(grammar_name="", grammar_source=""): +def get_ebnf_compiler(grammar_name="", grammar_source="") -> EBNFCompiler: global thread_local_ebnf_compiler_singleton try: compiler = thread_local_ebnf_compiler_singleton diff --git a/DHParser/parsers.py b/DHParser/parsers.py index e5a2fc0..ad7fb44 100644 --- a/DHParser/parsers.py +++ b/DHParser/parsers.py @@ -50,17 +50,21 @@ https://bitbucket.org/apalala/grako import copy +from functools import partial import os try: import regex as re except ImportError: import re +from typing import Any, Callable, Dict, Iterator, List, Set, Tuple, Union -from .toolkit import is_logging, log_dir, logfile_basename, escape_re, sane_parser_name -from .syntaxtree import WHITESPACE_PTYPE, TOKEN_PTYPE, ZOMBIE_PARSER, Node +from DHParser.toolkit import is_logging, log_dir, logfile_basename, escape_re, sane_parser_name +from DHParser.syntaxtree import WHITESPACE_PTYPE, TOKEN_PTYPE, ZOMBIE_PARSER, Node, \ + TransformerFunc from DHParser.toolkit import load_if_file, error_messages -__all__ = ['HistoryRecord', +__all__ = ['ScannerFunc', + 'HistoryRecord', 'Parser', 'Grammar', 'RX_SCANNER_TOKEN', @@ -90,10 +94,20 @@ __all__ = ['HistoryRecord', 'Retrieve', 'Pop', 'Forward', - 'CompilerBase', + 'Compiler', 'compile_source'] +######################################################################## +# +# Grammar and parsing infrastructure +# +######################################################################## + + +ScannerFunc = Union[Callable[[str], str], 
                              partial]
 
 
 LEFT_RECURSION_DEPTH = 10  # because of Python's recursion depth limit, this
                            # value ought not to be set too high
 MAX_DROPOUTS = 25  # stop trying to recover parsing after so many errors
@@ -123,26 +137,26 @@ class HistoryRecord:
         self.node = node
         self.remaining = remaining
 
-    def err_msg(self):
+    def err_msg(self) -> str:
         return self.ERROR + ": " + "; ".join(self.node._errors).replace('\n', '\\')
 
     @property
-    def stack(self):
+    def stack(self) -> str:
         return "->".join(str(parser) for parser in self.call_stack)
 
     @property
-    def status(self):
+    def status(self) -> str:
         return self.FAIL if self.node is None else \
             self.err_msg() if self.node._errors else self.MATCH
 
     @property
-    def extent(self):
+    def extent(self) -> Tuple[int, int]:
         return ((-self.remaining - self.node.len, -self.remaining) if self.node
                 else (-self.remaining, None))
 
 
 def add_parser_guard(parser_func):
-    def guarded_call(parser, text):
+    def guarded_call(parser: 'Parser', text: str) -> Tuple[Node, str]:
         try:
             location = len(text)
 
             # if location has already been visited by the current parser,
@@ -176,7 +190,7 @@ def add_parser_guard(parser_func):
                 # in case of a recursive call saves the result of the first
                 # (or left-most) call that matches
                 parser.visited[location] = (node, rest)
-                grammar.last_node = node
+                grammar.last_node = node   # store last node for Lookbehind operator
             elif location in parser.visited:
                 # if the parser did not match but a saved result exists, assume
                 # left recursion and use the saved result
@@ -208,27 +222,28 @@ class ParserMetaClass(type):
 
 
 class Parser(metaclass=ParserMetaClass):
+    ApplyFunc = Callable[['Parser'], None]
+
     def __init__(self, name=''):
         assert isinstance(name, str), str(name)
-        self.name = name
-        # self.pbases = {cls.__name__ for cls in inspect.getmro(self.__class__)}
-        self._grammar = None  # center for global variables etc.
+        self.name = name      # type: str
+        self._grammar = None  # type: 'Grammar'
         self.reset()
 
     def __deepcopy__(self, memo):
         return self.__class__(self.name)
 
     @property
-    def ptype(self):
+    def ptype(self) -> str:
         return ':' + self.__class__.__name__
 
     def reset(self):
-        self.visited = dict()
-        self.recursion_counter = dict()
-        self.cycle_detection = set()
+        self.visited = dict()            # type: Dict[int, Tuple[Node, str]]
+        self.recursion_counter = dict()  # type: Dict[int, int]
+        self.cycle_detection = set()     # type: Set[Callable]
         return self
 
-    def __call__(self, text):
+    def __call__(self, text: str) -> Tuple[Node, str]:
         return None, text  # default behaviour: don't match
 
     def __str__(self):
@@ -241,18 +256,18 @@ class Parser(metaclass=ParserMetaClass):
         return Alternative(self, other)
 
     @property
-    def grammar(self):
+    def grammar(self) -> 'Grammar':
         return self._grammar
 
     @grammar.setter
-    def grammar(self, grammar_base):
-        self._grammar = grammar_base
+    def grammar(self, grammar: 'Grammar'):
+        self._grammar = grammar
         self._grammar_assigned_notifier()
 
     def _grammar_assigned_notifier(self):
         pass
 
-    def apply(self, func):
+    def apply(self, func: ApplyFunc):
         """Applies function `func(parser)` recursively to this parser
         and all descendants of the tree of parsers. The same function
         can never be applied twice between calls of the ``reset()``-method!
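The Tuple[Node, str] annotations above pin down the parser-call protocol:
every parser takes the remaining text and returns the matched Node together
with the rest of the text, or None in place of a Node on failure. A small
sketch, assuming the EBNF grammar bundled with DHParser; the one-line sample
source is invented:

    from DHParser.ebnf import get_ebnf_grammar

    grammar = get_ebnf_grammar()       # thread-local EBNFGrammar instance
    tree = grammar('key = "value"')    # Grammar.__call__ parses a document
    print(tree.error_flag)             # False, if the toy source is valid
    print(tree.as_sexpr())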
@@ -266,7 +281,8 @@ class Parser(metaclass=ParserMetaClass):
 
 
 class Grammar:
-    root__ = None  # should be overwritten by grammar subclass
+    root__ = None  # type: Union[Parser, None]
+    # root__ must be overwritten with the root-parser by grammar subclass
 
     @classmethod
     def _assign_parser_names(cls):
@@ -330,14 +346,19 @@ class Grammar:
             return self.__dict__[key]
 
     def _reset(self):
-        self.variables = dict()  # support for Pop and Retrieve operators
-        self.document = ""  # source document
-        self.last_node = None
-        self.call_stack = []  # support for call stack tracing
-        self.history = []  # snapshots of call stacks
-        self.moving_forward = True  # also needed for call stack tracing
-
-    def _add_parser(self, parser):
+        # variables stored and recalled by Capture and Retrieve parsers
+        self.variables = dict()   # type: Dict[str, List[str]]
+        self.document = ""        # type: str
+        # previously parsed node, needed by Lookbehind parser
+        self.last_node = None     # type: Node
+        # support for call stack tracing
+        self.call_stack = []      # type: List[Parser]
+        # snapshots of call stacks
+        self.history = []         # type: List[HistoryRecord]
+        # also needed for call stack tracing
+        self.moving_forward = True
+
+    def _add_parser(self, parser: Parser):
         """Adds a copy of the class's parser object to this
         particular instance of Grammar.
         """
@@ -434,7 +455,7 @@ class Grammar:
             write_log(errors_only, log_file_name + '_errors')
 
 
-def dsl_error_msg(parser, error_str):
+def dsl_error_msg(parser, error_str) -> str:
     """Returns an error message for errors in the parser configuration,
     e.g. errors that result in infinite loops.
 
@@ -467,7 +488,7 @@
 BEGIN_SCANNER_TOKEN = '\x1b'
 END_SCANNER_TOKEN = '\x1c'
 
 
-def make_token(token, argument=''):
+def make_token(token, argument='') -> str:
     """Turns the ``token`` and ``argument`` into a special token that
     will be caught by the `ScannerToken`-parser.
 
@@ -481,7 +502,7 @@ def make_token(token, argument=''):
     return BEGIN_SCANNER_TOKEN + token + argument + END_SCANNER_TOKEN
 
 
-def nil_scanner(text):
+def nil_scanner(text) -> str:
     return text
 
 
@@ -502,7 +523,7 @@ class ScannerToken(Parser):
         assert RX_SCANNER_TOKEN.match(scanner_token)
         super(ScannerToken, self).__init__(scanner_token)
 
-    def __call__(self, text):
+    def __call__(self, text: str) -> Tuple[Node, str]:
         if text[0:1] == BEGIN_SCANNER_TOKEN:
             end = text.find(END_SCANNER_TOKEN, 1)
             if end < 0:
@@ -549,7 +570,7 @@ class RegExp(Parser):
             regexp = self.regexp.pattern
         return RegExp(regexp, self.name)
 
-    def __call__(self, text):
+    def __call__(self, text: str) -> Tuple[Node, str]:
         match = text[0:1] != BEGIN_SCANNER_TOKEN and self.regexp.match(text)  # ESC starts a scanner token.
if match: end = match.end() @@ -607,7 +628,7 @@ class RE(Parser): regexp = self.main.regexp.pattern return self.__class__(regexp, self.wL, self.wR, self.name) - def __call__(self, text): + def __call__(self, text: str) -> Tuple[Node, str]: # assert self.main.regexp.pattern != "@" t = text wL, t = self.wspLeft(t) @@ -633,7 +654,7 @@ class RE(Parser): if self.wR is None: self.wspRight = self.grammar.wsp_right_parser__ - def apply(self, func): + def apply(self, func: Parser.ApplyFunc): if super(RE, self).apply(func): if self.wL: self.wspLeft.apply(func) @@ -677,13 +698,13 @@ class UnaryOperator(Parser): def __init__(self, parser, name=''): super(UnaryOperator, self).__init__(name) assert isinstance(parser, Parser) - self.parser = parser + self.parser = parser # type: Parser def __deepcopy__(self, memo): parser = copy.deepcopy(self.parser, memo) return self.__class__(parser, self.name) - def apply(self, func): + def apply(self, func: Parser.ApplyFunc): if super(UnaryOperator, self).apply(func): self.parser.apply(func) @@ -692,13 +713,13 @@ class NaryOperator(Parser): def __init__(self, *parsers, name=''): super(NaryOperator, self).__init__(name) assert all([isinstance(parser, Parser) for parser in parsers]), str(parsers) - self.parsers = parsers + self.parsers = parsers # type: List[Parser] def __deepcopy__(self, memo): parsers = copy.deepcopy(self.parsers, memo) return self.__class__(*parsers, name=self.name) - def apply(self, func): + def apply(self, func: Parser.ApplyFunc): if super(NaryOperator, self).apply(func): for parser in self.parsers: parser.apply(func) @@ -715,7 +736,7 @@ class Optional(UnaryOperator): "Nesting options with required elements is contradictory: " \ "%s(%s)" % (str(name), str(parser.name)) - def __call__(self, text): + def __call__(self, text: str) -> Tuple[Node, str]: node, text = self.parser(text) if node: return Node(self, node), text @@ -723,8 +744,8 @@ class Optional(UnaryOperator): class ZeroOrMore(Optional): - def __call__(self, text): - results = () + def __call__(self, text: str) -> Tuple[Node, str]: + results = () # type: Tuple[Node, ...] n = len(text) + 1 while text and len(text) < n: n = len(text) @@ -744,9 +765,9 @@ class OneOrMore(UnaryOperator): "Use ZeroOrMore instead of nesting OneOrMore and Optional: " \ "%s(%s)" % (str(name), str(parser.name)) - def __call__(self, text): - results = () - text_ = text + def __call__(self, text: str) -> Tuple[Node, str]: + results = () # type: Tuple[Node, ...] + text_ = text # type: str n = len(text) + 1 while text_ and len(text_) < n: n = len(text_) @@ -766,9 +787,9 @@ class Sequence(NaryOperator): super(Sequence, self).__init__(*parsers, name=name) assert len(self.parsers) >= 1 - def __call__(self, text): - results = () - text_ = text + def __call__(self, text: str) -> Tuple[Node, str]: + results = () # type: Tuple[Node, ...] + text_ = text # type: str for parser in self.parsers: node, text_ = parser(text_) if not node: @@ -812,7 +833,7 @@ class Alternative(NaryOperator): assert len(self.parsers) >= 1 assert all(not isinstance(p, Optional) for p in self.parsers) - def __call__(self, text): + def __call__(self, text: str) -> Tuple[Node, str]: for parser in self.parsers: node, text_ = parser(text) if node: @@ -847,7 +868,7 @@ class FlowOperator(UnaryOperator): class Required(FlowOperator): # Add constructor that checks for logical errors, like `Required(Optional(...))` constructs ? 
- def __call__(self, text): + def __call__(self, text: str) -> Tuple[Node, str]: node, text_ = self.parser(text) if not node: m = re.search(r'\s(\S)', text) @@ -864,23 +885,23 @@ class Lookahead(FlowOperator): def __init__(self, parser, name=''): super(Lookahead, self).__init__(parser, name) - def __call__(self, text): + def __call__(self, text: str) -> Tuple[Node, str]: node, text_ = self.parser(text) if self.sign(node is not None): return Node(self, ''), text else: return None, text - def sign(self, bool_value): + def sign(self, bool_value) -> bool: return bool_value class NegativeLookahead(Lookahead): - def sign(self, bool_value): + def sign(self, bool_value) -> bool: return not bool_value -def iter_right_branch(node): +def iter_right_branch(node) -> Iterator[Node]: """Iterates over the right branch of `node` starting with node itself. Iteration is stopped if either there are no child nodes any more or if the parser of a node is a Lookahead parser. (Reason is: Since @@ -897,7 +918,7 @@ class Lookbehind(FlowOperator): super(Lookbehind, self).__init__(parser, name) print("WARNING: Lookbehind Operator is experimental!") - def __call__(self, text): + def __call__(self, text: str) -> Tuple[Node, str]: if isinstance(self.grammar.last_node, Lookahead): return Node(self, '').add_error('Lookbehind right after Lookahead ' 'does not make sense!'), text @@ -906,7 +927,7 @@ class Lookbehind(FlowOperator): else: return None, text - def sign(self, bool_value): + def sign(self, bool_value) -> bool: return bool_value def condition(self): @@ -921,7 +942,7 @@ class Lookbehind(FlowOperator): class NegativeLookbehind(Lookbehind): - def sign(self, bool_value): + def sign(self, bool_value) -> bool: return not bool_value @@ -936,7 +957,7 @@ class Capture(UnaryOperator): def __init__(self, parser, name=''): super(Capture, self).__init__(parser, name) - def __call__(self, text): + def __call__(self, text: str) -> Tuple[Node, str]: node, text = self.parser(text) if node: stack = self.grammar.variables.setdefault(self.name, []) @@ -970,7 +991,7 @@ class Retrieve(Parser): def __deepcopy__(self, memo): return self.__class__(self.symbol, self.retrieve_filter, self.name) - def __call__(self, text): + def __call__(self, text: str) -> Tuple[Node, str]: try: stack = self.grammar.variables[self.symbol.name] value = self.retrieve_filter(stack) @@ -983,12 +1004,12 @@ class Retrieve(Parser): else: return None, text - def pick_value(self, stack): + def pick_value(self, stack: List[str]) -> str: return stack[-1] class Pop(Retrieve): - def pick_value(self, stack): + def pick_value(self, stack: List[str]) -> str: return stack.pop() @@ -1012,7 +1033,7 @@ class Forward(Parser): duplicate.set(parser) return duplicate - def __call__(self, text): + def __call__(self, text: str) -> Tuple[Node, str]: return self.parser(text) def __str__(self): @@ -1024,12 +1045,12 @@ class Forward(Parser): self.cycle_reached = False return s - def set(self, parser): - assert isinstance(parser, Parser) + def set(self, parser: Parser): + # assert isinstance(parser, Parser) self.name = parser.name # redundant, see Grammar-constructor self.parser = parser - def apply(self, func): + def apply(self, func: Parser.ApplyFunc): if super(Forward, self).apply(func): assert not self.visited self.parser.apply(func) @@ -1042,7 +1063,7 @@ class Forward(Parser): ####################################################################### -class CompilerBase: +class Compiler: def __init__(self, grammar_name="", grammar_source=""): self.dirty_flag = False 
        self.set_grammar_name(grammar_name, grammar_source)
@@ -1050,7 +1071,7 @@ class CompilerBase:
     def _reset(self):
         pass
 
-    def __call__(self, node):
+    def __call__(self, node: Node) -> Any:
         """Compiles the abstract syntax tree with the root ``node``.
 
         It's called ``compile_ast`` to avoid confusion with the
@@ -1071,14 +1092,14 @@ class CompilerBase:
         self.grammar_source = load_if_file(grammar_source)
 
     @staticmethod
-    def derive_method_name(node_name):
+    def derive_method_name(node_name: str) -> str:
         """Returns the method name for ``node_name``, e.g.
-        >>> CompilerBase.method_name('expression')
+        >>> Compiler.derive_method_name('expression')
         'on_expression'
         """
         return 'on_' + node_name
 
-    def _compile(self, node):
+    def _compile(self, node: Node) -> Any:
         """Calls the compilation method for the given node and returns
         the result of the compilation.
 
@@ -1100,11 +1121,15 @@ class CompilerBase:
             compiler = self.__getattribute__(self.derive_method_name(elem))
             result = compiler(node)
             for child in node.children:
-                node.error_flag |= child.error_flag
+                node.error_flag = node.error_flag or child.error_flag
         return result
 
 
-def compile_source(source, scanner, parser, transformer, compiler):
+def compile_source(source: str,
+                   scanner: ScannerFunc,          # str -> str
+                   parser: Grammar,               # str -> Node (concrete syntax tree (CST))
+                   transformer: TransformerFunc,  # Node -> Node (abstract syntax tree (AST))
+                   compiler: Compiler):           # Node (AST) -> Any
     """Compiles a source in four stages:
         1. Scanning (if needed)
         2. Parsing
@@ -1140,14 +1165,7 @@ def compile_source(source, scanner, parser, transformer, compiler):
     syntax_tree = parser(source_text)
     if is_logging():
         syntax_tree.log(log_file_name + '.cst')
-    try:
-        parser.log_parsing_history(log_file_name)
-    except AttributeError:
-        # this is a hack in case a parser function or method was
-        # passed instead of a grammar class instance
-        for nd in syntax_tree.find(lambda nd: bool(nd.parser)):
-            nd.parser.grammar.log_parsing_history(log_file_name)
-            break
+        parser.log_parsing_history(log_file_name)
 
     assert syntax_tree.error_flag or str(syntax_tree) == source_text, str(syntax_tree)
 
     # only compile if there were no syntax errors, for otherwise it is
diff --git a/DHParser/syntaxtree.py b/DHParser/syntaxtree.py
index 60729c6..860753e 100644
--- a/DHParser/syntaxtree.py
+++ b/DHParser/syntaxtree.py
@@ -25,9 +25,9 @@ try:
     import regex as re
 except ImportError:
     import re
-from typing import NamedTuple
+from typing import Any, Callable, cast, Iterator, NamedTuple, Union, Tuple, List
 
-from .toolkit import log_dir, expand_table, line_col, smart_list
+from DHParser.toolkit import log_dir, expand_table, line_col, smart_list
 
 
 __all__ = ['WHITESPACE_PTYPE',
@@ -35,6 +35,7 @@ __all__ = ['WHITESPACE_PTYPE',
            'ZOMBIE_PARSER',
            'Error',
            'Node',
+           'TransformerFunc',
            'key_parser_name',
            'key_tag_name',
            'traverse',
@@ -117,6 +118,11 @@ ZOMBIE_PARSER = ZombieParser()
 Error = NamedTuple('Error', [('pos', int), ('msg', str)])
 
 
+ChildrenType = Tuple['Node', ...]
+ResultType = Union[ChildrenType, str]
+SloppyResultT = Union[ChildrenType, 'Node', str, None]
+
+
 class Node:
     """
     Represents a node in the concrete or abstract syntax tree.
@@ -157,19 +163,21 @@ class Node:
         parsing stage and never during or after the
         AST-transformation.
     """
-
-    def __init__(self, parser, result):
+    def __init__(self, parser, result: SloppyResultT) -> None:
         """Initializes the ``Node``-object with the ``Parser``-Instance
         that generated the node and the parser's result.
""" + self._result = '' # type: ResultType + self._errors = [] # type: List[str] + self._children = () # type: ChildrenType + self._len = len(self.result) if not self.children else \ + sum(child._len for child in self.children) # type: int + # self.pos: int = 0 # continuous updating of pos values + self._pos = -1 # type: int self.result = result self.parser = parser or ZOMBIE_PARSER - self._errors = [] - self.error_flag = any(r.error_flag for r in self.result) if self.children else False - self._len = len(self.result) if not self.children else \ - sum(child._len for child in self.children) - # self.pos = 0 # continuous updating of pos values - self._pos = -1 + self.error_flag = any(r.error_flag for r in self.children) \ + if self.children else False # type: bool def __str__(self): if self.children: @@ -190,39 +198,41 @@ class Node: return other @property - def tag_name(self): + def tag_name(self) -> str: return self.parser.name or self.parser.ptype # ONLY FOR DEBUGGING: return self.parser.name + ':' + self.parser.ptype @property - def result(self): + def result(self) -> ResultType: return self._result @result.setter - def result(self, result): - assert ((isinstance(result, tuple) and all(isinstance(child, Node) for child in result)) - or isinstance(result, Node) - or isinstance(result, str)), str(result) + def result(self, result: SloppyResultT): + # # made obsolete by static type checking with mypy is done + # assert ((isinstance(result, tuple) and all(isinstance(child, Node) for child in result)) + # or isinstance(result, Node) + # or isinstance(result, str)), str(result) self._result = (result,) if isinstance(result, Node) else result or '' - self._children = self._result if isinstance(self._result, tuple) else () + self._children = cast(ChildrenType, self._result) \ + if isinstance(self._result, tuple) else cast(ChildrenType, ()) @property - def children(self): + def children(self) -> ChildrenType: return self._children @property - def len(self): + def len(self) -> int: # DEBUGGING: print(self.tag_name, str(self.pos), str(self._len), str(self)[:10].replace('\n','.')) return self._len @property - def pos(self): + def pos(self) -> int: assert self._pos >= 0, "position value not initialized!" return self._pos @pos.setter - def pos(self, pos): - assert isinstance(pos, int) + def pos(self, pos: int): + # assert isinstance(pos, int) self._pos = pos offset = 0 for child in self.children: @@ -230,10 +240,10 @@ class Node: offset += child.len @property - def errors(self): + def errors(self) -> List[Error]: return [Error(self.pos, err) for err in self._errors] - def _tree_repr(self, tab, openF, closeF, dataF=lambda s: s): + def _tree_repr(self, tab, openF, closeF, dataF=lambda s: s) -> str: """ Generates a tree representation of this node and its children in string from. 
@@ -266,19 +276,19 @@ class Node: if self.children: content = [] - for child in self.result: + for child in self.children: subtree = child._tree_repr(tab, openF, closeF, dataF).split('\n') content.append('\n'.join((tab + s) for s in subtree)) return head + '\n'.join(content) + tail - if head[0] == "<" and self.result.find('\n') < 0: + res = cast(str, self.result) # safe, because if there are no children, result is a string + if head[0] == "<" and res.find('\n') < 0: # for XML: place tags for leaf-nodes on one line if possible return head[:-1] + self.result + tail[1:] else: - return head + '\n'.join([tab + dataF(s) - for s in self.result.split('\n')]) + tail + return head + '\n'.join([tab + dataF(s) for s in res.split('\n')]) + tail - def as_sexpr(self, src=None): + def as_sexpr(self, src=None) -> str: """ Returns content as S-expression, i.e. in lisp-like form. @@ -290,7 +300,7 @@ class Node: of leaf nodes shall be applied for better readability. """ - def opening(node): + def opening(node) -> str: s = '(' + node.tag_name # s += " '(pos %i)" % node.pos if src: @@ -307,7 +317,7 @@ class Node: return self._tree_repr(' ', opening, lambda node: ')', pretty) # pretty if prettyprint else lambda s: s) - def as_xml(self, src=None): + def as_xml(self, src=None) -> str: """ Returns content as XML-tree. @@ -317,7 +327,7 @@ class Node: column. """ - def opening(node): + def opening(node) -> str: s = '<' + node.tag_name # s += ' pos="%i"' % node.pos if src: @@ -333,7 +343,7 @@ class Node: return self._tree_repr(' ', opening, closing) - def add_error(self, error_str): + def add_error(self, error_str) -> 'Node': self._errors.append(error_str) self.error_flag = True return self @@ -347,7 +357,7 @@ class Node: child.propagate_error_flags() self.error_flag |= child.error_flag - def collect_errors(self, clear_errors=False): + def collect_errors(self, clear_errors=False) -> List[Error]: """ Returns all errors of this node or any child node in the form of a set of tuples (position, error_message), where position @@ -358,7 +368,7 @@ class Node: self._errors = [] self.error_flag = False if self.children: - for child in self.result: + for child in self.children: errors.extend(child.collect_errors(clear_errors)) return errors @@ -367,7 +377,7 @@ class Node: with open(os.path.join(log_dir(), st_file_name), "w", encoding="utf-8") as f: f.write(self.as_sexpr()) - def find(self, match_function): + def find(self, match_function) -> Iterator['Node']: """Finds nodes in the tree that match a specific criterion. ``find`` is a generator that yields all nodes for which the @@ -436,15 +446,18 @@ class Node: ######################################################################## +TransformerFunc = Union[Callable[[Node], Any], partial] + + WHITESPACE_PTYPE = ':Whitespace' TOKEN_PTYPE = ':Token' -def key_parser_name(node): +def key_parser_name(node) -> str: return node.parser.name -def key_tag_name(node): +def key_tag_name(node) -> str: return node.tag_name diff --git a/DHParser/toolkit.py b/DHParser/toolkit.py index 169c1f4..1c8a838 100644 --- a/DHParser/toolkit.py +++ b/DHParser/toolkit.py @@ -38,6 +38,7 @@ try: import regex as re except ImportError: import re +from typing import List, Tuple __all__ = ['logging', @@ -123,7 +124,7 @@ def is_logging(): return False -def line_col(text, pos): +def line_col(text: str, pos: int) -> Tuple[int, int]: """Returns the position within a text as (line, column)-tuple. 
""" assert pos < len(text), str(pos) + " >= " + str(len(text)) @@ -132,7 +133,7 @@ def line_col(text, pos): return line, column -def error_messages(source_text, errors): +def error_messages(source_text, errors) -> List[str]: """Returns the sequence or iterator of error objects as an intertor of error messages with line and column numbers at the beginning. @@ -149,7 +150,7 @@ def error_messages(source_text, errors): for err in sorted(list(errors))] -def compact_sexpr(s): +def compact_sexpr(s) -> str: """Returns S-expression ``s`` as a one liner without unnecessary whitespace. @@ -160,7 +161,7 @@ def compact_sexpr(s): return re.sub('\s(?=\))', '', re.sub('\s+', ' ', s)).strip() -def escape_re(s): +def escape_re(s) -> str: """Returns `s` with all regular expression special characters escaped. """ assert isinstance(s, str) @@ -170,13 +171,13 @@ def escape_re(s): return s -def is_filename(s): +def is_filename(s) -> bool: """Tries to guess whether string ``s`` is a file name.""" return s.find('\n') < 0 and s[:1] != " " and s[-1:] != " " \ and s.find('*') < 0 and s.find('?') < 0 -def logfile_basename(filename_or_text, function_or_class_or_instance): +def logfile_basename(filename_or_text, function_or_class_or_instance) -> str: """Generates a reasonable logfile-name (without extension) based on the given information. """ @@ -191,10 +192,11 @@ def logfile_basename(filename_or_text, function_or_class_or_instance): return s[:i] + '_out' if i >= 0 else s -def load_if_file(text_or_file): - """Reads and returns content of a file if parameter `text_or_file` is a - file name (i.e. a single line string), otherwise (i.e. if `text_or_file` is - a multiline string) `text_or_file` is returned. +def load_if_file(text_or_file) -> str: + """Reads and returns content of a text-file if parameter + `text_or_file` is a file name (i.e. a single line string), + otherwise (i.e. if `text_or_file` is a multiline string) + `text_or_file` is returned. """ if is_filename(text_or_file): try: @@ -211,7 +213,7 @@ def load_if_file(text_or_file): return text_or_file -def is_python_code(text_or_file): +def is_python_code(text_or_file) -> bool: """Checks whether 'text_or_file' is python code or the name of a file that contains python code. """ @@ -295,7 +297,7 @@ def expand_table(compact_table): return expanded_table -def sane_parser_name(name): +def sane_parser_name(name) -> bool: """Checks whether given name is an acceptable parser name. Parser names must not be preceeded or succeeded by a double underscore '__'! """ diff --git a/OLDSTUFF/ParserCombinators_obsolete.py b/OLDSTUFF/ParserCombinators_obsolete.py index 34b416b..ea545f3 100644 --- a/OLDSTUFF/ParserCombinators_obsolete.py +++ b/OLDSTUFF/ParserCombinators_obsolete.py @@ -1390,7 +1390,7 @@ def full_compilation(source, grammar_base, AST_transformations, compiler): assigns AST transformation functions to parser names (see function traverse) compiler (object): An instance of a class derived from - ``CompilerBase`` with a suitable method for every parser + ``Compiler`` with a suitable method for every parser name or class. 
Returns (tuple): @@ -1422,7 +1422,7 @@ def full_compilation(source, grammar_base, AST_transformations, compiler): return result, messages, syntax_tree -COMPILER_SYMBOLS = {'CompilerBase', 'Node', 're'} +COMPILER_SYMBOLS = {'Compiler', 'Node', 're'} ######################################################################## @@ -1616,7 +1616,7 @@ class EBNFCompiler(CompilerBase): if not self.definition_names: raise EBNFCompilerError('Compiler has not been run before calling ' '"gen_Compiler_Skeleton()"!') - compiler = ['class ' + self.grammar_name + 'Compiler(CompilerBase):', + compiler = ['class ' + self.grammar_name + 'Compiler(Compiler):', ' """Compiler for the abstract-syntax-tree of a ' + self.grammar_name + ' source file.', ' """', '', @@ -2086,7 +2086,7 @@ def run_compiler(source_file, compiler_suite="", extension=".xml"): intro, syms, scanner, parser, ast, compiler, outro = RX_SECTION_MARKER.split(source) except (PermissionError, FileNotFoundError, IOError) as error: intro, outro = '', '' - syms = import_block("PyDSL", PARSER_SYMBOLS | AST_SYMBOLS | {'CompilerBase'}) + syms = import_block("PyDSL", PARSER_SYMBOLS | AST_SYMBOLS | {'Compiler'}) scanner = compiler.gen_scanner_skeleton() ast = compiler.gen_AST_skeleton() compiler = compiler.gen_compiler_skeleton() diff --git a/examples/MLW/OLDSTUFF/MLW_compiler.py b/examples/MLW/OLDSTUFF/MLW_compiler.py index 07e5b4a..15f06af 100644 --- a/examples/MLW/OLDSTUFF/MLW_compiler.py +++ b/examples/MLW/OLDSTUFF/MLW_compiler.py @@ -14,7 +14,7 @@ try: import regex as re except ImportError: import re -from DHParser.parsers import Grammar, CompilerBase, Alternative, Required, Token, \ +from DHParser.parsers import Grammar, Compiler, Alternative, Required, Token, \ Optional, OneOrMore, Sequence, RE, ZeroOrMore, NegativeLookahead, mixin_comment, compile_source from DHParser.syntaxtree import traverse, reduce_single_child, replace_by_single_child, no_operation, \ remove_expendables, remove_tokens, flatten, \ @@ -290,7 +290,7 @@ MLWTransform = partial(traverse, processing_table=MLW_AST_transformation_table) # ####################################################################### -class MLWCompiler(CompilerBase): +class MLWCompiler(Compiler): """Compiler for the abstract-syntax-tree of a MLW source file. 
""" diff --git a/examples/Tutorial/LyrikCompiler.py b/examples/Tutorial/LyrikCompiler.py index 503ab24..79dd938 100644 --- a/examples/Tutorial/LyrikCompiler.py +++ b/examples/Tutorial/LyrikCompiler.py @@ -7,19 +7,23 @@ ####################################################################### +from functools import partial import os import sys -from functools import partial - try: import regex as re except ImportError: import re -from DHParser.toolkit import logging, is_filename -from DHParser.parsers import Grammar, CompilerBase, Required, Token, \ - Optional, OneOrMore, ZeroOrMore, Sequence, RE, NegativeLookahead, \ - mixin_comment, compile_source -from DHParser.syntaxtree import traverse, no_operation +from DHParser.toolkit import logging, is_filename, load_if_file +from DHParser.parsers import Grammar, Compiler, nil_scanner, \ + Lookbehind, Lookahead, Alternative, Pop, Required, Token, \ + Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Sequence, RE, Capture, \ + ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \ + nop_filter, counterpart_filter, accumulating_filter +from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, \ + remove_children_if, reduce_single_child, replace_by_single_child, remove_whitespace, \ + no_operation, remove_expendables, remove_tokens, flatten, is_whitespace, is_expendable, \ + collapse, map_content, WHITESPACE_PTYPE, TOKEN_PTYPE ####################################################################### @@ -31,7 +35,6 @@ from DHParser.syntaxtree import traverse, no_operation def LyrikScanner(text): return text - def get_scanner(): return LyrikScanner @@ -76,7 +79,7 @@ class LyrikGrammar(Grammar): JAHRESZAHL = /\d\d\d\d/~ ENDE = !/./ """ - source_hash__ = "7a99fa77a7d2b81976293d54696eb4f3" + source_hash__ = "3e9ec28cf58667fc259569326f76cf90" parser_initialization__ = "upon instatiation" COMMENT__ = r'' WSP__ = mixin_comment(whitespace=r'[\t ]*', comment=r'') @@ -133,20 +136,19 @@ Lyrik_AST_transformation_table = { "untertitel": no_operation, "ort": no_operation, "jahr": no_operation, + "wortfolge": no_operation, + "namenfolge": no_operation, + "verknüpfung": no_operation, + "ziel": no_operation, "serie": no_operation, "titel": no_operation, "zeile": no_operation, "text": no_operation, "strophe": no_operation, "vers": no_operation, - "wortfolge": no_operation, - "namenfolge": no_operation, - "verknüpfung": no_operation, - "ziel": no_operation, "WORT": no_operation, "NAME": no_operation, "ZEICHENFOLGE": no_operation, - "LEER": no_operation, "NZ": no_operation, "LEERZEILE": no_operation, "JAHRESZAHL": no_operation, @@ -167,7 +169,7 @@ def get_transformer(): # ####################################################################### -class LyrikCompiler(CompilerBase): +class LyrikCompiler(Compiler): """Compiler for the abstract-syntax-tree of a Lyrik source file. 
""" @@ -175,79 +177,76 @@ class LyrikCompiler(CompilerBase): super(LyrikCompiler, self).__init__(grammar_name, grammar_source) assert re.match('\w+\Z', grammar_name) - def on_gedicht(self, node): + def on_gedicht(self, node: Node) -> str: return node - def on_bibliographisches(self, node): + def on_bibliographisches(self, node: Node) -> str: pass - def on_autor(self, node): + def on_autor(self, node: Node) -> str: pass - def on_werk(self, node): + def on_werk(self, node: Node) -> str: pass - def on_untertitel(self, node): + def on_untertitel(self, node: Node) -> str: pass - def on_ort(self, node): + def on_ort(self, node: Node) -> str: pass - def on_jahr(self, node): + def on_jahr(self, node: Node) -> str: pass - def on_serie(self, node): + def on_wortfolge(self, node: Node) -> str: pass - def on_titel(self, node): + def on_namenfolge(self, node: Node) -> str: pass - def on_zeile(self, node): + def on_verknüpfung(self, node: Node) -> str: pass - def on_text(self, node): + def on_ziel(self, node: Node) -> str: pass - def on_strophe(self, node): + def on_serie(self, node: Node) -> str: pass - def on_vers(self, node): + def on_titel(self, node: Node) -> str: pass - def on_wortfolge(self, node): + def on_zeile(self, node: Node) -> str: pass - def on_namenfolge(self, node): + def on_text(self, node: Node) -> str: pass - def on_verknüpfung(self, node): + def on_strophe(self, node: Node) -> str: pass - def on_ziel(self, node): + def on_vers(self, node: Node) -> str: pass - def on_WORT(self, node): + def on_WORT(self, node: Node) -> str: pass - def on_NAME(self, node): + def on_NAME(self, node: Node) -> str: pass - def on_ZEICHENFOLGE(self, node): + def on_ZEICHENFOLGE(self, node: Node) -> str: pass - def on_LEER(self, node): + def on_NZ(self, node: Node) -> str: pass - def on_NZ(self, node): + def on_LEERZEILE(self, node: Node) -> str: pass - def on_LEERZEILE(self, node): + def on_JAHRESZAHL(self, node: Node) -> str: pass - def on_JAHRESZAHL(self, node): - pass - - def on_ENDE(self, node): + def on_ENDE(self, node: Node) -> str: pass @@ -260,7 +259,8 @@ def get_compiler(grammar_name="Lyrik", grammar_source=""): except NameError: thread_local_Lyrik_compiler_singleton = \ LyrikCompiler(grammar_name, grammar_source) - return thread_local_Lyrik_compiler_singleton + return thread_local_Lyrik_compiler_singleton + ####################################################################### # @@ -276,16 +276,14 @@ def compile_src(source): compiler = get_compiler() cname = compiler.__class__.__name__ log_file_name = os.path.basename(os.path.splitext(source)[0]) \ - if is_filename(source) < 0 else cname[:cname.find('.')] + '_out' - result = compile_source(source, get_scanner(), + if is_filename(source) < 0 else cname[:cname.find('.')] + '_out' + result = compile_source(source, get_scanner(), get_grammar(), get_transformer(), compiler) return result if __name__ == "__main__": - if len(sys.argv) == 1: - sys.argv.append("Lyrisches_Intermezzo_IV.txt") if len(sys.argv) > 1: result, errors, ast = compile_src(sys.argv[1]) if errors: diff --git a/test/test_dsl.py b/test/test_dsl.py index e31774e..6f76de1 100644 --- a/test/test_dsl.py +++ b/test/test_dsl.py @@ -24,7 +24,7 @@ import os import sys sys.path.extend(['../', './']) -from DHParser.parsers import Grammar, CompilerBase +from DHParser.parsers import Grammar, Compiler from DHParser.dsl import compile_on_disk, run_compiler, compileEBNF, parser_factory, \ load_compiler_suite @@ -90,7 +90,7 @@ class TestCompilerGeneration: assert callable(scanner) assert 
isinstance(parser, Grammar) assert callable(transformer) - assert isinstance(compiler, CompilerBase) + assert isinstance(compiler, Compiler) def test_compiling_functions(self): # test if cutting and reassembling of compiler suite works: -- GitLab
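Taken together, the annotations of this commit document the four-stage
pipeline behind compile_source(). A hedged end-to-end sketch using the EBNF
components that DHParser ships; the one-line grammar source is invented:

    from DHParser.parsers import compile_source
    from DHParser.ebnf import get_ebnf_scanner, get_ebnf_grammar, \
        get_ebnf_transformer, get_ebnf_compiler

    ebnf_source = 'document = /[a-z]+/~'
    result, errors, ast = compile_source(
        ebnf_source,
        get_ebnf_scanner(),      # ScannerFunc:     str -> str
        get_ebnf_grammar(),      # Grammar:         str -> Node (CST)
        get_ebnf_transformer(),  # TransformerFunc: transforms the CST in place
        get_ebnf_compiler())     # Compiler:        Node (AST) -> Any
    if not errors:
        print(result)            # Python source of the generated parser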