diff --git a/DHParser/dsl.py b/DHParser/dsl.py
index 611c833cb562923ea550584d3352d071a88613e8..f7376eeeb49fe8d47eaf6ab1af90e98e068e5fd9 100644
--- a/DHParser/dsl.py
+++ b/DHParser/dsl.py
@@ -26,16 +26,16 @@ try:
 except ImportError:
     import re
 try:
-    from typing import Any, cast, Tuple, Union, Iterable
+    from typing import Any, cast, Tuple, Union, Iterator, Iterable
 except ImportError:
-    from .typing34 import Any, cast, Tuple, Union, Iterable
+    from .typing34 import Any, cast, Tuple, Union, Iterator, Iterable
 
 from DHParser.ebnf import EBNFCompiler, grammar_changed, \
     get_ebnf_preprocessor, get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler, \
     PreprocessorFactoryFunc, ParserFactoryFunc, TransformerFactoryFunc, CompilerFactoryFunc
 from DHParser.toolkit import logging, load_if_file, is_python_code, compile_python_object
 from DHParser.parser import Grammar, Compiler, compile_source, nil_preprocessor, PreprocessorFunc
-from DHParser.syntaxtree import Error, is_error, has_errors, Node, TransformationFunc
+from DHParser.syntaxtree import Error, is_error, has_errors, only_errors, Node, TransformationFunc
 
 __all__ = ('GrammarError',
            'CompilationError',
@@ -125,7 +125,8 @@ class DSLException(Exception):
     Base class for DSL-exceptions.
     """
     def __init__(self, errors):
-        assert isinstance(errors, list) or isinstance(errors, tuple)
+        assert isinstance(errors, Iterator) or isinstance(errors, list) \
+            or isinstance(errors, tuple)
         self.errors = errors
 
     def __str__(self):
@@ -180,7 +181,7 @@ def grammar_instance(grammar_representation) -> Tuple[Grammar, str]:
         parser_py, messages, AST = compile_source(grammar_src, None,
                                                   get_ebnf_grammar(), get_ebnf_transformer(), get_ebnf_compiler())
         if has_errors(messages):
-            raise GrammarError(messages, grammar_src)
+            raise GrammarError(only_errors(messages), grammar_src)
         parser_root = compile_python_object(DHPARSER_IMPORTS + parser_py, '\w+Grammar$')()
     else:
         # assume that dsl_grammar is a ParserHQ-object or Grammar class
@@ -214,7 +215,7 @@ def compileDSL(text_or_file: str,
                                            ast_transformation, compiler)
     if has_errors(messages):
         src = load_if_file(text_or_file)
-        raise CompilationError(messages, src, grammar_src, AST, result)
+        raise CompilationError(only_errors(messages), src, grammar_src, AST, result)
     return result
 
 
@@ -317,7 +318,7 @@ def load_compiler_suite(compiler_suite: str) -> \
         compile_py, messages, AST = compile_source(source, None,
                                                    get_ebnf_grammar(), get_ebnf_transformer(), get_ebnf_compiler())
         if has_errors(messages):
-            raise GrammarError(messages, source)
+            raise GrammarError(only_errors(messages), source)
         preprocessor = get_ebnf_preprocessor
         parser = get_ebnf_grammar
         ast = get_ebnf_transformer
@@ -533,9 +534,9 @@ def recompile_grammar(ebnf_filename, force=False) -> bool:
         messages = compile_on_disk(ebnf_filename)
         if messages:
             # print("Errors while compiling: " + ebnf_filename + '!')
-            with open(error_file_name, 'w') as f:
+            with open(error_file_name, 'w', encoding="UTF-8") as f:
                 for e in messages:
-                    f.write(e)
+                    f.write(str(e))
                     f.write('\n')
             if has_errors(messages):
                 return False
diff --git a/DHParser/ebnf.py b/DHParser/ebnf.py
index 3742c683c5d98ee6e9efc899d8fa4f67997455ae..10a40d18e1527e9c8eaacd1a93b9565dc08d1eda 100644
--- a/DHParser/ebnf.py
+++ b/DHParser/ebnf.py
@@ -33,7 +33,7 @@ from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name
 from DHParser.parser import Grammar, mixin_comment, nil_preprocessor, Forward, RE, NegativeLookahead, \
     Alternative, Series, Option, Required, OneOrMore, ZeroOrMore, Token, Compiler, \
     PreprocessorFunc
-from DHParser.syntaxtree import WHITESPACE_PTYPE, TOKEN_PTYPE, Node, TransformationFunc
+from DHParser.syntaxtree import WHITESPACE_PTYPE, TOKEN_PTYPE, Error, Node, TransformationFunc
 from DHParser.transform import TransformationDict, traverse, remove_brackets, \
     reduce_single_child, replace_by_single_child, remove_expendables, \
     remove_tokens, flatten, forbid, assert_content, remove_infix_operator
@@ -397,8 +397,7 @@ class EBNFCompiler(Compiler):
                   'literalws': ['right'],
                   'tokens': set(),  # alt. 'preprocessor_tokens'
                   'filter': dict(),  # alt. 'filter'
-                  'ignorecase': False,
-                  'testing': False}
+                  'ignorecase': False}
 
     @property
     def result(self) -> str:
@@ -544,22 +543,18 @@ class EBNFCompiler(Compiler):
 
         # check for unconnected rules
 
-        if not self.directives['testing']:
-            defined_symbols.difference_update(self.RESERVED_SYMBOLS)
-
-            def remove_connections(symbol):
-                if symbol in defined_symbols:
-                    defined_symbols.remove(symbol)
-                    for related in self.rules[symbol][1:]:
-                        remove_connections(str(related))
-
-            remove_connections(self.root_symbol)
-            for leftover in defined_symbols:
-                self.rules[leftover][0].add_error(('Rule "%s" is not connected to parser '
-                                                   'root "%s" !') % (leftover, self.root_symbol)
-                                                  + ' (Use directive "@testing=True" '
-                                                    'to supress this error message.)')
-            # root_node.error_flag = True
+        defined_symbols.difference_update(self.RESERVED_SYMBOLS)
+
+        def remove_connections(symbol):
+            if symbol in defined_symbols:
+                defined_symbols.remove(symbol)
+                for related in self.rules[symbol][1:]:
+                    remove_connections(str(related))
+
+        remove_connections(self.root_symbol)
+        for leftover in defined_symbols:
+            self.rules[leftover][0].add_error(('Rule "%s" is not connected to '
+                                               'parser root "%s" !') % (leftover, self.root_symbol), Error.WARNING)
 
 
         # set root_symbol parser and assemble python grammar definition
@@ -679,9 +674,9 @@ class EBNFCompiler(Compiler):
             if value:
                 self.re_flags.add('i')
 
-        elif key == 'testing':
-            value = str(node.children[1])
-            self.directives['testing'] = value.lower() not in {"off", "false", "no"}
+        # elif key == 'testing':
+        #     value = str(node.children[1])
+        #     self.directives['testing'] = value.lower() not in {"off", "false", "no"}
 
         elif key == 'literalws':
             value = {item.lower() for item in self.compile(node.children[1])}
diff --git a/DHParser/syntaxtree.py b/DHParser/syntaxtree.py
index 08eb0b6c7982609740b00bd6571fe20668ddf836..7a9c6d8dff7bdded475ad6517fce902cabdeca5f 100644
--- a/DHParser/syntaxtree.py
+++ b/DHParser/syntaxtree.py
@@ -27,10 +27,10 @@ except ImportError:
     import re
 try:
     from typing import AbstractSet, Any, ByteString, Callable, cast, Container, Dict, \
-        Iterator, Iterable, List, NamedTuple, Sequence, Union, Text, Tuple
+        Iterator, Iterable, List, NamedTuple, Sequence, Union, Text, Tuple, Hashable
 except ImportError:
     from .typing34 import AbstractSet, Any, ByteString, Callable, cast, Container, Dict, \
-        Iterator, Iterable, List, NamedTuple, Sequence, Union, Text, Tuple
+        Iterator, Iterable, List, NamedTuple, Sequence, Union, Text, Tuple, Hashable
 
 from DHParser.toolkit import is_logging, log_dir, StringView, linebreaks, line_col, identity
 
@@ -133,7 +133,7 @@ class Error:
     ERROR = 1000
     HIGHEST = ERROR
 
-    def __init__(self, message: str, level: int=ERROR, code: str=''):
+    def __init__(self, message: str, level: int=ERROR, code: Hashable=0):
         self.message = message
         assert level >= 0
         self.level = level or Error.ERROR
@@ -143,19 +143,14 @@ class Error:
         self.column = -1
 
     def __str__(self):
-        return ("line: %3i, column: %2i" % (self.line, self.column)
-                + ", %s: %s" % (self.level_str, self.message))
-
-    @staticmethod
-    def from_template(template: str, level: int=ERROR, content: Union[tuple, dict]=()):
-        if isinstance(content, tuple):
-            return Error((template % content) if content else template, level, template)
-        else:
-            return Error(template.format(**content), level, template)
+        prefix = ''
+        if self.line > 0:
+            prefix = "line: %3i, column: %2i, " % (self.line, self.column)
+        return prefix + "%s: %s" % (self.level_str, self.message)
 
     @property
     def level_str(self):
-        return "warning" if is_warning(self.level) else "error"
+        return "Warning" if is_warning(self.level) else "Error"
 
 
 def is_warning(level: int) -> bool:
@@ -177,6 +172,14 @@ def has_errors(messages: Iterable[Error], level: int=Error.ERROR) -> bool:
     return False
 
 
+def only_errors(messages: Iterable[Error], level: int=Error.ERROR) -> Iterator[Error]:
+    """
+    Returns an Iterator that yields only those messages that have
+    at least the given error level.
+    """
+    return (err for err in messages if err.level >= level)
+
+
 ChildrenType = Tuple['Node', ...]
 StrictResultType = Union[ChildrenType, StringView, str]
 
@@ -344,22 +347,8 @@ class Node(collections.abc.Sized):
         return self._errors.copy()
 
 
-    # def add_error(self, error_str: str) -> 'Node':
-    #     assert isinstance(error_str, str)
-    #     self._errors.append(error_str)
-    #     self.error_flag = True
-    #     return self
-
-
-    def add_error(self: 'Node',
-                  template: Union[str, Error],
-                  level: int=0,
-                  content: Union[tuple, dict]=()) -> 'Node':
-        if isinstance(template, Error):
-            assert not (bool(level) or bool(content))
-            self._errors.append(template)
-        else:
-            self._errors.append(Error.from_template(template, level, content))
+    def add_error(self, message: str, level: int=Error.ERROR, code: Hashable=0) -> 'Node':
+        self._errors.append(Error(message, level, code))
         self.error_flag = max(self.error_flag, self._errors[-1].level)
         return self
 
@@ -540,47 +529,6 @@ class Node(collections.abc.Sized):
                 yield nd
 
 
-    # def range(self, match_first, match_last):
-    #     """Iterates over the range of nodes, starting from the first
-    #     node for which ``match_first`` becomes True until the first node
-    #     after this one for which ``match_last`` becomes true or until
-    #     the end if it never does.
-    #
-    #     Args:
-    #         match_first (function): A function that takes as Node
-    #             object as argument and returns True or False
-    #         match_last (function): A function that takes as Node
-    #             object as argument and returns True or False
-    #     Yields:
-    #         Node: all nodes of the tree for which
-    #         ``match_function(node)`` returns True
-    #     """
-
-
-    # def navigate(self, path):
-    #     """Yields the results of all descendant elements matched by
-    #     ``path``, e.g.
-    #     'd/s' yields 'l' from (d (s l)(e (r x1) (r x2))
-    #     'e/r' yields 'x1', then 'x2'
-    #     'e' yields (r x1)(r x2)
-    #
-    #     Args:
-    #         path (str): The path of the object, e.g. 'a/b/c'. The
-    #             components of ``path`` can be regular expressions
-    #
-    #     Returns:
-    #         The object at the path, either a string or a Node or
-    #         ``None``, if the path did not match.
-    #     """
-    #     def nav(node, pl):
-    #         if pl:
-    #             return itertools.chain(nav(child, pl[1:]) for child in node.children
-    #                                    if re.match(pl[0], child.tag_name))
-    #         else:
-    #             return self.result,
-    #     return nav(path.split('/'))
-
-
     def tree_size(self) -> int:
         """Recursively counts the number of nodes in the tree including the root node."""
         return sum(child.tree_size() for child in self.children) + 1
diff --git a/DHParser/toolkit.py b/DHParser/toolkit.py
index 76971faa8e41b100bd8f096e7912f81e3b0a38c8..f21be32560d8c845cb2e8b8638e0acc369ddb0b5 100644
--- a/DHParser/toolkit.py
+++ b/DHParser/toolkit.py
@@ -57,9 +57,6 @@ __all__ = ('logging',
            'sv_match',
            'sv_index',
            'sv_search',
-           # 'supress_warnings',
-           # 'warnings',
-           # 'repr_call',
            'linebreaks',
            'line_col',
            'error_messages',
@@ -159,7 +156,7 @@ def clear_logs(logfile_types={'.cst', '.ast', '.log'}):
 
 class StringView(collections.abc.Sized):
     """"A rudimentary StringView class, just enough for the use cases
-    in parswer.py.
+    in parser.py.
 
     Slicing Python-strings always yields copies of a segment of the original
     string. See: https://mail.python.org/pipermail/python-dev/2008-May/079699.html
@@ -275,27 +272,6 @@ def sv_search(regex, sv: StringView):
 
 EMPTY_STRING_VIEW = StringView('')
 
-# def repr_call(f, parameter_list) -> str:
-#     """Turns a list of items into a string resembling the parameter
-#     list of a function call by omitting default values at the end:
-#     >>> def f(a, b=1): print(a, b)
-#     >>> repr_call(f, (5,1))
-#     'f(5)'
-#     >>> repr_call(f, (5,2))
-#     'f(5, 2)'
-#     """
-#     i = 0
-#     defaults = f.__defaults__ if f.__defaults__ is not None else []
-#     for parameter, default in zip(reversed(parameter_list), reversed(defaults)):
-#         if parameter != default:
-#             break
-#         i -= 1
-#     if i < 0:
-#         parameter_list = parameter_list[:i]
-#     name = f.__self__.__class__.__name__ if f.__name__ == '__init__' else f.__name__
-#     return "%s(%s)" % (name, ", ".merge_children(repr(item) for item in parameter_list))
-
-
 def linebreaks(text: Union[StringView, str]):
     lb = [-1]
     i = text.find('\n', 0)
@@ -344,7 +320,7 @@ def error_messages(source_text, errors) -> List[str]:
         string starts with "line: [Line-No], column: [Column-No]
     """
     for err in errors:
-        if err.pos >= 0 and err.line < 0:
+        if err.pos >= 0 and err.line <= 0:
             err.line, err.column = line_col(source_text, err.pos)
     return [str(err) for err in sorted(errors, key=lambda err: err.pos)]
 
diff --git a/examples/LaTeX/LaTeX.ebnf b/examples/LaTeX/LaTeX.ebnf
index 4de78e6dbf39b53c2f7219052e1a7b83ecc14f6e..9944207b7750c920345a3152cceb01aa57110cba 100644
--- a/examples/LaTeX/LaTeX.ebnf
+++ b/examples/LaTeX/LaTeX.ebnf
@@ -1,10 +1,15 @@
 # LaTeX-Grammar for DHParser
 
-@ testing = True
 @ whitespace = /[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?/  # optional whitespace, including at most one linefeed
 @ comment    = /%.*/
 
+########################################################################
+#
+# outer document structure
+#
+########################################################################
+
 
 latexdoc = preamble document
 
 preamble = { [WSPC] command }+
diff --git a/examples/LaTeX/LaTeXCompiler.py b/examples/LaTeX/LaTeXCompiler.py
index cf14b2b3cc743071bc442f6a8f93323e399c6264..6fc29b20c8e8780e26241105cc0aef1dfcd343e1 100644
--- a/examples/LaTeX/LaTeXCompiler.py
+++ b/examples/LaTeX/LaTeXCompiler.py
@@ -49,11 +49,16 @@ class LaTeXGrammar(Grammar):
     # LaTeX-Grammar for DHParser
 
-    @ testing = True
     @ whitespace = /[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?/  # optional whitespace, including at most one linefeed
     @ comment    = /%.*/
 
+    ########################################################################
+    #
+    # outer document structure
+    #
+    ########################################################################
+
 
     latexdoc = preamble document
 
     preamble = { [WSPC] command }+
 
@@ -223,7 +228,7 @@ class LaTeXGrammar(Grammar):
     paragraph = Forward()
     tabular_config = Forward()
     text_element = Forward()
-    source_hash__ = "939c094e994677d2ab894169c013cf58"
+    source_hash__ = "37585004123d6b80ecf8f67217b43479"
    parser_initialization__ = "upon instantiation"
     COMMENT__ = r'%.*'
     WHITESPACE__ = r'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?'
diff --git a/examples/MLW/MLW.ebnf b/examples/MLW/MLW.ebnf
index fd64593e0bcaa2355154d31cf5cdbc787d8f024d..a0f40d2f268d09252ed28df7f26e7ae5621bc354 100644
--- a/examples/MLW/MLW.ebnf
+++ b/examples/MLW/MLW.ebnf
@@ -1,6 +1,5 @@
 # EBNF-Syntax für MLW-Artikel
 
-@ testing = True
 @ comment       = /#.*/    # Kommentare beginnen mit '#' und reichen bis zum Zeilenende
                            # ohne das Zeilenende zu beinhalten
 
diff --git a/examples/MLW/recompile_grammar.py b/examples/MLW/recompile_grammar.py
deleted file mode 100644
index 87e3bf6c6d4252b8a6cb795e743dc14e962ba5a9..0000000000000000000000000000000000000000
--- a/examples/MLW/recompile_grammar.py
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/usr/bin/python3
-
-"""recompile_grammar.py - recompiles any .ebnf files in the current
-    directory if necessary
-
-Author: Eckhart Arnold
-
-Copyright 2017 Bavarian Academy of Sciences and Humanities
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from DHParser.dsl import recompile_grammar
-
-recompile_grammar('.')
-
-# import os
-#
-# from DHParser.ebnf import grammar_changed
-# from DHParser.dsl import compile_on_disk
-#
-#
-# def compile(name):
-#     base, ext = os.path.splitext(name)
-#     compiler_name = base + '_compiler.py'
-#     if (not os.path.exists(compiler_name) or
-#             grammar_changed(compiler_name, name)):
-#         print("recompiling parser for: " + name)
-#         errors = compile_on_disk(name)
-#         if errors:
-#             print("Errors while compiling: " + name + '!')
-#             with open(base + '_errors.txt', 'w') as f:
-#                 for e in errors:
-#                     f.write(e)
-#                     f.write('\n')
-#
-# for entry in os.listdir():
-#     if entry.lower().endswith('.ebnf') and os.path.isfile(entry):
-#         compile(entry)
diff --git a/test/test_ebnf.py b/test/test_ebnf.py
index 01f0a47bc0735f2a1f152b55b258c2de49a25f52..6a38615e0311614a6d68d85492a14ed46671197b 100644
--- a/test/test_ebnf.py
+++ b/test/test_ebnf.py
@@ -30,6 +30,7 @@ from multiprocessing import Pool
 sys.path.extend(['../', './'])
 
 from DHParser.toolkit import compile_python_object
+from DHParser.syntaxtree import has_errors
 from DHParser.parser import compile_source, WHITESPACE_PTYPE, nil_preprocessor
 from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, EBNFTransform, get_ebnf_compiler
 from DHParser.dsl import CompilationError, compileDSL, DHPARSER_IMPORTS, grammar_provider
@@ -297,13 +298,19 @@ class TestBoundaryCases:
         ebnf = """root = /.*/
             unconnected = /.*/
             """
-        try:
-            grammar = grammar_provider(ebnf)()
-            assert False, "EBNF compiler should complain about unconnected rules."
-        except CompilationError as err:
-            grammar_src = err.result
+        result, messages, AST = compile_source(ebnf, nil_preprocessor,
+                                               get_ebnf_grammar(),
+                                               get_ebnf_transformer(),
+                                               get_ebnf_compiler())
+        if messages:
+            assert not has_errors(messages), "Unconnected rules should result in a warning, " \
+                                             "not an error: " + str(messages)
+            grammar_src = result
             grammar = compile_python_object(DHPARSER_IMPORTS + grammar_src,
                                             'get_(?:\w+_)?grammar$')()
+        else:
+            assert False, "EBNF compiler should warn about unconnected rules."
+
         assert grammar['root'], "Grammar objects should be subscriptable by parser names!"
         try:
             unconnected = grammar['unconnected']
@@ -315,12 +322,6 @@ class TestBoundaryCases:
                 "a non-existant parser name!"
         except KeyError:
             pass
-        ebnf_testing = "@testing = True\n" + ebnf
-        try:
-            grammar = grammar_provider(ebnf_testing)()
-        except CompilationError:
-            assert False, "EBNF compiler should not complain about unconnected " \
-                          "rules when directive @testing is set."
 
 
 class TestSynonymDetection:
diff --git a/test/test_parser.py b/test/test_parser.py
index ce9502ca725fff93a86b1eb13e262a67d60129e0..3fd97429a580246a63ae6af5d5132baa14d143ca 100644
--- a/test/test_parser.py
+++ b/test/test_parser.py
@@ -392,7 +392,7 @@ class TestPopRetrieve:
 
 
 class TestWhitespaceHandling:
-    minilang = """@testing = True
+    minilang = """
         doc = A B
         A = "A"
         B = "B"