From 77f7890ba2b604c1311e101da16f26a46fe7d714 Mon Sep 17 00:00:00 2001 From: di68kap Date: Tue, 10 Jul 2018 13:51:45 +0200 Subject: [PATCH] =?UTF-8?q?-=20wsp=5F=5F=20statt=20whitespace=5F=5F=20als?= =?UTF-8?q?=20Schl=C3=BCsselwort?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DHParser/dsl.py | 2 +- DHParser/ebnf.py | 24 +++++++++++++--------- DHParser/parse.py | 21 ++++++++++--------- examples/Arithmetic/ArithmeticCompiler.py | 2 +- examples/BibTeX/BibTeXCompiler.py | 2 +- examples/EBNF/EBNFCompiler.py | 2 +- examples/LaTeX/LaTeXCompiler.py | 2 +- examples/Tutorial/LyrikCompiler_example.py | 2 +- examples/XML/XMLCompiler.py | 2 +- experimental/new2/new2Compiler.py | 2 +- experimental/ws/wsCompiler.py | 2 +- test/test_ebnf.py | 6 +++--- 12 files changed, 37 insertions(+), 32 deletions(-) diff --git a/DHParser/dsl.py b/DHParser/dsl.py index 253dabd..de20061 100644 --- a/DHParser/dsl.py +++ b/DHParser/dsl.py @@ -89,7 +89,7 @@ except ImportError: import re from DHParser import logging, is_filename, load_if_file, \\ Grammar, Compiler, nil_preprocessor, PreprocessorToken, Whitespace, \\ - Lookbehind, Lookahead, Alternative, Pop, _Token, Synonym, AllOf, SomeOf, Unordered, \\ + Lookbehind, Lookahead, Alternative, Pop, Token, Synonym, AllOf, SomeOf, Unordered, \\ Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, _RE, Capture, \\ ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \\ grammar_changed, last_value, counterpart, accumulate, PreprocessorFunc, \\ diff --git a/DHParser/ebnf.py b/DHParser/ebnf.py index d68225e..460ac90 100644 --- a/DHParser/ebnf.py +++ b/DHParser/ebnf.py @@ -31,7 +31,7 @@ from functools import partial from DHParser.compile import CompilerError, Compiler from DHParser.error import Error from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, Whitespace, _RE, \ - NegativeLookahead, Alternative, Series, Option, OneOrMore, ZeroOrMore, _Token + NegativeLookahead, Alternative, Series, Option, OneOrMore, ZeroOrMore, Token, _Token from DHParser.preprocess import nil_preprocessor, PreprocessorFunc from DHParser.syntaxtree import Node, WHITESPACE_PTYPE, TOKEN_PTYPE from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name, re, expand_table, \ @@ -129,10 +129,10 @@ class EBNFGrammar(Grammar): parser_initialization__ = "upon instantiation" COMMENT__ = r'#.*(?:\n|$)' WHITESPACE__ = r'\s*' - WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__) + WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__) wspL__ = '' - wspR__ = WSP__ - whitespace__ = Whitespace(WSP__) + wspR__ = WSP_RE__ + whitespace__ = Whitespace(WSP_RE__) EOF = NegativeLookahead(RegExp('.')) list_ = Series(RegExp('\\w+'), whitespace__, ZeroOrMore(Series(_Token(","), RegExp('\\w+'), whitespace__))) whitespace = Series(RegExp('~'), whitespace__) @@ -382,9 +382,9 @@ class EBNFCompiler(Compiler): regular expressions found in the current parsing process """ COMMENT_KEYWORD = "COMMENT__" - WHITESPACE_KEYWORD = "WSP__" + WHITESPACE_KEYWORD = "WSP_RE__" RAW_WS_KEYWORD = "WHITESPACE__" - WHITESPACE_PARSER_KEYWORD = "whitespace__" + WHITESPACE_PARSER_KEYWORD = "wsp__" RESERVED_SYMBOLS = {WHITESPACE_KEYWORD, RAW_WS_KEYWORD, COMMENT_KEYWORD} AST_ERROR = "Badly structured syntax tree. " \ "Potentially due to erroneous AST transformation." @@ -922,12 +922,16 @@ class EBNFCompiler(Compiler): def on_literal(self, node: Node) -> str: - return '_Token(' + node.content.replace('\\', r'\\') + ')' + center = 'Token(' + node.content.replace('\\', r'\\') + ')' + left = self.WHITESPACE_PARSER_KEYWORD if 'left' in self.directives['literalws'] else '' + right = self.WHITESPACE_PARSER_KEYWORD if 'right' in self.directives['literalws'] else '' + if left or right: + return 'Series(' + ", ".join(item for item in (left, center, right) if item) + ')' + return center def on_plaintext(self, node: Node) -> str: - return '_Token(' + node.content.replace('\\', r'\\').replace('`', '"') \ - + ", wL='', wR='')" + return 'Token(' + node.content.replace('\\', r'\\') + ')' def on_regexp(self, node: Node) -> str: @@ -962,7 +966,7 @@ class EBNFCompiler(Compiler): def on_whitespace(self, node: Node) -> str: - return 'whitespace__' + return self.WHITESPACE_PARSER_KEYWORD def on_list_(self, node) -> Set[str]: diff --git a/DHParser/parse.py b/DHParser/parse.py index 9c5d272..d11073d 100644 --- a/DHParser/parse.py +++ b/DHParser/parse.py @@ -47,6 +47,7 @@ __all__ = ('Parser', 'UnknownParserError', 'Grammar', 'PreprocessorToken', + 'Token', 'RegExp', 'Whitespace', '_RE', @@ -426,13 +427,13 @@ class Grammar: Attributes: COMMENT__: regular expression string for matching comments - WSP__: regular expression for whitespace and comments + WSP_RE__: regular expression for whitespace and comments wspL__: regular expression string for left aligned whitespace, - which either equals WSP__ or is empty. + which either equals WSP_RE__ or is empty. wspR__: regular expression string for right aligned whitespace, - which either equals WSP__ or is empty. + which either equals WSP_RE__ or is empty. root__: The root parser of the grammar. Theoretically, all parsers of the grammar should be reachable by the root parser. However, for testing @@ -544,9 +545,9 @@ class Grammar: parser_initialization__ = "pending" # type: str # some default values COMMENT__ = r'' # type: str # r'#.*(?:\n|$)' - WSP__ = mixin_comment(whitespace=r'[\t ]*', comment=COMMENT__) # type: str + WSP_RE__ = mixin_comment(whitespace=r'[\t ]*', comment=COMMENT__) # type: str wspL__ = '' # type: str - wspR__ = WSP__ # type: str + wspR__ = WSP_RE__ # type: str @classmethod @@ -608,19 +609,19 @@ class Grammar: # do so only arises during testing. self.root__ = copy.deepcopy(root) if root else copy.deepcopy(self.__class__.root__) - if self.WSP__: + if self.WSP_RE__: try: probe = self.whitespace__ # type: RegExp - assert self.whitespace__.regexp.pattern == self.WSP__ + assert self.whitespace__.regexp.pattern == self.WSP_RE__ except AttributeError: - self.whitespace__ = Whitespace(self.WSP__) # type: RegExp + self.whitespace__ = Whitespace(self.WSP_RE__) # type: RegExp self.whitespace__.grammar = self self.all_parsers__.add(self.whitespace__) # don't you forget about me... else: self.whitespace__ = cast(RegExp, ZOMBIE_PARSER) - assert not self.wspL__ or self.wspL__ == self.WSP__ - assert not self.wspR__ or self.wspR__ == self.WSP__ + assert not self.wspL__ or self.wspL__ == self.WSP_RE__ + assert not self.wspR__ or self.wspR__ == self.WSP_RE__ self.wsp_left_parser__ = self.whitespace__ if self.wspL__ else ZOMBIE_PARSER self.wsp_right_parser__ = self.whitespace__ if self.wspR__ else ZOMBIE_PARSER diff --git a/examples/Arithmetic/ArithmeticCompiler.py b/examples/Arithmetic/ArithmeticCompiler.py index c2b8fe2..310c8ab 100644 --- a/examples/Arithmetic/ArithmeticCompiler.py +++ b/examples/Arithmetic/ArithmeticCompiler.py @@ -68,7 +68,7 @@ class ArithmeticGrammar(Grammar): parser_initialization__ = "upon instantiation" COMMENT__ = r'' WHITESPACE__ = r'[\t ]*' - WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__) + WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__) wspL__ = '' wspR__ = WSP__ test = Series(digit, constant, variable) diff --git a/examples/BibTeX/BibTeXCompiler.py b/examples/BibTeX/BibTeXCompiler.py index 2e5d092..2b278fb 100755 --- a/examples/BibTeX/BibTeXCompiler.py +++ b/examples/BibTeX/BibTeXCompiler.py @@ -110,7 +110,7 @@ class BibTeXGrammar(Grammar): parser_initialization__ = "upon instantiation" COMMENT__ = r'(?i)%.*(?:\n|$)' WHITESPACE__ = r'\s*' - WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__) + WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__) wspL__ = '' wspR__ = WSP__ whitespace__ = Whitespace(WSP__) diff --git a/examples/EBNF/EBNFCompiler.py b/examples/EBNF/EBNFCompiler.py index 05f3ebb..0d773a8 100755 --- a/examples/EBNF/EBNFCompiler.py +++ b/examples/EBNF/EBNFCompiler.py @@ -105,7 +105,7 @@ class EBNFGrammar(Grammar): parser_initialization__ = "upon instantiation" COMMENT__ = r'#.*(?:\n|$)' WHITESPACE__ = r'\s*' - WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__) + WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__) wspL__ = '' wspR__ = WSP__ whitespace__ = Whitespace(WSP__) diff --git a/examples/LaTeX/LaTeXCompiler.py b/examples/LaTeX/LaTeXCompiler.py index ec017c1..812c43e 100755 --- a/examples/LaTeX/LaTeXCompiler.py +++ b/examples/LaTeX/LaTeXCompiler.py @@ -241,7 +241,7 @@ class LaTeXGrammar(Grammar): parser_initialization__ = "upon instantiation" COMMENT__ = r'%.*' WHITESPACE__ = r'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?' - WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__) + WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__) wspL__ = '' wspR__ = WSP__ whitespace__ = Whitespace(WSP__) diff --git a/examples/Tutorial/LyrikCompiler_example.py b/examples/Tutorial/LyrikCompiler_example.py index 8e00fe4..1fbf180 100755 --- a/examples/Tutorial/LyrikCompiler_example.py +++ b/examples/Tutorial/LyrikCompiler_example.py @@ -86,7 +86,7 @@ class LyrikGrammar(Grammar): parser_initialization__ = "upon instantiation" COMMENT__ = r'' WHITESPACE__ = r'[\t ]*' - WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__) + WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__) wspL__ = '' wspR__ = WSP__ ENDE = NegativeLookahead(RegExp('.')) diff --git a/examples/XML/XMLCompiler.py b/examples/XML/XMLCompiler.py index 196a636..c9de098 100755 --- a/examples/XML/XMLCompiler.py +++ b/examples/XML/XMLCompiler.py @@ -281,7 +281,7 @@ class XMLGrammar(Grammar): parser_initialization__ = "upon instantiation" COMMENT__ = r'' WHITESPACE__ = r'\s*' - WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__) + WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__) wspL__ = '' wspR__ = '' whitespace__ = Whitespace(WSP__) diff --git a/experimental/new2/new2Compiler.py b/experimental/new2/new2Compiler.py index 9e32c6b..05783a9 100755 --- a/experimental/new2/new2Compiler.py +++ b/experimental/new2/new2Compiler.py @@ -66,7 +66,7 @@ class new2Grammar(Grammar): parser_initialization__ = "upon instantiation" COMMENT__ = r'' WHITESPACE__ = r'\s*' - WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__) + WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__) wspL__ = '' wspR__ = WSP__ whitespace__ = Whitespace(WSP__) diff --git a/experimental/ws/wsCompiler.py b/experimental/ws/wsCompiler.py index 1f1445f..787f17a 100755 --- a/experimental/ws/wsCompiler.py +++ b/experimental/ws/wsCompiler.py @@ -91,7 +91,7 @@ class wsGrammar(Grammar): parser_initialization__ = "upon instantiation" COMMENT__ = r'#.*' WHITESPACE__ = r'\s*' - WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__) + WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__) wspL__ = '' wspR__ = WSP__ whitespace__ = Whitespace(WSP__) diff --git a/test/test_ebnf.py b/test/test_ebnf.py index 1c41dae..c97ced1 100644 --- a/test/test_ebnf.py +++ b/test/test_ebnf.py @@ -95,7 +95,7 @@ class TestReservedSymbols: def test_whitespace(self): lang = r""" @whitespace = /\s*/ - document = WSP__ { word WSP__ } + document = WSP_RE__ { word WSP_RE__ } word = /\w+/ """ parser = grammar_provider(lang)() @@ -104,7 +104,7 @@ class TestReservedSymbols: lang = r""" @comment = /#.*(?:\n|$)/ @whitespace = /\s*/ - document = WSP__ { word WSP__ } + document = WSP_RE__ { word WSP_RE__ } word = /\w+/ """ parser = grammar_provider(lang)() @@ -221,7 +221,7 @@ class TestCompilerErrors: def test_no_error(self): """But reserved symbols should not be repoted as undefined. """ - ebnf = """nothing = WSP__ | COMMENT__\n""" + ebnf = """nothing = WSP_RE__ | COMMENT__\n""" result, messages, st = compile_source(ebnf, None, get_ebnf_grammar(), get_ebnf_transformer(), get_ebnf_compiler('UndefinedSymbols')) assert not bool(messages), messages -- GitLab