Commit 77f7890b authored by di68kap

- wsp__ statt whitespace__ als Schlüsselwort

parent 2d8f9c9f
......@@ -89,7 +89,7 @@ except ImportError:
import re
from DHParser import logging, is_filename, load_if_file, \\
Grammar, Compiler, nil_preprocessor, PreprocessorToken, Whitespace, \\
Lookbehind, Lookahead, Alternative, Pop, _Token, Synonym, AllOf, SomeOf, Unordered, \\
Lookbehind, Lookahead, Alternative, Pop, Token, Synonym, AllOf, SomeOf, Unordered, \\
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, _RE, Capture, \\
ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \\
grammar_changed, last_value, counterpart, accumulate, PreprocessorFunc, \\
......
......@@ -31,7 +31,7 @@ from functools import partial
from DHParser.compile import CompilerError, Compiler
from DHParser.error import Error
from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, Whitespace, _RE, \
NegativeLookahead, Alternative, Series, Option, OneOrMore, ZeroOrMore, _Token
NegativeLookahead, Alternative, Series, Option, OneOrMore, ZeroOrMore, Token, _Token
from DHParser.preprocess import nil_preprocessor, PreprocessorFunc
from DHParser.syntaxtree import Node, WHITESPACE_PTYPE, TOKEN_PTYPE
from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name, re, expand_table, \
......@@ -129,10 +129,10 @@ class EBNFGrammar(Grammar):
parser_initialization__ = "upon instantiation"
COMMENT__ = r'#.*(?:\n|$)'
WHITESPACE__ = r'\s*'
WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = ''
wspR__ = WSP__
whitespace__ = Whitespace(WSP__)
wspR__ = WSP_RE__
whitespace__ = Whitespace(WSP_RE__)
EOF = NegativeLookahead(RegExp('.'))
list_ = Series(RegExp('\\w+'), whitespace__, ZeroOrMore(Series(_Token(","), RegExp('\\w+'), whitespace__)))
whitespace = Series(RegExp('~'), whitespace__)
......@@ -382,9 +382,9 @@ class EBNFCompiler(Compiler):
regular expressions found in the current parsing process
"""
COMMENT_KEYWORD = "COMMENT__"
WHITESPACE_KEYWORD = "WSP__"
WHITESPACE_KEYWORD = "WSP_RE__"
RAW_WS_KEYWORD = "WHITESPACE__"
WHITESPACE_PARSER_KEYWORD = "whitespace__"
WHITESPACE_PARSER_KEYWORD = "wsp__"
RESERVED_SYMBOLS = {WHITESPACE_KEYWORD, RAW_WS_KEYWORD, COMMENT_KEYWORD}
AST_ERROR = "Badly structured syntax tree. " \
"Potentially due to erroneous AST transformation."
......@@ -922,12 +922,16 @@ class EBNFCompiler(Compiler):
def on_literal(self, node: Node) -> str:
return '_Token(' + node.content.replace('\\', r'\\') + ')'
center = 'Token(' + node.content.replace('\\', r'\\') + ')'
left = self.WHITESPACE_PARSER_KEYWORD if 'left' in self.directives['literalws'] else ''
right = self.WHITESPACE_PARSER_KEYWORD if 'right' in self.directives['literalws'] else ''
if left or right:
return 'Series(' + ", ".join(item for item in (left, center, right) if item) + ')'
return center
def on_plaintext(self, node: Node) -> str:
return '_Token(' + node.content.replace('\\', r'\\').replace('`', '"') \
+ ", wL='', wR='')"
return 'Token(' + node.content.replace('\\', r'\\') + ')'
def on_regexp(self, node: Node) -> str:
......@@ -962,7 +966,7 @@ class EBNFCompiler(Compiler):
def on_whitespace(self, node: Node) -> str:
return 'whitespace__'
return self.WHITESPACE_PARSER_KEYWORD
def on_list_(self, node) -> Set[str]:
......
......@@ -47,6 +47,7 @@ __all__ = ('Parser',
'UnknownParserError',
'Grammar',
'PreprocessorToken',
'Token',
'RegExp',
'Whitespace',
'_RE',
......@@ -426,13 +427,13 @@ class Grammar:
Attributes:
COMMENT__: regular expression string for matching comments
WSP__: regular expression for whitespace and comments
WSP_RE__: regular expression for whitespace and comments
wspL__: regular expression string for left aligned whitespace,
which either equals WSP__ or is empty.
which either equals WSP_RE__ or is empty.
wspR__: regular expression string for right aligned whitespace,
which either equals WSP__ or is empty.
which either equals WSP_RE__ or is empty.
root__: The root parser of the grammar. Theoretically, all parsers of the
grammar should be reachable by the root parser. However, for testing
......@@ -544,9 +545,9 @@ class Grammar:
parser_initialization__ = "pending" # type: str
# some default values
COMMENT__ = r'' # type: str # r'#.*(?:\n|$)'
WSP__ = mixin_comment(whitespace=r'[\t ]*', comment=COMMENT__) # type: str
WSP_RE__ = mixin_comment(whitespace=r'[\t ]*', comment=COMMENT__) # type: str
wspL__ = '' # type: str
wspR__ = WSP__ # type: str
wspR__ = WSP_RE__ # type: str
@classmethod
......@@ -608,19 +609,19 @@ class Grammar:
# do so only arises during testing.
self.root__ = copy.deepcopy(root) if root else copy.deepcopy(self.__class__.root__)
if self.WSP__:
if self.WSP_RE__:
try:
probe = self.whitespace__ # type: RegExp
assert self.whitespace__.regexp.pattern == self.WSP__
assert self.whitespace__.regexp.pattern == self.WSP_RE__
except AttributeError:
self.whitespace__ = Whitespace(self.WSP__) # type: RegExp
self.whitespace__ = Whitespace(self.WSP_RE__) # type: RegExp
self.whitespace__.grammar = self
self.all_parsers__.add(self.whitespace__) # don't you forget about me...
else:
self.whitespace__ = cast(RegExp, ZOMBIE_PARSER)
assert not self.wspL__ or self.wspL__ == self.WSP__
assert not self.wspR__ or self.wspR__ == self.WSP__
assert not self.wspL__ or self.wspL__ == self.WSP_RE__
assert not self.wspR__ or self.wspR__ == self.WSP_RE__
self.wsp_left_parser__ = self.whitespace__ if self.wspL__ else ZOMBIE_PARSER
self.wsp_right_parser__ = self.whitespace__ if self.wspR__ else ZOMBIE_PARSER
......
......@@ -68,7 +68,7 @@ class ArithmeticGrammar(Grammar):
parser_initialization__ = "upon instantiation"
COMMENT__ = r''
WHITESPACE__ = r'[\t ]*'
WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = ''
wspR__ = WSP__
test = Series(digit, constant, variable)
......
......@@ -110,7 +110,7 @@ class BibTeXGrammar(Grammar):
parser_initialization__ = "upon instantiation"
COMMENT__ = r'(?i)%.*(?:\n|$)'
WHITESPACE__ = r'\s*'
WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = ''
wspR__ = WSP__
whitespace__ = Whitespace(WSP__)
......
......@@ -105,7 +105,7 @@ class EBNFGrammar(Grammar):
parser_initialization__ = "upon instantiation"
COMMENT__ = r'#.*(?:\n|$)'
WHITESPACE__ = r'\s*'
WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = ''
wspR__ = WSP__
whitespace__ = Whitespace(WSP__)
......
......@@ -241,7 +241,7 @@ class LaTeXGrammar(Grammar):
parser_initialization__ = "upon instantiation"
COMMENT__ = r'%.*'
WHITESPACE__ = r'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?'
WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = ''
wspR__ = WSP__
whitespace__ = Whitespace(WSP__)
......
......@@ -86,7 +86,7 @@ class LyrikGrammar(Grammar):
parser_initialization__ = "upon instantiation"
COMMENT__ = r''
WHITESPACE__ = r'[\t ]*'
WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = ''
wspR__ = WSP__
ENDE = NegativeLookahead(RegExp('.'))
......
......@@ -281,7 +281,7 @@ class XMLGrammar(Grammar):
parser_initialization__ = "upon instantiation"
COMMENT__ = r''
WHITESPACE__ = r'\s*'
WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = ''
wspR__ = ''
whitespace__ = Whitespace(WSP__)
......
......@@ -66,7 +66,7 @@ class new2Grammar(Grammar):
parser_initialization__ = "upon instantiation"
COMMENT__ = r''
WHITESPACE__ = r'\s*'
WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = ''
wspR__ = WSP__
whitespace__ = Whitespace(WSP__)
......
......@@ -91,7 +91,7 @@ class wsGrammar(Grammar):
parser_initialization__ = "upon instantiation"
COMMENT__ = r'#.*'
WHITESPACE__ = r'\s*'
WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = ''
wspR__ = WSP__
whitespace__ = Whitespace(WSP__)
......
......@@ -95,7 +95,7 @@ class TestReservedSymbols:
def test_whitespace(self):
lang = r"""
@whitespace = /\s*/
document = WSP__ { word WSP__ }
document = WSP_RE__ { word WSP_RE__ }
word = /\w+/
"""
parser = grammar_provider(lang)()
......@@ -104,7 +104,7 @@ class TestReservedSymbols:
lang = r"""
@comment = /#.*(?:\n|$)/
@whitespace = /\s*/
document = WSP__ { word WSP__ }
document = WSP_RE__ { word WSP_RE__ }
word = /\w+/
"""
parser = grammar_provider(lang)()
......@@ -221,7 +221,7 @@ class TestCompilerErrors:
def test_no_error(self):
"""But reserved symbols should not be reported as undefined.
"""
ebnf = """nothing = WSP__ | COMMENT__\n"""
ebnf = """nothing = WSP_RE__ | COMMENT__\n"""
result, messages, st = compile_source(ebnf, None, get_ebnf_grammar(),
get_ebnf_transformer(), get_ebnf_compiler('UndefinedSymbols'))
assert not bool(messages), messages
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment