22.1.2021, 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit 77f7890b authored by di68kap's avatar di68kap

- wsp__ statt whitespace__ als Schlüsselwort

parent 2d8f9c9f
...@@ -89,7 +89,7 @@ except ImportError: ...@@ -89,7 +89,7 @@ except ImportError:
import re import re
from DHParser import logging, is_filename, load_if_file, \\ from DHParser import logging, is_filename, load_if_file, \\
Grammar, Compiler, nil_preprocessor, PreprocessorToken, Whitespace, \\ Grammar, Compiler, nil_preprocessor, PreprocessorToken, Whitespace, \\
Lookbehind, Lookahead, Alternative, Pop, _Token, Synonym, AllOf, SomeOf, Unordered, \\ Lookbehind, Lookahead, Alternative, Pop, Token, Synonym, AllOf, SomeOf, Unordered, \\
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, _RE, Capture, \\ Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, _RE, Capture, \\
ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \\ ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \\
grammar_changed, last_value, counterpart, accumulate, PreprocessorFunc, \\ grammar_changed, last_value, counterpart, accumulate, PreprocessorFunc, \\
......
...@@ -31,7 +31,7 @@ from functools import partial ...@@ -31,7 +31,7 @@ from functools import partial
from DHParser.compile import CompilerError, Compiler from DHParser.compile import CompilerError, Compiler
from DHParser.error import Error from DHParser.error import Error
from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, Whitespace, _RE, \ from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, Whitespace, _RE, \
NegativeLookahead, Alternative, Series, Option, OneOrMore, ZeroOrMore, _Token NegativeLookahead, Alternative, Series, Option, OneOrMore, ZeroOrMore, Token, _Token
from DHParser.preprocess import nil_preprocessor, PreprocessorFunc from DHParser.preprocess import nil_preprocessor, PreprocessorFunc
from DHParser.syntaxtree import Node, WHITESPACE_PTYPE, TOKEN_PTYPE from DHParser.syntaxtree import Node, WHITESPACE_PTYPE, TOKEN_PTYPE
from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name, re, expand_table, \ from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name, re, expand_table, \
...@@ -129,10 +129,10 @@ class EBNFGrammar(Grammar): ...@@ -129,10 +129,10 @@ class EBNFGrammar(Grammar):
parser_initialization__ = "upon instantiation" parser_initialization__ = "upon instantiation"
COMMENT__ = r'#.*(?:\n|$)' COMMENT__ = r'#.*(?:\n|$)'
WHITESPACE__ = r'\s*' WHITESPACE__ = r'\s*'
WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__) WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = '' wspL__ = ''
wspR__ = WSP__ wspR__ = WSP_RE__
whitespace__ = Whitespace(WSP__) whitespace__ = Whitespace(WSP_RE__)
EOF = NegativeLookahead(RegExp('.')) EOF = NegativeLookahead(RegExp('.'))
list_ = Series(RegExp('\\w+'), whitespace__, ZeroOrMore(Series(_Token(","), RegExp('\\w+'), whitespace__))) list_ = Series(RegExp('\\w+'), whitespace__, ZeroOrMore(Series(_Token(","), RegExp('\\w+'), whitespace__)))
whitespace = Series(RegExp('~'), whitespace__) whitespace = Series(RegExp('~'), whitespace__)
...@@ -382,9 +382,9 @@ class EBNFCompiler(Compiler): ...@@ -382,9 +382,9 @@ class EBNFCompiler(Compiler):
regular expressions found in the current parsing process regular expressions found in the current parsing process
""" """
COMMENT_KEYWORD = "COMMENT__" COMMENT_KEYWORD = "COMMENT__"
WHITESPACE_KEYWORD = "WSP__" WHITESPACE_KEYWORD = "WSP_RE__"
RAW_WS_KEYWORD = "WHITESPACE__" RAW_WS_KEYWORD = "WHITESPACE__"
WHITESPACE_PARSER_KEYWORD = "whitespace__" WHITESPACE_PARSER_KEYWORD = "wsp__"
RESERVED_SYMBOLS = {WHITESPACE_KEYWORD, RAW_WS_KEYWORD, COMMENT_KEYWORD} RESERVED_SYMBOLS = {WHITESPACE_KEYWORD, RAW_WS_KEYWORD, COMMENT_KEYWORD}
AST_ERROR = "Badly structured syntax tree. " \ AST_ERROR = "Badly structured syntax tree. " \
"Potentially due to erroneous AST transformation." "Potentially due to erroneous AST transformation."
...@@ -922,12 +922,16 @@ class EBNFCompiler(Compiler): ...@@ -922,12 +922,16 @@ class EBNFCompiler(Compiler):
def on_literal(self, node: Node) -> str: def on_literal(self, node: Node) -> str:
return '_Token(' + node.content.replace('\\', r'\\') + ')' center = 'Token(' + node.content.replace('\\', r'\\') + ')'
left = self.WHITESPACE_PARSER_KEYWORD if 'left' in self.directives['literalws'] else ''
right = self.WHITESPACE_PARSER_KEYWORD if 'right' in self.directives['literalws'] else ''
if left or right:
return 'Series(' + ", ".join(item for item in (left, center, right) if item) + ')'
return center
def on_plaintext(self, node: Node) -> str: def on_plaintext(self, node: Node) -> str:
return '_Token(' + node.content.replace('\\', r'\\').replace('`', '"') \ return 'Token(' + node.content.replace('\\', r'\\') + ')'
+ ", wL='', wR='')"
def on_regexp(self, node: Node) -> str: def on_regexp(self, node: Node) -> str:
...@@ -962,7 +966,7 @@ class EBNFCompiler(Compiler): ...@@ -962,7 +966,7 @@ class EBNFCompiler(Compiler):
def on_whitespace(self, node: Node) -> str: def on_whitespace(self, node: Node) -> str:
return 'whitespace__' return self.WHITESPACE_PARSER_KEYWORD
def on_list_(self, node) -> Set[str]: def on_list_(self, node) -> Set[str]:
......
...@@ -47,6 +47,7 @@ __all__ = ('Parser', ...@@ -47,6 +47,7 @@ __all__ = ('Parser',
'UnknownParserError', 'UnknownParserError',
'Grammar', 'Grammar',
'PreprocessorToken', 'PreprocessorToken',
'Token',
'RegExp', 'RegExp',
'Whitespace', 'Whitespace',
'_RE', '_RE',
...@@ -426,13 +427,13 @@ class Grammar: ...@@ -426,13 +427,13 @@ class Grammar:
Attributes: Attributes:
COMMENT__: regular expression string for matching comments COMMENT__: regular expression string for matching comments
WSP__: regular expression for whitespace and comments WSP_RE__: regular expression for whitespace and comments
wspL__: regular expression string for left aligned whitespace, wspL__: regular expression string for left aligned whitespace,
which either equals WSP__ or is empty. which either equals WSP_RE__ or is empty.
wspR__: regular expression string for right aligned whitespace, wspR__: regular expression string for right aligned whitespace,
which either equals WSP__ or is empty. which either equals WSP_RE__ or is empty.
root__: The root parser of the grammar. Theoretically, all parsers of the root__: The root parser of the grammar. Theoretically, all parsers of the
grammar should be reachable by the root parser. However, for testing grammar should be reachable by the root parser. However, for testing
...@@ -544,9 +545,9 @@ class Grammar: ...@@ -544,9 +545,9 @@ class Grammar:
parser_initialization__ = "pending" # type: str parser_initialization__ = "pending" # type: str
# some default values # some default values
COMMENT__ = r'' # type: str # r'#.*(?:\n|$)' COMMENT__ = r'' # type: str # r'#.*(?:\n|$)'
WSP__ = mixin_comment(whitespace=r'[\t ]*', comment=COMMENT__) # type: str WSP_RE__ = mixin_comment(whitespace=r'[\t ]*', comment=COMMENT__) # type: str
wspL__ = '' # type: str wspL__ = '' # type: str
wspR__ = WSP__ # type: str wspR__ = WSP_RE__ # type: str
@classmethod @classmethod
...@@ -608,19 +609,19 @@ class Grammar: ...@@ -608,19 +609,19 @@ class Grammar:
# do so only arises during testing. # do so only arises during testing.
self.root__ = copy.deepcopy(root) if root else copy.deepcopy(self.__class__.root__) self.root__ = copy.deepcopy(root) if root else copy.deepcopy(self.__class__.root__)
if self.WSP__: if self.WSP_RE__:
try: try:
probe = self.whitespace__ # type: RegExp probe = self.whitespace__ # type: RegExp
assert self.whitespace__.regexp.pattern == self.WSP__ assert self.whitespace__.regexp.pattern == self.WSP_RE__
except AttributeError: except AttributeError:
self.whitespace__ = Whitespace(self.WSP__) # type: RegExp self.whitespace__ = Whitespace(self.WSP_RE__) # type: RegExp
self.whitespace__.grammar = self self.whitespace__.grammar = self
self.all_parsers__.add(self.whitespace__) # don't you forget about me... self.all_parsers__.add(self.whitespace__) # don't you forget about me...
else: else:
self.whitespace__ = cast(RegExp, ZOMBIE_PARSER) self.whitespace__ = cast(RegExp, ZOMBIE_PARSER)
assert not self.wspL__ or self.wspL__ == self.WSP__ assert not self.wspL__ or self.wspL__ == self.WSP_RE__
assert not self.wspR__ or self.wspR__ == self.WSP__ assert not self.wspR__ or self.wspR__ == self.WSP_RE__
self.wsp_left_parser__ = self.whitespace__ if self.wspL__ else ZOMBIE_PARSER self.wsp_left_parser__ = self.whitespace__ if self.wspL__ else ZOMBIE_PARSER
self.wsp_right_parser__ = self.whitespace__ if self.wspR__ else ZOMBIE_PARSER self.wsp_right_parser__ = self.whitespace__ if self.wspR__ else ZOMBIE_PARSER
......
...@@ -68,7 +68,7 @@ class ArithmeticGrammar(Grammar): ...@@ -68,7 +68,7 @@ class ArithmeticGrammar(Grammar):
parser_initialization__ = "upon instantiation" parser_initialization__ = "upon instantiation"
COMMENT__ = r'' COMMENT__ = r''
WHITESPACE__ = r'[\t ]*' WHITESPACE__ = r'[\t ]*'
WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__) WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = '' wspL__ = ''
wspR__ = WSP__ wspR__ = WSP__
test = Series(digit, constant, variable) test = Series(digit, constant, variable)
......
...@@ -110,7 +110,7 @@ class BibTeXGrammar(Grammar): ...@@ -110,7 +110,7 @@ class BibTeXGrammar(Grammar):
parser_initialization__ = "upon instantiation" parser_initialization__ = "upon instantiation"
COMMENT__ = r'(?i)%.*(?:\n|$)' COMMENT__ = r'(?i)%.*(?:\n|$)'
WHITESPACE__ = r'\s*' WHITESPACE__ = r'\s*'
WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__) WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = '' wspL__ = ''
wspR__ = WSP__ wspR__ = WSP__
whitespace__ = Whitespace(WSP__) whitespace__ = Whitespace(WSP__)
......
...@@ -105,7 +105,7 @@ class EBNFGrammar(Grammar): ...@@ -105,7 +105,7 @@ class EBNFGrammar(Grammar):
parser_initialization__ = "upon instantiation" parser_initialization__ = "upon instantiation"
COMMENT__ = r'#.*(?:\n|$)' COMMENT__ = r'#.*(?:\n|$)'
WHITESPACE__ = r'\s*' WHITESPACE__ = r'\s*'
WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__) WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = '' wspL__ = ''
wspR__ = WSP__ wspR__ = WSP__
whitespace__ = Whitespace(WSP__) whitespace__ = Whitespace(WSP__)
......
...@@ -241,7 +241,7 @@ class LaTeXGrammar(Grammar): ...@@ -241,7 +241,7 @@ class LaTeXGrammar(Grammar):
parser_initialization__ = "upon instantiation" parser_initialization__ = "upon instantiation"
COMMENT__ = r'%.*' COMMENT__ = r'%.*'
WHITESPACE__ = r'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?' WHITESPACE__ = r'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?'
WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__) WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = '' wspL__ = ''
wspR__ = WSP__ wspR__ = WSP__
whitespace__ = Whitespace(WSP__) whitespace__ = Whitespace(WSP__)
......
...@@ -86,7 +86,7 @@ class LyrikGrammar(Grammar): ...@@ -86,7 +86,7 @@ class LyrikGrammar(Grammar):
parser_initialization__ = "upon instantiation" parser_initialization__ = "upon instantiation"
COMMENT__ = r'' COMMENT__ = r''
WHITESPACE__ = r'[\t ]*' WHITESPACE__ = r'[\t ]*'
WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__) WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = '' wspL__ = ''
wspR__ = WSP__ wspR__ = WSP__
ENDE = NegativeLookahead(RegExp('.')) ENDE = NegativeLookahead(RegExp('.'))
......
...@@ -281,7 +281,7 @@ class XMLGrammar(Grammar): ...@@ -281,7 +281,7 @@ class XMLGrammar(Grammar):
parser_initialization__ = "upon instantiation" parser_initialization__ = "upon instantiation"
COMMENT__ = r'' COMMENT__ = r''
WHITESPACE__ = r'\s*' WHITESPACE__ = r'\s*'
WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__) WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = '' wspL__ = ''
wspR__ = '' wspR__ = ''
whitespace__ = Whitespace(WSP__) whitespace__ = Whitespace(WSP__)
......
...@@ -66,7 +66,7 @@ class new2Grammar(Grammar): ...@@ -66,7 +66,7 @@ class new2Grammar(Grammar):
parser_initialization__ = "upon instantiation" parser_initialization__ = "upon instantiation"
COMMENT__ = r'' COMMENT__ = r''
WHITESPACE__ = r'\s*' WHITESPACE__ = r'\s*'
WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__) WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = '' wspL__ = ''
wspR__ = WSP__ wspR__ = WSP__
whitespace__ = Whitespace(WSP__) whitespace__ = Whitespace(WSP__)
......
...@@ -91,7 +91,7 @@ class wsGrammar(Grammar): ...@@ -91,7 +91,7 @@ class wsGrammar(Grammar):
parser_initialization__ = "upon instantiation" parser_initialization__ = "upon instantiation"
COMMENT__ = r'#.*' COMMENT__ = r'#.*'
WHITESPACE__ = r'\s*' WHITESPACE__ = r'\s*'
WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__) WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = '' wspL__ = ''
wspR__ = WSP__ wspR__ = WSP__
whitespace__ = Whitespace(WSP__) whitespace__ = Whitespace(WSP__)
......
...@@ -95,7 +95,7 @@ class TestReservedSymbols: ...@@ -95,7 +95,7 @@ class TestReservedSymbols:
def test_whitespace(self): def test_whitespace(self):
lang = r""" lang = r"""
@whitespace = /\s*/ @whitespace = /\s*/
document = WSP__ { word WSP__ } document = WSP_RE__ { word WSP_RE__ }
word = /\w+/ word = /\w+/
""" """
parser = grammar_provider(lang)() parser = grammar_provider(lang)()
...@@ -104,7 +104,7 @@ class TestReservedSymbols: ...@@ -104,7 +104,7 @@ class TestReservedSymbols:
lang = r""" lang = r"""
@comment = /#.*(?:\n|$)/ @comment = /#.*(?:\n|$)/
@whitespace = /\s*/ @whitespace = /\s*/
document = WSP__ { word WSP__ } document = WSP_RE__ { word WSP_RE__ }
word = /\w+/ word = /\w+/
""" """
parser = grammar_provider(lang)() parser = grammar_provider(lang)()
...@@ -221,7 +221,7 @@ class TestCompilerErrors: ...@@ -221,7 +221,7 @@ class TestCompilerErrors:
def test_no_error(self): def test_no_error(self):
"""But reserved symbols should not be repoted as undefined. """But reserved symbols should not be repoted as undefined.
""" """
ebnf = """nothing = WSP__ | COMMENT__\n""" ebnf = """nothing = WSP_RE__ | COMMENT__\n"""
result, messages, st = compile_source(ebnf, None, get_ebnf_grammar(), result, messages, st = compile_source(ebnf, None, get_ebnf_grammar(),
get_ebnf_transformer(), get_ebnf_compiler('UndefinedSymbols')) get_ebnf_transformer(), get_ebnf_compiler('UndefinedSymbols'))
assert not bool(messages), messages assert not bool(messages), messages
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment