2.12.2021, 9:00 - 11:00: Due to updates, GitLab may be unavailable for a few minutes between 09:00 and 11:00.

Commit f564f606 authored by di68kap's avatar di68kap
Browse files

sync commit

parents f2f3e6e2 72bec2ac
......@@ -95,7 +95,7 @@ Allow to specify parsers/nodes, the result of which will be dropped
right away, so that the nodes they produce do not need to be removed
during the AST-Transformations. Typical candidates would be:
1. Tokens ":Token"
1. Tokens ":_Token"
2. Whitespace ":Whitespace" (in some cases)
3. empty Nodes
......@@ -143,8 +143,8 @@ parsers:
"contains" another parser without its calls being run through the
parser guard, but that records every call of the parser and its
results, e.g. to trace the `option`-parser from the ebnf-parser (see
DHParser/ebnf.py) you'd write: `option = Trace(Series(Token("["),
expression, Token("]"), mandatory=1))`
DHParser/ebnf.py) you'd write: `option = Trace(Series(_Token("["),
expression, _Token("]"), mandatory=1))`
- For the ebnf-representation a tracing-prefix could be added, say `?`,
e.g. `option = ?("[" §expression "]")` or, alternatively, `?option =
......
......@@ -90,7 +90,7 @@ except ImportError:
from DHParser import logging, is_filename, load_if_file, \\
Grammar, Compiler, nil_preprocessor, PreprocessorToken, Whitespace, \\
Lookbehind, Lookahead, Alternative, Pop, Token, Synonym, AllOf, SomeOf, Unordered, \\
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture, \\
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \\
ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \\
grammar_changed, last_value, counterpart, accumulate, PreprocessorFunc, \\
Node, TransformationFunc, TransformationDict, transformation_factory, \\
......
......@@ -30,7 +30,7 @@ from functools import partial
from DHParser.compile import CompilerError, Compiler
from DHParser.error import Error
from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, Whitespace, RE, \
from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, Whitespace, \
NegativeLookahead, Alternative, Series, Option, OneOrMore, ZeroOrMore, Token
from DHParser.preprocess import nil_preprocessor, PreprocessorFunc
from DHParser.syntaxtree import Node, WHITESPACE_PTYPE, TOKEN_PTYPE
......@@ -117,7 +117,7 @@ class EBNFGrammar(Grammar):
literal = /"(?:[^"]|\\")*?"/~ # e.g. "(", '+', 'while'
| /'(?:[^']|\\')*?'/~ # whitespace following literals will be ignored tacitly.
plaintext = /`(?:[^"]|\\")*?`/~ # like literal but does not eat whitespace
regexp = /~?\/(?:\\\/|[^\/])*?\/~?/~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
regexp = /\/(?:\\\/|[^\/])*?\//~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
# '~' is a whitespace-marker, if present leading or trailing
# whitespace of a regular expression will be ignored tacitly.
whitespace = /~/~ # implicit or default whitespace
......@@ -126,38 +126,41 @@ class EBNFGrammar(Grammar):
EOF = !/./
"""
expression = Forward()
source_hash__ = "3fc9f5a340f560e847d9af0b61a68743"
parser_initialization__ = "upon instantiation"
COMMENT__ = r'#.*(?:\n|$)'
WHITESPACE__ = r'\s*'
WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = ''
wspR__ = WSP__
whitespace__ = Whitespace(WSP__)
wspR__ = WSP_RE__
wsp__ = Whitespace(WSP_RE__)
EOF = NegativeLookahead(RegExp('.'))
list_ = Series(RE('\\w+'), ZeroOrMore(Series(Token(","), RE('\\w+'))))
whitespace = RE('~')
regexp = RE('~?/(?:\\\\/|[^/])*?/~?')
plaintext = RE('`(?:[^"]|\\\\")*?`')
literal = Alternative(RE('"(?:[^"]|\\\\")*?"'), RE("'(?:[^']|\\\\')*?'"))
symbol = RE('(?!\\d)\\w+')
option = Series(Token("["), expression, Token("]"), mandatory=1)
repetition = Series(Token("{"), expression, Token("}"), mandatory=1)
oneormore = Series(Token("{"), expression, Token("}+"))
unordered = Series(Token("<"), expression, Token(">"), mandatory=1)
group = Series(Token("("), expression, Token(")"), mandatory=1)
retrieveop = Alternative(Token("::"), Token(":"))
flowmarker = Alternative(Token("!"), Token("&"), Token("-!"), Token("-&"))
factor = Alternative(Series(Option(flowmarker), Option(retrieveop), symbol, NegativeLookahead(Token("="))),
list_ = Series(RegExp('\\w+'), wsp__, ZeroOrMore(Series(Series(Token(","), wsp__), RegExp('\\w+'), wsp__)))
whitespace = Series(RegExp('~'), wsp__)
regexp = Series(RegExp('/(?:\\\\/|[^/])*?/'), wsp__)
plaintext = Series(RegExp('`(?:[^"]|\\\\")*?`'), wsp__)
literal = Alternative(Series(RegExp('"(?:[^"]|\\\\")*?"'), wsp__), Series(RegExp("'(?:[^']|\\\\')*?'"), wsp__))
symbol = Series(RegExp('(?!\\d)\\w+'), wsp__)
option = Series(Series(Token("["), wsp__), expression, Series(Token("]"), wsp__), mandatory=1)
repetition = Series(Series(Token("{"), wsp__), expression, Series(Token("}"), wsp__), mandatory=1)
oneormore = Series(Series(Token("{"), wsp__), expression, Series(Token("}+"), wsp__))
unordered = Series(Series(Token("<"), wsp__), expression, Series(Token(">"), wsp__), mandatory=1)
group = Series(Series(Token("("), wsp__), expression, Series(Token(")"), wsp__), mandatory=1)
retrieveop = Alternative(Series(Token("::"), wsp__), Series(Token(":"), wsp__))
flowmarker = Alternative(Series(Token("!"), wsp__), Series(Token("&"), wsp__),
Series(Token("-!"), wsp__), Series(Token("-&"), wsp__))
factor = Alternative(Series(Option(flowmarker), Option(retrieveop), symbol,
NegativeLookahead(Series(Token("="), wsp__))),
Series(Option(flowmarker), literal), Series(Option(flowmarker), plaintext),
Series(Option(flowmarker), regexp), Series(Option(flowmarker), whitespace),
Series(Option(flowmarker), oneormore), Series(Option(flowmarker), group),
Series(Option(flowmarker), unordered), repetition, option)
term = OneOrMore(Series(Option(Token("§")), factor))
expression.set(Series(term, ZeroOrMore(Series(Token("|"), term))))
directive = Series(Token("@"), symbol, Token("="), Alternative(regexp, literal, list_), mandatory=1)
definition = Series(symbol, Token("="), expression, mandatory=1)
syntax = Series(Option(RE('', wR='', wL=WSP__)), ZeroOrMore(Alternative(definition, directive)), EOF, mandatory=2)
term = OneOrMore(Series(Option(Series(Token("§"), wsp__)), factor))
expression.set(Series(term, ZeroOrMore(Series(Series(Token("|"), wsp__), term))))
directive = Series(Series(Token("@"), wsp__), symbol, Series(Token("="), wsp__),
Alternative(regexp, literal, list_), mandatory=1)
definition = Series(symbol, Series(Token("="), wsp__), expression, mandatory=1)
syntax = Series(Option(Series(wsp__, RegExp(''))),
ZeroOrMore(Alternative(definition, directive)), EOF, mandatory=2)
root__ = syntax
......@@ -382,9 +385,9 @@ class EBNFCompiler(Compiler):
regular expressions found in the current parsing process
"""
COMMENT_KEYWORD = "COMMENT__"
WHITESPACE_KEYWORD = "WSP__"
WHITESPACE_KEYWORD = "WSP_RE__"
RAW_WS_KEYWORD = "WHITESPACE__"
WHITESPACE_PARSER_KEYWORD = "whitespace__"
WHITESPACE_PARSER_KEYWORD = "wsp__"
RESERVED_SYMBOLS = {WHITESPACE_KEYWORD, RAW_WS_KEYWORD, COMMENT_KEYWORD}
AST_ERROR = "Badly structured syntax tree. " \
"Potentially due to erroneous AST transformation."
......@@ -459,7 +462,7 @@ class EBNFCompiler(Compiler):
elif rule.startswith('Synonym'):
transformations = '[reduce_single_child]'
transtable.append(' "' + name + '": %s,' % transformations)
transtable.append(' ":Token, :RE": reduce_single_child,')
transtable.append(' ":Token": reduce_single_child,')
transtable += [' "*": replace_by_single_child', '}', '']
transtable += [TRANSFORMER_FACTORY.format(NAME=self.grammar_name)]
return '\n'.join(transtable)
......@@ -778,7 +781,6 @@ class EBNFCompiler(Compiler):
name for the particular non-terminal.
"""
arguments = [self.compile(r) for r in node.children] + custom_args
# node.error_flag = max(node.error_flag, max(t.error_flag for t in node.children))
return parser_class + '(' + ', '.join(arguments) + ')'
......@@ -921,12 +923,22 @@ class EBNFCompiler(Compiler):
def on_literal(self, node: Node) -> str:
return 'Token(' + node.content.replace('\\', r'\\') + ')'
center = 'Token(' + node.content.replace('\\', r'\\') + ')'
left = self.WHITESPACE_PARSER_KEYWORD if 'left' in self.directives['literalws'] else ''
right = self.WHITESPACE_PARSER_KEYWORD if 'right' in self.directives['literalws'] else ''
if left or right:
return 'Series(' + ", ".join(item for item in (left, center, right) if item) + ')'
return center
def on_plaintext(self, node: Node) -> str:
return 'Token(' + node.content.replace('\\', r'\\').replace('`', '"') \
+ ", wL='', wR='')"
tk = node.content.replace('\\', r'\\')
rpl = '"' if tk.find('"') < 0 else "'" if tk.find("'") < 0 else ''
if rpl:
tk = rpl + tk[1:-1] + rpl
else:
tk = rpl + tk.replace('"', '\\"')[1:-1] + rpl
return 'Token(' + tk + ')'
def on_regexp(self, node: Node) -> str:
......@@ -935,7 +947,7 @@ class EBNFCompiler(Compiler):
if rx[0] == '/' and rx[-1] == '/':
parser = 'RegExp('
else:
parser = 'RE('
parser = '_RE('
if rx[:2] == '~/':
if not 'left' in self.directives['literalws']:
name = ['wL=' + self.WHITESPACE_KEYWORD] + name
......@@ -961,7 +973,7 @@ class EBNFCompiler(Compiler):
def on_whitespace(self, node: Node) -> str:
return 'whitespace__'
return self.WHITESPACE_PARSER_KEYWORD
def on_list_(self, node) -> Set[str]:
......
......@@ -38,7 +38,7 @@ from DHParser.log import is_logging, HistoryRecord
from DHParser.preprocess import BEGIN_TOKEN, END_TOKEN, RX_TOKEN_NAME
from DHParser.stringview import StringView, EMPTY_STRING_VIEW
from DHParser.syntaxtree import Node, RootNode, ParserBase, WHITESPACE_PTYPE, \
PLAINTEXT_PTYPE, TOKEN_PTYPE, ZOMBIE_PARSER
TOKEN_PTYPE, ZOMBIE_PARSER
from DHParser.toolkit import sane_parser_name, escape_control_characters, re, typing
from typing import Callable, cast, Dict, DefaultDict, List, Set, Tuple, Union, Optional
......@@ -47,10 +47,11 @@ __all__ = ('Parser',
'UnknownParserError',
'Grammar',
'PreprocessorToken',
'Token',
'RegExp',
'Whitespace',
'RE',
'Token',
'TKN',
'Whitespace',
'mixin_comment',
# 'UnaryOperator',
# 'NaryOperator',
......@@ -235,15 +236,15 @@ class Parser(ParserBase):
ApplyFunc = Callable[['Parser'], None]
def __init__(self, name: str = '') -> None:
def __init__(self) -> None:
# assert isinstance(name, str), str(name)
super().__init__(name)
super().__init__()
self._grammar = None # type: Optional['Grammar']
self.reset()
# add "aspect oriented" wrapper around parser calls
# for memoizing, left recursion and tracing
if not isinstance(self, Forward): # should Forward-Parser no be guarded? Not sure...
if not isinstance(self, Forward): # should Forward-Parser not be guarded? Not sure...
guarded_parser_call = add_parser_guard(self.__class__.__call__)
# The following check is necessary for classes that don't override
# the __call__() method, because in these cases the non-overridden
......@@ -258,7 +259,10 @@ class Parser(ParserBase):
`__deepcopy__`-method must be replaced (i.e. overridden without
calling the same method from the superclass) by the derived class.
"""
return self.__class__(self.name)
duplicate = self.__class__()
duplicate.name = self.name
duplicate.ptype = self.ptype
return duplicate
def reset(self):
"""Initializes or resets any parser variables. If overwritten,
......@@ -392,9 +396,9 @@ class Grammar:
# parsers
expression = Forward()
INTEGER = RE('\\d+')
factor = INTEGER | Token("(") + expression + Token(")")
term = factor + ZeroOrMore((Token("*") | Token("/")) + factor)
expression.set(term + ZeroOrMore((Token("+") | Token("-")) + term))
factor = INTEGER | TKN("(") + expression + TKN(")")
term = factor + ZeroOrMore((TKN("*") | TKN("/")) + factor)
expression.set(term + ZeroOrMore((TKN("+") | TKN("-")) + term))
root__ = expression
Upon instantiation the parser objects are deep-copied to the
......@@ -421,16 +425,6 @@ class Grammar:
(no comments, horizontal right aligned whitespace) don't fit:
Attributes:
COMMENT__: regular expression string for matching comments
WSP__: regular expression for whitespace and comments
wspL__: regular expression string for left aligned whitespace,
which either equals WSP__ or is empty.
wspR__: regular expression string for right aligned whitespace,
which either equals WSP__ or is empty.
root__: The root parser of the grammar. Theoretically, all parsers of the
grammar should be reachable by the root parser. However, for testing
of yet incomplete grammars class Grammar does not assume that this
......@@ -452,19 +446,6 @@ class Grammar:
history_tracking__: A flag indicating that the parsing history shall
be tracked
whitespace__: A parser for the implicit optional whitespace (or the
:class:zombie-parser if the default is empty). The default
whitespace will be used by parsers :class:`Token` and, if no
other parsers are passed to its constructor, by parser
:class:`RE`. It can also be place explicitly in the
EBNF-Grammar via the "~"-sign.
wsp_left_parser__: The same as ``whitespace`` for
left-adjacent-whitespace.
wsp_right_parser__: The same as ``whitespace`` for
right-adjacent-whitespace.
_dirty_flag__: A flag indicating that the Grammar has been called at
least once so that the parsing-variables need to be reset
when it is called again.
......@@ -540,10 +521,8 @@ class Grammar:
# root__ must be overwritten with the root-parser by grammar subclass
parser_initialization__ = "pending" # type: str
# some default values
COMMENT__ = r'' # type: str # r'#.*(?:\n|$)'
WSP__ = mixin_comment(whitespace=r'[\t ]*', comment=COMMENT__) # type: str
wspL__ = '' # type: str
wspR__ = WSP__ # type: str
# COMMENT__ = r'' # type: str # r'#.*(?:\n|$)'
# WSP_RE__ = mixin_comment(whitespace=r'[\t ]*', comment=COMMENT__) # type: str
@classmethod
......@@ -557,11 +536,9 @@ class Grammar:
...
symbol = RE('(?!\\d)\\w+')
After the call of this method symbol.name == "symbol"
holds. Names assigned via the ``name``-parameter of the
constructor will not be overwritten. Parser names starting or
ending with a double underscore like ``root__`` will be
ignored. See :func:`sane_parser_name()`
After the call of this method symbol.name == "symbol" holds.
Parser names starting or ending with a double underscore like
``root__`` will be ignored. See :func:`sane_parser_name()`
This is done only once, upon the first instantiation of the
grammar class!
......@@ -576,10 +553,11 @@ class Grammar:
cdict = cls.__dict__
for entry, parser in cdict.items():
if isinstance(parser, Parser) and sane_parser_name(entry):
if not parser.name:
parser._name = entry
if isinstance(parser, Forward) and (not cast(Forward, parser).parser.name):
cast(Forward, parser).parser._name = entry
if isinstance(parser, Forward):
if not cast(Forward, parser).parser.name:
cast(Forward, parser).parser.name = entry
else: # if not parser.name:
parser.name = entry
cls.parser_initialization__ = "done"
......@@ -605,23 +583,6 @@ class Grammar:
# on demand (see Grammar.__getitem__()). Usually, the need to
# do so only arises during testing.
self.root__ = copy.deepcopy(root) if root else copy.deepcopy(self.__class__.root__)
if self.WSP__:
try:
probe = self.whitespace__ # type: RegExp
assert self.whitespace__.regexp.pattern == self.WSP__
except AttributeError:
self.whitespace__ = Whitespace(self.WSP__) # type: RegExp
self.whitespace__.grammar = self
self.all_parsers__.add(self.whitespace__) # don't you forget about me...
else:
self.whitespace__ = cast(RegExp, ZOMBIE_PARSER)
assert not self.wspL__ or self.wspL__ == self.WSP__
assert not self.wspR__ or self.wspR__ == self.WSP__
self.wsp_left_parser__ = self.whitespace__ if self.wspL__ else ZOMBIE_PARSER
self.wsp_right_parser__ = self.whitespace__ if self.wspR__ else ZOMBIE_PARSER
self.root__.apply(self._add_parser__)
......@@ -680,7 +641,8 @@ class Grammar:
assert parser.name not in self.__dict__ or \
isinstance(self.__dict__[parser.name], parser.__class__), \
('Cannot add parser "%s" because a field with the same name '
'already exists in grammar object!' % parser.name)
'already exists in grammar object: %s!'
% (parser.name, str(self.__dict__[parser.name])))
setattr(self, parser.name, parser)
self.all_parsers__.add(parser)
parser.grammar = self
......@@ -836,7 +798,7 @@ def dsl_error_msg(parser: Parser, error_str: str) -> str:
########################################################################
#
# Token and Regular Expression parser classes (i.e. leaf classes)
# _Token and Regular Expression parser classes (i.e. leaf classes)
#
########################################################################
......@@ -855,7 +817,14 @@ class PreprocessorToken(Parser):
def __init__(self, token: str) -> None:
assert token and token.isupper()
assert RX_TOKEN_NAME.match(token)
super().__init__(token)
super().__init__()
self.name = token
def __deepcopy__(self, memo):
    """Return a deep copy of this parser.

    The copy is built by re-running the constructor with the stored
    token name rather than by copying attributes generically, so the
    constructor's assertions run again on the duplicate.
    """
    duplicate = self.__class__(self.name)
    # NOTE(review): the constructor already assigns self.name = token
    # (see __init__ above), so this re-assignment is redundant but harmless.
    duplicate.name = self.name
    duplicate.ptype = self.ptype
    return duplicate
def __call__(self, text: StringView) -> Tuple[Optional[Node], StringView]:
if text[0:1] == BEGIN_TOKEN:
......@@ -884,25 +853,28 @@ class PreprocessorToken(Parser):
return None, text
class PlainText(Parser):
class Token(Parser):
"""
Parses plain text strings. (Could be done by RegExp as well, but is faster.)
Example::
>>> while_token = PlainText("while")
>>> while_token = Token("while")
>>> Grammar(while_token)("while").content
'while'
"""
assert PLAINTEXT_PTYPE == ":PlainText"
assert TOKEN_PTYPE == ":Token"
def __init__(self, text: str, name: str = '') -> None:
super().__init__(name)
def __init__(self, text: str) -> None:
super().__init__()
self.text = text
self.len = len(text)
def __deepcopy__(self, memo):
return self.__class__(self.text, self.name)
duplicate = self.__class__(self.text)
duplicate.name = self.name
duplicate.ptype = self.ptype
return duplicate
def __call__(self, text: StringView) -> Tuple[Optional[Node], StringView]:
if text.startswith(self.text):
......@@ -933,8 +905,8 @@ class RegExp(Parser):
EBNF-Example: ``word = /\w+/``
"""
def __init__(self, regexp, name: str = '') -> None:
super().__init__(name)
def __init__(self, regexp) -> None:
super().__init__()
self.regexp = re.compile(regexp) if isinstance(regexp, str) else regexp
def __deepcopy__(self, memo):
......@@ -943,7 +915,10 @@ class RegExp(Parser):
regexp = copy.deepcopy(self.regexp, memo)
except TypeError:
regexp = self.regexp.pattern
return self.__class__(regexp, self.name)
duplicate = self.__class__(regexp)
duplicate.name = self.name
duplicate.ptype = self.ptype
return duplicate
def __call__(self, text: StringView) -> Tuple[Optional[Node], StringView]:
match = text.match(self.regexp)
......@@ -963,158 +938,37 @@ class RegExp(Parser):
return escape_control_characters('/%s/' % self.regexp.pattern)
class Whitespace(RegExp):
    """A variant of RegExp that signifies through its class name that it
    is a RegExp-parser for whitespace."""
    # Sanity check: the whitespace parser-type constant declared in
    # DHParser.syntaxtree must match this class's derived ptype (":Whitespace").
    assert WHITESPACE_PTYPE == ":Whitespace"
#######################################################################
#######################################################################
#
# WARNING: The following code is hard to maintain, because it
# introduces a special case, i.e. a parser with child parsers that is
# not a descendant of the NaryOperator and because it interacts
# With the constructor of the Grammar class (see the instantiations of
# the Whitespace-class, there).
#
# That is all the more regrettable, as class RE basically just
# introduces syntactical sugar for
#
# Series(whitespace__, RegExp('something'), whitespace__)
#
# What to do? Throw the syntactical sugar out? :-( Or find a more
# robust solution for that kind of syntactical sugar? Or just leave
# it be?
#
######################################################################
######################################################################
class RE(Parser):
r"""
Regular Expressions with optional leading or trailing whitespace.
The RE-parser parses pieces of text that match a given regular
expression. Other than the ``RegExp``-Parser it can also skip
"implicit whitespace" before or after the matched text.
The whitespace is in turn defined by a regular expression. It should
be made sure that this expression also matches the empty string,
e.g. use r'\s*' or r'[\t ]+', but not r'\s+'. If the respective
parameters in the constructor are set to ``None`` the default
whitespace expression from the Grammar object will be used.
Example (allowing whitespace on the right hand side, but not on
the left hand side of a regular expression)::
>>> word = RE(r'\w+', wR=r'\s*')
>>> parser = Grammar(word)
>>> result = parser('Haus ')
>>> result.content
'Haus '
>>> result.structure
'(:RE (:RegExp "Haus") (:Whitespace " "))'
>>> str(parser(' Haus'))
' <<< Error on " Haus" | Parser did not match! Invalid source file?\n Most advanced: None\n Last match: None; >>> '
EBNF-Notation: ``/ ... /~` or `~/ ... /` or `~/ ... /~``
EBNF-Example: ``word = /\w+/~``
def withWS(parser_factory, wsL='', wsR='\s*'):
"""Syntactic Sugar for 'Series(Whitespace(wsL), parser_factory(), Whitespace(wsR))'.
"""
def __init__(self, regexp, wL=None, wR=None, name: str='') -> None:
r"""Constructor for class RE.
Args:
regexp (str or regex object): The regular expression to be
used for parsing.
wL (str or regexp): Left whitespace regular expression,
i.e. either ``None``, the empty string or a regular
expression (e.g. "\s*") that defines whitespace. An
empty string means no whitespace will be skipped; ``None``
means that the default whitespace will be used.
wR (str or regexp): Right whitespace regular expression.
See above.
name: The optional name of the parser.
"""
super().__init__(name)
self.rx_wsl = wL
self.rx_wsr = wR
self.wsp_left = Whitespace(wL) if wL else ZOMBIE_PARSER
self.wsp_right = Whitespace(wR) if wR else ZOMBIE_PARSER
self.main = self.create_main_parser(regexp)
def __deepcopy__(self, memo={}):
try:
regexp = copy.deepcopy(self.main.regexp, memo)
except TypeError:
regexp = self.main.regexp.pattern
return self.__class__(regexp, self.rx_wsl, self.rx_wsr, self.name)
def __call__(self, text: StringView) -> Tuple[Optional[Node], StringView]:
# assert self.main.regexp.pattern != "@"
txt = text # type: StringView
wsl, txt = self.wsp_left(txt)
main, txt = self.main(txt)
if main:
wsr, txt = self.wsp_right(txt)
result = tuple(nd for nd in (wsl, main, wsr) if nd)
return Node(self, result), txt
return None, text
def __repr__(self):
wsl = '~' if self.wsp_left != ZOMBIE_PARSER else ''
wsr = '~' if self.wsp_right != ZOMBIE_PARSER else ''
return wsl + '/%s/' % self.main.regexp.pattern + wsr
def _grammar_assigned_notifier(self):
if self.grammar:
# use default whitespace parsers if not otherwise specified
if self.rx_wsl is None:
self.wsp_left = self.grammar.wsp_left_parser__
if self.rx_wsr is None:
self.wsp_right = self.grammar.wsp_right_parser__
def apply(self, func: Parser.ApplyFunc) -> bool:
if super().apply(func):
if self.rx_wsl:
self.wsp_left.apply(func)
if self.rx_wsr:
self.wsp_right.apply(func)
self.main.apply(func)
return True
return False
def create_main_parser(self, arg) -> Parser:
"""Creates the main parser of this compound parser. Can be overridden."""
return RegExp(arg)
if wsL and isinstance(wsL, str):
wsL = Whitespace(wsL)
if wsR and isinstance(wsR, str):
wsR = Whitespace(wsR)
if wsL and wsR:
return Series(wsL, parser_factory(), wsR)
elif wsL:
return Series(wsL, parser_factory())
elif wsR:
return Series(parser_factory(), wsR)
else:
return parser_factory()
class Token(RE):
"""
Class Token parses simple strings. Any regular regular expression
commands will be interpreted as simple sequence of characters.
def RE(regexp, wsL='', wsR=r'\s*'):
    """Match a regular expression with optional adjacent whitespace.

    Syntactic sugar for
    ``Series(Whitespace(wsL), RegExp(regexp), Whitespace(wsR))``
    as assembled by :func:`withWS`.

    Args:
        regexp: The regular expression (string) for the main parser.
        wsL: Regex string for whitespace allowed on the left; the empty
            string (default) means no left-hand whitespace is skipped.
        wsR: Regex string for whitespace allowed on the right; defaults
            to ``r'\s*'`` (any amount of whitespace, including none).
    Returns:
        The parser object produced by :func:`withWS`.
    """
    # r'\s*' (raw string) avoids the invalid '\s' escape sequence that a
    # plain string literal would carry (SyntaxWarning in modern Python).
    return withWS(lambda: RegExp(regexp), wsL, wsR)
Other than that class Token is essentially a renamed version of
class RE. Because tokens often have a particular semantic different
from other REs, parsing them with a separate parser class allows to
distinguish them by their parser type.
"""
assert TOKEN_PTYPE == ":Token"
def __init__(self, token: str, wL=None, wR=None, name: str = '') -> None:
self.token = token
super().__init__(token, wL, wR, name)
def TKN(token, wsL='', wsR=r'\s*'):
    """Match a literal token with optional adjacent whitespace.

    Syntactic sugar for
    ``Series(Whitespace(wsL), Token(token), Whitespace(wsR))``
    as assembled by :func:`withWS`.

    Args:
        token: The literal string the wrapped ``Token`` parser matches.
        wsL: Regex string for whitespace allowed on the left; the empty
            string (default) means no left-hand whitespace is skipped.
        wsR: Regex string for whitespace allowed on the right; defaults
            to ``r'\s*'`` (any amount of whitespace, including none).
    Returns:
        The parser object produced by :func:`withWS`.
    """
    # r'\s*' (raw string) avoids the invalid '\s' escape sequence that a
    # plain string literal would carry (SyntaxWarning in modern Python).
    return withWS(lambda: Token(token), wsL, wsR)
def __deepcopy__(self, memo={}):
return self.__class__(self.token, self.rx_wsl, self.rx_wsr, self.name)