2.12.2021, 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit f564f606 authored by di68kap's avatar di68kap
Browse files

sync commit

parents f2f3e6e2 72bec2ac
...@@ -95,7 +95,7 @@ Allow to specify parsers/nodes, the result of which will be dropped ...@@ -95,7 +95,7 @@ Allow to specify parsers/nodes, the result of which will be dropped
right away, so that the nodes they produce do not need to be removed right away, so that the nodes they produce do not need to be removed
during the AST-Transformations. Typical candidates would be: during the AST-Transformations. Typical candidates would be:
1. Tokens ":Token" 1. Tokens ":_Token"
2. Whitespace ":Whitespace" (in some cases) 2. Whitespace ":Whitespace" (in some cases)
3. empty Nodes 3. empty Nodes
...@@ -143,8 +143,8 @@ parsers: ...@@ -143,8 +143,8 @@ parsers:
"contains" another parser without its calls being run through the "contains" another parser without its calls being run through the
parser guard, but that records every call of the parser and its parser guard, but that records every call of the parser and its
results, e.g. to trace the `option`-parser from the ebnf-parser (see results, e.g. to trace the `option`-parser from the ebnf-parser (see
DHParser/ebnf.py) you'd write: `option = Trace(Series(Token("["), DHParser/ebnf.py) you'd write: `option = Trace(Series(_Token("["),
expression, Token("]"), mandatory=1))` expression, _Token("]"), mandatory=1))`
- For the ebnf-representation a tracing-prefix could be added, say `?`, - For the ebnf-representation a tracing-prefix could be added, say `?`,
e.g. `option = ?("[" §expression "]")` or, alternatively, `?option = e.g. `option = ?("[" §expression "]")` or, alternatively, `?option =
......
...@@ -90,7 +90,7 @@ except ImportError: ...@@ -90,7 +90,7 @@ except ImportError:
from DHParser import logging, is_filename, load_if_file, \\ from DHParser import logging, is_filename, load_if_file, \\
Grammar, Compiler, nil_preprocessor, PreprocessorToken, Whitespace, \\ Grammar, Compiler, nil_preprocessor, PreprocessorToken, Whitespace, \\
Lookbehind, Lookahead, Alternative, Pop, Token, Synonym, AllOf, SomeOf, Unordered, \\ Lookbehind, Lookahead, Alternative, Pop, Token, Synonym, AllOf, SomeOf, Unordered, \\
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture, \\ Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \\
ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \\ ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \\
grammar_changed, last_value, counterpart, accumulate, PreprocessorFunc, \\ grammar_changed, last_value, counterpart, accumulate, PreprocessorFunc, \\
Node, TransformationFunc, TransformationDict, transformation_factory, \\ Node, TransformationFunc, TransformationDict, transformation_factory, \\
......
...@@ -30,7 +30,7 @@ from functools import partial ...@@ -30,7 +30,7 @@ from functools import partial
from DHParser.compile import CompilerError, Compiler from DHParser.compile import CompilerError, Compiler
from DHParser.error import Error from DHParser.error import Error
from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, Whitespace, RE, \ from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, Whitespace, \
NegativeLookahead, Alternative, Series, Option, OneOrMore, ZeroOrMore, Token NegativeLookahead, Alternative, Series, Option, OneOrMore, ZeroOrMore, Token
from DHParser.preprocess import nil_preprocessor, PreprocessorFunc from DHParser.preprocess import nil_preprocessor, PreprocessorFunc
from DHParser.syntaxtree import Node, WHITESPACE_PTYPE, TOKEN_PTYPE from DHParser.syntaxtree import Node, WHITESPACE_PTYPE, TOKEN_PTYPE
...@@ -117,7 +117,7 @@ class EBNFGrammar(Grammar): ...@@ -117,7 +117,7 @@ class EBNFGrammar(Grammar):
literal = /"(?:[^"]|\\")*?"/~ # e.g. "(", '+', 'while' literal = /"(?:[^"]|\\")*?"/~ # e.g. "(", '+', 'while'
| /'(?:[^']|\\')*?'/~ # whitespace following literals will be ignored tacitly. | /'(?:[^']|\\')*?'/~ # whitespace following literals will be ignored tacitly.
plaintext = /`(?:[^"]|\\")*?`/~ # like literal but does not eat whitespace plaintext = /`(?:[^"]|\\")*?`/~ # like literal but does not eat whitespace
regexp = /~?\/(?:\\\/|[^\/])*?\/~?/~ # e.g. /\w+/, ~/#.*(?:\n|$)/~ regexp = /\/(?:\\\/|[^\/])*?\//~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
# '~' is a whitespace-marker, if present leading or trailing # '~' is a whitespace-marker, if present leading or trailing
# whitespace of a regular expression will be ignored tacitly. # whitespace of a regular expression will be ignored tacitly.
whitespace = /~/~ # implicit or default whitespace whitespace = /~/~ # implicit or default whitespace
...@@ -126,38 +126,41 @@ class EBNFGrammar(Grammar): ...@@ -126,38 +126,41 @@ class EBNFGrammar(Grammar):
EOF = !/./ EOF = !/./
""" """
expression = Forward() expression = Forward()
source_hash__ = "3fc9f5a340f560e847d9af0b61a68743"
parser_initialization__ = "upon instantiation" parser_initialization__ = "upon instantiation"
COMMENT__ = r'#.*(?:\n|$)' COMMENT__ = r'#.*(?:\n|$)'
WHITESPACE__ = r'\s*' WHITESPACE__ = r'\s*'
WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__) WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = '' wspL__ = ''
wspR__ = WSP__ wspR__ = WSP_RE__
whitespace__ = Whitespace(WSP__) wsp__ = Whitespace(WSP_RE__)
EOF = NegativeLookahead(RegExp('.')) EOF = NegativeLookahead(RegExp('.'))
list_ = Series(RE('\\w+'), ZeroOrMore(Series(Token(","), RE('\\w+')))) list_ = Series(RegExp('\\w+'), wsp__, ZeroOrMore(Series(Series(Token(","), wsp__), RegExp('\\w+'), wsp__)))
whitespace = RE('~') whitespace = Series(RegExp('~'), wsp__)
regexp = RE('~?/(?:\\\\/|[^/])*?/~?') regexp = Series(RegExp('/(?:\\\\/|[^/])*?/'), wsp__)
plaintext = RE('`(?:[^"]|\\\\")*?`') plaintext = Series(RegExp('`(?:[^"]|\\\\")*?`'), wsp__)
literal = Alternative(RE('"(?:[^"]|\\\\")*?"'), RE("'(?:[^']|\\\\')*?'")) literal = Alternative(Series(RegExp('"(?:[^"]|\\\\")*?"'), wsp__), Series(RegExp("'(?:[^']|\\\\')*?'"), wsp__))
symbol = RE('(?!\\d)\\w+') symbol = Series(RegExp('(?!\\d)\\w+'), wsp__)
option = Series(Token("["), expression, Token("]"), mandatory=1) option = Series(Series(Token("["), wsp__), expression, Series(Token("]"), wsp__), mandatory=1)
repetition = Series(Token("{"), expression, Token("}"), mandatory=1) repetition = Series(Series(Token("{"), wsp__), expression, Series(Token("}"), wsp__), mandatory=1)
oneormore = Series(Token("{"), expression, Token("}+")) oneormore = Series(Series(Token("{"), wsp__), expression, Series(Token("}+"), wsp__))
unordered = Series(Token("<"), expression, Token(">"), mandatory=1) unordered = Series(Series(Token("<"), wsp__), expression, Series(Token(">"), wsp__), mandatory=1)
group = Series(Token("("), expression, Token(")"), mandatory=1) group = Series(Series(Token("("), wsp__), expression, Series(Token(")"), wsp__), mandatory=1)
retrieveop = Alternative(Token("::"), Token(":")) retrieveop = Alternative(Series(Token("::"), wsp__), Series(Token(":"), wsp__))
flowmarker = Alternative(Token("!"), Token("&"), Token("-!"), Token("-&")) flowmarker = Alternative(Series(Token("!"), wsp__), Series(Token("&"), wsp__),
factor = Alternative(Series(Option(flowmarker), Option(retrieveop), symbol, NegativeLookahead(Token("="))), Series(Token("-!"), wsp__), Series(Token("-&"), wsp__))
factor = Alternative(Series(Option(flowmarker), Option(retrieveop), symbol,
NegativeLookahead(Series(Token("="), wsp__))),
Series(Option(flowmarker), literal), Series(Option(flowmarker), plaintext), Series(Option(flowmarker), literal), Series(Option(flowmarker), plaintext),
Series(Option(flowmarker), regexp), Series(Option(flowmarker), whitespace), Series(Option(flowmarker), regexp), Series(Option(flowmarker), whitespace),
Series(Option(flowmarker), oneormore), Series(Option(flowmarker), group), Series(Option(flowmarker), oneormore), Series(Option(flowmarker), group),
Series(Option(flowmarker), unordered), repetition, option) Series(Option(flowmarker), unordered), repetition, option)
term = OneOrMore(Series(Option(Token("§")), factor)) term = OneOrMore(Series(Option(Series(Token("§"), wsp__)), factor))
expression.set(Series(term, ZeroOrMore(Series(Token("|"), term)))) expression.set(Series(term, ZeroOrMore(Series(Series(Token("|"), wsp__), term))))
directive = Series(Token("@"), symbol, Token("="), Alternative(regexp, literal, list_), mandatory=1) directive = Series(Series(Token("@"), wsp__), symbol, Series(Token("="), wsp__),
definition = Series(symbol, Token("="), expression, mandatory=1) Alternative(regexp, literal, list_), mandatory=1)
syntax = Series(Option(RE('', wR='', wL=WSP__)), ZeroOrMore(Alternative(definition, directive)), EOF, mandatory=2) definition = Series(symbol, Series(Token("="), wsp__), expression, mandatory=1)
syntax = Series(Option(Series(wsp__, RegExp(''))),
ZeroOrMore(Alternative(definition, directive)), EOF, mandatory=2)
root__ = syntax root__ = syntax
...@@ -382,9 +385,9 @@ class EBNFCompiler(Compiler): ...@@ -382,9 +385,9 @@ class EBNFCompiler(Compiler):
regular expressions found in the current parsing process regular expressions found in the current parsing process
""" """
COMMENT_KEYWORD = "COMMENT__" COMMENT_KEYWORD = "COMMENT__"
WHITESPACE_KEYWORD = "WSP__" WHITESPACE_KEYWORD = "WSP_RE__"
RAW_WS_KEYWORD = "WHITESPACE__" RAW_WS_KEYWORD = "WHITESPACE__"
WHITESPACE_PARSER_KEYWORD = "whitespace__" WHITESPACE_PARSER_KEYWORD = "wsp__"
RESERVED_SYMBOLS = {WHITESPACE_KEYWORD, RAW_WS_KEYWORD, COMMENT_KEYWORD} RESERVED_SYMBOLS = {WHITESPACE_KEYWORD, RAW_WS_KEYWORD, COMMENT_KEYWORD}
AST_ERROR = "Badly structured syntax tree. " \ AST_ERROR = "Badly structured syntax tree. " \
"Potentially due to erroneous AST transformation." "Potentially due to erroneous AST transformation."
...@@ -459,7 +462,7 @@ class EBNFCompiler(Compiler): ...@@ -459,7 +462,7 @@ class EBNFCompiler(Compiler):
elif rule.startswith('Synonym'): elif rule.startswith('Synonym'):
transformations = '[reduce_single_child]' transformations = '[reduce_single_child]'
transtable.append(' "' + name + '": %s,' % transformations) transtable.append(' "' + name + '": %s,' % transformations)
transtable.append(' ":Token, :RE": reduce_single_child,') transtable.append(' ":Token": reduce_single_child,')
transtable += [' "*": replace_by_single_child', '}', ''] transtable += [' "*": replace_by_single_child', '}', '']
transtable += [TRANSFORMER_FACTORY.format(NAME=self.grammar_name)] transtable += [TRANSFORMER_FACTORY.format(NAME=self.grammar_name)]
return '\n'.join(transtable) return '\n'.join(transtable)
...@@ -778,7 +781,6 @@ class EBNFCompiler(Compiler): ...@@ -778,7 +781,6 @@ class EBNFCompiler(Compiler):
name for the particular non-terminal. name for the particular non-terminal.
""" """
arguments = [self.compile(r) for r in node.children] + custom_args arguments = [self.compile(r) for r in node.children] + custom_args
# node.error_flag = max(node.error_flag, max(t.error_flag for t in node.children))
return parser_class + '(' + ', '.join(arguments) + ')' return parser_class + '(' + ', '.join(arguments) + ')'
...@@ -921,12 +923,22 @@ class EBNFCompiler(Compiler): ...@@ -921,12 +923,22 @@ class EBNFCompiler(Compiler):
def on_literal(self, node: Node) -> str: def on_literal(self, node: Node) -> str:
return 'Token(' + node.content.replace('\\', r'\\') + ')' center = 'Token(' + node.content.replace('\\', r'\\') + ')'
left = self.WHITESPACE_PARSER_KEYWORD if 'left' in self.directives['literalws'] else ''
right = self.WHITESPACE_PARSER_KEYWORD if 'right' in self.directives['literalws'] else ''
if left or right:
return 'Series(' + ", ".join(item for item in (left, center, right) if item) + ')'
return center
def on_plaintext(self, node: Node) -> str: def on_plaintext(self, node: Node) -> str:
return 'Token(' + node.content.replace('\\', r'\\').replace('`', '"') \ tk = node.content.replace('\\', r'\\')
+ ", wL='', wR='')" rpl = '"' if tk.find('"') < 0 else "'" if tk.find("'") < 0 else ''
if rpl:
tk = rpl + tk[1:-1] + rpl
else:
tk = rpl + tk.replace('"', '\\"')[1:-1] + rpl
return 'Token(' + tk + ')'
def on_regexp(self, node: Node) -> str: def on_regexp(self, node: Node) -> str:
...@@ -935,7 +947,7 @@ class EBNFCompiler(Compiler): ...@@ -935,7 +947,7 @@ class EBNFCompiler(Compiler):
if rx[0] == '/' and rx[-1] == '/': if rx[0] == '/' and rx[-1] == '/':
parser = 'RegExp(' parser = 'RegExp('
else: else:
parser = 'RE(' parser = '_RE('
if rx[:2] == '~/': if rx[:2] == '~/':
if not 'left' in self.directives['literalws']: if not 'left' in self.directives['literalws']:
name = ['wL=' + self.WHITESPACE_KEYWORD] + name name = ['wL=' + self.WHITESPACE_KEYWORD] + name
...@@ -961,7 +973,7 @@ class EBNFCompiler(Compiler): ...@@ -961,7 +973,7 @@ class EBNFCompiler(Compiler):
def on_whitespace(self, node: Node) -> str: def on_whitespace(self, node: Node) -> str:
return 'whitespace__' return self.WHITESPACE_PARSER_KEYWORD
def on_list_(self, node) -> Set[str]: def on_list_(self, node) -> Set[str]:
......
...@@ -38,7 +38,7 @@ from DHParser.log import is_logging, HistoryRecord ...@@ -38,7 +38,7 @@ from DHParser.log import is_logging, HistoryRecord
from DHParser.preprocess import BEGIN_TOKEN, END_TOKEN, RX_TOKEN_NAME from DHParser.preprocess import BEGIN_TOKEN, END_TOKEN, RX_TOKEN_NAME
from DHParser.stringview import StringView, EMPTY_STRING_VIEW from DHParser.stringview import StringView, EMPTY_STRING_VIEW
from DHParser.syntaxtree import Node, RootNode, ParserBase, WHITESPACE_PTYPE, \ from DHParser.syntaxtree import Node, RootNode, ParserBase, WHITESPACE_PTYPE, \
PLAINTEXT_PTYPE, TOKEN_PTYPE, ZOMBIE_PARSER TOKEN_PTYPE, ZOMBIE_PARSER
from DHParser.toolkit import sane_parser_name, escape_control_characters, re, typing from DHParser.toolkit import sane_parser_name, escape_control_characters, re, typing
from typing import Callable, cast, Dict, DefaultDict, List, Set, Tuple, Union, Optional from typing import Callable, cast, Dict, DefaultDict, List, Set, Tuple, Union, Optional
...@@ -47,10 +47,11 @@ __all__ = ('Parser', ...@@ -47,10 +47,11 @@ __all__ = ('Parser',
'UnknownParserError', 'UnknownParserError',
'Grammar', 'Grammar',
'PreprocessorToken', 'PreprocessorToken',
'Token',
'RegExp', 'RegExp',
'Whitespace',
'RE', 'RE',
'Token', 'TKN',
'Whitespace',
'mixin_comment', 'mixin_comment',
# 'UnaryOperator', # 'UnaryOperator',
# 'NaryOperator', # 'NaryOperator',
...@@ -235,15 +236,15 @@ class Parser(ParserBase): ...@@ -235,15 +236,15 @@ class Parser(ParserBase):
ApplyFunc = Callable[['Parser'], None] ApplyFunc = Callable[['Parser'], None]
def __init__(self, name: str = '') -> None: def __init__(self) -> None:
# assert isinstance(name, str), str(name) # assert isinstance(name, str), str(name)
super().__init__(name) super().__init__()
self._grammar = None # type: Optional['Grammar'] self._grammar = None # type: Optional['Grammar']
self.reset() self.reset()
# add "aspect oriented" wrapper around parser calls # add "aspect oriented" wrapper around parser calls
# for memoizing, left recursion and tracing # for memoizing, left recursion and tracing
if not isinstance(self, Forward): # should Forward-Parser no be guarded? Not sure... if not isinstance(self, Forward): # should Forward-Parser not be guarded? Not sure...
guarded_parser_call = add_parser_guard(self.__class__.__call__) guarded_parser_call = add_parser_guard(self.__class__.__call__)
# The following check is necessary for classes that don't override # The following check is necessary for classes that don't override
# the __call__() method, because in these cases the non-overridden # the __call__() method, because in these cases the non-overridden
...@@ -258,7 +259,10 @@ class Parser(ParserBase): ...@@ -258,7 +259,10 @@ class Parser(ParserBase):
`__deepcopy__`-method must be replaced (i.e. overridden without `__deepcopy__`-method must be replaced (i.e. overridden without
calling the same method from the superclass) by the derived class. calling the same method from the superclass) by the derived class.
""" """
return self.__class__(self.name) duplicate = self.__class__()
duplicate.name = self.name
duplicate.ptype = self.ptype
return duplicate
def reset(self): def reset(self):
"""Initializes or resets any parser variables. If overwritten, """Initializes or resets any parser variables. If overwritten,
...@@ -392,9 +396,9 @@ class Grammar: ...@@ -392,9 +396,9 @@ class Grammar:
# parsers # parsers
expression = Forward() expression = Forward()
INTEGER = RE('\\d+') INTEGER = RE('\\d+')
factor = INTEGER | Token("(") + expression + Token(")") factor = INTEGER | TKN("(") + expression + TKN(")")
term = factor + ZeroOrMore((Token("*") | Token("/")) + factor) term = factor + ZeroOrMore((TKN("*") | TKN("/")) + factor)
expression.set(term + ZeroOrMore((Token("+") | Token("-")) + term)) expression.set(term + ZeroOrMore((TKN("+") | TKN("-")) + term))
root__ = expression root__ = expression
Upon instantiation the parser objects are deep-copied to the Upon instantiation the parser objects are deep-copied to the
...@@ -421,16 +425,6 @@ class Grammar: ...@@ -421,16 +425,6 @@ class Grammar:
(no comments, horizontal right aligned whitespace) don't fit: (no comments, horizontal right aligned whitespace) don't fit:
Attributes: Attributes:
COMMENT__: regular expression string for matching comments
WSP__: regular expression for whitespace and comments
wspL__: regular expression string for left aligned whitespace,
which either equals WSP__ or is empty.
wspR__: regular expression string for right aligned whitespace,
which either equals WSP__ or is empty.
root__: The root parser of the grammar. Theoretically, all parsers of the root__: The root parser of the grammar. Theoretically, all parsers of the
grammar should be reachable by the root parser. However, for testing grammar should be reachable by the root parser. However, for testing
of yet incomplete grammars class Grammar does not assume that this of yet incomplete grammars class Grammar does not assume that this
...@@ -452,19 +446,6 @@ class Grammar: ...@@ -452,19 +446,6 @@ class Grammar:
history_tracking__: A flag indicating that the parsing history shall history_tracking__: A flag indicating that the parsing history shall
be tracked be tracked
whitespace__: A parser for the implicit optional whitespace (or the
:class:zombie-parser if the default is empty). The default
whitespace will be used by parsers :class:`Token` and, if no
other parsers are passed to its constructor, by parser
:class:`RE`. It can also be place explicitly in the
EBNF-Grammar via the "~"-sign.
wsp_left_parser__: The same as ``whitespace`` for
left-adjacent-whitespace.
wsp_right_parser__: The same as ``whitespace`` for
right-adjacent-whitespace.
_dirty_flag__: A flag indicating that the Grammar has been called at _dirty_flag__: A flag indicating that the Grammar has been called at
least once so that the parsing-variables need to be reset least once so that the parsing-variables need to be reset
when it is called again. when it is called again.
...@@ -540,10 +521,8 @@ class Grammar: ...@@ -540,10 +521,8 @@ class Grammar:
# root__ must be overwritten with the root-parser by grammar subclass # root__ must be overwritten with the root-parser by grammar subclass
parser_initialization__ = "pending" # type: str parser_initialization__ = "pending" # type: str
# some default values # some default values
COMMENT__ = r'' # type: str # r'#.*(?:\n|$)' # COMMENT__ = r'' # type: str # r'#.*(?:\n|$)'
WSP__ = mixin_comment(whitespace=r'[\t ]*', comment=COMMENT__) # type: str # WSP_RE__ = mixin_comment(whitespace=r'[\t ]*', comment=COMMENT__) # type: str
wspL__ = '' # type: str
wspR__ = WSP__ # type: str
@classmethod @classmethod
...@@ -557,11 +536,9 @@ class Grammar: ...@@ -557,11 +536,9 @@ class Grammar:
... ...
symbol = RE('(?!\\d)\\w+') symbol = RE('(?!\\d)\\w+')
After the call of this method symbol.name == "symbol" After the call of this method symbol.name == "symbol" holds.
holds. Names assigned via the ``name``-parameter of the Parser names starting or ending with a double underscore like
constructor will not be overwritten. Parser names starting or ``root__`` will be ignored. See :func:`sane_parser_name()`
ending with a double underscore like ``root__`` will be
ignored. See :func:`sane_parser_name()`
This is done only once, upon the first instantiation of the This is done only once, upon the first instantiation of the
grammar class! grammar class!
...@@ -576,10 +553,11 @@ class Grammar: ...@@ -576,10 +553,11 @@ class Grammar:
cdict = cls.__dict__ cdict = cls.__dict__
for entry, parser in cdict.items(): for entry, parser in cdict.items():
if isinstance(parser, Parser) and sane_parser_name(entry): if isinstance(parser, Parser) and sane_parser_name(entry):
if not parser.name: if isinstance(parser, Forward):
parser._name = entry if not cast(Forward, parser).parser.name:
if isinstance(parser, Forward) and (not cast(Forward, parser).parser.name): cast(Forward, parser).parser.name = entry
cast(Forward, parser).parser._name = entry else: # if not parser.name:
parser.name = entry
cls.parser_initialization__ = "done" cls.parser_initialization__ = "done"
...@@ -605,23 +583,6 @@ class Grammar: ...@@ -605,23 +583,6 @@ class Grammar:
# on demand (see Grammar.__getitem__()). Usually, the need to # on demand (see Grammar.__getitem__()). Usually, the need to
# do so only arises during testing. # do so only arises during testing.
self.root__ = copy.deepcopy(root) if root else copy.deepcopy(self.__class__.root__) self.root__ = copy.deepcopy(root) if root else copy.deepcopy(self.__class__.root__)
if self.WSP__:
try:
probe = self.whitespace__ # type: RegExp
assert self.whitespace__.regexp.pattern == self.WSP__
except AttributeError:
self.whitespace__ = Whitespace(self.WSP__) # type: RegExp
self.whitespace__.grammar = self
self.all_parsers__.add(self.whitespace__) # don't you forget about me...
else:
self.whitespace__ = cast(RegExp, ZOMBIE_PARSER)
assert not self.wspL__ or self.wspL__ == self.WSP__
assert not self.wspR__ or self.wspR__ == self.WSP__
self.wsp_left_parser__ = self.whitespace__ if self.wspL__ else ZOMBIE_PARSER
self.wsp_right_parser__ = self.whitespace__ if self.wspR__ else ZOMBIE_PARSER
self.root__.apply(self._add_parser__) self.root__.apply(self._add_parser__)
...@@ -680,7 +641,8 @@ class Grammar: ...@@ -680,7 +641,8 @@ class Grammar:
assert parser.name not in self.__dict__ or \ assert parser.name not in self.__dict__ or \
isinstance(self.__dict__[parser.name], parser.__class__), \ isinstance(self.__dict__[parser.name], parser.__class__), \
('Cannot add parser "%s" because a field with the same name ' ('Cannot add parser "%s" because a field with the same name '
'already exists in grammar object!' % parser.name) 'already exists in grammar object: %s!'
% (parser.name, str(self.__dict__[parser.name])))
setattr(self, parser.name, parser) setattr(self, parser.name, parser)
self.all_parsers__.add(parser) self.all_parsers__.add(parser)
parser.grammar = self parser.grammar = self
...@@ -836,7 +798,7 @@ def dsl_error_msg(parser: Parser, error_str: str) -> str: ...@@ -836,7 +798,7 @@ def dsl_error_msg(parser: Parser, error_str: str) -> str:
######################################################################## ########################################################################
# #
# Token and Regular Expression parser classes (i.e. leaf classes) # _Token and Regular Expression parser classes (i.e. leaf classes)
# #
######################################################################## ########################################################################
...@@ -855,7 +817,14 @@ class PreprocessorToken(Parser): ...@@ -855,7 +817,14 @@ class PreprocessorToken(Parser):
def __init__(self, token: str) -> None: def __init__(self, token: str) -> None:
assert token and token.isupper() assert token and token.isupper()
assert RX_TOKEN_NAME.match(token) assert RX_TOKEN_NAME.match(token)
super().__init__(token) super().__init__()
self.name = token
def __deepcopy__(self, memo):
duplicate = self.__class__(self.name)
duplicate.name = self.name
duplicate.ptype = self.ptype
return duplicate
def __call__(self, text: StringView) -> Tuple[Optional[Node], StringView]: def __call__(self, text: StringView) -> Tuple[Optional[Node], StringView]:
if text[0:1] == BEGIN_TOKEN: if text[0:1] == BEGIN_TOKEN:
...@@ -884,25 +853,28 @@ class PreprocessorToken(Parser): ...@@ -884,25 +853,28 @@ class PreprocessorToken(Parser):
return None, text return None, text
class PlainText(Parser): class Token(Parser):
""" """
Parses plain text strings. (Could be done by RegExp as well, but is faster.) Parses plain text strings. (Could be done by RegExp as well, but is faster.)
Example:: Example::
>>> while_token = PlainText("while") >>> while_token = Token("while")
>>> Grammar(while_token)("while").content >>> Grammar(while_token)("while").content
'while' 'while'
""" """
assert PLAINTEXT_PTYPE == ":PlainText" assert TOKEN_PTYPE == ":Token"
def __init__(self, text: str, name: str = '') -> None: def __init__(self, text: str) -> None:
super().__init__(name) super().__init__()
self.text = text self.text = text
self.len = len(text) self.len = len(text)
def __deepcopy__(self, memo): def __deepcopy__(self, memo):
return self.__class__(self.text, self.name) duplicate = self.__class__(self.text)
duplicate.name = self.name
duplicate.ptype = self.ptype
return duplicate
def __call__(self, text: StringView) -> Tuple[Optional[Node], StringView]: def __call__(self, text: StringView) -> Tuple[Optional[Node], StringView]:
if text.startswith(self.text): if text.startswith(self.text):
...@@ -933,8 +905,8 @@ class RegExp(Parser): ...@@ -933,8 +905,8 @@ class RegExp(Parser):
EBNF-Example: ``word = /\w+/`` EBNF-Example: ``word = /\w+/``
""" """
def __init__(self, regexp, name: str = '') -> None: def __init__(self, regexp) -> None:
super().__init__(name) super().__init__()
self.regexp = re.compile(regexp) if isinstance(regexp, str) else regexp self.regexp = re.compile(regexp) if isinstance(regexp, str) else regexp
def __deepcopy__(self, memo): def __deepcopy__(self, memo):
...@@ -943,7 +915,10 @@ class RegExp(Parser): ...@@ -943,7 +915,10 @@ class RegExp(Parser):
regexp = copy.deepcopy(self.regexp, memo) regexp = copy.deepcopy(self.regexp, memo)
except TypeError: except TypeError:
regexp = self.regexp.pattern regexp = self.regexp.pattern
return self.__class__(regexp, self.name) duplicate = self.__class__(regexp)
duplicate.name = self.name
duplicate.ptype = self.ptype
return duplicate
def __call__(self, text: StringView) -> Tuple[Optional[Node], StringView]: def __call__(self, text: StringView) -> Tuple[Optional[Node], StringView]:
match = text.match(self.regexp) match = text.match(self.regexp)
...@@ -963,158 +938,37 @@ class RegExp(Parser): ...@@ -963,158 +938,37 @@ class RegExp(Parser):
return escape_control_characters('/%s/' % self.regexp.pattern) return escape_control_characters('/%s/' % self.regexp.pattern)
class Whitespace(RegExp): def withWS(parser_factory, wsL='', wsR='\s*'):
"""An variant of RegExp that signifies through its class name that it """Syntactic Sugar for 'Series(Whitespace(wsL), parser_factory(), Whitespace(wsR))'.
is a RegExp-parser for whitespace."""
assert WHITESPACE_PTYPE == ":Whitespace"