Commit 0a89a7c1 authored by di68kap's avatar di68kap
Browse files

parse.py: renamed class Token to Text to avoid misunderstandings due to ambiguous terminology

parent 777d02bc
......@@ -39,7 +39,7 @@ from DHParser.error import Error, AMBIGUOUS_ERROR_HANDLING, WARNING, REDECLARED_
DIRECTIVE_FOR_NONEXISTANT_SYMBOL, UNDEFINED_SYMBOL_IN_TRANSTABLE_WARNING
from DHParser.parse import Parser, Grammar, mixin_comment, mixin_nonempty, Forward, RegExp, \
Drop, Lookahead, NegativeLookahead, Alternative, Series, Option, ZeroOrMore, OneOrMore, \
Token, Capture, Retrieve, Pop, optional_last_value, GrammarError, Whitespace, Always, Never, \
Text, Capture, Retrieve, Pop, optional_last_value, GrammarError, Whitespace, Always, Never, \
INFINITE, matching_bracket, ParseFunc
from DHParser.preprocess import nil_preprocessor, PreprocessorFunc
from DHParser.syntaxtree import Node, WHITESPACE_PTYPE, TOKEN_PTYPE, EMPTY_NODE
......@@ -100,7 +100,7 @@ except ImportError:
import re
from DHParser import start_logging, suspend_logging, resume_logging, is_filename, load_if_file, \\
Grammar, Compiler, nil_preprocessor, PreprocessorToken, Whitespace, Drop, AnyChar, \\
Lookbehind, Lookahead, Alternative, Pop, Token, Synonym, Counted, Interleave, INFINITE, \\
Lookbehind, Lookahead, Alternative, Pop, Text, Synonym, Counted, Interleave, INFINITE, \\
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \\
ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \\
grammar_changed, last_value, matching_bracket, PreprocessorFunc, is_empty, remove_if, \\
......@@ -170,24 +170,24 @@ class EBNFGrammar(Grammar):
RE_CORE = RegExp('(?:(?<!\\\\)\\\\(?:/)|[^/])*')
regex_heuristics = Alternative(RegExp('[^ ]'), RegExp('[^/\\n*?+\\\\]*[*?+\\\\][^/\\n]/'))
literal_heuristics = Alternative(RegExp('~?\\s*"(?:[\\\\]\\]|[^\\]]|[^\\\\]\\[[^"]*)*"'), RegExp("~?\\s*'(?:[\\\\]\\]|[^\\]]|[^\\\\]\\[[^']*)*'"), RegExp('~?\\s*`(?:[\\\\]\\]|[^\\]]|[^\\\\]\\[[^`]*)*`'), RegExp('~?\\s*´(?:[\\\\]\\]|[^\\]]|[^\\\\]\\[[^´]*)*´'), RegExp('~?\\s*/(?:[\\\\]\\]|[^\\]]|[^\\\\]\\[[^/]*)*/'))
char_range_heuristics = NegativeLookahead(Alternative(RegExp('[\\n\\t ]'), Series(dwsp__, literal_heuristics), Series(Option(Alternative(Token("::"), Token(":?"), Token(":"))), SYM_REGEX, RegExp('\\s*\\]'))))
CH_LEADIN = Capture(Alternative(Token("0x"), Token("#x")))
RE_LEADOUT = Capture(Token("/"))
RE_LEADIN = Capture(Alternative(Series(Token("/"), Lookahead(regex_heuristics)), Token("^/")))
TIMES = Capture(Token("*"))
RNG_DELIM = Capture(Token(","))
BRACE_SIGN = Capture(Alternative(Token("{"), Token("(")))
char_range_heuristics = NegativeLookahead(Alternative(RegExp('[\\n\\t ]'), Series(dwsp__, literal_heuristics), Series(Option(Alternative(Text("::"), Text(":?"), Text(":"))), SYM_REGEX, RegExp('\\s*\\]'))))
CH_LEADIN = Capture(Alternative(Text("0x"), Text("#x")))
RE_LEADOUT = Capture(Text("/"))
RE_LEADIN = Capture(Alternative(Series(Text("/"), Lookahead(regex_heuristics)), Text("^/")))
TIMES = Capture(Text("*"))
RNG_DELIM = Capture(Text(","))
BRACE_SIGN = Capture(Alternative(Text("{"), Text("(")))
RNG_BRACE = Capture(Retrieve(BRACE_SIGN))
ENDL = Capture(Alternative(Token(";"), Token("")))
AND = Capture(Alternative(Token(","), Token("")))
OR = Capture(Alternative(Token("|"), Series(Token("/"), NegativeLookahead(regex_heuristics))))
DEF = Capture(Alternative(Token("="), Token(":="), Token("::="), Token("<-")))
ENDL = Capture(Alternative(Text(";"), Text("")))
AND = Capture(Alternative(Text(","), Text("")))
OR = Capture(Alternative(Text("|"), Series(Text("/"), NegativeLookahead(regex_heuristics))))
DEF = Capture(Alternative(Text("="), Text(":="), Text("::="), Text("<-")))
EOF = Drop(Drop(Series(Drop(NegativeLookahead(RegExp('.'))), Drop(Option(Drop(Pop(DEF, match_func=optional_last_value)))), Drop(Option(Drop(Pop(OR, match_func=optional_last_value)))), Drop(Option(Drop(Pop(AND, match_func=optional_last_value)))), Drop(Option(Drop(Pop(ENDL, match_func=optional_last_value)))), Drop(Option(Drop(Pop(RNG_DELIM, match_func=optional_last_value)))), Drop(Option(Drop(Pop(BRACE_SIGN, match_func=optional_last_value)))), Drop(Option(Drop(Pop(CH_LEADIN, match_func=optional_last_value)))), Drop(Option(Drop(Pop(TIMES, match_func=optional_last_value)))), Drop(Option(Drop(Pop(RE_LEADIN, match_func=optional_last_value)))), Drop(Option(Drop(Pop(RE_LEADOUT, match_func=optional_last_value)))))))
whitespace = Series(RegExp('~'), dwsp__)
any_char = Series(Token("."), dwsp__)
any_char = Series(Text("."), dwsp__)
free_char = Alternative(RegExp('[^\\n\\[\\]\\\\]'), RegExp('\\\\[nrt`´\'"(){}\\[\\]/\\\\]'))
character = Series(Retrieve(CH_LEADIN), HEXCODE)
char_range = Series(Token("["), Lookahead(char_range_heuristics), Option(Token("^")), Alternative(character, free_char), ZeroOrMore(Alternative(Series(Option(Token("-")), character), free_char)), Series(Token("]"), dwsp__))
char_range = Series(Text("["), Lookahead(char_range_heuristics), Option(Text("^")), Alternative(character, free_char), ZeroOrMore(Alternative(Series(Option(Text("-")), character), free_char)), Series(Text("]"), dwsp__))
regexp = Series(Retrieve(RE_LEADIN), RE_CORE, Retrieve(RE_LEADOUT), dwsp__)
plaintext = Alternative(Series(RegExp('`(?:(?<!\\\\)\\\\`|[^`])*?`'), dwsp__), Series(RegExp('´(?:(?<!\\\\)\\\\´|[^´])*?´'), dwsp__))
literal = Alternative(Series(RegExp('"(?:(?<!\\\\)\\\\"|[^"])*?"'), dwsp__), Series(RegExp("'(?:(?<!\\\\)\\\\'|[^'])*?'"), dwsp__))
......@@ -196,26 +196,26 @@ class EBNFGrammar(Grammar):
no_range = Alternative(NegativeLookahead(multiplier), Series(Lookahead(multiplier), Retrieve(TIMES)))
range = Series(RNG_BRACE, dwsp__, multiplier, Option(Series(Retrieve(RNG_DELIM), dwsp__, multiplier)), Pop(RNG_BRACE, match_func=matching_bracket), dwsp__)
counted = Alternative(Series(countable, range), Series(countable, Retrieve(TIMES), dwsp__, multiplier), Series(multiplier, Retrieve(TIMES), dwsp__, countable, mandatory=3))
option = Alternative(Series(NegativeLookahead(char_range), Series(Token("["), dwsp__), expression, Series(Token("]"), dwsp__), mandatory=2), Series(element, Series(Token("?"), dwsp__)))
repetition = Alternative(Series(Series(Token("{"), dwsp__), no_range, expression, Series(Token("}"), dwsp__), mandatory=2), Series(element, Series(Token("*"), dwsp__), no_range))
oneormore = Alternative(Series(Series(Token("{"), dwsp__), no_range, expression, Series(Token("}+"), dwsp__)), Series(element, Series(Token("+"), dwsp__)))
group = Series(Series(Token("("), dwsp__), no_range, expression, Series(Token(")"), dwsp__), mandatory=2)
retrieveop = Alternative(Series(Token("::"), dwsp__), Series(Token(":?"), dwsp__), Series(Token(":"), dwsp__))
flowmarker = Alternative(Series(Token("!"), dwsp__), Series(Token("&"), dwsp__), Series(Token("<-!"), dwsp__), Series(Token("<-&"), dwsp__))
option = Alternative(Series(NegativeLookahead(char_range), Series(Text("["), dwsp__), expression, Series(Text("]"), dwsp__), mandatory=2), Series(element, Series(Text("?"), dwsp__)))
repetition = Alternative(Series(Series(Text("{"), dwsp__), no_range, expression, Series(Text("}"), dwsp__), mandatory=2), Series(element, Series(Text("*"), dwsp__), no_range))
oneormore = Alternative(Series(Series(Text("{"), dwsp__), no_range, expression, Series(Text("}+"), dwsp__)), Series(element, Series(Text("+"), dwsp__)))
group = Series(Series(Text("("), dwsp__), no_range, expression, Series(Text(")"), dwsp__), mandatory=2)
retrieveop = Alternative(Series(Text("::"), dwsp__), Series(Text(":?"), dwsp__), Series(Text(":"), dwsp__))
flowmarker = Alternative(Series(Text("!"), dwsp__), Series(Text("&"), dwsp__), Series(Text("<-!"), dwsp__), Series(Text("<-&"), dwsp__))
ANY_SUFFIX = RegExp('[?*+]')
element.set(Alternative(Series(Option(retrieveop), symbol, NegativeLookahead(Retrieve(DEF))), literal, plaintext, regexp, char_range, Series(character, dwsp__), any_char, whitespace, group))
pure_elem = Series(element, NegativeLookahead(ANY_SUFFIX), mandatory=1)
countable.set(Alternative(option, oneormore, element))
term = Alternative(oneormore, counted, repetition, option, pure_elem)
difference = Series(term, Option(Series(Series(Token("-"), dwsp__), Alternative(oneormore, pure_elem), mandatory=1)))
difference = Series(term, Option(Series(Series(Text("-"), dwsp__), Alternative(oneormore, pure_elem), mandatory=1)))
lookaround = Series(flowmarker, Alternative(oneormore, pure_elem), mandatory=1)
interleave = Series(difference, ZeroOrMore(Series(Series(Token("°"), dwsp__), Option(Series(Token("§"), dwsp__)), difference)))
sequence = Series(Option(Series(Token("§"), dwsp__)), Alternative(interleave, lookaround), ZeroOrMore(Series(Retrieve(AND), dwsp__, Option(Series(Token("§"), dwsp__)), Alternative(interleave, lookaround))))
interleave = Series(difference, ZeroOrMore(Series(Series(Text("°"), dwsp__), Option(Series(Text("§"), dwsp__)), difference)))
sequence = Series(Option(Series(Text("§"), dwsp__)), Alternative(interleave, lookaround), ZeroOrMore(Series(Retrieve(AND), dwsp__, Option(Series(Text("§"), dwsp__)), Alternative(interleave, lookaround))))
expression.set(Series(sequence, ZeroOrMore(Series(Retrieve(OR), dwsp__, sequence))))
FOLLOW_UP = Alternative(Token("@"), symbol, EOF)
procedure = Series(SYM_REGEX, Series(Token("()"), dwsp__))
FOLLOW_UP = Alternative(Text("@"), symbol, EOF)
procedure = Series(SYM_REGEX, Series(Text("()"), dwsp__))
literals = OneOrMore(literal)
directive = Series(Series(Token("@"), dwsp__), symbol, Series(Token("="), dwsp__), Alternative(regexp, literals, procedure, Series(symbol, NegativeLookahead(DEF))), ZeroOrMore(Series(Series(Token(","), dwsp__), Alternative(regexp, literals, procedure, Series(symbol, NegativeLookahead(DEF))))), Lookahead(FOLLOW_UP), mandatory=1)
directive = Series(Series(Text("@"), dwsp__), symbol, Series(Text("="), dwsp__), Alternative(regexp, literals, procedure, Series(symbol, NegativeLookahead(DEF))), ZeroOrMore(Series(Series(Text(","), dwsp__), Alternative(regexp, literals, procedure, Series(symbol, NegativeLookahead(DEF))))), Lookahead(FOLLOW_UP), mandatory=1)
definition = Series(symbol, Retrieve(DEF), dwsp__, expression, Retrieve(ENDL), dwsp__, Lookahead(FOLLOW_UP), mandatory=1, err_msgs=error_messages__["definition"])
syntax = Series(Option(dwsp__), ZeroOrMore(Alternative(definition, directive)), EOF)
root__ = syntax
......@@ -501,10 +501,10 @@ WHITESPACE_TYPES = {'horizontal': r'[\t ]*', # default: horizontal
'linefeed': r'[ \t]*\n?(?!\s*\n)[ \t]*',
'vertical': r'\s*'}
DROP_TOKEN = 'token'
DROP_STRINGS = 'strings'
DROP_WSPC = 'whitespace'
DROP_REGEXP = 'regexp'
DROP_VALUES = {DROP_TOKEN, DROP_WSPC, DROP_REGEXP}
DROP_REGEXP = 'regexps'
DROP_VALUES = {DROP_STRINGS, DROP_WSPC, DROP_REGEXP}
# Representation of Python code or, rather, something that will be output as Python code
ReprType = Union[str, unrepr]
......@@ -518,7 +518,7 @@ KNOWN_DIRECTIVES = {
'[preprocessor_]tokens': 'List of the names of all preprocessor tokens',
'anonymous': 'List of symbols that are NOT to appear as tag-names',
'drop': 'List of tags to be dropped early from syntax tree, '
'special values: token, whitespace, regexp',
'special values: strings, whitespace, regexps',
'$SYMBOL_filer': 'Function that transforms captured values of the givensymbol on retrieval',
'$SYMBOL_error': 'Pair of regular epxression an custom error message if regex matches',
'$SYMBOL_skip': 'List of regexes or functions to find reentry point after an error',
......@@ -567,7 +567,7 @@ class EBNFDirectives:
the failing parser (`parser.Series` or `parser.Interleave`)
has returned.
drop: A set that may contain the elements `DROP_TOKEN` and
drop: A set that may contain the elements `DROP_STRINGS` and
`DROP_WSPC`, `DROP_REGEXP` or any name of a symbol
of an anonymous parser (e.g. '_linefeed') the results
of which will be dropped during the parsing process,
......@@ -1018,7 +1018,7 @@ class EBNFCompiler(Compiler):
# add special fields for Grammar class
if DROP_WSPC in self.directives.drop or DROP_TOKEN in self.directives.drop:
if DROP_WSPC in self.directives.drop or DROP_STRINGS in self.directives.drop:
definitions.append((EBNFCompiler.DROP_WHITESPACE_PARSER_KEYWORD,
'Drop(Whitespace(%s))' % EBNFCompiler.WHITESPACE_KEYWORD))
definitions.append((EBNFCompiler.WHITESPACE_PARSER_KEYWORD,
......@@ -1482,7 +1482,7 @@ class EBNFCompiler(Compiler):
# remove drop clause for non dropping definitions of forms like "/\w+/~"
if (parser_class == "Series" and node.tag_name not in self.directives.drop
and DROP_REGEXP in self.directives.drop and self.context[-2].tag_name == "definition"
and all((arg.startswith('Drop(RegExp(') or arg.startswith('Drop(Token(')
and all((arg.startswith('Drop(RegExp(') or arg.startswith('Drop(Text(')
or arg in EBNFCompiler.COMMENT_OR_WHITESPACE) for arg in arguments)):
arguments = [arg.replace('Drop(', '').replace('))', ')') for arg in arguments]
if self.drop_flag:
......@@ -1759,10 +1759,10 @@ class EBNFCompiler(Compiler):
return symbol
def TOKEN_PARSER(self, token):
if DROP_TOKEN in self.directives.drop and self.context[-2].tag_name != "definition":
return 'Drop(Token(' + token + '))'
return 'Token(' + token + ')'
def TEXT_PARSER(self, text):
    """Return the Python source string of a ``Text`` parser for `text`.

    The expression is wrapped in ``Drop(...)`` if ``DROP_STRINGS`` is
    among ``self.directives.drop`` and the second-to-last entry of
    ``self.context`` does not have the tag name "definition".
    """
    if DROP_STRINGS in self.directives.drop:
        enclosing = self.context[-2]
        if enclosing.tag_name != "definition":
            return 'Drop(Text(' + text + '))'
    return 'Text(' + text + ')'
def REGEXP_PARSER(self, regexp):
if DROP_REGEXP in self.directives.drop and self.context[-2].tag_name != "definition":
......@@ -1784,8 +1784,8 @@ class EBNFCompiler(Compiler):
def on_literal(self, node: Node) -> str:
center = self.TOKEN_PARSER(escape_control_characters(node.content))
force = DROP_TOKEN in self.directives.drop
center = self.TEXT_PARSER(escape_control_characters(node.content))
force = DROP_STRINGS in self.directives.drop
left = self.WSPC_PARSER(force) if 'left' in self.directives.literalws else ''
right = self.WSPC_PARSER(force) if 'right' in self.directives.literalws else ''
if left or right:
......@@ -1800,7 +1800,7 @@ class EBNFCompiler(Compiler):
tk = rpl + tk[1:-1] + rpl
else:
tk = rpl + tk.replace('"', '\\"')[1:-1] + rpl
return self.TOKEN_PARSER(tk)
return self.TEXT_PARSER(tk)
def on_regexp(self, node: Node) -> str:
......
......@@ -59,7 +59,7 @@ cdef class Grammar:
cdef class PreprocessorToken(Parser):
pass
cdef class Token(Parser):
cdef class Text(Parser):
cdef public str text
cdef public int len
......
......@@ -64,8 +64,8 @@ __all__ = ('ParserError',
'Never',
'AnyChar',
'PreprocessorToken',
'Token',
'DropToken',
'Text',
'DropText',
'RegExp',
'RE',
'TKN',
......@@ -1658,24 +1658,24 @@ class PreprocessorToken(Parser):
########################################################################
#
# Token and Regular Expression parser classes (leaf classes)
# Text and Regular Expression parser classes (leaf classes)
#
########################################################################
class Token(Parser):
class Text(Parser):
"""
Parses plain text strings. (Could be done by RegExp as well, but is faster.)
Example::
>>> while_token = Token("while")
>>> while_token = Text("while")
>>> Grammar(while_token)("while").content
'while'
"""
assert TOKEN_PTYPE == ":Token"
assert TOKEN_PTYPE == ":Text"
def __init__(self, text: str) -> None:
super(Token, self).__init__()
super(Text, self).__init__()
self.text = text
self.len = len(text)
......@@ -1759,8 +1759,8 @@ class RegExp(Parser):
.replace('/', '\\/') + '/'
def DropToken(text: str) -> Token:
return cast(Token, Drop(Token(text)))
def DropText(text: str) -> Text:
    """Build a ``Text`` parser for `text` and wrap it in ``Drop``.

    The wrapped parser is cast to ``Text`` for the benefit of the
    type checker.
    """
    dropped = Drop(Text(text))
    return cast(Text, dropped)
def DropRegExp(regexp) -> RegExp:
......@@ -1790,13 +1790,13 @@ def RE(regexp, wsL='', wsR=r'\s*'):
def TKN(token, wsL='', wsR=r'\s*'):
"""Syntactic Sugar for 'Series(Whitespace(wsL), Token(token), Whitespace(wsR))'"""
return withWS(lambda: Token(token), wsL, wsR)
"""Syntactic Sugar for 'Series(Whitespace(wsL), Text(token), Whitespace(wsR))'"""
return withWS(lambda: Text(token), wsL, wsR)
def DTKN(token, wsL='', wsR=r'\s*'):
"""Syntactic Sugar for 'Series(Whitespace(wsL), DropToken(token), Whitespace(wsR))'"""
return withWS(lambda: Drop(Token(token)), wsL, wsR)
"""Syntactic Sugar for 'Series(Whitespace(wsL), DropText(token), Whitespace(wsR))'"""
return withWS(lambda: Drop(Text(token)), wsL, wsR)
class Whitespace(RegExp):
......@@ -2131,7 +2131,7 @@ class Counted(UnaryParser):
Examples:
>>> A2_4 = Counted(Token('A'), (2, 4))
>>> A2_4 = Counted(Text('A'), (2, 4))
>>> A2_4
`A`{2,4}
>>> Grammar(A2_4)('AA').as_sxpr()
......@@ -2140,11 +2140,11 @@ class Counted(UnaryParser):
'(:Counted (:Text "A") (:Text "A") (:Text "A") (:Text "A"))'
>>> Grammar(A2_4)('A', complete_match=False).as_sxpr()
'(ZOMBIE__ `(Error (1040): Parser did not match!))'
>>> moves = OneOrMore(Counted(Token('A'), (1, 3)) + Counted(Token('B'), (1, 3)))
>>> moves = OneOrMore(Counted(Text('A'), (1, 3)) + Counted(Text('B'), (1, 3)))
>>> result = Grammar(moves)('AAABABB')
>>> result.tag_name, result.content
(':OneOrMore', 'AAABABB')
>>> moves = Counted(Token('A'), (2, 3)) * Counted(Token('B'), (2, 3))
>>> moves = Counted(Text('A'), (2, 3)) * Counted(Text('B'), (2, 3))
>>> moves
`A`{2,3} ° `B`{2,3}
>>> Grammar(moves)('AAABB').as_sxpr()
......@@ -2453,8 +2453,8 @@ def starting_string(parser: Parser) -> str:
return been_there[p]
else:
been_there[p] = ""
if isinstance(p, Token):
been_there[p] = cast(Token, p).text
if isinstance(p, Text):
been_there[p] = cast(Text, p).text
elif isinstance(p, Series) or isinstance(p, Alternative):
been_there[p] = find_starting_string(cast(NaryParser, p).parsers[0])
elif isinstance(p, Synonym) or isinstance(p, OneOrMore) \
......@@ -2798,7 +2798,7 @@ class NegativeLookahead(Lookahead):
class Lookbehind(FlowParser):
"""
Matches, if the contained parser would match backwards. Requires
the contained parser to be a RegExp, _RE, Token parser.
the contained parser to be a RegExp, _RE or Text parser.
EXPERIMENTAL
"""
......@@ -2806,13 +2806,13 @@ class Lookbehind(FlowParser):
p = parser
while isinstance(p, Synonym):
p = p.parser
assert isinstance(p, RegExp) or isinstance(p, Token)
assert isinstance(p, RegExp) or isinstance(p, Text)
self.regexp = None
self.text = '' # type: str
if isinstance(p, RegExp):
self.regexp = cast(RegExp, p).regexp
else: # p is of type Token
self.text = cast(Token, p).text
else: # p is of type Text
self.text = cast(Text, p).text
super(Lookbehind, self).__init__(parser)
def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]:
......
......@@ -78,7 +78,7 @@ __all__ = ('WHITESPACE_PTYPE',
WHITESPACE_PTYPE = ':Whitespace'
TOKEN_PTYPE = ':Token'
TOKEN_PTYPE = ':Text'
REGEXP_PTYPE = ':RegExp'
EMPTY_PTYPE = ':EMPTY'
LEAF_PTYPES = {WHITESPACE_PTYPE, TOKEN_PTYPE, REGEXP_PTYPE}
......
......@@ -8,7 +8,10 @@ def compile_src(source):
if __name__ == "__main__":
# recompile grammar if needed
grammar_path = os.path.abspath(__file__).replace('Parser.py', '.ebnf')
if __file__.endswith('Parser.py'):
grammar_path = os.path.abspath(__file__).replace('Parser.py', '.ebnf')
else:
grammar_path = os.path.splitext(__file__)[0] + '.ebnf'
parser_update = False
def notify():
......@@ -16,7 +19,7 @@ if __name__ == "__main__":
parser_update = True
print('recompiling ' + grammar_path)
if os.path.exists(grammar_path):
if os.path.exists(grammar_path) and os.path.isfile(grammar_path):
if not recompile_grammar(grammar_path, force=False, notify=notify):
error_file = os.path.basename(__file__).replace('Parser.py', '_ebnf_ERRORS.txt')
with open(error_file, encoding="utf-8") as f:
......
......@@ -522,7 +522,7 @@ def is_empty(context: List[Node]) -> bool:
@transformation_factory(collections.abc.Set)
def is_token(context: List[Node], tokens: AbstractSet[str] = frozenset()) -> bool:
"""
Checks whether the last node in the context has the tag_name ":Token"
Checks whether the last node in the context has the tag_name ":Text"
and its content matches one of the given tokens. Leading and trailing
whitespace-tokens will be ignored. In case an empty set of tokens is passed,
any token is a match.
......
......@@ -32,7 +32,7 @@ key_value_store.py:
from DHParser import *
# specify the grammar of your DSL in EBNF-notation
grammar = '''@ drop = whitespace, token
grammar = '''@ drop = whitespace, strings
key_store = ~ { entry }
entry = key "=" value
key = /\w+/~ # Scannerless parsing: Use regular
......
......@@ -6,11 +6,11 @@
#
#######################################################################
@ whitespace = vertical # implicit whitespace, includes any number of line feeds
@ literalws = right # literals have implicit whitespace on the right hand side
@ comment = /#.*/ # comments range from a '#'-character to the end of the line
@ ignorecase = False # literals and regular expressions are case-sensitive
@ drop = whitespace, token # drop anonymous whitespace
@ whitespace = vertical # implicit whitespace, includes any number of line feeds
@ literalws = right # literals have implicit whitespace on the right hand side
@ comment = /#.*/ # comments range from a '#'-character to the end of the line
@ ignorecase = False # literals and regular expressions are case-sensitive
@ drop = whitespace, strings # drop anonymous whitespace and strings
#######################################################################
#
......
......@@ -22,7 +22,7 @@ except ImportError:
import re
from DHParser import start_logging, is_filename, load_if_file, \
Grammar, Compiler, nil_preprocessor, PreprocessorToken, Whitespace, Drop, \
Lookbehind, Lookahead, Alternative, Pop, Token, Drop, Synonym, \
Lookbehind, Lookahead, Alternative, Pop, Text, Drop, Synonym, \
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \
ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \
grammar_changed, last_value, matching_bracket, PreprocessorFunc, \
......@@ -75,11 +75,11 @@ class ArithmeticGrammar(Grammar):
NUMBER = Series(RegExp('(?:0|(?:[1-9]\\d*))(?:\\.\\d+)?'), dwsp__)
NEGATIVE = RegExp('[-]')
POSITIVE = RegExp('[+]')
DIV = Series(Token("/"), dwsp__)
MUL = Series(Token("*"), dwsp__)
MINUS = Series(Token("-"), dwsp__)
PLUS = Series(Token("+"), dwsp__)
group = Series(Series(Drop(Token("(")), dwsp__), expression, Series(Drop(Token(")")), dwsp__))
DIV = Series(Text("/"), dwsp__)
MUL = Series(Text("*"), dwsp__)
MINUS = Series(Text("-"), dwsp__)
PLUS = Series(Text("+"), dwsp__)
group = Series(Series(Drop(Text("(")), dwsp__), expression, Series(Drop(Text(")")), dwsp__))
sign = Alternative(POSITIVE, NEGATIVE)
factor = Series(Option(sign), Alternative(NUMBER, VARIABLE, group), ZeroOrMore(Alternative(VARIABLE, group)))
term = Series(factor, ZeroOrMore(Series(Alternative(DIV, MUL), factor)))
......
......@@ -6,11 +6,11 @@
#
#######################################################################
@ whitespace = vertical # implicit whitespace, includes any number of line feeds
@ literalws = right # literals have implicit whitespace on the right hand side
@ comment = /#.*/ # comments range from a '#'-character to the end of the line
@ ignorecase = False # literals and regular expressions are case-sensitive
@ drop = whitespace, token # drop anonymous whitespace
@ whitespace = vertical # implicit whitespace, includes any number of line feeds
@ literalws = right # literals have implicit whitespace on the right hand side
@ comment = /#.*/ # comments range from a '#'-character to the end of the line
@ ignorecase = False # literals and regular expressions are case-sensitive
@ drop = whitespace, strings # drop anonymous whitespace and strings
#######################################################################
......
......@@ -22,7 +22,7 @@ except ImportError:
import re
from DHParser import start_logging, is_filename, load_if_file, \
Grammar, Compiler, nil_preprocessor, PreprocessorToken, Whitespace, Drop, \
Lookbehind, Lookahead, Alternative, Pop, Token, Synonym, \
Lookbehind, Lookahead, Alternative, Pop, Text, Synonym, \
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \
ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \
grammar_changed, last_value, matching_bracket, PreprocessorFunc, \
......@@ -79,32 +79,32 @@ class ArithmeticRightRecursiveGrammar(Grammar):
NUMBER = RegExp('(?:0|(?:[1-9]\\d*))(?:\\.\\d+)?')
MINUS = RegExp('-')
PLUS = RegExp('\\+')
i = Token("i")
e = Token("e")
pi = Alternative(Drop(Token("pi")), Drop(Token("π")))
i = Text("i")
e = Text("e")
pi = Alternative(Drop(Text("pi")), Drop(Text("π")))
special = Alternative(pi, e)
number = Synonym(NUMBER)
log = Series(Series(Drop(Token('log(')), dwsp__), expression, Drop(Token(")")), mandatory=1)
tan = Series(Series(Drop(Token('tan(')), dwsp__), expression, Drop(Token(")")), mandatory=1)
cos = Series(Series(Drop(Token('cos(')), dwsp__), expression, Drop(Token(")")), mandatory=1)
sin = Series(Series(Drop(Token('sin(')), dwsp__), expression, Drop(Token(")")), mandatory=1)
log = Series(Series(Drop(Text('log(')), dwsp__), expression, Drop(Text(")")), mandatory=1)
tan = Series(Series(Drop(Text('tan(')), dwsp__), expression, Drop(Text(")")), mandatory=1)
cos = Series(Series(Drop(Text('cos(')), dwsp__), expression, Drop(Text(")")), mandatory=1)
sin = Series(Series(Drop(Text('sin(')), dwsp__), expression, Drop(Text(")")), mandatory=1)
function = Alternative(sin, cos, tan, log)
group = Series(Drop(Token("(")), expression, Drop(Token(")")), mandatory=1)
group = Series(Drop(Text("(")), expression, Drop(Text(")")), mandatory=1)
tail_value = Alternative(special, function, VARIABLE, group)
tail_pow = Series(tail_value, Option(i), Drop(Token("^")), element)
tail_pow = Series(tail_value, Option(i), Drop(Text("^")), element)
tail_elem = Alternative(tail_pow, tail_value)
value = Series(Alternative(number, tail_value), Option(i))
pow = Series(value, Drop(Token("^")), Option(sign), element)
pow = Series(value, Drop(Text("^")), Option(sign), element)
element.set(Alternative(pow, value))
sign.set(Alternative(PLUS, MINUS))
seq = Series(tail_elem, tail)
tail.set(Series(Alternative(seq, tail_elem), Option(i)))
factor = Series(Option(sign), Alternative(Series(Option(element), tail), element), dwsp__)
div = Series(factor, Series(Drop(Token("/")), dwsp__), term)
mul = Series(factor, Series(Drop(Token("*")), dwsp__), term)
div = Series(factor, Series(Drop(Text("/")), dwsp__), term)
mul = Series(factor, Series(Drop(Text("*")), dwsp__), term)
term.set(Alternative(mul, div, factor))
sub = Series(term, Series(Drop(Token("-")), dwsp__), expression)
add = Series(term, Series(Drop(Token("+")), dwsp__), expression)
sub = Series(term, Series(Drop(Text("-")), dwsp__), expression)
add = Series(term, Series(Drop(Text("+")), dwsp__), expression)
expression.set(Alternative(add, sub, term))
root__ = expression
......
......@@ -6,11 +6,11 @@
#
#######################################################################
@ whitespace = vertical # implicit whitespace, includes any number of line feeds
@ literalws = right # literals have implicit whitespace on the right hand side
@ comment = /#.*/ # comments range from a '#'-character to the end of the line
@ ignorecase = False # literals and regular expressions are case-sensitive
@ drop = whitespace, token # drop anonymous whitespace
@ whitespace = vertical # implicit whitespace, includes any number of line feeds
@ literalws = right # literals have implicit whitespace on the right hand side
@ comment = /#.*/ # comments range from a '#'-character to the end of the line
@ ignorecase = False # literals and regular expressions are case-sensitive
@ drop = whitespace, strings # drop anonymous whitespace and strings
#######################################################################
#
......
......@@ -22,7 +22,7 @@ except ImportError:
import re
from DHParser import start_logging, is_filename, load_if_file, \
Grammar, Compiler, nil_preprocessor, PreprocessorToken, Whitespace, Drop, \
Lookbehind, Lookahead, Alternative, Pop, Token, Synonym, \
Lookbehind, Lookahead, Alternative, Pop, Text, Synonym, \
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \
ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \
grammar_changed, last_value, matching_bracket, PreprocessorFunc, is_empty, \
......@@ -77,14 +77,14 @@ class ArithmeticRightRecursiveGrammar(Grammar):
NUMBER = Series(RegExp('(?:0|(?:[1-9]\\d*))(?:\\.\\d+)?'), dwsp__)
NEGATIVE = RegExp('[-]')
POSITIVE = RegExp('[+]')
group = Series(Series(Drop(Token("(")), dwsp__), expression, Series(Drop(Token(")")), dwsp__))
group = Series(Series(Drop(Text("(")), dwsp__), expression, Series(Drop(Text(")")), dwsp__))
sign = Alternative(POSITIVE, NEGATIVE)
factor = Series(Option(sign), Alternative(NUMBER, VARIABLE, group))
div = Series(factor, Series(Drop(Token("/")), dwsp__), term)
mul = Series(factor, Series(Drop(Token("*")), dwsp__), term)
div = Series(factor, Series(Drop(Text("/")), dwsp__), term)
mul = Series(factor, Series(Drop(Text("*")), dwsp__), term)
term.set(Alternative(mul, div, factor))
sub = Series(term, Series(Drop(Token("-")), dwsp__), expression)
add = Series(term, Series(Drop(Token("+")), dwsp__), expression)
sub = Series(term, Series(Drop(Text("-")), dwsp__), expression)
add = Series(term, Series(Drop(Text("+")), dwsp__), expression)
expression.set(Alternative(add, sub, term))
root__ = expression
......
......@@ -6,11 +6,11 @@
#
#######################################################################
@ whitespace = vertical # implicit whitespace, includes any number of line feeds
@ literalws = right # literals have implicit whitespace on the right hand side
@ comment = /#.*/ # comments range from a '#'-character to the end of the line
@ ignorecase = False # literals and regular expressions are case-sensitive
@ drop = whitespace, token # drop anonymous whitespace
@ whitespace = vertical # implicit whitespace, includes any number of line feeds
@ literalws = right # literals have implicit whitespace on the right hand side
@ comment = /#.*/ # comments range from a '#'-character to the end of the line
@ ignorecase = False # literals and regular expressions are case-sensitive
@ drop = whitespace, strings # drop anonymous whitespace and strings
#######################################################################
#
......
......@@ -22,7 +22,7 @@ except ImportError:
import re
from DHParser import start_logging, is_filename, load_if_file, \
Grammar, Compiler, nil_preprocessor, PreprocessorToken, Whitespace, Drop, \
Lookbehind, Lookahead, Alternative, Pop, Token, Synonym,\
Lookbehind, Lookahead, Alternative, Pop, Text, Synonym,\
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \
ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \
grammar_changed, last_value, matching_bracket, PreprocessorFunc, \
......@@ -75,11 +75,11 @@ class ArithmeticSimpleGrammar(Grammar):
NUMBER = Series(RegExp('(?:0|(?:[1-9]\\d*))(?:\\.\\d+)?'), dwsp__)
NEGATIVE = RegExp('[-]')
POSITIVE = RegExp('[+]')