Commit 7d72aa08 authored by eckhart

- deprecated parse.Token and parse.RE

parent 6a14734a
@@ -95,7 +95,7 @@ Allow to specify parsers/nodes, the result of which will be dropped
right away, so that the nodes they produce do not need to be removed
during the AST-Transformations. Typical candidates would be:
-1. Tokens ":Token"
+1. Tokens ":_Token"
2. Whitespace ":Whitespace" (in some cases)
3. empty Nodes
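
A sketch of the status quo for comparison: today such nodes are removed during
the AST-transformation, e.g. with a table like the following (helper names as
used elsewhere in this diff; the ":Whitespace" entry is only an assumption)::

    cleanup_table = {
        ":_Token": [remove_whitespace, reduce_single_child],
        ":Whitespace": [],            # only in some cases
        "*": replace_by_single_child  # fallback joker
    }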
@@ -143,8 +143,8 @@ parsers:
"contains" another parser without its calls being run through the
parser guard, but that records every call of the parser and its
results, e.g. to trace the `option`-parser from the ebnf-parser (see
-DHParser/ebnf.py) you'd write: `option = Trace(Series(Token("["),
-expression, Token("]"), mandatory=1))`
+DHParser/ebnf.py) you'd write: `option = Trace(Series(_Token("["),
+expression, _Token("]"), mandatory=1))`
- For the ebnf-representation a tracing-prefix could be added, say `?`,
e.g. `option = ?("[" §expression "]")` or, alternatively, `?option =
......
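
A hypothetical sketch of such a Trace-wrapper, leaving aside the parser-guard
question raised above (none of this is existing DHParser API)::

    class Trace:
        """Records every call of the contained parser and its result."""
        def __init__(self, parser):
            self.parser = parser
            self.log = []  # list of (input snippet, resulting node) pairs

        def __call__(self, text):
            node, rest = self.parser(text)
            self.log.append((str(text)[:20], node))
            return node, rest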
@@ -89,8 +89,8 @@ except ImportError:
import re
from DHParser import logging, is_filename, load_if_file, \
Grammar, Compiler, nil_preprocessor, PreprocessorToken, Whitespace, \
-Lookbehind, Lookahead, Alternative, Pop, Token, Synonym, AllOf, SomeOf, Unordered, \
-Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture, \
+Lookbehind, Lookahead, Alternative, Pop, _Token, Synonym, AllOf, SomeOf, Unordered, \
+Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, _RE, Capture, \
ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \
grammar_changed, last_value, counterpart, accumulate, PreprocessorFunc, \
Node, TransformationFunc, TransformationDict, transformation_factory, \
......
@@ -30,8 +30,8 @@ from functools import partial
from DHParser.compile import CompilerError, Compiler
from DHParser.error import Error
-from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, Whitespace, RE, \
-NegativeLookahead, Alternative, Series, Option, OneOrMore, ZeroOrMore, Token
+from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, Whitespace, _RE, \
+NegativeLookahead, Alternative, Series, Option, OneOrMore, ZeroOrMore, _Token
from DHParser.preprocess import nil_preprocessor, PreprocessorFunc
from DHParser.syntaxtree import Node, WHITESPACE_PTYPE, TOKEN_PTYPE
from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name, re, expand_table, \
@@ -135,29 +135,29 @@ class EBNFGrammar(Grammar):
wspR__ = WSP__
whitespace__ = Whitespace(WSP__)
EOF = NegativeLookahead(RegExp('.'))
-list_ = Series(RE('\\w+'), ZeroOrMore(Series(Token(","), RE('\\w+'))))
-whitespace = RE('~')
-regexp = RE('~?/(?:\\\\/|[^/])*?/~?')
-plaintext = RE('`(?:[^"]|\\\\")*?`')
-literal = Alternative(RE('"(?:[^"]|\\\\")*?"'), RE("'(?:[^']|\\\\')*?'"))
-symbol = RE('(?!\\d)\\w+')
-option = Series(Token("["), expression, Token("]"), mandatory=1)
-repetition = Series(Token("{"), expression, Token("}"), mandatory=1)
-oneormore = Series(Token("{"), expression, Token("}+"))
-unordered = Series(Token("<"), expression, Token(">"), mandatory=1)
-group = Series(Token("("), expression, Token(")"), mandatory=1)
-retrieveop = Alternative(Token("::"), Token(":"))
-flowmarker = Alternative(Token("!"), Token("&"), Token("-!"), Token("-&"))
-factor = Alternative(Series(Option(flowmarker), Option(retrieveop), symbol, NegativeLookahead(Token("="))),
+list_ = Series(_RE('\\w+'), ZeroOrMore(Series(_Token(","), _RE('\\w+'))))
+whitespace = _RE('~')
+regexp = _RE('~?/(?:\\\\/|[^/])*?/~?')
+plaintext = _RE('`(?:[^"]|\\\\")*?`')
+literal = Alternative(_RE('"(?:[^"]|\\\\")*?"'), _RE("'(?:[^']|\\\\')*?'"))
+symbol = _RE('(?!\\d)\\w+')
+option = Series(_Token("["), expression, _Token("]"), mandatory=1)
+repetition = Series(_Token("{"), expression, _Token("}"), mandatory=1)
+oneormore = Series(_Token("{"), expression, _Token("}+"))
+unordered = Series(_Token("<"), expression, _Token(">"), mandatory=1)
+group = Series(_Token("("), expression, _Token(")"), mandatory=1)
+retrieveop = Alternative(_Token("::"), _Token(":"))
+flowmarker = Alternative(_Token("!"), _Token("&"), _Token("-!"), _Token("-&"))
+factor = Alternative(Series(Option(flowmarker), Option(retrieveop), symbol, NegativeLookahead(_Token("="))),
Series(Option(flowmarker), literal), Series(Option(flowmarker), plaintext),
Series(Option(flowmarker), regexp), Series(Option(flowmarker), whitespace),
Series(Option(flowmarker), oneormore), Series(Option(flowmarker), group),
Series(Option(flowmarker), unordered), repetition, option)
-term = OneOrMore(Series(Option(Token("§")), factor))
-expression.set(Series(term, ZeroOrMore(Series(Token("|"), term))))
-directive = Series(Token("@"), symbol, Token("="), Alternative(regexp, literal, list_), mandatory=1)
-definition = Series(symbol, Token("="), expression, mandatory=1)
-syntax = Series(Option(RE('', wR='', wL=WSP__)), ZeroOrMore(Alternative(definition, directive)), EOF, mandatory=2)
+term = OneOrMore(Series(Option(_Token("§")), factor))
+expression.set(Series(term, ZeroOrMore(Series(_Token("|"), term))))
+directive = Series(_Token("@"), symbol, _Token("="), Alternative(regexp, literal, list_), mandatory=1)
+definition = Series(symbol, _Token("="), expression, mandatory=1)
+syntax = Series(Option(_RE('', wR='', wL=WSP__)), ZeroOrMore(Alternative(definition, directive)), EOF, mandatory=2)
root__ = syntax
@@ -459,7 +459,7 @@ class EBNFCompiler(Compiler):
elif rule.startswith('Synonym'):
transformations = '[reduce_single_child]'
transtable.append(' "' + name + '": %s,' % transformations)
-transtable.append(' ":Token, :RE": reduce_single_child,')
+transtable.append(' ":_Token, :_RE": reduce_single_child,')
transtable += [' "*": replace_by_single_child', '}', '']
transtable += [TRANSFORMER_FACTORY.format(NAME=self.grammar_name)]
return '\n'.join(transtable)
@@ -921,11 +921,11 @@ class EBNFCompiler(Compiler):
def on_literal(self, node: Node) -> str:
-return 'Token(' + node.content.replace('\\', r'\\') + ')'
+return '_Token(' + node.content.replace('\\', r'\\') + ')'
def on_plaintext(self, node: Node) -> str:
-return 'Token(' + node.content.replace('\\', r'\\').replace('`', '"') \
+return '_Token(' + node.content.replace('\\', r'\\').replace('`', '"') \
+ ", wL='', wR='')"
@@ -935,7 +935,7 @@ class EBNFCompiler(Compiler):
if rx[0] == '/' and rx[-1] == '/':
parser = 'RegExp('
else:
-parser = 'RE('
+parser = '_RE('
if rx[:2] == '~/':
if not 'left' in self.directives['literalws']:
name = ['wL=' + self.WHITESPACE_KEYWORD] + name
......
@@ -49,8 +49,8 @@ __all__ = ('Parser',
'PreprocessorToken',
'RegExp',
'Whitespace',
-'RE',
-'Token',
+'_RE',
+'_Token',
'mixin_comment',
# 'UnaryOperator',
# 'NaryOperator',
@@ -356,7 +356,7 @@ class Grammar:
Example for direct instantiation of a grammar::
->>> number = RE('\d+') + RE('\.') + RE('\d+') | RE('\d+')
+>>> number = _RE('\d+') + _RE('\.') + _RE('\d+') | _RE('\d+')
>>> number_parser = Grammar(number)
>>> number_parser("3.1416").content
'3.1416'
@@ -392,9 +392,9 @@ class Grammar:
# parsers
expression = Forward()
INTEGER = RE('\\d+')
-factor = INTEGER | Token("(") + expression + Token(")")
-term = factor + ZeroOrMore((Token("*") | Token("/")) + factor)
-expression.set(term + ZeroOrMore((Token("+") | Token("-")) + term))
+factor = INTEGER | _Token("(") + expression + _Token(")")
+term = factor + ZeroOrMore((_Token("*") | _Token("/")) + factor)
+expression.set(term + ZeroOrMore((_Token("+") | _Token("-")) + term))
root__ = expression
Upon instantiation the parser objects are deep-copied to the
@@ -454,9 +454,9 @@ class Grammar:
whitespace__: A parser for the implicit optional whitespace (or the
:class:zombie-parser if the default is empty). The default
-whitespace will be used by parsers :class:`Token` and, if no
+whitespace will be used by parsers :class:`_Token` and, if no
other parsers are passed to its constructor, by parser
-:class:`RE`. It can also be place explicitly in the
+:class:`_RE`. It can also be placed explicitly in the
EBNF-Grammar via the "~"-sign.
wsp_left_parser__: The same as ``whitespace`` for
@@ -555,7 +555,7 @@ class Grammar:
class Grammar(Grammar):
...
-symbol = RE('(?!\\d)\\w+')
+symbol = _RE('(?!\\d)\\w+')
After the call of this method symbol.name == "symbol"
holds. Names assigned via the ``name``-parameter of the
@@ -836,7 +836,7 @@ def dsl_error_msg(parser: Parser, error_str: str) -> str:
########################################################################
#
-# Token and Regular Expression parser classes (i.e. leaf classes)
+# _Token and Regular Expression parser classes (i.e. leaf classes)
#
########################################################################
@@ -969,6 +969,19 @@ class Whitespace(RegExp):
assert WHITESPACE_PTYPE == ":Whitespace"
+def RE(regexp, wL=None, wR=None) -> Series:
+    def rxp(regex):
+        return regex if isinstance(regex, RegExp) else RegExp(regex)
+    if wL is None and wR is None:
+        return rxp(regexp)
+    elif wL is None:
+        return Series(rxp(regexp), rxp(wR))
+    elif wR is None:
+        return Series(rxp(wL), rxp(regexp))
+    else:
+        return Series(rxp(wL), rxp(regexp), rxp(wR))
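
Read literally, the new RE-factory now degrades to a plain RegExp-parser when
no whitespace is given and otherwise wraps the regular expression in a Series,
e.g.::

    word = RE(r'\w+')               # -> RegExp(r'\w+')
    padded = RE(r'\w+', wR=r'\s*')  # -> Series(RegExp(r'\w+'), RegExp(r'\s*'))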
#######################################################################
#######################################################################
#
@@ -978,7 +991,7 @@ class Whitespace(RegExp):
# With the constructor of the Grammar class (see the instantiations of
# the Whitespace-class, there).
#
-# That is all the more regrettable, as class RE basically just
+# That is all the more regrettable, as class _RE basically just
# introduces syntactical sugar for
#
# Series(whitespace__, RegExp('something'), whitespace__)
@@ -991,8 +1004,10 @@ class Whitespace(RegExp):
######################################################################
-class RE(Parser):
+class _RE(Parser):
r"""
+DEPRECATED
+
Regular Expressions with optional leading or trailing whitespace.
The RE-parser parses pieces of text that match a given regular
@@ -1008,7 +1023,7 @@ class RE(Parser):
Example (allowing whitespace on the right hand side, but not on
the left hand side of a regular expression)::
->>> word = RE(r'\w+', wR=r'\s*')
+>>> word = _RE(r'\w+', wR=r'\s*')
>>> parser = Grammar(word)
>>> result = parser('Haus ')
>>> result.content
@@ -1024,7 +1039,7 @@ class RE(Parser):
"""
def __init__(self, regexp, wL=None, wR=None, name: str='') -> None:
-r"""Constructor for class RE.
+r"""Constructor for class _RE.
Args:
regexp (str or regex object): The regular expression to be
@@ -1091,17 +1106,19 @@ class RE(Parser):
return RegExp(arg)
-class Token(RE):
+class _Token(_RE):
"""
-Class Token parses simple strings. Any regular regular expression
+DEPRECATED!
+
+Class _Token parses simple strings. Any regular expression
commands will be interpreted as simple sequence of characters.
-Other than that class Token is essentially a renamed version of
-class RE. Because tokens often have a particular semantic different
+Other than that class _Token is essentially a renamed version of
+class _RE. Because tokens often have a particular semantic different
from other REs, parsing them with a separate parser class allows to
distinguish them by their parser type.
"""
-assert TOKEN_PTYPE == ":Token"
+assert TOKEN_PTYPE == ":_Token"
def __init__(self, token: str, wL=None, wR=None, name: str = '') -> None:
self.token = token
@@ -1196,7 +1213,7 @@ class Option(UnaryOperator):
Examples::
->>> number = Option(Token('-')) + RegExp(r'\d+') + Option(RegExp(r'\.\d+'))
+>>> number = Option(_Token('-')) + RegExp(r'\d+') + Option(RegExp(r'\.\d+'))
>>> Grammar(number)('3.14159').content
'3.14159'
>>> Grammar(number)('3.14159').structure
@@ -1237,7 +1254,7 @@ class ZeroOrMore(Option):
Examples::
->>> sentence = ZeroOrMore(RE(r'\w+,?')) + Token('.')
+>>> sentence = ZeroOrMore(_RE(r'\w+,?')) + _Token('.')
>>> Grammar(sentence)('Wo viel der Weisheit, da auch viel des Grämens.').content
'Wo viel der Weisheit, da auch viel des Grämens.'
>>> Grammar(sentence)('.').content # an empty sentence also matches
@@ -1280,7 +1297,7 @@ class OneOrMore(UnaryOperator):
Examples::
->>> sentence = OneOrMore(RE(r'\w+,?')) + Token('.')
+>>> sentence = OneOrMore(_RE(r'\w+,?')) + _Token('.')
>>> Grammar(sentence)('Wo viel der Weisheit, da auch viel des Grämens.').content
'Wo viel der Weisheit, da auch viel des Grämens.'
>>> str(Grammar(sentence)('.')) # an empty sentence also matches
@@ -1331,7 +1348,7 @@ class Series(NaryOperator):
Example::
->>> variable_name = RegExp('(?!\d)\w') + RE('\w*')
+>>> variable_name = RegExp('(?!\d)\w') + _RE('\w*')
>>> Grammar(variable_name)('variable_1').content
'variable_1'
>>> str(Grammar(variable_name)('1_variable'))
@@ -1397,7 +1414,7 @@ class Series(NaryOperator):
+ [parser.repr for parser in self.parsers[self.mandatory:]])
# The following operator definitions add syntactical sugar, so one can write:
-# `RE('\d+') + Optional(RE('\.\d+)` instead of `Series(RE('\d+'), Optional(RE('\.\d+))`
+# `_RE('\d+') + Optional(_RE('\.\d+'))` instead of `Series(_RE('\d+'), Optional(_RE('\.\d+')))`
@staticmethod
def combined_mandatory(left: Parser, right: Parser):
@@ -1444,12 +1461,12 @@ class Alternative(NaryOperator):
are broken by selecting the first match.::
# the order of the sub-expression matters!
->>> number = RE('\d+') | RE('\d+') + RE('\.') + RE('\d+')
+>>> number = _RE('\d+') | _RE('\d+') + _RE('\.') + _RE('\d+')
>>> str(Grammar(number)("3.1416"))
'3 <<< Error on ".141" | Parser stopped before end! trying to recover... >>> '
# the most selective expression should be put first:
->>> number = RE('\d+') + RE('\.') + RE('\d+') | RE('\d+')
+>>> number = _RE('\d+') + _RE('\.') + _RE('\d+') | _RE('\d+')
>>> Grammar(number)("3.1416").content
'3.1416'
@@ -1480,8 +1497,8 @@ class Alternative(NaryOperator):
return self
# The following operator definitions add syntactical sugar, so one can write:
-# `RE('\d+') + RE('\.') + RE('\d+') | RE('\d+')` instead of:
-# `Alternative(Series(RE('\d+'), RE('\.'), RE('\d+')), RE('\d+'))`
+# `_RE('\d+') + _RE('\.') + _RE('\d+') | _RE('\d+')` instead of:
+# `Alternative(Series(_RE('\d+'), _RE('\.'), _RE('\d+')), _RE('\d+'))`
def __or__(self, other: Parser) -> 'Alternative':
other_parsers = cast('Alternative', other).parsers if isinstance(other, Alternative) \
@@ -1508,7 +1525,7 @@ class AllOf(NaryOperator):
Example::
->>> prefixes = AllOf(Token("A"), Token("B"))
+>>> prefixes = AllOf(_Token("A"), _Token("B"))
>>> Grammar(prefixes)('A B').content
'A B'
>>> Grammar(prefixes)('B A').content
@@ -1559,7 +1576,7 @@ class SomeOf(NaryOperator):
Example::
->>> prefixes = SomeOf(Token("A"), Token("B"))
+>>> prefixes = SomeOf(_Token("A"), _Token("B"))
>>> Grammar(prefixes)('A B').content
'A B'
>>> Grammar(prefixes)('B A').content
@@ -1689,7 +1706,7 @@ class NegativeLookahead(Lookahead):
class Lookbehind(FlowOperator):
"""
Matches, if the contained parser would match backwards. Requires
-the contained parser to be a RegExp, RE, PlainText or Token parser.
+the contained parser to be a RegExp, _RE, PlainText or _Token parser.
EXPERIMENTAL
"""
@@ -1697,14 +1714,14 @@
p = parser
while isinstance(p, Synonym):
p = p.parser
-assert isinstance(p, RegExp) or isinstance(p, PlainText) or isinstance(p, RE), str(type(p))
+assert isinstance(p, RegExp) or isinstance(p, PlainText) or isinstance(p, _RE), str(type(p))
self.regexp = None
self.text = None
-if isinstance(p, RE):
-if isinstance(cast(RE, p).main, RegExp):
-self.regexp = cast(RegExp, cast(RE, p).main).regexp
+if isinstance(p, _RE):
+if isinstance(cast(_RE, p).main, RegExp):
+self.regexp = cast(RegExp, cast(_RE, p).main).regexp
else: # p.main is of type PlainText
-self.text = cast(PlainText, cast(RE, p).main).text
+self.text = cast(PlainText, cast(_RE, p).main).text
elif isinstance(p, RegExp):
self.regexp = cast(RegExp, p).regexp
else: # p is of type PlainText
@@ -1878,7 +1895,7 @@ class Synonym(UnaryOperator):
Otherwise the first line could not be represented by any parser
class, in which case it would be unclear whether the parser
-RE('\d\d\d\d') carries the name 'JAHRESZAHL' or 'jahr'.
+_RE('\d\d\d\d') carries the name 'JAHRESZAHL' or 'jahr'.
"""
def __call__(self, text: StringView) -> Tuple[Optional[Node], StringView]:
@@ -1905,10 +1922,10 @@ class Forward(Parser):
INTEGER = /\d+/~
'''
expression = Forward()
-INTEGER = RE('\\d+')
-factor = INTEGER | Token("(") + expression + Token(")")
-term = factor + ZeroOrMore((Token("*") | Token("/")) + factor)
-expression.set(term + ZeroOrMore((Token("+") | Token("-")) + term))
+INTEGER = _RE('\\d+')
+factor = INTEGER | _Token("(") + expression + _Token(")")
+term = factor + ZeroOrMore((_Token("*") | _Token("/")) + factor)
+expression.set(term + ZeroOrMore((_Token("+") | _Token("-")) + term))
root__ = expression
"""
......
@@ -112,7 +112,7 @@ class ParserBase:
WHITESPACE_PTYPE = ':Whitespace'
PLAINTEXT_PTYPE = ':PlainText'
-TOKEN_PTYPE = ':Token'
+TOKEN_PTYPE = ':_Token'
class MockParser(ParserBase):
......
@@ -637,7 +637,7 @@ can easily write your own. What does this look like? ::
"part": [],
"WORD": [],
"EOF": [],
-":Token, :RE": reduce_single_child,
+":_Token, :_RE": reduce_single_child,
"*": replace_by_single_child
}
@@ -654,7 +654,7 @@ As you can see, the transformation-table contains an entry for every known
parser, i.e. "document", "sentence", "part", "WORD", "EOF". (If any of these are
missing in the table of your ``poetryCompiler.py``, add them now!) In the
template you'll also find transformations for two anonymous parsers, i.e.
-":Token" and ":RE" as well as some curious entries such as "*" and "+". The
+":_Token" and ":_RE" as well as some curious entries such as "*" and "+". The
latter are considered to be "jokers". The transformations related to the
"+"-sign will be applied on any node, before any other transformation is
applied. In this case, all empty nodes will be removed first (transformation:
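
Putting the jokers together, the table can be read roughly like this (a sketch
restricted to names shown above; the concrete helpers of the "+"-entry are
elided in this hunk)::

    poetry_AST_transformation_table = {
        "+": ...,                         # applied to every node first
        "document": [],
        "sentence": [],
        "part": [],
        "WORD": [],
        "EOF": [],
        ":_Token, :_RE": reduce_single_child,
        "*": replace_by_single_child      # fallback for all remaining nodes
    }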
@@ -722,10 +722,10 @@ Running the "poetryCompiler.py"-script on "macbeth.dsl" again, yields::
<WORD>shadow</WORD>
</part>
<:Series>
-<:Token>
+<:_Token>
<:PlainText>,</:PlainText>
<:Whitespace> </:Whitespace>
-</:Token>
+</:_Token>
<part>
<WORD>a</WORD>
...
@@ -734,11 +734,11 @@ It starts to become more readable and concise, but there are still some oddities.
Firstly, the Tokens that delimit parts of sentences still contain whitespace.
Secondly, if several <part>-nodes follow each other in a <sentence>-node, the
<part>-nodes after the first one are enclosed by a <:Series>-node or even a
-cascade of <:ZeroOrMore> and <:Series>-nodes. As for the <:Token>-nodes, we
+cascade of <:ZeroOrMore> and <:Series>-nodes. As for the <:_Token>-nodes, we
can do the same trick as with the WORD-nodes::
-":Token": [remove_whitespace, reduce_single_child],
-":RE": reduce_single_child,
+":_Token": [remove_whitespace, reduce_single_child],
+":_RE": reduce_single_child,
As to the nested structure of the <part>-nodes within the <sentence>-node, this
is a rather typical case of syntactic artefacts that can be found in concrete
@@ -807,7 +807,7 @@ Now that everything is set, let's have a look at the result::
<WORD>walking</WORD>
<WORD>shadow</WORD>
</part>
-<:Token>,</:Token>
+<:_Token>,</:_Token>
<part>
<WORD>a</WORD>
<WORD>poor</WORD>
@@ -816,8 +816,8 @@ Now that everything is set, let's have a look at the result::
That is much better. There is but one slight blemish in the output: While all
nodes left are named nodes, i.e. nodes associated with a named parser, there are a
-few anonymous <:Token> nodes. Here is a little exercise: Do away with those
-<:Token>-nodes by replacing them by something semantically more meaningful.
+few anonymous <:_Token> nodes. Here is a little exercise: Do away with those
+<:_Token>-nodes by replacing them with something semantically more meaningful.
Hint: Add a new symbol "delimiter" in the grammar definition "poetry.ebnf". An
alternative strategy to extending the grammar would be to use the
``replace_parser`` operator. Which of the two strategies is the better one? Explain
......
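
For orientation, the grammar-extension strategy could hypothetically look like
this in "poetry.ebnf" (illustrative only, not part of the tutorial's grammar)::

    delimiter = ',' | '.'

A named ``delimiter`` parser would then yield <delimiter>-nodes in place of the
anonymous <:_Token>-nodes.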
@@ -16,8 +16,8 @@ except ImportError:
import re
from DHParser import logging, is_filename, load_if_file, \
Grammar, Compiler, nil_preprocessor, PreprocessorToken, \
-Lookbehind, Lookahead, Alternative, Pop, Token, Synonym, AllOf, SomeOf, Unordered, \
-Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture, \
+Lookbehind, Lookahead, Alternative, Pop, _Token, Synonym, AllOf, SomeOf, Unordered, \
+Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, _RE, Capture, \
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
last_value, counterpart, accumulate, PreprocessorFunc, \
Node, TransformationFunc, TransformationDict, \
@@ -72,12 +72,12 @@ class ArithmeticGrammar(Grammar):
wspL__ = ''
wspR__ = WSP__
test = Series(digit, constant, variable)
-digit.set(Alternative(Token("0"), Token("1"), Token("..."), Token("9")))
+digit.set(Alternative(_Token("0"), _Token("1"), _Token("..."), _Token("9")))
constant.set(Series(digit, ZeroOrMore(digit)))
-variable.set(Alternative(Token("x"), Token("y"), Token("z")))
-factor = Alternative(constant, variable, Series(Token("("), expression, Token(")")))
-term = Series(factor, ZeroOrMore(Series(Alternative(Token("*"), Token("/")), factor)))
-expression.set(Series(term, ZeroOrMore(Series(Alternative(Token("+"), Token("-")), term))))
+variable.set(Alternative(_Token("x"), _Token("y"), _Token("z")))
+factor = Alternative(constant, variable, Series(_Token("("), expression, _Token(")")))
+term = Series(factor, ZeroOrMore(Series(Alternative(_Token("*"), _Token("/")), factor)))
+expression.set(Series(term, ZeroOrMore(Series(Alternative(_Token("+"), _Token("-")), term))))
root__ = expression
def get_grammar() -> ArithmeticGrammar:
@@ -106,7 +106,7 @@ Arithmetic_AST_transformation_table = {
"constant": [],
"digit": [replace_or_reduce],
"test": [],
-":Token, :RE": reduce_single_child,
+":_Token, :_RE": reduce_single_child,
"*": replace_by_single_child
}
......
@@ -19,8 +19,8 @@ sys.path.extend(['../../', '../', './'])
from DHParser import is_filename, load_if_file, \
Grammar, Compiler, nil_preprocessor, \
-Lookbehind, Lookahead, Alternative, Pop, Required, Token, Synonym, \
-Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture, \
+Lookbehind, Lookahead, Alternative, Pop, Required, _Token, Synonym, \
+Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, _RE, Capture, \
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
last_value, counterpart, accumulate, PreprocessorFunc, \
Node, TransformationDict, Whitespace, \
@@ -114,21 +114,21 @@ class BibTeXGrammar(Grammar):
wspL__ = ''
wspR__ = WSP__
whitespace__ = Whitespace(WSP__)
-CONTENT_STRING = OneOrMore(Alternative(RegExp('(?i)[^{}%]+'), RE('(?i)(?=%)')))
-COMMA_TERMINATED_STRING = ZeroOrMore(Alternative(RegExp('(?i)[^,%]+'), RE('(?i)(?=%)')))
-NO_BLANK_STRING = RE('(?i)[^ \\t\\n,%]+')
-WORD_ = RE('(?i)\\w+')
+CONTENT_STRING = OneOrMore(Alternative(RegExp('(?i)[^{}%]+'), _RE('(?i)(?=%)')))
+COMMA_TERMINATED_STRING = ZeroOrMore(Alternative(RegExp('(?i)[^,%]+'), _RE('(?i)(?=%)')))
+NO_BLANK_STRING = _RE('(?i)[^ \\t\\n,%]+')
+WORD_ = _RE('(?i)\\w+')
WORD = RegExp('(?i)\\w+')
-text.set(ZeroOrMore(Alternative(CONTENT_STRING, Series(Token("{"), text, Token("}")))))
+text.set(ZeroOrMore(Alternative(CONTENT_STRING, Series(_Token("{"), text, _Token("}")))))
plain_content = Synonym(COMMA_TERMINATED_STRING)
-content = Alternative(Series(Token("{"), text, Token("}")), plain_content)
+content = Alternative(Series(_Token("{"), text, _Token("}")), plain_content)
field = Synonym(WORD_)
key = Synonym(NO_BLANK_STRING)
type = Synonym(WORD)
-entry = Series(RegExp('(?i)@'), type, Token("{"), key, ZeroOrMore(Series(Token(","), field, Token("="), content, mandatory=2)), Token("}"), mandatory=5)
-comment = Series(Token("@Comment{"), text, Token("}"), mandatory=2)
+entry = Series(RegExp('(?i)@'), type, _Token("{"), key, ZeroOrMore(Series(_Token(","), field, _Token("="), content, mandatory=2)), _Token("}"), mandatory=5)
+comment = Series(_Token("@Comment{"), text, _Token("}"), mandatory=2)
pre_code = ZeroOrMore(Alternative(RegExp('(?i)[^"%]+'), RegExp('(?i)%.*\\n')))
-preamble = Series(Token("@Preamble{"), RegExp('(?i)"'), pre_code, RE('(?i)"'), Token("}"), mandatory=4)
+preamble = Series(_Token("@Preamble{"), RegExp('(?i)"'), pre_code, _RE('(?i)"'), _Token("}"), mandatory=4)
bibliography = ZeroOrMore(Alternative(preamble, comment, entry))
root__ = bibliography
@@ -162,7 +162,7 @@ BibTeX_AST_transformation_table = {
"content": [replace_or_reduce],
"plain_content": [],
"text": [],
-":Token, :RE": reduce_single_child,
+":_Token, :_RE": reduce_single_child,
"*": replace_by_single_child
}
......
@@ -191,7 +191,7 @@ code = compile(parser_py, '<string>', 'exec')
module_vars = globals()
-name_space = {k: module_vars[k] for k in {'RegExp', 'RE', 'Token', 'Required', 'Option', 'mixin_comment',
+name_space = {k: module_vars[k] for k in {'RegExp', '_RE', '_Token', 'Required', 'Option', 'mixin_comment',
'ZeroOrMore', 'OneOrMore', 'Sequence', 'Alternative', 'Forward',
'NegativeLookahead', 'PositiveLookahead', 'PreprocessorToken', 'Grammar'}}
exec(code, name_space)
......
@@ -269,7 +269,7 @@ code = compile(parser_py, '<string>', 'exec')
module_vars = globals()
-name_space = {k: module_vars[k] for k in {'RegExp', 'RE', 'Token', 'Required', 'Option', 'mixin_comment',
+name_space = {k: module_vars[k] for k in {'RegExp', '_RE', '_Token', 'Required', 'Option', 'mixin_comment',
'ZeroOrMore', 'Sequence', 'Alternative', 'Forward'}}
exec(code, name_space)
parser = name_space['Grammar']
......
@@ -16,8 +16,8 @@ except ImportError:
import re
from DHParser import is_filename, load_if_file, \
Grammar, Compiler, nil_preprocessor, \
-Lookbehind, Lookahead, Alternative, Pop, Required, Token, Synonym, \
-Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture, \
+Lookbehind, Lookahead, Alternative, Pop, Required, _Token, Synonym, \
+Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, _RE, Capture, \
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
last_value, counterpart, accumulate, PreprocessorFunc, \
Node, TransformationFunc, TransformationDict, TRUE_CONDITION, \
@@ -107,25 +107,25 @@ class EBNFGrammar(Grammar):
wspR__ = WSP__
whitespace__ = Whitespace(WSP__)
EOF = NegativeLookahead(RegExp('.'))
-list_ = Series(RE('\\w+'), ZeroOrMore(Series(Token(","), RE('\\w+'))))
-whitespace = RE('~')
-regexp = RE('~?/(?:\\\\/|[^/])*?/~?')
-plaintext = RE('`(?:[^"]|\\\\")*?`')
-literal = Alternative(RE('"(?:[^"]|\\\\")*?"'), RE("'(?:[^']|\\\\')*?'"))
-symbol = RE('(?!\\d)\\w+')
-option = Series(Token("["), expression, Token("]"), mandatory=1)
-repetition = Series(Token("{"), expression, Token("}"), mandatory=1)