

Commit f564f606 authored by di68kap

sync commit

parents f2f3e6e2 72bec2ac
@@ -95,7 +95,7 @@ Allow to specify parsers/nodes, the result of which will be dropped
right away, so that the nodes they produce do not need to be removed
during the AST-Transformations. Typical candidates would be:
1. Tokens ":Token"
1. Tokens ":_Token"
2. Whitespace ":Whitespace" (in some cases)
3. empty Nodes
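
Until such a drop-facility exists, a comparable effect can be achieved right
after parsing with an ordinary transformation pass — a minimal sketch, assuming
DHParser's transformation-table conventions as used in the tutorial below and
an already parsed root node ``syntax_tree``::

    from DHParser import traverse, remove_empty, remove_whitespace, remove_tokens

    # Sketch: instead of dropping the nodes during parsing, strip them
    # from every node of the tree in a first AST-transformation pass.
    drop_table = {
        "+": remove_empty,                        # "+" runs on every node first
        "*": [remove_whitespace, remove_tokens],  # drop whitespace- and token-children
    }
    traverse(syntax_tree, drop_table)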
@@ -143,8 +143,8 @@ parsers:
"contains" another parser without its calls being run through the
parser guard, but that records every call of the parser and its
results, e.g. to trace the `option`-parser from the ebnf-parser (see
DHParser/ebnf.py) you'd write: `option = Trace(Series(Token("["),
expression, Token("]"), mandatory=1))`
DHParser/ebnf.py) you'd write: `option = Trace(Series(_Token("["),
expression, _Token("]"), mandatory=1))`
- For the ebnf-representation a tracing-prefix could be added, say `?`,
e.g. `option = ?("[" §expression "]")` or, alternatively, `?option =
......
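
Purely as an illustration of this idea — no such ``Trace``-class exists in
DHParser at this point, and all names are hypothetical — such a wrapper might
look roughly like this::

    class Trace:
        """Hypothetical wrapper that records each call of the wrapped
        parser together with its result (sketch only, not part of this
        commit)."""

        def __init__(self, parser):
            self.parser = parser

        def __call__(self, text):
            node, rest = self.parser(text)  # direct call, bypassing the parser guard
            print(self.parser.repr, '->', node.as_sxpr() if node else 'FAIL')
            return node, rest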
@@ -90,7 +90,7 @@ except ImportError:
from DHParser import logging, is_filename, load_if_file, \
Grammar, Compiler, nil_preprocessor, PreprocessorToken, Whitespace, \
Lookbehind, Lookahead, Alternative, Pop, Token, Synonym, AllOf, SomeOf, Unordered, \
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture, \
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \
ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \
grammar_changed, last_value, counterpart, accumulate, PreprocessorFunc, \
Node, TransformationFunc, TransformationDict, transformation_factory, \
......
@@ -30,7 +30,7 @@ from functools import partial
from DHParser.compile import CompilerError, Compiler
from DHParser.error import Error
from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, Whitespace, RE, \
from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, Whitespace, \
NegativeLookahead, Alternative, Series, Option, OneOrMore, ZeroOrMore, Token
from DHParser.preprocess import nil_preprocessor, PreprocessorFunc
from DHParser.syntaxtree import Node, WHITESPACE_PTYPE, TOKEN_PTYPE
@@ -117,7 +117,7 @@ class EBNFGrammar(Grammar):
literal = /"(?:[^"]|\\")*?"/~ # e.g. "(", '+', 'while'
| /'(?:[^']|\\')*?'/~ # whitespace following literals will be ignored tacitly.
plaintext = /`(?:[^"]|\\")*?`/~ # like literal but does not eat whitespace
regexp = /~?\/(?:\\\/|[^\/])*?\/~?/~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
regexp = /\/(?:\\\/|[^\/])*?\//~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
# '~' is a whitespace-marker, if present leading or trailing
# whitespace of a regular expression will be ignored tacitly.
whitespace = /~/~ # implicit or default whitespace
@@ -126,38 +126,41 @@ class EBNFGrammar(Grammar):
EOF = !/./
"""
expression = Forward()
source_hash__ = "3fc9f5a340f560e847d9af0b61a68743"
parser_initialization__ = "upon instantiation"
COMMENT__ = r'#.*(?:\n|$)'
WHITESPACE__ = r'\s*'
WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = ''
wspR__ = WSP__
whitespace__ = Whitespace(WSP__)
wspR__ = WSP_RE__
wsp__ = Whitespace(WSP_RE__)
EOF = NegativeLookahead(RegExp('.'))
list_ = Series(RE('\\w+'), ZeroOrMore(Series(Token(","), RE('\\w+'))))
whitespace = RE('~')
regexp = RE('~?/(?:\\\\/|[^/])*?/~?')
plaintext = RE('`(?:[^"]|\\\\")*?`')
literal = Alternative(RE('"(?:[^"]|\\\\")*?"'), RE("'(?:[^']|\\\\')*?'"))
symbol = RE('(?!\\d)\\w+')
option = Series(Token("["), expression, Token("]"), mandatory=1)
repetition = Series(Token("{"), expression, Token("}"), mandatory=1)
oneormore = Series(Token("{"), expression, Token("}+"))
unordered = Series(Token("<"), expression, Token(">"), mandatory=1)
group = Series(Token("("), expression, Token(")"), mandatory=1)
retrieveop = Alternative(Token("::"), Token(":"))
flowmarker = Alternative(Token("!"), Token("&"), Token("-!"), Token("-&"))
factor = Alternative(Series(Option(flowmarker), Option(retrieveop), symbol, NegativeLookahead(Token("="))),
list_ = Series(RegExp('\\w+'), wsp__, ZeroOrMore(Series(Series(Token(","), wsp__), RegExp('\\w+'), wsp__)))
whitespace = Series(RegExp('~'), wsp__)
regexp = Series(RegExp('/(?:\\\\/|[^/])*?/'), wsp__)
plaintext = Series(RegExp('`(?:[^"]|\\\\")*?`'), wsp__)
literal = Alternative(Series(RegExp('"(?:[^"]|\\\\")*?"'), wsp__), Series(RegExp("'(?:[^']|\\\\')*?'"), wsp__))
symbol = Series(RegExp('(?!\\d)\\w+'), wsp__)
option = Series(Series(Token("["), wsp__), expression, Series(Token("]"), wsp__), mandatory=1)
repetition = Series(Series(Token("{"), wsp__), expression, Series(Token("}"), wsp__), mandatory=1)
oneormore = Series(Series(Token("{"), wsp__), expression, Series(Token("}+"), wsp__))
unordered = Series(Series(Token("<"), wsp__), expression, Series(Token(">"), wsp__), mandatory=1)
group = Series(Series(Token("("), wsp__), expression, Series(Token(")"), wsp__), mandatory=1)
retrieveop = Alternative(Series(Token("::"), wsp__), Series(Token(":"), wsp__))
flowmarker = Alternative(Series(Token("!"), wsp__), Series(Token("&"), wsp__),
Series(Token("-!"), wsp__), Series(Token("-&"), wsp__))
factor = Alternative(Series(Option(flowmarker), Option(retrieveop), symbol,
NegativeLookahead(Series(Token("="), wsp__))),
Series(Option(flowmarker), literal), Series(Option(flowmarker), plaintext),
Series(Option(flowmarker), regexp), Series(Option(flowmarker), whitespace),
Series(Option(flowmarker), oneormore), Series(Option(flowmarker), group),
Series(Option(flowmarker), unordered), repetition, option)
term = OneOrMore(Series(Option(Token("§")), factor))
expression.set(Series(term, ZeroOrMore(Series(Token("|"), term))))
directive = Series(Token("@"), symbol, Token("="), Alternative(regexp, literal, list_), mandatory=1)
definition = Series(symbol, Token("="), expression, mandatory=1)
syntax = Series(Option(RE('', wR='', wL=WSP__)), ZeroOrMore(Alternative(definition, directive)), EOF, mandatory=2)
term = OneOrMore(Series(Option(Series(Token("§"), wsp__)), factor))
expression.set(Series(term, ZeroOrMore(Series(Series(Token("|"), wsp__), term))))
directive = Series(Series(Token("@"), wsp__), symbol, Series(Token("="), wsp__),
Alternative(regexp, literal, list_), mandatory=1)
definition = Series(symbol, Series(Token("="), wsp__), expression, mandatory=1)
syntax = Series(Option(Series(wsp__, RegExp(''))),
ZeroOrMore(Alternative(definition, directive)), EOF, mandatory=2)
root__ = syntax
@@ -382,9 +385,9 @@ class EBNFCompiler(Compiler):
regular expressions found in the current parsing process
"""
COMMENT_KEYWORD = "COMMENT__"
WHITESPACE_KEYWORD = "WSP__"
WHITESPACE_KEYWORD = "WSP_RE__"
RAW_WS_KEYWORD = "WHITESPACE__"
WHITESPACE_PARSER_KEYWORD = "whitespace__"
WHITESPACE_PARSER_KEYWORD = "wsp__"
RESERVED_SYMBOLS = {WHITESPACE_KEYWORD, RAW_WS_KEYWORD, COMMENT_KEYWORD}
AST_ERROR = "Badly structured syntax tree. " \
"Potentially due to erroneous AST transformation."
@@ -459,7 +462,7 @@ class EBNFCompiler(Compiler):
elif rule.startswith('Synonym'):
transformations = '[reduce_single_child]'
transtable.append(' "' + name + '": %s,' % transformations)
transtable.append(' ":Token, :RE": reduce_single_child,')
transtable.append(' ":Token": reduce_single_child,')
transtable += [' "*": replace_by_single_child', '}', '']
transtable += [TRANSFORMER_FACTORY.format(NAME=self.grammar_name)]
return '\n'.join(transtable)
@@ -778,7 +781,6 @@ class EBNFCompiler(Compiler):
name for the particular non-terminal.
"""
arguments = [self.compile(r) for r in node.children] + custom_args
# node.error_flag = max(node.error_flag, max(t.error_flag for t in node.children))
return parser_class + '(' + ', '.join(arguments) + ')'
@@ -921,12 +923,22 @@ class EBNFCompiler(Compiler):
def on_literal(self, node: Node) -> str:
return 'Token(' + node.content.replace('\\', r'\\') + ')'
center = 'Token(' + node.content.replace('\\', r'\\') + ')'
left = self.WHITESPACE_PARSER_KEYWORD if 'left' in self.directives['literalws'] else ''
right = self.WHITESPACE_PARSER_KEYWORD if 'right' in self.directives['literalws'] else ''
if left or right:
return 'Series(' + ", ".join(item for item in (left, center, right) if item) + ')'
return center
def on_plaintext(self, node: Node) -> str:
return 'Token(' + node.content.replace('\\', r'\\').replace('`', '"') \
+ ", wL='', wR='')"
tk = node.content.replace('\\', r'\\')
rpl = '"' if tk.find('"') < 0 else "'" if tk.find("'") < 0 else ''
if rpl:
tk = rpl + tk[1:-1] + rpl
else:
tk = rpl + tk.replace('"', '\\"')[1:-1] + rpl
return 'Token(' + tk + ')'
def on_regexp(self, node: Node) -> str:
@@ -935,7 +947,7 @@ class EBNFCompiler(Compiler):
if rx[0] == '/' and rx[-1] == '/':
parser = 'RegExp('
else:
parser = 'RE('
parser = '_RE('
if rx[:2] == '~/':
if not 'left' in self.directives['literalws']:
name = ['wL=' + self.WHITESPACE_KEYWORD] + name
@@ -961,7 +973,7 @@ class EBNFCompiler(Compiler):
def on_whitespace(self, node: Node) -> str:
return 'whitespace__'
return self.WHITESPACE_PARSER_KEYWORD
def on_list_(self, node) -> Set[str]:
......
@@ -35,7 +35,6 @@ from typing import Callable, cast, Iterator, List, AbstractSet, Set, Union, Tupl
__all__ = ('ParserBase',
'WHITESPACE_PTYPE',
'PLAINTEXT_PTYPE',
'TOKEN_PTYPE',
'MockParser',
'ZombieParser',
@@ -62,11 +61,11 @@ class ParserBase:
It is defined here, because Node objects require a parser object
for instantiation.
"""
__slots__ = '_name', '_ptype'
__slots__ = 'name', 'ptype'
def __init__(self, name=''): # , pbases=frozenset()):
self._name = name # type: str
self._ptype = ':' + self.__class__.__name__ # type: str
def __init__(self,): # , pbases=frozenset()):
self.name = '' # type: str
self.ptype = ':' + self.__class__.__name__ # type: str
def __repr__(self):
return self.name + self.ptype
@@ -77,17 +76,17 @@ class ParserBase:
def __call__(self, text: StringView) -> Tuple[Optional['Node'], StringView]:
return None, text
@property
def name(self):
"""Returns the name of the parser or the empty string '' for unnamed
parsers."""
return self._name
@property
def ptype(self) -> str:
"""Returns the type of the parser. By default this is the parser's
class name preceded by a colon, e.g. ':ZeroOrMore'."""
return self._ptype
# @property
# def name(self):
# """Returns the name of the parser or the empty string '' for unnamed
# parsers."""
# return self._name
#
# @property
# def ptype(self) -> str:
# """Returns the type of the parser. By default this is the parser's
# class name preceded by a colon, e.g. ':ZeroOrMore'."""
# return self._ptype
@property
def repr(self) -> str:
@@ -111,7 +110,6 @@ class ParserBase:
WHITESPACE_PTYPE = ':Whitespace'
PLAINTEXT_PTYPE = ':PlainText'
TOKEN_PTYPE = ':Token'
@@ -129,8 +127,10 @@ class MockParser(ParserBase):
def __init__(self, name='', ptype=''): # , pbases=frozenset()):
assert not ptype or ptype[0] == ':'
super().__init__(name)
self._ptype = ptype or ':' + self.__class__.__name__
super().__init__()
self.name = name
if ptype:
self.ptype = ptype # or ':' + self.__class__.__name__
class ZombieParser(MockParser):
@@ -147,9 +147,10 @@ class ZombieParser(MockParser):
__slots__ = ()
def __init__(self):
super(ZombieParser, self).__init__("__ZOMBIE__")
super(ZombieParser, self).__init__()
assert not self.__class__.alive, "There can be only one!"
assert self.__class__ == ZombieParser, "No derivatives, please!"
self.name = "__ZOMBIE__"
self.__class__.alive = True
def __copy__(self):
@@ -935,8 +936,8 @@ def parse_xml(xml: str) -> Node:
Generates a tree of nodes from a (Pseudo-)XML-source.
"""
xml = StringView(xml)
PlainText = MockParser('', PLAINTEXT_PTYPE)
mock_parsers = {PLAINTEXT_PTYPE: PlainText}
PlainText = MockParser('', TOKEN_PTYPE)
mock_parsers = {TOKEN_PTYPE: PlainText}
def parse_attributes(s: StringView) -> Tuple[StringView, OrderedDict]:
"""Parses a sqeuence of XML-Attributes. Returns the string-slice
@@ -996,7 +997,7 @@ def parse_xml(xml: str) -> Node:
result.append(child)
s, closing_tagname = parse_closing_tag(s)
assert tagname == closing_tagname
if len(result) == 1 and result[0].parser.ptype == PLAINTEXT_PTYPE:
if len(result) == 1 and result[0].parser.ptype == TOKEN_PTYPE:
result = result[0].result
else:
result = tuple(result)
......
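
For illustration, a typical use of ``parse_xml`` (a usage sketch; the exact
output formatting of ``as_sxpr()`` may differ)::

    from DHParser.syntaxtree import parse_xml

    tree = parse_xml('<sentence><WORD>Hello</WORD></sentence>')
    print(tree.as_sxpr())   # roughly: (sentence (WORD "Hello"))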
@@ -97,7 +97,8 @@ __all__ = ('TransformationDict',
'assert_content',
'error_on',
'warn_on',
'assert_has_children')
'assert_has_children',
'peek')
TransformationProc = Callable[[List[Node]], None]
@@ -409,19 +410,21 @@ def is_token(context: List[Node], tokens: AbstractSet[str] = frozenset()) -> boo
whitespace-tokens will be ignored. In case an empty set of tokens is passed,
any token is a match.
"""
def stripped(nd: Node) -> str:
"""Removes leading and trailing whitespace-nodes from content."""
# assert node.parser.ptype == TOKEN_PTYPE
if nd.children:
i, k = 0, len(nd.children)
while i < len(nd.children) and nd.children[i].parser.ptype == WHITESPACE_PTYPE:
i += 1
while k > 0 and nd.children[k - 1].parser.ptype == WHITESPACE_PTYPE:
k -= 1
return "".join(child.content for child in node.children[i:k])
return nd.content
# def stripped(nd: Node) -> str:
# """Removes leading and trailing whitespace-nodes from content."""
# # assert node.parser.ptype == TOKEN_PTYPE
# if nd.children:
# i, k = 0, len(nd.children)
# while i < len(nd.children) and nd.children[i].parser.ptype == WHITESPACE_PTYPE:
# i += 1
# while k > 0 and nd.children[k - 1].parser.ptype == WHITESPACE_PTYPE:
# k -= 1
# return "".join(child.content for child in node.children[i:k])
# return nd.content
# node = context[-1]
# return node.parser.ptype == TOKEN_PTYPE and (not tokens or stripped(node) in tokens)
node = context[-1]
return node.parser.ptype == TOKEN_PTYPE and (not tokens or stripped(node) in tokens)
return node.parser.ptype == TOKEN_PTYPE and (not tokens or node.content in tokens)
@transformation_factory(collections.abc.Set)
@@ -983,3 +986,8 @@ def forbid(context: List[Node], child_tags: AbstractSet[str]):
if child.tag_name in child_tags:
context[0].new_error(node, 'Element "%s" cannot be nested inside "%s".' %
(child.parser.name, node.parser.name))
def peek(context: List[Node]):
"""For debugging: Prints the last node in the context as S-expression."""
print(context[-1].as_sxpr())
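
Such a helper can simply be spliced into any entry of a transformation-table,
e.g. (an illustrative entry, not part of this commit)::

    poetry_AST_transformation_table = {
        # print every <sentence>-node before it is transformed
        "sentence": [peek, flatten],
    }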
@@ -637,7 +637,7 @@ can easily write your own. What does this look like? ::
"part": [],
"WORD": [],
"EOF": [],
":Token, :RE": reduce_single_child,
":_Token, :_RE": reduce_single_child,
"*": replace_by_single_child
}
@@ -654,7 +654,7 @@ As you can see, the transformation-table contains an entry for every known
parser, i.e. "document", "sentence", "part", "WORD", "EOF". (If any of these are
missing in the table of your ``poetryCompiler.py``, add them now!) In the
template you'll also find transformations for two anonymous parsers, i.e.
":Token" and ":RE" as well as some curious entries such as "*" and "+". The
":_Token" and ":_RE" as well as some curious entries such as "*" and "+". The
latter are considered to be "jokers". The transformations related to the
"+"-sign will be applied on any node, before any other transformation is
applied. In this case, all empty nodes will be removed first (transformation:
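
To make the order of application concrete — "+" first on every node, then a
node's own entry, with "*" as the fallback for nodes that have no entry of
their own — here is a condensed sketch, assuming a parsed root node
``syntax_tree``::

    from DHParser import traverse, remove_empty, reduce_single_child, \
        replace_by_single_child

    table = {
        "+": remove_empty,                     # applied to every node first
        "WORD": [],                            # named parser: nothing to do yet
        ":_Token, :_RE": reduce_single_child,  # anonymous parsers
        "*": replace_by_single_child,          # fallback for all other nodes
    }
    traverse(syntax_tree, table)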
@@ -722,10 +722,10 @@ Running the "poetryCompiler.py"-script on "macbeth.dsl" again, yields::
<WORD>shadow</WORD>
</part>
<:Series>
<:Token>
<:_Token>
<:PlainText>,</:PlainText>
<:Whitespace> </:Whitespace>
</:Token>
</:_Token>
<part>
<WORD>a</WORD>
...
@@ -734,11 +734,11 @@ It starts to become more readable and concise, but there are still some oddities.
Firstly, the Tokens that delimit parts of sentences still contain whitespace.
Secondly, if several <part>-nodes follow each other in a <sentence>-node, the
<part>-nodes after the first one are enclosed by a <:Series>-node or even a
cascade of <:ZeroOrMore> and <:Series>-nodes. As for the <:Token>-nodes, we
cascade of <:ZeroOrMore> and <:Series>-nodes. As for the <:_Token>-nodes, we
can do the same trick as with the WORD-nodes::
":Token": [remove_whitespace, reduce_single_child],
":RE": reduce_single_child,
":_Token": [remove_whitespace, reduce_single_child],
":_RE": reduce_single_child,
As to the nested structure of the <part>-nodes within the <sentence>-node, this
is a rather typical case of syntactic artefacts that can be found in concrete
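
The customary remedy for such nested artefacts is the ``flatten``-transformation;
a sketch of the corresponding table-entry, along the lines of the tutorial::

    poetry_AST_transformation_table = {
        # pull <part>-nodes out of the nested <:Series>/<:ZeroOrMore> wrappers
        "sentence": [flatten],
    }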
@@ -807,7 +807,7 @@ Now that everything is set, let's have a look at the result::
<WORD>walking</WORD>
<WORD>shadow</WORD>
</part>
<:Token>,</:Token>
<:_Token>,</:_Token>
<part>
<WORD>a</WORD>
<WORD>poor</WORD>
@@ -816,8 +816,8 @@ That is much better. There is but one slight blemish in the output: While all
That is much better. There is but one slight blemish in the output: While all
nodes left are named nodes, i.e. nodes associated with a named parser, there are a
few anonymous <:Token> nodes. Here is a little exercise: Do away with those
<:Token>-nodes by replacing them by something semantically more meaningful.
few anonymous <:_Token> nodes. Here is a little exercise: Do away with those
<:_Token>-nodes by replacing them by something semantically more meaningful.
Hint: Add a new symbol "delimiter" in the grammar definition "poetry.ebnf". An
alternative strategy to extending the grammar would be to use the
``replace_parser`` operator. Which of the strategies is the better one? Explain
......
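
For the second strategy, an (untested) sketch of the table-entry, assuming the
``replace_parser``-operator from DHParser.transform::

    poetry_AST_transformation_table = {
        # rename the anonymous token-nodes instead of extending the grammar
        ":_Token": [remove_whitespace, reduce_single_child,
                    replace_parser('delimiter')],
    }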
@@ -17,11 +17,11 @@ except ImportError:
from DHParser import logging, is_filename, load_if_file, \
Grammar, Compiler, nil_preprocessor, PreprocessorToken, \
Lookbehind, Lookahead, Alternative, Pop, Token, Synonym, AllOf, SomeOf, Unordered, \
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture, \
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
last_value, counterpart, accumulate, PreprocessorFunc, \
Node, TransformationFunc, TransformationDict, \
traverse, remove_children_if, merge_children, is_anonymous, \
traverse, remove_children_if, merge_children, is_anonymous, Whitespace, \
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \
is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \
@@ -64,20 +64,21 @@ class ArithmeticGrammar(Grammar):
digit = Forward()
expression = Forward()
variable = Forward()
source_hash__ = "3064cea87c9ceb59ade35566a31c3d75"
source_hash__ = "385a94a70cb629d46a13e15305692667"
parser_initialization__ = "upon instantiation"
COMMENT__ = r''
WHITESPACE__ = r'[\t ]*'
WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
WHITESPACE__ = r'\s*'
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = ''
wspR__ = WSP__
wspR__ = WSP_RE__
wsp__ = Whitespace(WSP_RE__)
test = Series(digit, constant, variable)
digit.set(Alternative(Token("0"), Token("1"), Token("..."), Token("9")))
digit.set(Alternative(Series(Token("0"), wsp__), Series(Token("1"), wsp__), Series(Token("..."), wsp__), Series(Token("9"), wsp__)))
constant.set(Series(digit, ZeroOrMore(digit)))
variable.set(Alternative(Token("x"), Token("y"), Token("z")))
factor = Alternative(constant, variable, Series(Token("("), expression, Token(")")))
term = Series(factor, ZeroOrMore(Series(Alternative(Token("*"), Token("/")), factor)))
expression.set(Series(term, ZeroOrMore(Series(Alternative(Token("+"), Token("-")), term))))
variable.set(Alternative(Series(Token("x"), wsp__), Series(Token("y"), wsp__), Series(Token("z"), wsp__)))
factor = Alternative(constant, variable, Series(Series(Token("("), wsp__), expression, Series(Token(")"), wsp__)))
term = Series(factor, ZeroOrMore(Series(Alternative(Series(Token("*"), wsp__), Series(Token("/"), wsp__)), factor)))
expression.set(Series(term, ZeroOrMore(Series(Alternative(Series(Token("+"), wsp__), Series(Token("-"), wsp__)), term))))
root__ = expression
def get_grammar() -> ArithmeticGrammar:
@@ -106,7 +107,7 @@ Arithmetic_AST_transformation_table = {
"constant": [],
"digit": [replace_or_reduce],
"test": [],
":Token, :RE": reduce_single_child,
":_Token, :_RE": reduce_single_child,
"*": replace_by_single_child
}
......
#!/usr/bin/python3
"""recompile_grammar.py - recompiles all pdf files in the current directoy
Author: Eckhart Arnold <arnold@badw.de>
Copyright 2017 Bavarian Academy of Sciences and Humanities
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import sys
sys.path.extend(['../../', '../', './'])
from DHParser import dsl
dsl.recompile_grammar('.', force=True)
@@ -20,7 +20,7 @@ sys.path.extend(['../../', '../', './'])
from DHParser import is_filename, load_if_file, \
Grammar, Compiler, nil_preprocessor, \
Lookbehind, Lookahead, Alternative, Pop, Required, Token, Synonym, \
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture, \
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
last_value, counterpart, accumulate, PreprocessorFunc, \
Node, TransformationDict, Whitespace, \
@@ -106,29 +106,29 @@ class BibTeXGrammar(Grammar):
CONTENT_STRING = { /[^{}%]+/ | /(?=%)/~ }+
"""
text = Forward()
source_hash__ = "5ce8838ebbb255548cf3e14cd90bae6d"
source_hash__ = "534895885bfdddb19785f5d943b356a7"
parser_initialization__ = "upon instantiation"
COMMENT__ = r'(?i)%.*(?:\n|$)'
WHITESPACE__ = r'\s*'
WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = ''
wspR__ = WSP__
whitespace__ = Whitespace(WSP__)
CONTENT_STRING = OneOrMore(Alternative(RegExp('(?i)[^{}%]+'), RE('(?i)(?=%)')))
COMMA_TERMINATED_STRING = ZeroOrMore(Alternative(RegExp('(?i)[^,%]+'), RE('(?i)(?=%)')))
NO_BLANK_STRING = RE('(?i)[^ \\t\\n,%]+')
WORD_ = RE('(?i)\\w+')
wspR__ = WSP_RE__
wsp__ = Whitespace(WSP_RE__)
CONTENT_STRING = OneOrMore(Alternative(RegExp('(?i)[^{}%]+'), Series(RegExp('(?i)(?=%)'), wsp__)))
COMMA_TERMINATED_STRING = ZeroOrMore(Alternative(RegExp('(?i)[^,%]+'), Series(RegExp('(?i)(?=%)'), wsp__)))
NO_BLANK_STRING = Series(RegExp('(?i)[^ \\t\\n,%]+'), wsp__)
WORD_ = Series(RegExp('(?i)\\w+'), wsp__)
WORD = RegExp('(?i)\\w+')
text.set(ZeroOrMore(Alternative(CONTENT_STRING, Series(Token("{"), text, Token("}")))))
text.set(ZeroOrMore(Alternative(CONTENT_STRING, Series(Series(Token("{"), wsp__), text, Series(Token("}"), wsp__)))))
plain_content = Synonym(COMMA_TERMINATED_STRING)
content = Alternative(Series(Token("{"), text, Token("}")), plain_content)
content = Alternative(Series(Series(Token("{"), wsp__), text, Series(Token("}"), wsp__)), plain_content)
field = Synonym(WORD_)
key = Synonym(NO_BLANK_STRING)
type = Synonym(WORD)
entry = Series(RegExp('(?i)@'), type, Token("{"), key, ZeroOrMore(Series(Token(","), field, Token("="), content, mandatory=2)), Token("}"), mandatory=5)
comment = Series(Token("@Comment{"), text, Token("}"), mandatory=2)
entry = Series(RegExp('(?i)@'), type, Series(Token("{"), wsp__), key, ZeroOrMore(Series(Series(Token(","), wsp__), field, Series(Token("="), wsp__), content, mandatory=2)), Series(Token("}"), wsp__), mandatory=5)
comment = Series(Series(Token("@Comment{"), wsp__), text, Series(Token("}"), wsp__), mandatory=2)
pre_code = ZeroOrMore(Alternative(RegExp('(?i)[^"%]+'), RegExp('(?i)%.*\\n')))
preamble = Series(Token("@Preamble{"), RegExp('(?i)"'), pre_code, RE('(?i)"'), Token("}"), mandatory=4)
preamble = Series(Series(Token("@Preamble{"), wsp__), RegExp('(?i)"'), pre_code, RegExp('(?i)"'), wsp__, Series(Token("}"), wsp__), mandatory=5)
bibliography = ZeroOrMore(Alternative(preamble, comment, entry))
root__ = bibliography
@@ -162,7 +162,7 @@ BibTeX_AST_transformation_table = {
"content": [replace_or_reduce],
"plain_content": [],
"text": [],
":Token, :RE": reduce_single_child,
":_Token, :_RE": reduce_single_child,
"*": replace_by_single_child
}
......
@@ -12,17 +12,17 @@ Match-test "simple"
### AST
(content
    (:Token
        "{"
    )
    (text
        (CONTENT_STRING
            "Edward N. Zalta"
        )
    )
    (:Token
        "}"
    )
)
Match-test "nested_braces"
@@ -33,28 +33,28 @@ Match-test "nested_braces"
### AST
(content
(:Token
"{"
(:Token
"{"
)
(text
(CONTENT_STRING
"\url"
)
(text
(:Series
(:Token
"{"
)
(text