Commit f564f606 authored by di68kap

sync commit

parents f2f3e6e2 72bec2ac
@@ -95,7 +95,7 @@ Allow to specify parsers/nodes, the result of which will be dropped
 right away, so that the nodes they produce do not need to be removed
 during the AST-Transformations. Typical candidates would be:
-1. Tokens ":Token"
+1. Tokens ":_Token"
 2. Whitespace ":Whitespace" (in some cases)
 3. empty Nodes
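
No such early-drop hook exists at this commit; purely as an illustration of the idea in the note above, here is a hypothetical sketch in which a drop-set is consulted when nodes are attached to the tree, so that nothing has to be cleaned up during the AST-transformation. All names are illustrative:

    # Hypothetical sketch of the early-drop idea (not DHParser code).
    # Nodes whose parser type is in DROP_TYPES are never attached to the
    # tree, so the AST transformation has nothing to remove afterwards.
    DROP_TYPES = {':_Token', ':Whitespace'}   # candidates 1 and 2 from the list

    def attach(children, ptype, content):
        """Append a (ptype, content) pair unless it is slated for dropping."""
        if ptype not in DROP_TYPES and content:   # 'and content' drops empty nodes
            children.append((ptype, content))

    children = []
    attach(children, ':Whitespace', '  ')   # dropped right away
    attach(children, 'WORD', 'shadow')      # kept
    attach(children, ':_Token', ',')        # dropped
    print(children)                         # [('WORD', 'shadow')]
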
@@ -143,8 +143,8 @@ parsers:
 "contains" another parser without its calls being run through the
 parser guard, but that records every call of the parser and its
 results, e.g. to trace the `option`-parser from the ebnf-parser (see
-DHParser/ebnf.py) you'd write: `option = Trace(Series(Token("["),
-expression, Token("]"), mandatory=1))`
+DHParser/ebnf.py) you'd write: `option = Trace(Series(_Token("["),
+expression, _Token("]"), mandatory=1))`
 - For the ebnf-representation a tracing-prefix could be added, say `?`,
 e.g. `option = ?("[" §expression "]")` or, alternatively, `?option =
...
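
A rough, self-contained sketch of the proposed `Trace` wrapper (this is not DHParser code; the parser protocol is reduced here to a plain callable returning a (result, rest) pair, and all names are illustrative):

    class Trace:
        """Wraps a parser and records every call together with its result,
        without routing the calls through the parser guard."""
        def __init__(self, parser):
            self.parser = parser
            self.history = []          # list of (input head, result) pairs

        def __call__(self, text):
            result, rest = self.parser(text)
            self.history.append((text[:10], result))
            return result, rest

    # toy parser: matches a single digit
    digit = lambda s: (s[0], s[1:]) if s[:1].isdigit() else (None, s)
    traced = Trace(digit)
    traced('7+8')
    traced('x')
    print(traced.history)   # [('7+8', '7'), ('x', None)]
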
@@ -90,7 +90,7 @@ except ImportError:
 from DHParser import logging, is_filename, load_if_file, \\
     Grammar, Compiler, nil_preprocessor, PreprocessorToken, Whitespace, \\
     Lookbehind, Lookahead, Alternative, Pop, Token, Synonym, AllOf, SomeOf, Unordered, \\
-    Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture, \\
+    Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \\
     ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \\
     grammar_changed, last_value, counterpart, accumulate, PreprocessorFunc, \\
     Node, TransformationFunc, TransformationDict, transformation_factory, \\
...
@@ -30,7 +30,7 @@ from functools import partial
 from DHParser.compile import CompilerError, Compiler
 from DHParser.error import Error
-from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, Whitespace, RE, \
+from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, Whitespace, \
     NegativeLookahead, Alternative, Series, Option, OneOrMore, ZeroOrMore, Token
 from DHParser.preprocess import nil_preprocessor, PreprocessorFunc
 from DHParser.syntaxtree import Node, WHITESPACE_PTYPE, TOKEN_PTYPE
@@ -117,7 +117,7 @@ class EBNFGrammar(Grammar):
     literal    = /"(?:[^"]|\\")*?"/~  # e.g. "(", '+', 'while'
                | /'(?:[^']|\\')*?'/~  # whitespace following literals will be ignored tacitly.
     plaintext  = /`(?:[^"]|\\")*?`/~  # like literal but does not eat whitespace
-    regexp     = /~?\/(?:\\\/|[^\/])*?\/~?/~  # e.g. /\w+/, ~/#.*(?:\n|$)/~
+    regexp     = /\/(?:\\\/|[^\/])*?\//~  # e.g. /\w+/, ~/#.*(?:\n|$)/~
                  # '~' is a whitespace-marker, if present leading or trailing
                  # whitespace of a regular expression will be ignored tacitly.
     whitespace = /~/~  # implicit or default whitespace
@@ -126,38 +126,41 @@ class EBNFGrammar(Grammar):
         EOF = !/./
     """
     expression = Forward()
-    source_hash__ = "3fc9f5a340f560e847d9af0b61a68743"
     parser_initialization__ = "upon instantiation"
     COMMENT__ = r'#.*(?:\n|$)'
     WHITESPACE__ = r'\s*'
-    WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
+    WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
     wspL__ = ''
-    wspR__ = WSP__
-    whitespace__ = Whitespace(WSP__)
+    wspR__ = WSP_RE__
+    wsp__ = Whitespace(WSP_RE__)
     EOF = NegativeLookahead(RegExp('.'))
-    list_ = Series(RE('\\w+'), ZeroOrMore(Series(Token(","), RE('\\w+'))))
-    whitespace = RE('~')
-    regexp = RE('~?/(?:\\\\/|[^/])*?/~?')
-    plaintext = RE('`(?:[^"]|\\\\")*?`')
-    literal = Alternative(RE('"(?:[^"]|\\\\")*?"'), RE("'(?:[^']|\\\\')*?'"))
-    symbol = RE('(?!\\d)\\w+')
-    option = Series(Token("["), expression, Token("]"), mandatory=1)
-    repetition = Series(Token("{"), expression, Token("}"), mandatory=1)
-    oneormore = Series(Token("{"), expression, Token("}+"))
-    unordered = Series(Token("<"), expression, Token(">"), mandatory=1)
-    group = Series(Token("("), expression, Token(")"), mandatory=1)
-    retrieveop = Alternative(Token("::"), Token(":"))
-    flowmarker = Alternative(Token("!"), Token("&"), Token("-!"), Token("-&"))
-    factor = Alternative(Series(Option(flowmarker), Option(retrieveop), symbol, NegativeLookahead(Token("="))),
+    list_ = Series(RegExp('\\w+'), wsp__, ZeroOrMore(Series(Series(Token(","), wsp__), RegExp('\\w+'), wsp__)))
+    whitespace = Series(RegExp('~'), wsp__)
+    regexp = Series(RegExp('/(?:\\\\/|[^/])*?/'), wsp__)
+    plaintext = Series(RegExp('`(?:[^"]|\\\\")*?`'), wsp__)
+    literal = Alternative(Series(RegExp('"(?:[^"]|\\\\")*?"'), wsp__), Series(RegExp("'(?:[^']|\\\\')*?'"), wsp__))
+    symbol = Series(RegExp('(?!\\d)\\w+'), wsp__)
+    option = Series(Series(Token("["), wsp__), expression, Series(Token("]"), wsp__), mandatory=1)
+    repetition = Series(Series(Token("{"), wsp__), expression, Series(Token("}"), wsp__), mandatory=1)
+    oneormore = Series(Series(Token("{"), wsp__), expression, Series(Token("}+"), wsp__))
+    unordered = Series(Series(Token("<"), wsp__), expression, Series(Token(">"), wsp__), mandatory=1)
+    group = Series(Series(Token("("), wsp__), expression, Series(Token(")"), wsp__), mandatory=1)
+    retrieveop = Alternative(Series(Token("::"), wsp__), Series(Token(":"), wsp__))
+    flowmarker = Alternative(Series(Token("!"), wsp__), Series(Token("&"), wsp__),
+                             Series(Token("-!"), wsp__), Series(Token("-&"), wsp__))
+    factor = Alternative(Series(Option(flowmarker), Option(retrieveop), symbol,
+                                NegativeLookahead(Series(Token("="), wsp__))),
                          Series(Option(flowmarker), literal), Series(Option(flowmarker), plaintext),
                          Series(Option(flowmarker), regexp), Series(Option(flowmarker), whitespace),
                          Series(Option(flowmarker), oneormore), Series(Option(flowmarker), group),
                          Series(Option(flowmarker), unordered), repetition, option)
-    term = OneOrMore(Series(Option(Token("§")), factor))
-    expression.set(Series(term, ZeroOrMore(Series(Token("|"), term))))
-    directive = Series(Token("@"), symbol, Token("="), Alternative(regexp, literal, list_), mandatory=1)
-    definition = Series(symbol, Token("="), expression, mandatory=1)
-    syntax = Series(Option(RE('', wR='', wL=WSP__)), ZeroOrMore(Alternative(definition, directive)), EOF, mandatory=2)
+    term = OneOrMore(Series(Option(Series(Token("§"), wsp__)), factor))
+    expression.set(Series(term, ZeroOrMore(Series(Series(Token("|"), wsp__), term))))
+    directive = Series(Series(Token("@"), wsp__), symbol, Series(Token("="), wsp__),
+                       Alternative(regexp, literal, list_), mandatory=1)
+    definition = Series(symbol, Series(Token("="), wsp__), expression, mandatory=1)
+    syntax = Series(Option(Series(wsp__, RegExp(''))),
+                    ZeroOrMore(Alternative(definition, directive)), EOF, mandatory=2)
     root__ = syntax
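
The pattern running through this hunk: the implicit-whitespace parsers (`RE`, and `Token` with adjacent whitespace) are replaced by explicit combinations of a pure `RegExp` or `Token` with a single shared whitespace parser `wsp__` inside a `Series`. A minimal combinator sketch of why `Series(RegExp(...), wsp__)` captures what `RE(...)` did implicitly; this is not DHParser's API, and all names are illustrative:

    import re

    def regexp(pattern):
        """Parser: match `pattern` at the start of the input."""
        rx = re.compile(pattern)
        def parse(text):
            m = rx.match(text)
            return (m.group(), text[m.end():]) if m else (None, text)
        return parse

    def series(*parsers):
        """Parser: match all `parsers` one after another."""
        def parse(text):
            parts, rest = [], text
            for p in parsers:
                value, rest = p(rest)
                if value is None:
                    return None, text
                parts.append(value)
            return ''.join(parts), rest
        return parse

    wsp__ = regexp(r'\s*')                        # one shared whitespace parser
    symbol = series(regexp(r'(?!\d)\w+'), wsp__)  # cf. Series(RegExp('(?!\d)\w+'), wsp__)
    print(symbol('foo   = bar'))                  # ('foo   ', '= bar')
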
@@ -382,9 +385,9 @@ class EBNFCompiler(Compiler):
         regular expressions found in the current parsing process
     """
     COMMENT_KEYWORD = "COMMENT__"
-    WHITESPACE_KEYWORD = "WSP__"
+    WHITESPACE_KEYWORD = "WSP_RE__"
     RAW_WS_KEYWORD = "WHITESPACE__"
-    WHITESPACE_PARSER_KEYWORD = "whitespace__"
+    WHITESPACE_PARSER_KEYWORD = "wsp__"
     RESERVED_SYMBOLS = {WHITESPACE_KEYWORD, RAW_WS_KEYWORD, COMMENT_KEYWORD}
     AST_ERROR = "Badly structured syntax tree. " \
                 "Potentially due to erroneous AST transformation."
@@ -459,7 +462,7 @@ class EBNFCompiler(Compiler):
         elif rule.startswith('Synonym'):
             transformations = '[reduce_single_child]'
         transtable.append('    "' + name + '": %s,' % transformations)
-        transtable.append('    ":Token, :RE": reduce_single_child,')
+        transtable.append('    ":Token": reduce_single_child,')
         transtable += ['    "*": replace_by_single_child', '}', '']
         transtable += [TRANSFORMER_FACTORY.format(NAME=self.grammar_name)]
         return '\n'.join(transtable)
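
For orientation, the transformation table this code emits for a small grammar looks roughly like the following sketch (the rule names are placeholders; only the ":Token" and "*" entries are verbatim from the code above):

    from DHParser.transform import reduce_single_child, replace_by_single_child

    document_AST_transformation_table = {
        "document": [],                   # one (initially empty) entry per rule
        "sentence": [],
        ":Token": reduce_single_child,    # the entry appended above
        "*": replace_by_single_child      # fallback joker
    }
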
@@ -778,7 +781,6 @@ class EBNFCompiler(Compiler):
         name for the particular non-terminal.
         """
         arguments = [self.compile(r) for r in node.children] + custom_args
-        # node.error_flag = max(node.error_flag, max(t.error_flag for t in node.children))
         return parser_class + '(' + ', '.join(arguments) + ')'
@@ -921,12 +923,22 @@ class EBNFCompiler(Compiler):
     def on_literal(self, node: Node) -> str:
-        return 'Token(' + node.content.replace('\\', r'\\') + ')'
+        center = 'Token(' + node.content.replace('\\', r'\\') + ')'
+        left = self.WHITESPACE_PARSER_KEYWORD if 'left' in self.directives['literalws'] else ''
+        right = self.WHITESPACE_PARSER_KEYWORD if 'right' in self.directives['literalws'] else ''
+        if left or right:
+            return 'Series(' + ", ".join(item for item in (left, center, right) if item) + ')'
+        return center

     def on_plaintext(self, node: Node) -> str:
-        return 'Token(' + node.content.replace('\\', r'\\').replace('`', '"') \
-               + ", wL='', wR='')"
+        tk = node.content.replace('\\', r'\\')
+        rpl = '"' if tk.find('"') < 0 else "'" if tk.find("'") < 0 else ''
+        if rpl:
+            tk = rpl + tk[1:-1] + rpl
+        else:
+            tk = rpl + tk.replace('"', '\\"')[1:-1] + rpl
+        return 'Token(' + tk + ')'

     def on_regexp(self, node: Node) -> str:
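
The rewritten `on_literal` now honors the `@literalws` directive by wrapping the generated `Token` in a `Series` with the `wsp__` parser. A stand-alone rendition of just that string-building logic, with the directive passed in as a plain set for illustration (the helper name and parameters are ours, not DHParser's):

    WSP = 'wsp__'   # EBNFCompiler.WHITESPACE_PARSER_KEYWORD after this commit

    def compile_literal(content, literalws):
        """Mirror of on_literal: emit Token(...), optionally wrapped in a
        Series with the whitespace parser on the left and/or right."""
        center = 'Token(' + content.replace('\\', r'\\') + ')'
        left = WSP if 'left' in literalws else ''
        right = WSP if 'right' in literalws else ''
        if left or right:
            return 'Series(' + ', '.join(p for p in (left, center, right) if p) + ')'
        return center

    print(compile_literal('"("', {'right'}))          # Series(Token("("), wsp__)
    print(compile_literal('"("', set()))              # Token("(")
    print(compile_literal('"("', {'left', 'right'}))  # Series(wsp__, Token("("), wsp__)
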
@@ -935,7 +947,7 @@ class EBNFCompiler(Compiler):
         if rx[0] == '/' and rx[-1] == '/':
             parser = 'RegExp('
         else:
-            parser = 'RE('
+            parser = '_RE('
         if rx[:2] == '~/':
             if not 'left' in self.directives['literalws']:
                 name = ['wL=' + self.WHITESPACE_KEYWORD] + name
@@ -961,7 +973,7 @@ class EBNFCompiler(Compiler):
     def on_whitespace(self, node: Node) -> str:
-        return 'whitespace__'
+        return self.WHITESPACE_PARSER_KEYWORD

     def on_list_(self, node) -> Set[str]:
...
@@ -35,7 +35,6 @@ from typing import Callable, cast, Iterator, List, AbstractSet, Set, Union, Tuple
 __all__ = ('ParserBase',
            'WHITESPACE_PTYPE',
-           'PLAINTEXT_PTYPE',
            'TOKEN_PTYPE',
            'MockParser',
            'ZombieParser',
@@ -62,11 +61,11 @@ class ParserBase:
     It is defined here, because Node objects require a parser object
     for instantiation.
     """
-    __slots__ = '_name', '_ptype'
+    __slots__ = 'name', 'ptype'

-    def __init__(self, name=''):  # , pbases=frozenset()):
-        self._name = name  # type: str
-        self._ptype = ':' + self.__class__.__name__  # type: str
+    def __init__(self,):  # , pbases=frozenset()):
+        self.name = ''  # type: str
+        self.ptype = ':' + self.__class__.__name__  # type: str

     def __repr__(self):
         return self.name + self.ptype
@@ -77,17 +76,17 @@ class ParserBase:
     def __call__(self, text: StringView) -> Tuple[Optional['Node'], StringView]:
         return None, text

-    @property
-    def name(self):
-        """Returns the name of the parser or the empty string '' for unnamed
-        parsers."""
-        return self._name
-
-    @property
-    def ptype(self) -> str:
-        """Returns the type of the parser. By default this is the parser's
-        class name preceded by a colon, e.g. ':ZeroOrMore'."""
-        return self._ptype
+    # @property
+    # def name(self):
+    #     """Returns the name of the parser or the empty string '' for unnamed
+    #     parsers."""
+    #     return self._name
+    #
+    # @property
+    # def ptype(self) -> str:
+    #     """Returns the type of the parser. By default this is the parser's
+    #     class name preceded by a colon, e.g. ':ZeroOrMore'."""
+    #     return self._ptype

     @property
     def repr(self) -> str:
@@ -111,7 +110,6 @@ class ParserBase:
 WHITESPACE_PTYPE = ':Whitespace'
-PLAINTEXT_PTYPE = ':PlainText'
 TOKEN_PTYPE = ':Token'
@@ -129,8 +127,10 @@ class MockParser(ParserBase):
     def __init__(self, name='', ptype=''):  # , pbases=frozenset()):
         assert not ptype or ptype[0] == ':'
-        super().__init__(name)
-        self._ptype = ptype or ':' + self.__class__.__name__
+        super().__init__()
+        self.name = name
+        if ptype:
+            self.ptype = ptype  # or ':' + self.__class__.__name__

 class ZombieParser(MockParser):
@@ -147,9 +147,10 @@ class ZombieParser(MockParser):
     __slots__ = ()

     def __init__(self):
-        super(ZombieParser, self).__init__("__ZOMBIE__")
+        super(ZombieParser, self).__init__()
         assert not self.__class__.alive, "There can be only one!"
         assert self.__class__ == ZombieParser, "No derivatives, please!"
+        self.name = "__ZOMBIE__"
         self.__class__.alive = True

     def __copy__(self):
@@ -935,8 +936,8 @@ def parse_xml(xml: str) -> Node:
     Generates a tree of nodes from a (Pseudo-)XML-source.
     """
     xml = StringView(xml)
-    PlainText = MockParser('', PLAINTEXT_PTYPE)
-    mock_parsers = {PLAINTEXT_PTYPE: PlainText}
+    PlainText = MockParser('', TOKEN_PTYPE)
+    mock_parsers = {TOKEN_PTYPE: PlainText}

     def parse_attributes(s: StringView) -> Tuple[StringView, OrderedDict]:
         """Parses a sequence of XML-Attributes. Returns the string-slice
@@ -996,7 +997,7 @@ def parse_xml(xml: str) -> Node:
             result.append(child)
         s, closing_tagname = parse_closing_tag(s)
         assert tagname == closing_tagname
-        if len(result) == 1 and result[0].parser.ptype == PLAINTEXT_PTYPE:
+        if len(result) == 1 and result[0].parser.ptype == TOKEN_PTYPE:
             result = result[0].result
         else:
             result = tuple(result)
...
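
Since PLAINTEXT_PTYPE is gone, character data read back by parse_xml is now typed as ':Token'. A quick way to observe the effect (a sketch, assuming the DHParser version of this commit):

    from DHParser.syntaxtree import parse_xml

    # round-trip a small pseudo-XML snippet; the text content comes back
    # as anonymous token nodes rather than ':PlainText' nodes
    tree = parse_xml('<sentence><WORD>shadow</WORD>, </sentence>')
    print(tree.as_sxpr())
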
@@ -97,7 +97,8 @@ __all__ = ('TransformationDict',
            'assert_content',
            'error_on',
            'warn_on',
-           'assert_has_children')
+           'assert_has_children',
+           'peek')
 TransformationProc = Callable[[List[Node]], None]

@@ -409,19 +410,21 @@ def is_token(context: List[Node], tokens: AbstractSet[str] = frozenset()) -> bool
     whitespace-tokens will be ignored. In case an empty set of tokens is passed,
     any token is a match.
     """
-    def stripped(nd: Node) -> str:
-        """Removes leading and trailing whitespace-nodes from content."""
-        # assert node.parser.ptype == TOKEN_PTYPE
-        if nd.children:
-            i, k = 0, len(nd.children)
-            while i < len(nd.children) and nd.children[i].parser.ptype == WHITESPACE_PTYPE:
-                i += 1
-            while k > 0 and nd.children[k - 1].parser.ptype == WHITESPACE_PTYPE:
-                k -= 1
-            return "".join(child.content for child in node.children[i:k])
-        return nd.content
+    # def stripped(nd: Node) -> str:
+    #     """Removes leading and trailing whitespace-nodes from content."""
+    #     # assert node.parser.ptype == TOKEN_PTYPE
+    #     if nd.children:
+    #         i, k = 0, len(nd.children)
+    #         while i < len(nd.children) and nd.children[i].parser.ptype == WHITESPACE_PTYPE:
+    #             i += 1
+    #         while k > 0 and nd.children[k - 1].parser.ptype == WHITESPACE_PTYPE:
+    #             k -= 1
+    #         return "".join(child.content for child in node.children[i:k])
+    #     return nd.content
+    # node = context[-1]
+    # return node.parser.ptype == TOKEN_PTYPE and (not tokens or stripped(node) in tokens)
     node = context[-1]
-    return node.parser.ptype == TOKEN_PTYPE and (not tokens or stripped(node) in tokens)
+    return node.parser.ptype == TOKEN_PTYPE and (not tokens or node.content in tokens)

 @transformation_factory(collections.abc.Set)
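
With token nodes no longer carrying whitespace children, is_token reduces to a direct comparison against node.content. A typical consumer is the remove_tokens transformation; a sketch of a table entry, assuming remove_tokens keeps its factory form at this version:

    from DHParser.transform import remove_tokens

    table = {
        "sentence": remove_tokens({',', ';'}),  # drop token nodes whose content is ',' or ';'
    }
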
@@ -983,3 +986,8 @@ def forbid(context: List[Node], child_tags: AbstractSet[str]):
         if child.tag_name in child_tags:
             context[0].new_error(node, 'Element "%s" cannot be nested inside "%s".' %
                                  (child.parser.name, node.parser.name))
+
+
+def peek(context: List[Node]):
+    """For debugging: Prints the last node in the context as S-expression."""
+    print(context[-1].as_sxpr())
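
One possible use of the new peek helper (a sketch; the table keys follow the tutorial's poetry example below): register it ahead of the real transformations for a rule, so every matching node is printed during traversal.

    from DHParser.transform import traverse, flatten, peek

    poetry_table = {
        "sentence": [peek, flatten],   # print each <sentence> node, then flatten it
    }
    # traverse(syntax_tree, poetry_table)   # syntax_tree: root Node of a parsed text
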
@@ -637,7 +637,7 @@ can easily write your own. How does this look like? ::
         "part": [],
         "WORD": [],
         "EOF": [],
-        ":Token, :RE": reduce_single_child,
+        ":_Token, :_RE": reduce_single_child,
         "*": replace_by_single_child
     }
@@ -654,7 +654,7 @@ As you can see, the transformation-table contains an entry for every known
 parser, i.e. "document", "sentence", "part", "WORD", "EOF". (If any of these are
 missing in the table of your ``poetryCompiler.py``, add them now!) In the
 template you'll also find transformations for two anonymous parsers, i.e.
-":Token" and ":RE" as well as some curious entries such as "*" and "+". The
+":_Token" and ":_RE" as well as some curious entries such as "*" and "+". The
 latter are considered to be "jokers". The transformations related to the
 "+"-sign will be applied on any node, before any other transformation is
 applied. In this case, all empty nodes will be removed first (transformation:
@@ -722,10 +722,10 @@ Running the "poetryCompiler.py"-script on "macbeth.dsl" again, yields::
             <WORD>shadow</WORD>
         </part>
         <:Series>
-            <:Token>
+            <:_Token>
                 <:PlainText>,</:PlainText>
                 <:Whitespace> </:Whitespace>
-            </:Token>
+            </:_Token>
             <part>
                 <WORD>a</WORD>
 ...
@@ -734,11 +734,11 @@ It starts to become more readable and concise, but there are still some oddities.
 Firstly, the tokens that delimit parts of sentences still contain whitespace.
 Secondly, if several <part>-nodes follow each other in a <sentence>-node, the
 <part>-nodes after the first one are enclosed by a <:Series>-node or even a
-cascade of <:ZeroOrMore> and <:Series>-nodes. As for the <:Token>-nodes, we
+cascade of <:ZeroOrMore> and <:Series>-nodes. As for the <:_Token>-nodes, we
 can do the same trick as with the WORD-nodes::

-    ":Token": [remove_whitespace, reduce_single_child],
-    ":RE": reduce_single_child,
+    ":_Token": [remove_whitespace, reduce_single_child],
+    ":_RE": reduce_single_child,
@@ -807,7 +807,7 @@ Now that everything is set, let's have a look at the result::
             <WORD>walking</WORD>
             <WORD>shadow</WORD>
         </part>
-        <:Token>,</:Token>
+        <:_Token>,</:_Token>
         <part>
             <WORD>a</WORD>
             <WORD>poor</WORD>

@@ -816,8 +816,8 @@ Now that everything is set, let's have a look at the result::
 That is much better. There is but one slight blemish in the output: While all
 nodes left are named nodes, i.e. nodes associated with a named parser, there are a
-few anonymous <:Token> nodes. Here is a little exercise: Do away with those
-<:Token>-nodes by replacing them by something semantically more meaningful.
+few anonymous <:_Token> nodes. Here is a little exercise: Do away with those
+<:_Token>-nodes by replacing them by something semantically more meaningful.
 Hint: Add a new symbol "delimiter" in the grammar definition "poetry.ebnf". An
 alternative strategy to extending the grammar would be to use the
 ``replace_parser`` operator. Which of the two strategies is the better one? Explain
...
@@ -17,11 +17,11 @@ except ImportError:
 from DHParser import logging, is_filename, load_if_file, \
     Grammar, Compiler, nil_preprocessor, PreprocessorToken, \
     Lookbehind, Lookahead, Alternative, Pop, Token, Synonym, AllOf, SomeOf, Unordered, \
-    Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture, \
+    Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \
     ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
     last_value, counterpart, accumulate, PreprocessorFunc, \
     Node, TransformationFunc, TransformationDict, \
-    traverse, remove_children_if, merge_children, is_anonymous, \
+    traverse, remove_children_if, merge_children, is_anonymous, Whitespace, \
     reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \
     remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \
     is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \
@@ -64,20 +64,21 @@ class ArithmeticGrammar(Grammar):
     digit = Forward()
     expression = Forward()
     variable = Forward()
-    source_hash__ = "3064cea87c9ceb59ade35566a31c3d75"
+    source_hash__ = "385a94a70cb629d46a13e15305692667"
     parser_initialization__ = "upon instantiation"
     COMMENT__ = r''
-    WHITESPACE__ = r'[\t ]*'
+    WHITESPACE__ = r'\s*'
-    WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
+    WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
     wspL__ = ''
-    wspR__ = WSP__
+    wspR__ = WSP_RE__
+    wsp__ = Whitespace(WSP_RE__)
     test = Series(digit, constant, variable)
-    digit.set(Alternative(Token("0"), Token("1"), Token("..."), Token("9")))
+    digit.set(Alternative(Series(Token("0"), wsp__), Series(Token("1"), wsp__), Series(Token("..."), wsp__), Series(Token("9"), wsp__)))
     constant.set(Series(digit, ZeroOrMore(digit)))
-    variable.set(Alternative(Token("x"), Token("y"), Token("z")))
+    variable.set(Alternative(Series(Token("x"), wsp__), Series(Token("y"), wsp__), Series(Token("z"), wsp__)))
-    factor = Alternative(constant, variable, Series(Token("("), expression, Token(")")))
+    factor = Alternative(constant, variable, Series(Series(Token("("), wsp__), expression, Series(Token(")"), wsp__)))
-    term = Series(factor, ZeroOrMore(Series(Alternative(Token("*"), Token("/")), factor)))
+    term = Series(factor, ZeroOrMore(Series(Alternative(Series(Token("*"), wsp__), Series(Token("/"), wsp__)), factor)))
-    expression.set(Series(term, ZeroOrMore(Series(Alternative(Token("+"), Token("-")), term))))
+    expression.set(Series(term, ZeroOrMore(Series(Alternative(Series(Token("+"), wsp__), Series(Token("-"), wsp__)), term))))