Commit 6b5ee7ca authored by Eckhart Arnold's avatar Eckhart Arnold
Browse files

- minor changes

parent 19c83afc
......@@ -33,9 +33,9 @@ from typing import Callable, Dict, List, Set, Tuple, Sequence, Union, Optional,
from DHParser.compile import CompilerError, Compiler, compile_source, visitor_name
from DHParser.configuration import THREAD_LOCALS, get_config_value
from DHParser.error import Error
from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, DropWhitespace, \
NegativeLookahead, Alternative, Series, Option, OneOrMore, ZeroOrMore, Token, \
GrammarError
from DHParser.parse import Grammar, mixin_comment, mixin_noempty, Forward, RegExp, \
DropWhitespace, NegativeLookahead, Alternative, Series, Option, OneOrMore, ZeroOrMore, \
Token, GrammarError
from DHParser.preprocess import nil_preprocessor, PreprocessorFunc
from DHParser.syntaxtree import Node, WHITESPACE_PTYPE, TOKEN_PTYPE
from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name, re, expand_table, \
......@@ -753,7 +753,10 @@ class EBNFCompiler(Compiler):
an empty string in case the node is neither regexp nor literal.
"""
if nd.tag_name == 'regexp':
search_regex = self._extract_regex(nd).replace(r'\~', self.directives.super_ws)
super_ws = self.directives.super_ws
noempty_ws = mixin_noempty(super_ws)
search_regex = self._extract_regex(nd)\
.replace(r'\~!', noempty_ws).replace(r'\~', super_ws)
return unrepr("re.compile(r'%s')" % search_regex)
elif nd.tag_name == 'literal':
s = nd.content[1:-1] # remove quotation marks
......
......@@ -61,6 +61,7 @@ __all__ = ('Parser',
'Whitespace',
'DropWhitespace',
'mixin_comment',
'mixin_noempty',
'MetaParser',
'UnaryParser',
'NaryParser',
......@@ -591,8 +592,8 @@ def mixin_comment(whitespace: str, comment: str) -> str:
return whitespace
def non_empty(whitespace: str) -> str:
"""
def mixin_noempty(whitespace: str) -> str:
r"""
Returns a regular expression pattern that matches only if the regular
expression pattern `whitespace` matches AND if the match is not empty.
......@@ -607,7 +608,11 @@ def non_empty(whitespace: str) -> str:
that is / / or / / or /\t/ won't work, but / */ or /\s*/ or /\s+/
do work. There is no test for this. Fixed-size regular expressions
run through `non_empty_ws` will not match any more if they are applied
to the beginning or the middle of a sequence of whitespaces!!!
to the beginning or the middle of a sequence of whitespaces!
In order to be safe, your whitespace regular expressions should follow
the rule: "Whitespace cannot be followed by whitespace" or "Either
grab it all or leave it all".
:param whitespace: a regular expression pattern
:return: new regular expression pattern that does not match the empty
......
......@@ -614,17 +614,16 @@ that the output is rather verbose. Just looking at the beginning of the
output, we find::
<document>
<:ZeroOrMore>
<sentence>
<part>
<WORD>
<:RegExp>Life’s</:RegExp>
<:Whitespace> </:Whitespace>
</WORD>
<WORD>
<:RegExp>but</:RegExp>
<:Whitespace> </:Whitespace>
</WORD>
<sentence>
<part>
<WORD>
<:RegExp>Life’s</:RegExp>
<:Whitespace> </:Whitespace>
</WORD>
<WORD>
<:RegExp>but</:RegExp>
<:Whitespace> </:Whitespace>
</WORD>
...
But why do we need to know all those details! Why would we need a
......@@ -665,14 +664,13 @@ rich set of predefined operators. Should these not suffice, you
can easily write your own. What does this look like? ::
poetry_AST_transformation_table = {
"<": remove_empty,
"document": [],
"sentence": [],
"part": [],
"WORD": [],
"EOF": [],
":Token": reduce_single_child,
"*": replace_by_single_child
"<": flatten,
"document": [],
"sentence": [],
"part": [],
"WORD": [],
"EOF": [],
"*": replace_by_single_child
}
You'll find this table in the script ``poetryCompiler.py``, which is also the
......@@ -747,22 +745,19 @@ in the compiler-script should be changed as follows::
Running the "poetryCompiler.py"-script on "macbeth.dsl" again, yields::
<document>
<:ZeroOrMore>
<sentence>
<part>
<WORD>Life’s</WORD>
<WORD>but</WORD>
<WORD>a</WORD>
<WORD>walking</WORD>
<WORD>shadow</WORD>
</part>
<:Series>
<:Token>
<:PlainText>,</:PlainText>
<:Whitespace> </:Whitespace>
</:Token>
<part>
<WORD>a</WORD>
<sentence>
<part>
<WORD>Life’s</WORD>
<WORD>but</WORD>
<WORD>a</WORD>
<WORD>walking</WORD>
<WORD>shadow</WORD>
</part>
<:Token>,</:Token>
<:Whitespace> </:Whitespace>
<part>
<WORD>a</WORD>
...
It starts to become more readable and concise, but there are still some oddities.
......
......@@ -645,7 +645,8 @@ class TestCustomizedResumeParsing:
@comment = /(?:\/\*(?:.|\n)*?\*\/)/ # c-style comments
document = ~ { word }
# @ word_resume = /(?:(?:\s\~)|(?:\~(?<=\s)))(?=.)|$/
@word_resume = /(?=(.|\n))\~(?!\1)(?=.)|$/
# @word_resume = /(?=(.|\n))\~(?!\1)(?=.)|$/
@word_resume = /\~!(?=.)|$/
# @ word_resume = /\~(?=.)|$/
word = !EOF §/\w+/ ~
EOF = !/./
......
......@@ -46,10 +46,9 @@ class TestWhitespace:
def test_whitespace_comment_mangling(self):
pass
def test_non_emptify_version(self):
def test_non_empty_derivation(self):
pass
class TestParserError:
def test_parser_error_str(self):
pe = ParserError(Node('TAG', 'test').with_pos(0), StringView('Beispiel'), None, True)
......@@ -252,7 +251,7 @@ class TestRegex:
[+] # followed by a plus sign
\w* # possibly followed by more alpha chracters/
"""
result, messages, syntax_tree = compile_source(mlregex, None, get_ebnf_grammar(),
result, messages, _ = compile_source(mlregex, None, get_ebnf_grammar(),
get_ebnf_transformer(), get_ebnf_compiler('MultilineRegexTest'))
assert result
assert not messages, str(messages)
......@@ -268,7 +267,7 @@ class TestRegex:
[+]
\w* /
"""
result, messages, syntax_tree = compile_source(mlregex, None, get_ebnf_grammar(),
result, messages, _ = compile_source(mlregex, None, get_ebnf_grammar(),
get_ebnf_transformer(), get_ebnf_compiler('MultilineRegexTest'))
assert result
assert not messages, str(messages)
......@@ -283,7 +282,7 @@ class TestRegex:
@ ignorecase = True
regex = /alpha/
"""
result, messages, syntax_tree = compile_source(mlregex, None, get_ebnf_grammar(),
result, messages, _ = compile_source(mlregex, None, get_ebnf_grammar(),
get_ebnf_transformer(), get_ebnf_compiler('MultilineRegexTest'))
assert result
assert not messages
......@@ -299,7 +298,7 @@ class TestRegex:
@ ignorecase = False
regex = /alpha/
"""
result, messages, syntax_tree = compile_source(mlregex, None, get_ebnf_grammar(),
result, messages, _ = compile_source(mlregex, None, get_ebnf_grammar(),
get_ebnf_transformer(), get_ebnf_compiler('MultilineRegexTest'))
assert result
assert not messages
......@@ -320,7 +319,7 @@ class TestRegex:
test
\end{document}
"""
result, messages, syntax_tree = compile_source(
result, messages, _ = compile_source(
tokenlang, None, get_ebnf_grammar(), get_ebnf_transformer(),
get_ebnf_compiler("TokenTest"))
assert result
......@@ -339,8 +338,8 @@ class TestGrammar:
WORT = /[^ \t]+/~
LEERZEILE = /\n[ \t]*(?=\n)/~
"""
self.pyparser, messages, syntax_tree = compile_source(grammar, None, get_ebnf_grammar(),
get_ebnf_transformer(), get_ebnf_compiler("PosTest"))
self.pyparser, messages, _ = compile_source(grammar, None, get_ebnf_grammar(),
get_ebnf_transformer(), get_ebnf_compiler("PosTest"))
assert self.pyparser
assert not messages
......@@ -397,11 +396,11 @@ class TestSeries:
series = "A" "B" "C" "D"
"""
parser = grammar_provider(lang)()
st = parser("ABCD");
st = parser("ABCD")
assert not st.error_flag
st = parser("A_CD");
st = parser("A_CD")
assert not st.error_flag
st = parser("AB_D");
st = parser("AB_D")
assert not st.error_flag
def test_mandatory(self):
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment