Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
The container registry cleanup task is now completed and the registry can be used normally.
Open sidebar
badw-it
DHParser
Commits
6b5ee7ca
Commit
6b5ee7ca
authored
Nov 04, 2019
by
Eckhart Arnold
Browse files
- minor changes
parent
19c83afc
Changes
5
Hide whitespace changes
Inline
Side-by-side
DHParser/ebnf.py
View file @
6b5ee7ca
...
...
@@ -33,9 +33,9 @@ from typing import Callable, Dict, List, Set, Tuple, Sequence, Union, Optional,
from
DHParser.compile
import
CompilerError
,
Compiler
,
compile_source
,
visitor_name
from
DHParser.configuration
import
THREAD_LOCALS
,
get_config_value
from
DHParser.error
import
Error
from
DHParser.parse
import
Grammar
,
mixin_comment
,
Forward
,
RegExp
,
DropWhitespace
,
\
NegativeLookahead
,
Alternative
,
Series
,
Option
,
OneOrMore
,
ZeroOrMore
,
Token
,
\
GrammarError
from
DHParser.parse
import
Grammar
,
mixin_comment
,
mixin_noempty
,
Forward
,
RegExp
,
\
DropWhitespace
,
NegativeLookahead
,
Alternative
,
Series
,
Option
,
OneOrMore
,
ZeroOrMore
,
\
Token
,
GrammarError
from
DHParser.preprocess
import
nil_preprocessor
,
PreprocessorFunc
from
DHParser.syntaxtree
import
Node
,
WHITESPACE_PTYPE
,
TOKEN_PTYPE
from
DHParser.toolkit
import
load_if_file
,
escape_re
,
md5
,
sane_parser_name
,
re
,
expand_table
,
\
...
...
@@ -753,7 +753,10 @@ class EBNFCompiler(Compiler):
an empty string in case the node is neither regexp nor literal.
"""
if
nd
.
tag_name
==
'regexp'
:
search_regex
=
self
.
_extract_regex
(
nd
).
replace
(
r
'\~'
,
self
.
directives
.
super_ws
)
super_ws
=
self
.
directives
.
super_ws
noempty_ws
=
mixin_noempty
(
super_ws
)
search_regex
=
self
.
_extract_regex
(
nd
)
\
.
replace
(
r
'\~!'
,
noempty_ws
).
replace
(
r
'\~'
,
super_ws
)
return
unrepr
(
"re.compile(r'%s')"
%
search_regex
)
elif
nd
.
tag_name
==
'literal'
:
s
=
nd
.
content
[
1
:
-
1
]
# remove quotation marks
...
...
DHParser/parse.py
View file @
6b5ee7ca
...
...
@@ -61,6 +61,7 @@ __all__ = ('Parser',
'Whitespace'
,
'DropWhitespace'
,
'mixin_comment'
,
'mixin_noempty'
,
'MetaParser'
,
'UnaryParser'
,
'NaryParser'
,
...
...
@@ -591,8 +592,8 @@ def mixin_comment(whitespace: str, comment: str) -> str:
return
whitespace
def
no
n_
empty
(
whitespace
:
str
)
->
str
:
"""
def
mixin_
noempty
(
whitespace
:
str
)
->
str
:
r
"""
Returns a regular expression pattern that matches only if the regular
expression pattern `whitespace` matches AND if the match is not empty.
...
...
@@ -607,7 +608,11 @@ def non_empty(whitespace: str) -> str:
that is, / / or / / or /\t/ won't work, but / */ or /\s*/ or /\s+/
do work. There is no test for this. Fixed-size regular expressions
run through `non_empty_ws` will not match any more if they are applied
to the beginning or the middle of a sequence of whitespaces!!!
to the beginning or the middle of a sequence of whitespaces!
In order to be safe, your whitespace regular expressions should follow
the rule: "Whitespace cannot be followed by whitespace" or "Either
grab it all or leave it all".
:param whitespace: a regular expression pattern
:return: new regular expression pattern that does not match the empty
...
...
documentation_src/StepByStepGuide.rst
View file @
6b5ee7ca
...
...
@@ -614,17 +614,16 @@ that the output is rather verbose. Just looking at the beginning of the
output, we find::
<document>
<:ZeroOrMore>
<sentence>
<part>
<WORD>
<:RegExp>Life’s</:RegExp>
<:Whitespace> </:Whitespace>
</WORD>
<WORD>
<:RegExp>but</:RegExp>
<:Whitespace> </:Whitespace>
</WORD>
<sentence>
<part>
<WORD>
<:RegExp>Life’s</:RegExp>
<:Whitespace> </:Whitespace>
</WORD>
<WORD>
<:RegExp>but</:RegExp>
<:Whitespace> </:Whitespace>
</WORD>
...
But why do we need to know all those details! Why would we need a
...
...
@@ -665,14 +664,13 @@ rich set of predefined operators. Should these not suffice, you
can easily write your own. What does this look like? ::
poetry_AST_transformation_table = {
"<": remove_empty,
"document": [],
"sentence": [],
"part": [],
"WORD": [],
"EOF": [],
":Token": reduce_single_child,
"*": replace_by_single_child
"<": flatten,
"document": [],
"sentence": [],
"part": [],
"WORD": [],
"EOF": [],
"*": replace_by_single_child
}
You'll find this table in the script ``poetryCompiler.py``, which is also the
...
...
@@ -747,22 +745,19 @@ in the compiler-script should be changed as follows::
Running the "poetryCompiler.py"-script on "macbeth.dsl" again, yields::
<document>
<:ZeroOrMore>
<sentence>
<part>
<WORD>Life’s</WORD>
<WORD>but</WORD>
<WORD>a</WORD>
<WORD>walking</WORD>
<WORD>shadow</WORD>
</part>
<:Series>
<:Token>
<:PlainText>,</:PlainText>
<:Whitespace> </:Whitespace>
</:Token>
<part>
<WORD>a</WORD>
<sentence>
<part>
<WORD>Life’s</WORD>
<WORD>but</WORD>
<WORD>a</WORD>
<WORD>walking</WORD>
<WORD>shadow</WORD>
</part>
<:Token>,</:Token>
<:Whitespace> </:Whitespace>
<part>
<WORD>a</WORD>
...
It starts to become more readable and concise, but there are still some oddities.
...
...
test/test_ebnf.py
View file @
6b5ee7ca
...
...
@@ -645,7 +645,8 @@ class TestCustomizedResumeParsing:
@comment = /(?:\/\*(?:.|\n)*?\*\/)/ # c-style comments
document = ~ { word }
# @ word_resume = /(?:(?:\s\~)|(?:\~(?<=\s)))(?=.)|$/
@word_resume = /(?=(.|\n))\~(?!\1)(?=.)|$/
# @word_resume = /(?=(.|\n))\~(?!\1)(?=.)|$/
@word_resume = /\~!(?=.)|$/
# @ word_resume = /\~(?=.)|$/
word = !EOF §/\w+/ ~
EOF = !/./
...
...
test/test_parse.py
View file @
6b5ee7ca
...
...
@@ -46,10 +46,9 @@ class TestWhitespace:
def
test_whitespace_comment_mangling
(
self
):
pass
def
test_non_empt
if
y_
v
er
s
ion
(
self
):
def
test_non_empty_
d
er
ivat
ion
(
self
):
pass
class
TestParserError
:
def
test_parser_error_str
(
self
):
pe
=
ParserError
(
Node
(
'TAG'
,
'test'
).
with_pos
(
0
),
StringView
(
'Beispiel'
),
None
,
True
)
...
...
@@ -252,7 +251,7 @@ class TestRegex:
[+] # followed by a plus sign
\w* # possibly followed by more alpha chracters/
"""
result
,
messages
,
syntax_tree
=
compile_source
(
mlregex
,
None
,
get_ebnf_grammar
(),
result
,
messages
,
_
=
compile_source
(
mlregex
,
None
,
get_ebnf_grammar
(),
get_ebnf_transformer
(),
get_ebnf_compiler
(
'MultilineRegexTest'
))
assert
result
assert
not
messages
,
str
(
messages
)
...
...
@@ -268,7 +267,7 @@ class TestRegex:
[+]
\w* /
"""
result
,
messages
,
syntax_tree
=
compile_source
(
mlregex
,
None
,
get_ebnf_grammar
(),
result
,
messages
,
_
=
compile_source
(
mlregex
,
None
,
get_ebnf_grammar
(),
get_ebnf_transformer
(),
get_ebnf_compiler
(
'MultilineRegexTest'
))
assert
result
assert
not
messages
,
str
(
messages
)
...
...
@@ -283,7 +282,7 @@ class TestRegex:
@ ignorecase = True
regex = /alpha/
"""
result
,
messages
,
syntax_tree
=
compile_source
(
mlregex
,
None
,
get_ebnf_grammar
(),
result
,
messages
,
_
=
compile_source
(
mlregex
,
None
,
get_ebnf_grammar
(),
get_ebnf_transformer
(),
get_ebnf_compiler
(
'MultilineRegexTest'
))
assert
result
assert
not
messages
...
...
@@ -299,7 +298,7 @@ class TestRegex:
@ ignorecase = False
regex = /alpha/
"""
result
,
messages
,
syntax_tree
=
compile_source
(
mlregex
,
None
,
get_ebnf_grammar
(),
result
,
messages
,
_
=
compile_source
(
mlregex
,
None
,
get_ebnf_grammar
(),
get_ebnf_transformer
(),
get_ebnf_compiler
(
'MultilineRegexTest'
))
assert
result
assert
not
messages
...
...
@@ -320,7 +319,7 @@ class TestRegex:
test
\end{document}
"""
result
,
messages
,
syntax_tree
=
compile_source
(
result
,
messages
,
_
=
compile_source
(
tokenlang
,
None
,
get_ebnf_grammar
(),
get_ebnf_transformer
(),
get_ebnf_compiler
(
"TokenTest"
))
assert
result
...
...
@@ -339,8 +338,8 @@ class TestGrammar:
WORT = /[^ \t]+/~
LEERZEILE = /\n[ \t]*(?=\n)/~
"""
self
.
pyparser
,
messages
,
syntax_tree
=
compile_source
(
grammar
,
None
,
get_ebnf_grammar
(),
get_ebnf_transformer
(),
get_ebnf_compiler
(
"PosTest"
))
self
.
pyparser
,
messages
,
_
=
compile_source
(
grammar
,
None
,
get_ebnf_grammar
(),
get_ebnf_transformer
(),
get_ebnf_compiler
(
"PosTest"
))
assert
self
.
pyparser
assert
not
messages
...
...
@@ -397,11 +396,11 @@ class TestSeries:
series = "A" "B" "C" "D"
"""
parser
=
grammar_provider
(
lang
)()
st
=
parser
(
"ABCD"
)
;
st
=
parser
(
"ABCD"
)
assert
not
st
.
error_flag
st
=
parser
(
"A_CD"
)
;
st
=
parser
(
"A_CD"
)
assert
not
st
.
error_flag
st
=
parser
(
"AB_D"
)
;
st
=
parser
(
"AB_D"
)
assert
not
st
.
error_flag
def
test_mandatory
(
self
):
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment