Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
e3a60af3
Commit
e3a60af3
authored
Apr 13, 2017
by
di68kap
Browse files
- more unit tests and important bug fixes
parent
e0512e77
Changes
9
Hide whitespace changes
Inline
Side-by-side
DHParser/DSLsupport.py
View file @
e3a60af3
...
...
@@ -310,8 +310,6 @@ def run_compiler(source_file, compiler_suite="", extension=".xml"):
print
(
result
)
finally
:
if
f
:
f
.
close
()
if
IS_LOGGING
():
print
(
ast
)
return
[]
...
...
DHParser/EBNFcompiler.py
View file @
e3a60af3
...
...
@@ -130,10 +130,12 @@ EBNF_ASTTransform = {
[
remove_enclosing_delimiters
,
replace_by_single_child
],
"oneormore, repetition, option, regexchain"
:
[
reduce_single_child
,
remove_enclosing_delimiters
],
"symbol, literal, regexp
, list_
"
:
"symbol, literal, regexp"
:
[
remove_expendables
,
reduce_single_child
],
(
TOKEN_KEYWORD
,
WHITESPACE_KEYWORD
):
[
remove_expendables
,
reduce_single_child
],
"list_"
:
[
partial
(
remove_tokens
,
tokens
=
{
','
})],
""
:
[
remove_expendables
,
replace_by_single_child
]
}
...
...
@@ -157,7 +159,7 @@ class EBNFCompiler(CompilerBase):
in EBNF-Notation.
"""
COMMENT_KEYWORD
=
"COMMENT__"
DEFAULT_WHITESPACE
=
'[
\t
]*'
DEFAULT_WHITESPACE
=
r
'[\t ]*'
RESERVED_SYMBOLS
=
{
TOKEN_KEYWORD
,
WHITESPACE_KEYWORD
,
COMMENT_KEYWORD
}
KNOWN_DIRECTIVES
=
{
'comment'
,
'whitespace'
,
'tokens'
,
'literalws'
}
VOWELS
=
{
'A'
,
'E'
,
'I'
,
'O'
,
'U'
}
# what about cases like 'hour', 'universe' etc.?
...
...
@@ -240,7 +242,6 @@ class EBNFCompiler(CompilerBase):
if
'right'
in
self
.
directives
[
'literalws'
]
else
"''"
))
definitions
.
append
((
'wspL__'
,
WHITESPACE_KEYWORD
if
'left'
in
self
.
directives
[
'literalws'
]
else
"''"
))
print
(
self
.
directives
)
####
definitions
.
append
((
WHITESPACE_KEYWORD
,
(
"mixin_comment(whitespace="
"r'{whitespace}', comment=r'{comment}')"
).
...
...
@@ -349,17 +350,24 @@ class EBNFCompiler(CompilerBase):
key
=
node
.
result
[
0
].
result
.
lower
()
assert
key
not
in
self
.
scanner_tokens
if
key
in
{
'comment'
,
'whitespace'
}:
value
=
node
.
result
[
1
].
result
if
value
[
0
]
+
value
[
-
1
]
in
{
'""'
,
"''"
}:
value
=
escape_re
(
value
[
1
:
-
1
])
elif
value
[
0
]
+
value
[
-
1
]
==
'//'
:
value
=
self
.
_check_rx
(
node
,
value
[
1
:
-
1
])
if
node
.
result
[
1
].
parser
.
name
==
"list_"
:
if
len
(
node
.
result
[
1
].
result
)
!=
1
:
node
.
add_error
(
"Directive %s must have one, but not %i values"
%
(
key
,
len
(
node
.
result
[
1
])))
value
=
self
.
compile__
(
node
.
result
[
1
]).
pop
()
if
value
in
{
'linefeed'
,
'standard'
}
and
key
==
'whitespace'
:
value
=
'\s*'
if
value
==
"linefeed"
else
self
.
DEFAULT_WHITESPACE
else
:
node
.
add_error
(
'Value "%" not allowed for directive %s'
%
(
value
,
key
))
else
:
if
value
==
"linefeed"
:
value
=
'\s*'
elif
value
==
"standard"
:
value
=
self
.
DEFAULT_WHITESPACE
value
=
self
.
_check_rx
(
node
,
value
)
value
=
node
.
result
[
1
].
result
.
strip
(
"~"
)
if
value
!=
node
.
result
[
1
].
result
:
node
.
add_error
(
"Whitespace marker '~' not allowed in definition of "
"%s regular expression."
%
key
)
if
value
[
0
]
+
value
[
-
1
]
in
{
'""'
,
"''"
}:
value
=
escape_re
(
value
[
1
:
-
1
])
elif
value
[
0
]
+
value
[
-
1
]
==
'//'
:
value
=
self
.
_check_rx
(
node
,
value
[
1
:
-
1
])
self
.
directives
[
key
]
=
value
elif
key
==
'literalws'
:
value
=
{
item
.
lower
()
for
item
in
self
.
compile__
(
node
.
result
[
1
])}
...
...
@@ -478,4 +486,5 @@ class EBNFCompiler(CompilerBase):
return
'RE('
+
', '
.
join
([
arg
]
+
name
)
+
')'
def
list_
(
self
,
node
):
return
set
(
item
.
strip
()
for
item
in
node
.
result
.
split
(
','
))
assert
node
.
children
return
set
(
item
.
result
.
strip
()
for
item
in
node
.
result
)
DHParser/parsercombinators.py
View file @
e3a60af3
...
...
@@ -286,11 +286,13 @@ class GrammarBase:
if
self
.
wspL__
:
self
.
wsp_left_parser__
=
RegExp
(
self
.
wspL__
,
WHITESPACE_KEYWORD
)
self
.
wsp_left_parser__
.
grammar
=
self
self
.
all_parsers
.
add
(
self
.
wsp_left_parser__
)
else
:
self
.
wsp_left_parser__
=
ZOMBIE_PARSER
if
self
.
wspR__
:
self
.
wsp_right_parser__
=
RegExp
(
self
.
wspR__
,
WHITESPACE_KEYWORD
)
self
.
wsp_right_parser__
.
grammar
=
self
self
.
all_parsers
.
add
(
self
.
wsp_right_parser__
)
else
:
self
.
wsp_right_parser__
=
ZOMBIE_PARSER
self
.
root__
.
apply
(
self
.
_add_parser
)
...
...
dhparser.py
View file @
e3a60af3
...
...
@@ -28,7 +28,6 @@ from DHParser.DSLsupport import compileDSL, run_compiler
from
DHParser.EBNFcompiler
import
EBNFGrammar
,
EBNF_ASTPipeline
,
EBNFCompiler
from
DHParser.parsercombinators
import
full_compilation
def
selftest
(
file_name
):
print
(
file_name
)
with
open
(
'examples/'
+
file_name
,
encoding
=
"utf-8"
)
as
f
:
...
...
examples/EBNF/EBNF.ebnf
View file @
e3a60af3
...
...
@@ -37,6 +37,6 @@ literal = /"(?:[^"]|\\")*?"/~ # e.g. "(", '+', 'while'
regexp = /~?\/(?:[^\/]|(?<=\\)\/)*\/~?/~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
# '~' is a whitespace-marker, if present leading or trailing
# whitespace of a regular expression will be ignored tacitly.
list_ = /\w+
\s*(?:,\s*\w+\s*)*/~
# comma separated list of symbols, e.g. BEGIN_LIST, END_LIST,
list_ = /\w+
/~ { "," /\w+/~ }
# comma separated list of symbols, e.g. BEGIN_LIST, END_LIST,
# BEGIN_QUOTE, END_QUOTE ; see CommonMark/markdown.py for an exmaple
EOF = !/./
examples/MLW/
VERALTET
/MLW_kopie.ebnf
→
examples/MLW/
OLDSTUFF
/MLW_kopie.ebnf
View file @
e3a60af3
File moved
examples/MLW/
VERALTET
/MLW_kopie2.ebnf
→
examples/MLW/
OLDSTUFF
/MLW_kopie2.ebnf
View file @
e3a60af3
File moved
tests/no_unit_tests/PopRetrieve_compiler.py
View file @
e3a60af3
...
...
@@ -49,10 +49,10 @@ class PopRetrieveGrammar(GrammarBase):
delimiter_sign = /`+/
text = /[^`]+/
"""
source_hash__
=
"
a418b812a36733a4713eb4e06322e1b
5"
source_hash__
=
"
1312f8befacbc4d03bcc320644f3701
5"
parser_initialization__
=
"upon instatiation"
COMMENT__
=
r
''
WSP__
=
mixin_comment
(
whitespace
=
r
'[
]*'
,
comment
=
r
''
)
WSP__
=
mixin_comment
(
whitespace
=
r
'[
\t
]*'
,
comment
=
r
''
)
wspL__
=
''
wspR__
=
WSP__
text
=
RE
(
'[^`]+'
,
wR
=
''
)
...
...
tests/test_EBNFcompiler.py
View file @
e3a60af3
...
...
@@ -29,6 +29,36 @@ from DHParser.DSLsupport import compileEBNF
WRITE_LOGS
=
True
class
TestDirectives
:
mini_language
=
"""
expression = term { ("+" | "-") term }
term = factor { ("*" | "/") factor }
factor = constant | "(" expression ")"
constant = digit { digit } [ //~ ]
digit = /0/ | /1/ | /2/ | /3/ | /4/ | /5/ | /6/ | /7/ | /8/ | /9/
"""
def
test_whitespace_linefeed
(
self
):
lang
=
"@ whitespace = linefeed
\n
"
+
self
.
mini_language
MinilangParser
=
compileEBNF
(
lang
)
parser
=
MinilangParser
()
assert
parser
syntax_tree
=
parser
.
parse
(
"3 + 4 * 12"
)
parser
.
log_parsing_history
(
'WSP1'
)
assert
not
syntax_tree
.
collect_errors
()
syntax_tree
=
parser
.
parse
(
"3 + 4
\n
* 12"
)
parser
.
log_parsing_history
(
'WSP2'
)
assert
not
syntax_tree
.
collect_errors
()
def
test_whitespace_standard
(
self
):
lang
=
"@ whitespace = standard
\n
"
+
self
.
mini_language
parser
=
compileEBNF
(
lang
)()
assert
parser
syntax_tree
=
parser
.
parse
(
"3 + 4 * 12"
)
assert
not
syntax_tree
.
collect_errors
()
syntax_tree
=
parser
.
parse
(
"3 + 4
\n
* 12"
)
assert
syntax_tree
.
collect_errors
()
class
TestPopRetrieve
:
mini_language
=
"""
document = { text | codeblock }
...
...
@@ -70,4 +100,4 @@ class TestPopRetrieve:
if
__name__
==
"__main__"
:
from
run
import
run_tests
run_tests
(
"TestPopRetrieve"
,
globals
())
\ No newline at end of file
run_tests
(
"TestDirectives TestPopRetrieve"
,
globals
())
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment