Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
c230b2e2
Commit
c230b2e2
authored
Apr 20, 2017
by
di68kap
Browse files
- Complement-Filter for Retrieve-Operator added (not yet tested)
parent
a0ac8cf6
Changes
7
Hide whitespace changes
Inline
Side-by-side
DHParser/ebnf.py
View file @
c230b2e2
...
...
@@ -172,10 +172,10 @@ class EBNFCompiler(CompilerBase):
RESERVED_SYMBOLS
=
{
TOKEN_KEYWORD
,
WHITESPACE_KEYWORD
,
COMMENT_KEYWORD
}
AST_ERROR
=
"Badly structured syntax tree. "
\
"Potentially due to erroneuos AST transformation."
PREFIX_TABLE
=
[(
'§'
,
'Required'
),
(
'&'
,
'Lookahead'
)
,
(
'!'
,
'Negative
Lookahead'
)
,
(
'-&'
,
'Lookbehin
d'
)
,
(
'-!'
,
'NegativeLookbehind'
),
(
'::'
,
'Pop'
)
,
(
':'
,
'Retrieve'
)]
PREFIX_TABLE
=
{
'§'
:
'Required'
,
'&'
:
'
Lookahead'
,
'!'
:
'NegativeLookahea
d'
,
'-&'
:
'Lookbehind'
,
'-!'
:
'NegativeLookbehind'
,
'::'
:
'Pop'
,
':'
:
'Retrieve'
}
WHITESPACE
=
{
'horizontal'
:
r
'[\t ]*'
,
# default: horizontal
'linefeed'
:
r
'[ \t]*\n?(?!\s*\n)[ \t]*'
,
'vertical'
:
r
'\s*'
}
...
...
@@ -197,7 +197,8 @@ class EBNFCompiler(CompilerBase):
self
.
directives
=
{
'whitespace'
:
self
.
WHITESPACE
[
'horizontal'
],
'comment'
:
''
,
'literalws'
:
[
'right'
],
'tokens'
:
set
()}
'tokens'
:
set
(),
# alt. 'scanner_tokens'
'complement'
:
set
()}
# alt. 'retrieve_complement'
def
gen_scanner_skeleton
(
self
):
name
=
self
.
grammar_name
+
"Scanner"
...
...
@@ -244,8 +245,7 @@ class EBNFCompiler(CompilerBase):
if
self
.
variables
:
for
i
in
range
(
len
(
definitions
)):
if
definitions
[
i
][
0
]
in
self
.
variables
:
definitions
[
i
]
=
(
definitions
[
i
][
0
],
'Capture(%s, "%s")'
%
(
definitions
[
1
],
definitions
[
0
]))
definitions
[
i
]
=
(
definitions
[
i
][
0
],
'Capture(%s)'
%
definitions
[
1
])
self
.
definition_names
=
[
defn
[
0
]
for
defn
in
definitions
]
definitions
.
append
((
'wspR__'
,
WHITESPACE_KEYWORD
...
...
@@ -276,6 +276,13 @@ class EBNFCompiler(CompilerBase):
declarations
=
declarations
[:
-
1
]
declarations
.
append
(
'"""'
)
# add default functions for complement filters of pop or retrieve operators
for
symbol
in
self
.
directives
[
'complement'
]:
declarations
.
append
(
'@staticmethod
\n
'
'def complement_%s(value):
\n
'
%
symbol
+
' return value.replace("(", ")").replace("[", "]")'
'.replace("{", "}").replace(">", "<")
\n
'
)
# turn definitions into declarations in reverse order
self
.
root
=
definitions
[
0
][
0
]
if
definitions
else
""
definitions
.
reverse
()
...
...
@@ -332,7 +339,7 @@ class EBNFCompiler(CompilerBase):
self
.
rules
.
add
(
rule
)
defn
=
self
.
compile__
(
node
.
result
[
1
])
if
rule
in
self
.
variables
:
defn
=
'Capture(%s
, "%s"
)'
%
(
defn
,
rule
)
defn
=
'Capture(%s)'
%
defn
self
.
variables
.
remove
(
rule
)
except
TypeError
as
error
:
errmsg
=
EBNFCompiler
.
AST_ERROR
+
" ("
+
str
(
error
)
+
")
\n
"
+
node
.
as_sexpr
()
...
...
@@ -392,20 +399,23 @@ class EBNFCompiler(CompilerBase):
else
{}
if
'none'
in
value
else
value
self
.
directives
[
key
]
=
list
(
ws
)
elif
key
==
'tokens'
:
elif
key
in
{
'tokens'
,
'scanner_tokens'
}
:
self
.
directives
[
'tokens'
]
|=
self
.
compile__
(
node
.
result
[
1
])
elif
key
in
{
'complement'
,
'retrieve_complement'
}:
self
.
directives
[
'complement'
]
|=
self
.
compile__
(
node
.
result
[
1
])
else
:
node
.
add_error
(
'Unknown directive %s ! (Known ones are %s .)'
%
(
key
,
', '
.
join
(
list
(
self
.
directives
.
keys
()))))
return
""
def
non_terminal
(
self
,
node
,
parser_class
):
def
non_terminal
(
self
,
node
,
parser_class
,
custom_args
=
[]
):
"""Compiles any non-terminal, where `parser_class` indicates the Parser class
name for the particular non-terminal.
"""
arguments
=
filter
(
lambda
arg
:
arg
,
[
self
.
compile__
(
r
)
for
r
in
node
.
result
])
arguments
=
[
self
.
compile__
(
r
)
for
r
in
node
.
result
]
+
custom_args
return
parser_class
+
'('
+
', '
.
join
(
arguments
)
+
')'
def
expression
(
self
,
node
):
...
...
@@ -419,31 +429,34 @@ class EBNFCompiler(CompilerBase):
assert
node
.
children
assert
len
(
node
.
result
)
>=
2
,
node
.
as_sexpr
()
prefix
=
node
.
result
[
0
].
result
custom_args
=
[]
arg
=
node
.
result
[
-
1
]
if
prefix
in
{
'::'
,
':'
}:
assert
len
(
node
.
result
)
==
2
arg
=
node
.
result
[
-
1
]
argstr
=
str
(
arg
)
if
arg
.
parser
.
name
!=
'symbol'
:
node
.
add_error
((
'Retrieve Operator "%s" requires a symbols, '
'and not a %s.'
)
%
(
prefix
,
str
(
arg
.
parser
)))
return
str
(
arg
.
result
)
if
str
(
arg
)
in
self
.
directives
[
'complement'
]:
custom_args
=
[
'complement=%s_complement'
%
str
(
arg
)]
self
.
variables
.
add
(
arg
.
result
)
if
len
(
node
.
result
)
>
2
:
el
if
len
(
node
.
result
)
>
2
:
# shift = (Node(node.parser, node.result[1].result),)
# node.result[1].result = shift + node.result[2:]
node
.
result
[
1
].
result
=
(
Node
(
node
.
result
[
1
].
parser
,
node
.
result
[
1
].
result
),)
\
node
.
result
[
1
].
result
=
(
Node
(
node
.
result
[
1
].
parser
,
node
.
result
[
1
].
result
),)
\
+
node
.
result
[
2
:]
node
.
result
[
1
].
parser
=
node
.
parser
node
.
result
=
(
node
.
result
[
0
],
node
.
result
[
1
])
node
.
result
=
node
.
result
[
1
:]
for
match
,
parser_class
in
self
.
PREFIX_TABLE
:
if
prefix
==
match
:
return
self
.
non_terminal
(
node
,
parser_class
)
assert
False
,
(
"
Unknown prefix %s
\n
"
%
prefix
)
+
node
.
as_sexpr
()
try
:
parser_class
=
self
.
PREFIX_TABLE
[
prefix
]
return
self
.
non_terminal
(
node
,
parser_class
,
custom_args
)
except
KeyError
:
node
.
add_error
(
'
Unknown prefix
"
%s
".'
%
prefix
)
def
option
(
self
,
node
):
return
self
.
non_terminal
(
node
,
'Optional'
)
...
...
DHParser/parsers.py
View file @
c230b2e2
...
...
@@ -48,10 +48,9 @@ Berlin Heidelberg 2008.
Juancarlo Añez: grako, a PEG parser generator in Python,
https://bitbucket.org/apalala/grako
"""
import
copy
import
os
try
:
...
...
@@ -864,18 +863,19 @@ class Capture(UnaryOperator):
class
Retrieve
(
Parser
):
def
__init__
(
self
,
symbol
,
name
=
None
):
def
__init__
(
self
,
symbol
,
complement
=
None
,
name
=
None
):
if
not
name
:
name
=
symbol
.
name
super
(
Retrieve
,
self
).
__init__
(
name
)
self
.
symbol
=
symbol
# if isinstance(symbol, str) else symbol.name
self
.
symbol
=
symbol
self
.
complement
=
complement
if
complement
else
lambda
value
:
value
def
__deepcopy__
(
self
,
memo
):
return
self
.
__class__
(
self
.
symbol
,
self
.
name
)
return
self
.
__class__
(
self
.
symbol
,
self
.
complement
,
self
.
name
)
def
__call__
(
self
,
text
):
symbol
=
self
.
symbol
if
isinstance
(
self
.
symbol
,
str
)
\
else
self
.
symbol
.
name
stack
=
self
.
grammar
.
variables
[
symbol
]
value
=
self
.
pick_value
(
stack
)
stack
=
self
.
grammar
.
variables
[
self
.
symbol
.
name
]
value
=
self
.
complement
(
self
.
pick_value
(
stack
))
if
text
.
startswith
(
value
):
return
Node
(
self
,
value
),
text
[
len
(
value
):]
else
:
...
...
examples/EBNF/EBNF.ebnf
View file @
c230b2e2
...
...
@@ -24,7 +24,7 @@ flowmarker = "!" | "&" | "§" | # '!' negative lookahead, '&'
retrieveop = "::" | ":" # '::' pop, ':' retrieve
group = "(" expression §")"
regexchain = "
<
" expression §"
>
" # compiles "expression" into a singular regular expression
regexchain = "
>
" expression §"
<
" # compiles "expression" into a singular regular expression
oneormore = "{" expression "}+"
repetition = "{" expression §"}"
option = "[" expression §"]"
...
...
tests/no_unit_tests/PopRetrieveTest.xml
View file @
c230b2e2
...
...
@@ -40,9 +40,9 @@
</text>
</Alternative>
</ZeroOrMore>
<
Pop
>
<
delimiter
>
```
</
Pop
>
</
delimiter
>
</codeblock>
</Alternative>
<Alternative>
...
...
tests/no_unit_tests/PopRetrieveTest2.xml
View file @
c230b2e2
...
...
@@ -40,9 +40,9 @@
</text>
</Alternative>
</ZeroOrMore>
<
Pop
>
<
delimiter
>
```
</
Pop
>
</
delimiter
>
</codeblock>
</Alternative>
<Alternative>
...
...
@@ -75,9 +75,9 @@
</text>
</Alternative>
</ZeroOrMore>
<
Pop
>
<
delimiter
>
```
</
Pop
>
</
delimiter
>
</codeblock>
</Alternative>
<Alternative>
...
...
tests/no_unit_tests/PopRetrieve_compiler.py
View file @
c230b2e2
...
...
@@ -57,7 +57,7 @@ class PopRetrieveGrammar(GrammarBase):
wspR__
=
WSP__
text
=
RE
(
'[^`]+'
,
wR
=
''
)
delimiter_sign
=
RE
(
'`+'
,
wR
=
''
)
delimiter
=
Capture
(
delimiter_sign
,
"delimiter"
)
delimiter
=
Capture
(
delimiter_sign
)
codeblock
=
Sequence
(
delimiter
,
ZeroOrMore
(
Alternative
(
text
,
Sequence
(
NegativeLookahead
(
Retrieve
(
delimiter
)),
delimiter_sign
))),
Pop
(
delimiter
))
document
=
ZeroOrMore
(
Alternative
(
text
,
codeblock
))
root__
=
document
...
...
tests/test_ebnf.py
View file @
c230b2e2
...
...
@@ -20,11 +20,12 @@ See the License for the specific language governing permissions and
limitations under the License.
"""
from
functools
import
partial
import
os
import
sys
sys
.
path
.
append
(
os
.
path
.
abspath
(
'../../'
))
from
DHParser.syntaxtree
import
traverse
from
DHParser.parsers
import
full_compilation
,
WHITESPACE_KEYWORD
from
DHParser.parsers
import
full_compilation
,
Retrieve
,
WHITESPACE_KEYWORD
from
DHParser.ebnf
import
EBNFGrammar
,
EBNF_ASTPipeline
,
EBNFCompiler
from
DHParser.dsl
import
compileEBNF
...
...
@@ -103,10 +104,26 @@ class TestPopRetrieve:
delimiter_sign = /`+/
text = /[^`]+/
"""
mini_lang2
=
"""
@retrieve_filter = delimiter
document = { text | codeblock }
codeblock = braces { text | (!:braces closing_braces) } ::braces
braces = /\{+/
closing_braces = /\}+/
text = /[^`]+/
"""
def
setup
(
self
):
self
.
minilang_parser
=
compileEBNF
(
self
.
mini_language
)()
@
staticmethod
def
opening_delimiter
(
node
,
name
):
return
node
.
tag_name
==
name
and
not
isinstance
(
node
.
parser
,
Retrieve
)
@
staticmethod
def
closing_delimiter
(
node
):
return
isinstance
(
node
.
parser
,
Retrieve
)
def
test_compile_mini_language
(
self
):
assert
self
.
minilang_parser
...
...
@@ -114,8 +131,8 @@ class TestPopRetrieve:
teststr
=
"Anfang ```code block `` <- keine Ende-Zeichen ! ``` Ende"
syntax_tree
=
self
.
minilang_parser
.
parse
(
teststr
)
assert
not
syntax_tree
.
collect_errors
()
delim
=
str
(
next
(
syntax_tree
.
find
(
lambda
node
:
node
.
tag_name
==
"delimiter"
)))
pop
=
str
(
next
(
syntax_tree
.
find
(
lambda
node
:
node
.
tag_name
==
"Pop"
)))
delim
=
str
(
next
(
syntax_tree
.
find
(
partial
(
self
.
opening_delimiter
,
name
=
"delimiter"
)))
)
pop
=
str
(
next
(
syntax_tree
.
find
(
self
.
closing_delimiter
)))
assert
delim
==
pop
if
WRITE_LOGS
:
syntax_tree
.
log
(
"test_PopRetrieve_single_line"
,
'.cst'
)
...
...
@@ -132,8 +149,8 @@ class TestPopRetrieve:
"""
syntax_tree
=
self
.
minilang_parser
.
parse
(
teststr
)
assert
not
syntax_tree
.
collect_errors
()
delim
=
str
(
next
(
syntax_tree
.
find
(
lambda
node
:
node
.
tag_name
==
"delimiter"
)))
pop
=
str
(
next
(
syntax_tree
.
find
(
lambda
node
:
node
.
tag_name
==
"Pop"
)))
delim
=
str
(
next
(
syntax_tree
.
find
(
partial
(
self
.
opening_delimiter
,
name
=
"delimiter"
)))
)
pop
=
str
(
next
(
syntax_tree
.
find
(
self
.
closing_delimiter
)))
assert
delim
==
pop
if
WRITE_LOGS
:
syntax_tree
.
log
(
"test_PopRetrieve_multi_line"
,
'.cst'
)
...
...
@@ -172,4 +189,4 @@ class TestCompilerErrors:
if
__name__
==
"__main__"
:
from
run
import
runner
runner
(
"Test
EBNFParser
"
,
globals
())
runner
(
"Test
PopRetrieve
"
,
globals
())
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment