Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
9.2.2023: Due to updates, GitLab will be unavailable for a few minutes between 9:00 and 11:00.
Open sidebar
badw-it
DHParser
Commits
ca3dc76b
Commit
ca3dc76b
authored
May 18, 2017
by
Eckhart Arnold
Browse files
- more general approach for retrieve-filter-functions
parent
a217669e
Changes
9
Hide whitespace changes
Inline
Side-by-side
DHParser/dsl.py
View file @
ca3dc76b
...
...
@@ -75,7 +75,8 @@ from DHParser.toolkit import logging, is_filename, load_if_file
from DHParser.parsers import GrammarBase, CompilerBase, nil_scanner,
\\
Lookbehind, Lookahead, Alternative, Pop, Required, Token,
\\
Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Sequence, RE, Capture,
\\
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source,
\\
nop_filter, counterpart_filter, accumulating_filter
from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters,
\\
remove_children_if, reduce_single_child, replace_by_single_child, remove_whitespace,
\\
no_operation, remove_expendables, remove_tokens, flatten, is_whitespace, is_expendable,
\\
...
...
DHParser/ebnf.py
View file @
ca3dc76b
...
...
@@ -318,7 +318,7 @@ class EBNFCompiler(CompilerBase):
'comment'
:
''
,
'literalws'
:
[
'right'
],
'tokens'
:
set
(),
# alt. 'scanner_tokens'
'
counterpart'
:
se
t
()}
# alt. 'retrieve_
counterpart
'
'
filter'
:
dic
t
()}
# alt. 'retrieve_
filter
'
@
property
def
result
(
self
):
...
...
@@ -407,14 +407,11 @@ class EBNFCompiler(CompilerBase):
declarations
=
declarations
[:
-
1
]
declarations
.
append
(
'"""'
)
# add default functions for
counterpart
filters of pop or retrieve operators
# add default functions for
retrieve_filter
filters of pop or retrieve operators
for
symbol
in
self
.
directives
[
'counterpart'
]:
# declarations.append('def %s_counterpart(value): \n' % symbol +
# ' return value.replace("(", ")").replace("[", "]")'
# '.replace("{", "}").replace(">", "<")\n')
declarations
.
append
(
symbol
+
'_counterpart = lambda value: value.replace("(", ")")'
'.replace("[", "]").replace("{", "}").replace(">", "<")'
)
# for symbol, fun in self.directives['filter']:
# declarations.append(symbol + '_filter = lambda value: value.replace("(", ")")'
# '.replace("[", "]").replace("{", "}").replace(">", "<")')
# turn definitions into declarations in reverse order
...
...
@@ -540,8 +537,12 @@ class EBNFCompiler(CompilerBase):
elif
key
in
{
'tokens'
,
'scanner_tokens'
}:
self
.
directives
[
'tokens'
]
|=
self
.
_compile
(
node
.
result
[
1
])
elif
key
in
{
'counterpart'
,
'retrieve_counterpart'
}:
self
.
directives
[
'counterpart'
]
|=
self
.
_compile
(
node
.
result
[
1
])
elif
key
.
endswith
(
'_filter'
):
filter_set
=
self
.
_compile
(
node
.
result
[
1
])
if
not
isinstance
(
filter_set
,
set
)
or
len
(
filter_set
)
!=
1
:
node
.
add_error
(
'Directive "%s" accepts exactly on symbol, not %s'
%
(
key
,
str
(
filter_set
)))
self
.
directives
[
'filter'
][
key
[:
-
7
]]
=
filter_set
.
pop
()
else
:
node
.
add_error
(
'Unknown directive %s ! (Known ones are %s .)'
%
...
...
@@ -575,8 +576,8 @@ class EBNFCompiler(CompilerBase):
node
.
add_error
((
'Retrieve Operator "%s" requires a symbol, '
'and not a %s.'
)
%
(
prefix
,
str
(
arg
.
parser
)))
return
str
(
arg
.
result
)
if
str
(
arg
)
in
self
.
directives
[
'
counterpart
'
]:
custom_args
=
[
'
counterpart=%s_counterpart'
%
str
(
arg
)]
if
str
(
arg
)
in
self
.
directives
[
'
filter
'
]:
custom_args
=
[
'
retrieve_filter=%s'
%
self
.
directives
[
'filter'
][
str
(
arg
)]
]
self
.
variables
.
add
(
arg
.
result
)
elif
len
(
node
.
result
)
>
2
:
...
...
DHParser/parsers.py
View file @
ca3dc76b
...
...
@@ -875,7 +875,6 @@ class NegativeLookbehind(Lookbehind):
class
Capture
(
UnaryOperator
):
def
__init__
(
self
,
parser
,
name
=
''
):
super
(
Capture
,
self
).
__init__
(
parser
,
name
)
print
(
"WARNING: Capture operator is experimental"
)
def
__call__
(
self
,
text
):
node
,
text
=
self
.
parser
(
text
)
...
...
@@ -887,21 +886,38 @@ class Capture(UnaryOperator):
return
None
,
text
def nop_filter(stack):
    """Return the most recently pushed entry of ``stack`` unchanged.

    This is the pass-through retrieve filter: it picks the top of the
    value stack and applies no transformation to it.

    Raises:
        IndexError: if ``stack`` is empty.
    """
    latest = stack[-1]
    return latest
def counterpart_filter(stack):
    """Return the closing counterpart of the top entry of ``stack``.

    Every opening bracket character in the most recently pushed value is
    replaced by its closing partner: ``(`` -> ``)``, ``[`` -> ``]``,
    ``{`` -> ``}`` and ``<`` -> ``>``.  All other characters are left
    untouched.

    Raises:
        IndexError: if ``stack`` is empty.
    """
    value = stack[-1]
    # The angle-bracket mapping used to be reversed (">" -> "<"), unlike the
    # other three pairs, so an opening "<" was returned unchanged.  Map the
    # opening bracket to the closing one, consistent with the other pairs.
    return (value.replace("(", ")")
                 .replace("[", "]")
                 .replace("{", "}")
                 .replace("<", ">"))
def accumulating_filter(stack):
    """Return all entries of ``stack`` concatenated into one string.

    The entries are joined in stack order (oldest first) with no
    separator; an empty stack yields the empty string.
    """
    separator = ""
    return separator.join(stack)
class
Retrieve
(
Parser
):
def
__init__
(
self
,
symbol
,
counterpart
=
None
,
name
=
''
):
def
__init__
(
self
,
symbol
,
retrieve_filter
=
None
,
name
=
''
):
if
not
name
:
name
=
symbol
.
name
super
(
Retrieve
,
self
).
__init__
(
name
)
self
.
symbol
=
symbol
self
.
counterpart
=
counterpart
if
counterpart
else
lambda
value
:
value
print
(
"WARNING: Retrieve operator is experimental"
)
self
.
retrieve_filter
=
retrieve_filter
if
retrieve_filter
else
nop_filter
def
__deepcopy__
(
self
,
memo
):
return
self
.
__class__
(
self
.
symbol
,
self
.
counterpart
,
self
.
name
)
return
self
.
__class__
(
self
.
symbol
,
self
.
retrieve_filter
,
self
.
name
)
def
__call__
(
self
,
text
):
stack
=
self
.
grammar
.
variables
[
self
.
symbol
.
name
]
value
=
self
.
counterpart
(
self
.
pick_value
(
stack
))
try
:
stack
=
self
.
grammar
.
variables
[
self
.
symbol
.
name
]
value
=
self
.
retrieve_filter
(
stack
)
self
.
pick_value
(
stack
)
except
(
KeyError
,
IndexError
):
return
Node
(
self
,
''
).
add_error
(
dsl_error_msg
(
self
,
"%s undefined or exhausted"
%
self
.
symbol
.
name
)),
text
if
text
.
startswith
(
value
):
return
Node
(
self
,
value
),
text
[
len
(
value
):]
else
:
...
...
DHParser/syntaxtree.py
View file @
ca3dc76b
...
...
@@ -168,7 +168,7 @@ class Node:
self
.
error_flag
=
any
(
r
.
error_flag
for
r
in
self
.
result
)
if
self
.
children
else
False
self
.
_len
=
len
(
self
.
result
)
if
not
self
.
children
else
\
sum
(
child
.
_len
for
child
in
self
.
children
)
# self.pos = 0 # coninuous updating of pos values
# self.pos = 0 # con
t
inuous updating of pos values
self
.
_pos
=
-
1
def
__str__
(
self
):
...
...
DHParser/testing.py
View file @
ca3dc76b
...
...
@@ -47,7 +47,7 @@ def mock_syntax_tree(sexpr):
while
s
[
0
]
!=
')'
:
if
s
[
0
]
!=
'('
:
raise
ValueError
(
'"(" expected, not '
+
s
[:
10
])
# assert s[0] == '(', s
level
=
1
;
level
=
1
i
=
1
while
level
>
0
:
if
s
[
i
]
==
'('
:
...
...
experimental/PopRetrieveTest/PopRetrieveComplement.ebnf
View file @
ca3dc76b
@
retrieve_
counterpart
= braces
@
braces_filter =
counterpart
_filter
document = { text | codeblock }
codeblock = braces { text | opening_braces | (!:braces closing_braces) } ::braces
braces = opening_braces
...
...
experimental/PopRetrieveTest/PopRetrieveTest.xml
View file @
ca3dc76b
<document>
<Alternative>
<
:
Alternative>
<text>
<RegExp>
Anfang
</RegExp>
<:RegExp>
Anfang
</:RegExp>
</text>
</Alternative>
<Alternative>
</
:
Alternative>
<
:
Alternative>
<codeblock>
<delimiter>
<delimiter_sign>
<RegExp>
```
</RegExp>
<:RegExp>
```
</:RegExp>
</delimiter_sign>
</delimiter>
<ZeroOrMore>
<Alternative>
<
:
ZeroOrMore>
<
:
Alternative>
<text>
<RegExp>
code block
</RegExp>
<:RegExp>
code block
</:RegExp>
</text>
</Alternative>
<Alternative>
<Sequence>
</
:
Alternative>
<
:
Alternative>
<
:
Sequence>
<delimiter_sign>
<RegExp>
``
</RegExp>
<:RegExp>
``
</:RegExp>
</delimiter_sign>
</Sequence>
</Alternative>
<Alternative>
</
:
Sequence>
</
:
Alternative>
<
:
Alternative>
<text>
<RegExp>
<
- keine Ende-Zeichen !
</RegExp>
<:RegExp>
<
- keine Ende-Zeichen !
</:RegExp>
</text>
</Alternative>
</ZeroOrMore>
<delimiter>
```
</delimiter>
</:Alternative>
</:ZeroOrMore>
<delimiter>
```
</delimiter>
</codeblock>
</Alternative>
<Alternative>
</
:
Alternative>
<
:
Alternative>
<text>
<RegExp>
<
:
RegExp>
Ende
</RegExp>
</
:
RegExp>
</text>
</Alternative>
</
:
Alternative>
</document>
\ No newline at end of file
experimental/PopRetrieveTest/PopRetrieveTest2.xml
View file @
ca3dc76b
<document>
<Alternative>
<
:
Alternative>
<text>
<RegExp>
Anfang
</RegExp>
<:RegExp>
Anfang
</:RegExp>
</text>
</Alternative>
<Alternative>
</
:
Alternative>
<
:
Alternative>
<codeblock>
<delimiter>
<delimiter_sign>
<RegExp>
```
</RegExp>
<:RegExp>
```
</:RegExp>
</delimiter_sign>
</delimiter>
<ZeroOrMore>
<Alternative>
<
:
ZeroOrMore>
<
:
Alternative>
<text>
<RegExp>
code block
</RegExp>
<:RegExp>
code block
</:RegExp>
</text>
</Alternative>
<Alternative>
<Sequence>
</
:
Alternative>
<
:
Alternative>
<
:
Sequence>
<delimiter_sign>
<RegExp>
``
</RegExp>
<:RegExp>
``
</:RegExp>
</delimiter_sign>
</Sequence>
</Alternative>
<Alternative>
</
:
Sequence>
</
:
Alternative>
<
:
Alternative>
<text>
<RegExp>
<
- keine Ende-Zeichen !
</RegExp>
<:RegExp>
<
- keine Ende-Zeichen !
</:RegExp>
</text>
</Alternative>
</ZeroOrMore>
<delimiter>
```
</delimiter>
</:Alternative>
</:ZeroOrMore>
<delimiter>
```
</delimiter>
</codeblock>
</Alternative>
<Alternative>
</
:
Alternative>
<
:
Alternative>
<text>
<RegExp>
<
:
RegExp>
Ende
Absatz ohne
</RegExp>
</
:
RegExp>
</text>
</Alternative>
<Alternative>
</
:
Alternative>
<
:
Alternative>
<codeblock>
<delimiter>
<delimiter_sign>
<RegExp>
```
</RegExp>
<:RegExp>
```
</:RegExp>
</delimiter_sign>
</delimiter>
<ZeroOrMore>
<Alternative>
<
:
ZeroOrMore>
<
:
Alternative>
<text>
<RegExp>
<
:
RegExp>
codeblock, aber
das stellt sich erst am Ende heraus...
Mehrzeliger
</RegExp>
</
:
RegExp>
</text>
</Alternative>
</ZeroOrMore>
<delimiter>
```
</delimiter>
</:Alternative>
</:ZeroOrMore>
<delimiter>
```
</delimiter>
</codeblock>
</Alternative>
<Alternative>
</
:
Alternative>
<
:
Alternative>
<text>
<RegExp>
code block
</RegExp>
<:RegExp>
code block
</:RegExp>
</text>
</Alternative>
</
:
Alternative>
</document>
\ No newline at end of file
test/test_ebnf.py
View file @
ca3dc76b
...
...
@@ -121,7 +121,7 @@ class TestPopRetrieve:
text = /[^`]+/
"""
mini_lang2
=
"""
@
retrieve_
counterpart
= braces
@
braces_filter=
counterpart
_filter
document = { text | codeblock }
codeblock = braces { text | opening_braces | (!:braces closing_braces) } ::braces
braces = opening_braces
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment