Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
9.2.2023: Due to updates GitLab will be unavailable for some minutes between 9:00 and 11:00.
Open sidebar
badw-it
DHParser
Commits
fc96335e
Commit
fc96335e
authored
Jun 30, 2017
by
Eckhart Arnold
Browse files
- some bug fixes, mostly related to capture-retrieve
parent
31331ec8
Changes
12
Hide whitespace changes
Inline
Side-by-side
DHParser/dsl.py
View file @
fc96335e
...
...
@@ -76,7 +76,7 @@ from DHParser.parsers import Grammar, Compiler, nil_scanner, \\
Lookbehind, Lookahead, Alternative, Pop, Required, Token, Synonym,
\\
Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Sequence, RE, Capture,
\\
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source,
\\
nop_filter
, counterpart
_filter
, accumulat
ing_filter
, ScannerFunc
last_value
, counterpart, accumulat
e
, ScannerFunc
from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters,
\\
remove_children_if, reduce_single_child, replace_by_single_child, remove_whitespace,
\\
no_transformation, remove_expendables, remove_tokens, flatten, is_whitespace, is_expendable,
\\
...
...
DHParser/ebnf.py
View file @
fc96335e
...
...
@@ -196,8 +196,10 @@ def get_ebnf_grammar() -> EBNFGrammar:
EBNF_transformation_table
=
{
# AST Transformations for EBNF-grammar
"
syntax
"
:
"
+
"
:
remove_expendables
,
"syntax"
:
[],
"directive, definition"
:
remove_tokens
(
'@'
,
'='
),
"expression"
:
...
...
@@ -211,13 +213,13 @@ EBNF_transformation_table = {
"oneormore, repetition, option, regexchain"
:
[
reduce_single_child
,
remove_enclosing_delimiters
],
"symbol, literal, regexp"
:
[
remove_expendables
,
reduce_single_child
],
[
reduce_single_child
],
(
TOKEN_PTYPE
,
WHITESPACE_PTYPE
):
[
remove_expendables
,
reduce_single_child
],
[
reduce_single_child
],
"list_"
:
[
flatten
,
remove_tokens
(
','
)],
"*"
:
[
remove_expendables
,
replace_by_single_child
]
[
replace_by_single_child
]
}
...
...
DHParser/parsers.py
View file @
fc96335e
...
...
@@ -204,7 +204,6 @@ def add_parser_guard(parser_func):
node
=
Node
(
None
,
text
[:
min
(
10
,
max
(
1
,
text
.
find
(
"
\n
"
)))]
+
" ..."
)
node
.
add_error
(
"maximum recursion depth of parser reached; "
"potentially due to too many errors!"
)
node
.
error_flag
=
True
rest
=
''
return
node
,
rest
...
...
@@ -355,10 +354,6 @@ class Grammar:
except
KeyError
:
parser
=
getattr
(
self
,
key
,
None
)
if
parser
:
# if toolkit.warnings():
# raise KeyError(('Parser "%s" inaccesible, because it is not connected '
# 'to the root parser "%s" !') % (key, self.root__.name))
# print('Parser "%s" not connected to root parser.' % key)
# add parser to grammar object on the fly...
setattr
(
self
,
key
,
copy
.
deepcopy
(
parser
))
self
[
key
].
apply
(
self
.
_add_parser
)
...
...
@@ -449,6 +444,9 @@ class Grammar:
if
rest
:
stitches
.
append
(
Node
(
None
,
rest
))
result
=
Node
(
None
,
tuple
(
stitches
))
if
any
(
self
.
variables
.
values
()):
result
.
add_error
(
"Capture-retrieve-stack not empty after end of parsing: "
+
str
(
self
.
variables
))
result
.
pos
=
0
# calculate all positions
return
result
...
...
@@ -478,7 +476,7 @@ class Grammar:
full_history
.
append
(
line
)
if
record
.
node
and
record
.
node
.
parser
.
ptype
!=
WHITESPACE_PTYPE
:
match_history
.
append
(
line
)
if
record
.
node
.
error
s
:
if
record
.
node
.
error
_flag
:
errors_only
.
append
(
line
)
write_log
(
full_history
,
log_file_name
+
'_full'
)
write_log
(
match_history
,
log_file_name
+
'_match'
)
...
...
@@ -842,9 +840,8 @@ class Sequence(NaryOperator):
for
parser
in
self
.
parsers
:
node
,
text_
=
parser
(
text_
)
if
not
node
:
return
node
,
text
if
node
.
result
:
# Nodes with zero-length result are silently omitted
results
+=
(
node
,)
return
None
,
text
results
+=
(
node
,)
if
node
.
error_flag
:
break
assert
len
(
results
)
<=
len
(
self
.
parsers
)
...
...
@@ -1009,6 +1006,8 @@ class NegativeLookbehind(Lookbehind):
class
Capture
(
UnaryOperator
):
"""STILL EXPERIMENTAL!"""
def
__init__
(
self
,
parser
:
Parser
,
name
:
str
=
''
)
->
None
:
super
(
Capture
,
self
).
__init__
(
parser
,
name
)
...
...
@@ -1025,50 +1024,54 @@ class Capture(UnaryOperator):
RetrieveFilter
=
Callable
[[
List
[
str
]],
str
]
def
nop_filter
(
stack
:
List
[
str
])
->
str
:
def
last_value
(
stack
:
List
[
str
])
->
str
:
return
stack
[
-
1
]
def
counterpart
_filter
(
stack
:
List
[
str
])
->
str
:
def
counterpart
(
stack
:
List
[
str
])
->
str
:
value
=
stack
[
-
1
]
return
value
.
replace
(
"("
,
")"
).
replace
(
"["
,
"]"
).
replace
(
"{"
,
"}"
).
replace
(
">"
,
"<"
)
def
accumulat
ing_filter
(
stack
:
List
[
str
])
->
str
:
return
""
.
join
(
stack
)
def
accumulat
e
(
stack
:
List
[
str
])
->
str
:
return
""
.
join
(
stack
)
if
len
(
stack
)
>
1
else
stack
[
-
1
]
# provoke IndexError if stack empty
class
Retrieve
(
Parser
):
"""STILL EXPERIMENTAL!"""
def
__init__
(
self
,
symbol
:
Parser
,
filter
:
RetrieveFilter
=
None
,
name
:
str
=
''
)
->
None
:
if
not
name
:
name
=
symbol
.
name
super
(
Retrieve
,
self
).
__init__
(
name
)
self
.
symbol
=
symbol
self
.
filter
=
filter
if
filter
else
nop_filter
self
.
filter
=
filter
if
filter
else
last_value
def
__deepcopy__
(
self
,
memo
):
return
self
.
__class__
(
self
.
symbol
,
self
.
filter
,
self
.
name
)
def
__call__
(
self
,
text
:
str
)
->
Tuple
[
Node
,
str
]:
return
self
.
call
(
text
)
# allow call method to be called from subclass circumventing the parser guard
def
call
(
self
,
text
:
str
)
->
Tuple
[
Node
,
str
]:
try
:
stack
=
self
.
grammar
.
variables
[
self
.
symbol
.
name
]
value
=
self
.
filter
(
stack
)
self
.
pick_value
(
stack
)
except
(
KeyError
,
IndexError
):
return
Node
(
self
,
''
).
add_error
(
dsl_error_msg
(
self
,
"%s undefined or exhausted"
%
self
.
symbol
.
name
)),
text
"
'
%s
'
undefined or exhausted
.
"
%
self
.
symbol
.
name
)),
text
if
text
.
startswith
(
value
):
return
Node
(
self
,
value
),
text
[
len
(
value
):]
else
:
return
None
,
text
def
pick_value
(
self
,
stack
:
List
[
str
])
->
str
:
return
stack
[
-
1
]
class
Pop
(
Retrieve
):
def
pick_value
(
self
,
stack
:
List
[
str
])
->
str
:
return
stack
.
pop
()
"""STILL EXPERIMENTAL!!!"""
def
__call__
(
self
,
text
:
str
)
->
Tuple
[
Node
,
str
]:
nd
,
txt
=
super
(
Pop
,
self
).
call
(
text
)
# call() instead of __call__() to avoid parser guard
if
nd
and
not
nd
.
error_flag
:
stack
=
self
.
grammar
.
variables
[
self
.
symbol
.
name
]
stack
.
pop
()
return
nd
,
txt
########################################################################
...
...
@@ -1105,7 +1108,7 @@ class Forward(Parser):
def
set
(
self
,
parser
:
Parser
):
# assert isinstance(parser, Parser)
self
.
name
=
parser
.
name
# redundant, see Grammar-constructor
#
self.name = parser.name # redundant, see Grammar-constructor
self
.
parser
=
parser
def
apply
(
self
,
func
:
Parser
.
ApplyFunc
):
...
...
examples/LaTeX/LaTeX.ebnf
View file @
fc96335e
...
...
@@ -9,15 +9,17 @@ preamble = { command }+
document = [PARSEP] { [PARSEP] paragraph } §EOF
genericenv = beginenv sequence §endenv
beginenv = "\begin" §( "{" NAME "}" )
endenv = "\end" §( "{" ::NAME "}" )
blockenv = beginenv sequence §endenv
parblock = "{" sequence §"}"
sequence = { paragraph [PARSEP] }+
paragraph = { !blockcmd (command | block | text) }+
inlineenv = beginenv { command | block | text }+ endenv
beginenv = "\begin{" §NAME §"}"
endenv = "\end{" §::NAME §"}"
command = CMDNAME [ config ] block
config = "[" cfgtext §"]"
block = "{" { text | block } §"}"
...
...
experimental/PopRetrieveTest/PopRetrieveComplement.ebnf
View file @
fc96335e
@braces_filter = counterpart
_filter
@braces_filter = counterpart
document = { text | codeblock }
codeblock = braces { text | opening_braces | (!:braces closing_braces) } ::braces
braces = opening_braces
...
...
experimental/PopRetrieveTest/PopRetrieveConfusion.ebnf
0 → 100644
View file @
fc96335e
document = { text | env }
env = (openenv | altopen) { text } [closeenv | altclose]
openenv = "\begin{" name "}"
altopen = "\begin{" name "*}"
closeenv = "\end{" ::name "}"
altclose = "\end{" ::name "*}"
text = /[^\\]+/
name = /\w+/
experimental/PopRetrieveTest/PopRetrieveConfusion.txt
0 → 100644
View file @
fc96335e
Environment \begin{env} inside \end{env*} should not fail
Environment \begin{env*} inside should not leave any symbols on the stack
experimental/PopRetrieveTest/PopRetrieveConfusion2.txt
0 → 100644
View file @
fc96335e
Environment \begin{env} inside \end{env*} should not fail
Environment \begin{env*} inside \end{env} should not leave any symbols on the stack
experimental/PopRetrieveTest/PopRetrieveTest.xml
View file @
fc96335e
...
...
@@ -19,6 +19,7 @@
</:Alternative>
<:Alternative>
<:Sequence>
<:NegativeLookahead></:NegativeLookahead>
<delimiter_sign>
<:RegExp>
``
</:RegExp>
</delimiter_sign>
...
...
experimental/PopRetrieveTest/PopRetrieveTest2.xml
View file @
fc96335e
...
...
@@ -19,6 +19,7 @@
</:Alternative>
<:Alternative>
<:Sequence>
<:NegativeLookahead></:NegativeLookahead>
<delimiter_sign>
<:RegExp>
``
</:RegExp>
</delimiter_sign>
...
...
experimental/PopRetrieveTest/compile_PopRetrieve_EBNF.py
View file @
fc96335e
...
...
@@ -25,13 +25,14 @@ import sys
sys
.
path
.
append
(
os
.
path
.
abspath
(
'../../'
))
from
DHParser.dsl
import
compile_on_disk
,
is_outdated
if
(
not
os
.
path
.
exists
(
'PopRetrieveCompiler.py'
)
or
is_outdated
(
'PopRetrieveCompiler.py'
,
'PopRetrieve.ebnf'
)):
print
(
"recompiling PopRetrieve parser"
)
errors
=
compile_on_disk
(
"PopRetrieve.ebnf"
)
if
errors
:
print
(
'
\n\n
'
.
join
(
errors
))
sys
.
exit
(
1
)
#
# if (not os.path.exists('PopRetrieveCompiler.py') or
# is_outdated('PopRetrieveCompiler.py', 'PopRetrieve.ebnf')):
# print("recompiling PopRetrieve parser")
# errors = compile_on_disk("PopRetrieve.ebnf")
# if errors:
# print('\n\n'.join(errors))
# sys.exit(1)
# from PopRetrieve_compiler import compile_PopRetrieve
...
...
@@ -53,43 +54,65 @@ if (not os.path.exists('PopRetrieveCompiler.py') or
# print(result)
print
(
"PopRetrieveTest 1"
)
errors
=
compile_on_disk
(
"PopRetrieveTest.txt"
,
'PopRetrieveCompiler.py'
)
if
errors
:
print
(
errors
)
sys
.
exit
(
1
)
print
(
"PopRetrieveTest 2"
)
errors
=
compile_on_disk
(
"PopRetrieveTest2.txt"
,
'PopRetrieveCompiler.py'
)
if
errors
:
print
(
errors
)
sys
.
exit
(
1
)
# print("PopRetrieveTest 1")
# errors = compile_on_disk("PopRetrieveTest.txt", 'PopRetrieveCompiler.py')
# if errors:
# print(errors)
# sys.exit(1)
#
# print("PopRetrieveTest 2")
# errors = compile_on_disk("PopRetrieveTest2.txt", 'PopRetrieveCompiler.py')
# if errors:
# print(errors)
# sys.exit(1)
#
#
#
# if (not os.path.exists('PopRetrieveComplementCompiler.py') or
# is_outdated('PopRetrieveComplementCompiler.py', 'PopRetrieveComplement.ebnf')):
# print("recompiling PopRetrieveComplement parser")
# errors = compile_on_disk("PopRetrieveComplement.ebnf")
# if errors:
# print('\n\n'.join(errors))
# sys.exit(1)
#
#
# from PopRetrieveComplementCompiler import compile_src
#
# print("PopRetrieveComplement Test 1")
# result, errors, ast = compile_src("PopRetrieveComplementTest.txt")
# if errors:
# print(errors)
# sys.exit(1)
# else:
# print(result)
#
# print("PopRetrieveComplement Test 2")
# result, errors, ast = compile_src("PopRetrieveComplementTest2.txt")
# if errors:
# print(errors)
# sys.exit(1)
# else:
# print(result)
if
(
not
os
.
path
.
exists
(
'PopRetrieveCo
mplement
Compiler.py'
)
or
is_outdated
(
'PopRetrieveCo
mplement
Compiler.py'
,
'PopRetrieveCo
mplement
.ebnf'
)):
print
(
"recompiling PopRetrieveCo
mplement
parser"
)
errors
=
compile_on_disk
(
"PopRetrieveCo
mplement
.ebnf"
)
if
(
not
os
.
path
.
exists
(
'PopRetrieveCo
nfusion
Compiler.py'
)
or
is_outdated
(
'PopRetrieveCo
nfusion
Compiler.py'
,
'PopRetrieveCo
nfusion
.ebnf'
)):
print
(
"recompiling PopRetrieveCo
nfusion
parser"
)
errors
=
compile_on_disk
(
"PopRetrieveCo
nfusion
.ebnf"
)
if
errors
:
print
(
'
\n\n
'
.
join
(
errors
))
sys
.
exit
(
1
)
from
PopRetrieveConfusionCompiler
import
compile_src
from
PopRetrieveComplementCompiler
import
compile_src
print
(
"PopRetrieveComplement Test 1"
)
result
,
errors
,
ast
=
compile_src
(
"PopRetrieveComplementTest.txt"
)
if
errors
:
print
(
errors
)
sys
.
exit
(
1
)
else
:
print
(
result
)
print
(
"PopRetrieveComplement Test 2"
)
result
,
errors
,
ast
=
compile_src
(
"PopRetrieveComplementTest2.txt"
)
print
(
"PopRetrieveConfusion Test 1"
)
result
,
errors
,
ast
=
compile_src
(
"PopRetrieveConfusion.txt"
)
print
(
ast
.
as_sexpr
())
if
errors
:
print
(
errors
)
for
e
in
errors
:
print
(
e
)
sys
.
exit
(
1
)
else
:
print
(
result
)
test/test_ebnf.py
View file @
fc96335e
...
...
@@ -122,7 +122,7 @@ class TestPopRetrieve:
text = /[^`]+/
"""
mini_lang2
=
"""
@braces_filter=counterpart
_filter
@braces_filter=counterpart
document = { text | codeblock }
codeblock = braces { text | opening_braces | (!:braces closing_braces) } ::braces
braces = opening_braces
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment