Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
821cb67c
Commit
821cb67c
authored
Jul 29, 2017
by
Eckhart Arnold
Browse files
bugfixes and tests for Lookbehind-Operator
parent
2e5f466d
Changes
8
Hide whitespace changes
Inline
Side-by-side
DHParser/ebnf.py
View file @
821cb67c
...
...
@@ -670,12 +670,20 @@ class EBNFCompiler(Compiler):
if
prefix
[:
1
]
==
'-'
:
def
check
(
node
):
nd
=
node
while
len
(
nd
.
children
)
==
1
and
nd
.
children
[
1
].
parser
.
name
==
"symbol"
:
nd
=
nd
.
children
[
1
]
if
len
(
nd
.
children
)
>=
1
:
nd
=
nd
.
children
[
0
]
while
nd
.
parser
.
name
==
"symbol"
:
symlist
=
self
.
rules
.
get
(
str
(
nd
),
[])
if
len
(
symlist
)
==
2
:
nd
=
symlist
[
1
]
else
:
if
len
(
symlist
)
==
1
:
nd
=
symlist
[
0
].
children
[
1
]
break
if
(
nd
.
parser
.
name
!=
"regexp"
or
str
(
nd
)[:
1
]
!=
'/'
or
str
(
nd
)[
-
1
:]
!=
'/'
):
node
.
add_error
(
"Lookbehind-parser can only be used with plain RegExp-"
"parsers, not with: "
+
str
(
nd
)
)
"parsers, not with: "
+
nd
.
parser
.
name
+
nd
.
parser
.
ptype
)
if
not
result
.
startswith
(
'RegExp('
):
self
.
deferred_tasks
.
append
(
lambda
:
check
(
node
))
...
...
@@ -713,7 +721,7 @@ class EBNFCompiler(Compiler):
else
:
self
.
current_symbols
.
append
(
node
)
if
symbol
not
in
self
.
symbols
:
self
.
symbols
[
symbol
]
=
node
self
.
symbols
[
symbol
]
=
node
# remember first use of symbol
if
symbol
in
self
.
rules
:
self
.
recursive
.
add
(
symbol
)
return
symbol
...
...
@@ -726,18 +734,22 @@ class EBNFCompiler(Compiler):
def
on_regexp
(
self
,
node
:
Node
)
->
str
:
rx
=
str
(
node
)
name
=
[]
# type: List[str]
if
rx
[:
2
]
==
'~/'
:
if
not
'left'
in
self
.
directives
[
'literalws'
]:
name
=
[
'wL='
+
self
.
WHITESPACE_KEYWORD
]
+
name
rx
=
rx
[
1
:]
elif
'left'
in
self
.
directives
[
'literalws'
]:
name
=
[
"wL=''"
]
+
name
if
rx
[
-
2
:]
==
'/~'
:
if
'right'
not
in
self
.
directives
[
'literalws'
]:
name
=
[
'wR='
+
self
.
WHITESPACE_KEYWORD
]
+
name
rx
=
rx
[:
-
1
]
elif
'right'
in
self
.
directives
[
'literalws'
]:
name
=
[
"wR=''"
]
+
name
if
rx
[
0
]
==
'/'
and
rx
[
-
1
]
==
'/'
:
parser
=
'RegExp('
else
:
parser
=
'RE('
if
rx
[:
2
]
==
'~/'
:
if
not
'left'
in
self
.
directives
[
'literalws'
]:
name
=
[
'wL='
+
self
.
WHITESPACE_KEYWORD
]
+
name
rx
=
rx
[
1
:]
elif
'left'
in
self
.
directives
[
'literalws'
]:
name
=
[
"wL=''"
]
+
name
if
rx
[
-
2
:]
==
'/~'
:
if
'right'
not
in
self
.
directives
[
'literalws'
]:
name
=
[
'wR='
+
self
.
WHITESPACE_KEYWORD
]
+
name
rx
=
rx
[:
-
1
]
elif
'right'
in
self
.
directives
[
'literalws'
]:
name
=
[
"wR=''"
]
+
name
try
:
arg
=
repr
(
self
.
_check_rx
(
node
,
rx
[
1
:
-
1
].
replace
(
r
'\/'
,
'/'
)))
except
AttributeError
as
error
:
...
...
@@ -745,7 +757,7 @@ class EBNFCompiler(Compiler):
node
.
as_sxpr
()
node
.
add_error
(
errmsg
)
return
'"'
+
errmsg
+
'"'
return
'RE('
+
', '
.
join
([
arg
]
+
name
)
+
')'
return
parser
+
', '
.
join
([
arg
]
+
name
)
+
')'
def
on_list_
(
self
,
node
)
->
Set
[
str
]:
...
...
DHParser/parser.py
View file @
821cb67c
...
...
@@ -900,8 +900,8 @@ class RE(Parser):
return
None
,
text
def
__repr__
(
self
):
wL
=
'~'
if
self
.
wspLeft
else
''
wR
=
'~'
if
self
.
wspRight
else
''
wL
=
'~'
if
self
.
wspLeft
!=
ZOMBIE_PARSER
else
''
wR
=
'~'
if
self
.
wspRight
!=
ZOMBIE_PARSER
else
''
return
wL
+
'/%s/'
%
self
.
main
.
regexp
.
pattern
+
wR
def
_grammar_assigned_notifier
(
self
):
...
...
@@ -1240,7 +1240,11 @@ class NegativeLookahead(Lookahead):
class
Lookbehind
(
FlowOperator
):
"""EXPERIMENTAL!!!"""
def
__init__
(
self
,
parser
:
Parser
,
name
:
str
=
''
)
->
None
:
assert
isinstance
(
parser
,
RegExp
)
p
=
parser
while
isinstance
(
p
,
Synonym
):
p
=
p
.
parser
assert
isinstance
(
p
,
RegExp
),
str
(
type
(
p
))
self
.
regexp
=
p
.
main
.
regexp
if
isinstance
(
p
,
RE
)
else
p
.
regexp
super
(
Lookbehind
,
self
).
__init__
(
parser
,
name
)
print
(
"WARNING: Lookbehind Operator is experimental!"
)
...
...
@@ -1258,7 +1262,7 @@ class Lookbehind(FlowOperator):
def
condition
(
self
):
node
=
self
.
grammar
.
last_node__
return
node
and
self
.
parser
.
regexp
.
match
(
str
(
node
))
return
node
and
self
.
regexp
.
match
(
str
(
node
))
class
NegativeLookbehind
(
Lookbehind
):
...
...
@@ -1336,7 +1340,7 @@ class Retrieve(Parser):
stack
=
self
.
grammar
.
variables__
[
self
.
symbol
.
name
]
value
=
self
.
filter
(
stack
)
except
(
KeyError
,
IndexError
):
return
Node
(
self
,
''
).
add_error
(
dsl_error_msg
(
self
,
return
Node
(
self
,
''
).
add_error
(
dsl_error_msg
(
self
,
\
"'%s' undefined or exhausted."
%
self
.
symbol
.
name
)),
text
if
text
.
startswith
(
value
):
return
Node
(
self
,
value
),
text
[
len
(
value
):]
...
...
DHParser/syntaxtree.py
View file @
821cb67c
...
...
@@ -524,7 +524,8 @@ def compact_sxpr(s) -> str:
TransformationFunc
=
Union
[
Callable
[[
Node
],
Any
],
partial
]
if
__name__
==
"__main__"
:
st
=
mock_syntax_tree
(
"(alpha (beta (gamma i
\n
j
\n
k) (delta y)) (epsilon z))"
)
print
(
st
.
as_sxpr
())
print
(
st
.
as_xml
())
# if __name__ == "__main__":
# st = mock_syntax_tree("(alpha (beta (gamma i\nj\nk) (delta y)) (epsilon z))")
# print(st.as_sxpr())
# print(st.as_xml())
DevScripts/Readme-DevScripts.md
View file @
821cb67c
Folder "DevScripts"
===================
This folder contains helper scripts for the
development of DHParser.
This folder contains helper scripts for the development of DHParser.
*
collect_symbols.py - Lists all exported symbols from DHParser modules
...
...
examples/LaTeX/LaTeX.ebnf
View file @
821cb67c
...
...
@@ -53,12 +53,12 @@ Index = "\printindex" [PARSEP]
#### block environments ####
# TODO: ambiguity between generic block environments and generic inline environments
block_environment = known_environment | generic_environment
block_environment = known_environment | generic_block
known_environment = itemize | enumerate | figure | table | quotation
| verbatim
generic_environment = begin_environment sequence §end_environment
generic_block = begin_generic_block sequence §end_generic_block
begin_generic_block = -&SUCC_LB begin_environment &PRED_LB
end_generic_block = -&SUCC_LB end_environment &PRED_LB
itemize = "\begin{itemize}" [PARSEP] { item } §"\end{itemize}"
enumerate = "\begin{enumerate}" [PARSEP] {item } §"\end{enumerate}"
...
...
@@ -85,7 +85,9 @@ text_elements = command | text | block | inline_environment
inline_environment = known_inline_env | generic_inline_env
known_inline_env = inline_math
generic_inline_env = begin_environment { text_elements }+ §end_environment
generic_inline_env = begin_inline_env { text_elements }+ §end_inline_env
begin_inline_env = (-!SUCC_LB begin_environment) | (begin_environment !PRED_LB)
end_inline_env = (-!SUCC_LB end_environment) | (end_environment !PRED_LB)
begin_environment = "\begin{" §NAME §"}"
end_environment = "\end{" §::NAME §"}"
...
...
@@ -139,7 +141,10 @@ BRACKETS = /[\[\]]/ # left or right square bracket: [ ]
TEXTCHUNK = /[^\\%$&\{\}\[\]\s\n]+/ # some piece of text excluding whitespace,
# linefeed and special characters
WSPC = /[ \t]+/ # (horizontal) whitespace
LF = !PARSEP /[ \t]*\n[ \t]*/ #
LF
but not an empty line
LF = !PARSEP /[ \t]*\n[ \t]*/ #
linefeed
but not an empty line
PARSEP = /[ \t]*(?:\n[ \t]*)+\n[ \t]*/ # at least one empty line, i.e.
# [whitespace] linefeed [whitespace] linefeed
EOF = !/./
SUCC_LB = /(?:.*\n)+\s*$/ # linebreak succeeding an arbitrary chunk of text
PRED_LB = /\s*?\n/ # linebreak preceding any text
test/test_ebnf.py
View file @
821cb67c
...
...
@@ -305,6 +305,37 @@ class TestSynonymDetection:
assert
grammar
(
'b'
).
as_sxpr
().
count
(
'b'
)
==
2
class
TestFlowControlOperators
:
def
setup
(
self
):
self
.
t1
=
"""
All work and no play
makes Jack a dull boy
END
"""
self
.
t2
=
"All word and not play makes Jack a dull boy END
\n
"
def
test_lookbehind_indirect
(
self
):
lang
=
r
"""
document = ws sequence doc_end ws
sequence = { !end word ws }+
doc_end = -&SUCC_LB end
ws = /\s*/
end = /END/
word = /\w+/
SUCC_LB = indirection
indirection = /(?:.*\n)+\s*$/
"""
# result, messages, syntax_tree = compile_source(lang, None, get_ebnf_grammar(),
# get_ebnf_transformer(), get_ebnf_compiler('LookbehindTest'))
# print(result)
parser
=
grammar_provider
(
lang
)()
cst
=
parser
(
self
.
t1
)
assert
not
cst
.
error_flag
,
cst
.
as_sxpr
()
cst
=
parser
(
self
.
t2
)
# this should fail, because 'END' is not preceded by a line feed
assert
cst
.
error_flag
,
cst
.
as_sxpr
()
if
__name__
==
"__main__"
:
from
DHParser.testing
import
runner
runner
(
""
,
globals
())
test/test_parser
s
.py
→
test/test_parser.py
View file @
821cb67c
#!/usr/bin/python3
"""test_parser
s
.py - tests of the parsers-module of DHParser
"""test_parser.py - tests of the parsers-module of DHParser
Author: Eckhart Arnold <arnold@badw.de>
...
...
@@ -26,7 +26,7 @@ sys.path.extend(['../', './'])
from
DHParser.toolkit
import
is_logging
,
logging
,
compile_python_object
from
DHParser.parser
import
compile_source
,
Retrieve
,
Grammar
,
Forward
,
Token
,
ZeroOrMore
,
RE
,
\
RegExp
,
Lookbehind
,
Lookahead
,
NegativeLookahead
,
OneOrMore
RegExp
,
Lookbehind
,
NegativeLookahead
,
OneOrMore
,
Series
from
DHParser.ebnf
import
get_ebnf_grammar
,
get_ebnf_transformer
,
get_ebnf_compiler
from
DHParser.dsl
import
grammar_provider
,
DHPARSER_IMPORTS
...
...
@@ -99,6 +99,14 @@ class TestInfiLoopsAndRecursion:
class
TestFlowControl
:
def
setup
(
self
):
self
.
t1
=
"""
All work and no play
makes Jack a dull boy
END
"""
self
.
t2
=
"All word and not play makes Jack a dull boy END
\n
"
def
test_lookbehind
(
self
):
ws
=
RegExp
(
'\s*'
)
end
=
RegExp
(
"END"
)
...
...
@@ -108,15 +116,27 @@ class TestFlowControl:
document
=
ws
+
sequence
+
doc_end
+
ws
parser
=
Grammar
(
document
)
t1
=
"""
All work and no play
makes Jack a dull boy
END
"""
cst
=
parser
(
t1
)
cst
=
parser
(
self
.
t1
)
assert
not
cst
.
error_flag
,
cst
.
as_sxpr
()
cst
=
parser
(
self
.
t2
)
assert
cst
.
error_flag
,
cst
.
as_sxpr
()
def
test_lookbehind_indirect
(
self
):
class
LookbehindTestGrammar
(
Grammar
):
parser_initialization__
=
"upon instantiation"
ws
=
RegExp
(
'
\\
s*'
)
end
=
RegExp
(
'END'
)
SUCC_LB
=
RegExp
(
'(?:.*
\\
n)+
\\
s*$'
)
doc_end
=
Series
(
Lookbehind
(
SUCC_LB
),
end
)
word
=
RegExp
(
'\w+'
)
sequence
=
OneOrMore
(
Series
(
NegativeLookahead
(
end
),
word
,
ws
))
document
=
Series
(
ws
,
sequence
,
doc_end
,
ws
)
root__
=
document
parser
=
LookbehindTestGrammar
()
cst
=
parser
(
self
.
t1
)
assert
not
cst
.
error_flag
,
cst
.
as_sxpr
()
t2
=
"All word and not play makes Jack a dull boy END
\n
"
cst
=
parser
(
t2
)
cst
=
parser
(
self
.
t2
)
assert
cst
.
error_flag
,
cst
.
as_sxpr
()
...
...
test/test_testing.py
View file @
821cb67c
#!/usr/bin/python3
"""test_
parsers
.py - tests of the
parsers
-module of DHParser
"""test_
testing
.py - tests of the
testing
-module of DHParser
Author: Eckhart Arnold <arnold@badw.de>
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment