badw-it / DHParser / Commits

Commit ab1f1788, authored Jun 18, 2017 by Eckhart Arnold

- LaTeX more tests, Bug encountered (see TODO in class parsers.Grammar!)

parent d1967501
Changes: 7 files
DHParser/ebnf.py

@@ -326,8 +326,8 @@ class EBNFCompiler(Compiler):
         self.directives = {'whitespace': self.WHITESPACE['horizontal'],
                            'comment': '',
                            'literalws': ['right'],
-                           'tokens': set(),   # alt. 'scanner_tokens'
-                           'filter': dict()}  # alt. 'retrieve_filter'
+                           'tokens': set(),   # alt. 'scanner_tokens'
+                           'filter': dict()}  # alt. 'filter'

     @property
     def result(self) -> str:

@@ -416,7 +416,7 @@ class EBNFCompiler(Compiler):
         declarations = declarations[:-1]
         declarations.append('"""')
-        # add default functions for retrieve_filter filters of pop or retrieve operators
+        # add default functions for filter filters of pop or retrieve operators
         # for symbol, fun in self.directives['filter']:
         #     declarations.append(symbol + '_filter = lambda value: value.replace("(", ")")'

@@ -586,7 +586,7 @@ class EBNFCompiler(Compiler):
                                          'and not a %s.') % (prefix, str(arg.parser)))
             return str(arg.result)
         if str(arg) in self.directives['filter']:
-            custom_args = ['retrieve_filter=%s' % self.directives['filter'][str(arg)]]
+            custom_args = ['filter=%s' % self.directives['filter'][str(arg)]]
             self.variables.add(str(arg))  # cast(str, arg.result)
         elif len(node.children) > 2:
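The net effect of the ebnf.py changes is that the code generator now emits a 'filter=...' keyword argument, instead of the former 'retrieve_filter=...', for every symbol that has an entry in the 'filter' directive. The following is only a simplified, runnable sketch of that string-building step; 'directives' and 'arg_name' are stand-ins for the EBNFCompiler's actual state, not part of this commit:

    # Simplified sketch of the renamed keyword argument (stand-in names).
    directives = {'filter': {'begin': 'counterpart_filter'}}
    arg_name = 'begin'

    custom_args = []
    if arg_name in directives['filter']:
        # before this commit the template read 'retrieve_filter=%s'
        custom_args = ['filter=%s' % directives['filter'][arg_name]]

    print(custom_args)  # -> ['filter=counterpart_filter']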
DHParser/parsers.py

@@ -53,12 +53,11 @@ import abc
 import copy
 import os
 from functools import partial
 try:
     import regex as re
 except ImportError:
     import re
-from typing import Any, Callable, Collection, Dict, Iterator, List, Set, Tuple, Union
+from typing import Any, Callable, Dict, Iterator, List, Set, Tuple, Union

 from DHParser.toolkit import is_logging, log_dir, logfile_basename, escape_re, sane_parser_name
 from DHParser.syntaxtree import WHITESPACE_PTYPE, TOKEN_PTYPE, ZOMBIE_PARSER, ParserBase, \

@@ -342,7 +341,7 @@ class Grammar:
         self.root__.apply(self._add_parser)

     def __getitem__(self, key):
-        return self.__dict__[key]
+        return getattr(self, key)

     def _reset(self):
         # variables stored and recalled by Capture and Retrieve parsers

@@ -357,6 +356,7 @@ class Grammar:
         # also needed for call stack tracing
         self.moving_forward = True

+    # TODO: Either make sure not to miss out unconnected parsers or raise an error! Actually, the EBNF-Compiler should keep track of this!
     def _add_parser(self, parser: Parser) -> None:
         """Adds the copy of the classes parser object to this
         particular instance of Grammar.

@@ -389,7 +389,8 @@ class Grammar:
         self.history_tracking = is_logging()
         self.document = document
         parser = self[start_parser] if isinstance(start_parser, str) else start_parser
-        assert parser.grammar == self, "Cannot run parsers from a differen grammar object!"
+        assert parser.grammar == self, "Cannot run parsers from a different grammar object!" \
+                                       " %s vs. %s" % (str(self), str(parser.grammar))
         stitches = []  # type: List[Node]
         rest = document
         if not rest:

@@ -721,7 +722,7 @@ class NaryOperator(Parser):
     def __init__(self, *parsers: Parser, name: str = '') -> None:
         super(NaryOperator, self).__init__(name)
         # assert all([isinstance(parser, Parser) for parser in parsers]), str(parsers)
-        self.parsers = parsers  # type: Collection  ## [Parser]
+        self.parsers = parsers  # type: Container  ## [Parser]

     def __deepcopy__(self, memo):
         parsers = copy.deepcopy(self.parsers, memo)

@@ -981,37 +982,37 @@ class Capture(UnaryOperator):
         return None, text


-def nop_filter(stack):
+RetrieveFilter = Callable[[List[str]], str]
+
+
+def nop_filter(stack: List[str]) -> str:
     return stack[-1]


-def counterpart_filter(stack):
+def counterpart_filter(stack: List[str]) -> str:
     value = stack[-1]
     return value.replace("(", ")").replace("[", "]").replace("{", "}").replace(">", "<")


-def accumulating_filter(stack):
+def accumulating_filter(stack: List[str]) -> str:
     return "".join(stack)


-RetrFilter = Callable[[List[str]], str]
-
-
 class Retrieve(Parser):
-    def __init__(self, symbol: Parser, retrieve_filter: RetrFilter = None, name: str = '') -> None:
+    def __init__(self, symbol: Parser, filter: RetrieveFilter = None, name: str = '') -> None:
         if not name:
             name = symbol.name
         super(Retrieve, self).__init__(name)
         self.symbol = symbol
-        self.retrieve_filter = retrieve_filter if retrieve_filter else nop_filter
+        self.filter = filter if filter else nop_filter

     def __deepcopy__(self, memo):
-        return self.__class__(self.symbol, self.retrieve_filter, self.name)
+        return self.__class__(self.symbol, self.filter, self.name)

     def __call__(self, text: str) -> Tuple[Node, str]:
         try:
             stack = self.grammar.variables[self.symbol.name]
-            value = self.retrieve_filter(stack)
+            value = self.filter(stack)
             self.pick_value(stack)
         except (KeyError, IndexError):
             return Node(self, '').add_error(dsl_error_msg(self,
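With this hunk, Retrieve (and thus Pop) takes its filter under the shorter keyword 'filter', typed by the RetrieveFilter = Callable[[List[str]], str] alias that now precedes the predefined filters. A custom filter only has to accept the capture stack and return the string the parser should try to match. The sketch below is illustrative only; upper_case_filter and the commented wiring are made-up examples, not part of this commit:

    from typing import List

    def upper_case_filter(stack: List[str]) -> str:
        # like nop_filter, use the most recent capture, but upper-cased
        return stack[-1].upper()

    # hypothetical wiring, following the Retrieve constructor signature above:
    # name_capture = Capture(RegExp(r'\w+'), name='envname')
    # name_match   = Retrieve(name_capture, filter=upper_case_filter)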
DHParser/testing.py

@@ -27,7 +27,7 @@ except ImportError:
     import re

 from DHParser import Node, error_messages
-from DHParser.toolkit import compact_sexpr, is_logging, log_dir
+from DHParser.toolkit import compact_sexpr, is_logging
 from DHParser.syntaxtree import MockParser
 from DHParser.ebnf import grammar_changed
 from DHParser.dsl import compile_on_disk

@@ -154,10 +154,10 @@ def unit_from_configfile(config_filename):
     return unit


-def unit_from_json(config_filename):
+def unit_from_json(json_filename):
     """Reads a grammar unit test from a json file.
     """
-    with open(config_filename, 'r') as f:
+    with open(json_filename, 'r') as f:
         unit = json.load(f)
     for symbol in unit:
         for stage in unit[symbol]:

@@ -168,17 +168,16 @@ def unit_from_json(config_filename):
 # TODO: add support for yaml, cson, toml


-def unit_from_file(config_filename):
+def unit_from_file(filename):
     """Reads a grammar unit test from a file. The format of the file is
     determined by the ending of its name.
     """
-    fname = config_filename
-    if fname.endswith(".json"):
-        return unit_from_json(fname)
-    elif fname.endswith(".ini"):
-        return unit_from_configfile(fname)
+    if filename.endswith(".json"):
+        return unit_from_json(filename)
+    elif filename.endswith(".ini"):
+        return unit_from_configfile(filename)
     else:
-        raise ValueError("Unknown unit test file type: " + fname[fname.rfind('.'):])
+        raise ValueError("Unknown unit test file type: " + filename[filename.rfind('.'):])


 def report(test_unit):

@@ -208,21 +207,27 @@ def report(test_unit):
     return '\n'.join(report)


-def grammar_unit(test_unit, parser_factory, transformer_factory):
+def grammar_unit(test_unit, parser_factory, transformer_factory, verbose=False):
     """Unit tests for a grammar-parser and ast transformations.
     """
     if isinstance(test_unit, str):
-        unit_name = os.path.basename(os.path.splitext(test_unit)[0])
+        unit_dir, unit_name = os.path.split(os.path.splitext(test_unit)[0])
         test_unit = unit_from_file(test_unit)
     else:
         unit_name = str(id(test_unit))
+    if verbose:
+        print("\nUnit: " + unit_name)
     errata = []
     parser = parser_factory()
     transform = transformer_factory()
     for parser_name, tests in test_unit.items():
         assert set(tests.keys()).issubset(UNIT_STAGES)
+        if verbose:
+            print('  Match-Tests for parser "' + parser_name + '"')
         for test_name, test_code in tests.get('match', dict()).items():
+            if verbose:
+                infostr = '    match-test "' + test_name + '" ... '
+                errflag = len(errata)
             cst = parser(test_code, parser_name)
             tests.setdefault('__cst__', {})[test_name] = cst
             if "ast" in tests or is_logging():

@@ -246,32 +251,48 @@ def grammar_unit(test_unit, parser_factory, transformer_factory):
                               compact_sexpr(compare.as_sexpr()),
                               compact_sexpr(ast.as_sexpr())))
                 tests.setdefault('__err__', {})[test_name] = errata[-1]
+            if verbose:
+                print(infostr + "OK" if len(errata) == errflag else "FAIL")

+        if verbose:
+            print('  Fail-Tests for parser "' + parser_name + '"')
         for test_name, test_code in tests.get('fail', dict()).items():
+            if verbose:
+                infostr = '    fail-test "' + test_name + '" ... '
+                errflag = len(errata)
             cst = parser(test_code, parser_name)
             if not cst.error_flag:
                 errata.append('Fail test "%s" for parser "%s" yields match instead of '
                               'expected failure!' % (test_name, parser_name))
                 tests.setdefault('__err__', {})[test_name] = errata[-1]
+            if verbose:
+                print(infostr + "OK" if len(errata) == errflag else "FAIL")

-    if is_logging():
-        with open(os.path.join(log_dir(), unit_name + '.report'), 'w') as f:
-            f.write(report(test_unit))
+    report_dir = os.path.join(unit_dir, "REPORT")
+    if not os.path.exists(report_dir):
+        os.mkdir(report_dir)
+    with open(os.path.join(report_dir, unit_name + '.report'), 'w') as f:
+        f.write(report(test_unit))
     return errata


-def grammar_suite(directory, parser_factory, transformer_factory, ignore_unknown_filetypes=False):
+def grammar_suite(directory, parser_factory, transformer_factory,
+                  ignore_unknown_filetypes=False, verbose=False):
     """Runs all grammar unit tests in a directory. A file is considered a test
     unit, if it has the word "test" in its name.
     """
     all_errors = collections.OrderedDict()
+    if verbose:
+        print("\nScanning test-directory: " + directory)
     for filename in sorted(os.listdir(directory)):
         if filename.lower().find("test") >= 0:
             try:
-                print("Running grammar tests in: " + filename)
+                if verbose:
+                    print("\nRunning grammar tests from: " + filename)
                 errata = grammar_unit(os.path.join(directory, filename),
-                                      parser_factory, transformer_factory)
+                                      parser_factory, transformer_factory, verbose)
                 if errata:
                     all_errors[filename] = errata
             except ValueError as e:

@@ -284,7 +305,7 @@ def grammar_suite(directory, parser_factory, transformer_factory, ignore_unknown
             for error in all_errors[filename]:
                 error_report.append('\t' + '\n\t'.join(error.split('\n')))
     if error_report:
-        return ('Test suite "%s" revealed some errors:\n' % directory) + '\n'.join(error_report)
+        return ('Test suite "%s" revealed some errors:\n' % directory) + '\n'.join(error_report)
     return ''
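Taken together, testing.py now reports per-unit and per-test progress when 'verbose' is set and writes its report into a REPORT subdirectory next to the test file instead of the log directory. A hedged sketch of driving a single test unit with the new flag; the test file path and the LaTeXCompiler factories are taken from elsewhere in this commit and are meant as an illustration, not as a prescribed invocation:

    from DHParser.testing import grammar_unit
    from LaTeXCompiler import get_grammar, get_transformer

    # run one grammar test unit with per-test progress output;
    # errata collects human-readable failure descriptions
    errata = grammar_unit('grammar_tests/test_paragraph.ini',
                          get_grammar, get_transformer, verbose=True)
    for error in errata:
        print(error)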
examples/LaTeX/LaTeX.ebnf

@@ -29,7 +29,7 @@ word_sequence = { TEXTCHUNK WSPC }+
 blockcmd   = "\subsection" | "\section" | "\chapter" | "\subsubsection"
              | "\paragraph" | "\subparagraph" | "\begin{enumerate}"
-             | "\begin{itemize}" | "\begin{figure}"
+             | "\begin{itemize}" | "\item" | "\begin{figure}"

 CMDNAME    = /\\\w+/~
 NAME       = /\w+/~
examples/LaTeX/grammar_tests/test_paragraph.ini

@@ -3,5 +3,16 @@
     Professoren, Philister und Vieh; welche vier Stände doch nichts weniger
     als streng geschieden sind. Der Viehstand ist der bedeutendste.

+[fail:paragraph]
+1:  \begin{enumerate}
+2:  \item
+3:  und Vieh; \paragraph
+
+[match:sequence]
+1:  Im allgemeinen werden die Bewohner Göttingens eingeteilt in Studenten,
+    Professoren, Philister und Vieh; welche vier Stände doch nichts weniger
+    als streng geschieden sind. Der Viehstand ist der bedeutendste.
+
+    Im allgemeinen werden die Bewohner Göttingens eingeteilt in Studenten,
+    Professoren, Philister und Vieh; welche vier Stände doch nichts weniger
+    als streng geschieden sind. Der Viehstand ist der bedeutendste.
\ No newline at end of file
examples/LaTeX/test_grammar.py → examples/LaTeX/tst_grammar.py

 #!/usr/bin/python3

-"""test_grammar.py - runs the unit tests for the LaTeX grammar
+"""tst_grammar.py - runs the unit tests for the LaTeX grammar

 Author: Eckhart Arnold <arnold@badw.de>

@@ -26,8 +26,8 @@ from DHParser import testing
 from DHParser import toolkit
 from LaTeXCompiler import get_grammar, get_transformer

-with toolkit.logging():
-    error_report = testing.grammar_suite('grammar_tests', get_grammar, get_transformer)
+with toolkit.logging(True):
+    error_report = testing.grammar_suite('grammar_tests', get_grammar, get_transformer,
+                                         verbose=True)
     assert not error_report, error_report
test/test_parsers.py

@@ -120,25 +120,34 @@ class TestRegex:
 class TestGrammar:
-    def test_pos_values_initialized(self):
-        # checks whether pos values in the parsing result and in the
-        # history record have been initialized
+    def setup(self):
         grammar = r"""@whitespace = horizontal
                       haupt        = textzeile LEERZEILE
                       textzeile    = { WORT }+
                       WORT         = /[^ \t]+/~
                       LEERZEILE    = /\n[ \t]*(?=\n)/~
                   """
-        result, messages, syntax_tree = compile_source(grammar, None, get_ebnf_grammar(),
-                                                       get_ebnf_transformer(), get_ebnf_compiler("PosTest"))
-        assert result
+        self.pyparser, messages, syntax_tree = compile_source(grammar, None, get_ebnf_grammar(),
+                                                              get_ebnf_transformer(), get_ebnf_compiler("PosTest"))
+        assert self.pyparser
         assert not messages

+    def test_pos_values_initialized(self):
+        # checks whether pos values in the parsing result and in the
+        # history record have been initialized
         with logging("LOGS"):
-            parser = compile_python_object(DHPARSER_IMPORTS + result, '\w+Grammar$')()
-            result = parser("no_file_name*")
+            parser = compile_python_object(DHPARSER_IMPORTS + self.pyparser, '\w+Grammar$')()
+            parser("no_file_name*")
         for record in parser.history:
             assert not record.node or record.node.pos >= 0

+    def test_select_parsing(self):
+        parser = compile_python_object(DHPARSER_IMPORTS + self.pyparser, '\w+Grammar$')()
+        parser("wort", "WORT")
+        parser("eine Zeile", "textzeile")
+        parser("kein Haupt", "haupt")
+        parser("so ist es richtig", "haupt")


 if __name__ == "__main__":
     from DHParser.testing import runner