Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
a88ec311
Commit
a88ec311
authored
Feb 16, 2018
by
Eckhart Arnold
Browse files
- added custom parser for init file reading to preserve indentation.
parent
244e7067
Changes
18
Hide whitespace changes
Inline
Side-by-side
DHParser/testing.py
View file @
a88ec311
...
...
@@ -16,18 +16,19 @@ implied. See the License for the specific language governing
permissions and limitations under the License.
"""
import
collections
import
configparser
#
import configparser
import
copy
import
fnmatch
import
inspect
import
json
import
os
import
sys
from
DHParser.
toolkit
import
re
from
DHParser.
error
import
is_error
,
adjust_error_locations
from
DHParser.log
import
is_logging
,
clear_logs
,
log_ST
,
log_parsing_history
from
DHParser.syntaxtree
import
Node
,
mock_syntax_tree
,
flatten_sxpr
,
ZOMBIE_PARSER
from
DHParser.parse
import
UnknownParserError
from
DHParser.error
import
is_error
,
adjust_error_locations
from
DHParser.syntaxtree
import
Node
,
mock_syntax_tree
,
flatten_sxpr
,
ZOMBIE_PARSER
from
DHParser.toolkit
import
re
__all__
=
(
'unit_from_configfile'
,
'unit_from_json'
,
...
...
@@ -39,31 +40,86 @@ __all__ = ('unit_from_configfile',
UNIT_STAGES
=
{
'match'
,
'fail'
,
'ast'
,
'cst'
,
'__ast__'
,
'__cst__'
}
# def unit_from_configfile(config_filename):
# """
# Reads a grammar unit test from a config file.
# """
# cfg = configparser.ConfigParser(interpolation=None)
# cfg.read(config_filename, encoding="utf8")
# OD = collections.OrderedDict
# unit = OD()
# for section in cfg.sections():
# symbol, stage = section.split(':')
# if stage not in UNIT_STAGES:
# if symbol in UNIT_STAGES:
# symbol, stage = stage, symbol
# else:
# raise ValueError('Test stage %s not in: ' % (stage, str(UNIT_STAGES)))
# for testkey, testcode in cfg[section].items():
# if testcode[:3] + testcode[-3:] in {"''''''", '""""""'}:
# testcode = testcode[3:-3]
# # testcode = testcode.replace('\\#', '#')
# testcode = re.sub(r'(?<!\\)\\#', '#', testcode).replace('\\\\', '\\')
# elif testcode[:1] + testcode[-1:] in {"''", '""'}:
# testcode = testcode[1:-1]
# unit.setdefault(symbol, OD()).setdefault(stage, OD())[testkey] = testcode
# # print(json.dumps(unit, sort_keys=True, indent=4))
# return unit
RX_SECTION
=
re
.
compile
(
'\s*\[(?P<stage>\w+):(?P<symbol>\w+)\]'
)
RE_VALUE
=
'(?:"""((?s:.*?))""")|'
+
"(?:'''((?s:.*?))''')|"
+
\
'(?:"(.*?)")|'
+
"(?:'(.*?)')|"
+
'(.*(?:
\n
(?:\s*
\n
)* .*)*)'
RX_ENTRY
=
re
.
compile
(
'\s*(\w+)\s*:\s*(?:{value})\s*'
.
format
(
value
=
RE_VALUE
))
RX_COMMENT
=
re
.
compile
(
'\s*#.*
\n
'
)
def
unit_from_configfile
(
config_filename
):
"""
Reads a grammar unit test from a config file.
"""
cfg
=
configparser
.
ConfigParser
(
interpolation
=
None
)
cfg
.
read
(
config_filename
,
encoding
=
"utf8"
)
def
eat_comments
(
txt
,
pos
):
m
=
RX_COMMENT
.
match
(
txt
,
pos
)
while
m
:
pos
=
m
.
span
()[
1
]
m
=
RX_COMMENT
.
match
(
txt
,
pos
)
return
pos
with
open
(
config_filename
,
'r'
)
as
f
:
cfg
=
f
.
read
()
cfg
=
cfg
.
replace
(
'
\t
'
,
' '
)
OD
=
collections
.
OrderedDict
unit
=
OD
()
for
section
in
cfg
.
sections
():
symbol
,
stage
=
section
.
split
(
':'
)
pos
=
eat_comments
(
cfg
,
0
)
section_match
=
RX_SECTION
.
match
(
cfg
,
pos
)
while
section_match
:
d
=
section_match
.
groupdict
()
stage
=
d
[
'stage'
]
if
stage
not
in
UNIT_STAGES
:
if
symbol
in
UNIT_STAGES
:
symbol
,
stage
=
stage
,
symbol
else
:
raise
ValueError
(
'Test stage %s not in: '
%
(
stage
,
str
(
UNIT_STAGES
)))
for
testkey
,
testcode
in
cfg
[
section
].
items
():
if
testcode
[:
3
]
+
testcode
[
-
3
:]
in
{
"''''''"
,
'""""""'
}:
testcode
=
testcode
[
3
:
-
3
]
# testcode = testcode.replace('\\#', '#')
testcode
=
re
.
sub
(
r
'(?<!\\)\\#'
,
'#'
,
testcode
).
replace
(
'
\\\\
'
,
'
\\
'
)
elif
testcode
[:
1
]
+
testcode
[
-
1
:]
in
{
"''"
,
'""'
}:
testcode
=
testcode
[
1
:
-
1
]
raise
KeyError
(
'Unknown stage '
+
stage
+
" ! must be one of: "
+
str
(
UNIT_STAGES
))
symbol
=
d
[
'symbol'
]
pos
=
eat_comments
(
cfg
,
section_match
.
span
()[
1
])
entry_match
=
RX_ENTRY
.
match
(
cfg
,
pos
)
if
entry_match
is
None
:
raise
SyntaxError
(
'No entries in section [%s:%s]'
%
(
stage
,
symbol
))
while
entry_match
:
testkey
,
testcode
=
[
group
for
group
in
entry_match
.
groups
()
if
group
is
not
None
]
lines
=
testcode
.
split
(
'
\n
'
)
if
len
(
lines
)
>
1
:
indent
=
sys
.
maxsize
for
line
in
lines
[
1
:]:
indent
=
min
(
indent
,
len
(
line
)
-
len
(
line
.
lstrip
()))
for
i
in
range
(
1
,
len
(
lines
)):
lines
[
i
]
=
lines
[
i
][
indent
:]
testcode
=
'
\n
'
.
join
(
lines
)
unit
.
setdefault
(
symbol
,
OD
()).
setdefault
(
stage
,
OD
())[
testkey
]
=
testcode
# print(json.dumps(unit, sort_keys=True, indent=4))
pos
=
eat_comments
(
cfg
,
entry_match
.
span
()[
1
])
entry_match
=
RX_ENTRY
.
match
(
cfg
,
pos
)
section_match
=
RX_SECTION
.
match
(
cfg
,
pos
)
if
pos
!=
len
(
cfg
):
raise
SyntaxError
(
'in file %s in line %i'
%
(
config_filename
,
cfg
[:
pos
].
count
(
'
\n
'
)
+
1
))
return
unit
...
...
examples/LaTeX/grammar_tests/REPORT/00_test_regexes.md
View file @
a88ec311
...
...
@@ -34,7 +34,7 @@ Match-test "1"
### Test-code:
### AST
(GAP
...
...
@@ -54,8 +54,8 @@ Match-test "2"
### Test-code:
% Comment
% Comment
### AST
(GAP
...
...
@@ -77,7 +77,7 @@ Match-test "3"
### AST
(GAP
...
...
@@ -105,10 +105,10 @@ Fail-test "11"
### Test-code:
% Comment
% Comment
% Comment
% Comment
Test of parser: "PARSEP"
...
...
@@ -121,7 +121,7 @@ Match-test "1"
### Test-code:
### AST
(PARSEP
...
...
@@ -136,8 +136,8 @@ Match-test "2"
### Test-code:
% Comment
% Comment
### AST
(PARSEP
...
...
@@ -153,7 +153,7 @@ Match-test "3"
### AST
(PARSEP
...
...
@@ -168,10 +168,10 @@ Match-test "4"
### Test-code:
% Comment
% Comment
% Comment
% Comment
### AST
(PARSEP
...
...
@@ -185,9 +185,9 @@ Match-test "5"
### Test-code:
% Comment
% Comment
% Comment
% Comment
% Comment
### AST
(PARSEP
...
...
@@ -301,17 +301,17 @@ Match-test "6"
### Test-code:
% Comment
% Comment
% Comment
% Comment
### AST
(WSPC
(:RegExp
""
""
"
"
)
(:RegExp
"% Comment"
...
...
@@ -319,7 +319,7 @@ Match-test "6"
(:RegExp
""
""
""
"
"
)
(:RegExp
"% Comment"
...
...
@@ -327,7 +327,7 @@ Match-test "6"
(:RegExp
""
""
""
"
"
)
)
...
...
@@ -338,14 +338,14 @@ Match-test "7"
### AST
(WSPC
""
""
""
""
"
"
)
Fail-test "10"
...
...
@@ -406,7 +406,7 @@ Match-test "4"
### AST
(NEW_LINE
...
...
@@ -495,4 +495,4 @@ Fail-test "10"
### Test-code:
\ No newline at end of file
\ No newline at end of file
examples/LaTeX/grammar_tests/REPORT/02_test_paragraph.md
View file @
a88ec311
...
...
@@ -118,7 +118,7 @@ Match-test "3"
### Test-code:
Paragraphs are separated only by at least one blank line.
Therefore,
this line still belongs to the same paragraph.
this line still belongs to the same paragraph.
### AST
(paragraph
...
...
@@ -141,7 +141,7 @@ Match-test "4"
% or like thist comment.
Comment lines do not break paragraphs.
% There can even be several comment lines,
% even indented comment lines,
% even indented comment lines,
in sequence.
### AST
...
...
@@ -707,7 +707,7 @@ Fail-test "20"
### Test-code:
Paragraphs are separated by gaps.
Like this one.
Like this one.
Fail-test "21"
--------------
...
...
@@ -748,7 +748,7 @@ Match-test "1"
### Test-code:
Paragraphs are separated by gaps.
Like this one.
Like this one.
### AST
(sequence
...
...
@@ -777,7 +777,7 @@ Match-test "2"
The second paragraph follows after a long gap.
The parser should accept this, too.
The parser should accept this, too.
### AST
(sequence
...
...
@@ -805,13 +805,13 @@ Match-test "3"
### Test-code:
Paragraphs can be delimited by % comment
% sequences of separators
% sequences of separators
% and comments
% or sequences of comment lines
% and comments
% or sequences of comment lines
In the end such a sequence counts
% merely as one comment
In the end such a sequence counts
% merely as one comment
### AST
(sequence
...
...
examples/LaTeX/grammar_tests/REPORT/03_test_environment.md
View file @
a88ec311
...
...
@@ -9,14 +9,14 @@ Match-test "1"
### Test-code:
\b
egin{generic}
A generic block element is a block element
that is unknown to DHParser.
Unknown begin-end-structures are always
considered as block elements and not
as inline elements.
\e
nd{generic}
A generic block element is a block element
that is unknown to DHParser.
Unknown begin-end-structures are always
considered as block elements and not
as inline elements.
\e
nd{generic}
### AST
(generic_block
...
...
@@ -224,15 +224,15 @@ Match-test "2"
### Test-code:
\b
egin{itemize}
\i
tem But items may be
\i
tem But items may be
\i
tem separated by blank lines.
\i
tem separated by blank lines.
\i
tem
\i
tem
Empty lines at the beginning of an item will be ignored.
Empty lines at the beginning of an item will be ignored.
\e
nd{itemize}
\e
nd{itemize}
### AST
(itemize
...
...
@@ -253,11 +253,11 @@ Match-test "3"
### Test-code:
\b
egin{itemize}
\i
tem Items can consist of
\i
tem Items can consist of
several paragraphs.
\i
tem Or of one paragraph
\e
nd{itemize}
several paragraphs.
\i
tem Or of one paragraph
\e
nd{itemize}
### AST
(itemize)
...
...
@@ -268,9 +268,9 @@ Match-test "4"
### Test-code:
\b
egin{itemize}
\i
tem
\b
egin{itemize}
\i
tem Item-lists can be nested!
\e
nd{itemize}
\b
egin{itemize}
\i
tem Item-lists can be nested!
\e
nd{itemize}
\e
nd{itemize}
### AST
...
...
@@ -346,14 +346,14 @@ Match-test "2"
### Test-code:
\b
egin{enumerate}
\i
tem
\b
egin{itemize}
\i
tem Item-lists and
\i
tem Enumeration-lists
\b
egin{enumerate}
\i
tem can be nested
\i
tem arbitrarily
\e
nd{enumerate}
\i
tem Another item
\e
nd{itemize}
\i
tem Item-lists and
\i
tem Enumeration-lists
\b
egin{enumerate}
\i
tem can be nested
\i
tem arbitrarily
\e
nd{enumerate}
\i
tem Another item
\e
nd{itemize}
\i
tem Plain numerated item.
\e
nd{enumerate}
...
...
@@ -367,18 +367,18 @@ Match-test "3"
\b
egin{enumerate} % comment
% more comments and paragraph separators
% yet some more
% more comments and paragraph separators
% yet some more
\i
tem %another comment
finally, the first item
\i
tem %another comment
finally, the first item
% comment
% comment
\e
nd{enumerate}
\e
nd{enumerate}
### AST
(enumerate
...
...
@@ -399,13 +399,13 @@ Match-test "4"
### Test-code:
\b
egin{enumerate}
\i
tem An item
\i
tem An item
\b
egin{itemize}
\i
tem with an enumeration
\e
nd{itemize}
as a separate paragraph
\e
nd{enumerate}
\b
egin{itemize}
\i
tem with an enumeration
\e
nd{itemize}
as a separate paragraph
\e
nd{enumerate}
### AST
(enumerate
...
...
examples/LaTeX/grammar_tests/REPORT/04_test_structure.md
View file @
a88ec311
...
...
@@ -10,9 +10,9 @@ Match-test "1"
### Test-code:
\s
ubparagraph{A subparagraph} with some text
and consisting of several
and consisting of several
real paragraphs
real paragraphs
### AST
(SubParagraph
...
...
@@ -64,15 +64,15 @@ Match-test "1"
### Test-code:
\p
aragraph{A paragraph consisting of several subparagraphs}
Some text ahead
Some text ahead
\s
ubparagraph{subparagraph 1}
\s
ubparagraph{subparagraph 1}
First subparagraph
First subparagraph
\s
ubparagraph{subparagraph 2}
\s
ubparagraph{subparagraph 2}
Second subparagraph
Second subparagraph
### AST
(Paragraph
...
...
@@ -149,25 +149,25 @@ Match-test "1"
### Test-code:
\c
hapter{Chapter 1}
\s
ection{Section 1}
\s
ection{Section 2}
\s
ection{Section 1}
\s
ection{Section 2}
Section 2 contains some text
Section 2 contains some text
\s
ection{Section 3}
\s
ubsection{SubSection 1}
Text for subsection 1
\s
ubsection{SubSection 2}
Text for subsection 2
\s
ection{Section 3}
\s
ubsection{SubSection 1}
Text for subsection 1
\s
ubsection{SubSection 2}
Text for subsection 2
\s
ubsubsection{A subsubsection}
Text for subsubsecion
\s
ubsubsection{A subsubsection}
Text for subsubsecion
\s
ection{Section 4}
\s
ection{Section 4}
\c
hapter{Chapter 2}
\c
hapter{Chapter 2}
Some text for chapter 2
Some text for chapter 2
### AST
(Chapters
...
...
examples/LaTeX/grammar_tests/REPORT/06_test_tabular.md
View file @
a88ec311
...
...
@@ -9,9 +9,9 @@ Match-test "1"
### Test-code:
\b
egin{tabular}{c|c|}
& $S_1$
\\
\c
line{1-2}
$A_1$ & $r_1$
\\
\c
line{1-2}
$A_2$ & $r_2$
\\
\c
line{1-2}
& $S_1$
\\
\c
line{1-2}
$A_1$ & $r_1$
\\
\c
line{1-2}
$A_2$ & $r_2$
\\
\c
line{1-2}
\e
nd{tabular}
### AST
...
...
@@ -152,16 +152,16 @@ Match-test "2"
### Test-code:
\b
egin{tabular}{c|c|c|c|cc|c|c|c|}
\m
ulticolumn{1}{c}{} &
\m
ulticolumn{3}{c}{Tabelle 1:} &
\m
ulticolumn{2}{c}{} &
\m
ulticolumn{3}{c}{Tabelle 2:}
\\
\c
line{2-4}
\c
line{7-9}
$A_1$ & 7 & 0 & 4 & & $A_1$ & 5 & 20 & 6
\\
\c
line{2-4}
\c
line{7-9}
$A_2$ & 5 & 21 & 11 & & $A_2$ & -3 & 8 & 10
\\
\c
line{2-4}
\c
line{7-9}
$A_3$ & 10 & -5 & -1 & & $A_3$ & 4 & 5 & 9
\\
\c
line{2-4}
\c
line{7-9}
\m
ulticolumn{1}{c}{} &
\m
ulticolumn{3}{c}{Tabelle 1:} &
\m
ulticolumn{2}{c}{} &
\m
ulticolumn{3}{c}{Tabelle 2:}
\\
\c
line{2-4}
\c
line{7-9}
$A_1$ & 7 & 0 & 4 & & $A_1$ & 5 & 20 & 6
\\
\c
line{2-4}
\c
line{7-9}
$A_2$ & 5 & 21 & 11 & & $A_2$ & -3 & 8 & 10
\\
\c
line{2-4}
\c
line{7-9}
$A_3$ & 10 & -5 & -1 & & $A_3$ & 4 & 5 & 9
\\
\c
line{2-4}
\c
line{7-9}
\e
nd{tabular}
### AST
...
...
@@ -630,19 +630,19 @@ Match-test "1"
### Test-code:
\b
egin{figure}
\d
oublespacing
\b
egin{center}
\b
egin{tabular}{l|c|c|c|}
\m
ulticolumn{1}{c}{ } &
\m
ulticolumn{1}{c}{ } &
\m
ulticolumn{2}{c}{$
\o
verbrace{
\h
space{7cm}}^{Experiments}$}
\\
\c
line{2-4}
& {
\b
f computer simulation} & {
\b
f analog simulation} & {
\b
f plain experiment}
\\
\h
line
materiality of object
& semantic &
\m
ulticolumn{2}{c|}{material}
\\
\h
line
relation to target
&
\m
ulticolumn{2}{c|}{representation} & representative
\\
\h
line
\m
ulticolumn{1}{c}{ } &
\m
ulticolumn{2}{c}{$
\u
nderbrace{
\h
space{7cm}}_{Simulations}$} &
\m
ulticolumn{1}{c}{ }
\\
\e
nd{tabular}
\e
nd{center}
\c
aption{Conceptual relation of simulations and experiments}
\l
abel{SimulationExperimentsScheme}
\d
oublespacing
\b
egin{center}
\b
egin{tabular}{l|c|c|c|}
\m
ulticolumn{1}{c}{ } &
\m
ulticolumn{1}{c}{ } &
\m
ulticolumn{2}{c}{$
\o
verbrace{
\h
space{7cm}}^{Experiments}$}
\\
\c
line{2-4}
& {
\b
f computer simulation} & {
\b
f analog simulation} & {
\b
f plain experiment}
\\
\h
line
materiality of object
& semantic &
\m
ulticolumn{2}{c|}{material}
\\
\h
line
relation to target