Commit a88ec311 authored by Eckhart Arnold
Browse files

- Added a custom parser for reading ini-style test files so that the indentation of test code is preserved.

parent 244e7067
......@@ -16,18 +16,19 @@ implied. See the License for the specific language governing
permissions and limitations under the License.
"""
import collections
import configparser
# import configparser
import copy
import fnmatch
import inspect
import json
import os
import sys
from DHParser.toolkit import re
from DHParser.error import is_error, adjust_error_locations
from DHParser.log import is_logging, clear_logs, log_ST, log_parsing_history
from DHParser.syntaxtree import Node, mock_syntax_tree, flatten_sxpr, ZOMBIE_PARSER
from DHParser.parse import UnknownParserError
from DHParser.error import is_error, adjust_error_locations
from DHParser.syntaxtree import Node, mock_syntax_tree, flatten_sxpr, ZOMBIE_PARSER
from DHParser.toolkit import re
__all__ = ('unit_from_configfile',
'unit_from_json',
......@@ -39,31 +40,86 @@ __all__ = ('unit_from_configfile',
# Names of the test stages that may appear in a "[stage:symbol]" header.
UNIT_STAGES = {'match', 'fail', 'ast', 'cst', '__ast__', '__cst__'}

# Section header of the form "[stage:symbol]", e.g. "[match:paragraph]".
RX_SECTION = re.compile(r'\s*\[(?P<stage>\w+):(?P<symbol>\w+)\]')
# A value is one of (tried in this order): a triple-quoted string that may
# span several lines, a quoted string on a single line, or unquoted text.
# Unquoted text may continue on following lines as long as each of them
# starts with a blank; empty lines in between are allowed.
RE_VALUE = (r'(?:"""((?s:.*?))""")|'
            r"(?:'''((?s:.*?))''')|"
            r'(?:"(.*?)")|'
            r"(?:'(.*?)')|"
            r'(.*(?:\n(?:\s*\n)* .*)*)')
# An entry of the form "key : value"; surrounding whitespace is consumed.
RX_ENTRY = re.compile(r'\s*(\w+)\s*:\s*(?:{value})\s*'.format(value=RE_VALUE))
# A "#" comment up to the end of its line (or the end of the file).
RX_COMMENT = re.compile(r'\s*#.*(?:\n|$)')


def unit_from_configfile(config_filename):
    """
    Reads a grammar unit test from a config file.

    The file is parsed by hand rather than with ``configparser`` so that
    the relative indentation of multi-line test code is preserved.

    The file consists of sections headed by ``[stage:symbol]``, each
    followed by at least one entry ``key : value``. Comment lines starting
    with ``#`` are skipped before sections and before and after entries.
    For multi-line values the indentation common to all non-blank lines
    after the first one is removed.

    Args:
        config_filename: Path of the test file; it is read as UTF-8.

    Returns:
        A nested ``collections.OrderedDict`` of the form
        ``unit[symbol][stage][testkey] = testcode``.

    Raises:
        KeyError: If a section names a stage that is not in UNIT_STAGES.
        SyntaxError: If a section has no entries or if the file contains
            text that is neither a comment, a section nor an entry.
    """
    def eat_comments(txt, pos):
        """Returns the position after all comments starting at `pos`."""
        m = RX_COMMENT.match(txt, pos)
        while m:
            pos = m.end()
            m = RX_COMMENT.match(txt, pos)
        return pos

    def dedent_continuation_lines(testcode):
        """Strips the common indentation from all lines but the first."""
        lines = testcode.split('\n')
        if len(lines) > 1:
            # Blank lines carry no indentation information. Counting them
            # would force the common indentation to zero as soon as the
            # value contains a single empty line.
            indent = min((len(line) - len(line.lstrip())
                          for line in lines[1:] if line.strip()),
                         default=sys.maxsize)
            lines[1:] = [line[indent:] for line in lines[1:]]
        return '\n'.join(lines)

    # The encoding is given explicitly so that the result does not depend
    # on the locale of the machine the tests are run on.
    with open(config_filename, 'r', encoding='utf-8') as f:
        cfg = f.read()
    # The continuation-line pattern and the indentation arithmetic only
    # know about blanks, therefore tabs are replaced up front.
    cfg = cfg.replace('\t', ' ')

    OD = collections.OrderedDict
    unit = OD()

    pos = eat_comments(cfg, 0)
    section_match = RX_SECTION.match(cfg, pos)
    while section_match:
        d = section_match.groupdict()
        stage = d['stage']
        if stage not in UNIT_STAGES:
            raise KeyError('Unknown stage ' + stage + " ! must be one of: " + str(UNIT_STAGES))
        symbol = d['symbol']
        pos = eat_comments(cfg, section_match.end())

        entry_match = RX_ENTRY.match(cfg, pos)
        if entry_match is None:
            raise SyntaxError('No entries in section [%s:%s]' % (stage, symbol))
        while entry_match:
            # Exactly one of the alternative value groups has matched, so
            # the non-empty groups are the key and the value.
            testkey, testcode = [group for group in entry_match.groups() if group is not None]
            testcode = dedent_continuation_lines(testcode)
            unit.setdefault(symbol, OD()).setdefault(stage, OD())[testkey] = testcode
            pos = eat_comments(cfg, entry_match.end())
            entry_match = RX_ENTRY.match(cfg, pos)

        section_match = RX_SECTION.match(cfg, pos)

    # Anything but trailing whitespace that is left over at this point
    # could not be parsed.
    if cfg[pos:].strip():
        raise SyntaxError('in file %s in line %i' % (config_filename, cfg[:pos].count('\n') + 1))

    return unit
......
......@@ -34,7 +34,7 @@ Match-test "1"
### Test-code:
### AST
(GAP
......@@ -54,8 +54,8 @@ Match-test "2"
### Test-code:
% Comment
% Comment
### AST
(GAP
......@@ -77,7 +77,7 @@ Match-test "3"
### AST
(GAP
......@@ -105,10 +105,10 @@ Fail-test "11"
### Test-code:
% Comment
% Comment
% Comment
% Comment
Test of parser: "PARSEP"
......@@ -121,7 +121,7 @@ Match-test "1"
### Test-code:
### AST
(PARSEP
......@@ -136,8 +136,8 @@ Match-test "2"
### Test-code:
% Comment
% Comment
### AST
(PARSEP
......@@ -153,7 +153,7 @@ Match-test "3"
### AST
(PARSEP
......@@ -168,10 +168,10 @@ Match-test "4"
### Test-code:
% Comment
% Comment
% Comment
% Comment
### AST
(PARSEP
......@@ -185,9 +185,9 @@ Match-test "5"
### Test-code:
% Comment
% Comment
% Comment
% Comment
% Comment
### AST
(PARSEP
......@@ -301,17 +301,17 @@ Match-test "6"
### Test-code:
% Comment
% Comment
% Comment
% Comment
### AST
(WSPC
(:RegExp
""
""
" "
)
(:RegExp
"% Comment"
......@@ -319,7 +319,7 @@ Match-test "6"
(:RegExp
""
""
""
" "
)
(:RegExp
"% Comment"
......@@ -327,7 +327,7 @@ Match-test "6"
(:RegExp
""
""
""
" "
)
)
......@@ -338,14 +338,14 @@ Match-test "7"
### AST
(WSPC
""
""
""
""
" "
)
Fail-test "10"
......@@ -406,7 +406,7 @@ Match-test "4"
### AST
(NEW_LINE
......@@ -495,4 +495,4 @@ Fail-test "10"
### Test-code:
\ No newline at end of file
\ No newline at end of file
......@@ -118,7 +118,7 @@ Match-test "3"
### Test-code:
Paragraphs are separated only by at least one blank line.
Therefore,
this line still belongs to the same paragraph.
this line still belongs to the same paragraph.
### AST
(paragraph
......@@ -141,7 +141,7 @@ Match-test "4"
% or like thist comment.
Comment lines do not break paragraphs.
% There can even be several comment lines,
% even indented comment lines,
% even indented comment lines,
in sequence.
### AST
......@@ -707,7 +707,7 @@ Fail-test "20"
### Test-code:
Paragraphs are separated by gaps.
Like this one.
Like this one.
Fail-test "21"
--------------
......@@ -748,7 +748,7 @@ Match-test "1"
### Test-code:
Paragraphs are separated by gaps.
Like this one.
Like this one.
### AST
(sequence
......@@ -777,7 +777,7 @@ Match-test "2"
The second paragraph follows after a long gap.
The parser should accept this, too.
The parser should accept this, too.
### AST
(sequence
......@@ -805,13 +805,13 @@ Match-test "3"
### Test-code:
Paragraphs can be delimited by % comment
% sequences of separators
% sequences of separators
% and comments
% or sequences of comment lines
% and comments
% or sequences of comment lines
In the end such a sequence counts
% merely as one comment
In the end such a sequence counts
% merely as one comment
### AST
(sequence
......
......@@ -9,14 +9,14 @@ Match-test "1"
### Test-code:
\begin{generic}
A generic block element is a block element
that is unknown to DHParser.
Unknown begin-end-structures are always
considered as block elements and not
as inline elements.
\end{generic}
A generic block element is a block element
that is unknown to DHParser.
Unknown begin-end-structures are always
considered as block elements and not
as inline elements.
\end{generic}
### AST
(generic_block
......@@ -224,15 +224,15 @@ Match-test "2"
### Test-code:
\begin{itemize}
\item But items may be
\item But items may be
\item separated by blank lines.
\item separated by blank lines.
\item
\item
Empty lines at the beginning of an item will be ignored.
Empty lines at the beginning of an item will be ignored.
\end{itemize}
\end{itemize}
### AST
(itemize
......@@ -253,11 +253,11 @@ Match-test "3"
### Test-code:
\begin{itemize}
\item Items can consist of
\item Items can consist of
several paragraphs.
\item Or of one paragraph
\end{itemize}
several paragraphs.
\item Or of one paragraph
\end{itemize}
### AST
(itemize)
......@@ -268,9 +268,9 @@ Match-test "4"
### Test-code:
\begin{itemize}
\item
\begin{itemize}
\item Item-lists can be nested!
\end{itemize}
\begin{itemize}
\item Item-lists can be nested!
\end{itemize}
\end{itemize}
### AST
......@@ -346,14 +346,14 @@ Match-test "2"
### Test-code:
\begin{enumerate}
\item \begin{itemize}
\item Item-lists and
\item Enumeration-lists
\begin{enumerate}
\item can be nested
\item arbitrarily
\end{enumerate}
\item Another item
\end{itemize}
\item Item-lists and
\item Enumeration-lists
\begin{enumerate}
\item can be nested
\item arbitrarily
\end{enumerate}
\item Another item
\end{itemize}
\item Plain numerated item.
\end{enumerate}
......@@ -367,18 +367,18 @@ Match-test "3"
\begin{enumerate} % comment
% more comments and paragraph separators
% yet some more
% more comments and paragraph separators
% yet some more
\item %another comment
finally, the first item
\item %another comment
finally, the first item
% comment
% comment
\end{enumerate}
\end{enumerate}
### AST
(enumerate
......@@ -399,13 +399,13 @@ Match-test "4"
### Test-code:
\begin{enumerate}
\item An item
\item An item
\begin{itemize}
\item with an enumeration
\end{itemize}
as a separate paragraph
\end{enumerate}
\begin{itemize}
\item with an enumeration
\end{itemize}
as a separate paragraph
\end{enumerate}
### AST
(enumerate
......
......@@ -10,9 +10,9 @@ Match-test "1"
### Test-code:
\subparagraph{A subparagraph} with some text
and consisting of several
and consisting of several
real paragraphs
real paragraphs
### AST
(SubParagraph
......@@ -64,15 +64,15 @@ Match-test "1"
### Test-code:
\paragraph{A paragraph consisting of several subparagraphs}
Some text ahead
Some text ahead
\subparagraph{subparagraph 1}
\subparagraph{subparagraph 1}
First subparagraph
First subparagraph
\subparagraph{subparagraph 2}
\subparagraph{subparagraph 2}
Second subparagraph
Second subparagraph
### AST
(Paragraph
......@@ -149,25 +149,25 @@ Match-test "1"
### Test-code:
\chapter{Chapter 1}
\section{Section 1}
\section{Section 2}
\section{Section 1}
\section{Section 2}
Section 2 contains some text
Section 2 contains some text
\section{Section 3}
\subsection{SubSection 1}
Text for subsection 1
\subsection{SubSection 2}
Text for subsection 2
\section{Section 3}
\subsection{SubSection 1}
Text for subsection 1
\subsection{SubSection 2}
Text for subsection 2
\subsubsection{A subsubsection}
Text for subsubsecion
\subsubsection{A subsubsection}
Text for subsubsecion
\section{Section 4}
\section{Section 4}
\chapter{Chapter 2}
\chapter{Chapter 2}
Some text for chapter 2
Some text for chapter 2
### AST
(Chapters
......
......@@ -9,9 +9,9 @@ Match-test "1"
### Test-code:
\begin{tabular}{c|c|}
& $S_1$ \\ \cline{1-2}
$A_1$ & $r_1$ \\ \cline{1-2}
$A_2$ & $r_2$ \\ \cline{1-2}
& $S_1$ \\ \cline{1-2}
$A_1$ & $r_1$ \\ \cline{1-2}
$A_2$ & $r_2$ \\ \cline{1-2}
\end{tabular}
### AST
......@@ -152,16 +152,16 @@ Match-test "2"
### Test-code:
\begin{tabular}{c|c|c|c|cc|c|c|c|}
\multicolumn{1}{c}{} & \multicolumn{3}{c}{Tabelle 1:} &
\multicolumn{2}{c}{} & \multicolumn{3}{c}{Tabelle 2:}
\\
\cline{2-4} \cline{7-9}
$A_1$ & 7 & 0 & 4 & & $A_1$ & 5 & 20 & 6 \\
\cline{2-4} \cline{7-9}
$A_2$ & 5 & 21 & 11 & & $A_2$ & -3 & 8 & 10 \\
\cline{2-4} \cline{7-9}
$A_3$ & 10 & -5 & -1 & & $A_3$ & 4 & 5 & 9 \\
\cline{2-4} \cline{7-9}
\multicolumn{1}{c}{} & \multicolumn{3}{c}{Tabelle 1:} &
\multicolumn{2}{c}{} & \multicolumn{3}{c}{Tabelle 2:}
\\
\cline{2-4} \cline{7-9}
$A_1$ & 7 & 0 & 4 & & $A_1$ & 5 & 20 & 6 \\
\cline{2-4} \cline{7-9}
$A_2$ & 5 & 21 & 11 & & $A_2$ & -3 & 8 & 10 \\
\cline{2-4} \cline{7-9}
$A_3$ & 10 & -5 & -1 & & $A_3$ & 4 & 5 & 9 \\
\cline{2-4} \cline{7-9}
\end{tabular}
### AST
......@@ -630,19 +630,19 @@ Match-test "1"
### Test-code:
\begin{figure}
\doublespacing
\begin{center}
\begin{tabular}{l|c|c|c|}
\multicolumn{1}{c}{ } & \multicolumn{1}{c}{ } & \multicolumn{2}{c}{$\overbrace{\hspace{7cm}}^{Experiments}$} \\ \cline{2-4}
& {\bf computer simulation} & {\bf analog simulation} & {\bf plain experiment} \\ \hline
materiality of object
& semantic & \multicolumn{2}{c|}{material} \\ \hline
relation to target
& \multicolumn{2}{c|}{representation} & representative \\ \hline
\multicolumn{1}{c}{ } & \multicolumn{2}{c}{$\underbrace{\hspace{7cm}}_{Simulations}$} & \multicolumn{1}{c}{ } \\
\end{tabular}
\end{center}
\caption{Conceptual relation of simulations and experiments}\label{SimulationExperimentsScheme}
\doublespacing
\begin{center}
\begin{tabular}{l|c|c|c|}
\multicolumn{1}{c}{ } & \multicolumn{1}{c}{ } & \multicolumn{2}{c}{$\overbrace{\hspace{7cm}}^{Experiments}$} \\ \cline{2-4}
& {\bf computer simulation} & {\bf analog simulation} & {\bf plain experiment} \\ \hline
materiality of object
& semantic & \multicolumn{2}{c|}{material} \\ \hline
relation to target