05.11., 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit a4bed6e6 authored by Eckhart Arnold's avatar Eckhart Arnold

- Refactoring LaTeX.ebnf

parent a765df39
......@@ -133,12 +133,12 @@ StrictResultType = Union[ChildrenType, str]
ResultType = Union[ChildrenType, 'Node', str, None]
def flatten_sxpr(sxpr: str) -> str:
    """Returns S-expression `sxpr` as a one-liner without unnecessary
    whitespace.

    Every run of whitespace (including line breaks) is collapsed into a
    single blank, blanks directly in front of a closing parenthesis are
    removed, and leading/trailing whitespace is stripped.

    Example:
    >>> flatten_sxpr('(a\\n (b\\n c\\n )\\n)\\n')
    '(a (b c))'
    """
    # Raw strings keep `\s` and `\)` as regex escapes instead of relying on
    # Python passing unknown string escapes through unchanged.
    return re.sub(r'\s(?=\))', '', re.sub(r'\s+', ' ', sxpr)).strip()


# Former name of `flatten_sxpr`; kept so that code still using it keeps working.
oneliner_sxpr = flatten_sxpr
......@@ -199,11 +199,13 @@ class Node:
self._pos = -1 # type: int
self.parser = parser or ZOMBIE_PARSER
# Plain-text rendering of the node: if the node has children, the string
# forms of all children are concatenated; otherwise the node's own result
# is converted with str().
def __str__(self):
if self.children:
return "".join(str(child) for child in self.children)
return str(self.result)
def __repr__(self):
mpargs = {'name': self.parser.name, 'ptype': self.parser.ptype}
parg = "MockParser({name}, {ptype})".format(**mpargs)
......@@ -211,30 +213,35 @@ class Node:
"(" + ", ".join(repr(child) for child in self.children) + ")"
return "Node(%s, %s)" % (parg, rarg)
# Two nodes compare equal when both their tag names and their results are
# equal. Position and error state are not part of the comparison.
# NOTE(review): `other` is accessed via `.tag_name` and `.result` without a
# type check, so comparing against an object lacking these attributes
# raises AttributeError.
def __eq__(self, other):
# return str(self.parser) == str(other.parser) and self.result == other.result
return self.tag_name == other.tag_name and self.result == other.result
# Hash is derived from the tag name only. Nodes that compare equal share a
# tag name, so they also share a hash value.
def __hash__(self):
return hash(self.tag_name)
# Hook for copy.deepcopy(): the result is deep-copied and wrapped in a new
# Node that is constructed with this node's parser object (the parser itself
# is passed on, not copied); the stored position is carried over.
# NOTE(review): `memodict` is accepted but neither read nor mutated in this
# block, so the mutable default is never shared state here.
def __deepcopy__(self, memodict={}):
result = copy.deepcopy(self.result)
other = Node(self.parser, result)
other._pos = self._pos
return other
# Tag name of the node: the parser's name or, if that is empty, the
# parser's ptype.
@property # this needs to be a (dynamic) property, in case self.parser gets updated
def tag_name(self) -> str:
return self.parser.name or self.parser.ptype
# Read access to the stored result (`self._result`).
@property
def result(self) -> StrictResultType:
return self._result
@result.setter
def result(self, result: ResultType):
# # made obsolete by static type checking with mypy is done
# # made obsolete by static type checking with mypy
# assert ((isinstance(result, tuple) and all(isinstance(child, Node) for child in result))
# or isinstance(result, Node)
# or isinstance(result, str)), str(result)
......@@ -244,15 +251,18 @@ class Node:
self.error_flag = any(r.error_flag for r in self._children) \
if self._children else False # type: bool
# Read access to the stored children (`self._children`).
@property
def children(self) -> ChildrenType:
return self._children
# Read access to the stored length value (`self._len`).
# NOTE(review): presumably the length of the source text covered by this
# node - confirm where `_len` is assigned.
@property
def len(self) -> int:
# DEBUGGING: print(self.tag_name, str(self.pos), str(self._len), str(self)[:10].replace('\n','.'))
return self._len
@property
def pos(self) -> int:
assert self._pos >= 0, "position value not initialized!"
......@@ -267,16 +277,19 @@ class Node:
child.pos = pos + offset
offset += child.len
# Returns a new list with one Error object per error message stored on this
# node, each constructed from this node's position and the message.
# Reading `self.pos` means the position must already be initialized.
@property
def errors(self) -> List[Error]:
return [Error(self.pos, err) for err in self._errors]
# Appends the message `error_str` to this node's error list and sets the
# node's error flag. Returns the node itself, so calls can be chained.
# The assertion guards against non-string messages.
def add_error(self, error_str: str) -> 'Node':
assert isinstance(error_str, str)
self._errors.append(error_str)
self.error_flag = True
return self
def propagate_error_flags(self) -> None:
"""Recursively propagates error flags set on child nodes to its
parents. This can be used if errors are added to descendant
......@@ -286,6 +299,7 @@ class Node:
child.propagate_error_flags()
self.error_flag = self.error_flag or child.error_flag
def collect_errors(self, clear_errors=False) -> List[Error]:
"""
Returns all errors of this node or any child node in the form
......@@ -301,6 +315,7 @@ class Node:
errors.extend(child.collect_errors(clear_errors))
return errors
def _tree_repr(self, tab, openF, closeF, dataF=identity, density=0) -> str:
"""
Generates a tree representation of this node and its children
......@@ -346,6 +361,7 @@ class Node:
else:
return head + '\n'.join([tab + dataF(s) for s in res.split('\n')]) + tail.lstrip(D)
def as_sxpr(self, src: str=None) -> str:
"""
Returns content as S-expression, i.e. in lisp-like form.
......@@ -373,6 +389,7 @@ class Node:
return self._tree_repr(' ', opening, lambda node: '\n)', pretty, density=0)
def as_xml(self, src: str=None) -> str:
"""
Returns content as XML-tree.
......@@ -397,10 +414,12 @@ class Node:
return self._tree_repr(' ', opening, closing, density=1)
def structure(self) -> str:
    """Return structure (and content) as S-expression on a single line
    without any line breaks.

    The node is first serialized with `as_sxpr()` and the result is then
    flattened with `flatten_sxpr()`.
    """
    return flatten_sxpr(self.as_sxpr())
def content(self) -> str:
"""
......@@ -412,6 +431,7 @@ class Node:
return (
' <<< Error on "%s" | %s >>> ' % (s, '; '.join(self._errors))) if self._errors else s
def find(self, match_function: Callable) -> Iterator['Node']:
"""Finds nodes in the tree that match a specific criterion.
......@@ -433,6 +453,7 @@ class Node:
for nd in child.find(match_function):
yield nd
# def range(self, match_first, match_last):
# """Iterates over the range of nodes, starting from the first
# node for which ``match_first`` becomes True until the first node
......@@ -473,13 +494,14 @@ class Node:
# return self.result,
# return nav(path.split('/'))
def log(self, log_file_name):
    """Write this tree as S-expression to a file in the log directory.

    Nothing is written unless `is_logging()` reports that logging is on.
    `log_file_name` is joined onto the directory returned by `log_dir()`;
    the file is opened for writing as UTF-8.
    """
    if is_logging():
        with open(os.path.join(log_dir(), log_file_name), "w", encoding="utf-8") as f:
            f.write(self.as_sxpr())
def mock_syntax_tree(sxpr):
"""
Generates a tree of nodes from an S-expression.
......
......@@ -27,8 +27,7 @@ except ImportError:
import re
from DHParser import error_messages
from DHParser.toolkit import is_logging
from DHParser.syntaxtree import mock_syntax_tree, oneliner_sxpr
from DHParser.syntaxtree import mock_syntax_tree, flatten_sxpr
__all__ = ('unit_from_configfile',
'unit_from_json',
......@@ -150,15 +149,15 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
infostr = ' match-test "' + test_name + '" ... '
errflag = len(errata)
cst = parser(test_code, parser_name)
cst.log("match_%s_%s.cst" % (parser_name, test_name))
cst.log("%s_match_%s_%s.cst" % (unit_name, parser_name, test_name))
tests.setdefault('__cst__', {})[test_name] = cst
if "ast" in tests or report:
ast = copy.deepcopy(cst)
transform(ast)
tests.setdefault('__ast__', {})[test_name] = ast
ast.log("match_%s_%s.ast" % (parser_name, test_name))
ast.log("%s_match_%s_%s.ast" % (unit_name, parser_name, test_name))
if cst.error_flag:
errata.append('Match test "%s" for parser "%s" failed:\n\tExpr.: %s\n\n\t%s' %
errata.append('Match test "%s" for parser "%s" failed:\n\tExpr.: %s\n\n\t%s\n\n' %
(test_name, parser_name, '\n\t'.join(test_code.split('\n')),
'\n\t'.join(m.replace('\n', '\n\t\t') for m in
error_messages(test_code, cst.collect_errors()))))
......@@ -174,8 +173,8 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
errata.append('Abstract syntax tree test "%s" for parser "%s" failed:'
'\n\tExpr.: %s\n\tExpected: %s\n\tReceived: %s'
% (test_name, parser_name, '\n\t'.join(test_code.split('\n')),
oneliner_sxpr(compare.as_sxpr()),
oneliner_sxpr(ast.as_sxpr())))
flatten_sxpr(compare.as_sxpr()),
flatten_sxpr(ast.as_sxpr())))
tests.setdefault('__err__', {})[test_name] = errata[-1]
if verbose:
print(infostr + ("OK" if len(errata) == errflag else "FAIL"))
......@@ -187,8 +186,6 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
infostr = ' fail-test "' + test_name + '" ... '
errflag = len(errata)
cst = parser(test_code, parser_name)
# doesn't make sense to write cst for fail-tests
# cst.log("fail_%s_%s.cst" % (parser_name, test_name))
if not cst.error_flag:
errata.append('Fail test "%s" for parser "%s" yields match instead of '
'expected failure!' % (test_name, parser_name))
......
......@@ -54,11 +54,11 @@ Index = "\printindex" [PARSEP]
#### block environments ####
block_environment = known_environment | generic_block
known_environment = itemize | enumerate | figure | table | quotation
known_environment = itemize | enumerate | figure | tabular | quotation
| verbatim
generic_block = begin_generic_block sequence §end_generic_block
begin_generic_block = -&LB begin_environment -&LB
end_generic_block = -&LB end_environment -&LB
begin_generic_block = -&LB begin_environment LFF
end_generic_block = -&LB end_environment LFF
itemize = "\begin{itemize}" [PARSEP] { item } §"\end{itemize}"
enumerate = "\begin{enumerate}" [PARSEP] {item } §"\end{enumerate}"
......@@ -68,13 +68,13 @@ figure = "\begin{figure}" sequence §"\end{figure}"
quotation = ("\begin{quotation}" sequence §"\end{quotation}")
| ("\begin{quote}" sequence §"\end{quote}")
verbatim = "\begin{verbatim}" sequence §"\end{verbatim}"
table = "\begin{tabular}" table_config sequence §"\end{tabular}"
table_config = "{" /[lcr|]+/~ §"}"
tabular = "\begin{tabular}" tabular_config sequence §"\end{tabular}"
tabular_config = "{" /[lcr|]+/~ §"}"
#### paragraphs and sequences of paragraphs ####
block_of_paragraphs = /{/ sequence §/}/
block_of_paragraphs = /{/~ sequence §/}/
sequence = { (paragraph | block_environment ) [PARSEP] }+
paragraph = { !blockcmd text_element //~ }+
......@@ -85,20 +85,21 @@ text_element = command | text | block | inline_environment
inline_environment = known_inline_env | generic_inline_env
known_inline_env = inline_math
generic_inline_env = (begin_inline_env { text_element }+ §end_inline_env)
begin_inline_env = (-!LB begin_environment) | (begin_environment -!LB)
generic_inline_env = begin_inline_env //~ paragraph §end_inline_env
begin_inline_env = (-!LB begin_environment) | (begin_environment !LFF)
end_inline_env = end_environment
# (-!LB end_environment) | (end_environment -!LB) # ambiguity with generic_block when EOF
begin_environment = "\begin{" §NAME §"}"
end_environment = "\end{" §::NAME §"}"
## (-!LB end_environment) | (end_environment !LFF) # ambiguity with generic_block when EOF
begin_environment = /\\begin{/ §NAME §/}/
end_environment = /\\end{/ §::NAME §/}/
inline_math = "$" /[^$]*/ §"$"
inline_math = /\$/ /[^$]*/ §/\$/
#### commands ####
command = known_command | generic_command
command = known_command | text_command | generic_command
known_command = footnote | includegraphics | caption
text_command = TXTCOMMAND | ESCAPED | BRACKETS
generic_command = !no_command CMDNAME [[ //~ config ] //~ block ]
footnote = "\footnote" block_of_paragraphs
......@@ -113,12 +114,9 @@ caption = "\caption" block
#######################################################################
config = "[" cfgtext §"]"
block = /{/ { text_element } §/}/
text = { cfgtext | (BRACKETS //~) }+
cfgtext = { word_sequence | (ESCAPED //~) }+
word_sequence = { TEXTCHUNK //~ }+
config = "[" text §"]"
block = /{/ //~ { !blockcmd text_element //~ } §/}/
text = TEXTCHUNK { //~ TEXTCHUNK }
no_command = "\begin{" | "\end" | BACKSLASH structural
blockcmd = BACKSLASH ( ( "begin{" | "end{" )
......@@ -138,13 +136,17 @@ structural = "subsection" | "section" | "chapter" | "subsubsection"
CMDNAME = /\\(?:(?!_)\w)+/~
TXTCOMMAND = /\\text\w+/
ESCAPED = /\\[%$&_\/{}]/
BRACKETS = /[\[\]]/ # left or right square bracket: [ ]
NAME = /\w+/~
ESCAPED = /\\[%$&_\/]/
BRACKETS = /[\[\]]/ # left or right square bracket: [ ]
TEXTCHUNK = /[^\\%$&\{\}\[\]\s\n]+/ # some piece of text excluding whitespace,
# linefeed and special characters
LF = !GAP /[ \t]*\n[ \t]*/ # linefeed but not an empty line
LFF = //~ -&LB WSPC # at least one linefeed
WSPC = { ~/\s+/~ } # arbitrary horizontal or vertical whitespace
PARSEP = { GAP }+ # paragraph separator
GAP = /[ \t]*(?:\n[ \t]*)+\n/~ # at least one empty line, i.e.
# [whitespace] linefeed [whitespace] linefeed
......
This diff is collapsed.
# latex Grammar
@ whitespace = /[ \t]*\n?(?!\s*\n)[ \t]*/ # whitespace, including at most one linefeed
@ comment = /%.*(?:\n|$)/
latexdoc = preamble document
preamble = { command }+
genericenv = beginenv sequence §endenv
beginenv = "\begin" §( "{" name "}" )
endenv = "\end" §( "{" ::name "}" )
name = /\w+/~
comand = cmdname [ config ] block
cmdname = /\\\w+/
config = "[" cfgtext §"]"
sequence = { partext | parblock }
parblock = "{" { partext | parblock } §"}"
block = "{" { text | block } §"}"
partext = text | PARSEP
text = cfgtext | brackets
cfgtext = chunk | escaped | WSPC
ESCAPED = /\\[%$&]/
BRACKET = /[\[\]]/ # left or right square bracket: [ ]
TEXTCHUNK = /[^\\%$&\{\}\[\]\s\n]+/ # some piece of text excluding whitespace,
# linefeed and special characters
WSPC = /[ \t]*\n?(?!\s*\n)[ \t]*/ # whitespace, including at most one linefeed
LF = /[ \t]*\n(?!\s*\n)/ # a linefeed, but not an empty line (i.e. par)
PARSEP = /\s*\n\s*\n/ # at least one empty line, i.e.
# [whitespace] linefeed [whitespace] linefeed
[match:text]
1 : Some plain text
[fail:text]
1 : Low-level text must not contain \& escaped characters.
2 : Low-level text must not contain ] [ brackets.
3 : Low-level text must not contain { environments }.
4 : Low-level text must not contain any \commands.
[match:text_element]
1 : \command
2 : \textbackslash
3 : \footnote{footnote}
4 : [
5 : \begin{generic} unknown inline environment \end{generic}
6 : \begin{small} known inline environment \end{small}
7: {\em block}
......@@ -26,6 +26,11 @@
8 : Unknwon \xy commands within paragraphs may be simple
or \xy{complex}.
9 : paragraphs may contain all of these: \{ escaped \} characters,
{\bf blocks}, [ brackets ], \begin{tiny} environments \end{tiny}
and \textbackslash text-commands or other commands like this
\footnote{footnote}
[fail:paragraph]
1 : \begin{enumerate}
......
......@@ -34,20 +34,27 @@
[match:inline_environment]
1 : """\begin{generic}inline environment\end{generic}
"""
1 : """\begin{generic}inline environment\end{generic}"""
2 : """\begin{generic}inline environment
\end{generic}
"""
\end{generic}"""
3 : "$ inline math $"
[fail:inline_environment]
3 : """\begin{generic}
invalid enivronment \end{generic}
"""
[match:paragraph]
1 : """\begin{generic}inline environment\end{generic}
"""
2 : """\begin{generic}inline environment
\end{generic}
"""
[match:itemize]
1 : \begin{itemize}
\item Items doe not need to be
......@@ -133,4 +140,3 @@
\end{itemize}
as a separate paragraph
\end{enumerate}
[match:tabular]
1 : \begin{tabular}{c|c|}
& $S_1$ \\ \cline{1-2}
$A_1$ & $r_1$ \\ \cline{1-2}
$A_2$ & $r_2$ \\ \cline{1-2}
\end{tabular}}
2 : \begin{tabular}{c|c|c|c|cc|c|c|c|}
\multicolumn{1}{c}{} & \multicolumn{3}{c}{Tabelle 1:} &
\multicolumn{2}{c}{} & \multicolumn{3}{c}{Tabelle 2:}
\\
\cline{2-4} \cline{7-9}
$A_1$ & 7 & 0 & 4 & & $A_1$ & 5 & 20 & 6 \\
\cline{2-4} \cline{7-9}
$A_2$ & 5 & 21 & 11 & & $A_2$ & -3 & 8 & 10 \\
\cline{2-4} \cline{7-9}
$A_3$ & 10 & -5 & -1 & & $A_3$ & 4 & 5 & 9 \\
\cline{2-4} \cline{7-9}
\end{tabular}
[match:figure]
1 : \begin{figure}
\doublespacing
\begin{center}
\begin{tabular}{l|c|c|c|}
\multicolumn{1}{c}{ } & \multicolumn{1}{c}{ } & \multicolumn{2}{c}{$\overbrace{\hspace{7cm}}^{Experiments}$} \\ \cline{2-4}
& {\bf computer simulation} & {\bf analog simulation} & {\bf plain experiment} \\ \hline
materiality of object
& semantic & \multicolumn{2}{c|}{material} \\ \hline
relation to target
& \multicolumn{2}{c|}{representation} & representative \\ \hline
\multicolumn{1}{c}{ } & \multicolumn{2}{c}{$\underbrace{\hspace{7cm}}_{Simulations}$} & \multicolumn{1}{c}{ } \\
\end{tabular}
\end{center}
\caption{Conceptual relation of simulations and experiments}\label{SimulationExperimentsScheme}
\end{figure}
[match:text]
1: Some plain text
2: Text containing [ brackets ] is distinguished from cfgtext
3: Text can also "escaped" characters like \&, \% or \_ .
4: Text can contain both brackets [] and "esacped" characters \& .
......@@ -35,7 +35,7 @@ if not DHParser.dsl.recompile_grammar('LaTeX.ebnf', force=True): # recompiles G
from DHParser import toolkit
from LaTeXCompiler import get_grammar, get_transformer
with toolkit.logging(False):
with toolkit.logging(True):
error_report = testing.grammar_suite('grammar_tests', get_grammar,
get_transformer, report=True, verbose=True)
if error_report:
......
......@@ -24,7 +24,7 @@ from functools import partial
sys.path.extend(['../', './'])
from DHParser.syntaxtree import TOKEN_PTYPE, mock_syntax_tree, oneliner_sxpr
from DHParser.syntaxtree import TOKEN_PTYPE, mock_syntax_tree, flatten_sxpr
from DHParser.transform import traverse, remove_expendables, \
replace_by_single_child, reduce_single_child, flatten
from DHParser.dsl import grammar_provider
......@@ -118,25 +118,25 @@ class TestSExpr:
Tests for S-expression handling.
"""
def test_compact_sexpr(self):
    """A multi-line S-expression is flattened onto a single line."""
    assert flatten_sxpr("(a\n (b\n c\n )\n)\n") == "(a (b c))"
def test_mock_syntax_tree(self):
    """Trees built by `mock_syntax_tree` serialize back to the expected
    flattened S-expression, for several quoting styles of the leaf data."""
    sexpr = '(a (b c) (d e) (f (g h)))'
    tree = mock_syntax_tree(sexpr)
    assert flatten_sxpr(tree.as_sxpr().replace('"', '')) == sexpr

    # test different quotation marks
    sexpr = '''(a (b """c""" 'k' "l") (d e) (f (g h)))'''
    sexpr_stripped = '(a (b c k l) (d e) (f (g h)))'
    tree = mock_syntax_tree(sexpr)
    assert flatten_sxpr(tree.as_sxpr().replace('"', '')) == sexpr_stripped

    # explicitly quoted leaves are reproduced verbatim
    sexpr_clean = '(a (b "c" "k" "l") (d "e") (f (g "h")))'
    tree = mock_syntax_tree(sexpr_clean)
    assert flatten_sxpr(tree.as_sxpr()) == sexpr_clean

    # unquoted leaf words of one node end up in a single quoted string
    tree = mock_syntax_tree(sexpr_stripped)
    assert flatten_sxpr(tree.as_sxpr()) == '(a (b "c k l") (d "e") (f (g "h")))'
def test_mock_syntax_tree_with_classes(self):
sexpr = '(a:class1 (b:class2 x) (:class3 y) (c z))'
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment