11.3.2021, 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit d434f8ae authored by Eckhart Arnold's avatar Eckhart Arnold

- LaTeX test_blockenv.ini: bugfix

parent 9b9293f1
......@@ -57,8 +57,8 @@ block_environment = known_environment | generic_block
known_environment = itemize | enumerate | figure | table | quotation
| verbatim
generic_block = begin_generic_block sequence §end_generic_block
begin_generic_block = -&LB begin_environment (EOF | -&LB)
end_generic_block = -&LB end_environment (EOF | -&LB)
begin_generic_block = -&LB begin_environment -&LB
end_generic_block = -&LB end_environment -&LB
itemize = "\begin{itemize}" [PARSEP] { item } §"\end{itemize}"
enumerate = "\begin{enumerate}" [PARSEP] {item } §"\end{enumerate}"
......@@ -104,12 +104,14 @@ footnote = "\footnote" block_of_paragraphs
includegraphics = "\includegraphics" config block
caption = "\caption" block
#######################################################################
#
# low-level text and character sequences
#
#######################################################################
config = "[" cfgtext §"]"
block = /{/ { text_elements } §/}/
......@@ -133,6 +135,7 @@ structural = "subsection" | "section" | "chapter" | "subsubsection"
#
#######################################################################
CMDNAME = /\\(?:(?!_)\w)+/~
NAME = /\w+/~
MATH = /[\w_^{}[\]]*/~
......@@ -147,4 +150,5 @@ PARSEP = /[ \t]*(?:\n[ \t]*)+\n[ \t]*/ # at least one empty line, i.e.
# [whitespace] linefeed [whitespace] linefeed
EOF = /(?!.)/
LB = /\s*?\n|\s*?$/ # backwards line break for Lookbehind-Operator
LB = /\s*?\n|$/ # backwards line break for Lookbehind-Operator
# beginning of text marker '$' added for test code
\ No newline at end of file
......@@ -108,8 +108,8 @@ class LaTeXGrammar(Grammar):
known_environment = itemize | enumerate | figure | table | quotation
| verbatim
generic_block = begin_generic_block sequence §end_generic_block
begin_generic_block = -&LB begin_environment (EOF | -&LB)
end_generic_block = -&LB end_environment (EOF | -&LB)
begin_generic_block = -&LB begin_environment -&LB
end_generic_block = -&LB end_environment -&LB
itemize = "\begin{itemize}" [PARSEP] { item } §"\end{itemize}"
enumerate = "\begin{enumerate}" [PARSEP] {item } §"\end{enumerate}"
......@@ -155,12 +155,14 @@ class LaTeXGrammar(Grammar):
includegraphics = "\includegraphics" config block
caption = "\caption" block
#######################################################################
#
# low-level text and character sequences
#
#######################################################################
config = "[" cfgtext §"]"
block = /{/ { text_elements } §/}/
......@@ -184,6 +186,7 @@ class LaTeXGrammar(Grammar):
#
#######################################################################
CMDNAME = /\\(?:(?!_)\w)+/~
NAME = /\w+/~
MATH = /[\w_^{}[\]]*/~
......@@ -198,20 +201,21 @@ class LaTeXGrammar(Grammar):
# [whitespace] linefeed [whitespace] linefeed
EOF = /(?!.)/
LB = /\s*?\n|\s*?$/ # backwards line break for Lookbehind-Operator
LB = /\s*?\n|$/ # backwards line break for Lookbehind-Operator
# beginning of text marker '$' added for test code
"""
begin_generic_block = Forward()
block_environment = Forward()
block_of_paragraphs = Forward()
end_generic_block = Forward()
text_elements = Forward()
source_hash__ = "f941997b8aca0a8aa2d2f38cb52818eb"
source_hash__ = "7f03d711d094ceb016614cec9e954fe3"
parser_initialization__ = "upon instantiation"
COMMENT__ = r'%.*(?:\n|$)'
WSP__ = mixin_comment(whitespace=r'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?', comment=r'%.*(?:\n|$)')
wspL__ = ''
wspR__ = WSP__
LB = RegExp('\\s*?\\n|\\s*?$')
LB = RegExp('\\s*?\\n|$')
EOF = RegExp('(?!.)')
PARSEP = RegExp('[ \\t]*(?:\\n[ \\t]*)+\\n[ \\t]*')
LF = Series(NegativeLookahead(PARSEP), RegExp('[ \\t]*\\n[ \\t]*'))
......@@ -256,8 +260,8 @@ class LaTeXGrammar(Grammar):
item = Series(Token("\\item"), Optional(PARSEP), sequence)
enumerate = Series(Token("\\begin{enumerate}"), Optional(PARSEP), ZeroOrMore(item), Required(Token("\\end{enumerate}")))
itemize = Series(Token("\\begin{itemize}"), Optional(PARSEP), ZeroOrMore(item), Required(Token("\\end{itemize}")))
end_generic_block.set(Series(Lookbehind(LB), end_environment, Alternative(EOF, Lookbehind(LB))))
begin_generic_block.set(Series(Lookbehind(LB), begin_environment, Alternative(EOF, Lookbehind(LB))))
end_generic_block.set(Series(Lookbehind(LB), end_environment, Lookbehind(LB)))
begin_generic_block.set(Series(Lookbehind(LB), begin_environment, Lookbehind(LB)))
generic_block = Series(begin_generic_block, sequence, Required(end_generic_block))
known_environment = Alternative(itemize, enumerate, figure, table, quotation, verbatim)
block_environment.set(Alternative(known_environment, generic_block))
......
[match:block_environment]
1 : \begin{generic}
1 : """\begin{generic}
A generic block element is a block element
that is unknown to DHParser.
......@@ -7,12 +7,14 @@
considered as block elements and not
as inline elements.
\end{generic}
"""
2 : \begin{generic}
2 : """\begin{generic}
a single block paragraph
\end{generic}
\end{generic} % ending with
% a comment
"""
3 : \begin{quote}
a known block element
\end{quote}
#!/usr/bin/python
r"""Expose a performance bug in the `re` module of the Python standard library.

The pattern below nests optional, possibly empty repetitions inside a
repeated group. When the final ``X`` cannot be matched, the matcher tries
the many ways of splitting the leading whitespace between the inner
``\s*`` parts, which can take a very long time:

>>> import re, timeit
>>> rx = re.compile('(\\s*(#.*)?\\s*)*X')
>>> print(timeit.timeit("rx.match(' # ')", number=1, globals=globals()))
24.814577618999465
>>> print(timeit.timeit("rx.match(' # ')", number=1, globals=globals()))
291.2432912450022

Please note the number of repetitions: number=1 !!!

The script times the same match first with the optional third-party
`regex` module (if it is installed) and then with the standard `re` module.
"""
# NOTE(review): the two timings quoted in the docstring differ by an order of
# magnitude although the test strings shown are identical; presumably the
# original inputs contained whitespace runs of different lengths -- confirm
# against the original file before relying on these numbers.

import re
import timeit

try:
    import regex
except ImportError:
    # The 'regex' package is optional; without it only `re` is measured.
    pass
else:
    rx = regex.compile(r'(\s*(#.*)?\s*)*X')
    print("The 'new' regex module:")
    # timeit resolves `rx` through globals(), so `rx` must stay module-level.
    print(timeit.timeit("rx.match(' # ')",
                        number=1, globals=globals()))

# Rebind `rx` to the standard-library implementation and repeat the
# identical measurement for comparison.
rx = re.compile(r'(\s*(#.*)?\s*)*X')
print("The re module of the Python standard library:")
print(timeit.timeit("rx.match(' # ')",
                    number=1, globals=globals()))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment