2.12.2021, 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit d434f8ae authored by Eckhart Arnold's avatar Eckhart Arnold
Browse files

- LaTeX test_blockenv.ini: bugfix

parent 9b9293f1
...@@ -57,8 +57,8 @@ block_environment = known_environment | generic_block ...@@ -57,8 +57,8 @@ block_environment = known_environment | generic_block
known_environment = itemize | enumerate | figure | table | quotation known_environment = itemize | enumerate | figure | table | quotation
| verbatim | verbatim
generic_block = begin_generic_block sequence §end_generic_block generic_block = begin_generic_block sequence §end_generic_block
begin_generic_block = -&LB begin_environment (EOF | -&LB) begin_generic_block = -&LB begin_environment -&LB
end_generic_block = -&LB end_environment (EOF | -&LB) end_generic_block = -&LB end_environment -&LB
itemize = "\begin{itemize}" [PARSEP] { item } §"\end{itemize}" itemize = "\begin{itemize}" [PARSEP] { item } §"\end{itemize}"
enumerate = "\begin{enumerate}" [PARSEP] {item } §"\end{enumerate}" enumerate = "\begin{enumerate}" [PARSEP] {item } §"\end{enumerate}"
...@@ -104,12 +104,14 @@ footnote = "\footnote" block_of_paragraphs ...@@ -104,12 +104,14 @@ footnote = "\footnote" block_of_paragraphs
includegraphics = "\includegraphics" config block includegraphics = "\includegraphics" config block
caption = "\caption" block caption = "\caption" block
####################################################################### #######################################################################
# #
# low-level text and character sequences # low-level text and character sequences
# #
####################################################################### #######################################################################
config = "[" cfgtext §"]" config = "[" cfgtext §"]"
block = /{/ { text_elements } §/}/ block = /{/ { text_elements } §/}/
...@@ -133,6 +135,7 @@ structural = "subsection" | "section" | "chapter" | "subsubsection" ...@@ -133,6 +135,7 @@ structural = "subsection" | "section" | "chapter" | "subsubsection"
# #
####################################################################### #######################################################################
CMDNAME = /\\(?:(?!_)\w)+/~ CMDNAME = /\\(?:(?!_)\w)+/~
NAME = /\w+/~ NAME = /\w+/~
MATH = /[\w_^{}[\]]*/~ MATH = /[\w_^{}[\]]*/~
...@@ -147,4 +150,5 @@ PARSEP = /[ \t]*(?:\n[ \t]*)+\n[ \t]*/ # at least one empty line, i.e. ...@@ -147,4 +150,5 @@ PARSEP = /[ \t]*(?:\n[ \t]*)+\n[ \t]*/ # at least one empty line, i.e.
# [whitespace] linefeed [whitespace] linefeed # [whitespace] linefeed [whitespace] linefeed
EOF = /(?!.)/ EOF = /(?!.)/
LB = /\s*?\n|\s*?$/ # backwards line break for Lookbehind-Operator LB = /\s*?\n|$/ # backwards line break for Lookbehind-Operator
# beginning of text marker '$' added for test code
\ No newline at end of file
...@@ -108,8 +108,8 @@ class LaTeXGrammar(Grammar): ...@@ -108,8 +108,8 @@ class LaTeXGrammar(Grammar):
known_environment = itemize | enumerate | figure | table | quotation known_environment = itemize | enumerate | figure | table | quotation
| verbatim | verbatim
generic_block = begin_generic_block sequence §end_generic_block generic_block = begin_generic_block sequence §end_generic_block
begin_generic_block = -&LB begin_environment (EOF | -&LB) begin_generic_block = -&LB begin_environment -&LB
end_generic_block = -&LB end_environment (EOF | -&LB) end_generic_block = -&LB end_environment -&LB
itemize = "\begin{itemize}" [PARSEP] { item } §"\end{itemize}" itemize = "\begin{itemize}" [PARSEP] { item } §"\end{itemize}"
enumerate = "\begin{enumerate}" [PARSEP] {item } §"\end{enumerate}" enumerate = "\begin{enumerate}" [PARSEP] {item } §"\end{enumerate}"
...@@ -155,12 +155,14 @@ class LaTeXGrammar(Grammar): ...@@ -155,12 +155,14 @@ class LaTeXGrammar(Grammar):
includegraphics = "\includegraphics" config block includegraphics = "\includegraphics" config block
caption = "\caption" block caption = "\caption" block
####################################################################### #######################################################################
# #
# low-level text and character sequences # low-level text and character sequences
# #
####################################################################### #######################################################################
config = "[" cfgtext §"]" config = "[" cfgtext §"]"
block = /{/ { text_elements } §/}/ block = /{/ { text_elements } §/}/
...@@ -184,6 +186,7 @@ class LaTeXGrammar(Grammar): ...@@ -184,6 +186,7 @@ class LaTeXGrammar(Grammar):
# #
####################################################################### #######################################################################
CMDNAME = /\\(?:(?!_)\w)+/~ CMDNAME = /\\(?:(?!_)\w)+/~
NAME = /\w+/~ NAME = /\w+/~
MATH = /[\w_^{}[\]]*/~ MATH = /[\w_^{}[\]]*/~
...@@ -198,20 +201,21 @@ class LaTeXGrammar(Grammar): ...@@ -198,20 +201,21 @@ class LaTeXGrammar(Grammar):
# [whitespace] linefeed [whitespace] linefeed # [whitespace] linefeed [whitespace] linefeed
EOF = /(?!.)/ EOF = /(?!.)/
LB = /\s*?\n|\s*?$/ # backwards line break for Lookbehind-Operator LB = /\s*?\n|$/ # backwards line break for Lookbehind-Operator
# beginning of text marker '$' added for test code
""" """
begin_generic_block = Forward() begin_generic_block = Forward()
block_environment = Forward() block_environment = Forward()
block_of_paragraphs = Forward() block_of_paragraphs = Forward()
end_generic_block = Forward() end_generic_block = Forward()
text_elements = Forward() text_elements = Forward()
source_hash__ = "f941997b8aca0a8aa2d2f38cb52818eb" source_hash__ = "7f03d711d094ceb016614cec9e954fe3"
parser_initialization__ = "upon instantiation" parser_initialization__ = "upon instantiation"
COMMENT__ = r'%.*(?:\n|$)' COMMENT__ = r'%.*(?:\n|$)'
WSP__ = mixin_comment(whitespace=r'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?', comment=r'%.*(?:\n|$)') WSP__ = mixin_comment(whitespace=r'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?', comment=r'%.*(?:\n|$)')
wspL__ = '' wspL__ = ''
wspR__ = WSP__ wspR__ = WSP__
LB = RegExp('\\s*?\\n|\\s*?$') LB = RegExp('\\s*?\\n|$')
EOF = RegExp('(?!.)') EOF = RegExp('(?!.)')
PARSEP = RegExp('[ \\t]*(?:\\n[ \\t]*)+\\n[ \\t]*') PARSEP = RegExp('[ \\t]*(?:\\n[ \\t]*)+\\n[ \\t]*')
LF = Series(NegativeLookahead(PARSEP), RegExp('[ \\t]*\\n[ \\t]*')) LF = Series(NegativeLookahead(PARSEP), RegExp('[ \\t]*\\n[ \\t]*'))
...@@ -256,8 +260,8 @@ class LaTeXGrammar(Grammar): ...@@ -256,8 +260,8 @@ class LaTeXGrammar(Grammar):
item = Series(Token("\\item"), Optional(PARSEP), sequence) item = Series(Token("\\item"), Optional(PARSEP), sequence)
enumerate = Series(Token("\\begin{enumerate}"), Optional(PARSEP), ZeroOrMore(item), Required(Token("\\end{enumerate}"))) enumerate = Series(Token("\\begin{enumerate}"), Optional(PARSEP), ZeroOrMore(item), Required(Token("\\end{enumerate}")))
itemize = Series(Token("\\begin{itemize}"), Optional(PARSEP), ZeroOrMore(item), Required(Token("\\end{itemize}"))) itemize = Series(Token("\\begin{itemize}"), Optional(PARSEP), ZeroOrMore(item), Required(Token("\\end{itemize}")))
end_generic_block.set(Series(Lookbehind(LB), end_environment, Alternative(EOF, Lookbehind(LB)))) end_generic_block.set(Series(Lookbehind(LB), end_environment, Lookbehind(LB)))
begin_generic_block.set(Series(Lookbehind(LB), begin_environment, Alternative(EOF, Lookbehind(LB)))) begin_generic_block.set(Series(Lookbehind(LB), begin_environment, Lookbehind(LB)))
generic_block = Series(begin_generic_block, sequence, Required(end_generic_block)) generic_block = Series(begin_generic_block, sequence, Required(end_generic_block))
known_environment = Alternative(itemize, enumerate, figure, table, quotation, verbatim) known_environment = Alternative(itemize, enumerate, figure, table, quotation, verbatim)
block_environment.set(Alternative(known_environment, generic_block)) block_environment.set(Alternative(known_environment, generic_block))
......
[match:block_environment] [match:block_environment]
1 : \begin{generic} 1 : """\begin{generic}
A generic block element is a block element A generic block element is a block element
that is unknown to DHParser. that is unknown to DHParser.
...@@ -7,12 +7,14 @@ ...@@ -7,12 +7,14 @@
considered as block elements and not considered as block elements and not
as inline elements. as inline elements.
\end{generic} \end{generic}
"""
2 : \begin{generic} 2 : """\begin{generic}
a single block paragraph a single block paragraph
\end{generic} \end{generic} % ending with
% a comment
"""
3 : \begin{quote} 3 : \begin{quote}
a known block element a known block element
\end{quote} \end{quote}
#!/usr/bin/python
"""This test case exposes a performance bug in the `re` module of
the Python standard library:

>>> import re, timeit
>>> rx = re.compile('(\\s*(#.*)?\\s*)*X')
>>> print(timeit.timeit("rx.match(' # ')", number=1, globals=globals()))
24.814577618999465
>>> print(timeit.timeit("rx.match(' # ')", number=1, globals=globals()))
291.2432912450022

Please note the number of repetitions: number=1 !!!

The script times a single ``match`` call of the same pattern, first with
the third-party `regex` module (if it is installed) and then with the
standard library's `re` module, and prints both timings.
"""
import re
import timeit

# NOTE(review): the subject string ' # ' is reproduced exactly as found.
# The two doctest calls above look identical yet report very different
# timings, so the original subject strings presumably contained longer
# (and differing) runs of whitespace that were collapsed somewhere along
# the way -- TODO confirm against the original file.

# The `regex` module is optional: only the import is guarded, so an
# unrelated failure in the benchmark itself is never silently swallowed.
try:
    import regex
except ImportError:
    pass
else:
    # `rx` must be a module-level name: the timed statement looks it up
    # through ``globals=globals()``.
    rx = regex.compile(r'(\s*(#.*)?\s*)*X')
    print("The 'new' regex module:")
    print(timeit.timeit("rx.match(' # ')",
                        number=1, globals=globals()))

# Same pattern, same subject, now with the standard library implementation.
# Rebinding `rx` makes the identical timed statement exercise `re`.
rx = re.compile(r'(\s*(#.*)?\s*)*X')
print("The re module of the Python standard library:")
print(timeit.timeit("rx.match(' # ')",
                    number=1, globals=globals()))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment