Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
d434f8ae
Commit
d434f8ae
authored
Aug 02, 2017
by
Eckhart Arnold
Browse files
- LaTeX test_blockenv.ini: bugfix
parent
9b9293f1
Changes
4
Hide whitespace changes
Inline
Side-by-side
examples/LaTeX/LaTeX.ebnf
View file @
d434f8ae
...
...
@@ -57,8 +57,8 @@ block_environment = known_environment | generic_block
known_environment = itemize | enumerate | figure | table | quotation
| verbatim
generic_block = begin_generic_block sequence §end_generic_block
begin_generic_block = -&LB begin_environment
(EOF |
-&LB
)
end_generic_block = -&LB end_environment
(EOF |
-&LB
)
begin_generic_block = -&LB begin_environment -&LB
end_generic_block = -&LB end_environment -&LB
itemize = "\begin{itemize}" [PARSEP] { item } §"\end{itemize}"
enumerate = "\begin{enumerate}" [PARSEP] {item } §"\end{enumerate}"
...
...
@@ -104,12 +104,14 @@ footnote = "\footnote" block_of_paragraphs
includegraphics = "\includegraphics" config block
caption = "\caption" block
#######################################################################
#
# low-level text and character sequences
#
#######################################################################
config = "[" cfgtext §"]"
block = /{/ { text_elements } §/}/
...
...
@@ -133,6 +135,7 @@ structural = "subsection" | "section" | "chapter" | "subsubsection"
#
#######################################################################
CMDNAME = /\\(?:(?!_)\w)+/~
NAME = /\w+/~
MATH = /[\w_^{}[\]]*/~
...
...
@@ -147,4 +150,5 @@ PARSEP = /[ \t]*(?:\n[ \t]*)+\n[ \t]*/ # at least one empty line, i.e.
# [whitespace] linefeed [whitespace] linefeed
EOF = /(?!.)/
LB = /\s*?\n|\s*?$/ # backwards line break for Lookbehind-Operator
LB = /\s*?\n|$/ # backwards line break for Lookbehind-Operator
# beginning of text marker '$' added for test code
\ No newline at end of file
examples/LaTeX/LaTeXCompiler.py
View file @
d434f8ae
...
...
@@ -108,8 +108,8 @@ class LaTeXGrammar(Grammar):
known_environment = itemize | enumerate | figure | table | quotation
| verbatim
generic_block = begin_generic_block sequence §end_generic_block
begin_generic_block = -&LB begin_environment
(EOF |
-&LB
)
end_generic_block = -&LB end_environment
(EOF |
-&LB
)
begin_generic_block = -&LB begin_environment -&LB
end_generic_block = -&LB end_environment -&LB
itemize = "\begin{itemize}" [PARSEP] { item } §"\end{itemize}"
enumerate = "\begin{enumerate}" [PARSEP] {item } §"\end{enumerate}"
...
...
@@ -155,12 +155,14 @@ class LaTeXGrammar(Grammar):
includegraphics = "\includegraphics" config block
caption = "\caption" block
#######################################################################
#
# low-level text and character sequences
#
#######################################################################
config = "[" cfgtext §"]"
block = /{/ { text_elements } §/}/
...
...
@@ -184,6 +186,7 @@ class LaTeXGrammar(Grammar):
#
#######################################################################
CMDNAME = /\\(?:(?!_)\w)+/~
NAME = /\w+/~
MATH = /[\w_^{}[\]]*/~
...
...
@@ -198,20 +201,21 @@ class LaTeXGrammar(Grammar):
# [whitespace] linefeed [whitespace] linefeed
EOF = /(?!.)/
LB = /\s*?\n|\s*?$/ # backwards line break for Lookbehind-Operator
LB = /\s*?\n|$/ # backwards line break for Lookbehind-Operator
# beginning of text marker '$' added for test code
"""
begin_generic_block
=
Forward
()
block_environment
=
Forward
()
block_of_paragraphs
=
Forward
()
end_generic_block
=
Forward
()
text_elements
=
Forward
()
source_hash__
=
"
f941997b8aca0a8aa2d2f38cb52818eb
"
source_hash__
=
"
7f03d711d094ceb016614cec9e954fe3
"
parser_initialization__
=
"upon instantiation"
COMMENT__
=
r
'%.*(?:\n|$)'
WSP__
=
mixin_comment
(
whitespace
=
r
'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?'
,
comment
=
r
'%.*(?:\n|$)'
)
wspL__
=
''
wspR__
=
WSP__
LB
=
RegExp
(
'
\\
s*?
\\
n|
\\
s*?
$'
)
LB
=
RegExp
(
'
\\
s*?
\\
n|$'
)
EOF
=
RegExp
(
'(?!.)'
)
PARSEP
=
RegExp
(
'[
\\
t]*(?:
\\
n[
\\
t]*)+
\\
n[
\\
t]*'
)
LF
=
Series
(
NegativeLookahead
(
PARSEP
),
RegExp
(
'[
\\
t]*
\\
n[
\\
t]*'
))
...
...
@@ -256,8 +260,8 @@ class LaTeXGrammar(Grammar):
item
=
Series
(
Token
(
"
\\
item"
),
Optional
(
PARSEP
),
sequence
)
enumerate
=
Series
(
Token
(
"
\\
begin{enumerate}"
),
Optional
(
PARSEP
),
ZeroOrMore
(
item
),
Required
(
Token
(
"
\\
end{enumerate}"
)))
itemize
=
Series
(
Token
(
"
\\
begin{itemize}"
),
Optional
(
PARSEP
),
ZeroOrMore
(
item
),
Required
(
Token
(
"
\\
end{itemize}"
)))
end_generic_block
.
set
(
Series
(
Lookbehind
(
LB
),
end_environment
,
Alternative
(
EOF
,
Lookbehind
(
LB
)))
)
begin_generic_block
.
set
(
Series
(
Lookbehind
(
LB
),
begin_environment
,
Alternative
(
EOF
,
Lookbehind
(
LB
)))
)
end_generic_block
.
set
(
Series
(
Lookbehind
(
LB
),
end_environment
,
Lookbehind
(
LB
)))
begin_generic_block
.
set
(
Series
(
Lookbehind
(
LB
),
begin_environment
,
Lookbehind
(
LB
)))
generic_block
=
Series
(
begin_generic_block
,
sequence
,
Required
(
end_generic_block
))
known_environment
=
Alternative
(
itemize
,
enumerate
,
figure
,
table
,
quotation
,
verbatim
)
block_environment
.
set
(
Alternative
(
known_environment
,
generic_block
))
...
...
examples/LaTeX/grammar_tests/test_blockenv.ini
View file @
d434f8ae
[match:block_environment]
1
:
\begin{generic}
1
:
"""
\begin{generic}
A
generic
block
element
is
a
block
element
that
is
unknown
to
DHParser.
...
...
@@ -7,12 +7,14 @@
considered
as
block
elements
and
not
as
inline
elements.
\end{generic}
"""
2
:
\begin{generic}
2
:
"""
\begin{generic}
a
single
block
paragraph
\end{generic}
\end{generic}
%
ending
with
%
a
comment
"""
3
:
\begin{quote}
a
known
block
element
\end{quote}
test/re_slow_testcase.py
0 → 100755
View file @
d434f8ae
#!/usr/bin/env python3
"""This testcase exposes a performance bug in the `re`-module of
the python standard-library:

>>> import re, timeit
>>> rx = re.compile('(\\s*(#.*)?\\s*)*X')
>>> print(timeit.timeit("rx.match(' # ')", number=1, globals=globals()))
24.814577618999465
>>> print(timeit.timeit("rx.match(' # ')", number=1, globals=globals()))
291.2432912450022

Please note the number of repetitions: number=1 !!!

The script times a single match attempt of the same pattern, first with
the third-party `regex` module (only if it is installed) and then with
the `re` module of the standard library, and prints both timings.

Requires Python 3.5 or newer, because `timeit.timeit()` is called with
its `globals` parameter.
"""

import re
import timeit

# The pattern under test; identical for both regular expression engines.
PATTERN = r'(\s*(#.*)?\s*)*X'

# The statement handed to timeit. It expects a compiled pattern named `rx`
# in its namespace.
# NOTE(review): the subject string is reproduced exactly as it appears here;
# the timings quoted in the module docstring were presumably measured with
# longer runs of whitespace -- confirm against the repository version.
STATEMENT = "rx.match(' # ')"


def time_match(regex_module):
    """Return the seconds needed for one match attempt with *regex_module*.

    *regex_module* must offer a `compile()` function like that of `re`.
    The compiled pattern is passed to timeit as the name `rx`, and the
    statement is executed exactly once (number=1).
    """
    rx = regex_module.compile(PATTERN)
    return timeit.timeit(STATEMENT, number=1, globals={"rx": rx})


# `regex` is an optional third-party package. Only the import itself is
# guarded, so that an ImportError raised anywhere else is not swallowed.
try:
    import regex
except ImportError:
    regex = None

if regex is not None:
    print("The 'new' regex module:")
    print(time_match(regex))

print("The re module of the Python standard library:")
print(time_match(re))
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment