Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
9.2.2023: Due to updates GitLab will be unavailable for some minutes between 9:00 and 11:00.
Open sidebar
badw-it
DHParser
Commits
8caf886d
Commit
8caf886d
authored
Jul 10, 2017
by
Eckhart Arnold
Browse files
- LaTeX ebnf zusätzlich erweitert
parent
2cb7b778
Changes
5
Hide whitespace changes
Inline
Side-by-side
DHParser/toolkit.py
View file @
8caf886d
...
...
@@ -176,7 +176,7 @@ def repr_call(f, parameter_list) -> str:
def line_col(text: str, pos: int) -> Tuple[int, int]:
    """Return the position ``pos`` within ``text`` as a (line, column) tuple.

    Both line and column are 1-based: the line is one more than the number
    of newline characters before ``pos``, and the column is the distance
    from the last newline before ``pos`` (or from the start of the text if
    there is none).

    Args:
        text: The text that ``pos`` refers to.
        pos: A character offset into ``text``. ``pos == len(text)`` is
            allowed, i.e. the position may point one character past the
            end of the text.

    Returns:
        The tuple ``(line, column)``.

    Raises:
        AssertionError: If ``pos`` is greater than ``len(text)``.
    """
    # The bound is inclusive on purpose: pos can point one character
    # after EOF.
    assert pos <= len(text), str(pos) + " > " + str(len(text))
    line = text.count("\n", 0, pos) + 1
    # rfind yields -1 when there is no preceding newline, which makes the
    # column of the very first character 1.
    column = pos - text.rfind("\n", 0, pos)
    return line, column
...
...
examples/LaTeX/LaTeX.ebnf
View file @
8caf886d
...
...
@@ -53,13 +53,15 @@ Index = "\printindex" [PARSEP]
#### block environments ####
block_enrivonment = known_enrivonment | generic_enrivonment
known_enrivonment = itemize | enumerate | figure | table | quotation
# TODO: ambiguity between generic block environments and generic inline environments
block_environment = known_environment | generic_environment
known_environment = itemize | enumerate | figure | table | quotation
| verbatim
generic_en
riv
onment = begin_en
riv
onment sequence §end_en
riv
onment
generic_en
vir
onment = begin_en
vir
onment sequence §end_en
vir
onment
itemize = "\begin{itemize}" [PARSEP] { item } §"\end{itemize}"
enumerate = "\begin{enumerate}" [PARSEP] {item } §"end{enumerate}"
enumerate = "\begin{enumerate}" [PARSEP] {item } §"
\
end{enumerate}"
item = "\item" [PARSEP] sequence
figure = "\begin{figure}" sequence "\end{figure}"
...
...
@@ -73,19 +75,19 @@ table_config = "{" /[lcr|]+/~ "}"
#### paragraphs and sequences of paragraphs ####
block_of_paragraphs = /{/ sequence §/}/
sequence = { (paragraph | block_en
riv
onment ) [PARSEP] }+
sequence = { (paragraph | block_en
vir
onment ) [PARSEP] }+
paragraph = { !blockcmd text_elements //~ }+
text_elements = command | text | block | inline_en
riv
onment
text_elements = command | text | block | inline_en
vir
onment
#### inline environments ####
inline_en
riv
onment = known_inline_env | generic_inline_env
inline_en
vir
onment = known_inline_env | generic_inline_env
known_inline_env = inline_math
generic_inline_env = begin_en
riv
onment { text_elements }+ §end_en
riv
onment
begin_en
riv
onment = "\begin{" §NAME §"}"
end_en
riv
onment = "\end{" §::NAME §"}"
generic_inline_env = begin_en
vir
onment { text_elements }+ §end_en
vir
onment
begin_en
vir
onment = "\begin{" §NAME §"}"
end_en
vir
onment = "\end{" §::NAME §"}"
inline_math = "$" MATH "$"
...
...
@@ -113,10 +115,13 @@ text = { cfgtext | (BRACKETS //~) }+
cfgtext = { word_sequence | (ESCAPED //~) }+
word_sequence = { TEXTCHUNK //~ }+
blockcmd = /[\\]/ ("begin{" ("enumerate" | "itemize" | "figure" | "quote"
| "quotation" | "tabular") "}"
| "subsection" | "section" | "chapter" | "subsubsection"
| "paragraph" | "subparagraph" | "item")
blockcmd = /[\\]/ ( ( "begin{" | "end{" )
( "enumerate" | "itemize" | "figure" | "quote"
| "quotation" | "tabular") "}"
| structural)
structural = "subsection" | "section" | "chapter" | "subsubsection"
| "paragraph" | "subparagraph" | "item"
#######################################################################
...
...
examples/LaTeX/LaTeXCompiler.py
View file @
8caf886d
...
...
@@ -103,13 +103,15 @@ class LaTeXGrammar(Grammar):
#### block environments ####
block_enrivonment = known_enrivonment | generic_enrivonment
known_enrivonment = itemize | enumerate | figure | table | quotation
# TODO: ambiguity between generic block environments and generic inline environments
block_environment = known_environment | generic_environment
known_environment = itemize | enumerate | figure | table | quotation
| verbatim
generic_en
riv
onment = begin_en
riv
onment sequence §end_en
riv
onment
generic_en
vir
onment = begin_en
vir
onment sequence §end_en
vir
onment
itemize = "\begin{itemize}" [PARSEP] { item } §"\end{itemize}"
enumerate = "\begin{enumerate}" [PARSEP] {item } §"end{enumerate}"
enumerate = "\begin{enumerate}" [PARSEP] {item } §"
\
end{enumerate}"
item = "\item" [PARSEP] sequence
figure = "\begin{figure}" sequence "\end{figure}"
...
...
@@ -123,19 +125,19 @@ class LaTeXGrammar(Grammar):
#### paragraphs and sequences of paragraphs ####
block_of_paragraphs = /{/ sequence §/}/
sequence = { (paragraph | block_en
riv
onment ) [PARSEP] }+
sequence = { (paragraph | block_en
vir
onment ) [PARSEP] }+
paragraph = { !blockcmd text_elements //~ }+
text_elements = command | text | block | inline_en
riv
onment
text_elements = command | text | block | inline_en
vir
onment
#### inline environments ####
inline_en
riv
onment = known_inline_env | generic_inline_env
inline_en
vir
onment = known_inline_env | generic_inline_env
known_inline_env = inline_math
generic_inline_env = begin_en
riv
onment { text_elements }+ §end_en
riv
onment
begin_en
riv
onment = "\begin{" §NAME §"}"
end_en
riv
onment = "\end{" §::NAME §"}"
generic_inline_env = begin_en
vir
onment { text_elements }+ §end_en
vir
onment
begin_en
vir
onment = "\begin{" §NAME §"}"
end_en
vir
onment = "\end{" §::NAME §"}"
inline_math = "$" MATH "$"
...
...
@@ -163,10 +165,13 @@ class LaTeXGrammar(Grammar):
cfgtext = { word_sequence | (ESCAPED //~) }+
word_sequence = { TEXTCHUNK //~ }+
blockcmd = /[\\]/ ("begin{" ("enumerate" | "itemize" | "figure" | "quote"
| "quotation" | "tabular") "}"
| "subsection" | "section" | "chapter" | "subsubsection"
| "paragraph" | "subparagraph" | "item")
blockcmd = /[\\]/ ( ( "begin{" | "end{" )
( "enumerate" | "itemize" | "figure" | "quote"
| "quotation" | "tabular") "}"
| structural)
structural = "subsection" | "section" | "chapter" | "subsubsection"
| "paragraph" | "subparagraph" | "item"
#######################################################################
...
...
@@ -189,10 +194,10 @@ class LaTeXGrammar(Grammar):
# [whitespace] linefeed [whitespace] linefeed
EOF = !/./
"""
block_en
riv
onment
=
Forward
()
block_en
vir
onment
=
Forward
()
block_of_paragraphs
=
Forward
()
text_elements
=
Forward
()
source_hash__
=
"9
f1579db1994211dc53dd4a8f317bfb6
"
source_hash__
=
"9
a8cba2b425d276af78e141d7dda162c
"
parser_initialization__
=
"upon instantiation"
COMMENT__
=
r
'%.*(?:\n|$)'
WSP__
=
mixin_comment
(
whitespace
=
r
'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?'
,
comment
=
r
'%.*(?:\n|$)'
)
...
...
@@ -208,7 +213,13 @@ class LaTeXGrammar(Grammar):
MATH
=
RE
(
'[
\\
w_^{}[
\\
]]*'
)
NAME
=
Capture
(
RE
(
'
\\
w+'
))
CMDNAME
=
RE
(
'
\\\\
(?:(?!_)
\\
w)+'
)
blockcmd
=
Series
(
RE
(
'[
\\\\
]'
,
wR
=
''
),
Alternative
(
Series
(
Token
(
"begin{"
),
Alternative
(
Token
(
"enumerate"
),
Token
(
"itemize"
),
Token
(
"figure"
),
Token
(
"quote"
),
Token
(
"quotation"
),
Token
(
"tabular"
)),
Token
(
"}"
)),
Token
(
"subsection"
),
Token
(
"section"
),
Token
(
"chapter"
),
Token
(
"subsubsection"
),
Token
(
"paragraph"
),
Token
(
"subparagraph"
),
Token
(
"item"
)))
structural
=
Alternative
(
Token
(
"subsection"
),
Token
(
"section"
),
Token
(
"chapter"
),
Token
(
"subsubsection"
),
Token
(
"paragraph"
),
Token
(
"subparagraph"
),
Token
(
"item"
))
blockcmd
=
Series
(
RE
(
'[
\\\\
]'
,
wR
=
''
),
Alternative
(
Series
(
Alternative
(
Token
(
"begin{"
),
Token
(
"end{"
)),
Alternative
(
Token
(
"enumerate"
),
Token
(
"itemize"
),
Token
(
"figure"
),
Token
(
"quote"
),
Token
(
"quotation"
),
Token
(
"tabular"
)),
Token
(
"}"
)),
structural
))
word_sequence
=
OneOrMore
(
Series
(
TEXTCHUNK
,
RE
(
''
)))
cfgtext
=
OneOrMore
(
Alternative
(
word_sequence
,
Series
(
ESCAPED
,
RE
(
''
))))
text
=
OneOrMore
(
Alternative
(
cfgtext
,
Series
(
BRACKETS
,
RE
(
''
))))
...
...
@@ -221,14 +232,14 @@ class LaTeXGrammar(Grammar):
known_command
=
Alternative
(
footnote
,
includegraphics
,
caption
)
command
=
Alternative
(
known_command
,
generic_command
)
inline_math
=
Series
(
Token
(
"$"
),
MATH
,
Token
(
"$"
))
end_en
riv
onment
=
Series
(
Token
(
"
\\
end{"
),
Required
(
Pop
(
NAME
)),
Required
(
Token
(
"}"
)))
begin_en
riv
onment
=
Series
(
Token
(
"
\\
begin{"
),
Required
(
NAME
),
Required
(
Token
(
"}"
)))
generic_inline_env
=
Series
(
begin_en
riv
onment
,
OneOrMore
(
text_elements
),
Required
(
end_en
riv
onment
))
end_en
vir
onment
=
Series
(
Token
(
"
\\
end{"
),
Required
(
Pop
(
NAME
)),
Required
(
Token
(
"}"
)))
begin_en
vir
onment
=
Series
(
Token
(
"
\\
begin{"
),
Required
(
NAME
),
Required
(
Token
(
"}"
)))
generic_inline_env
=
Series
(
begin_en
vir
onment
,
OneOrMore
(
text_elements
),
Required
(
end_en
vir
onment
))
known_inline_env
=
Synonym
(
inline_math
)
inline_en
riv
onment
=
Alternative
(
known_inline_env
,
generic_inline_env
)
text_elements
.
set
(
Alternative
(
command
,
text
,
block
,
inline_en
riv
onment
))
inline_en
vir
onment
=
Alternative
(
known_inline_env
,
generic_inline_env
)
text_elements
.
set
(
Alternative
(
command
,
text
,
block
,
inline_en
vir
onment
))
paragraph
=
OneOrMore
(
Series
(
NegativeLookahead
(
blockcmd
),
text_elements
,
RE
(
''
)))
sequence
=
OneOrMore
(
Series
(
Alternative
(
paragraph
,
block_en
riv
onment
),
Optional
(
PARSEP
)))
sequence
=
OneOrMore
(
Series
(
Alternative
(
paragraph
,
block_en
vir
onment
),
Optional
(
PARSEP
)))
block_of_paragraphs
.
set
(
Series
(
RE
(
'{'
,
wR
=
''
),
sequence
,
Required
(
RE
(
'}'
,
wR
=
''
))))
table_config
=
Series
(
Token
(
"{"
),
RE
(
'[lcr|]+'
),
Token
(
"}"
))
table
=
Series
(
Token
(
"
\\
begin{tabular}"
),
table_config
,
sequence
,
Token
(
"
\\
end{tabular}"
))
...
...
@@ -236,11 +247,12 @@ class LaTeXGrammar(Grammar):
quotation
=
Alternative
(
Series
(
Token
(
"
\\
begin{quotation}"
),
sequence
,
Token
(
"
\\
end{quotation}"
)),
Series
(
Token
(
"
\\
begin{quote}"
),
sequence
,
Token
(
"
\\
end{quote}"
)))
figure
=
Series
(
Token
(
"
\\
begin{figure}"
),
sequence
,
Token
(
"
\\
end{figure}"
))
item
=
Series
(
Token
(
"
\\
item"
),
Optional
(
PARSEP
),
sequence
)
enumerate
=
Series
(
Token
(
"
\\
begin{enumerate}"
),
Optional
(
PARSEP
),
ZeroOrMore
(
item
),
Required
(
Token
(
"end{enumerate}"
)))
enumerate
=
Series
(
Token
(
"
\\
begin{enumerate}"
),
Optional
(
PARSEP
),
ZeroOrMore
(
item
),
Required
(
Token
(
"
\\
end{enumerate}"
)))
itemize
=
Series
(
Token
(
"
\\
begin{itemize}"
),
Optional
(
PARSEP
),
ZeroOrMore
(
item
),
Required
(
Token
(
"
\\
end{itemize}"
)))
generic_en
riv
onment
=
Series
(
begin_en
riv
onment
,
sequence
,
Required
(
end_en
riv
onment
))
known_en
riv
onment
=
Alternative
(
itemize
,
enumerate
,
figure
,
table
,
quotation
,
verbatim
)
block_en
riv
onment
.
set
(
Alternative
(
known_en
riv
onment
,
generic_en
riv
onment
))
generic_en
vir
onment
=
Series
(
begin_en
vir
onment
,
sequence
,
Required
(
end_en
vir
onment
))
known_en
vir
onment
=
Alternative
(
itemize
,
enumerate
,
figure
,
table
,
quotation
,
verbatim
)
block_en
vir
onment
.
set
(
Alternative
(
known_en
vir
onment
,
generic_en
vir
onment
))
Index
=
Series
(
Token
(
"
\\
printindex"
),
Optional
(
PARSEP
))
Bibliography
=
Series
(
Token
(
"
\\
bibliography"
),
block
,
Optional
(
PARSEP
))
SubParagraph
=
Series
(
Token
(
"
\\
subparagpaph"
),
block
,
Optional
(
PARSEP
),
ZeroOrMore
(
sequence
))
...
...
examples/LaTeX/grammar_tests/test_blockenv.ini
0 → 100644
View file @
8caf886d
[match:block_environment]
1
:
\begin{generic}
A
generic
block
element
is
a
block
element
that
is
unknown
to
DHParser.LaTeX.
Unknown
begin-end-structures
are
always
considered
as
block
elements
and
not
as
inline
elements.
\end{generic}
2
:
\begin{generic}
a
single
block
paragraph
\end{generic}
3
:
\begin{quote}
a
known
block
element
\end{quote}
examples/LaTeX/grammar_tests/test_paragraph.ini
View file @
8caf886d
...
...
@@ -20,6 +20,11 @@
1
:
\begin{enumerate}
2
:
\item
3
:
und
Vieh
; \paragraph
4
:
Paragraphs
will
end
\begin{quotation}
at
block
environments
\end{quotation}
like
block
quotes.
[match:sequence]
...
...
@@ -33,3 +38,9 @@
The
parser
should
accept
this,
too.
3
:
Sequences
of
paragraphs
may
\begin{quotation}
include
block
environments
\end{quotation}
like
block
quotes.
\ No newline at end of file
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment