Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
efb8af4d
Commit
efb8af4d
authored
Jul 09, 2017
by
Eckhart Arnold
Browse files
- LaTeX ebnf zusätzlich erweitert
parent
968f1acc
Changes
2
Hide whitespace changes
Inline
Side-by-side
examples/LaTeX/LaTeX.ebnf
View file @
efb8af4d
...
...
@@ -10,8 +10,8 @@ preamble = { command }+
document = [PARSEP] "\begin{document}" [PARSEP]
frontpages [PARSEP]
(
c
hapters |
s
ections) [PARSEP]
[
b
ibliography] [
i
ndex] [PARSEP]
(
C
hapters |
S
ections) [PARSEP]
[
B
ibliography] [
I
ndex] [PARSEP]
"\end{document}" [PARSEP] §EOF
frontpages = sequence
...
...
@@ -31,7 +31,7 @@ Section = "\Section" block [PARSEP] { sequence | SubSections }
SubSections = { SubSection [PARSEP] }+
SubSection = "\SubSection" block [PARSEP] { sequence | SubSubSections }
SubSub
s
ections = { SubSubSection [PARSEP] }+
SubSub
S
ections = { SubSubSection [PARSEP] }+
SubSubSection = "\SubSubSection" block [PARSEP] { sequence | Paragraphs }
Paragraphs = { Paragraph [PARSEP] }+
...
...
@@ -40,8 +40,8 @@ Paragraph = "\paragraph" block [PARSEP] { sequence | SubParagraphs }
SubParagraphs = { SubParagraph [PARSEP] }+
SubParagraph = "\subparagpaph" block [PARSEP] { sequence }
b
ibliography = "\bibliography" block [PARSEP]
i
ndex = "\printindex" [PARSEP
B
ibliography = "\bibliography" block [PARSEP]
I
ndex = "\printindex" [PARSEP
]
#######################################################################
...
...
@@ -54,37 +54,45 @@ index = "\printindex" [PARSEP
#### block environments ####
block_enrivonment = known_enrivonment | generic_enrivonment
known_enrivonment = itemize | enumerate | figure | table
known_enrivonment = itemize | enumerate | figure | table | quotation
| verbatim
generic_enrivonment = begin_enrivonment sequence §end_enrivonment
itemize = "\begin{itemize}" [PARSEP] { item } §"\end{itemize}"
enumerate = "\begin{enumerate}" [PARSEP] {item } §"end{enumerate}"
item = "\item" [PARSEP] sequence
figure = "\begin{figure}" sequence "\end{figure}"
quotation = ("\begin{quotation}" sequence "\end{quotation}")
| ("\begin{quote}" sequence "\end{quote}")
verbatim = "\begin{verbatim}" sequence "\end{verbatim}"
table = "\begin{tabular}" table_config sequence "\end{tabular}"
table_config = "{" /[lcr|]+/~ "}"
#### paragraphs and sequences of paragraphs ####
block_of_paragraphs = /{/ sequence §/}/
sequence = { (paragraph | block_enrivonment ) [PARSEP] }+
paragraph = { !blockcmd textelements //~ }+
textelemts
= command | text | block | inline_enrivonment
paragraph = { !blockcmd text
_
elements //~ }+
text
_
elem
en
ts = command | text | block | inline_enrivonment
#### inline enivronments ####
inline_enrivonment = known_inline_env | generic_inline_env
known_inline_env = inline_math
generic_inline_env = begin_enrivonment { textelements }+ §end_enrivonment
generic_inline_env = begin_enrivonment { text
_
elements }+ §end_enrivonment
begin_enrivonment = "\begin{" §NAME §"}"
end_enrivonment = "\end{" §::NAME §"}"
inline_math = "$"
math_Text
"$"
inline_math = "$"
MATH
"$"
#### commands ####
command = known
d
_command | generic_command
command = known_command | generic_command
known_command = footnote
generic_command = CMDNAME [[ //~ config ] //~ block ]
...
...
@@ -98,15 +106,16 @@ footnote = "\footnote" block_of_paragraphs
#######################################################################
config = "[" cfgtext §"]"
block = /{/ { textelements } §/}/
block = /{/ { text
_
elements } §/}/
text = { cfgtext | (BRACKETS //~) }+
cfgtext = { word_sequence | (ESCAPED //~) }+
word_sequence = { TEXTCHUNK //~ }+
blockcmd = "\subsection" | "\section" | "\chapter" | "\subsubsection"
| "\paragraph" | "\subparagraph" | "\begin{enumerate}"
| "\begin{itemize}" | "\item" | "\begin{figure}"
| "\paragraph" | "\subparagraph" | "\item"
| "\begin{" ("enumerate" | "itemize" | "figure" | "quote"
| "quotation" | "tabular") "}"
#######################################################################
...
...
@@ -117,6 +126,7 @@ blockcmd = "\subsection" | "\section" | "\chapter" | "\subsubsection"
CMDNAME = /\\(?:(?!_)\w)+/~
NAME = /\w+/~
MATH = /[\w_^{}[\]]*/~
ESCAPED = /\\[%$&_\/]/
BRACKETS = /[\[\]]/ # left or right square bracket: [ ]
...
...
examples/LaTeX/LaTeXCompiler.py
View file @
efb8af4d
...
...
@@ -7,24 +7,23 @@
#######################################################################
from
functools
import
partial
import
os
import
sys
from
functools
import
partial
try
:
import
regex
as
re
except
ImportError
:
import
re
from
DHParser.toolkit
import
logging
,
is_filename
,
load_if_file
from
DHParser.parsers
import
Grammar
,
Compiler
,
nil_scanner
,
\
Lookbehind
,
Lookahead
,
Alternative
,
Pop
,
Required
,
Token
,
Synonym
,
\
Optional
,
NegativeLookbehind
,
OneOrMore
,
RegExp
,
Retrieve
,
Series
,
RE
,
Capture
,
\
from
DHParser.toolkit
import
logging
,
is_filename
from
DHParser.parsers
import
Grammar
,
Compiler
,
Alternative
,
Pop
,
Required
,
Token
,
Synonym
,
\
Optional
,
OneOrMore
,
Series
,
RE
,
Capture
,
\
ZeroOrMore
,
Forward
,
NegativeLookahead
,
mixin_comment
,
compile_source
,
\
last_value
,
counterpart
,
accumulate
,
ScannerFunc
from
DHParser.syntaxtree
import
Node
,
traverse
,
remove_brackets
,
keep_children
,
\
remove_children_if
,
reduce_single_child
,
replace_by_single_child
,
remove_whitespace
,
\
remove_expendables
,
remove_tokens
,
flatten
,
is_whitespace
,
is_expendable
,
join
,
\
collapse
,
replace_content
,
WHITESPACE_PTYPE
,
TOKEN_PTYPE
,
TransformationFunc
,
\
remove_empty
,
replace_parser
,
apply_if
ScannerFunc
from
DHParser.syntaxtree
import
traverse
,
remove_brackets
,
reduce_single_child
,
replace_by_single_child
,
\
remove_expendables
,
flatten
,
join
,
\
collapse
,
replace_content
,
TransformationFunc
,
\
remove_empty
#######################################################################
...
...
@@ -49,42 +48,135 @@ def get_scanner() -> ScannerFunc:
class
LaTeXGrammar
(
Grammar
):
r
"""Parser for a LaTeX source file, with this grammar:
#
latex
Grammar
#
LaTeX-
Grammar
for DHParser
@ testing = True
@ whitespace = /[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?/ # optional whitespace, including at most one linefeed
@ comment = /%.*(?:\n|$)/
latexdoc = preamble document
preamble = { command }+
document = [PARSEP] { [PARSEP] paragraph } §EOF
latexdoc = preamble document
preamble = { command }+
document = [PARSEP] "\begin{document}" [PARSEP]
frontpages [PARSEP]
(Chapters | Sections) [PARSEP]
[Bibliography] [Index] [PARSEP]
"\end{document}" [PARSEP] §EOF
frontpages = sequence
#######################################################################
#
# document structure
#
#######################################################################
Chapters = { Chapter [PARSEP] }+
Chapter = "\Chapter" block [PARSEP] { sequence | Sections }
Sections = { Section [PARSEP] }+
Section = "\Section" block [PARSEP] { sequence | SubSections }
SubSections = { SubSection [PARSEP] }+
SubSection = "\SubSection" block [PARSEP] { sequence | SubSubSections }
SubSubSections = { SubSubSection [PARSEP] }+
SubSubSection = "\SubSubSection" block [PARSEP] { sequence | Paragraphs }
Paragraphs = { Paragraph [PARSEP] }+
Paragraph = "\paragraph" block [PARSEP] { sequence | SubParagraphs }
SubParagraphs = { SubParagraph [PARSEP] }+
SubParagraph = "\subparagpaph" block [PARSEP] { sequence }
Bibliography = "\bibliography" block [PARSEP]
Index = "\printindex" [PARSEP]
#######################################################################
#
# document content
#
#######################################################################
#### block environments ####
blockenv = beginenv sequence §endenv
block_enrivonment = known_enrivonment | generic_enrivonment
known_enrivonment = itemize | enumerate | figure | table | quotation
| verbatim
generic_enrivonment = begin_enrivonment sequence §end_enrivonment
parblock = "{" sequence §"}"
itemize = "\begin{itemize}" [PARSEP] { item } §"\end{itemize}"
enumerate = "\begin{enumerate}" [PARSEP] {item } §"end{enumerate}"
item = "\item" [PARSEP] sequence
sequence = { paragraph [PARSEP] }+
paragraph = { !blockcmd (command | block | text) //~ }+
figure = "\begin{figure}" sequence "\end{figure}"
quotation = ("\begin{quotation}" sequence "\end{quotation}")
| ("\begin{quote}" sequence "\end{quote}")
verbatim = "\begin{verbatim}" sequence "\end{verbatim}"
table = "\begin{tabular}" table_config sequence "\end{tabular}"
table_config = "{" /[lcr|]+/~ "}"
inlineenv = beginenv { command | block | text }+ endenv
beginenv = "\begin{" §NAME §"}"
endenv = "\end{" §::NAME §"}"
command = CMDNAME [[ //~ config ] //~ block ]
#### paragraphs and sequences of paragraphs ####
block_of_paragraphs = /{/ sequence §/}/
sequence = { (paragraph | block_enrivonment ) [PARSEP] }+
paragraph = { !blockcmd text_elements //~ }+
text_elements = command | text | block | inline_enrivonment
#### inline enivronments ####
inline_enrivonment = known_inline_env | generic_inline_env
known_inline_env = inline_math
generic_inline_env = begin_enrivonment { text_elements }+ §end_enrivonment
begin_enrivonment = "\begin{" §NAME §"}"
end_enrivonment = "\end{" §::NAME §"}"
inline_math = "$" MATH "$"
#### commands ####
command = known_command | generic_command
known_command = footnote
generic_command = CMDNAME [[ //~ config ] //~ block ]
footnote = "\footnote" block_of_paragraphs
#######################################################################
#
# low-level text and character sequences
#
#######################################################################
config = "[" cfgtext §"]"
block = /{/ {
command | text | block
} §/}/
block = /{/ {
text_elements
} §/}/
text = { cfgtext | (BRACKETS //~) }+
cfgtext = { word_sequence | (ESCAPED //~) }+
word_sequence = { TEXTCHUNK //~ }+
blockcmd = "\subsection" | "\section" | "\chapter" | "\subsubsection"
| "\paragraph" | "\subparagraph" | "\begin{enumerate}"
| "\begin{itemize}" | "\item" | "\begin{figure}"
| "\paragraph" | "\subparagraph" | "\item"
| "\begin{" ("enumerate" | "itemize" | "figure" | "quote"
| "quotation" | "tabular") "}"
#######################################################################
#
# Primitives
#
#######################################################################
CMDNAME = /\\(?:(?!_)\w)+/~
NAME = /\w+/~
MATH = /[\w_^{}[\]]*/~
ESCAPED = /\\[%$&_\/]/
BRACKETS = /[\[\]]/ # left or right square bracket: [ ]
...
...
@@ -94,12 +186,12 @@ class LaTeXGrammar(Grammar):
LF = !PARSEP /[ \t]*\n[ \t]*/ # LF but not an empty line
PARSEP = /[ \t]*(?:\n[ \t]*)+\n[ \t]*/ # at least one empty line, i.e.
# [whitespace] linefeed [whitespace] linefeed
EOF = !/./
"""
block
=
Forward
()
command
=
Forward
()
source_hash__
=
"936e76e84dd027b0af532abfad617d15"
block_enrivonment
=
Forward
()
block_of_paragraphs
=
Forward
()
text_elements
=
Forward
()
source_hash__
=
"484ed98c05f7142c72f06d7c31e61089"
parser_initialization__
=
"upon instantiation"
COMMENT__
=
r
'%.*(?:\n|$)'
WSP__
=
mixin_comment
(
whitespace
=
r
'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?'
,
comment
=
r
'%.*(?:\n|$)'
)
...
...
@@ -112,23 +204,76 @@ class LaTeXGrammar(Grammar):
TEXTCHUNK
=
RE
(
'[^
\\\\
%$&
\\
{
\\
}
\\
[
\\
]
\\
s
\\
n]+'
,
wR
=
''
)
BRACKETS
=
RE
(
'[
\\
[
\\
]]'
,
wR
=
''
)
ESCAPED
=
RE
(
'
\\\\
[%$&_/]'
,
wR
=
''
)
MATH
=
RE
(
'[
\\
w_^{}[
\\
]]*'
)
NAME
=
Capture
(
RE
(
'
\\
w+'
))
CMDNAME
=
RE
(
'
\\\\
(?:(?!_)
\\
w)+'
)
blockcmd
=
Alternative
(
Token
(
"
\\
subsection"
),
Token
(
"
\\
section"
),
Token
(
"
\\
chapter"
),
Token
(
"
\\
subsubsection"
),
Token
(
"
\\
paragraph"
),
Token
(
"
\\
subparagraph"
),
Token
(
"
\\
begin{enumerate}"
),
Token
(
"
\\
begin{itemize}"
),
Token
(
"
\\
item"
),
Token
(
"
\\
begin{figure}"
))
blockcmd
=
Alternative
(
Token
(
"
\\
subsection"
),
Token
(
"
\\
section"
),
Token
(
"
\\
chapter"
),
Token
(
"
\\
subsubsection"
),
Token
(
"
\\
paragraph"
),
Token
(
"
\\
subparagraph"
),
Token
(
"
\\
item"
),
Series
(
Token
(
"
\\
begin{"
),
Alternative
(
Token
(
"enumerate"
),
Token
(
"itemize"
),
Token
(
"figure"
),
Token
(
"quote"
),
Token
(
"quotation"
),
Token
(
"tabular"
)),
Token
(
"}"
)))
word_sequence
=
OneOrMore
(
Series
(
TEXTCHUNK
,
RE
(
''
)))
cfgtext
=
OneOrMore
(
Alternative
(
word_sequence
,
Series
(
ESCAPED
,
RE
(
''
))))
text
=
OneOrMore
(
Alternative
(
cfgtext
,
Series
(
BRACKETS
,
RE
(
''
))))
block
.
set
(
Series
(
RE
(
'{'
,
wR
=
''
),
ZeroOrMore
(
Alternative
(
command
,
text
,
block
)
),
Required
(
RE
(
'}'
,
wR
=
''
)))
)
block
=
Series
(
RE
(
'{'
,
wR
=
''
),
ZeroOrMore
(
text_elements
),
Required
(
RE
(
'}'
,
wR
=
''
)))
config
=
Series
(
Token
(
"["
),
cfgtext
,
Required
(
Token
(
"]"
)))
command
.
set
(
Series
(
CMDNAME
,
Optional
(
Series
(
Optional
(
Series
(
RE
(
''
),
config
)),
RE
(
''
),
block
))))
endenv
=
Series
(
Token
(
"
\\
end{"
),
Required
(
Pop
(
NAME
)),
Required
(
Token
(
"}"
)))
beginenv
=
Series
(
Token
(
"
\\
begin{"
),
Required
(
NAME
),
Required
(
Token
(
"}"
)))
inlineenv
=
Series
(
beginenv
,
OneOrMore
(
Alternative
(
command
,
block
,
text
)),
endenv
)
paragraph
=
OneOrMore
(
Series
(
NegativeLookahead
(
blockcmd
),
Alternative
(
command
,
block
,
text
),
RE
(
''
)))
sequence
=
OneOrMore
(
Series
(
paragraph
,
Optional
(
PARSEP
)))
parblock
=
Series
(
Token
(
"{"
),
sequence
,
Required
(
Token
(
"}"
)))
blockenv
=
Series
(
beginenv
,
sequence
,
Required
(
endenv
))
document
=
Series
(
Optional
(
PARSEP
),
ZeroOrMore
(
Series
(
Optional
(
PARSEP
),
paragraph
)),
Required
(
EOF
))
footnote
=
Series
(
Token
(
"
\\
footnote"
),
block_of_paragraphs
)
generic_command
=
Series
(
CMDNAME
,
Optional
(
Series
(
Optional
(
Series
(
RE
(
''
),
config
)),
RE
(
''
),
block
)))
known_command
=
Synonym
(
footnote
)
command
=
Alternative
(
known_command
,
generic_command
)
inline_math
=
Series
(
Token
(
"$"
),
MATH
,
Token
(
"$"
))
end_enrivonment
=
Series
(
Token
(
"
\\
end{"
),
Required
(
Pop
(
NAME
)),
Required
(
Token
(
"}"
)))
begin_enrivonment
=
Series
(
Token
(
"
\\
begin{"
),
Required
(
NAME
),
Required
(
Token
(
"}"
)))
generic_inline_env
=
Series
(
begin_enrivonment
,
OneOrMore
(
text_elements
),
Required
(
end_enrivonment
))
known_inline_env
=
Synonym
(
inline_math
)
inline_enrivonment
=
Alternative
(
known_inline_env
,
generic_inline_env
)
text_elements
.
set
(
Alternative
(
command
,
text
,
block
,
inline_enrivonment
))
paragraph
=
OneOrMore
(
Series
(
NegativeLookahead
(
blockcmd
),
text_elements
,
RE
(
''
)))
sequence
=
OneOrMore
(
Series
(
Alternative
(
paragraph
,
block_enrivonment
),
Optional
(
PARSEP
)))
block_of_paragraphs
.
set
(
Series
(
RE
(
'{'
,
wR
=
''
),
sequence
,
Required
(
RE
(
'}'
,
wR
=
''
))))
table_config
=
Series
(
Token
(
"{"
),
RE
(
'[lcr|]+'
),
Token
(
"}"
))
table
=
Series
(
Token
(
"
\\
begin{tabular}"
),
table_config
,
sequence
,
Token
(
"
\\
end{tabular}"
))
verbatim
=
Series
(
Token
(
"
\\
begin{verbatim}"
),
sequence
,
Token
(
"
\\
end{verbatim}"
))
quotation
=
Alternative
(
Series
(
Token
(
"
\\
begin{quotation}"
),
sequence
,
Token
(
"
\\
end{quotation}"
)),
Series
(
Token
(
"
\\
begin{quote}"
),
sequence
,
Token
(
"
\\
end{quote}"
)))
figure
=
Series
(
Token
(
"
\\
begin{figure}"
),
sequence
,
Token
(
"
\\
end{figure}"
))
item
=
Series
(
Token
(
"
\\
item"
),
Optional
(
PARSEP
),
sequence
)
enumerate
=
Series
(
Token
(
"
\\
begin{enumerate}"
),
Optional
(
PARSEP
),
ZeroOrMore
(
item
),
Required
(
Token
(
"end{enumerate}"
)))
itemize
=
Series
(
Token
(
"
\\
begin{itemize}"
),
Optional
(
PARSEP
),
ZeroOrMore
(
item
),
Required
(
Token
(
"
\\
end{itemize}"
)))
generic_enrivonment
=
Series
(
begin_enrivonment
,
sequence
,
Required
(
end_enrivonment
))
known_enrivonment
=
Alternative
(
itemize
,
enumerate
,
figure
,
table
,
quotation
,
verbatim
)
block_enrivonment
.
set
(
Alternative
(
known_enrivonment
,
generic_enrivonment
))
Index
=
Series
(
Token
(
"
\\
printindex"
),
Optional
(
PARSEP
))
Bibliography
=
Series
(
Token
(
"
\\
bibliography"
),
block
,
Optional
(
PARSEP
))
SubParagraph
=
Series
(
Token
(
"
\\
subparagpaph"
),
block
,
Optional
(
PARSEP
),
ZeroOrMore
(
sequence
))
SubParagraphs
=
OneOrMore
(
Series
(
SubParagraph
,
Optional
(
PARSEP
)))
Paragraph
=
Series
(
Token
(
"
\\
paragraph"
),
block
,
Optional
(
PARSEP
),
ZeroOrMore
(
Alternative
(
sequence
,
SubParagraphs
)))
Paragraphs
=
OneOrMore
(
Series
(
Paragraph
,
Optional
(
PARSEP
)))
SubSubSection
=
Series
(
Token
(
"
\\
SubSubSection"
),
block
,
Optional
(
PARSEP
),
ZeroOrMore
(
Alternative
(
sequence
,
Paragraphs
)))
SubSubSections
=
OneOrMore
(
Series
(
SubSubSection
,
Optional
(
PARSEP
)))
SubSection
=
Series
(
Token
(
"
\\
SubSection"
),
block
,
Optional
(
PARSEP
),
ZeroOrMore
(
Alternative
(
sequence
,
SubSubSections
)))
SubSections
=
OneOrMore
(
Series
(
SubSection
,
Optional
(
PARSEP
)))
Section
=
Series
(
Token
(
"
\\
Section"
),
block
,
Optional
(
PARSEP
),
ZeroOrMore
(
Alternative
(
sequence
,
SubSections
)))
Sections
=
OneOrMore
(
Series
(
Section
,
Optional
(
PARSEP
)))
Chapter
=
Series
(
Token
(
"
\\
Chapter"
),
block
,
Optional
(
PARSEP
),
ZeroOrMore
(
Alternative
(
sequence
,
Sections
)))
Chapters
=
OneOrMore
(
Series
(
Chapter
,
Optional
(
PARSEP
)))
frontpages
=
Synonym
(
sequence
)
document
=
Series
(
Optional
(
PARSEP
),
Token
(
"
\\
begin{document}"
),
Optional
(
PARSEP
),
frontpages
,
Optional
(
PARSEP
),
Alternative
(
Chapters
,
Sections
),
Optional
(
PARSEP
),
Optional
(
Bibliography
),
Optional
(
Index
),
Optional
(
PARSEP
),
Token
(
"
\\
end{document}"
),
Optional
(
PARSEP
),
Required
(
EOF
))
preamble
=
OneOrMore
(
command
)
latexdoc
=
Series
(
preamble
,
document
)
root__
=
latexdoc
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment