LaTeX.ebnf 5.31 KB
Newer Older
Eckhart Arnold's avatar
Eckhart Arnold committed
1
# LaTeX-Grammar for DHParser
Eckhart Arnold's avatar
Eckhart Arnold committed
2

3
# NOTE(review): presumably switches the generated parser into a test-friendly
# mode; the exact effect is not visible here - confirm against the DHParser
# version in use.
@ testing    = True
4
# Insignificant whitespace: horizontal blanks plus at most one linefeed.
# The negative lookahead (?![ \t]*\n) refuses a linefeed that is followed by
# a blank line, so a paragraph separator is never swallowed as whitespace.
@ whitespace = /[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?/    # optional whitespace, including at most one linefeed
5
# A comment runs from "%" to the end of the line (or the end of the input)
# and includes the terminating linefeed, if there is one.
@ comment    = /%.*(?:\n|$)/
Eckhart Arnold's avatar
Eckhart Arnold committed
6

7

Eckhart Arnold's avatar
Eckhart Arnold committed
8
9
10
11
12
# Root rule: a LaTeX source is a preamble followed by the document body.
latexdoc       = preamble document
# The preamble is a non-empty run of commands.
preamble       = { command }+

# Document body between \begin{document} and \end{document}: front matter,
# then either chapters or sections, then an optional bibliography and an
# optional index. Blank-line separators (PARSEP) are tolerated between the
# parts. "§" marks EOF as mandatory, i.e. input remaining after
# \end{document} is reported as an error rather than silently ignored.
document       = [PARSEP] "\begin{document}" [PARSEP]
                 frontpages [PARSEP]
13
14
                 (Chapters | Sections) [PARSEP]
                 [Bibliography] [Index] [PARSEP]
Eckhart Arnold's avatar
Eckhart Arnold committed
15
16
17
                 "\end{document}" [PARSEP] §EOF
# Front matter is parsed like any other sequence of paragraphs and block
# environments.
frontpages     = sequence

Eckhart Arnold's avatar
Eckhart Arnold committed
18
19
20
21
22
23
24

#######################################################################
#
# document structure
#
#######################################################################

Eckhart Arnold's avatar
Eckhart Arnold committed
25
26
# Chapters: one or more chapters, each optionally followed by a blank-line
# separator. A chapter is its heading (a braced block) followed by any mix of
# plain content and sections.
# The heading literal is the lower-case "\chapter": LaTeX command names are
# case sensitive, and `structural` (used by `blockcmd` to keep headings out of
# paragraphs) spells the name in lower case as well.
Chapters       = { Chapter [PARSEP] }+
Chapter        = "\chapter" block [PARSEP] { sequence | Sections }
Eckhart Arnold's avatar
Eckhart Arnold committed
27

Eckhart Arnold's avatar
Eckhart Arnold committed
28
29
# Sections: one or more sections, each optionally followed by a blank-line
# separator. A section is its heading (a braced block) followed by any mix of
# plain content and subsections.
# The heading literal is the lower-case "\section": LaTeX command names are
# case sensitive, and `structural` (used by `blockcmd` to keep headings out of
# paragraphs) spells the name in lower case as well.
Sections       = { Section [PARSEP] }+
Section        = "\section" block [PARSEP] { sequence | SubSections }
Eckhart Arnold's avatar
Eckhart Arnold committed
30

Eckhart Arnold's avatar
Eckhart Arnold committed
31
32
# Subsections: one or more subsections, each optionally followed by a
# blank-line separator. A subsection is its heading (a braced block) followed
# by any mix of plain content and sub-subsections.
# The heading literal is the lower-case "\subsection": LaTeX command names are
# case sensitive, and `structural` (used by `blockcmd` to keep headings out of
# paragraphs) spells the name in lower case as well.
SubSections    = { SubSection [PARSEP] }+
SubSection     = "\subsection" block [PARSEP] { sequence | SubSubSections }
Eckhart Arnold's avatar
Eckhart Arnold committed
33

34
# One or more sub-subsections, each optionally followed by a blank-line
# separator.
SubSubSections = { SubSubSection [PARSEP] }+
Eckhart Arnold's avatar
Eckhart Arnold committed
35
36
# A sub-subsection is its heading (a braced block) followed by any mix of
# plain content and \paragraph-level divisions.
# The heading literal is the lower-case "\subsubsection": LaTeX command names
# are case sensitive, and `structural` (used by `blockcmd` to keep headings
# out of paragraphs) spells the name in lower case as well.
SubSubSection  = "\subsubsection" block [PARSEP] { sequence | Paragraphs }

Eckhart Arnold's avatar
Eckhart Arnold committed
37
38
# \paragraph-level divisions (capitalised rule names; not to be confused with
# the lower-case `paragraph` rule, which matches running text).
# A Paragraph is its heading (a braced block) followed by any mix of plain
# content and sub-paragraphs.
Paragraphs     = { Paragraph [PARSEP] }+
Paragraph      = "\paragraph" block [PARSEP] { sequence | SubParagraphs }
Eckhart Arnold's avatar
Eckhart Arnold committed
39

Eckhart Arnold's avatar
Eckhart Arnold committed
40
41
# \subparagraph-level divisions: the lowest structural level. A SubParagraph
# is its heading (a braced block) followed by plain content only.
# The literal is spelled "\subparagraph", matching the LaTeX command and the
# entry in `structural`.
SubParagraphs  = { SubParagraph [PARSEP] }+
SubParagraph   = "\subparagraph" block [PARSEP] { sequence }
Eckhart Arnold's avatar
Eckhart Arnold committed
42

43
44
# Back matter: \bibliography takes one braced block, \printindex takes no
# argument. Both may be followed by a blank-line separator.
Bibliography   = "\bibliography" block [PARSEP]
Index          = "\printindex" [PARSEP]
Eckhart Arnold's avatar
Eckhart Arnold committed
45
46
47
48
49
50
51


#######################################################################
#
# document content
#
#######################################################################
52

Eckhart Arnold's avatar
Eckhart Arnold committed
53

Eckhart Arnold's avatar
Eckhart Arnold committed
54
55
#### block environments ####

56
# A block-level environment: the dedicated rules are tried first, the generic
# fallback last (ordered choice).
block_environment   = known_environment | generic_block
57
# Environments that have a dedicated rule in this grammar.
known_environment   = itemize | enumerate | figure | table | quotation
57
                    | verbatim
59
60
61
# Fallback for block-level environments without a dedicated rule. Once the
# opening marker has matched, the closing marker is mandatory ("§").
generic_block       = begin_generic_block sequence §end_generic_block
# A \begin{...} / \end{...} marker counts as block-level only if it stands on
# a line of its own: the look-behind "-&SUCC_LB" requires a linebreak before
# it and the look-ahead "&PRED_LB" requires a linebreak after it.
begin_generic_block = -&SUCC_LB begin_environment &PRED_LB
end_generic_block   = -&SUCC_LB  end_environment  &PRED_LB
Eckhart Arnold's avatar
Eckhart Arnold committed
62
63

# Bulleted list: zero or more items; the closing \end{itemize} is mandatory.
itemize             = "\begin{itemize}" [PARSEP] { item } §"\end{itemize}"
64
# Numbered list: zero or more items; the closing \end{enumerate} is mandatory.
enumerate           = "\begin{enumerate}" [PARSEP] {item } §"\end{enumerate}"
Eckhart Arnold's avatar
Eckhart Arnold committed
65
66
# A list item: \item followed by a non-empty sequence of paragraphs and/or
# block environments.
item                = "\item" [PARSEP] sequence

67
68
69
70
71
72
73
# Dedicated block environments. Unlike itemize/enumerate, the closing markers
# here are not marked mandatory with "§".
figure              = "\begin{figure}" sequence "\end{figure}"
# Both the "quotation" and the "quote" environment are mapped onto this rule.
quotation           = ("\begin{quotation}" sequence "\end{quotation}")
                    | ("\begin{quote}" sequence "\end{quote}")
# NOTE(review): the verbatim body is parsed as an ordinary `sequence`, i.e. it
# is not treated as raw text - confirm that this is intended.
verbatim            = "\begin{verbatim}" sequence "\end{verbatim}"
# The rule is named `table`, but the environment it matches is "tabular".
table               = "\begin{tabular}" table_config sequence "\end{tabular}"
# Column specification: a braced, non-empty string made of l, c, r and "|".
table_config        = "{" /[lcr|]+/~ "}"

Eckhart Arnold's avatar
Eckhart Arnold committed
74
75
76
77

#### paragraphs and sequences of paragraphs ####

# A braced argument that may contain whole paragraphs and block environments
# (used by `footnote`); the closing brace is mandatory.
block_of_paragraphs = /{/ sequence §/}/
78
# A non-empty run of paragraphs and block environments, each optionally
# followed by a blank-line separator.
sequence            = { (paragraph | block_environment ) [PARSEP] }+
Eckhart Arnold's avatar
Eckhart Arnold committed
79

80
# Running text: a non-empty run of text elements, each followed by optional
# whitespace. The negative look-ahead "!blockcmd" ends the paragraph in front
# of any block-level command (structural heading, \item, or the begin/end of
# one of the environments listed in `blockcmd`).
paragraph           = { !blockcmd text_elements //~ }+
81
# Anything that may occur inside running text. Alternatives are tried in the
# order given.
text_elements       = command | text | block | inline_environment
Eckhart Arnold's avatar
Eckhart Arnold committed
82
83
84
85


#### inline environments ####

86
# An environment occurring within running text: dedicated rules first, the
# generic fallback last (ordered choice).
inline_environment  = known_inline_env | generic_inline_env
Eckhart Arnold's avatar
Eckhart Arnold committed
87
# Inline environments with a dedicated rule; currently only inline math.
known_inline_env    = inline_math
88
89
90
# Fallback for \begin{...} ... \end{...} pairs that occur inside running text.
# Once the opening marker has matched, the closing marker is mandatory ("§").
generic_inline_env  = begin_inline_env { text_elements }+ §end_inline_env
# Counterpart to begin_/end_generic_block: a marker counts as inline if it is
# NOT preceded by a linebreak ("-!SUCC_LB") or NOT followed by one
# ("!PRED_LB"), i.e. if it does not stand on a line of its own.
begin_inline_env    = (-!SUCC_LB begin_environment) | (begin_environment !PRED_LB)
end_inline_env      = (-!SUCC_LB end_environment)   | (end_environment   !PRED_LB)
91
92
# Opening marker of an arbitrary environment. After "\begin{" the name and
# the closing brace are mandatory ("§").
begin_environment   = "\begin{" §NAME §"}"
# Closing marker. "::NAME" does not match an arbitrary name; it retrieves (and
# pops) the name captured by the matching begin_environment, so that
# \end{...} must repeat the name given in \begin{...}.
end_environment     = "\end{" §::NAME §"}"
Eckhart Arnold's avatar
Eckhart Arnold committed
93

94
# Inline math: a MATH token enclosed in single dollar signs.
inline_math         = "$" MATH "$"
Eckhart Arnold's avatar
Eckhart Arnold committed
95
96
97


#### commands ####
Eckhart Arnold's avatar
Eckhart Arnold committed
98

99
# A LaTeX command: dedicated rules first, the generic fallback last
# (ordered choice).
command             = known_command | generic_command
Eckhart Arnold's avatar
Eckhart Arnold committed
100
# Commands that have a dedicated rule in this grammar.
known_command       = footnote | includegraphics | caption
Eckhart Arnold's avatar
Eckhart Arnold committed
101
# Any other command: its name, optionally followed by a braced block, which
# in turn may be preceded by a bracketed config. Because of the nesting, a
# config is only accepted when a block follows it. "//~" (an empty regular
# expression followed by "~") merely consumes optional whitespace.
generic_command     = CMDNAME [[ //~ config ] //~ block ]
Eckhart Arnold's avatar
Eckhart Arnold committed
102

Eckhart Arnold's avatar
Eckhart Arnold committed
103
# \footnote takes a braced argument that may contain whole paragraphs.
footnote            = "\footnote" block_of_paragraphs
Eckhart Arnold's avatar
Eckhart Arnold committed
104
105
# \includegraphics requires both a bracketed config and a braced block.
includegraphics     = "\includegraphics" config block
# \caption takes a single braced block.
caption             = "\caption" block
Eckhart Arnold's avatar
Eckhart Arnold committed
106
107
108
109
110
111

#######################################################################
#
# low-level text and character sequences
#
#######################################################################
112

113
# Optional-argument syntax: text in square brackets; the closing bracket is
# mandatory ("§").
config     = "[" cfgtext §"]"
114
# A braced group holding zero or more text elements; the closing brace is
# mandatory ("§").
block      = /{/ { text_elements } §/}/
Eckhart Arnold's avatar
Eckhart Arnold committed
115

116
117
118
# Plain text. `text` is `cfgtext` plus literal square brackets; brackets are
# excluded from `cfgtext` because it is used inside `config`, which a "]"
# terminates.
text       = { cfgtext | (BRACKETS //~) }+
# Text allowed inside a bracketed config: word sequences and escaped
# characters, each followed by optional whitespace.
cfgtext    = { word_sequence | (ESCAPED //~) }+
# One or more text chunks separated by optional whitespace.
word_sequence = { TEXTCHUNK //~ }+
119

120
121
122
123
124
125
126
# Recognises the start of a block-level command. It is used only as a
# negative look-ahead in `paragraph`, so that running text stops in front of
# these commands. Matches a backslash followed by either the begin/end marker
# of one of the listed environments or a structural command name.
blockcmd   = /[\\]/ ( ( "begin{" | "end{" )
                      ( "enumerate" | "itemize" | "figure" | "quote"
                      | "quotation" | "tabular") "}"
                    | structural)

# Names (without the leading backslash) of the commands that structure the
# document, plus "item". Alternatives are tried in the order given and match
# as prefixes, i.e. nothing checks that the command name ends there.
structural = "subsection" | "section" | "chapter" | "subsubsection"
           | "paragraph" | "subparagraph" | "item"
127

Eckhart Arnold's avatar
Eckhart Arnold committed
128
129
130
131
132
133
134

#######################################################################
#
# Primitives
#
#######################################################################

135
# Command name: a backslash followed by one or more word characters other
# than the underscore, plus optional trailing whitespace ("~").
CMDNAME    = /\\(?:(?!_)\w)+/~
136
# Environment name: one or more word characters, plus optional trailing
# whitespace ("~").
NAME       = /\w+/~
137
# Content of inline math: a (possibly empty) run of word characters, "_", "^",
# braces and square brackets. Blanks, operators and backslash commands are
# not part of the character class.
MATH       = /[\w_^{}[\]]*/~
Eckhart Arnold's avatar
Eckhart Arnold committed
138

139
# An escaped special character: a backslash followed by one of % $ & _ /
ESCAPED    = /\\[%$&_\/]/
140
141
142
143
BRACKETS   = /[\[\]]/                       # left or right square bracket: [ ]
# The excluded special characters are: backslash, %, $, &, braces and
# square brackets.
TEXTCHUNK  = /[^\\%$&\{\}\[\]\s\n]+/        # some piece of text excluding whitespace,
                                            # linefeed and special characters
WSPC       = /[ \t]+/                       # (horizontal) whitespace
144
# The look-ahead "!PARSEP" rejects the linefeed if a paragraph separator
# starts at this position.
LF         = !PARSEP /[ \t]*\n[ \t]*/       # linefeed but not an empty line
145
146
# Paragraph separator: two or more linefeeds with nothing but blanks and tabs
# in between.
PARSEP     = /[ \t]*(?:\n[ \t]*)+\n[ \t]*/  # at least one empty line, i.e.
                                            # [whitespace] linefeed [whitespace] linefeed
147
# End of input, expressed as a negative look-ahead: succeeds where no further
# character matches /./ (with default regex flags "." does not match a
# linefeed - TODO confirm the flags DHParser compiles with).
EOF        = !/./
148
149
150

# Helpers for the look-behind / look-ahead tests in begin_/end_generic_block
# and begin_/end_inline_env. SUCC_LB is only ever used in a look-behind,
# PRED_LB only in a look-ahead.
SUCC_LB    = /(?:.*\n)+\s*$/                # linebreak succeeding an arbitrary chunk of text
PRED_LB    = /\s*?\n/                       # linebreak preceding any text