LaTeX.ebnf 8.97 KB
Newer Older
Eckhart Arnold's avatar
Eckhart Arnold committed
1
# LaTeX-Grammar for DHParser
Eckhart Arnold's avatar
Eckhart Arnold committed
2

3
# preamble
eckhart's avatar
eckhart committed
4
# Directive: double-quoted literals also swallow insignificant whitespace on
# their right-hand side (presumably DHParser's `literalws` semantics - confirm).
@ literalws  = right
5
# Insignificant whitespace: blanks/tabs plus at most one linefeed. The negative
# lookahead (?![ \t]*\n) refuses a linefeed that is followed by a blank line,
# so an empty line is never swallowed by this expression.
@ whitespace = /[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?/    # insignificant whitespace, including at most one linefeed
6
# A LaTeX comment starts at '%' and extends up to the end of the line.
@ comment    = /%.*/                                # note: trailing linefeed is not part of the comment proper
7
# Tree-reduction mode used while building the syntax tree; see the DHParser
# documentation of the `reduction` directive for the effect of `merge_treetops`.
@ reduction  = merge_treetops
8
# Symbols declared "disposable": helper symbols whose own nodes need not be
# kept in the syntax tree (presumably they are replaced by their content or
# flattened during tree reduction - confirm against the DHParser docs).
# The list is one directive continued over several lines.
@ disposable = _WSPC, _GAP, _LB, _PARSEP, _LETTERS, _NAME, INTEGER, FRAC,
               _QUALIFIED, TEXT_NOPAR, TEXT, _block_content,
               block_environment, known_environment, text_element,
               line_element, inline_environment, known_inline_env, info_block,
               begin_inline_env, end_inline_env, command, known_command
13
# Content that is dropped from the syntax tree: string literals, backticked
# literals, whitespace, regular expressions and the listed whitespace and
# line-break symbols (presumably only anonymous/disposable matches are
# affected - confirm against the DHParser `drop` directive).
@ drop       = strings, backticked, whitespace, regexps, _WSPC, _GAP, _PARSEP, _LB
Eckhart Arnold's avatar
Eckhart Arnold committed
14

Eckhart Arnold's avatar
Eckhart Arnold committed
15
16
########################################################################
#
#: outer document structure
#
########################################################################

21
# Root rule: a preamble followed by the document body. `§` marks `document`
# as mandatory once the preamble has matched.
latexdoc       = preamble §document
22
# One or more commands, each optionally preceded by whitespace/comments.
preamble       = { [_WSPC] command }+
Eckhart Arnold's avatar
Eckhart Arnold committed
23

di68kap's avatar
di68kap committed
24
# Document body: \begin{document}, the front pages, then either chapters or
# sections, an optional bibliography and index, \end{document} and finally
# the end of the file. Items following a `§` are mandatory.
document       = [_WSPC] "\begin{document}"
                 §frontpages
                 (Chapters | Sections)
                 [Bibliography] [Index] [_WSPC]
                 "\end{document}" [_WSPC] §EOF
Eckhart Arnold's avatar
Eckhart Arnold committed
29
30
# Everything between \begin{document} and the first chapter/section; parsed
# as an ordinary sequence of paragraphs and block environments.
frontpages     = sequence

Eckhart Arnold's avatar
Eckhart Arnold committed
31
32
33

#######################################################################
#
#: document structure
#
#######################################################################

di68kap's avatar
di68kap committed
38
# One or more chapters, each optionally preceded by whitespace/comments.
Chapters       = { [_WSPC] Chapter }+
eckhart's avatar
eckhart committed
39
# \chapter{heading} followed by any mix of content sequences and section groups.
Chapter        = "\chapter" heading { sequence | Sections }
Eckhart Arnold's avatar
Eckhart Arnold committed
40

di68kap's avatar
di68kap committed
41
# One or more sections, each optionally preceded by whitespace/comments.
Sections       = { [_WSPC] Section }+
eckhart's avatar
eckhart committed
42
# \section{heading} followed by any mix of content sequences and subsection groups.
Section        = "\section" heading { sequence | SubSections }
Eckhart Arnold's avatar
Eckhart Arnold committed
43

di68kap's avatar
di68kap committed
44
# One or more subsections, each optionally preceded by whitespace/comments.
SubSections    = { [_WSPC] SubSection }+
eckhart's avatar
eckhart committed
45
# \subsection{heading} followed by any mix of content sequences and
# subsubsection groups.
SubSection     = "\subsection" heading { sequence | SubSubSections }
Eckhart Arnold's avatar
Eckhart Arnold committed
46

di68kap's avatar
di68kap committed
47
# One or more subsubsections, each optionally preceded by whitespace/comments.
SubSubSections = { [_WSPC] SubSubSection }+
eckhart's avatar
eckhart committed
48
# \subsubsection{heading} followed by any mix of content sequences and
# \paragraph groups.
SubSubSection  = "\subsubsection" heading { sequence | Paragraphs }
Eckhart Arnold's avatar
Eckhart Arnold committed
49

di68kap's avatar
di68kap committed
50
# One or more \paragraph units (the LaTeX sectioning command, not the
# lower-case `paragraph` text rule), each optionally preceded by whitespace.
Paragraphs     = { [_WSPC] Paragraph  }+
eckhart's avatar
eckhart committed
51
# \paragraph{heading} followed by any mix of content sequences and
# \subparagraph groups.
Paragraph      = "\paragraph" heading { sequence | SubParagraphs }
Eckhart Arnold's avatar
Eckhart Arnold committed
52

di68kap's avatar
di68kap committed
53
# One or more \subparagraph units, each optionally preceded by whitespace/comments.
SubParagraphs  = { [_WSPC] SubParagraph }+
eckhart's avatar
eckhart committed
54
# \subparagraph{heading} with at most one content sequence; this is the
# lowest structural level, so no further nesting is offered.
SubParagraph   = "\subparagraph" heading [ sequence ]
Eckhart Arnold's avatar
Eckhart Arnold committed
55

di68kap's avatar
di68kap committed
56
57
# \bibliography{...}; its braced argument is parsed with the `heading` rule.
Bibliography   = [_WSPC] "\bibliography" heading
# \printindex, optionally preceded by whitespace/comments.
Index          = [_WSPC] "\printindex"
Eckhart Arnold's avatar
Eckhart Arnold committed
58

59
# A heading is a single brace-delimited block.
heading        = block
Eckhart Arnold's avatar
Eckhart Arnold committed
60
61
62

#######################################################################
#
#: document content
#
#######################################################################
66

Eckhart Arnold's avatar
Eckhart Arnold committed
67

Eckhart Arnold's avatar
Eckhart Arnold committed
68
69
#### block environments ####

70
71
# A block environment is either one of the explicitly modelled environments
# or a generic \begin{NAME} ... \end{NAME} block; known ones are tried first.
block_environment   = known_environment | generic_block
# Environments that have a dedicated rule in this grammar, tried in the
# order listed.
known_environment   = itemize | enumerate | figure | tabular | quotation
                    | verbatim
73
# Any other block environment: opening tag, content sequence and a mandatory
# closing tag.
generic_block       = begin_generic_block sequence §end_generic_block
74
75
# Opening and closing tag of a generic block environment. The look-behind
# `<-&_LB` requires a line break (or the start of the text) before the tag
# and `LFF` requires a linefeed after it, i.e. the tag stands on its own line.
begin_generic_block = <-&_LB begin_environment LFF
end_generic_block   = <-&_LB  end_environment LFF
Eckhart Arnold's avatar
Eckhart Arnold committed
76

di68kap's avatar
di68kap committed
77
78
# Bulleted and numbered lists: zero or more items between the \begin tag and
# the mandatory \end tag.
itemize             = "\begin{itemize}" [_WSPC] { item } §"\end{itemize}"
enumerate           = "\begin{enumerate}" [_WSPC] {item } §"\end{enumerate}"
eckhart's avatar
eckhart committed
79
# A list item: \item followed by a content sequence.
item                = "\item" sequence
Eckhart Arnold's avatar
Eckhart Arnold committed
80

81
82
83
84
# Environments whose body is parsed as an ordinary content sequence; the
# closing tag is mandatory in each case.
figure              = "\begin{figure}" sequence §"\end{figure}"
# Both the `quotation` and the `quote` environment yield a `quotation` node.
quotation           = ("\begin{quotation}" sequence §"\end{quotation}")
                    | ("\begin{quote}" sequence §"\end{quote}")
# NOTE(review): the verbatim body is parsed with `sequence` like regular
# content rather than taken literally - confirm that this is intended.
verbatim            = "\begin{verbatim}" sequence §"\end{verbatim}"
Eckhart Arnold's avatar
Eckhart Arnold committed
85
# Table: column configuration, zero or more rows and a mandatory \end tag.
tabular             = "\begin{tabular}" tabular_config { tabular_row } §"\end{tabular}"
86
87
# A table row: cells (plain or \multicolumn) separated by "&", terminated by
# "\\" and followed by either one \hline or any number of \cline commands.
tabular_row         = (multicolumn | tabular_cell) { "&" (multicolumn | tabular_cell) }
                      "\\" ( hline | { cline } )
88
# A cell holds zero or more line elements, each optionally followed by
# significant whitespace; an empty cell is therefore allowed.
tabular_cell        = {line_element [S] }
di68kap's avatar
di68kap committed
89
90
# Column specification of a table in braces; the closing brace is mandatory.
tabular_config      = "{" TBCFG_VALUE §"}"
# One or more of the characters l, c, r and |, followed by optional
# insignificant whitespace.
TBCFG_VALUE         = /[lcr|]+/~
Eckhart Arnold's avatar
Eckhart Arnold committed
91

Eckhart Arnold's avatar
Eckhart Arnold committed
92
93
#### paragraphs and sequences of paragraphs ####

94
# Braces enclosing an optional content sequence; the closing brace is mandatory.
block_of_paragraphs = "{" [sequence] §"}"
95
96
97
# A sequence is one or more paragraphs or block environments, each optionally
# followed by a paragraph separator.
sequence            = [_WSPC] { (paragraph | block_environment ) [_PARSEP] }+
# A paragraph is a non-empty run of text elements; `!blockcmd` stops it in
# front of any block-level command.
paragraph           = { !blockcmd text_element [S] }+
# LINEFEED is the explicit LaTeX line break (two backslashes).
text_element        = line_element | LINEFEED
di68kap's avatar
di68kap committed
98
# Anything that may occur within a line of running text, tried in this order.
line_element        = text | inline_environment | command | block
Eckhart Arnold's avatar
Eckhart Arnold committed
99

Eckhart Arnold's avatar
Eckhart Arnold committed
100

Eckhart Arnold's avatar
Eckhart Arnold committed
101
102
#### inline environments ####

103
104
105
# Inline environments: explicitly modelled ones are tried before the generic form.
inline_environment  = known_inline_env | generic_inline_env
# Currently the only explicitly modelled inline environment is $...$ math.
known_inline_env    = inline_math
# Generic form: opening tag, optional whitespace, one paragraph and a
# mandatory closing tag.
generic_inline_env  = begin_inline_env ~ paragraph §end_inline_env
106
# A \begin tag opens an inline environment if it is not preceded by a line
# break (first alternative) or not followed by a linefeed (second alternative).
begin_inline_env    = (<-!_LB begin_environment) | (begin_environment !LFF)
107
# Closing tag of an inline environment: any \end{NAME} is accepted, with no
# condition on the surrounding line breaks.
end_inline_env      = end_environment
108
                      ## (<-!_LB end_environment)   | (end_environment !LFF)  # ambiguity with generic_block when EOF
109
110
# \begin{NAME}: the environment name is captured via the `NAME` symbol.
begin_environment   = /\\begin{/ §NAME /}/
# \end{NAME}: `::NAME` refers back to the captured name, so that the closing
# tag must carry the same name as the opening tag (presumably the captured
# value is also popped from the stack here - confirm against the DHParser docs).
end_environment     = /\\end{/ §::NAME /}/
Eckhart Arnold's avatar
Eckhart Arnold committed
111

Eckhart Arnold's avatar
Eckhart Arnold committed
112
# Inline math: '$', any run of characters other than '$', and a mandatory
# closing '$'.
inline_math         = /\$/ /[^$]*/ §/\$/
Eckhart Arnold's avatar
Eckhart Arnold committed
113

Eckhart Arnold's avatar
Eckhart Arnold committed
114

115
#### commands ####
Eckhart Arnold's avatar
Eckhart Arnold committed
116

117
# A command is tried as a known command first, then as a text command and
# finally as a generic command.
command             = known_command | text_command | generic_command
118

119
# Commands that have a dedicated rule in this grammar, tried in the order
# listed.
known_command       = citet | citep | footnote | includegraphics | caption
                    | multicolumn | hline | cline | documentclass | pdfinfo
                    | hypersetup
122
# Commands that stand for text: \text... commands, escaped characters and
# bare square brackets.
text_command        = TXTCOMMAND | ESCAPED | BRACKETS
123
# Fallback for all other commands. First form: a command name that is not
# excluded by `no_command`, optionally followed by an optional [config] and
# one or more {blocks}. Second form: a command name directly inside braces
# followed by block content, with a mandatory closing brace.
generic_command     = !no_command CMDNAME [[ ~ config ] { ~ block }+ ]
                    | `{` CMDNAME _block_content §`}`
Eckhart Arnold's avatar
Eckhart Arnold committed
125

126
127
# Citation commands with an optional [config] and a braced argument.
citet               = "\citet" [config] block
# Both \citep and plain \cite yield a `citep` node.
citep               = ("\citep" | "\cite") [config] block
Eckhart Arnold's avatar
Eckhart Arnold committed
128
# \footnote{...}: the argument may contain whole paragraphs.
footnote            = "\footnote" block_of_paragraphs
129
# \includegraphics with an optional [config] and a braced argument.
includegraphics     = "\includegraphics" [ config ] block
Eckhart Arnold's avatar
Eckhart Arnold committed
130
# \caption{...} with a single braced argument.
caption             = "\caption" block
Eckhart Arnold's avatar
Eckhart Arnold committed
131
132
133
# \multicolumn{INTEGER}{column-config}{content}
multicolumn         = "\multicolumn" "{" INTEGER "}" tabular_config block_of_paragraphs
# Horizontal rule command \hline.
hline               = "\hline"
# \cline{INTEGER-INTEGER}
cline               = "\cline{" INTEGER "-" INTEGER "}"
134
# \documentclass with an optional [config] and a braced argument.
documentclass       = "\documentclass" [ config ] block
135
# \pdfinfo followed by a braced block of /key (value) associations.
pdfinfo             = "\pdfinfo" info_block
136
# \hypersetup followed by a braced parameter list.
hypersetup          = "\hypersetup" param_block
Eckhart Arnold's avatar
Eckhart Arnold committed
137

138

Eckhart Arnold's avatar
Eckhart Arnold committed
139
140
#######################################################################
#
#: text
#
#######################################################################
144

di68kap's avatar
di68kap committed
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# A brace-delimited block; the closing brace is mandatory and is matched
# with a backticked literal.
block          = "{" _block_content §`}`
# Zero or more text elements, stopping in front of any block-level command.
# Unlike `paragraph`, the content may be empty.
_block_content =  { !blockcmd text_element [S] }
# Braced list of key/value associations as used by \pdfinfo.
info_block     = "{" §{ info_assoc } "}"
# A key, optional whitespace and an optional value in parentheses.
info_assoc     = info_key ~ [ "(" §info_value ")" ]
# A key is a slash directly followed by a name.
info_key       = `/` _NAME
info_value     = TEXT_NOPAR { S TEXT_NOPAR }  # text without parentheses
# Running text: chunks of TEXT joined by significant whitespace. The two
# commented-out definitions are alternative variants based on CHARS and LINE.
# text           = CHARS { S CHARS }
# text           = LINE { S LINE }
text           = TEXT { S TEXT }

# Matches what must not be parsed as a generic command: \begin{, \end and
# the structural commands. Used as a negative lookahead in `generic_command`.
no_command     = "\begin{" | "\end" | BACKSLASH structural
# Matches the start of a block-level construct; used as a negative lookahead
# to terminate paragraphs and block content. After the backslash it accepts
# \begin{...}/\end{...} of the listed environments or a structural command.
# NOTE(review): `begin_generic_block` and `end_generic_block` are tried only
# after BACKSLASH has already been consumed, although both start with their
# own backslash and a look-behind for a line break; these two alternatives
# therefore look unreachable - confirm whether they should sit outside the
# BACKSLASH group.
blockcmd       = BACKSLASH ( ( "begin{" | "end{" )
                             ( "enumerate" | "itemize" | "figure" | "quote"
                             | "quotation" | "tabular") "}"
                           | structural | begin_generic_block | end_generic_block )

# Names (without the backslash) of the sectioning commands and of \item.
structural     = "subsection" | "section" | "chapter" | "subsubsection"
               | "paragraph" | "subparagraph" | "item"
163

Eckhart Arnold's avatar
Eckhart Arnold committed
164
165
166

#######################################################################
#
#: parameters
#
#######################################################################

# Square-bracket argument. Structured parameters are tried first and are only
# accepted if directly followed by "]" (lookahead); otherwise the content is
# read as free-form `cfg_text`.
config       = "[" § (parameters &"]" | cfg_text) "]"
# Square-bracket argument that may only hold structured parameters (or nothing).
param_config = "[" § [parameters] "]"
173
# Brace-delimited, possibly empty parameter list.
param_block  = "{" [parameters] "}"
eckhart's avatar
eckhart committed
174
175
176
# Comma-separated list of key=value associations and flags; a trailing comma
# is matched separately by WARN_Komma.
parameters   = (association | flag) { "," (association | flag) }  [ WARN_Komma ]
# key = value, with optional insignificant whitespace after key and value.
association  = key~ "=" value~
# A parameter without "=": a qualified name or a magnitude.
flag         = _QUALIFIED | magnitude
177
178
# Parameter key: a (possibly qualified) identifier.
key          = _QUALIFIED
# Parameter value, tried in this order: number with optional unit, letters,
# a command name, a nested parameter block or an ordinary block.
value        = magnitude | _LETTERS | CMDNAME | param_block | block
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# A number with an optional unit.
magnitude    = number [UNIT]
# An integer with an optional fractional part.
number       = INTEGER [FRAC]
# Free-form content of a [config]: any mix of text (after optional
# whitespace), command names and special characters; may be empty.
cfg_text     = { (~ text) | CMDNAME | SPECIAL }


#######################################################################
#
#: errors and warnings
#
#######################################################################

# Matches a superfluous trailing comma at the end of a parameter list
# (presumably so that a warning can be attached to this node later - confirm).
WARN_Komma = ","


#######################################################################
#
#: primitives
#
#######################################################################

199
# Command name: a backslash followed by one or more word characters that are
# neither digits nor underscores, plus optional trailing whitespace.
CMDNAME    = /\\(?:(?![\d_])\w)+/~
Eckhart Arnold's avatar
Eckhart Arnold committed
200
# Any command whose name starts with \text followed by at least one word character.
TXTCOMMAND = /\\text\w+/
201
# Escaped character: a backslash followed by one of % $ & _ / { } or a blank.
ESCAPED    = /\\[%$&_\/{} ]/
202
# A single special character: $ & _ / or a backslash.
SPECIAL    = /[$&_\/\\\\]/
Eckhart Arnold's avatar
Eckhart Arnold committed
203
# A single square bracket occurring in running text.
BRACKETS   = /[\[\]]/                       # left or right square bracket: [ ]
204
# Explicit LaTeX line break: two consecutive backslashes.
LINEFEED   = /[\\][\\]/
Eckhart Arnold's avatar
Eckhart Arnold committed
205

206
207
208
209
# Identifiers joined by ':', '.' or '-'.
_QUALIFIED = IDENTIFIER { /[:.-]/ IDENTIFIER }
IDENTIFIER = _NAME
NAME       = _NAME                          # captured! don't use without retrieval
# One or more word characters, the first of which is not a digit.
_NAME      = /(?!\d)\w+/
210
211
# Optionally negative integer: either several digits without a leading zero
# or a single digit.
INTEGER    = /-?(?:(?:[1-9][0-9]+)|[0-9])/
# Fractional part: a dot followed by one or more digits.
FRAC       = /\.[0-9]+/
212
# Unit of a magnitude: word characters not starting with a digit.
UNIT       = /(?!\d)\w+/
Eckhart Arnold's avatar
Eckhart Arnold committed
213

eckhart's avatar
eckhart committed
214
# Text free of the special characters \ % $ & { } [ ]. It may span several
# lines, but a linefeed is only included if it is not followed by a blank line.
TEXT       = /(?:[^\\%$&\{\}\[\]\n]+(?:\n(?![ \t]*\n))?)+/
215
# Same as TEXT, but additionally excluding '(' and ')'.
TEXT_NOPAR = /(?:[^\\%$&\{\}\[\]\(\)\n]+(?:\n(?![ \t]*\n))?)+/  # text without parentheses
eckhart's avatar
eckhart committed
216
# Text without special characters that stays within a single line.
LINE       = /[^\\%$&\{\}\[\]\n]+/          # a line of text
217
CHARS      = /[^\\%$&\{\}\[\]\s\n]+/        # some piece of text excluding whitespace,
                                            # linefeed and special characters
219
# One or more word characters (digits and underscore included).
_LETTERS   = /\w+/
eckhart's avatar
eckhart committed
220
# A line ending followed by any number of comment-plus-whitespace pairs.
# COMMENT__ and WHITESPACE__ are not defined in this file (presumably they
# are generated by DHParser from the @comment/@whitespace directives - confirm).
LF         = NEW_LINE { COMMENT__ WHITESPACE__ }  # linefeed but not an empty line
di68kap's avatar
di68kap committed
221
# A line ending followed by arbitrary further whitespace and comments.
LFF        = NEW_LINE [ _WSPC ]             # at least one linefeed
Eckhart Arnold's avatar
Eckhart Arnold committed
222
# The next character must be '%', a blank, a tab or a linefeed, and no
# empty-line gap may start here; then WSP_RE__ is consumed (WSP_RE__ is not
# defined in this file - presumably DHParser's combined whitespace/comment
# expression, confirm).
S          = &/[% \t\n]/ !_GAP WSP_RE__     # significant whitespace
di68kap's avatar
di68kap committed
223
224
# Optional whitespace/comment pairs, then a mandatory gap (empty line) and
# optional further whitespace.
_PARSEP    = { WHITESPACE__ COMMENT__ } _GAP [_WSPC] # paragraph separator
# One or more comments or runs of whitespace characters, linefeeds included.
_WSPC      = { COMMENT__ | /\s+/ }+         # arbitrary horizontal or vertical whitespace
eckhart's avatar
eckhart committed
225
_GAP       = /[ \t]*(?:\n[ \t]*)+\n/~       # at least one empty line, i.e.
                                            # [whitespace] linefeed [whitespace] linefeed
Eckhart Arnold's avatar
Eckhart Arnold committed
227
# End of a line: optional blanks/tabs, an optional comment and the linefeed.
NEW_LINE   = /[ \t]*/ [COMMENT__] /\n/
228
_LB        = /\s*?\n|$/                     # backwards line break for Lookbehind-Operator
                                            # beginning of text marker '$' added for test code
# A single backslash.
BACKSLASH  = /[\\]/

# Succeeds without consuming input where no further character follows.
# NOTE(review): unless the regex is compiled in DOTALL mode, `.` does not
# match a linefeed, so this would also succeed directly before a linefeed -
# confirm; in `document` all trailing whitespace is consumed beforehand.
EOF        = /(?!.)/                        # End-Of-File