LaTeX.ebnf 5.71 KB
Newer Older
Eckhart Arnold's avatar
Eckhart Arnold committed
1
# LaTeX-Grammar for DHParser
Eckhart Arnold's avatar
Eckhart Arnold committed
2

3
# Compiler directive. NOTE(review): the effect of `testing` is not visible in
# this file; the comment on LB mentions a '$' marker "added for test code",
# which may be related — confirm against the DHParser documentation.
@ testing    = True
4
# Insignificant whitespace: blanks/tabs plus at most one linefeed. The lookahead
# (?![ \t]*\n) rejects a linefeed that is followed by an empty line, so an empty
# line is never consumed as plain whitespace.
@ whitespace = /[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?/    # optional whitespace, including at most one linefeed
5
# A comment starts at '%' and extends to the end of the line (the linefeed is
# included) or to the end of the input.
@ comment    = /%.*(?:\n|$)/
Eckhart Arnold's avatar
Eckhart Arnold committed
6

7

Eckhart Arnold's avatar
Eckhart Arnold committed
8
# A LaTeX document: the preamble followed by the document body.
latexdoc       = preamble document
Eckhart Arnold's avatar
Eckhart Arnold committed
9
# Preamble: one or more commands, each optionally preceded by whitespace/comments.
preamble       = { [WSPC] command }+
Eckhart Arnold's avatar
Eckhart Arnold committed
10

Eckhart Arnold's avatar
Eckhart Arnold committed
11
12
13
14
15
# Document body between \begin{document} and \end{document}: front pages,
# then chapters or (if there are none) sections, then an optional bibliography
# and an optional index. After \end{document} only whitespace/comments may
# follow; `§EOF` makes reaching the end of the input mandatory.
document       = [WSPC] "\begin{document}" [WSPC]
                 frontpages [WSPC]
                 (Chapters | Sections) [WSPC]
                 [Bibliography] [Index] [WSPC]
                 "\end{document}" [WSPC] §EOF
Eckhart Arnold's avatar
Eckhart Arnold committed
16
17
# Front pages are parsed as an ordinary `sequence` (paragraphs and block
# environments); there is no dedicated front-matter structure.
frontpages     = sequence

Eckhart Arnold's avatar
Eckhart Arnold committed
18
19
20
21
22
23
24

#######################################################################
#
# document structure
#
#######################################################################

Eckhart Arnold's avatar
Eckhart Arnold committed
25
# One or more chapters, each optionally followed by whitespace/comments.
Chapters       = { Chapter [WSPC] }+
Eckhart Arnold's avatar
Eckhart Arnold committed
26
# A chapter: heading block, then any mix of text sequences and groups of sections.
Chapter        = "\chapter" block [WSPC] { sequence | Sections }
Eckhart Arnold's avatar
Eckhart Arnold committed
27

Eckhart Arnold's avatar
Eckhart Arnold committed
28
# One or more sections, each optionally followed by whitespace/comments.
Sections       = { Section [WSPC] }+
Eckhart Arnold's avatar
Eckhart Arnold committed
29
# A section: heading block, then any mix of text sequences and groups of subsections.
Section        = "\section" block [WSPC] { sequence | SubSections }
Eckhart Arnold's avatar
Eckhart Arnold committed
30

Eckhart Arnold's avatar
Eckhart Arnold committed
31
# One or more subsections, each optionally followed by whitespace/comments.
SubSections    = { SubSection [WSPC] }+
Eckhart Arnold's avatar
Eckhart Arnold committed
32
# A subsection: heading block, then any mix of text sequences and groups of
# subsubsections.
SubSection     = "\subsection" block [WSPC] { sequence | SubSubSections }
Eckhart Arnold's avatar
Eckhart Arnold committed
33

Eckhart Arnold's avatar
Eckhart Arnold committed
34
# One or more subsubsections, each optionally followed by whitespace/comments.
SubSubSections = { SubSubSection [WSPC] }+
Eckhart Arnold's avatar
Eckhart Arnold committed
35
# A subsubsection: heading block, then any mix of text sequences and groups of
# \paragraph units.
SubSubSection  = "\subsubsection" block [WSPC] { sequence | Paragraphs }
Eckhart Arnold's avatar
Eckhart Arnold committed
36

Eckhart Arnold's avatar
Eckhart Arnold committed
37
38
# Structural \paragraph units (capitalized; not to be confused with the
# lower-case `paragraph` rule, which describes a run of text elements).
# A Paragraph is a heading block followed by any mix of text sequences and
# groups of subparagraphs.
Paragraphs     = { Paragraph [WSPC] }+
Paragraph      = "\paragraph" block [WSPC] { sequence | SubParagraphs }
Eckhart Arnold's avatar
Eckhart Arnold committed
39

Eckhart Arnold's avatar
Eckhart Arnold committed
40
41
# Lowest structural level: a \subparagraph heading block followed by at most
# one text sequence (no further nesting).
SubParagraphs  = { SubParagraph [WSPC] }+
SubParagraph   = "\subparagraph" block [WSPC] [ sequence ]
Eckhart Arnold's avatar
Eckhart Arnold committed
42

Eckhart Arnold's avatar
Eckhart Arnold committed
43
44
# Back matter: \bibliography takes one block argument; \printindex takes none.
# Both consume optional trailing whitespace/comments.
Bibliography   = "\bibliography" block [WSPC]
Index          = "\printindex" [WSPC]
Eckhart Arnold's avatar
Eckhart Arnold committed
45
46
47
48
49
50
51


#######################################################################
#
# document content
#
#######################################################################
52

Eckhart Arnold's avatar
Eckhart Arnold committed
53

Eckhart Arnold's avatar
Eckhart Arnold committed
54
55
#### block environments ####

56
# A block environment is either one with a dedicated rule or, failing that,
# a generic \begin{NAME} ... \end{NAME} block.
block_environment   = known_environment | generic_block
Eckhart Arnold's avatar
Eckhart Arnold committed
57
# Block environments that have a dedicated rule; alternatives are tried in the
# order listed.
known_environment   = itemize | enumerate | figure | tabular | quotation
                    | verbatim
59
# Generic block environment: begin tag, content sequence, and a mandatory (§)
# end tag.
generic_block       = begin_generic_block sequence §end_generic_block
Eckhart Arnold's avatar
Eckhart Arnold committed
60
61
# Begin/end tags of a generic *block* environment. `-&LB` is a lookbehind
# requiring that LB matches before the tag, and the tag must be followed by LFF,
# i.e. the tag stands on a line of its own (contrast with begin_inline_env).
begin_generic_block = -&LB begin_environment LFF
end_generic_block   = -&LB  end_environment LFF
Eckhart Arnold's avatar
Eckhart Arnold committed
62

Eckhart Arnold's avatar
Eckhart Arnold committed
63
64
65
# List environments: zero or more items between the begin tag and a
# mandatory (§) end tag. `itemize` and `enumerate` share the `item` rule.
itemize             = "\begin{itemize}"   [WSPC] { item } §"\end{itemize}"
enumerate           = "\begin{enumerate}" [WSPC] { item } §"\end{enumerate}"
# A list item: \item followed by its content sequence.
item                = "\item" [WSPC] sequence
Eckhart Arnold's avatar
Eckhart Arnold committed
66

67
68
69
70
# Environments whose body is parsed as a `sequence`; the end tag is mandatory (§).
# `quotation` covers both the quotation and the quote environment; the longer
# literal "\begin{quotation}" is tried first.
# NOTE(review): the body of `verbatim` is parsed as a regular `sequence`, not
# as raw text — confirm this is intended.
figure              = "\begin{figure}" sequence §"\end{figure}"
quotation           = ("\begin{quotation}" sequence §"\end{quotation}")
                    | ("\begin{quote}" sequence §"\end{quote}")
verbatim            = "\begin{verbatim}" sequence §"\end{verbatim}"
Eckhart Arnold's avatar
Eckhart Arnold committed
71
72
# tabular environment: column configuration, body sequence, mandatory end tag.
# The configuration is a brace-delimited string made up of the characters
# l, c, r and '|'; the closing brace is mandatory (§).
tabular             = "\begin{tabular}" tabular_config sequence §"\end{tabular}"
tabular_config      = "{" /[lcr|]+/~ §"}"
73

Eckhart Arnold's avatar
Eckhart Arnold committed
74
75
76

#### paragraphs and sequences of paragraphs ####

Eckhart Arnold's avatar
Eckhart Arnold committed
77
# A brace-delimited `sequence`; the closing brace is mandatory (§).
# Used by `footnote`.
block_of_paragraphs = /{/~ sequence §/}/
78
# One or more paragraphs or block environments, each optionally followed by a
# paragraph separator (one or more empty lines).
sequence            = { (paragraph | block_environment ) [PARSEP] }+
79
# A paragraph: one or more text elements, each followed by optional
# insignificant whitespace (`//~`). The negative lookahead `!blockcmd` ends the
# paragraph before any block-level command.
paragraph           = { !blockcmd text_element //~ }+
Eckhart Arnold's avatar
Eckhart Arnold committed
80
# Anything that may occur inside a paragraph: plain text, a brace-delimited
# block, an inline environment or a command (tried in this order).
text_element        = text | block | inline_environment | command
Eckhart Arnold's avatar
Eckhart Arnold committed
81
82
83

#### inline environments ####

84
# An inline environment is either one with a dedicated rule or a generic one.
inline_environment  = known_inline_env | generic_inline_env
Eckhart Arnold's avatar
Eckhart Arnold committed
85
# Inline environments with a dedicated rule; currently only inline math.
known_inline_env    = inline_math
Eckhart Arnold's avatar
Eckhart Arnold committed
86
87
# Generic inline environment: begin tag, optional whitespace, one paragraph and
# a mandatory (§) end tag.
# The begin tag counts as "inline" if it is either not preceded by a line break
# (`-!LB`, negative lookbehind) or not followed by LFF — i.e. it does not stand
# on a line of its own, which is what begin_generic_block requires.
generic_inline_env  = begin_inline_env //~ paragraph §end_inline_env
begin_inline_env    = (-!LB begin_environment) | (begin_environment !LFF)
88
# End tag of an inline environment: any end tag, without positional
# constraints (unlike begin_inline_env).
end_inline_env      = end_environment
Eckhart Arnold's avatar
Eckhart Arnold committed
89
90
91
                      ## (-!LB end_environment)   | (end_environment !LFF)  # ambiguity with generic_block when EOF
# Raw environment tags: \begin{NAME} and \end{NAME}. Once `\begin{` or `\end{`
# has matched, the name and the closing brace are mandatory (§).
# `::NAME` in the end tag refers back to a previously matched NAME —
# presumably DHParser's "pop" operator, which requires the same name as in the
# corresponding begin tag; confirm against the DHParser documentation.
begin_environment   = /\\begin{/ §NAME §/}/
end_environment     = /\\end{/ §::NAME §/}/
Eckhart Arnold's avatar
Eckhart Arnold committed
92

Eckhart Arnold's avatar
Eckhart Arnold committed
93
# Inline math: '$', then any (possibly empty) run of characters other than '$',
# then a mandatory (§) closing '$'.
inline_math         = /\$/ /[^$]*/ §/\$/
Eckhart Arnold's avatar
Eckhart Arnold committed
94
95
96


#### commands ####
Eckhart Arnold's avatar
Eckhart Arnold committed
97

Eckhart Arnold's avatar
Eckhart Arnold committed
98
# A command: specifically known commands first, then text commands, then the
# generic fallback.
command             = known_command | text_command | generic_command
Eckhart Arnold's avatar
Eckhart Arnold committed
99
# Commands that have a dedicated rule.
known_command       = footnote | includegraphics | caption
Eckhart Arnold's avatar
Eckhart Arnold committed
100
# Command-like tokens that stand for text: \text... commands, escaped special
# characters and literal square brackets.
text_command        = TXTCOMMAND | ESCAPED | BRACKETS
Eckhart Arnold's avatar
Eckhart Arnold committed
101
# Fallback for any other command: a command name that is not excluded by
# `no_command`, optionally followed by a block argument, which in turn may be
# preceded by a [config] in square brackets. A config without a following
# block is not accepted by this rule.
generic_command     = !no_command CMDNAME [[ //~ config ] //~ block ]
Eckhart Arnold's avatar
Eckhart Arnold committed
102

Eckhart Arnold's avatar
Eckhart Arnold committed
103
# \footnote takes a brace-delimited sequence, so its argument may contain
# several paragraphs and block environments.
footnote            = "\footnote" block_of_paragraphs
104
# \includegraphics with an optional [config] and a mandatory block argument.
includegraphics     = "\includegraphics" [ config ] block
Eckhart Arnold's avatar
Eckhart Arnold committed
105
# \caption with a single block argument.
caption             = "\caption" block
Eckhart Arnold's avatar
Eckhart Arnold committed
106

107

Eckhart Arnold's avatar
Eckhart Arnold committed
108
109
110
111
112
#######################################################################
#
# low-level text and character sequences
#
#######################################################################
113

114

Eckhart Arnold's avatar
Eckhart Arnold committed
115
116
117
# config: text in square brackets; the closing bracket is mandatory (§).
# block:  brace-delimited run of zero or more text elements (no block-level
#         commands, see `blockcmd`); the closing brace is mandatory (§).
# text:   one or more text chunks separated by optional insignificant whitespace.
config     = "[" text §"]"
block      = /{/ //~ { !blockcmd text_element //~ } §/}/
text       = TEXTCHUNK { //~ TEXTCHUNK }
118

Eckhart Arnold's avatar
Eckhart Arnold committed
119
120
121
122
123
# Prefixes that must NOT be parsed as a generic command (used as `!no_command`
# in generic_command): environment tags and structural commands.
# NOTE(review): "\end" has no opening brace, unlike "\begin{", so it also
# excludes every command whose name merely starts with "\end" (e.g. \endnote);
# confirm whether "\end{" was intended.
no_command = "\begin{" | "\end" | BACKSLASH structural
# Start of a block-level construct; used as `!blockcmd` to end a paragraph or
# block before it: the begin/end tag of a known block environment, or a
# structural command.
# "quotation" has to be listed before "quote": alternatives are tried in order
# and the first match wins, so with the shorter literal first, "quote" would
# match the beginning of "quotation", the following "}" would fail, and
# \begin{quotation} / \end{quotation} would never be recognized here.
# NOTE(review): begin_generic_block / end_generic_block are tried after
# BACKSLASH has already been consumed, yet both start with a backslash of their
# own (via begin_environment / end_environment) — they look unreachable in this
# position; confirm.
blockcmd   = BACKSLASH ( ( "begin{" | "end{" )
                         ( "enumerate" | "itemize" | "figure" | "quotation"
                         | "quote" | "tabular") "}"
                       | structural | begin_generic_block | end_generic_block )
124
125
126

# Names (without the leading backslash) of the sectioning commands and of
# \item; used by `no_command` and `blockcmd`.
structural = "subsection" | "section" | "chapter" | "subsubsection"
           | "paragraph" | "subparagraph" | "item"
127

Eckhart Arnold's avatar
Eckhart Arnold committed
128
129
130
131
132
133
134

#######################################################################
#
# Primitives
#
#######################################################################

135

136
# Command name: a backslash followed by one or more word characters other than
# the underscore, plus optional trailing whitespace (`~`).
CMDNAME    = /\\(?:(?!_)\w)+/~
Eckhart Arnold's avatar
Eckhart Arnold committed
137
138
139
140
# TXTCOMMAND: backslash, the letters "text", then one or more word characters.
# ESCAPED:    a backslash followed by one special character. '#' is included so
#             that the standard LaTeX escape \# is accepted like \%, \$, \&,
#             \_, \{ and \}; no other rule in this grammar matches it.
TXTCOMMAND = /\\text\w+/
ESCAPED    = /\\[%$&_\/{}#]/
BRACKETS   = /[\[\]]/                       # left or right square bracket: [ ]

141
# A name (e.g. of an environment): one or more word characters, plus optional
# trailing whitespace (`~`).
NAME       = /\w+/~
Eckhart Arnold's avatar
Eckhart Arnold committed
142

143
144
# Excluded characters: backslash, %, $, &, braces, square brackets and all
# whitespace (the explicit \n is already covered by \s).
TEXTCHUNK  = /[^\\%$&\{\}\[\]\s\n]+/        # some piece of text excluding whitespace,
                                            # linefeed and special characters
Eckhart Arnold's avatar
Eckhart Arnold committed
145
# A single linefeed with surrounding blanks/tabs; `!GAP` rules out the case
# where the linefeed is part of an empty-line gap.
LF         = !GAP /[ \t]*\n[ \t]*/          # linefeed but not an empty line
Eckhart Arnold's avatar
Eckhart Arnold committed
146
# Consumes optional insignificant whitespace (`//~`), then requires via
# lookbehind (`-&LB`) that a line break precedes the current position, then
# consumes any further whitespace/comments.
LFF        = //~ -&LB [ WSPC ]              # at least one linefeed
Eckhart Arnold's avatar
Eckhart Arnold committed
147
148
# Arbitrary whitespace: one or more comments or whitespace runs (including
# linefeeds and empty lines). COMMENT__ is not defined in this file —
# presumably the symbol DHParser provides for the `@ comment` regex; confirm.
WSPC       = { COMMENT__ | /\s+/ }+
# WSPC       = { /\s+/~ | ~/\s+/ }+           # arbitrary horizontal or vertical whitespace
Eckhart Arnold's avatar
Eckhart Arnold committed
149
150
# PARSEP is one or more GAPs. A GAP needs at least two linefeeds with only
# blanks/tabs between them, and also consumes trailing insignificant
# whitespace (`~`).
PARSEP     = { GAP }+                       # paragraph separator
GAP        = /[ \t]*(?:\n[ \t]*)+\n/~       # at least one empty line, i.e.
151
                                            # [whitespace] linefeed [whitespace] linefeed
152
# In this file LB is only used behind the lookbehind operators `-&` and `-!`
# (see begin_generic_block, end_generic_block, begin_inline_env, LFF).
# NOTE(review): presumably the regex is applied to the text *before* the
# current position in reverse, which is why `$` can stand for the beginning of
# the text — confirm against the DHParser documentation.
LB         = /\s*?\n|$/                     # backwards line break for Lookbehind-Operator
Eckhart Arnold's avatar
Eckhart Arnold committed
153
154
155
156
                                            # beginning of text marker '$' added for test code
# A single backslash character.
BACKSLASH  = /[\\]/

# Succeeds (without consuming anything) where no further character matches `.`.
# NOTE(review): unless the regex is compiled with DOTALL, `.` does not match a
# linefeed, so this also succeeds directly before a newline. In `document` the
# preceding [WSPC] consumes trailing whitespace first, so it acts as a true
# end-of-file check there — confirm before reusing EOF elsewhere.
EOF        = /(?!.)/                        # End-Of-File