LaTeX.ebnf 4.19 KB
Newer Older
Eckhart Arnold's avatar
Eckhart Arnold committed
1
# LaTeX-Grammar for DHParser
Eckhart Arnold's avatar
Eckhart Arnold committed
2

3
@ testing    = True
4
@ whitespace = /[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?/    # optional whitespace, including at most one linefeed
5
@ comment    = /%.*(?:\n|$)/                         # a comment runs from "%" up to and including the linefeed (or to the end of input)
Eckhart Arnold's avatar
Eckhart Arnold committed
6

7

Eckhart Arnold's avatar
Eckhart Arnold committed
8
9
10
11
12
13
14
15
16
17
# Root rule: a preamble followed by the document body.
latexdoc       = preamble document
# The preamble is a non-empty run of commands.
preamble       = { command }+

# The document body: everything from \begin{document} to \end{document},
# which must be followed by the end of the input (§EOF).
# Rule names are case-sensitive, so the structural alternatives are spelled
# `Chapters` / `Sections`, exactly as those rules are defined.
document       = [PARSEP] "\begin{document}" [PARSEP]
                 frontpages [PARSEP]
                 (Chapters | Sections) [PARSEP]
                 [bibliography] [index] [PARSEP]
                 "\end{document}" [PARSEP] §EOF
# The front pages are parsed as an ordinary sequence of paragraphs and block environments.
frontpages     = sequence

Eckhart Arnold's avatar
Eckhart Arnold committed
18
19
20
21
22
23
24

#######################################################################
#
# document structure
#
#######################################################################

Eckhart Arnold's avatar
Eckhart Arnold committed
25
26
# A chapter: the \chapter heading (title given as a block) followed by any
# mix of plain content and sections.
# The literal is lower-case: LaTeX's command is \chapter, and it must agree
# with the "\chapter" entry in blockcmd, which ends the preceding paragraph.
Chapters       = { Chapter [PARSEP] }+
Chapter        = "\chapter" block [PARSEP] { sequence | Sections }
Eckhart Arnold's avatar
Eckhart Arnold committed
27

Eckhart Arnold's avatar
Eckhart Arnold committed
28
29
# A section: the \section heading (title given as a block) followed by any
# mix of plain content and subsections.
# The literal is lower-case to match LaTeX's \section and the "\section"
# entry in blockcmd.
Sections       = { Section [PARSEP] }+
Section        = "\section" block [PARSEP] { sequence | SubSections }
Eckhart Arnold's avatar
Eckhart Arnold committed
30

Eckhart Arnold's avatar
Eckhart Arnold committed
31
32
# A subsection: the \subsection heading (title given as a block) followed by
# any mix of plain content and subsubsections.
# The literal is lower-case to match LaTeX's \subsection and the
# "\subsection" entry in blockcmd.
SubSections    = { SubSection [PARSEP] }+
SubSection     = "\subsection" block [PARSEP] { sequence | SubSubSections }
Eckhart Arnold's avatar
Eckhart Arnold committed
33
34
35
36

# A subsubsection: the \subsubsection heading (title given as a block)
# followed by any mix of plain content and \paragraph units.
# The rule name is spelled `SubSubSections` because that is the symbol the
# SubSection rule refers to; the literal is lower-case to match LaTeX's
# \subsubsection and the "\subsubsection" entry in blockcmd.
SubSubSections = { SubSubSection [PARSEP] }+
SubSubSection  = "\subsubsection" block [PARSEP] { sequence | Paragraphs }

Eckhart Arnold's avatar
Eckhart Arnold committed
37
38
# A \paragraph unit (the LaTeX sectioning command, not the lower-case
# `paragraph` rule for running text): heading block followed by any mix of
# plain content and subparagraphs.
Paragraphs     = { Paragraph [PARSEP] }+
Paragraph      = "\paragraph" block [PARSEP] { sequence | SubParagraphs }
Eckhart Arnold's avatar
Eckhart Arnold committed
39

Eckhart Arnold's avatar
Eckhart Arnold committed
40
41
# A \subparagraph unit: heading block followed by plain content only; this
# is the lowest structural level of the grammar.
# The literal must read "\subparagraph", as in blockcmd.
SubParagraphs  = { SubParagraph [PARSEP] }+
SubParagraph   = "\subparagraph" block [PARSEP] { sequence }
Eckhart Arnold's avatar
Eckhart Arnold committed
42

Eckhart Arnold's avatar
Eckhart Arnold committed
43
44
# Optional back matter referenced by the document rule. Each item may be
# followed by a paragraph separator.
bibliography   = "\bibliography" block [PARSEP]
index          = "\printindex" [PARSEP]
Eckhart Arnold's avatar
Eckhart Arnold committed
45
46
47
48
49
50
51


#######################################################################
#
# document content
#
#######################################################################
52

Eckhart Arnold's avatar
Eckhart Arnold committed
53

Eckhart Arnold's avatar
Eckhart Arnold committed
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#### block environments ####

# A block environment is either one of the specially handled environments or
# any other begin/end pair that wraps a sequence.
block_enrivonment   = known_enrivonment | generic_enrivonment
# NOTE(review): `figure` and `table` are not defined in the part of the
# grammar visible here - confirm they exist elsewhere.
known_enrivonment   = itemize | enumerate | figure | table
# Once the opening tag has matched, the closing tag is mandatory (§).
generic_enrivonment = begin_enrivonment sequence §end_enrivonment

# List environments: zero or more items between the opening and the
# (mandatory, §) closing tag. Both closing tags are LaTeX commands and
# therefore start with a backslash.
itemize             = "\begin{itemize}" [PARSEP] { item } §"\end{itemize}"
enumerate           = "\begin{enumerate}" [PARSEP] { item } §"\end{enumerate}"
# A list item: the \item command followed by a sequence of content.
item                = "\item" [PARSEP] sequence


#### paragraphs and sequences of paragraphs ####

# A sequence enclosed in curly braces; the closing brace is mandatory (§).
block_of_paragraphs = /{/ sequence §/}/
# One or more paragraphs or block environments, each optionally followed by
# a paragraph separator.
sequence            = { (paragraph | block_enrivonment ) [PARSEP] }+

# A paragraph of running text: one or more text elements, none of which may
# start with a block command (see blockcmd). `//~` after each element is an
# empty pattern followed by the whitespace marker.
paragraph           = { !blockcmd textelements //~ }+
# The building blocks of running text. The rule is named `textelements`
# because that is the symbol used by paragraph, generic_inline_env and block.
textelements        = command | text | block | inline_enrivonment


#### inline environments ####

# An inline environment is either a specially handled one (currently only
# inline math) or a generic begin/end pair wrapping text elements.
inline_enrivonment  = known_inline_env | generic_inline_env
known_inline_env    = inline_math
generic_inline_env  = begin_enrivonment { textelements }+ §end_enrivonment
# Opening tag: after "\begin{" both the name and the closing brace are mandatory (§).
begin_enrivonment   = "\begin{" §NAME §"}"
# Closing tag. NOTE(review): `::NAME` presumably pops and matches the name
# captured by the opening tag (DHParser retrieve-and-pop operator) - confirm
# against the DHParser version in use.
end_enrivonment     = "\end{" §::NAME §"}"

# Inline math delimited by dollar signs.
# NOTE(review): `math_Text` is not defined in the part of the grammar visible here - confirm.
inline_math         = "$" math_Text "$"


#### commands ####
Eckhart Arnold's avatar
Eckhart Arnold committed
86

Eckhart Arnold's avatar
Eckhart Arnold committed
87
88
89
# A command is either one with dedicated handling (known_command) or any
# other command name with optional arguments.
command             = known_command | generic_command
known_command       = footnote
# Generic form: command name, optionally followed by a block that may itself
# be preceded by a square-bracketed config part.
generic_command     = CMDNAME [[ //~ config ] //~ block ]
Eckhart Arnold's avatar
Eckhart Arnold committed
90

Eckhart Arnold's avatar
Eckhart Arnold committed
91
92
93
94
95
96
97
98
# A footnote: the \footnote command followed by a brace-delimited sequence of paragraphs.
footnote            = "\footnote" block_of_paragraphs


#######################################################################
#
# low-level text and character sequences
#
#######################################################################
99

100
# Square-bracketed configuration text; the closing bracket is mandatory (§).
config     = "[" cfgtext §"]"
Eckhart Arnold's avatar
Eckhart Arnold committed
101
# A brace-delimited group of zero or more text elements; the closing brace is mandatory (§).
block      = /{/ { textelements } §/}/
Eckhart Arnold's avatar
Eckhart Arnold committed
102

103
104
105
# Running text: config-style text or literal square brackets, one or more times.
text       = { cfgtext | (BRACKETS //~) }+
# Text as allowed inside a config part: word sequences or escaped
# characters, but no bare square brackets.
cfgtext    = { word_sequence | (ESCAPED //~) }+
# One or more text chunks, each followed by the whitespace marker.
word_sequence = { TEXTCHUNK //~ }+
106
107

# Commands that open a new structural unit or block environment. The
# `paragraph` rule uses `!blockcmd` so that running text stops in front of
# any of them. The alternation must be contiguous: nothing but alternatives
# may appear between its continuation lines.
blockcmd   = "\subsection" | "\section" | "\chapter" | "\subsubsection"
             | "\paragraph" | "\subparagraph" | "\begin{enumerate}"
             | "\begin{itemize}" | "\item" | "\begin{figure}"
110

Eckhart Arnold's avatar
Eckhart Arnold committed
111
112
113
114
115
116
117

#######################################################################
#
# Primitives
#
#######################################################################

118
# A command name: a backslash followed by one or more word characters other than "_".
CMDNAME    = /\\(?:(?!_)\w)+/~
119
# A name made of one or more word characters (used for environment names).
NAME       = /\w+/~
Eckhart Arnold's avatar
Eckhart Arnold committed
120

121
# An escaped special character: a backslash followed by one of % $ & _ /
ESCAPED    = /\\[%$&_\/]/
122
123
124
125
126
127
128
BRACKETS   = /[\[\]]/                       # left or right square bracket: [ ]
TEXTCHUNK  = /[^\\%$&\{\}\[\]\s\n]+/        # some piece of text excluding whitespace,
                                            # linefeed and special characters
WSPC       = /[ \t]+/                       # (horizontal) whitespace
LF         = !PARSEP /[ \t]*\n[ \t]*/       # LF but not an empty line
PARSEP     = /[ \t]*(?:\n[ \t]*)+\n[ \t]*/  # at least one empty line, i.e.
                                            # [whitespace] linefeed [whitespace] linefeed
129
# End of input: succeeds only where the pattern /./ does not match at the current position.
EOF        = !/./