LaTeXCompiler.py 27.5 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
#!/usr/bin/python

#######################################################################
#
# SYMBOLS SECTION - Can be edited. Changes will be preserved.
#
#######################################################################


import os
import sys
Eckhart Arnold's avatar
Eckhart Arnold committed
12
13
from functools import partial

14
15
16
17
try:
    import regex as re
except ImportError:
    import re
18
from DHParser import is_filename, Grammar, Compiler, Lookbehind, Alternative, Pop, \
19
    Token, Synonym, \
20
    Option, NegativeLookbehind, OneOrMore, RegExp, Series, RE, Capture, \
21
    ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
22
    PreprocessorFunc, TransformationDict, \
Eckhart Arnold's avatar
Eckhart Arnold committed
23
    Node, TransformationFunc, traverse, remove_children_if, is_anonymous, \
Eckhart Arnold's avatar
Eckhart Arnold committed
24
    reduce_single_child, replace_by_single_child, remove_whitespace, \
25
    flatten, is_empty, collapse, replace_content, remove_brackets, is_one_of, remove_first, \
eckhart's avatar
eckhart committed
26
    remove_tokens, remove_nodes, TOKEN_PTYPE
27
from DHParser.log import logging
28
29
30
31


#######################################################################
#
# PREPROCESSOR SECTION - Can be edited. Changes will be preserved.
#
#######################################################################

Eckhart Arnold's avatar
Eckhart Arnold committed
36
def LaTeXPreprocessor(text):
    """Preprocess LaTeX source text before parsing.

    This is currently an identity pass: ``text`` is returned unchanged.

    :param text: the source text handed to the preprocessor.
    :returns: the very same ``text`` object.
    """
    return text

Eckhart Arnold's avatar
Eckhart Arnold committed
39
40
def get_preprocessor() -> PreprocessorFunc:
    """Return the preprocessor function used for LaTeX sources.

    :returns: the module-level ``LaTeXPreprocessor`` function itself
        (not a call result).
    """
    preprocessor = LaTeXPreprocessor
    return preprocessor
41
42
43
44
45
46
47
48
49
50
51


#######################################################################
#
# PARSER SECTION - Don't edit! CHANGES WILL BE OVERWRITTEN!
#
#######################################################################

class LaTeXGrammar(Grammar):
    r"""Parser for a LaTeX source file, with this grammar:
    
    # LaTeX-Grammar for DHParser
    
    # preamble
    @ whitespace = /[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?/    # optional whitespace, including at most one linefeed
    @ comment    = /%.*/
    
    ########################################################################
    #
    # outer document structure
    #
    ########################################################################
    
    latexdoc       = preamble document
    preamble       = { [WSPC] command }+
    
    document       = [WSPC] "\begin{document}"
                     frontpages
                     (Chapters | Sections)
                     [Bibliography] [Index] [WSPC]
                     "\end{document}" [WSPC] §EOF
    frontpages     = sequence
    
    
    #######################################################################
    #
    # document structure
    #
    #######################################################################
    
    Chapters       = { [WSPC] Chapter }+
    Chapter        = "\chapter" heading { sequence | Sections }
    
    Sections       = { [WSPC] Section }+
    Section        = "\section" heading { sequence | SubSections }
    
    SubSections    = { [WSPC] SubSection }+
    SubSection     = "\subsection" heading { sequence | SubSubSections }
    
    SubSubSections = { [WSPC] SubSubSection }+
    SubSubSection  = "\subsubsection" heading { sequence | Paragraphs }
    
    Paragraphs     = { [WSPC] Paragraph  }+
    Paragraph      = "\paragraph" heading { sequence | SubParagraphs }
    
    SubParagraphs  = { [WSPC] SubParagraph }+
    SubParagraph   = "\subparagraph" heading [ sequence ]
    
    Bibliography   = [WSPC] "\bibliography" heading
    Index          = [WSPC] "\printindex"
    
    heading        = block
    
    #######################################################################
    #
    # document content
    #
    #######################################################################
    
    
    #### block environments ####
    
    block_environment   = known_environment | generic_block
    known_environment   = itemize | enumerate | figure | tabular | quotation
                        | verbatim
    generic_block       = begin_generic_block sequence §end_generic_block
    begin_generic_block = -&LB begin_environment LFF
    end_generic_block   = -&LB  end_environment LFF
    
    itemize             = "\begin{itemize}" [WSPC] { item } §"\end{itemize}"
    enumerate           = "\begin{enumerate}" [WSPC] {item } §"\end{enumerate}"
    item                = "\item" sequence
    
    figure              = "\begin{figure}" sequence §"\end{figure}"
    quotation           = ("\begin{quotation}" sequence §"\end{quotation}")
                        | ("\begin{quote}" sequence §"\end{quote}")
    verbatim            = "\begin{verbatim}" sequence §"\end{verbatim}"
    tabular             = "\begin{tabular}" tabular_config { tabular_row } §"\end{tabular}"
    tabular_row         = (multicolumn | tabular_cell) { "&" (multicolumn | tabular_cell) }
                          "\\" ( hline | { cline } )
    tabular_cell        = { line_element //~ }
    tabular_config      = "{" /[lcr|]+/~ §"}"
    
    
    #### paragraphs and sequences of paragraphs ####
    
    block_of_paragraphs = "{" [sequence] §"}"
    sequence            = [WSPC] { (paragraph | block_environment ) [PARSEP] }+
    paragraph           = { !blockcmd text_element //~ }+
    text_element        = line_element | LINEFEED
    line_element        = text | block | inline_environment | command
    
    
    #### inline enivronments ####
    
    inline_environment  = known_inline_env | generic_inline_env
    known_inline_env    = inline_math
    generic_inline_env  = begin_inline_env //~ paragraph §end_inline_env
    begin_inline_env    = (-!LB begin_environment) | (begin_environment !LFF)
    end_inline_env      = end_environment
                          ## (-!LB end_environment)   | (end_environment !LFF)  # ambiguity with genric_block when EOF
    begin_environment   = /\\begin{/ §NAME /}/
    end_environment     = /\\end{/ §::NAME /}/
    
    inline_math         = /\$/ /[^$]*/ §/\$/
    
    
    #### commands ####
    
    command             = known_command | text_command | generic_command
    known_command       = footnote | includegraphics | caption | multicolumn | hline | cline
    text_command        = TXTCOMMAND | ESCAPED | BRACKETS
    generic_command     = !no_command CMDNAME [[ //~ config ] //~ block ]
    
    footnote            = "\footnote" block_of_paragraphs
    includegraphics     = "\includegraphics" [ config ] block
    caption             = "\caption" block
    multicolumn         = "\multicolumn" "{" INTEGER "}" tabular_config block_of_paragraphs
    hline               = "\hline"
    cline               = "\cline{" INTEGER "-" INTEGER "}"
    
    
    #######################################################################
    #
    # low-level text and character sequences
    #
    #######################################################################
    
    
    config     = "[" cfg_text §"]"
    cfg_text   = { ([//~] text) | CMDNAME | SPECIAL }
    block      = /{/ //~ { !blockcmd text_element //~ } §/}/
    text       = TEXTCHUNK { //~ TEXTCHUNK }
    
    no_command = "\begin{" | "\end" | BACKSLASH structural
    blockcmd   = BACKSLASH ( ( "begin{" | "end{" )
                             ( "enumerate" | "itemize" | "figure" | "quote"
                             | "quotation" | "tabular") "}"
                           | structural | begin_generic_block | end_generic_block )
    
    structural = "subsection" | "section" | "chapter" | "subsubsection"
               | "paragraph" | "subparagraph" | "item"
    
    
    #######################################################################
    #
    # primitives
    #
    #######################################################################
    
    
    CMDNAME    = /\\(?:(?!_)\w)+/~
    TXTCOMMAND = /\\text\w+/
    ESCAPED    = /\\[%$&_\/{}]/
    SPECIAL    = /[$&_\\\\\/]/
    BRACKETS   = /[\[\]]/                       # left or right square bracket: [ ]
    LINEFEED   = /[\\][\\]/
    
    NAME       = /\w+/~
    INTEGER    = /\d+/~
    
    TEXTCHUNK  = /[^\\%$&\{\}\[\]\s\n]+/        # some piece of text excluding whitespace,
                                                # linefeed and special characters
    LF         = NEW_LINE { COMMENT__ WHITESPACE__ }   # linefeed but not an empty line
    LFF        = NEW_LINE [ WSPC ]              # at least one linefeed
    PARSEP     = { WHITESPACE__ COMMENT__ } GAP [WSPC] # paragraph separator
    WSPC       = { COMMENT__ | /\s+/ }+         # arbitrary horizontal or vertical whitespace
    GAP        = /[ \t]*(?:\n[ \t]*)+\n/~       # at least one empty line, i.e.
                                                # [whitespace] linefeed [whitespace] linefeed
    NEW_LINE   = /[ \t]*/ [COMMENT__] /\n/
    LB         = /\s*?\n|$/                     # backwards line break for Lookbehind-Operator
                                                # beginning of text marker '$' added for test code
    BACKSLASH  = /[\\]/
    
    EOF        = /(?!.)/                        # End-Of-File
    """
    # NOTE: this class lives in the generated PARSER SECTION of the file; the
    # definitions below mirror the grammar in the docstring rule by rule.

    # Forward declarations for parsers that are referenced before they are
    # defined; each one is bound further down through its ``.set(...)`` call.
    begin_generic_block = Forward()
    block_environment = Forward()
    block_of_paragraphs = Forward()
    end_generic_block = Forward()
    paragraph = Forward()
    tabular_config = Forward()
    text_element = Forward()

    # Grammar metadata plus the whitespace/comment regexes taken from the
    # ``@ whitespace`` and ``@ comment`` directives of the grammar.
    source_hash__ = "96b3c5ce2f75505a279d4d27f7712323"
    parser_initialization__ = "upon instantiation"
    COMMENT__ = r'%.*'
    WHITESPACE__ = r'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?'
    WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
    wspL__ = ''
    wspR__ = WSP__

    # --- primitives ---
    EOF = RegExp('(?!.)')
    BACKSLASH = RegExp('[\\\\]')
    LB = RegExp('\\s*?\\n|$')
    NEW_LINE = Series(RegExp('[ \\t]*'), Option(RegExp(COMMENT__)), RegExp('\\n'))
    GAP = RE('[ \\t]*(?:\\n[ \\t]*)+\\n')
    WSPC = OneOrMore(Alternative(RegExp(COMMENT__), RegExp('\\s+')))
    PARSEP = Series(ZeroOrMore(Series(RegExp(WHITESPACE__), RegExp(COMMENT__))), GAP, Option(WSPC))
    LFF = Series(NEW_LINE, Option(WSPC))
    LF = Series(NEW_LINE, ZeroOrMore(Series(RegExp(COMMENT__), RegExp(WHITESPACE__))))
    TEXTCHUNK = RegExp('[^\\\\%$&\\{\\}\\[\\]\\s\\n]+')
    INTEGER = RE('\\d+')
    NAME = Capture(RE('\\w+'))
    LINEFEED = RegExp('[\\\\][\\\\]')
    BRACKETS = RegExp('[\\[\\]]')
    SPECIAL = RegExp('[$&_\\\\\\\\/]')
    ESCAPED = RegExp('\\\\[%$&_/{}]')
    TXTCOMMAND = RegExp('\\\\text\\w+')
    CMDNAME = RE('\\\\(?:(?!_)\\w)+')

    # --- low-level text and character sequences ---
    structural = Alternative(Token("subsection"), Token("section"), Token("chapter"), Token("subsubsection"), Token("paragraph"), Token("subparagraph"), Token("item"))
    blockcmd = Series(BACKSLASH, Alternative(Series(Alternative(Token("begin{"), Token("end{")), Alternative(Token("enumerate"), Token("itemize"), Token("figure"), Token("quote"), Token("quotation"), Token("tabular")), Token("}")), structural, begin_generic_block, end_generic_block))
    no_command = Alternative(Token("\\begin{"), Token("\\end"), Series(BACKSLASH, structural))
    text = Series(TEXTCHUNK, ZeroOrMore(Series(RE(''), TEXTCHUNK)))
    block = Series(RegExp('{'), RE(''), ZeroOrMore(Series(NegativeLookahead(blockcmd), text_element, RE(''))), RegExp('}'), mandatory=3)
    cfg_text = ZeroOrMore(Alternative(Series(Option(RE('')), text), CMDNAME, SPECIAL))
    config = Series(Token("["), cfg_text, Token("]"), mandatory=2)

    # --- commands ---
    cline = Series(Token("\\cline{"), INTEGER, Token("-"), INTEGER, Token("}"))
    hline = Token("\\hline")
    multicolumn = Series(Token("\\multicolumn"), Token("{"), INTEGER, Token("}"), tabular_config, block_of_paragraphs)
    caption = Series(Token("\\caption"), block)
    includegraphics = Series(Token("\\includegraphics"), Option(config), block)
    footnote = Series(Token("\\footnote"), block_of_paragraphs)
    generic_command = Series(NegativeLookahead(no_command), CMDNAME, Option(Series(Option(Series(RE(''), config)), RE(''), block)))
    text_command = Alternative(TXTCOMMAND, ESCAPED, BRACKETS)
    known_command = Alternative(footnote, includegraphics, caption, multicolumn, hline, cline)
    command = Alternative(known_command, text_command, generic_command)

    # --- inline environments ---
    inline_math = Series(RegExp('\\$'), RegExp('[^$]*'), RegExp('\\$'), mandatory=2)
    end_environment = Series(RegExp('\\\\end{'), Pop(NAME), RegExp('}'), mandatory=1)
    begin_environment = Series(RegExp('\\\\begin{'), NAME, RegExp('}'), mandatory=1)
    end_inline_env = Synonym(end_environment)
    begin_inline_env = Alternative(Series(NegativeLookbehind(LB), begin_environment), Series(begin_environment, NegativeLookahead(LFF)))
    generic_inline_env = Series(begin_inline_env, RE(''), paragraph, end_inline_env, mandatory=3)
    known_inline_env = Synonym(inline_math)
    inline_environment = Alternative(known_inline_env, generic_inline_env)

    # --- paragraphs and sequences of paragraphs ---
    line_element = Alternative(text, block, inline_environment, command)
    text_element.set(Alternative(line_element, LINEFEED))
    paragraph.set(OneOrMore(Series(NegativeLookahead(blockcmd), text_element, RE(''))))
    sequence = Series(Option(WSPC), OneOrMore(Series(Alternative(paragraph, block_environment), Option(PARSEP))))
    block_of_paragraphs.set(Series(Token("{"), Option(sequence), Token("}"), mandatory=2))

    # --- block environments ---
    tabular_config.set(Series(Token("{"), RE('[lcr|]+'), Token("}"), mandatory=2))
    tabular_cell = ZeroOrMore(Series(line_element, RE('')))
    tabular_row = Series(Alternative(multicolumn, tabular_cell), ZeroOrMore(Series(Token("&"), Alternative(multicolumn, tabular_cell))), Token("\\\\"), Alternative(hline, ZeroOrMore(cline)))
    tabular = Series(Token("\\begin{tabular}"), tabular_config, ZeroOrMore(tabular_row), Token("\\end{tabular}"), mandatory=3)
    verbatim = Series(Token("\\begin{verbatim}"), sequence, Token("\\end{verbatim}"), mandatory=2)
    quotation = Alternative(Series(Token("\\begin{quotation}"), sequence, Token("\\end{quotation}"), mandatory=2), Series(Token("\\begin{quote}"), sequence, Token("\\end{quote}"), mandatory=2))
    figure = Series(Token("\\begin{figure}"), sequence, Token("\\end{figure}"), mandatory=2)
    item = Series(Token("\\item"), sequence)
    enumerate = Series(Token("\\begin{enumerate}"), Option(WSPC), ZeroOrMore(item), Token("\\end{enumerate}"), mandatory=3)
    itemize = Series(Token("\\begin{itemize}"), Option(WSPC), ZeroOrMore(item), Token("\\end{itemize}"), mandatory=3)
    end_generic_block.set(Series(Lookbehind(LB), end_environment, LFF))
    begin_generic_block.set(Series(Lookbehind(LB), begin_environment, LFF))
    generic_block = Series(begin_generic_block, sequence, end_generic_block, mandatory=2)
    known_environment = Alternative(itemize, enumerate, figure, tabular, quotation, verbatim)
    block_environment.set(Alternative(known_environment, generic_block))

    # --- document structure ---
    heading = Synonym(block)
    Index = Series(Option(WSPC), Token("\\printindex"))
    Bibliography = Series(Option(WSPC), Token("\\bibliography"), heading)
    SubParagraph = Series(Token("\\subparagraph"), heading, Option(sequence))
    SubParagraphs = OneOrMore(Series(Option(WSPC), SubParagraph))
    Paragraph = Series(Token("\\paragraph"), heading, ZeroOrMore(Alternative(sequence, SubParagraphs)))
    Paragraphs = OneOrMore(Series(Option(WSPC), Paragraph))
    SubSubSection = Series(Token("\\subsubsection"), heading, ZeroOrMore(Alternative(sequence, Paragraphs)))
    SubSubSections = OneOrMore(Series(Option(WSPC), SubSubSection))
    SubSection = Series(Token("\\subsection"), heading, ZeroOrMore(Alternative(sequence, SubSubSections)))
    SubSections = OneOrMore(Series(Option(WSPC), SubSection))
    Section = Series(Token("\\section"), heading, ZeroOrMore(Alternative(sequence, SubSections)))
    Sections = OneOrMore(Series(Option(WSPC), Section))
    Chapter = Series(Token("\\chapter"), heading, ZeroOrMore(Alternative(sequence, Sections)))
    Chapters = OneOrMore(Series(Option(WSPC), Chapter))

    # --- outer document structure ---
    frontpages = Synonym(sequence)
    document = Series(Option(WSPC), Token("\\begin{document}"), frontpages, Alternative(Chapters, Sections), Option(Bibliography), Option(Index), Option(WSPC), Token("\\end{document}"), Option(WSPC), EOF, mandatory=9)
    preamble = OneOrMore(Series(Option(WSPC), command))
    latexdoc = Series(preamble, document)

    # Entry point of the grammar.
    root__ = latexdoc
    
def get_grammar() -> LaTeXGrammar:
    """Return the shared ``LaTeXGrammar`` instance, creating it on first use.

    The instance is cached in the module-level global
    ``thread_local_LaTeX_grammar_singleton``. Despite that name, the cache
    is an ordinary module global (see the ``global`` statement), so the
    same object is returned to every caller of this module.

    :returns: the cached ``LaTeXGrammar`` object.
    """
    global thread_local_LaTeX_grammar_singleton
    try:
        grammar = thread_local_LaTeX_grammar_singleton
    except NameError:
        # First call: the global does not exist yet, so create it now.
        thread_local_LaTeX_grammar_singleton = LaTeXGrammar()
        grammar = thread_local_LaTeX_grammar_singleton
    return grammar
333
334
335
336
337
338
339
340
341


#######################################################################
#
# AST SECTION - Can be edited. Changes will be preserved.
#
#######################################################################


342
def streamline_whitespace(context):
    """Normalize the whitespace node at the end of ``context`` in place.

    The node's ``result`` is replaced by

    * ``'\\n'`` if its content contains a comment marker (``%``) or a
      linefeed,
    * ``' '`` if it contains any other (non-empty) content,
    * ``''`` if it is empty.

    Whitespace whose direct parent is a ``:Token`` node is left untouched.

    :param context: list of nodes ending in the node to be processed
        (``context[-1]``); ``context[-2]``, if present, is treated as its
        parent.
    :returns: ``None``; the node is modified in place.
    """
    # A context without a parent entry cannot sit below a token, so only
    # inspect context[-2] when it exists (avoids an IndexError).
    if len(context) > 1 and context[-2].parser.ptype == ":Token":
        return
    node = context[-1]
    assert node.tag_name in ['WSPC', ':Whitespace']
    content = node.content
    if content.find('%') >= 0 or content.find('\n') >= 0:
        # Comments and line breaks both collapse to a single linefeed.
        node.result = '\n'
    else:
        node.result = ' ' if content else ''
357
358


359
360
361
def watch(node):
    """Debugging aid: print the value of ``node.as_sxpr()`` to stdout.

    :param node: any object offering an ``as_sxpr()`` method.
    :returns: ``None``.
    """
    sxpr = node.as_sxpr()
    print(sxpr)

eckhart's avatar
eckhart committed
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
# Transformation built with ``flatten``. Its condition accepts a context
# when ``is_anonymous(context)`` holds or when ``is_one_of`` matches one of
# the listed container names ("Chapters", "Sections", ..., "sequence").
# NOTE(review): the trailing positional ``True`` is presumably flatten's
# "recursive" flag -- confirm against DHParser's ``flatten`` signature.
flatten_structure = flatten(lambda context: is_anonymous(context) or is_one_of(
    context, {"Chapters", "Sections", "SubSections", "SubSubSections", "Paragraphs",
              "SubParagraphs", "sequence"}), True)


def is_commandname(context):
    """Tell whether the last node of ``context`` merely repeats its parent's name.

    This is the case if the node is a token (``ptype == TOKEN_PTYPE``), its
    parent (``context[-2]``) has more than one child, and the token's text
    equals the lower-cased parent tag name written as ``\\name``,
    ``\\begin{name}`` or ``\\end{name}``.

    :param context: list of nodes ending in the node under inspection,
        preceded by its parent.
    :returns: ``True`` for such a command-name token, otherwise ``False``.
    """
    token = context[-1]
    if token.parser.ptype != TOKEN_PTYPE:
        return False
    owner = context[-2]
    if len(owner.children) <= 1:
        return False
    name = owner.tag_name.lower()
    text = str(token)
    if text == '\\' + name:
        return True
    if text == '\\begin{' + name + '}':
        return True
    return text == '\\end{' + name + '}'


# Child-removal transformation built with ``remove_children_if``. The
# predicate is true for a context that is empty (``is_empty``), that matches
# 'PARSEP' or 'WSPC' (``is_one_of``), or that is a command-name token
# (``is_commandname``). The checks short-circuit in exactly this order, so
# ``is_commandname`` only runs when the first two are false.
drop_expendables = remove_children_if(lambda context: is_empty(context) or
                                                      is_one_of(context, {'PARSEP', 'WSPC'}) or
                                                      is_commandname(context))

385

386
387
# Maps parser/tag names to the transformation (or list of transformations)
# applied when turning the concrete syntax tree into the AST. A key may list
# several names separated by commas. The keys "+", "*" and the ":"-prefixed
# names are special selectors interpreted by DHParser's ``traverse``
# (NOTE(review): see the DHParser transformation docs for their exact
# semantics).
LaTeX_AST_transformation_table = {
    # AST Transformations for the LaTeX-grammar
    "+": [drop_expendables, flatten_structure],
    "latexdoc": [],
    "preamble": [],
    "document": [flatten_structure],
    "frontpages": reduce_single_child,
    "Chapters, Sections, SubSections, SubSubSections, Paragraphs, SubParagraphs": [],
    "Chapter, Section, SubSection, SubSubSection, Paragraph, SubParagraph": [],
    "heading": reduce_single_child,
    "Bibliography": [],
    "Index": [],
    "block_environment": replace_by_single_child,
    "known_environment": replace_by_single_child,
    "generic_block": [],
    "begin_generic_block, end_generic_block": [remove_nodes('NEW_LINE'), replace_by_single_child],
    "itemize, enumerate": [remove_brackets, flatten],
    "item": [],
    "figure": [],
    "quotation": [reduce_single_child, remove_brackets],
    "verbatim": [],
    "tabular": [],
    "tabular_config, block_of_paragraphs": [remove_brackets, reduce_single_child],
    # The row terminator in the grammar is the two-character token ``\\``
    # (Token("\\\\") in LaTeXGrammar.tabular_row), so the filter must name
    # two backslashes; a single one ('\\') would never match that token.
    "tabular_row": [flatten, remove_tokens('&', '\\\\')],
    "tabular_cell": [flatten, remove_whitespace],
    "multicolumn": [remove_tokens('{', '}')],
    "hline": [remove_whitespace, reduce_single_child],
    "sequence": [flatten],
    "paragraph": [flatten],
    "text_element": replace_by_single_child,
    "line_element": replace_by_single_child,
    "inline_environment": replace_by_single_child,
    "known_inline_env": replace_by_single_child,
    "generic_inline_env": [],
    "begin_inline_env, end_inline_env": [replace_by_single_child],
    "begin_environment, end_environment": [remove_brackets, reduce_single_child],
    "inline_math": [remove_brackets, reduce_single_child],
    "command": replace_by_single_child,
    "known_command": replace_by_single_child,
    "text_command": [],
    "generic_command": [flatten],
    "footnote": [],
    "includegraphics": [],
    "caption": [],
    "config": [remove_brackets, reduce_single_child],
    "block": [remove_brackets, flatten, replace_by_single_child],
    "text": collapse,
    "no_command, blockcmd": [],
    "structural": [],
    "CMDNAME": [remove_whitespace, reduce_single_child],
    "TXTCOMMAND": [remove_whitespace, reduce_single_child],
    "NAME": [reduce_single_child, remove_whitespace, reduce_single_child],
    # Drop the leading backslash of an escaped character.
    "ESCAPED": [replace_content(lambda node: str(node)[1:])],
    "BRACKETS": [],
    "TEXTCHUNK": [],
    "LF": [],
    # Every paragraph separator is normalized to exactly one empty line.
    "PARSEP": replace_content(lambda node: '\n\n'),
    "GAP": [],
    "LB": [],
    "BACKSLASH": [],
    "EOF": [],
    ":Token":
        [remove_whitespace, reduce_single_child],
    ":RE": replace_by_single_child,
    ":Whitespace": streamline_whitespace,
    "*": replace_by_single_child
}

454

455
456
def LaTeXTransform() -> TransformationFunc:
    """Create a transformation function for LaTeX syntax trees.

    The result is ``traverse`` with its ``processing_table`` argument bound
    to a shallow copy of ``LaTeX_AST_transformation_table``, so each call
    returns a callable with its own copy of the table's top level.

    :returns: a ``functools.partial`` object wrapping ``traverse`` -- a
        callable, not a dictionary, hence the ``TransformationFunc``
        annotation (matching ``get_transformer``, which returns this value).
    """
    return partial(traverse, processing_table=LaTeX_AST_transformation_table.copy())
457
458

def get_transformer() -> TransformationFunc:
    """Return the shared AST transformer, creating it on first use.

    The transformer is obtained from ``LaTeXTransform()`` and cached in the
    module-level global ``thread_local_LaTeX_transformer_singleton``.
    Despite that name, the cache is an ordinary module global (see the
    ``global`` statement).

    :returns: the cached transformation function.
    """
    global thread_local_LaTeX_transformer_singleton
    try:
        transformer = thread_local_LaTeX_transformer_singleton
    except NameError:
        # First call: the global does not exist yet, so create it now.
        thread_local_LaTeX_transformer_singleton = LaTeXTransform()
        transformer = thread_local_LaTeX_transformer_singleton
    return transformer

467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483


#######################################################################
#
# COMPILER SECTION - Can be edited. Changes will be preserved.
#
#######################################################################

class LaTeXCompiler(Compiler):
    """Compiler for the abstract-syntax-tree of a LaTeX source file.

    Only the ``latexdoc`` and ``preamble`` nodes have handlers so far; the
    remaining ``on_...`` handlers exist merely as commented-out stubs.
    """

    def __init__(self, grammar_name="LaTeX", grammar_source=""):
        """Initialize the compiler.

        Args:
            grammar_name: Name of the grammar; must consist of word
                characters only (checked by an assertion).
            grammar_source: Passed through unchanged to the base class.
        """
        super().__init__(grammar_name, grammar_source)
        # Raw string literal: '\w' and '\Z' are regular-expression escapes,
        # not string escapes. A non-raw literal triggers an invalid-escape
        # warning on newer Python versions.
        assert re.match(r'\w+\Z', grammar_name)

    def on_latexdoc(self, node):
        """Compile the 'preamble' and 'document' children of ``node``.

        The results of the two ``self.compile`` calls are discarded;
        ``node`` itself is returned.
        """
        self.compile(node['preamble'])
        self.compile(node['document'])
        return node

    def on_preamble(self, node):
        """Return the preamble ``node`` as is."""
        return node

    # def on_document(self, node):
    #     return node

    # def on_frontpages(self, node):
    #     return node

    # def on_Chapters(self, node):
    #     return node

    # def on_Chapter(self, node):
    #     return node

    # def on_Sections(self, node):
    #     return node

    # def on_Section(self, node):
    #     return node

    # def on_SubSections(self, node):
    #     return node

    # def on_SubSection(self, node):
    #     return node
514

515
516
    # def on_SubSubSections(self, node):
    #     return node
517

518
519
    # def on_SubSubSection(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
520

521
522
    # def on_Paragraphs(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
523

524
525
    # def on_Paragraph(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
526

527
528
    # def on_SubParagraphs(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
529

530
531
    # def on_SubParagraph(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
532

533
534
    # def on_Bibliography(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
535

536
537
    # def on_Index(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
538

539
540
    # def on_heading(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
541

542
543
    # def on_block_environment(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
544

545
546
    # def on_known_environment(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
547

548
549
    # def on_generic_block(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
550

551
552
    # def on_begin_generic_block(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
553

554
555
    # def on_end_generic_block(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
556

557
558
    # def on_itemize(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
559

560
561
    # def on_enumerate(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
562

563
564
    # def on_item(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
565

566
567
    # def on_figure(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
568

569
570
    # def on_quotation(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
571

572
573
    # def on_verbatim(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
574

575
576
    # def on_tabular(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
577

578
579
    # def on_tabular_row(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
580

581
582
    # def on_tabular_cell(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
583

584
585
    # def on_tabular_config(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
586

587
588
    # def on_block_of_paragraphs(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
589

590
591
    # def on_sequence(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
592

593
594
    # def on_paragraph(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
595

596
597
    # def on_text_element(self, node):
    #     return node
598

599
600
    # def on_line_element(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
601

602
603
    # def on_inline_environment(self, node):
    #     return node
604

605
606
    # def on_known_inline_env(self, node):
    #     return node
607

608
609
    # def on_generic_inline_env(self, node):
    #     return node
610

611
612
    # def on_begin_inline_env(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
613

614
615
    # def on_end_inline_env(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
616

617
618
    # def on_begin_environment(self, node):
    #     return node
619

620
621
    # def on_end_environment(self, node):
    #     return node
622

623
624
    # def on_inline_math(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
625

626
627
    # def on_command(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
628

629
630
    # def on_known_command(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
631

632
633
    # def on_text_command(self, node):
    #     return node
634

635
636
    # def on_generic_command(self, node):
    #     return node
637

638
639
    # def on_footnote(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
640

641
642
    # def on_includegraphics(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
643

644
645
    # def on_caption(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
646

647
648
    # def on_multicolumn(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
649

650
651
    # def on_hline(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
652

653
654
    # def on_cline(self, node):
    #     return node
655

656
657
    # def on_config(self, node):
    #     return node
658

659
660
    # def on_cfg_text(self, node):
    #     return node
661

662
663
    # def on_block(self, node):
    #     return node
664

665
666
    # def on_text(self, node):
    #     return node
667

668
669
    # def on_no_command(self, node):
    #     return node
670

671
672
    # def on_blockcmd(self, node):
    #     return node
673

674
675
    # def on_structural(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
676

677
678
    # def on_CMDNAME(self, node):
    #     return node
679

680
681
    # def on_TXTCOMMAND(self, node):
    #     return node
682

683
684
    # def on_ESCAPED(self, node):
    #     return node
685

686
687
    # def on_SPECIAL(self, node):
    #     return node
688

689
690
    # def on_BRACKETS(self, node):
    #     return node
691

692
693
    # def on_LINEFEED(self, node):
    #     return node
694

695
696
    # def on_NAME(self, node):
    #     return node
697

698
699
    # def on_INTEGER(self, node):
    #     return node
700

701
702
    # def on_TEXTCHUNK(self, node):
    #     return node
703

704
705
    # def on_LF(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
706

707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
    # def on_LFF(self, node):
    #     return node

    # def on_PARSEP(self, node):
    #     return node

    # def on_WSPC(self, node):
    #     return node

    # def on_GAP(self, node):
    #     return node

    # def on_NEW_LINE(self, node):
    #     return node

    # def on_LB(self, node):
    #     return node

    # def on_BACKSLASH(self, node):
    #     return node

    # def on_EOF(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
730

731
732
733
734
735
736
737
738
739

def get_compiler(grammar_name="LaTeX", grammar_source="") -> LaTeXCompiler:
    """Return the module-wide LaTeXCompiler, creating it on first use.

    An already existing compiler gets its grammar name and grammar source
    updated through ``set_grammar_name``. If the cached compiler does not
    exist yet (``NameError``), a new one is created from the same two
    arguments and stored in ``thread_local_LaTeX_compiler_singleton``.
    """
    global thread_local_LaTeX_compiler_singleton
    try:
        thread_local_LaTeX_compiler_singleton.set_grammar_name(
            grammar_name, grammar_source)
    except NameError:
        thread_local_LaTeX_compiler_singleton = LaTeXCompiler(
            grammar_name, grammar_source)
    return thread_local_LaTeX_compiler_singleton
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757


#######################################################################
#
# END OF DHPARSER-SECTIONS
#
#######################################################################


def compile_src(source):
    """Compiles ``source`` and returns (result, errors, ast).

    ``source`` is handed to ``compile_source`` together with the
    preprocessor, grammar, transformer and compiler obtained from the
    ``get_...`` factory functions of this module. Whatever
    ``compile_source`` returns is returned unchanged.
    """
    # NOTE(review): presumably ``logging("LOGS")`` enables logging into a
    # "LOGS" directory for the duration of the block - confirm against
    # DHParser.log.
    with logging("LOGS"):
        # The compiler is fetched first, as before; the previously computed
        # but never used ``log_file_name`` has been dropped.
        compiler = get_compiler()
        result = compile_source(source, get_preprocessor(),
                                get_grammar(),
                                get_transformer(), compiler)
    return result


# Script entry point: compile the file named by the first command line
# argument; print either the errors (exit status 1) or the result.
if __name__ == "__main__":
    if len(sys.argv) <= 1:
        print("Usage: LaTeXCompiler.py [FILENAME]")
    else:
        result, errors, ast = compile_src(sys.argv[1])
        if not errors:
            # Node results are serialized as XML, anything else is printed
            # as it is.
            print(result.as_xml() if isinstance(result, Node) else result)
        else:
            for error in errors:
                print(error)
            sys.exit(1)