LaTeXCompiler.py 25.9 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
#!/usr/bin/python

#######################################################################
#
# SYMBOLS SECTION - Can be edited. Changes will be preserved.
#
#######################################################################


import os
import sys
Eckhart Arnold's avatar
Eckhart Arnold committed
12
13
from functools import partial

14
15
16
17
try:
    import regex as re
except ImportError:
    import re
18
from DHParser import is_filename, Grammar, Compiler, Lookbehind, Alternative, Pop, \
19
    Token, Synonym, \
20
    Option, NegativeLookbehind, OneOrMore, RegExp, Series, RE, Capture, \
21
    ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
22
    PreprocessorFunc, TransformationDict, \
Eckhart Arnold's avatar
Eckhart Arnold committed
23
    Node, TransformationFunc, traverse, remove_children_if, is_anonymous, \
Eckhart Arnold's avatar
Eckhart Arnold committed
24
    reduce_single_child, replace_by_single_child, remove_whitespace, \
25
    flatten, is_empty, collapse, replace_content, remove_brackets, is_one_of, remove_first, \
eckhart's avatar
eckhart committed
26
    remove_tokens, remove_nodes, TOKEN_PTYPE
27
from DHParser.log import logging
28
29
30
31


#######################################################################
#
# PREPROCESSOR SECTION - Can be edited. Changes will be preserved.
#
#######################################################################

Eckhart Arnold's avatar
Eckhart Arnold committed
36
def LaTeXPreprocessor(text):
    """Return the LaTeX source ``text`` unchanged.

    This is a placeholder preprocessing stage: it performs no
    transformation and simply hands back its argument.
    """
    return text

Eckhart Arnold's avatar
Eckhart Arnold committed
39
40
def get_preprocessor() -> PreprocessorFunc:
    """Return the function that is used to preprocess LaTeX sources."""
    preprocessor = LaTeXPreprocessor
    return preprocessor
41
42
43
44
45
46
47
48
49
50
51


#######################################################################
#
# PARSER SECTION - Don't edit! CHANGES WILL BE OVERWRITTEN!
#
#######################################################################

class LaTeXGrammar(Grammar):
    r"""Parser for a LaTeX source file, with this grammar:
    
    # LaTeX-Grammar for DHParser
    
    # preamble
    @ whitespace = /[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?/    # optional whitespace, including at most one linefeed
    @ comment    = /%.*/
    
    ########################################################################
    #
    # outer document structure
    #
    ########################################################################
    
    latexdoc       = preamble document
    preamble       = { [WSPC] command }+
    
    document       = [WSPC] "\begin{document}"
                     frontpages
                     (Chapters | Sections)
                     [Bibliography] [Index] [WSPC]
                     "\end{document}" [WSPC] §EOF
    frontpages     = sequence
    
    
    #######################################################################
    #
    # document structure
    #
    #######################################################################
    
    Chapters       = { [WSPC] Chapter }+
    Chapter        = "\chapter" heading { sequence | Sections }
    
    Sections       = { [WSPC] Section }+
    Section        = "\section" heading { sequence | SubSections }
    
    SubSections    = { [WSPC] SubSection }+
    SubSection     = "\subsection" heading { sequence | SubSubSections }
    
    SubSubSections = { [WSPC] SubSubSection }+
    SubSubSection  = "\subsubsection" heading { sequence | Paragraphs }
    
    Paragraphs     = { [WSPC] Paragraph  }+
    Paragraph      = "\paragraph" heading { sequence | SubParagraphs }
    
    SubParagraphs  = { [WSPC] SubParagraph }+
    SubParagraph   = "\subparagraph" heading [ sequence ]
    
    Bibliography   = [WSPC] "\bibliography" heading
    Index          = [WSPC] "\printindex"
    
    heading        = block
    
    #######################################################################
    #
    # document content
    #
    #######################################################################
    
    
    #### block environments ####
    
    block_environment   = known_environment | generic_block
    known_environment   = itemize | enumerate | figure | tabular | quotation
                        | verbatim
    generic_block       = begin_generic_block sequence §end_generic_block
    begin_generic_block = -&LB begin_environment LFF
    end_generic_block   = -&LB  end_environment LFF
    
    itemize             = "\begin{itemize}" [WSPC] { item } §"\end{itemize}"
    enumerate           = "\begin{enumerate}" [WSPC] {item } §"\end{enumerate}"
    item                = "\item" sequence
    
    figure              = "\begin{figure}" sequence §"\end{figure}"
    quotation           = ("\begin{quotation}" sequence §"\end{quotation}")
                        | ("\begin{quote}" sequence §"\end{quote}")
    verbatim            = "\begin{verbatim}" sequence §"\end{verbatim}"
    tabular             = "\begin{tabular}" tabular_config { tabular_row } §"\end{tabular}"
    tabular_row         = (multicolumn | tabular_cell) { "&" (multicolumn | tabular_cell) }
                          "\\" ( hline | { cline } )
    tabular_cell        = { line_element //~ }
    tabular_config      = "{" /[lcr|]+/~ §"}"
    
    
    #### paragraphs and sequences of paragraphs ####
    
    block_of_paragraphs = "{" [sequence] §"}"
    sequence            = [WSPC] { (paragraph | block_environment ) [PARSEP] }+
    paragraph           = { !blockcmd text_element //~ }+
    text_element        = line_element | LINEFEED
    line_element        = text | block | inline_environment | command
    
    
    #### inline enivronments ####
    
    inline_environment  = known_inline_env | generic_inline_env
    known_inline_env    = inline_math
    generic_inline_env  = begin_inline_env //~ paragraph §end_inline_env
    begin_inline_env    = (-!LB begin_environment) | (begin_environment !LFF)
    end_inline_env      = end_environment
                          ## (-!LB end_environment)   | (end_environment !LFF)  # ambiguity with genric_block when EOF
    begin_environment   = /\\begin{/ §NAME /}/
    end_environment     = /\\end{/ §::NAME /}/
    
    inline_math         = /\$/ /[^$]*/ §/\$/
    
    
    #### commands ####
    
    command             = known_command | text_command | generic_command
    known_command       = footnote | includegraphics | caption | multicolumn | hline | cline
    text_command        = TXTCOMMAND | ESCAPED | BRACKETS
    generic_command     = !no_command CMDNAME [[ //~ config ] //~ block ]
    
    footnote            = "\footnote" block_of_paragraphs
    includegraphics     = "\includegraphics" [ config ] block
    caption             = "\caption" block
    multicolumn         = "\multicolumn" "{" INTEGER "}" tabular_config block_of_paragraphs
    hline               = "\hline"
    cline               = "\cline{" INTEGER "-" INTEGER "}"
    
    
    #######################################################################
    #
    # low-level text and character sequences
    #
    #######################################################################
    
    
    config     = "[" cfg_text §"]"
    cfg_text   = { ([//~] text) | CMDNAME | SPECIAL }
    block      = /{/ //~ { !blockcmd text_element //~ } §/}/
    text       = TEXTCHUNK { //~ TEXTCHUNK }
    
    no_command = "\begin{" | "\end" | BACKSLASH structural
    blockcmd   = BACKSLASH ( ( "begin{" | "end{" )
                             ( "enumerate" | "itemize" | "figure" | "quote"
                             | "quotation" | "tabular") "}"
                           | structural | begin_generic_block | end_generic_block )
    
    structural = "subsection" | "section" | "chapter" | "subsubsection"
               | "paragraph" | "subparagraph" | "item"
    
    
    #######################################################################
    #
    # primitives
    #
    #######################################################################
    
    
    CMDNAME    = /\\(?:(?!_)\w)+/~
    TXTCOMMAND = /\\text\w+/
    ESCAPED    = /\\[%$&_\/{}]/
    SPECIAL    = /[$&_\\\\\/]/
    BRACKETS   = /[\[\]]/                       # left or right square bracket: [ ]
    LINEFEED   = /[\\][\\]/
    
    NAME       = /\w+/~
    INTEGER    = /\d+/~
    
    TEXTCHUNK  = /[^\\%$&\{\}\[\]\s\n]+/        # some piece of text excluding whitespace,
                                                # linefeed and special characters
    LF         = NEW_LINE { COMMENT__ WHITESPACE__ }   # linefeed but not an empty line
    LFF        = NEW_LINE [ WSPC ]              # at least one linefeed
    PARSEP     = { WHITESPACE__ COMMENT__ } GAP [WSPC] # paragraph separator
    WSPC       = { COMMENT__ | /\s+/ }+         # arbitrary horizontal or vertical whitespace
    GAP        = /[ \t]*(?:\n[ \t]*)+\n/~       # at least one empty line, i.e.
                                                # [whitespace] linefeed [whitespace] linefeed
    NEW_LINE   = /[ \t]*/ [COMMENT__] /\n/
    LB         = /\s*?\n|$/                     # backwards line break for Lookbehind-Operator
                                                # beginning of text marker '$' added for test code
    BACKSLASH  = /[\\]/
    
    EOF        = /(?!.)/                        # End-Of-File
    """
    # NOTE: per the section banner above this class, this part of the file
    # is generated and will be overwritten; the parser definitions below
    # mirror the grammar in the docstring rule by rule, in reverse order.

    # Parsers that are referenced before they can be defined are declared
    # as Forward() placeholders here and bound further down via ``.set()``.
    begin_generic_block = Forward()
    block_environment = Forward()
    block_of_paragraphs = Forward()
    end_generic_block = Forward()
    paragraph = Forward()
    tabular_config = Forward()
    text_element = Forward()
    source_hash__ = "96b3c5ce2f75505a279d4d27f7712323"
    parser_initialization__ = "upon instantiation"
    # Comment and whitespace patterns, identical to the ``@ comment`` and
    # ``@ whitespace`` directives of the grammar.
    COMMENT__ = r'%.*'
    WHITESPACE__ = r'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?'
    WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
    wspL__ = ''
    wspR__ = WSP__

    # --- primitives ---
    EOF = RegExp('(?!.)')
    BACKSLASH = RegExp('[\\\\]')
    LB = RegExp('\\s*?\\n|$')
    NEW_LINE = Series(RegExp('[ \\t]*'), Option(RegExp(COMMENT__)), RegExp('\\n'))
    GAP = RE('[ \\t]*(?:\\n[ \\t]*)+\\n')
    WSPC = OneOrMore(Alternative(RegExp(COMMENT__), RegExp('\\s+')))
    PARSEP = Series(ZeroOrMore(Series(RegExp(WHITESPACE__), RegExp(COMMENT__))), GAP, Option(WSPC))
    LFF = Series(NEW_LINE, Option(WSPC))
    LF = Series(NEW_LINE, ZeroOrMore(Series(RegExp(COMMENT__), RegExp(WHITESPACE__))))
    TEXTCHUNK = RegExp('[^\\\\%$&\\{\\}\\[\\]\\s\\n]+')
    INTEGER = RE('\\d+')
    # NAME is wrapped in Capture; end_environment below refers to it
    # through Pop(NAME) (``::NAME`` in the grammar).
    NAME = Capture(RE('\\w+'))
    LINEFEED = RegExp('[\\\\][\\\\]')
    BRACKETS = RegExp('[\\[\\]]')
    SPECIAL = RegExp('[$&_\\\\\\\\/]')
    ESCAPED = RegExp('\\\\[%$&_/{}]')
    TXTCOMMAND = RegExp('\\\\text\\w+')
    CMDNAME = RE('\\\\(?:(?!_)\\w)+')

    # --- low-level text and character sequences ---
    structural = Alternative(Token("subsection"), Token("section"), Token("chapter"), Token("subsubsection"), Token("paragraph"), Token("subparagraph"), Token("item"))
    blockcmd = Series(BACKSLASH, Alternative(Series(Alternative(Token("begin{"), Token("end{")), Alternative(Token("enumerate"), Token("itemize"), Token("figure"), Token("quote"), Token("quotation"), Token("tabular")), Token("}")), structural, begin_generic_block, end_generic_block))
    no_command = Alternative(Token("\\begin{"), Token("\\end"), Series(BACKSLASH, structural))
    text = Series(TEXTCHUNK, ZeroOrMore(Series(RE(''), TEXTCHUNK)))
    block = Series(RegExp('{'), RE(''), ZeroOrMore(Series(NegativeLookahead(blockcmd), text_element, RE(''))), RegExp('}'), mandatory=3)
    cfg_text = ZeroOrMore(Alternative(Series(Option(RE('')), text), CMDNAME, SPECIAL))
    config = Series(Token("["), cfg_text, Token("]"), mandatory=2)

    # --- commands ---
    cline = Series(Token("\\cline{"), INTEGER, Token("-"), INTEGER, Token("}"))
    hline = Token("\\hline")
    multicolumn = Series(Token("\\multicolumn"), Token("{"), INTEGER, Token("}"), tabular_config, block_of_paragraphs)
    caption = Series(Token("\\caption"), block)
    includegraphics = Series(Token("\\includegraphics"), Option(config), block)
    footnote = Series(Token("\\footnote"), block_of_paragraphs)
    generic_command = Series(NegativeLookahead(no_command), CMDNAME, Option(Series(Option(Series(RE(''), config)), RE(''), block)))
    text_command = Alternative(TXTCOMMAND, ESCAPED, BRACKETS)
    known_command = Alternative(footnote, includegraphics, caption, multicolumn, hline, cline)
    command = Alternative(known_command, text_command, generic_command)

    # --- inline environments ---
    inline_math = Series(RegExp('\\$'), RegExp('[^$]*'), RegExp('\\$'), mandatory=2)
    end_environment = Series(RegExp('\\\\end{'), Pop(NAME), RegExp('}'), mandatory=1)
    begin_environment = Series(RegExp('\\\\begin{'), NAME, RegExp('}'), mandatory=1)
    end_inline_env = Synonym(end_environment)
    begin_inline_env = Alternative(Series(NegativeLookbehind(LB), begin_environment), Series(begin_environment, NegativeLookahead(LFF)))
    generic_inline_env = Series(begin_inline_env, RE(''), paragraph, end_inline_env, mandatory=3)
    known_inline_env = Synonym(inline_math)
    inline_environment = Alternative(known_inline_env, generic_inline_env)

    # --- paragraphs and sequences of paragraphs ---
    line_element = Alternative(text, block, inline_environment, command)
    text_element.set(Alternative(line_element, LINEFEED))
    paragraph.set(OneOrMore(Series(NegativeLookahead(blockcmd), text_element, RE(''))))
    sequence = Series(Option(WSPC), OneOrMore(Series(Alternative(paragraph, block_environment), Option(PARSEP))))
    block_of_paragraphs.set(Series(Token("{"), Option(sequence), Token("}"), mandatory=2))

    # --- block environments ---
    tabular_config.set(Series(Token("{"), RE('[lcr|]+'), Token("}"), mandatory=2))
    tabular_cell = ZeroOrMore(Series(line_element, RE('')))
    tabular_row = Series(Alternative(multicolumn, tabular_cell), ZeroOrMore(Series(Token("&"), Alternative(multicolumn, tabular_cell))), Token("\\\\"), Alternative(hline, ZeroOrMore(cline)))
    tabular = Series(Token("\\begin{tabular}"), tabular_config, ZeroOrMore(tabular_row), Token("\\end{tabular}"), mandatory=3)
    verbatim = Series(Token("\\begin{verbatim}"), sequence, Token("\\end{verbatim}"), mandatory=2)
    quotation = Alternative(Series(Token("\\begin{quotation}"), sequence, Token("\\end{quotation}"), mandatory=2), Series(Token("\\begin{quote}"), sequence, Token("\\end{quote}"), mandatory=2))
    figure = Series(Token("\\begin{figure}"), sequence, Token("\\end{figure}"), mandatory=2)
    item = Series(Token("\\item"), sequence)
    enumerate = Series(Token("\\begin{enumerate}"), Option(WSPC), ZeroOrMore(item), Token("\\end{enumerate}"), mandatory=3)
    itemize = Series(Token("\\begin{itemize}"), Option(WSPC), ZeroOrMore(item), Token("\\end{itemize}"), mandatory=3)
    end_generic_block.set(Series(Lookbehind(LB), end_environment, LFF))
    begin_generic_block.set(Series(Lookbehind(LB), begin_environment, LFF))
    generic_block = Series(begin_generic_block, sequence, end_generic_block, mandatory=2)
    known_environment = Alternative(itemize, enumerate, figure, tabular, quotation, verbatim)
    block_environment.set(Alternative(known_environment, generic_block))

    # --- document structure ---
    heading = Synonym(block)
    Index = Series(Option(WSPC), Token("\\printindex"))
    Bibliography = Series(Option(WSPC), Token("\\bibliography"), heading)
    SubParagraph = Series(Token("\\subparagraph"), heading, Option(sequence))
    SubParagraphs = OneOrMore(Series(Option(WSPC), SubParagraph))
    Paragraph = Series(Token("\\paragraph"), heading, ZeroOrMore(Alternative(sequence, SubParagraphs)))
    Paragraphs = OneOrMore(Series(Option(WSPC), Paragraph))
    SubSubSection = Series(Token("\\subsubsection"), heading, ZeroOrMore(Alternative(sequence, Paragraphs)))
    SubSubSections = OneOrMore(Series(Option(WSPC), SubSubSection))
    SubSection = Series(Token("\\subsection"), heading, ZeroOrMore(Alternative(sequence, SubSubSections)))
    SubSections = OneOrMore(Series(Option(WSPC), SubSection))
    Section = Series(Token("\\section"), heading, ZeroOrMore(Alternative(sequence, SubSections)))
    Sections = OneOrMore(Series(Option(WSPC), Section))
    Chapter = Series(Token("\\chapter"), heading, ZeroOrMore(Alternative(sequence, Sections)))
    Chapters = OneOrMore(Series(Option(WSPC), Chapter))

    # --- outer document structure ---
    frontpages = Synonym(sequence)
    document = Series(Option(WSPC), Token("\\begin{document}"), frontpages, Alternative(Chapters, Sections), Option(Bibliography), Option(Index), Option(WSPC), Token("\\end{document}"), Option(WSPC), EOF, mandatory=9)
    preamble = OneOrMore(Series(Option(WSPC), command))
    latexdoc = Series(preamble, document)
    # The root parser of the grammar is the ``latexdoc`` rule.
    root__ = latexdoc
    
def get_grammar() -> LaTeXGrammar:
    """Return the shared ``LaTeXGrammar`` instance, creating it on first use.

    The instance is cached in the module-level global
    ``thread_local_LaTeX_grammar_singleton``. Despite its name, that
    variable is an ordinary module global, so the same object is returned
    on every subsequent call.
    """
    global thread_local_LaTeX_grammar_singleton
    try:
        grammar = thread_local_LaTeX_grammar_singleton
    except NameError:
        # First call: the global does not exist yet, so create it now.
        thread_local_LaTeX_grammar_singleton = LaTeXGrammar()
        grammar = thread_local_LaTeX_grammar_singleton
    return grammar
333
334
335
336
337
338
339
340
341


#######################################################################
#
# AST SECTION - Can be edited. Changes will be preserved.
#
#######################################################################


342
def streamline_whitespace(context):
    """Reduce a whitespace node to a single linefeed or a single space.

    ``context[-1]`` is the node that is rewritten in place (its ``result``
    attribute is replaced); ``context[-2]`` is the node preceding it in the
    context, presumably its parent. Nothing is changed if that preceding
    node was produced by a ``":Token"`` parser.

    The new content is ``'\\n'`` if the old content contains a comment
    marker (``%``) or a linefeed, and ``' '`` otherwise. Any comment text
    inside the whitespace is therefore discarded.
    """
    if context[-2].parser.ptype == ":Token":
        return
    node = context[-1]
    assert node.tag_name in ['WSPC', ':Whitespace']
    content = str(node)
    # A comment and a linefeed are treated alike: both collapse to '\n'.
    if '%' in content or '\n' in content:
        node.result = '\n'
    else:
        node.result = ' '


360
361
362
def watch(node):
    """Debugging aid: print whatever ``node.as_sxpr()`` returns."""
    sxpr = node.as_sxpr()
    print(sxpr)

eckhart's avatar
eckhart committed
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
# Names of the pure container nodes that are dissolved into their parent
# by ``flatten_structure``.
_STRUCTURE_CONTAINERS = {"Chapters", "Sections", "SubSections", "SubSubSections",
                         "Paragraphs", "SubParagraphs", "sequence"}


def _is_structure_container(context):
    """Tell whether the current node is anonymous or a pure container node."""
    return is_anonymous(context) or is_one_of(context, _STRUCTURE_CONTAINERS)


# Transformation built by ``flatten`` from the predicate above; the second
# positional argument is passed on unchanged as ``True``.
flatten_structure = flatten(_is_structure_container, True)


def is_commandname(context):
    """Tell whether the current node is the token naming its parent's command.

    Returns ``True`` only if the last node of ``context`` stems from a
    token parser, its parent (``context[-2]``) has more than one child and
    the token's text equals ``\\<name>``, ``\\begin{<name>}`` or
    ``\\end{<name>}``, where ``<name>`` is the parent's tag name in lower
    case. Returns ``False`` in every other case.
    """
    node = context[-1]
    if node.parser.ptype != TOKEN_PTYPE:
        return False
    parent = context[-2]
    if len(parent.children) <= 1:
        return False
    name = parent.tag_name.lower()
    token_text = str(node)
    command_spellings = ('\\' + name,
                         '\\begin{' + name + '}',
                         '\\end{' + name + '}')
    return token_text in command_spellings


def _is_expendable(context):
    """Tell whether the current node is empty, a separator or a command name."""
    if is_empty(context):
        return True
    if is_one_of(context, {'PARSEP', 'WSPC'}):
        return True
    return is_commandname(context)


# Transformation that removes every child for which the predicate above holds.
drop_expendables = remove_children_if(_is_expendable)

386

387
388
# Maps node names (several names may share one comma-separated key) to the
# transformation, or list of transformations, applied to nodes of that name.
# The keys "+" and "*" and the keys starting with ":" are not grammar rule
# names; presumably they are special keys interpreted by ``traverse`` --
# confirm against the DHParser documentation.
LaTeX_AST_transformation_table = {
    # AST Transformations for the LaTeX-grammar
    "+": [drop_expendables, flatten_structure],
    "latexdoc": [],
    "preamble": [],
    "document": [flatten_structure],
    "frontpages": reduce_single_child,
    "Chapters, Sections, SubSections, SubSubSections, Paragraphs, SubParagraphs": [],
    "Chapter, Section, SubSection, SubSubSection, Paragraph, SubParagraph": [],
    "heading": reduce_single_child,
    "Bibliography": [],
    "Index": [],
    "block_environment": replace_by_single_child,
    "known_environment": replace_by_single_child,
    "generic_block": [],
    "begin_generic_block, end_generic_block": [remove_nodes('NEW_LINE'), replace_by_single_child],
    "itemize, enumerate": [remove_brackets, flatten],
    "item": [],
    "figure": [],
    "quotation": [reduce_single_child, remove_brackets],
    "verbatim": [],
    "tabular": [],
    "tabular_config, block_of_paragraphs": [remove_brackets, reduce_single_child],
    # NOTE(review): '\\' is a single backslash, whereas the grammar's
    # tabular_row uses Token("\\\\"), i.e. two backslashes -- confirm that
    # the row terminator is really removed here.
    "tabular_row": [flatten, remove_tokens('&', '\\')],
    "tabular_cell": [flatten, remove_whitespace],
    "multicolumn": [remove_tokens('{', '}')],
    "hline": [remove_whitespace, reduce_single_child],
    "sequence": [flatten],
    "paragraph": [flatten],
    "text_element": replace_by_single_child,
    "line_element": replace_by_single_child,
    "inline_environment": replace_by_single_child,
    "known_inline_env": replace_by_single_child,
    "generic_inline_env": [],
    "begin_inline_env, end_inline_env": [replace_by_single_child],
    "begin_environment, end_environment": [remove_brackets, reduce_single_child],
    "inline_math": [remove_brackets, reduce_single_child],
    "command": replace_by_single_child,
    "known_command": replace_by_single_child,
    "text_command": [],
    "generic_command": [flatten],
    "footnote": [],
    "includegraphics": [],
    "caption": [],
    "config": [remove_brackets, reduce_single_child],
    "block": [remove_brackets, flatten, replace_by_single_child],
    "text": collapse,
    "no_command, blockcmd": [],
    "structural": [],
    "CMDNAME": [remove_whitespace, reduce_single_child],
    "TXTCOMMAND": [remove_whitespace, reduce_single_child],
    "NAME": [reduce_single_child, remove_whitespace, reduce_single_child],
    # Drop the first character of the matched text (the escaping backslash
    # according to the ESCAPED rule of the grammar).
    "ESCAPED": [replace_content(lambda node: str(node)[1:])],
    "BRACKETS": [],
    "TEXTCHUNK": [],
    "LF": [],
    # Every paragraph separator is normalised to exactly two linefeeds.
    "PARSEP": replace_content(lambda node: '\n\n'),
    "GAP": [],
    "LB": [],
    "BACKSLASH": [],
    "EOF": [],
    ":Token":
        [remove_whitespace, reduce_single_child],
    ":RE": replace_by_single_child,
    ":Whitespace": streamline_whitespace,
    "*": replace_by_single_child
}

455

456
457
def LaTeXTransform() -> TransformationFunc:
    """Build the AST transformation function for LaTeX syntax trees.

    Returns ``traverse`` with its ``processing_table`` argument pre-bound to
    a shallow copy of ``LaTeX_AST_transformation_table``, so the returned
    callable does not share the top-level dict with the module-level table.
    The result is a callable, not a dict, hence the ``TransformationFunc``
    return annotation.
    """
    return partial(traverse, processing_table=LaTeX_AST_transformation_table.copy())
458
459

def get_transformer() -> TransformationFunc:
    """Return the shared AST transformation function, creating it on first use.

    The function is cached in the module-level global
    ``thread_local_LaTeX_transformer_singleton``. Despite its name, that
    variable is an ordinary module global, so the same object is returned
    on every subsequent call.
    """
    global thread_local_LaTeX_transformer_singleton
    try:
        transformer = thread_local_LaTeX_transformer_singleton
    except NameError:
        # First call: the global does not exist yet, so create it now.
        thread_local_LaTeX_transformer_singleton = LaTeXTransform()
        transformer = thread_local_LaTeX_transformer_singleton
    return transformer

468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484


#######################################################################
#
# COMPILER SECTION - Can be edited. Changes will be preserved.
#
#######################################################################

class LaTeXCompiler(Compiler):
    """Compiler for the abstract-syntax-tree of a LaTeX source file.

    Only ``on_latexdoc`` produces a result: it returns the node it receives
    unchanged. All other ``on_*`` methods are empty stubs that return
    ``None``.

    NOTE(review): ``on_table``, ``on_table_config``, ``on_cfgtext`` and
    ``on_word_sequence`` have no rule of the same name in the grammar of
    this file (which defines ``tabular``, ``tabular_config`` and
    ``cfg_text``); they look like leftovers of an earlier grammar version.
    """

    def __init__(self, grammar_name="LaTeX", grammar_source=""):
        super(LaTeXCompiler, self).__init__(grammar_name, grammar_source)
        # Raw string: '\w' and '\Z' are regex escapes, not string escapes.
        assert re.match(r'\w+\Z', grammar_name)

    def on_latexdoc(self, node):
        return node

    def on_preamble(self, node):
        pass

    def on_document(self, node):
        pass

    def on_frontpages(self, node):
        pass

    def on_Chapters(self, node):
        pass

    def on_Chapter(self, node):
        pass

    def on_Sections(self, node):
        pass

    def on_Section(self, node):
        pass

    def on_SubSections(self, node):
        pass

    def on_SubSection(self, node):
        pass

    def on_SubSubSections(self, node):
        pass

    def on_SubSubSection(self, node):
        pass

    def on_Paragraphs(self, node):
        pass

    def on_Paragraph(self, node):
        pass

    def on_SubParagraphs(self, node):
        pass

    def on_SubParagraph(self, node):
        pass

    def on_Bibliography(self, node):
        pass

    def on_Index(self, node):
        pass

    def on_block_environment(self, node):
        pass

    def on_known_environment(self, node):
        pass

    def on_generic_block(self, node):
        pass

    def on_begin_generic_block(self, node):
        pass

    def on_end_generic_block(self, node):
        pass

    def on_itemize(self, node):
        pass

    def on_enumerate(self, node):
        pass

    def on_item(self, node):
        pass

    def on_figure(self, node):
        pass

    def on_quotation(self, node):
        pass

    def on_verbatim(self, node):
        pass

    def on_table(self, node):
        pass

    def on_table_config(self, node):
        pass

    def on_block_of_paragraphs(self, node):
        pass

    def on_sequence(self, node):
        pass

    def on_paragraph(self, node):
        pass

    def on_text_element(self, node):
        pass

    def on_inline_environment(self, node):
        pass

    def on_known_inline_env(self, node):
        pass

    def on_generic_inline_env(self, node):
        pass

    def on_begin_inline_env(self, node):
        pass

    def on_begin_environment(self, node):
        pass

    def on_end_environment(self, node):
        pass

    def on_inline_math(self, node):
        pass

    def on_command(self, node):
        pass

    def on_known_command(self, node):
        pass

    def on_generic_command(self, node):
        pass

    def on_footnote(self, node):
        pass

    def on_includegraphics(self, node):
        pass

    def on_caption(self, node):
        pass

    def on_config(self, node):
        pass

    def on_block(self, node):
        pass

    def on_text(self, node):
        pass

    def on_cfgtext(self, node):
        pass

    def on_word_sequence(self, node):
        pass

    def on_no_command(self, node):
        pass

    def on_blockcmd(self, node):
        pass

    def on_structural(self, node):
        pass

    def on_CMDNAME(self, node):
        pass

    def on_NAME(self, node):
        pass

    def on_ESCAPED(self, node):
        pass

    def on_BRACKETS(self, node):
        pass

    def on_TEXTCHUNK(self, node):
        pass

    def on_WSPC(self, node):
        pass

    def on_LF(self, node):
        pass

    def on_PARSEP(self, node):
        pass

    def on_LB(self, node):
        pass

    def on_BACKSLASH(self, node):
        pass

    def on_EOF(self, node):
        pass

685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711

def get_compiler(grammar_name="LaTeX", grammar_source="") -> LaTeXCompiler:
    """Return the shared ``LaTeXCompiler``, creating it on first use.

    On the first call a new compiler is constructed with ``grammar_name``
    and ``grammar_source``. On later calls the cached compiler is re-used
    and updated via ``set_grammar_name(grammar_name, grammar_source)``.

    The instance is cached in the module-level global
    ``thread_local_LaTeX_compiler_singleton``; despite its name this is an
    ordinary module global.
    """
    global thread_local_LaTeX_compiler_singleton
    try:
        # Only the lookup is guarded, so that a NameError raised inside
        # set_grammar_name() is not mistaken for "singleton not created yet".
        compiler = thread_local_LaTeX_compiler_singleton
    except NameError:
        compiler = LaTeXCompiler(grammar_name, grammar_source)
        thread_local_LaTeX_compiler_singleton = compiler
    else:
        compiler.set_grammar_name(grammar_name, grammar_source)
    return compiler


#######################################################################
#
# END OF DHPARSER-SECTIONS
#
#######################################################################


def compile_src(source):
    """Compiles ``source`` and returns (result, errors, ast).

    ``source`` is handed to ``compile_source`` together with the
    preprocessor, grammar, transformer and compiler obtained from the
    ``get_*`` functions of this module. The whole run is executed inside
    the ``logging("LOGS")`` context.
    """
    with logging("LOGS"):
        compiler = get_compiler()
        result = compile_source(source, get_preprocessor(),
                                get_grammar(),
                                get_transformer(), compiler)
    return result


# Command-line entry point: compile the file named by the first argument.
if __name__ == "__main__":
    if len(sys.argv) > 1:
        result, errors, ast = compile_src(sys.argv[1])
        if errors:
            # Report every error and signal failure through the exit code.
            for error in errors:
                print(error)
            sys.exit(1)
        else:
            # A Node result is printed as XML, anything else as it is.
            print(result.as_xml() if isinstance(result, Node) else result)
    else:
        print("Usage: LaTeXCompiler.py [FILENAME]")