LaTeXCompiler.py 24.6 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11
#!/usr/bin/python

#######################################################################
#
# SYMBOLS SECTION - Can be edited. Changes will be preserved.
#
#######################################################################


import os
import sys
Eckhart Arnold's avatar
Eckhart Arnold committed
12 13
from functools import partial

14 15 16 17
try:
    import regex as re
except ImportError:
    import re
Eckhart Arnold's avatar
Eckhart Arnold committed
18 19
from DHParser import logging, is_filename, Grammar, Compiler, Lookbehind, Alternative, Pop, \
    Required, Token, Synonym, \
20
    Option, NegativeLookbehind, OneOrMore, RegExp, Series, RE, Capture, \
21
    ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
22
    PreprocessorFunc, TransformationDict, \
Eckhart Arnold's avatar
Eckhart Arnold committed
23
    Node, TransformationFunc, traverse, remove_children_if, is_anonymous, \
24
    reduce_single_child, replace_by_single_child, remove_whitespace, \
Eckhart Arnold's avatar
Eckhart Arnold committed
25
    flatten, is_empty, collapse, replace_content, remove_brackets, is_one_of, remove_first
26 27 28 29


#######################################################################
#
Eckhart Arnold's avatar
Eckhart Arnold committed
30
# PREPROCESSOR SECTION - Can be edited. Changes will be preserved.
31 32 33
#
#######################################################################

Eckhart Arnold's avatar
Eckhart Arnold committed
34
def LaTeXPreprocessor(text):
    """Return the LaTeX source unchanged (identity preprocessor).

    Args:
        text: The source text handed to the preprocessing stage.

    Returns:
        The very same ``text`` object; no preprocessing is performed.
    """
    return text

Eckhart Arnold's avatar
Eckhart Arnold committed
37 38
def get_preprocessor() -> PreprocessorFunc:
    """Hand out the preprocessor callable used for LaTeX sources.

    Returns:
        The ``LaTeXPreprocessor`` function itself (not the result of calling it).
    """
    preprocessor = LaTeXPreprocessor
    return preprocessor
39 40 41 42 43 44 45 46 47 48 49


#######################################################################
#
# PARSER SECTION - Don't edit! CHANGES WILL BE OVERWRITTEN!
#
#######################################################################

class LaTeXGrammar(Grammar):
    r"""Parser for a LaTeX source file, with this grammar:
    
    # LaTeX-Grammar for DHParser
    
    @ whitespace = /[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?/    # optional whitespace, including at most one linefeed
    @ comment    = /%.*/
    
    
    ########################################################################
    #
    # outer document structure
    #
    ########################################################################
    
    latexdoc       = preamble document
    preamble       = { [WSPC] command }+
    
    document       = [WSPC] "\begin{document}" [WSPC]
                     frontpages [WSPC]
                     (Chapters | Sections) [WSPC]
                     [Bibliography] [Index] [WSPC]
                     "\end{document}" [WSPC] §EOF
    frontpages     = sequence
    
    
    #######################################################################
    #
    # document structure
    #
    #######################################################################
    
    Chapters       = { Chapter [WSPC] }+
    Chapter        = "\chapter" block [WSPC] { sequence | Sections }
    
    Sections       = { Section [WSPC] }+
    Section        = "\section" block [WSPC] { sequence | SubSections }
    
    SubSections    = { SubSection [WSPC] }+
    SubSection     = "\subsection" block [WSPC] { sequence | SubSubSections }
    
    SubSubSections = { SubSubSection [WSPC] }+
    SubSubSection  = "\subsubsection" block [WSPC] { sequence | Paragraphs }
    
    Paragraphs     = { Paragraph [WSPC] }+
    Paragraph      = "\paragraph" block [WSPC] { sequence | SubParagraphs }
    
    SubParagraphs  = { SubParagraph [WSPC] }+
    SubParagraph   = "\subparagraph" block [WSPC] [ sequence ]
    
    Bibliography   = "\bibliography" block [WSPC]
    Index          = "\printindex" [WSPC]
    
    
    #######################################################################
    #
    # document content
    #
    #######################################################################
    
    
    #### block environments ####
    
    block_environment   = known_environment | generic_block
    known_environment   = itemize | enumerate | figure | tabular | quotation
                        | verbatim
    generic_block       = begin_generic_block sequence §end_generic_block
    begin_generic_block = -&LB begin_environment LFF
    end_generic_block   = -&LB  end_environment LFF
    
    itemize             = "\begin{itemize}" [WSPC] { item } §"\end{itemize}"
    enumerate           = "\begin{enumerate}" [WSPC] {item } §"\end{enumerate}"
    item                = "\item" [WSPC] sequence
    
    figure              = "\begin{figure}" sequence §"\end{figure}"
    quotation           = ("\begin{quotation}" sequence §"\end{quotation}")
                        | ("\begin{quote}" sequence §"\end{quote}")
    verbatim            = "\begin{verbatim}" sequence §"\end{verbatim}"
    tabular             = "\begin{tabular}" tabular_config { tabular_row } §"\end{tabular}"
    tabular_row         = (multicolumn | tabular_cell) { "&" (multicolumn | tabular_cell) }
                          "\\" ( hline | { cline } )
    tabular_cell        = { line_element //~ }
    tabular_config      = "{" /[lcr|]+/~ §"}"
    
    
    #### paragraphs and sequences of paragraphs ####
    
    block_of_paragraphs = "{" [sequence] §"}"
    sequence            = { (paragraph | block_environment ) [PARSEP] }+
    paragraph           = { !blockcmd text_element //~ }+
    text_element        = line_element | LINEFEED
    line_element        = text | block | inline_environment | command
    
    
    #### inline enivronments ####
    
    inline_environment  = known_inline_env | generic_inline_env
    known_inline_env    = inline_math
    generic_inline_env  = begin_inline_env //~ paragraph §end_inline_env
    begin_inline_env    = (-!LB begin_environment) | (begin_environment !LFF)
    end_inline_env      = end_environment
                          ## (-!LB end_environment)   | (end_environment !LFF)  # ambiguity with genric_block when EOF
    begin_environment   = /\\begin{/ §NAME §/}/
    end_environment     = /\\end{/ §::NAME §/}/
    
    inline_math         = /\$/ /[^$]*/ §/\$/
    
    
    #### commands ####
    
    command             = known_command | text_command | generic_command
    known_command       = footnote | includegraphics | caption | multicolumn | hline | cline
    text_command        = TXTCOMMAND | ESCAPED | BRACKETS
    generic_command     = !no_command CMDNAME [[ //~ config ] //~ block ]
    
    footnote            = "\footnote" block_of_paragraphs
    includegraphics     = "\includegraphics" [ config ] block
    caption             = "\caption" block
    multicolumn         = "\multicolumn" "{" INTEGER "}" tabular_config block_of_paragraphs
    hline               = "\hline"
    cline               = "\cline{" INTEGER "-" INTEGER "}"
    
    
    #######################################################################
    #
    # low-level text and character sequences
    #
    #######################################################################
    
    
    config     = "[" cfg_text §"]"
    cfg_text   = { ([//~] text) | CMDNAME | SPECIAL }
    block      = /{/ //~ { !blockcmd text_element //~ } §/}/
    text       = TEXTCHUNK { //~ TEXTCHUNK }
    
    no_command = "\begin{" | "\end" | BACKSLASH structural
    blockcmd   = BACKSLASH ( ( "begin{" | "end{" )
                             ( "enumerate" | "itemize" | "figure" | "quote"
                             | "quotation" | "tabular") "}"
                           | structural | begin_generic_block | end_generic_block )
    
    structural = "subsection" | "section" | "chapter" | "subsubsection"
               | "paragraph" | "subparagraph" | "item"
    
    
    #######################################################################
    #
    # primitives
    #
    #######################################################################
    
    
    CMDNAME    = /\\(?:(?!_)\w)+/~
    TXTCOMMAND = /\\text\w+/
    ESCAPED    = /\\[%$&_\/{}]/
    SPECIAL    = /[$&_\\\\\/]/
    BRACKETS   = /[\[\]]/                       # left or right square bracket: [ ]
    LINEFEED   = /[\\][\\]/
    
    NAME       = /\w+/~
    INTEGER    = /\d+/~
    
    TEXTCHUNK  = /[^\\%$&\{\}\[\]\s\n]+/        # some piece of text excluding whitespace,
                                                # linefeed and special characters
    LF         = NEW_LINE { COMMENT__ WHITESPACE__ }   # linefeed but not an empty line
    LFF        = NEW_LINE [ WSPC ]              # at least one linefeed
    PARSEP     = { WHITESPACE__ COMMENT__ } GAP [WSPC] # paragraph separator
    WSPC       = { COMMENT__ | /\s+/ }+         # arbitrary horizontal or vertical whitespace
    GAP        = /[ \t]*(?:\n[ \t]*)+\n/~       # at least one empty line, i.e.
                                                # [whitespace] linefeed [whitespace] linefeed
    NEW_LINE   = /[ \t]*/ [COMMENT__] /\n/
    LB         = /\s*?\n|$/                     # backwards line break for Lookbehind-Operator
                                                # beginning of text marker '$' added for test code
    BACKSLASH  = /[\\]/
    
    EOF        = /(?!.)/                        # End-Of-File
    """
    # Forward placeholders for rules that are referenced before they are
    # defined below; each one is bound to its real parser later via ``.set()``.
    begin_generic_block = Forward()
    block_environment = Forward()
    block_of_paragraphs = Forward()
    end_generic_block = Forward()
    paragraph = Forward()
    tabular_config = Forward()
    text_element = Forward()

    # Generator metadata plus the comment/whitespace patterns that mirror the
    # ``@ comment`` and ``@ whitespace`` directives in the grammar above.
    source_hash__ = "37585004123d6b80ecf8f67217b43479"
    parser_initialization__ = "upon instantiation"
    COMMENT__ = r'%.*'
    WHITESPACE__ = r'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?'
    WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
    wspL__ = ''
    wspR__ = WSP__

    # Parser definitions. They appear in reverse order of the grammar text
    # (primitives first) so that each name exists before another rule uses it.
    EOF = RegExp('(?!.)')
    BACKSLASH = RegExp('[\\\\]')
    LB = RegExp('\\s*?\\n|$')
    NEW_LINE = Series(RegExp('[ \\t]*'), Option(RegExp(COMMENT__)), RegExp('\\n'))
    GAP = RE('[ \\t]*(?:\\n[ \\t]*)+\\n')
    WSPC = OneOrMore(Alternative(RegExp(COMMENT__), RegExp('\\s+')))
    PARSEP = Series(ZeroOrMore(Series(RegExp(WHITESPACE__), RegExp(COMMENT__))), GAP, Option(WSPC))
    LFF = Series(NEW_LINE, Option(WSPC))
    LF = Series(NEW_LINE, ZeroOrMore(Series(RegExp(COMMENT__), RegExp(WHITESPACE__))))
    TEXTCHUNK = RegExp('[^\\\\%$&\\{\\}\\[\\]\\s\\n]+')
    INTEGER = RE('\\d+')
    NAME = Capture(RE('\\w+'))
    LINEFEED = RegExp('[\\\\][\\\\]')
    BRACKETS = RegExp('[\\[\\]]')
    SPECIAL = RegExp('[$&_\\\\\\\\/]')
    ESCAPED = RegExp('\\\\[%$&_/{}]')
    TXTCOMMAND = RegExp('\\\\text\\w+')
    CMDNAME = RE('\\\\(?:(?!_)\\w)+')
    structural = Alternative(Token("subsection"), Token("section"), Token("chapter"), Token("subsubsection"), Token("paragraph"), Token("subparagraph"), Token("item"))
    blockcmd = Series(BACKSLASH, Alternative(Series(Alternative(Token("begin{"), Token("end{")), Alternative(Token("enumerate"), Token("itemize"), Token("figure"), Token("quote"), Token("quotation"), Token("tabular")), Token("}")), structural, begin_generic_block, end_generic_block))
    no_command = Alternative(Token("\\begin{"), Token("\\end"), Series(BACKSLASH, structural))
    text = Series(TEXTCHUNK, ZeroOrMore(Series(RE(''), TEXTCHUNK)))
    block = Series(RegExp('{'), RE(''), ZeroOrMore(Series(NegativeLookahead(blockcmd), text_element, RE(''))), Required(RegExp('}')))
    cfg_text = ZeroOrMore(Alternative(Series(Option(RE('')), text), CMDNAME, SPECIAL))
    config = Series(Token("["), cfg_text, Required(Token("]")))
    cline = Series(Token("\\cline{"), INTEGER, Token("-"), INTEGER, Token("}"))
    hline = Token("\\hline")
    multicolumn = Series(Token("\\multicolumn"), Token("{"), INTEGER, Token("}"), tabular_config, block_of_paragraphs)
    caption = Series(Token("\\caption"), block)
    includegraphics = Series(Token("\\includegraphics"), Option(config), block)
    footnote = Series(Token("\\footnote"), block_of_paragraphs)
    generic_command = Series(NegativeLookahead(no_command), CMDNAME, Option(Series(Option(Series(RE(''), config)), RE(''), block)))
    text_command = Alternative(TXTCOMMAND, ESCAPED, BRACKETS)
    known_command = Alternative(footnote, includegraphics, caption, multicolumn, hline, cline)
    command = Alternative(known_command, text_command, generic_command)
    inline_math = Series(RegExp('\\$'), RegExp('[^$]*'), Required(RegExp('\\$')))
    end_environment = Series(RegExp('\\\\end{'), Required(Pop(NAME)), Required(RegExp('}')))
    begin_environment = Series(RegExp('\\\\begin{'), Required(NAME), Required(RegExp('}')))
    end_inline_env = Synonym(end_environment)
    begin_inline_env = Alternative(Series(NegativeLookbehind(LB), begin_environment), Series(begin_environment, NegativeLookahead(LFF)))
    generic_inline_env = Series(begin_inline_env, RE(''), paragraph, Required(end_inline_env))
    known_inline_env = Synonym(inline_math)
    inline_environment = Alternative(known_inline_env, generic_inline_env)
    line_element = Alternative(text, block, inline_environment, command)
    text_element.set(Alternative(line_element, LINEFEED))
    paragraph.set(OneOrMore(Series(NegativeLookahead(blockcmd), text_element, RE(''))))
    sequence = OneOrMore(Series(Alternative(paragraph, block_environment), Option(PARSEP)))
    block_of_paragraphs.set(Series(Token("{"), Option(sequence), Required(Token("}"))))
    tabular_config.set(Series(Token("{"), RE('[lcr|]+'), Required(Token("}"))))
    tabular_cell = ZeroOrMore(Series(line_element, RE('')))
    tabular_row = Series(Alternative(multicolumn, tabular_cell), ZeroOrMore(Series(Token("&"), Alternative(multicolumn, tabular_cell))), Token("\\\\"), Alternative(hline, ZeroOrMore(cline)))
    tabular = Series(Token("\\begin{tabular}"), tabular_config, ZeroOrMore(tabular_row), Required(Token("\\end{tabular}")))
    verbatim = Series(Token("\\begin{verbatim}"), sequence, Required(Token("\\end{verbatim}")))
    quotation = Alternative(Series(Token("\\begin{quotation}"), sequence, Required(Token("\\end{quotation}"))), Series(Token("\\begin{quote}"), sequence, Required(Token("\\end{quote}"))))
    figure = Series(Token("\\begin{figure}"), sequence, Required(Token("\\end{figure}")))
    item = Series(Token("\\item"), Option(WSPC), sequence)
    enumerate = Series(Token("\\begin{enumerate}"), Option(WSPC), ZeroOrMore(item), Required(Token("\\end{enumerate}")))
    itemize = Series(Token("\\begin{itemize}"), Option(WSPC), ZeroOrMore(item), Required(Token("\\end{itemize}")))
    end_generic_block.set(Series(Lookbehind(LB), end_environment, LFF))
    begin_generic_block.set(Series(Lookbehind(LB), begin_environment, LFF))
    generic_block = Series(begin_generic_block, sequence, Required(end_generic_block))
    known_environment = Alternative(itemize, enumerate, figure, tabular, quotation, verbatim)
    block_environment.set(Alternative(known_environment, generic_block))
    Index = Series(Token("\\printindex"), Option(WSPC))
    Bibliography = Series(Token("\\bibliography"), block, Option(WSPC))
    SubParagraph = Series(Token("\\subparagraph"), block, Option(WSPC), Option(sequence))
    SubParagraphs = OneOrMore(Series(SubParagraph, Option(WSPC)))
    Paragraph = Series(Token("\\paragraph"), block, Option(WSPC), ZeroOrMore(Alternative(sequence, SubParagraphs)))
    Paragraphs = OneOrMore(Series(Paragraph, Option(WSPC)))
    SubSubSection = Series(Token("\\subsubsection"), block, Option(WSPC), ZeroOrMore(Alternative(sequence, Paragraphs)))
    SubSubSections = OneOrMore(Series(SubSubSection, Option(WSPC)))
    SubSection = Series(Token("\\subsection"), block, Option(WSPC), ZeroOrMore(Alternative(sequence, SubSubSections)))
    SubSections = OneOrMore(Series(SubSection, Option(WSPC)))
    Section = Series(Token("\\section"), block, Option(WSPC), ZeroOrMore(Alternative(sequence, SubSections)))
    Sections = OneOrMore(Series(Section, Option(WSPC)))
    Chapter = Series(Token("\\chapter"), block, Option(WSPC), ZeroOrMore(Alternative(sequence, Sections)))
    Chapters = OneOrMore(Series(Chapter, Option(WSPC)))
    frontpages = Synonym(sequence)
    document = Series(Option(WSPC), Token("\\begin{document}"), Option(WSPC), frontpages, Option(WSPC), Alternative(Chapters, Sections), Option(WSPC), Option(Bibliography), Option(Index), Option(WSPC), Token("\\end{document}"), Option(WSPC), Required(EOF))
    preamble = OneOrMore(Series(Option(WSPC), command))
    latexdoc = Series(preamble, document)
    # root__ refers to the top-level rule of the grammar, ``latexdoc``.
    root__ = latexdoc
    
def get_grammar() -> LaTeXGrammar:
    """Return the shared ``LaTeXGrammar`` instance, creating it on first use.

    The instance is cached in the module-level global
    ``thread_local_LaTeX_grammar_singleton``; every later call returns that
    same object.

    Returns:
        The cached ``LaTeXGrammar`` instance.
    """
    global thread_local_LaTeX_grammar_singleton
    try:
        grammar = thread_local_LaTeX_grammar_singleton
    except NameError:
        # First call: the global has not been assigned yet, so build the
        # grammar once and remember it.
        thread_local_LaTeX_grammar_singleton = LaTeXGrammar()
        grammar = thread_local_LaTeX_grammar_singleton
    return grammar
329 330 331 332 333 334 335 336 337


#######################################################################
#
# AST SECTION - Can be edited. Changes will be preserved.
#
#######################################################################


338 339
def streamline_whitespace(context):
    """Normalise a whitespace node to a single newline or a single blank.

    The node's content is replaced by ``'\\n'`` if its text contains a comment
    marker (``%``) or a linefeed, and by ``' '`` otherwise.

    Args:
        context: Sequence of nodes whose last element is the node to be
            rewritten in place (its ``result`` attribute is assigned).

    Raises:
        AssertionError: If the node's ``tag_name`` is neither ``'WSPC'`` nor
            ``':Whitespace'``.
    """
    node = context[-1]
    assert node.tag_name in ['WSPC', ':Whitespace']
    content = str(node)
    # A comment and a bare linefeed both collapse to one newline; anything
    # else is purely horizontal whitespace (or empty) and becomes one blank.
    if '%' in content or '\n' in content:
        node.result = '\n'
    else:
        node.result = ' '


354 355 356 357
def watch(node):
    """Debugging aid: print whatever ``node.as_sxpr()`` returns to stdout."""
    sxpr = node.as_sxpr()
    print(sxpr)


358 359
# Maps node names (several names may share one comma-separated key) to the
# transformation, or list of transformations, registered for them. An empty
# list registers no transformation for that name.
LaTeX_AST_transformation_table = {
    # AST Transformations for the LaTeX-grammar
    "+": remove_children_if(lambda node: is_empty(node) or is_one_of(node, {'PARSEP'})),
    # remove_empty,
    "latexdoc": [],
    "preamble": [],
    "document": [],
    "frontpages": [],
    "Chapters": [],
    "Chapter": [],
    "Sections": [],
    "Section": [],
    "SubSections": [],
    "SubSection": [],
    "SubSubSections": [],
    "SubSubSection": [],
    "Paragraphs": [],
    "Paragraph": [],
    "SubParagraphs": [],
    "SubParagraph": [],
    "Bibliography": [],
    "Index": [],
    "block_environment": replace_by_single_child,
    "known_environment": replace_by_single_child,
    "generic_block": [],
    "begin_generic_block, end_generic_block": replace_by_single_child,
    "itemize, enumerate": [remove_brackets, flatten],
    "item": [remove_first],
    "figure": [],
    "quotation": [reduce_single_child, remove_brackets],
    "verbatim": [],
    # NOTE(review): the grammar defines "tabular" and "tabular_config" but no
    # "table" or "table_config" rule -- confirm whether these keys are stale.
    "table": [],
    "table_config": [],
    "block_of_paragraphs": [],
    "sequence": [flatten],
    "paragraph": [flatten],
    "text_element": [],
    "inline_environment": replace_by_single_child,
    "known_inline_env": replace_by_single_child,
    "generic_inline_env": [],
    "begin_inline_env, end_inline_env": [replace_by_single_child],
    "begin_environment, end_environment": [remove_brackets, reduce_single_child],
    "inline_math": [remove_brackets, reduce_single_child],
    "command": [],
    "known_command": [],
    "text_command": [],
    "generic_command": [flatten],
    "footnote": [],
    "includegraphics": [],
    "caption": [],
    "config": [remove_brackets],
    "block": [remove_brackets, flatten],
    "text": collapse,
    "no_command, blockcmd": [],
    "structural": [],
    "CMDNAME": [remove_whitespace, reduce_single_child(is_anonymous)],
    "TXTCOMMAND": [remove_whitespace, reduce_single_child(is_anonymous)],
    "NAME": [reduce_single_child, remove_whitespace, reduce_single_child],
    # Drop the first character of the node's text (the escaping backslash
    # matched by the ESCAPED rule).
    "ESCAPED": [replace_content(lambda node: str(node)[1:])],
    "BRACKETS": [],
    "TEXTCHUNK": [],
    "LF": [],
    "PARSEP": replace_content(lambda node: '\n\n'),
    "GAP": [],
    "LB": [],
    "BACKSLASH": [],
    "EOF": [],
    ":Token": [],
    ":RE": replace_by_single_child,
    ":Whitespace": streamline_whitespace,
    "*": replace_by_single_child
}

431

432 433
def LaTeXTransform() -> TransformationFunc:
    """Build the AST transformation callable for LaTeX syntax trees.

    Returns:
        ``traverse`` with its ``processing_table`` argument pre-bound to a
        shallow copy of ``LaTeX_AST_transformation_table``. The copy is taken
        at call time, so later changes to the module-level table do not affect
        a callable that was already created.
    """
    # The return value is a callable (a ``partial``), not a table, which is
    # why the annotation is ``TransformationFunc``.
    table_snapshot = LaTeX_AST_transformation_table.copy()
    return partial(traverse, processing_table=table_snapshot)
434 435

def get_transformer() -> TransformationFunc:
    """Return the shared AST transformer, creating it on first use.

    The transformer is cached in the module-level global
    ``thread_local_LaTeX_transformer_singleton``; every later call returns
    that same object.

    Returns:
        The cached result of ``LaTeXTransform()``.
    """
    global thread_local_LaTeX_transformer_singleton
    try:
        transformer = thread_local_LaTeX_transformer_singleton
    except NameError:
        # First call: the global has not been assigned yet, so build the
        # transformer once and remember it.
        thread_local_LaTeX_transformer_singleton = LaTeXTransform()
        transformer = thread_local_LaTeX_transformer_singleton
    return transformer

444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460


#######################################################################
#
# COMPILER SECTION - Can be edited. Changes will be preserved.
#
#######################################################################

class LaTeXCompiler(Compiler):
    """Compiler for the abstract-syntax-tree of a LaTeX source file.

    The class provides one ``on_<name>`` method per node name. At present all
    of them are empty stubs that return ``None``, except ``on_latexdoc``,
    which returns the node it receives unchanged.
    """

    def __init__(self, grammar_name="LaTeX", grammar_source=""):
        """Initialise the base ``Compiler`` and check the grammar name.

        Args:
            grammar_name: Name of the grammar; must consist of word
                characters only.
            grammar_source: Grammar source, passed on to the base class.
        """
        super(LaTeXCompiler, self).__init__(grammar_name, grammar_source)
        # Raw string: '\w' and '\Z' are regex escapes, not string escapes.
        assert re.match(r'\w+\Z', grammar_name)

    def on_latexdoc(self, node):
        """Return the root node unchanged."""
        return node

    def on_preamble(self, node):
        pass

    def on_document(self, node):
        pass

    def on_frontpages(self, node):
        pass

    def on_Chapters(self, node):
        pass

    def on_Chapter(self, node):
        pass

    def on_Sections(self, node):
        pass

    def on_Section(self, node):
        pass

    def on_SubSections(self, node):
        pass

    def on_SubSection(self, node):
        pass

    def on_SubSubSections(self, node):
        pass

    def on_SubSubSection(self, node):
        pass

    def on_Paragraphs(self, node):
        pass

    def on_Paragraph(self, node):
        pass

    def on_SubParagraphs(self, node):
        pass

    def on_SubParagraph(self, node):
        pass

    def on_Bibliography(self, node):
        pass

    def on_Index(self, node):
        pass

    def on_block_environment(self, node):
        pass

    def on_known_environment(self, node):
        pass

    def on_generic_block(self, node):
        pass

    def on_begin_generic_block(self, node):
        pass

    def on_end_generic_block(self, node):
        pass

    def on_itemize(self, node):
        pass

    def on_enumerate(self, node):
        pass

    def on_item(self, node):
        pass

    def on_figure(self, node):
        pass

    def on_quotation(self, node):
        pass

    def on_verbatim(self, node):
        pass

    def on_table(self, node):
        pass

    def on_table_config(self, node):
        pass

    def on_block_of_paragraphs(self, node):
        pass

    def on_sequence(self, node):
        pass

    def on_paragraph(self, node):
        pass

    def on_text_element(self, node):
        pass

    def on_inline_environment(self, node):
        pass

    def on_known_inline_env(self, node):
        pass

    def on_generic_inline_env(self, node):
        pass

    def on_begin_inline_env(self, node):
        pass

    def on_begin_environment(self, node):
        pass

    def on_end_environment(self, node):
        pass

    def on_inline_math(self, node):
        pass

    def on_command(self, node):
        pass

    def on_known_command(self, node):
        pass

    def on_generic_command(self, node):
        pass

    def on_footnote(self, node):
        pass

    def on_includegraphics(self, node):
        pass

    def on_caption(self, node):
        pass

    def on_config(self, node):
        pass

    def on_block(self, node):
        pass

    def on_text(self, node):
        pass

    def on_cfgtext(self, node):
        pass

    def on_word_sequence(self, node):
        pass

    def on_no_command(self, node):
        pass

    def on_blockcmd(self, node):
        pass

    def on_structural(self, node):
        pass

    def on_CMDNAME(self, node):
        pass

    def on_NAME(self, node):
        pass

    def on_ESCAPED(self, node):
        pass

    def on_BRACKETS(self, node):
        pass

    def on_TEXTCHUNK(self, node):
        pass

    def on_WSPC(self, node):
        pass

    def on_LF(self, node):
        pass

    def on_PARSEP(self, node):
        pass

    def on_LB(self, node):
        pass

    def on_BACKSLASH(self, node):
        pass

    def on_EOF(self, node):
        pass

661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687

def get_compiler(grammar_name="LaTeX", grammar_source="") -> LaTeXCompiler:
    """Return the shared ``LaTeXCompiler``, creating it on first use.

    Args:
        grammar_name: Grammar name handed to the compiler.
        grammar_source: Grammar source handed to the compiler.

    Returns:
        The compiler cached in the module-level global
        ``thread_local_LaTeX_compiler_singleton``. An already existing
        instance is re-targeted with ``set_grammar_name`` before it is
        returned; otherwise a new instance is built from the arguments.
    """
    global thread_local_LaTeX_compiler_singleton
    try:
        instance = thread_local_LaTeX_compiler_singleton
        instance.set_grammar_name(grammar_name, grammar_source)
    except NameError:
        # No usable cached compiler yet: build one and remember it.
        instance = LaTeXCompiler(grammar_name, grammar_source)
        thread_local_LaTeX_compiler_singleton = instance
    return instance


#######################################################################
#
# END OF DHPARSER-SECTIONS
#
#######################################################################


def compile_src(source):
    """Compiles ``source`` and returns (result, errors, ast).

    The shared compiler, preprocessor, grammar and transformer are fetched
    from their ``get_*`` accessors and handed to ``compile_source``; the whole
    run happens inside the ``logging("LOGS")`` context.

    Args:
        source: The value passed on to ``compile_source`` as the source.

    Returns:
        Whatever ``compile_source`` returns, unchanged.
    """
    with logging("LOGS"):
        # A log file name used to be derived here from ``source`` and the
        # compiler's class name, but it was never used, so it is gone.
        compiler = get_compiler()
        result = compile_source(source, get_preprocessor(),
                                get_grammar(),
                                get_transformer(), compiler)
    return result


if __name__ == "__main__":
    # Command-line entry point: compile the source named by the first
    # argument, or print a usage hint when no argument is given.
    if len(sys.argv) > 1:
        result, errors, ast = compile_src(sys.argv[1])
        if errors:
            # Report every error and signal failure through the exit status.
            for error in errors:
                print(error)
            sys.exit(1)
        else:
            # A Node result is printed as XML; any other result is printed as is.
            print(result.as_xml() if isinstance(result, Node) else result)
    else:
        print("Usage: LaTeXCompiler.py [FILENAME]")