LaTeXCompiler.py 23.3 KB
Newer Older
1
2
3
4
5
6
7
8
9
#!/usr/bin/python

#######################################################################
#
# SYMBOLS SECTION - Can be edited. Changes will be preserved.
#
#######################################################################


10
from collections import defaultdict
11
12
import os
import sys
Eckhart Arnold's avatar
Eckhart Arnold committed
13
14
from functools import partial

15
16
17
18
try:
    import regex as re
except ImportError:
    import re
19
from DHParser import is_filename, Grammar, Compiler, Lookbehind, Alternative, Pop, \
di68kap's avatar
di68kap committed
20
21
    Synonym, Whitespace, Token, \
    Option, NegativeLookbehind, OneOrMore, RegExp, Series, Capture, \
22
    ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
23
    PreprocessorFunc, TransformationDict, \
Eckhart Arnold's avatar
Eckhart Arnold committed
24
    Node, TransformationFunc, traverse, remove_children_if, is_anonymous, \
25
    reduce_single_child, replace_by_single_child, remove_whitespace, flatten_anonymous_nodes, \
26
    flatten, is_empty, collapse, replace_content, replace_content_by, remove_brackets, \
eckhart's avatar
eckhart committed
27
    is_one_of, traverse_locally, remove_tokens, remove_nodes, TOKEN_PTYPE, Error, GLOBALS
28
from DHParser.log import logging
29
30
31
32


#######################################################################
#
Eckhart Arnold's avatar
Eckhart Arnold committed
33
# PREPROCESSOR SECTION - Can be edited. Changes will be preserved.
34
35
36
#
#######################################################################

Eckhart Arnold's avatar
Eckhart Arnold committed
37
def LaTeXPreprocessor(text):
    """Preprocessing hook for LaTeX sources.

    No preprocessing is performed at present: ``text`` is handed back
    exactly as it was received.
    """
    return text

Eckhart Arnold's avatar
Eckhart Arnold committed
40
41
def get_preprocessor() -> PreprocessorFunc:
    """Return the preprocessor function used for LaTeX sources."""
    preprocessor = LaTeXPreprocessor
    return preprocessor
42
43
44
45
46
47
48
49
50


#######################################################################
#
# PARSER SECTION - Don't edit! CHANGES WILL BE OVERWRITTEN!
#
#######################################################################

class LaTeXGrammar(Grammar):
    r"""Parser for a LaTeX source file.

    The grammar is declared as class attributes, each one a parser object
    built from parser combinators (``Series``, ``Alternative``, ``Option``,
    ``ZeroOrMore``, ...). Parsing starts at ``root__`` (``latexdoc``), i.e.
    a preamble followed by a document.

    This class sits in the generated parser section of the file (see the
    section header: changes here will be overwritten).
    """
    # Forward declarations: these parsers are referenced further down before
    # their definitions are supplied through ``.set(...)``.
    begin_generic_block = Forward()
    block_environment = Forward()
    block_of_paragraphs = Forward()
    end_generic_block = Forward()
    paragraph = Forward()
    tabular_config = Forward()
    text_element = Forward()
    # Book-keeping attributes; presumably read by the DHParser framework
    # (not used anywhere else in this file).
    source_hash__ = "dacb1f9ad5b1c18cdc29c7ddb7878959"
    static_analysis_pending__ = True
    parser_initialization__ = ["upon instantiation"]
    resume_rules__ = {}
    # A comment runs from a percent sign to the end of the line.
    COMMENT__ = r'%.*'
    # Horizontal whitespace, optionally spanning ONE line break; the negative
    # lookahead refuses a line break that is followed by a blank line.
    WHITESPACE__ = r'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?'
    WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
    wsp__ = Whitespace(WSP_RE__)

    # --- lexical building blocks ---------------------------------------
    EOF = RegExp('(?!.)')
    BACKSLASH = RegExp('[\\\\]')
    LB = RegExp('\\s*?\\n|$')
    NEW_LINE = Series(RegExp('[ \\t]*'), Option(RegExp(COMMENT__)), RegExp('\\n'))
    GAP = Series(RegExp('[ \\t]*(?:\\n[ \\t]*)+\\n'), wsp__)
    WSPC = OneOrMore(Alternative(RegExp(COMMENT__), RegExp('\\s+')))
    PARSEP = Series(ZeroOrMore(Series(RegExp(WHITESPACE__), RegExp(COMMENT__))), GAP, Option(WSPC))
    LFF = Series(NEW_LINE, Option(WSPC))
    LF = Series(NEW_LINE, ZeroOrMore(Series(RegExp(COMMENT__), RegExp(WHITESPACE__))))
    TEXTCHUNK = RegExp('[^\\\\%$&\\{\\}\\[\\]\\s\\n]+')
    INTEGER = Series(RegExp('\\d+'), wsp__)
    NAME = Capture(Series(RegExp('\\w+'), wsp__))
    LINEFEED = RegExp('[\\\\][\\\\]')
    BRACKETS = RegExp('[\\[\\]]')
    SPECIAL = RegExp('[$&_\\\\\\\\/]')
    ESCAPED = RegExp('\\\\[%$&_/{}]')
    TXTCOMMAND = RegExp('\\\\text\\w+')
    CMDNAME = Series(RegExp('\\\\(?:(?!_)\\w)+'), wsp__)

    # --- commands ------------------------------------------------------
    structural = Alternative(Series(Token("subsection"), wsp__), Series(Token("section"), wsp__), Series(Token("chapter"), wsp__), Series(Token("subsubsection"), wsp__), Series(Token("paragraph"), wsp__), Series(Token("subparagraph"), wsp__), Series(Token("item"), wsp__))
    blockcmd = Series(BACKSLASH, Alternative(Series(Alternative(Series(Token("begin{"), wsp__), Series(Token("end{"), wsp__)), Alternative(Series(Token("enumerate"), wsp__), Series(Token("itemize"), wsp__), Series(Token("figure"), wsp__), Series(Token("quote"), wsp__), Series(Token("quotation"), wsp__), Series(Token("tabular"), wsp__)), Series(Token("}"), wsp__)), structural, begin_generic_block, end_generic_block))
    no_command = Alternative(Series(Token("\\begin{"), wsp__), Series(Token("\\end"), wsp__), Series(BACKSLASH, structural))
    text = Series(TEXTCHUNK, ZeroOrMore(Series(RegExp(''), wsp__, TEXTCHUNK)))
    block = Series(RegExp('{'), RegExp(''), wsp__, ZeroOrMore(Series(NegativeLookahead(blockcmd), text_element, RegExp(''), wsp__)), RegExp('}'), mandatory=4)
    cfg_text = ZeroOrMore(Alternative(Series(Option(Series(RegExp(''), wsp__)), text), CMDNAME, SPECIAL))
    config = Series(Series(Token("["), wsp__), cfg_text, Series(Token("]"), wsp__), mandatory=2)
    pdfinfo = Series(Series(Token("\\pdfinfo"), wsp__), block)
    documentclass = Series(Series(Token("\\documentclass"), wsp__), Option(config), block)
    cline = Series(Series(Token("\\cline{"), wsp__), INTEGER, Series(Token("-"), wsp__), INTEGER, Series(Token("}"), wsp__))
    hline = Series(Token("\\hline"), wsp__)
    multicolumn = Series(Series(Token("\\multicolumn"), wsp__), Series(Token("{"), wsp__), INTEGER, Series(Token("}"), wsp__), tabular_config, block_of_paragraphs)
    caption = Series(Series(Token("\\caption"), wsp__), block)
    includegraphics = Series(Series(Token("\\includegraphics"), wsp__), Option(config), block)
    footnote = Series(Series(Token("\\footnote"), wsp__), block_of_paragraphs)
    citep = Series(Alternative(Series(Token("\\citep"), wsp__), Series(Token("\\cite"), wsp__)), Option(config), block)
    citet = Series(Series(Token("\\citet"), wsp__), Option(config), block)
    generic_command = Series(NegativeLookahead(no_command), CMDNAME, Option(Series(Option(Series(RegExp(''), wsp__, config)), RegExp(''), wsp__, block)))
    text_command = Alternative(TXTCOMMAND, ESCAPED, BRACKETS)
    known_command = Alternative(citet, citep, footnote, includegraphics, caption, multicolumn, hline, cline, documentclass, pdfinfo)
    command = Alternative(known_command, text_command, generic_command)

    # --- inline environments and paragraphs ----------------------------
    inline_math = Series(RegExp('\\$'), RegExp('[^$]*'), RegExp('\\$'), mandatory=2)
    # NAME is wrapped in Capture above and consumed with Pop here; presumably
    # this ties an \end{...} to the name of its \begin{...} -- confirm against
    # the DHParser documentation.
    end_environment = Series(RegExp('\\\\end{'), Pop(NAME), RegExp('}'), mandatory=1)
    begin_environment = Series(RegExp('\\\\begin{'), NAME, RegExp('}'), mandatory=1)
    end_inline_env = Synonym(end_environment)
    begin_inline_env = Alternative(Series(NegativeLookbehind(LB), begin_environment), Series(begin_environment, NegativeLookahead(LFF)))
    generic_inline_env = Series(begin_inline_env, RegExp(''), wsp__, paragraph, end_inline_env, mandatory=4)
    known_inline_env = Synonym(inline_math)
    inline_environment = Alternative(known_inline_env, generic_inline_env)
    line_element = Alternative(text, block, inline_environment, command)
    text_element.set(Alternative(line_element, LINEFEED))
    paragraph.set(OneOrMore(Series(NegativeLookahead(blockcmd), text_element, RegExp(''), wsp__)))
    sequence = Series(Option(WSPC), OneOrMore(Series(Alternative(paragraph, block_environment), Option(PARSEP))))

    # --- block environments --------------------------------------------
    block_of_paragraphs.set(Series(Series(Token("{"), wsp__), Option(sequence), Series(Token("}"), wsp__), mandatory=2))
    tabular_config.set(Series(Series(Token("{"), wsp__), RegExp('[lcr|]+'), wsp__, Series(Token("}"), wsp__), mandatory=3))
    tabular_cell = ZeroOrMore(Series(line_element, RegExp(''), wsp__))
    tabular_row = Series(Alternative(multicolumn, tabular_cell), ZeroOrMore(Series(Series(Token("&"), wsp__), Alternative(multicolumn, tabular_cell))), Series(Token("\\\\"), wsp__), Alternative(hline, ZeroOrMore(cline)))
    tabular = Series(Series(Token("\\begin{tabular}"), wsp__), tabular_config, ZeroOrMore(tabular_row), Series(Token("\\end{tabular}"), wsp__), mandatory=3)
    verbatim = Series(Series(Token("\\begin{verbatim}"), wsp__), sequence, Series(Token("\\end{verbatim}"), wsp__), mandatory=2)
    quotation = Alternative(Series(Series(Token("\\begin{quotation}"), wsp__), sequence, Series(Token("\\end{quotation}"), wsp__), mandatory=2), Series(Series(Token("\\begin{quote}"), wsp__), sequence, Series(Token("\\end{quote}"), wsp__), mandatory=2))
    figure = Series(Series(Token("\\begin{figure}"), wsp__), sequence, Series(Token("\\end{figure}"), wsp__), mandatory=2)
    item = Series(Series(Token("\\item"), wsp__), sequence)
    # NOTE: within this class namespace the name ``enumerate`` shadows the
    # builtin of the same name.
    enumerate = Series(Series(Token("\\begin{enumerate}"), wsp__), Option(WSPC), ZeroOrMore(item), Series(Token("\\end{enumerate}"), wsp__), mandatory=3)
    itemize = Series(Series(Token("\\begin{itemize}"), wsp__), Option(WSPC), ZeroOrMore(item), Series(Token("\\end{itemize}"), wsp__), mandatory=3)
    end_generic_block.set(Series(Lookbehind(LB), end_environment, LFF))
    begin_generic_block.set(Series(Lookbehind(LB), begin_environment, LFF))
    generic_block = Series(begin_generic_block, sequence, end_generic_block, mandatory=2)
    known_environment = Alternative(itemize, enumerate, figure, tabular, quotation, verbatim)
    block_environment.set(Alternative(known_environment, generic_block))

    # --- document structure --------------------------------------------
    # Each sectioning level consists of its command, a heading and any mix of
    # plain sequences and units of the next lower level.
    heading = Synonym(block)
    Index = Series(Option(WSPC), Series(Token("\\printindex"), wsp__))
    Bibliography = Series(Option(WSPC), Series(Token("\\bibliography"), wsp__), heading)
    SubParagraph = Series(Series(Token("\\subparagraph"), wsp__), heading, Option(sequence))
    SubParagraphs = OneOrMore(Series(Option(WSPC), SubParagraph))
    Paragraph = Series(Series(Token("\\paragraph"), wsp__), heading, ZeroOrMore(Alternative(sequence, SubParagraphs)))
    Paragraphs = OneOrMore(Series(Option(WSPC), Paragraph))
    SubSubSection = Series(Series(Token("\\subsubsection"), wsp__), heading, ZeroOrMore(Alternative(sequence, Paragraphs)))
    SubSubSections = OneOrMore(Series(Option(WSPC), SubSubSection))
    SubSection = Series(Series(Token("\\subsection"), wsp__), heading, ZeroOrMore(Alternative(sequence, SubSubSections)))
    SubSections = OneOrMore(Series(Option(WSPC), SubSection))
    Section = Series(Series(Token("\\section"), wsp__), heading, ZeroOrMore(Alternative(sequence, SubSections)))
    Sections = OneOrMore(Series(Option(WSPC), Section))
    Chapter = Series(Series(Token("\\chapter"), wsp__), heading, ZeroOrMore(Alternative(sequence, Sections)))
    Chapters = OneOrMore(Series(Option(WSPC), Chapter))
    frontpages = Synonym(sequence)
    document = Series(Option(WSPC), Series(Token("\\begin{document}"), wsp__), frontpages, Alternative(Chapters, Sections), Option(Bibliography), Option(Index), Option(WSPC), Series(Token("\\end{document}"), wsp__), Option(WSPC), EOF, mandatory=9)
    preamble = OneOrMore(Series(Option(WSPC), command))
    latexdoc = Series(preamble, document)
    # Entry point of the grammar.
    root__ = latexdoc
    
def get_grammar() -> LaTeXGrammar:
    """Return the cached ``LaTeXGrammar`` instance, creating it on first use.

    The instance is stored as the attribute
    ``LaTeX_00000001_grammar_singleton`` of ``GLOBALS``. If this function
    object carries a ``python_src__`` attribute, its value is copied onto a
    newly created grammar instance.
    """
    global GLOBALS
    try:
        grammar = GLOBALS.LaTeX_00000001_grammar_singleton
    except AttributeError:
        # First call: nothing has been cached on GLOBALS yet.
        GLOBALS.LaTeX_00000001_grammar_singleton = LaTeXGrammar()
        if hasattr(get_grammar, 'python_src__'):
            GLOBALS.LaTeX_00000001_grammar_singleton.python_src__ = get_grammar.python_src__
        grammar = GLOBALS.LaTeX_00000001_grammar_singleton
    return grammar
166
167
168
169
170
171
172
173
174


#######################################################################
#
# AST SECTION - Can be edited. Changes will be preserved.
#
#######################################################################


175
def streamline_whitespace(context):
    """Normalize the whitespace node at the end of ``context`` in place.

    Nothing happens if the parent node (``context[-2]``) is a token.
    Otherwise the node's result is replaced by

    * a single line feed, if its content contains a comment sign (``%``)
      or a line feed,
    * a single blank, if it contains anything else,
    * the empty string, if it is empty.
    """
    if context[-2].tag_name == TOKEN_PTYPE:
        return
    ws_node = context[-1]
    assert ws_node.tag_name in ['WSPC', ':Whitespace']
    raw = ws_node.content
    # Comments and line breaks both collapse to one line feed.
    if raw.find('%') >= 0 or raw.find('\n') >= 0:
        ws_node.result = '\n'
    elif raw:
        ws_node.result = ' '
    else:
        ws_node.result = ''
190
191


192
193
194
def watch(node):
    """Debugging aid: print the S-expression form of ``node``."""
    sxpr = node.as_sxpr()
    print(sxpr)

eckhart's avatar
eckhart committed
195
def _is_structure_container(context):
    """Tell whether the last node of ``context`` is one of the grouping
    nodes (``Chapters``, ``Sections``, ..., ``sequence``)."""
    return is_one_of(
        context, {"Chapters", "Sections", "SubSections", "SubSubSections", "Paragraphs",
                  "SubParagraphs", "sequence"})


# Transformation that flattens the grouping nodes named above, recursively.
flatten_structure = flatten(_is_structure_container, recursive=True)
eckhart's avatar
eckhart committed
198
199
200


def is_commandname(context):
    """Returns True, if the last node in the context is a token that merely
    spells out the command of its parent node.

    That is the case if the parent has more than one child and the token
    reads ``\\name``, ``\\begin{name}`` or ``\\end{name}``, where ``name``
    is the parent's tag name in lower case.
    """
    candidate = context[-1]
    if candidate.tag_name != TOKEN_PTYPE:
        return False
    parent = context[-2]
    if len(parent.children) <= 1:
        return False
    name = parent.tag_name.lower()
    spellings = ('\\' + name, '\\begin{' + name + '}', '\\end{' + name + '}')
    return str(candidate) in spellings


def _is_expendable(context):
    """Tell whether the last node of ``context`` is empty, a ``PARSEP`` or
    ``WSPC`` node, or a token repeating the command name of its parent."""
    return (is_empty(context)
            or is_one_of(context, {'PARSEP', 'WSPC'})
            or is_commandname(context))


# Transformation that removes all children matching the predicate above.
drop_expendables = remove_children_if(_is_expendable)

220

221
222
# Maps node names to the transformation(s) applied to nodes of that name.
# A key may list several node names separated by commas; a value is either
# a single transformation or a list of transformations. The keys "<", "*"
# and ":Whitespace" are special keys -- see the DHParser documentation of
# ``traverse`` for their exact meaning.
LaTeX_AST_transformation_table = {
    # AST Transformations for the LaTeX-grammar
    "<": [flatten_anonymous_nodes, flatten_structure],
    "latexdoc": [],
    "preamble": [traverse_locally({'<': remove_whitespace, 'block': replace_by_single_child})],
    "document": [flatten_structure],
    "pdfinfo": [],
    "frontpages": reduce_single_child,
    "Chapters, Sections, SubSections, SubSubSections, Paragraphs, SubParagraphs": [],
    "Chapter, Section, SubSection, SubSubSection, Paragraph, SubParagraph": [],
    "heading": reduce_single_child,
    "Bibliography": [],
    "Index": [],
    "block_environment": replace_by_single_child,
    "known_environment": replace_by_single_child,
    "generic_block": [],
    "begin_generic_block, end_generic_block": [remove_nodes('NEW_LINE'), replace_by_single_child],
    "itemize, enumerate": [remove_brackets, flatten],
    "item": [],
    "figure": [],
    "quotation": [reduce_single_child, remove_brackets],
    "verbatim": [],
    "tabular": [],
    "tabular_config, block_of_paragraphs": [remove_brackets, reduce_single_child],
    # NOTE(review): '\\' below is a single backslash, whereas the tabular_row
    # parser in LaTeXGrammar matches its row terminator with Token("\\\\"),
    # i.e. two backslashes. Confirm whether '\\\\' was intended here.
    "tabular_row": [flatten, remove_tokens('&', '\\')],
    "tabular_cell": [flatten, remove_whitespace],
    "multicolumn": [remove_tokens('{', '}')],
    "hline": [remove_whitespace, reduce_single_child],
    "sequence": [flatten],
    "paragraph": [flatten],
    "text_element": replace_by_single_child,
    "line_element": replace_by_single_child,
    "inline_environment": replace_by_single_child,
    "known_inline_env": replace_by_single_child,
    "generic_inline_env": [],
    "begin_inline_env, end_inline_env": [replace_by_single_child],
    "begin_environment, end_environment": [remove_brackets, reduce_single_child],
    "inline_math": [remove_brackets, reduce_single_child],
    "command": replace_by_single_child,
    "known_command": replace_by_single_child,
    "text_command": [],
    "generic_command": [flatten],
    "citet, citep": [],
    "footnote": [],
    "includegraphics": [],
    "caption": [],
    "config": [remove_brackets, reduce_single_child],
    "block": [remove_brackets, flatten, replace_by_single_child],
    "text": collapse,
    "no_command, blockcmd": [],
    "structural": [],
    "CMDNAME": [remove_whitespace, reduce_single_child],
    "TXTCOMMAND": [remove_whitespace, reduce_single_child],
    "NAME": [reduce_single_child, remove_whitespace, reduce_single_child],
    # Strips the first character (the escaping backslash matched by the
    # ESCAPED parser) from the node's text.
    "ESCAPED": [replace_content(lambda node: str(node)[1:])],
    "BRACKETS": [],
    "TEXTCHUNK": [],
    "LF": [],
    "PARSEP": replace_content(lambda node: '\n\n'),
    "GAP": [],
    "LB": [],
    "BACKSLASH": [],
    "EOF": [],
    # "PARSEP": [replace_content_by('\n\n')],
    # "WSPC": [replace_content_by(' ')],
    ":Whitespace": streamline_whitespace,
    "*": replace_by_single_child
}

290

291
292
def LaTeXTransform() -> TransformationDict:
    """Build the AST transformation function for LaTeX syntax trees.

    Returns ``traverse`` with its ``processing_table`` argument bound to a
    shallow copy of ``LaTeX_AST_transformation_table``.
    """
    table = LaTeX_AST_transformation_table.copy()
    return partial(traverse, processing_table=table)
293

294

295
def get_transformer() -> TransformationFunc:
    """Return the cached AST transformer, creating it on first use.

    The transformer is kept in the module-level global
    ``thread_local_LaTeX_transformer_singleton``; despite that name, the
    code visible here uses a plain module global.
    """
    global thread_local_LaTeX_transformer_singleton
    try:
        return thread_local_LaTeX_transformer_singleton
    except NameError:
        # The global does not exist yet: this is the first call.
        thread_local_LaTeX_transformer_singleton = LaTeXTransform()
    return thread_local_LaTeX_transformer_singleton

304
305
306
307
308
309
310
311


#######################################################################
#
# COMPILER SECTION - Can be edited. Changes will be preserved.
#
#######################################################################

312
313
314
315
316
317

def empty_defaultdict():
    """Create an empty, arbitrarily nestable defaultdict.

    Missing keys are filled with a fresh dictionary of the same kind, so
    that ``d['a']['b']['c'] = 1`` works without any preparation.
    """
    nested = defaultdict(empty_defaultdict)
    return nested


318
319
320
class LaTeXCompiler(Compiler):
    """Compiler for the abstract-syntax-tree of a LaTeX source file.
    """
321
322
    KNOWN_DOCUMENT_CLASSES = {'book', 'article'}
    KNOWN_LANGUAGES = {'english', 'german'}
323

eckhart's avatar
eckhart committed
324
325
    def __init__(self):
        """Initialize the base compiler and an empty, arbitrarily nestable
        ``metadata`` dictionary."""
        super().__init__()
        self.metadata = defaultdict(empty_defaultdict)
327

328
329
330
331
    # def on_latexdoc(self, node):
    #     self.compile(node['preamble'])
    #     self.compile(node['document'])
    #     return node
332

333
334
    # def on_preamble(self, node):
    #     return node
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358

    # def on_document(self, node):
    #     return node

    # def on_frontpages(self, node):
    #     return node

    # def on_Chapters(self, node):
    #     return node

    # def on_Chapter(self, node):
    #     return node

    # def on_Sections(self, node):
    #     return node

    # def on_Section(self, node):
    #     return node

    # def on_SubSections(self, node):
    #     return node

    # def on_SubSection(self, node):
    #     return node
359

360
361
    # def on_SubSubSections(self, node):
    #     return node
362

363
364
    # def on_SubSubSection(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
365

366
367
    # def on_Paragraphs(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
368

369
370
    # def on_Paragraph(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
371

372
373
    # def on_SubParagraphs(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
374

375
376
    # def on_SubParagraph(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
377

378
379
    # def on_Bibliography(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
380

381
382
    # def on_Index(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
383

384
385
    # def on_heading(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
386

387
388
    # def on_block_environment(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
389

390
391
    # def on_known_environment(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
392

393
394
    # def on_generic_block(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
395

396
397
    # def on_begin_generic_block(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
398

399
400
    # def on_end_generic_block(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
401

402
403
    # def on_itemize(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
404

405
406
    # def on_enumerate(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
407

408
409
    # def on_item(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
410

411
412
    # def on_figure(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
413

414
415
    # def on_quotation(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
416

417
418
    # def on_verbatim(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
419

420
421
    # def on_tabular(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
422

423
424
    # def on_tabular_row(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
425

426
427
    # def on_tabular_cell(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
428

429
430
    # def on_tabular_config(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
431

432
433
    # def on_block_of_paragraphs(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
434

435
436
    # def on_sequence(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
437

438
439
    # def on_paragraph(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
440

441
442
    # def on_text_element(self, node):
    #     return node
443

444
445
    # def on_line_element(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
446

447
448
    # def on_inline_environment(self, node):
    #     return node
449

450
451
    # def on_known_inline_env(self, node):
    #     return node
452

453
454
    # def on_generic_inline_env(self, node):
    #     return node
455

456
457
    # def on_begin_inline_env(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
458

459
460
    # def on_end_inline_env(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
461

462
463
    # def on_begin_environment(self, node):
    #     return node
464

465
466
    # def on_end_environment(self, node):
    #     return node
467

468
469
    # def on_inline_math(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
470

471
472
    # def on_command(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
473

474
475
    # def on_known_command(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
476

477
478
    # def on_text_command(self, node):
    #     return node
479

480
481
    # def on_generic_command(self, node):
    #     return node
482

483
484
    # def on_footnote(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
485

486
487
    # def on_includegraphics(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
488

489
490
    # def on_caption(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
491

492
493
    # def on_multicolumn(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
494

495
496
    # def on_hline(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
497

498
499
    # def on_cline(self, node):
    #     return node
500

501
    def on_documentclass(self, node):
        """
        Saves the documentclass (if known) and the language (if given)
        in the metadata dictionary.

        The comma-separated options in the node's ``config`` child are
        searched for entries of ``KNOWN_LANGUAGES``. The first language
        listed is stored in ``self.metadata['language']``; each further,
        different language is reported as a warning and ignored. If the
        content of the node's ``text`` child is one of
        ``KNOWN_DOCUMENT_CLASSES``, it is stored in
        ``self.metadata['documentclass']``.

        Returns the node unchanged.
        """
        if 'config' in node:
            # dict.fromkeys drops duplicate options while keeping them in the
            # order in which they were written, so that "the first language
            # wins" does not depend on set iteration order.
            options = dict.fromkeys(
                part.strip() for part in node['config'].content.split(','))
            for it in options:
                if it in self.KNOWN_LANGUAGES:
                    # The language is recorded in self.metadata, so that is
                    # where an earlier language must be looked up. A plain
                    # membership test does not create a default entry.
                    if 'language' in self.metadata:
                        self.tree.new_error(node, 'Only one document language supported. '
                                            'Using %s, ignoring %s.'
                                            % (self.metadata['language'], it), Error.WARNING)
                    else:
                        self.metadata['language'] = it
        # Compare and store the text of the class name rather than the node
        # object, just as it is done for the 'config' child above. A
        # documentclass without a plain 'text' child is skipped.
        if 'text' in node:
            documentclass = node['text'].content
            if documentclass in self.KNOWN_DOCUMENT_CLASSES:
                self.metadata['documentclass'] = documentclass
        return node

    def on_pdfinfo(self, node):
        """Compile a ``pdfinfo`` node: it is passed through unchanged."""
        return node

522
523
    # def on_config(self, node):
    #     return node
524

525
526
    # def on_cfg_text(self, node):
    #     return node
527

528
529
    # def on_block(self, node):
    #     return node
530

531
532
    # def on_text(self, node):
    #     return node
533

534
535
    # def on_no_command(self, node):
    #     return node
536

537
538
    # def on_blockcmd(self, node):
    #     return node
539

540
541
    # def on_structural(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
542

543
544
    # def on_CMDNAME(self, node):
    #     return node
545

546
547
    # def on_TXTCOMMAND(self, node):
    #     return node
548

549
550
    # def on_ESCAPED(self, node):
    #     return node
551

552
553
    # def on_SPECIAL(self, node):
    #     return node
554

555
556
    # def on_BRACKETS(self, node):
    #     return node
557

558
559
    # def on_LINEFEED(self, node):
    #     return node
560

561
562
    # def on_NAME(self, node):
    #     return node
563

564
565
    # def on_INTEGER(self, node):
    #     return node
566

567
568
    # def on_TEXTCHUNK(self, node):
    #     return node
569

570
571
    # def on_LF(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
572

573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
    # def on_LFF(self, node):
    #     return node

    # def on_PARSEP(self, node):
    #     return node

    # def on_WSPC(self, node):
    #     return node

    # def on_GAP(self, node):
    #     return node

    # def on_NEW_LINE(self, node):
    #     return node

    # def on_LB(self, node):
    #     return node

    # def on_BACKSLASH(self, node):
    #     return node

    # def on_EOF(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
596

597

eckhart's avatar
eckhart committed
598
def get_compiler() -> LaTeXCompiler:
    """Return the cached ``LaTeXCompiler`` instance, creating it on first use.

    The instance is kept in the module-level global
    ``thread_local_LaTeX_compiler_singleton``; despite that name, the code
    visible here uses a plain module global.
    """
    global thread_local_LaTeX_compiler_singleton
    try:
        return thread_local_LaTeX_compiler_singleton
    except NameError:
        # The global does not exist yet: this is the first call.
        thread_local_LaTeX_compiler_singleton = LaTeXCompiler()
    return thread_local_LaTeX_compiler_singleton
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621


#######################################################################
#
# END OF DHPARSER-SECTIONS
#
#######################################################################


def compile_src(source):
    """Compiles ``source`` and returns (result, errors, ast).

    The source is run through the preprocessor, grammar, transformer and
    compiler obtained from the ``get_...`` functions of this module, inside
    a ``logging("LOGS")`` context. The value returned by ``compile_source``
    is passed on unchanged.
    """
    with logging("LOGS"):
        # The compiler is fetched first, as before. The log file name that
        # used to be derived here was never read and has been dropped.
        compiler = get_compiler()
        result = compile_source(source, get_preprocessor(),
                                get_grammar(),
                                get_transformer(), compiler)
    return result


if __name__ == "__main__":
    # Script entry point: compile the file named by the first command line
    # argument, or print a usage hint if no argument was given.
    if len(sys.argv) <= 1:
        print("Usage: LaTeXCompiler.py [FILENAME]")
    else:
        result, errors, ast = compile_src(sys.argv[1])
        if not errors:
            # Syntax trees are printed as XML, any other result as it is.
            print(result.as_xml() if isinstance(result, Node) else result)
        else:
            for error in errors:
                print(error)
            sys.exit(1)