LaTeXCompiler.py 23.1 KB
Newer Older
1
2
3
4
5
6
7
8
9
#!/usr/bin/python

#######################################################################
#
# SYMBOLS SECTION - Can be edited. Changes will be preserved.
#
#######################################################################


10
from collections import defaultdict
11
12
import os
import sys
Eckhart Arnold's avatar
Eckhart Arnold committed
13
14
from functools import partial

15
16
17
18
try:
    import regex as re
except ImportError:
    import re
19
from DHParser import is_filename, Grammar, Compiler, Lookbehind, Alternative, Pop, \
di68kap's avatar
di68kap committed
20
21
    Synonym, Whitespace, Token, \
    Option, NegativeLookbehind, OneOrMore, RegExp, Series, Capture, \
22
    ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
23
    PreprocessorFunc, TransformationDict, \
Eckhart Arnold's avatar
Eckhart Arnold committed
24
    Node, TransformationFunc, traverse, remove_children_if, is_anonymous, \
Eckhart Arnold's avatar
Eckhart Arnold committed
25
    reduce_single_child, replace_by_single_child, remove_whitespace, \
26
    flatten, is_empty, collapse, replace_content, replace_content_by, remove_brackets, \
eckhart's avatar
eckhart committed
27
    is_one_of, traverse_locally, remove_tokens, remove_nodes, TOKEN_PTYPE, Error, GLOBALS
28
from DHParser.log import logging
29
30
31
32


#######################################################################
#
Eckhart Arnold's avatar
Eckhart Arnold committed
33
# PREPROCESSOR SECTION - Can be edited. Changes will be preserved.
34
35
36
#
#######################################################################

Eckhart Arnold's avatar
Eckhart Arnold committed
37
def LaTeXPreprocessor(text):
    """Identity preprocessor: returns the source ``text`` unchanged."""
    return text

Eckhart Arnold's avatar
Eckhart Arnold committed
40
41
def get_preprocessor() -> PreprocessorFunc:
    """Returns the preprocessor function for LaTeX sources, i.e.
    ``LaTeXPreprocessor``."""
    return LaTeXPreprocessor
42
43
44
45
46
47
48
49
50


#######################################################################
#
# PARSER SECTION - Don't edit! CHANGES WILL BE OVERWRITTEN!
#
#######################################################################

class LaTeXGrammar(Grammar):
    r"""Parser for a LaTeX source file.

    This class lives in the generated PARSER SECTION of the module
    ("Don't edit! CHANGES WILL BE OVERWRITTEN!"). Every class attribute
    is a parser object composed from the DHParser combinators imported
    at the top of the file; ``root__`` names the parser that serves as
    entry point (``latexdoc``).
    """
    # Forward declarations: these parsers are referenced before their
    # definition and are bound further below via ``.set(...)``.
    begin_generic_block = Forward()
    block_environment = Forward()
    block_of_paragraphs = Forward()
    end_generic_block = Forward()
    paragraph = Forward()
    tabular_config = Forward()
    text_element = Forward()
    # NOTE(review): presumably a hash of the grammar source this class
    # was generated from - confirm against the generator.
    source_hash__ = "79e85f223d89452f2ba796f9c40daac9"
    parser_initialization__ = "upon instantiation"
    # Comments run from '%' to the end of the line. WHITESPACE__ admits at
    # most one line break and never a blank line (see the negative
    # lookahead), so blank lines are not matched by ``wsp__``.
    COMMENT__ = r'%.*'
    WHITESPACE__ = r'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?'
    WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
    wsp__ = Whitespace(WSP_RE__)
    # Primitive (regular-expression level) parsers.
    EOF = RegExp('(?!.)')
    BACKSLASH = RegExp('[\\\\]')
    LB = RegExp('\\s*?\\n|$')
    NEW_LINE = Series(RegExp('[ \\t]*'), Option(RegExp(COMMENT__)), RegExp('\\n'))
    GAP = Series(RegExp('[ \\t]*(?:\\n[ \\t]*)+\\n'), wsp__)
    WSPC = OneOrMore(Alternative(RegExp(COMMENT__), RegExp('\\s+')))
    PARSEP = Series(ZeroOrMore(Series(RegExp(WHITESPACE__), RegExp(COMMENT__))), GAP, Option(WSPC))
    LFF = Series(NEW_LINE, Option(WSPC))
    LF = Series(NEW_LINE, ZeroOrMore(Series(RegExp(COMMENT__), RegExp(WHITESPACE__))))
    TEXTCHUNK = RegExp('[^\\\\%$&\\{\\}\\[\\]\\s\\n]+')
    INTEGER = Series(RegExp('\\d+'), wsp__)
    # NAME is captured so that ``end_environment`` can match it via Pop(NAME).
    NAME = Capture(Series(RegExp('\\w+'), wsp__))
    LINEFEED = RegExp('[\\\\][\\\\]')
    BRACKETS = RegExp('[\\[\\]]')
    SPECIAL = RegExp('[$&_\\\\\\\\/]')
    ESCAPED = RegExp('\\\\[%$&_/{}]')
    TXTCOMMAND = RegExp('\\\\text\\w+')
    CMDNAME = Series(RegExp('\\\\(?:(?!_)\\w)+'), wsp__)
    # Commands: structural/block commands, known commands and generic ones.
    structural = Alternative(Series(Token("subsection"), wsp__), Series(Token("section"), wsp__), Series(Token("chapter"), wsp__), Series(Token("subsubsection"), wsp__), Series(Token("paragraph"), wsp__), Series(Token("subparagraph"), wsp__), Series(Token("item"), wsp__))
    blockcmd = Series(BACKSLASH, Alternative(Series(Alternative(Series(Token("begin{"), wsp__), Series(Token("end{"), wsp__)), Alternative(Series(Token("enumerate"), wsp__), Series(Token("itemize"), wsp__), Series(Token("figure"), wsp__), Series(Token("quote"), wsp__), Series(Token("quotation"), wsp__), Series(Token("tabular"), wsp__)), Series(Token("}"), wsp__)), structural, begin_generic_block, end_generic_block))
    no_command = Alternative(Series(Token("\\begin{"), wsp__), Series(Token("\\end"), wsp__), Series(BACKSLASH, structural))
    text = Series(TEXTCHUNK, ZeroOrMore(Series(RegExp(''), wsp__, TEXTCHUNK)))
    block = Series(RegExp('{'), RegExp(''), wsp__, ZeroOrMore(Series(NegativeLookahead(blockcmd), text_element, RegExp(''), wsp__)), RegExp('}'), mandatory=4)
    cfg_text = ZeroOrMore(Alternative(Series(Option(Series(RegExp(''), wsp__)), text), CMDNAME, SPECIAL))
    config = Series(Series(Token("["), wsp__), cfg_text, Series(Token("]"), wsp__), mandatory=2)
    pdfinfo = Series(Series(Token("\\pdfinfo"), wsp__), block)
    documentclass = Series(Series(Token("\\documentclass"), wsp__), Option(config), block)
    cline = Series(Series(Token("\\cline{"), wsp__), INTEGER, Series(Token("-"), wsp__), INTEGER, Series(Token("}"), wsp__))
    hline = Series(Token("\\hline"), wsp__)
    multicolumn = Series(Series(Token("\\multicolumn"), wsp__), Series(Token("{"), wsp__), INTEGER, Series(Token("}"), wsp__), tabular_config, block_of_paragraphs)
    caption = Series(Series(Token("\\caption"), wsp__), block)
    includegraphics = Series(Series(Token("\\includegraphics"), wsp__), Option(config), block)
    footnote = Series(Series(Token("\\footnote"), wsp__), block_of_paragraphs)
    citep = Series(Alternative(Series(Token("\\citep"), wsp__), Series(Token("\\cite"), wsp__)), Option(config), block)
    citet = Series(Series(Token("\\citet"), wsp__), Option(config), block)
    generic_command = Series(NegativeLookahead(no_command), CMDNAME, Option(Series(Option(Series(RegExp(''), wsp__, config)), RegExp(''), wsp__, block)))
    text_command = Alternative(TXTCOMMAND, ESCAPED, BRACKETS)
    known_command = Alternative(citet, citep, footnote, includegraphics, caption, multicolumn, hline, cline, documentclass, pdfinfo)
    command = Alternative(known_command, text_command, generic_command)
    # Inline environments and paragraph-level elements.
    inline_math = Series(RegExp('\\$'), RegExp('[^$]*'), RegExp('\\$'), mandatory=2)
    end_environment = Series(RegExp('\\\\end{'), Pop(NAME), RegExp('}'), mandatory=1)
    begin_environment = Series(RegExp('\\\\begin{'), NAME, RegExp('}'), mandatory=1)
    end_inline_env = Synonym(end_environment)
    begin_inline_env = Alternative(Series(NegativeLookbehind(LB), begin_environment), Series(begin_environment, NegativeLookahead(LFF)))
    generic_inline_env = Series(begin_inline_env, RegExp(''), wsp__, paragraph, end_inline_env, mandatory=4)
    known_inline_env = Synonym(inline_math)
    inline_environment = Alternative(known_inline_env, generic_inline_env)
    line_element = Alternative(text, block, inline_environment, command)
    text_element.set(Alternative(line_element, LINEFEED))
    paragraph.set(OneOrMore(Series(NegativeLookahead(blockcmd), text_element, RegExp(''), wsp__)))
    sequence = Series(Option(WSPC), OneOrMore(Series(Alternative(paragraph, block_environment), Option(PARSEP))))
    # Block environments (tables, lists, figures, quotations, ...).
    block_of_paragraphs.set(Series(Series(Token("{"), wsp__), Option(sequence), Series(Token("}"), wsp__), mandatory=2))
    tabular_config.set(Series(Series(Token("{"), wsp__), RegExp('[lcr|]+'), wsp__, Series(Token("}"), wsp__), mandatory=3))
    tabular_cell = ZeroOrMore(Series(line_element, RegExp(''), wsp__))
    tabular_row = Series(Alternative(multicolumn, tabular_cell), ZeroOrMore(Series(Series(Token("&"), wsp__), Alternative(multicolumn, tabular_cell))), Series(Token("\\\\"), wsp__), Alternative(hline, ZeroOrMore(cline)))
    tabular = Series(Series(Token("\\begin{tabular}"), wsp__), tabular_config, ZeroOrMore(tabular_row), Series(Token("\\end{tabular}"), wsp__), mandatory=3)
    verbatim = Series(Series(Token("\\begin{verbatim}"), wsp__), sequence, Series(Token("\\end{verbatim}"), wsp__), mandatory=2)
    quotation = Alternative(Series(Series(Token("\\begin{quotation}"), wsp__), sequence, Series(Token("\\end{quotation}"), wsp__), mandatory=2), Series(Series(Token("\\begin{quote}"), wsp__), sequence, Series(Token("\\end{quote}"), wsp__), mandatory=2))
    figure = Series(Series(Token("\\begin{figure}"), wsp__), sequence, Series(Token("\\end{figure}"), wsp__), mandatory=2)
    item = Series(Series(Token("\\item"), wsp__), sequence)
    enumerate = Series(Series(Token("\\begin{enumerate}"), wsp__), Option(WSPC), ZeroOrMore(item), Series(Token("\\end{enumerate}"), wsp__), mandatory=3)
    itemize = Series(Series(Token("\\begin{itemize}"), wsp__), Option(WSPC), ZeroOrMore(item), Series(Token("\\end{itemize}"), wsp__), mandatory=3)
    end_generic_block.set(Series(Lookbehind(LB), end_environment, LFF))
    begin_generic_block.set(Series(Lookbehind(LB), begin_environment, LFF))
    generic_block = Series(begin_generic_block, sequence, end_generic_block, mandatory=2)
    known_environment = Alternative(itemize, enumerate, figure, tabular, quotation, verbatim)
    block_environment.set(Alternative(known_environment, generic_block))
    # Document structure: headings, nested sectioning levels, document.
    heading = Synonym(block)
    Index = Series(Option(WSPC), Series(Token("\\printindex"), wsp__))
    Bibliography = Series(Option(WSPC), Series(Token("\\bibliography"), wsp__), heading)
    SubParagraph = Series(Series(Token("\\subparagraph"), wsp__), heading, Option(sequence))
    SubParagraphs = OneOrMore(Series(Option(WSPC), SubParagraph))
    Paragraph = Series(Series(Token("\\paragraph"), wsp__), heading, ZeroOrMore(Alternative(sequence, SubParagraphs)))
    Paragraphs = OneOrMore(Series(Option(WSPC), Paragraph))
    SubSubSection = Series(Series(Token("\\subsubsection"), wsp__), heading, ZeroOrMore(Alternative(sequence, Paragraphs)))
    SubSubSections = OneOrMore(Series(Option(WSPC), SubSubSection))
    SubSection = Series(Series(Token("\\subsection"), wsp__), heading, ZeroOrMore(Alternative(sequence, SubSubSections)))
    SubSections = OneOrMore(Series(Option(WSPC), SubSection))
    Section = Series(Series(Token("\\section"), wsp__), heading, ZeroOrMore(Alternative(sequence, SubSections)))
    Sections = OneOrMore(Series(Option(WSPC), Section))
    Chapter = Series(Series(Token("\\chapter"), wsp__), heading, ZeroOrMore(Alternative(sequence, Sections)))
    Chapters = OneOrMore(Series(Option(WSPC), Chapter))
    frontpages = Synonym(sequence)
    document = Series(Option(WSPC), Series(Token("\\begin{document}"), wsp__), frontpages, Alternative(Chapters, Sections), Option(Bibliography), Option(Index), Option(WSPC), Series(Token("\\end{document}"), wsp__), Option(WSPC), EOF, mandatory=9)
    preamble = OneOrMore(Series(Option(WSPC), command))
    latexdoc = Series(preamble, document)
    root__ = latexdoc
    
def get_grammar() -> LaTeXGrammar:
    """Returns the ``LaTeXGrammar`` instance stored on ``GLOBALS``,
    creating and storing it on first use."""
    try:
        grammar = GLOBALS.LaTeX_1_grammar_singleton
    except AttributeError:
        # No grammar has been stored on GLOBALS yet: create it now.
        GLOBALS.LaTeX_1_grammar_singleton = LaTeXGrammar()
        grammar = GLOBALS.LaTeX_1_grammar_singleton
    return grammar
161
162
163
164
165
166
167
168
169


#######################################################################
#
# AST SECTION - Can be edited. Changes will be preserved.
#
#######################################################################


170
def streamline_whitespace(context):
    """Normalizes the whitespace node at the end of ``context`` in place.

    Nothing happens if the parent node (``context[-2]``) stems from a
    ``:_Token`` parser. Otherwise the node's result is replaced by

    - a single line feed, if the whitespace contains a comment
      (a ``%`` character) or a line break,
    - a single blank, if it is non-empty but contains neither,
    - the empty string, if it was empty to begin with.
    """
    if context[-2].parser.ptype == ":_Token":
        return
    ws_node = context[-1]
    assert ws_node.tag_name in ['WSPC', ':Whitespace']
    raw = ws_node.content
    if '%' in raw or '\n' in raw:
        ws_node.result = '\n'
    elif raw:
        ws_node.result = ' '
    else:
        ws_node.result = ''
185
186


187
188
189
def watch(node):
    """Debugging helper: prints the string returned by ``node.as_sxpr()``."""
    print(node.as_sxpr())

eckhart's avatar
eckhart committed
190
191
# Transformation built with ``flatten(..., recursive=True)``. Its condition
# selects nodes that are anonymous or that carry one of the listed
# container names (the plural sectioning levels and "sequence").
flatten_structure = flatten(lambda context: is_anonymous(context) or is_one_of(
    context, {"Chapters", "Sections", "SubSections", "SubSubSections", "Paragraphs",
              "SubParagraphs", "sequence"}), recursive=True)
eckhart's avatar
eckhart committed
193
194
195


def is_commandname(context):
    """Returns True, if the last node in the context is a token that merely
    repeats the name of its parent as a LaTeX-command, i.e. its text equals
    ``\\<parent>``, ``\\begin{<parent>}`` or ``\\end{<parent>}``, where
    ``<parent>`` is the lower-cased tag name of the parent node. Parents
    with a single child never qualify."""
    token = context[-1]
    if token.parser.ptype != TOKEN_PTYPE:
        return False
    parent = context[-2]
    if len(parent.children) <= 1:
        return False
    name = parent.tag_name.lower()
    candidates = ('\\' + name, '\\begin{' + name + '}', '\\end{' + name + '}')
    return str(token) in candidates


# Transformation built with ``remove_children_if``. Its condition selects
# children that are empty, that are named 'PARSEP' or 'WSPC', or for which
# ``is_commandname`` returns True.
drop_expendables = remove_children_if(lambda context: is_empty(context) or
                                                      is_one_of(context, {'PARSEP', 'WSPC'}) or
                                                      is_commandname(context))

215

216
217
# Maps node names (or comma-separated lists of node names) to the
# transformation, or list of transformations, that ``traverse`` receives
# as its processing table (see ``LaTeXTransform``).
# NOTE(review): "<" and "*" are presumably DHParser's special keys for
# "before any node-specific entry" and "fallback for unlisted nodes" -
# confirm against the DHParser ``traverse`` documentation.
LaTeX_AST_transformation_table = {
    # AST Transformations for the LaTeX-grammar
    "<": [drop_expendables, flatten_structure],
    "latexdoc": [],
    "preamble": [traverse_locally({'<': remove_whitespace, 'block': replace_by_single_child})],
    "document": [flatten_structure],
    "pdfinfo": [],
    "frontpages": reduce_single_child,
    "Chapters, Sections, SubSections, SubSubSections, Paragraphs, SubParagraphs": [],
    "Chapter, Section, SubSection, SubSubSection, Paragraph, SubParagraph": [],
    "heading": reduce_single_child,
    "Bibliography": [],
    "Index": [],
    "block_environment": replace_by_single_child,
    "known_environment": replace_by_single_child,
    "generic_block": [],
    "begin_generic_block, end_generic_block": [remove_nodes('NEW_LINE'), replace_by_single_child],
    "itemize, enumerate": [remove_brackets, flatten],
    "item": [],
    "figure": [],
    "quotation": [reduce_single_child, remove_brackets],
    "verbatim": [],
    "tabular": [],
    "tabular_config, block_of_paragraphs": [remove_brackets, reduce_single_child],
    # The row terminator is defined in the grammar as Token("\\\\"), i.e.
    # TWO backslash characters. It must be spelled the same way here,
    # because '\\' is only a single backslash and would never match it.
    "tabular_row": [flatten, remove_tokens('&', '\\\\')],
    "tabular_cell": [flatten, remove_whitespace],
    "multicolumn": [remove_tokens('{', '}')],
    "hline": [remove_whitespace, reduce_single_child],
    "sequence": [flatten],
    "paragraph": [flatten],
    "text_element": replace_by_single_child,
    "line_element": replace_by_single_child,
    "inline_environment": replace_by_single_child,
    "known_inline_env": replace_by_single_child,
    "generic_inline_env": [],
    "begin_inline_env, end_inline_env": [replace_by_single_child],
    "begin_environment, end_environment": [remove_brackets, reduce_single_child],
    "inline_math": [remove_brackets, reduce_single_child],
    "command": replace_by_single_child,
    "known_command": replace_by_single_child,
    "text_command": [],
    "generic_command": [flatten],
    "citet, citep": [],
    "footnote": [],
    "includegraphics": [],
    "caption": [],
    "config": [remove_brackets, reduce_single_child],
    "block": [remove_brackets, flatten, replace_by_single_child],
    "text": collapse,
    "no_command, blockcmd": [],
    "structural": [],
    "CMDNAME": [remove_whitespace, reduce_single_child],
    "TXTCOMMAND": [remove_whitespace, reduce_single_child],
    "NAME": [reduce_single_child, remove_whitespace, reduce_single_child],
    # Strips the leading backslash from an escaped character.
    "ESCAPED": [replace_content(lambda node: str(node)[1:])],
    "BRACKETS": [],
    "TEXTCHUNK": [],
    "LF": [],
    "PARSEP": replace_content(lambda node: '\n\n'),
    "GAP": [],
    "LB": [],
    "BACKSLASH": [],
    "EOF": [],
    # "PARSEP": [replace_content_by('\n\n')],
    # "WSPC": [replace_content_by(' ')],
    ":Whitespace": streamline_whitespace,
    "*": replace_by_single_child
}

285

286
287
def LaTeXTransform() -> TransformationFunc:
    """Returns ``traverse`` with its ``processing_table`` argument bound to
    a shallow copy of ``LaTeX_AST_transformation_table``."""
    return partial(traverse, processing_table=LaTeX_AST_transformation_table.copy())
288

289

290
def get_transformer() -> TransformationFunc:
    """Returns the AST-transformation function, creating it on first use.

    The function is cached as an attribute of ``GLOBALS``, exactly as
    ``get_grammar`` caches the grammar object. Earlier, the cache was an
    ordinary module-level global (despite carrying "thread_local" in its
    name) and was therefore shared by all threads.
    NOTE(review): this relies on ``GLOBALS`` being DHParser's thread-local
    storage object - confirm against the DHParser version in use.
    """
    try:
        transformer = GLOBALS.LaTeX_transformer_singleton
    except AttributeError:
        # Nothing cached on GLOBALS yet: build the transformer now.
        GLOBALS.LaTeX_transformer_singleton = LaTeXTransform()
        transformer = GLOBALS.LaTeX_transformer_singleton
    return transformer

299
300
301
302
303
304
305
306


#######################################################################
#
# COMPILER SECTION - Can be edited. Changes will be preserved.
#
#######################################################################

307
308
309
310
311
312

def empty_defaultdict() -> defaultdict:
    """Returns a defaultdict with an empty defaultdict as default value.

    The function passes itself as default factory, so missing keys yield
    further dictionaries of the same kind at any nesting depth."""
    return defaultdict(empty_defaultdict)


313
314
315
class LaTeXCompiler(Compiler):
    """Compiler for the abstract-syntax-tree of a LaTeX source file.
    """
316
317
    KNOWN_DOCUMENT_CLASSES = {'book', 'article'}
    KNOWN_LANGUAGES = {'english', 'german'}
318

eckhart's avatar
eckhart committed
319
320
    def __init__(self):
        """Initializes the base class and sets up ``self.metadata`` as an
        empty dictionary that creates nested dictionaries on demand."""
        super().__init__()
        self.metadata = defaultdict(empty_defaultdict)
322

323
324
325
326
    # def on_latexdoc(self, node):
    #     self.compile(node['preamble'])
    #     self.compile(node['document'])
    #     return node
327

328
329
    # def on_preamble(self, node):
    #     return node
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353

    # def on_document(self, node):
    #     return node

    # def on_frontpages(self, node):
    #     return node

    # def on_Chapters(self, node):
    #     return node

    # def on_Chapter(self, node):
    #     return node

    # def on_Sections(self, node):
    #     return node

    # def on_Section(self, node):
    #     return node

    # def on_SubSections(self, node):
    #     return node

    # def on_SubSection(self, node):
    #     return node
354

355
356
    # def on_SubSubSections(self, node):
    #     return node
357

358
359
    # def on_SubSubSection(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
360

361
362
    # def on_Paragraphs(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
363

364
365
    # def on_Paragraph(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
366

367
368
    # def on_SubParagraphs(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
369

370
371
    # def on_SubParagraph(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
372

373
374
    # def on_Bibliography(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
375

376
377
    # def on_Index(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
378

379
380
    # def on_heading(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
381

382
383
    # def on_block_environment(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
384

385
386
    # def on_known_environment(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
387

388
389
    # def on_generic_block(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
390

391
392
    # def on_begin_generic_block(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
393

394
395
    # def on_end_generic_block(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
396

397
398
    # def on_itemize(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
399

400
401
    # def on_enumerate(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
402

403
404
    # def on_item(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
405

406
407
    # def on_figure(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
408

409
410
    # def on_quotation(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
411

412
413
    # def on_verbatim(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
414

415
416
    # def on_tabular(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
417

418
419
    # def on_tabular_row(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
420

421
422
    # def on_tabular_cell(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
423

424
425
    # def on_tabular_config(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
426

427
428
    # def on_block_of_paragraphs(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
429

430
431
    # def on_sequence(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
432

433
434
    # def on_paragraph(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
435

436
437
    # def on_text_element(self, node):
    #     return node
438

439
440
    # def on_line_element(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
441

442
443
    # def on_inline_environment(self, node):
    #     return node
444

445
446
    # def on_known_inline_env(self, node):
    #     return node
447

448
449
    # def on_generic_inline_env(self, node):
    #     return node
450

451
452
    # def on_begin_inline_env(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
453

454
455
    # def on_end_inline_env(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
456

457
458
    # def on_begin_environment(self, node):
    #     return node
459

460
461
    # def on_end_environment(self, node):
    #     return node
462

463
464
    # def on_inline_math(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
465

466
467
    # def on_command(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
468

469
470
    # def on_known_command(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
471

472
473
    # def on_text_command(self, node):
    #     return node
474

475
476
    # def on_generic_command(self, node):
    #     return node
477

478
479
    # def on_footnote(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
480

481
482
    # def on_includegraphics(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
483

484
485
    # def on_caption(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
486

487
488
    # def on_multicolumn(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
489

490
491
    # def on_hline(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
492

493
494
    # def on_cline(self, node):
    #     return node
495

496
    def on_documentclass(self, node):
        """
        Saves the documentclass (if known) and the language (if given)
        in the metadata dictionary.

        The comma-separated options of the ``config`` child are scanned in
        document order. The first option found in ``KNOWN_LANGUAGES`` is
        stored as ``self.metadata['language']``; any further, different
        language yields a warning and is ignored. The content of the
        ``text`` child is stored as ``self.metadata['documentclass']`` if
        it is found in ``KNOWN_DOCUMENT_CLASSES``.

        Returns the unchanged ``node``.
        """
        if 'config' in node:
            # Iterate over the list rather than a set, so that the winning
            # language does not depend on set iteration order.
            for part in node['config'].content.split(','):
                option = part.strip()
                if option not in self.KNOWN_LANGUAGES:
                    continue
                # ``.get`` does not trigger the default factory, so probing
                # for the key leaves no empty placeholder entry behind.
                current = self.metadata.get('language')
                if not current:
                    self.metadata['language'] = option
                elif option != current:
                    self.tree.new_error(node, 'Only one document language supported. '
                                        'Using %s, ignoring %s.'
                                        % (current, option), Error.WARNING)
        if 'text' in node:
            text = node['text']
            # Compare the textual content with the known class names. A node
            # object itself would not be found in a set of strings.
            # NOTE(review): assumes ``node['text']`` yields a node with a
            # ``content`` attribute, like ``node['config']`` above; plain
            # strings are accepted as well.
            docclass = str(getattr(text, 'content', text)).strip()
            if docclass in self.KNOWN_DOCUMENT_CLASSES:
                self.metadata['documentclass'] = docclass
        return node

    def on_pdfinfo(self, node):
        """Returns the ``pdfinfo`` node unchanged."""
        return node

517
518
    # def on_config(self, node):
    #     return node
519

520
521
    # def on_cfg_text(self, node):
    #     return node
522

523
524
    # def on_block(self, node):
    #     return node
525

526
527
    # def on_text(self, node):
    #     return node
528

529
530
    # def on_no_command(self, node):
    #     return node
531

532
533
    # def on_blockcmd(self, node):
    #     return node
534

535
536
    # def on_structural(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
537

538
539
    # def on_CMDNAME(self, node):
    #     return node
540

541
542
    # def on_TXTCOMMAND(self, node):
    #     return node
543

544
545
    # def on_ESCAPED(self, node):
    #     return node
546

547
548
    # def on_SPECIAL(self, node):
    #     return node
549

550
551
    # def on_BRACKETS(self, node):
    #     return node
552

553
554
    # def on_LINEFEED(self, node):
    #     return node
555

556
557
    # def on_NAME(self, node):
    #     return node
558

559
560
    # def on_INTEGER(self, node):
    #     return node
561

562
563
    # def on_TEXTCHUNK(self, node):
    #     return node
564

565
566
    # def on_LF(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
567

568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
    # def on_LFF(self, node):
    #     return node

    # def on_PARSEP(self, node):
    #     return node

    # def on_WSPC(self, node):
    #     return node

    # def on_GAP(self, node):
    #     return node

    # def on_NEW_LINE(self, node):
    #     return node

    # def on_LB(self, node):
    #     return node

    # def on_BACKSLASH(self, node):
    #     return node

    # def on_EOF(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
591

592

eckhart's avatar
eckhart committed
593
def get_compiler() -> LaTeXCompiler:
    """Returns the ``LaTeXCompiler`` instance, creating it on first use.

    The instance is cached as an attribute of ``GLOBALS``, exactly as
    ``get_grammar`` caches the grammar object. Earlier, the cache was an
    ordinary module-level global (despite carrying "thread_local" in its
    name), so the compiler and its mutable state (e.g. ``metadata``) were
    shared by all threads.
    NOTE(review): this relies on ``GLOBALS`` being DHParser's thread-local
    storage object - confirm against the DHParser version in use.
    """
    try:
        compiler = GLOBALS.LaTeX_compiler_singleton
    except AttributeError:
        # Nothing cached on GLOBALS yet: instantiate the compiler now.
        GLOBALS.LaTeX_compiler_singleton = LaTeXCompiler()
        compiler = GLOBALS.LaTeX_compiler_singleton
    return compiler
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616


#######################################################################
#
# END OF DHPARSER-SECTIONS
#
#######################################################################


def compile_src(source):
    """Compiles ``source`` and returns (result, errors, ast).

    The source is passed to ``compile_source`` together with the
    preprocessor, grammar, transformer and compiler obtained from the
    ``get_...()`` functions of this module. The call runs inside the
    ``logging("LOGS")`` context.

    The former local variable ``log_file_name`` has been dropped: it was
    never used and, moreover, was computed incorrectly.
    """
    with logging("LOGS"):
        # The compiler is fetched first, as before, to keep the order in
        # which the cached objects are created.
        compiler = get_compiler()
        result = compile_source(source, get_preprocessor(),
                                get_grammar(),
                                get_transformer(), compiler)
    return result


# Command-line entry point: compiles the file named by the first argument.
# Errors are printed one per line and lead to exit status 1; otherwise the
# result is printed (as XML, if it is a Node).
if __name__ == "__main__":
    if len(sys.argv) <= 1:
        print("Usage: LaTeXCompiler.py [FILENAME]")
    else:
        result, errors, ast = compile_src(sys.argv[1])
        if not errors:
            print(result.as_xml() if isinstance(result, Node) else result)
        else:
            for message in errors:
                print(message)
            sys.exit(1)