LaTeXCompiler.py 23.2 KB
Newer Older
1
2
3
4
5
6
7
8
9
#!/usr/bin/python

#######################################################################
#
# SYMBOLS SECTION - Can be edited. Changes will be preserved.
#
#######################################################################


10
from collections import defaultdict
11
12
import os
import sys
Eckhart Arnold's avatar
Eckhart Arnold committed
13
14
from functools import partial

15
16
17
18
try:
    import regex as re
except ImportError:
    import re
19
from DHParser import is_filename, Grammar, Compiler, Lookbehind, Alternative, Pop, \
di68kap's avatar
di68kap committed
20
21
    Synonym, Whitespace, Token, \
    Option, NegativeLookbehind, OneOrMore, RegExp, Series, Capture, \
22
    ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
23
    PreprocessorFunc, TransformationDict, \
Eckhart Arnold's avatar
Eckhart Arnold committed
24
    Node, TransformationFunc, traverse, remove_children_if, is_anonymous, \
Eckhart Arnold's avatar
Eckhart Arnold committed
25
    reduce_single_child, replace_by_single_child, remove_whitespace, \
26
    flatten, is_empty, collapse, replace_content, replace_content_by, remove_brackets, \
eckhart's avatar
eckhart committed
27
    is_one_of, traverse_locally, remove_tokens, remove_nodes, TOKEN_PTYPE, Error, GLOBALS
28
from DHParser.log import logging
29
30
31
32


#######################################################################
#
Eckhart Arnold's avatar
Eckhart Arnold committed
33
# PREPROCESSOR SECTION - Can be edited. Changes will be preserved.
34
35
36
#
#######################################################################

Eckhart Arnold's avatar
Eckhart Arnold committed
37
def LaTeXPreprocessor(text):
    """Identity preprocessor: returns the LaTeX source ``text`` unchanged.

    No preprocessing is performed; the function only exists so that
    ``get_preprocessor()`` has a preprocessor function to hand out.
    """
    return text

Eckhart Arnold's avatar
Eckhart Arnold committed
40
41
def get_preprocessor() -> PreprocessorFunc:
    """Returns the preprocessor function used for LaTeX sources."""
    preprocessor = LaTeXPreprocessor
    return preprocessor
42
43
44
45
46
47
48
49
50


#######################################################################
#
# PARSER SECTION - Don't edit! CHANGES WILL BE OVERWRITTEN!
#
#######################################################################

class LaTeXGrammar(Grammar):
    r"""Parser for a LaTeX source file.

    The class body is a flat list of parser definitions. Definitions may
    only refer to names that were assigned further up, so the order of the
    statements below must not be changed. Rules that are needed before
    they can be defined are declared as ``Forward()`` first and filled in
    later via ``.set(...)``.

    NOTE: According to the section banner above this class, this code is
    generated and manual changes will be overwritten.
    """
    # Forward declarations of rules that are referenced before their definition.
    begin_generic_block = Forward()
    block_environment = Forward()
    block_of_paragraphs = Forward()
    end_generic_block = Forward()
    paragraph = Forward()
    tabular_config = Forward()
    text_element = Forward()
    source_hash__ = "e09808ecd485c07b3455c3a2bf4eada3"
    parser_initialization__ = "upon instantiation"
    resume_rules__ = {}
    # Comments run from '%' to the end of the line; insignificant whitespace
    # may span at most one line break (a following blank line is excluded).
    COMMENT__ = r'%.*'
    WHITESPACE__ = r'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?'
    WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
    wsp__ = Whitespace(WSP_RE__)
    # Low-level (regular-expression based) parsers.
    EOF = RegExp('(?!.)')
    BACKSLASH = RegExp('[\\\\]')
    LB = RegExp('\\s*?\\n|$')
    NEW_LINE = Series(RegExp('[ \\t]*'), Option(RegExp(COMMENT__)), RegExp('\\n'))
    GAP = Series(RegExp('[ \\t]*(?:\\n[ \\t]*)+\\n'), wsp__)
    WSPC = OneOrMore(Alternative(RegExp(COMMENT__), RegExp('\\s+')))
    PARSEP = Series(ZeroOrMore(Series(RegExp(WHITESPACE__), RegExp(COMMENT__))), GAP, Option(WSPC))
    LFF = Series(NEW_LINE, Option(WSPC))
    LF = Series(NEW_LINE, ZeroOrMore(Series(RegExp(COMMENT__), RegExp(WHITESPACE__))))
    TEXTCHUNK = RegExp('[^\\\\%$&\\{\\}\\[\\]\\s\\n]+')
    INTEGER = Series(RegExp('\\d+'), wsp__)
    NAME = Capture(Series(RegExp('\\w+'), wsp__))
    LINEFEED = RegExp('[\\\\][\\\\]')
    BRACKETS = RegExp('[\\[\\]]')
    SPECIAL = RegExp('[$&_\\\\\\\\/]')
    ESCAPED = RegExp('\\\\[%$&_/{}]')
    TXTCOMMAND = RegExp('\\\\text\\w+')
    CMDNAME = Series(RegExp('\\\\(?:(?!_)\\w)+'), wsp__)
    # Commands, blocks and configuration arguments.
    structural = Alternative(Series(Token("subsection"), wsp__), Series(Token("section"), wsp__), Series(Token("chapter"), wsp__), Series(Token("subsubsection"), wsp__), Series(Token("paragraph"), wsp__), Series(Token("subparagraph"), wsp__), Series(Token("item"), wsp__))
    blockcmd = Series(BACKSLASH, Alternative(Series(Alternative(Series(Token("begin{"), wsp__), Series(Token("end{"), wsp__)), Alternative(Series(Token("enumerate"), wsp__), Series(Token("itemize"), wsp__), Series(Token("figure"), wsp__), Series(Token("quote"), wsp__), Series(Token("quotation"), wsp__), Series(Token("tabular"), wsp__)), Series(Token("}"), wsp__)), structural, begin_generic_block, end_generic_block))
    no_command = Alternative(Series(Token("\\begin{"), wsp__), Series(Token("\\end"), wsp__), Series(BACKSLASH, structural))
    text = Series(TEXTCHUNK, ZeroOrMore(Series(RegExp(''), wsp__, TEXTCHUNK)))
    block = Series(RegExp('{'), RegExp(''), wsp__, ZeroOrMore(Series(NegativeLookahead(blockcmd), text_element, RegExp(''), wsp__)), RegExp('}'), mandatory=4)
    cfg_text = ZeroOrMore(Alternative(Series(Option(Series(RegExp(''), wsp__)), text), CMDNAME, SPECIAL))
    config = Series(Series(Token("["), wsp__), cfg_text, Series(Token("]"), wsp__), mandatory=2)
    pdfinfo = Series(Series(Token("\\pdfinfo"), wsp__), block)
    documentclass = Series(Series(Token("\\documentclass"), wsp__), Option(config), block)
    cline = Series(Series(Token("\\cline{"), wsp__), INTEGER, Series(Token("-"), wsp__), INTEGER, Series(Token("}"), wsp__))
    hline = Series(Token("\\hline"), wsp__)
    multicolumn = Series(Series(Token("\\multicolumn"), wsp__), Series(Token("{"), wsp__), INTEGER, Series(Token("}"), wsp__), tabular_config, block_of_paragraphs)
    caption = Series(Series(Token("\\caption"), wsp__), block)
    includegraphics = Series(Series(Token("\\includegraphics"), wsp__), Option(config), block)
    footnote = Series(Series(Token("\\footnote"), wsp__), block_of_paragraphs)
    citep = Series(Alternative(Series(Token("\\citep"), wsp__), Series(Token("\\cite"), wsp__)), Option(config), block)
    citet = Series(Series(Token("\\citet"), wsp__), Option(config), block)
    generic_command = Series(NegativeLookahead(no_command), CMDNAME, Option(Series(Option(Series(RegExp(''), wsp__, config)), RegExp(''), wsp__, block)))
    text_command = Alternative(TXTCOMMAND, ESCAPED, BRACKETS)
    known_command = Alternative(citet, citep, footnote, includegraphics, caption, multicolumn, hline, cline, documentclass, pdfinfo)
    command = Alternative(known_command, text_command, generic_command)
    # Inline environments and paragraph-level text.
    inline_math = Series(RegExp('\\$'), RegExp('[^$]*'), RegExp('\\$'), mandatory=2)
    end_environment = Series(RegExp('\\\\end{'), Pop(NAME), RegExp('}'), mandatory=1)
    begin_environment = Series(RegExp('\\\\begin{'), NAME, RegExp('}'), mandatory=1)
    end_inline_env = Synonym(end_environment)
    begin_inline_env = Alternative(Series(NegativeLookbehind(LB), begin_environment), Series(begin_environment, NegativeLookahead(LFF)))
    generic_inline_env = Series(begin_inline_env, RegExp(''), wsp__, paragraph, end_inline_env, mandatory=4)
    known_inline_env = Synonym(inline_math)
    inline_environment = Alternative(known_inline_env, generic_inline_env)
    line_element = Alternative(text, block, inline_environment, command)
    text_element.set(Alternative(line_element, LINEFEED))
    paragraph.set(OneOrMore(Series(NegativeLookahead(blockcmd), text_element, RegExp(''), wsp__)))
    sequence = Series(Option(WSPC), OneOrMore(Series(Alternative(paragraph, block_environment), Option(PARSEP))))
    # Block environments.
    block_of_paragraphs.set(Series(Series(Token("{"), wsp__), Option(sequence), Series(Token("}"), wsp__), mandatory=2))
    tabular_config.set(Series(Series(Token("{"), wsp__), RegExp('[lcr|]+'), wsp__, Series(Token("}"), wsp__), mandatory=3))
    tabular_cell = ZeroOrMore(Series(line_element, RegExp(''), wsp__))
    tabular_row = Series(Alternative(multicolumn, tabular_cell), ZeroOrMore(Series(Series(Token("&"), wsp__), Alternative(multicolumn, tabular_cell))), Series(Token("\\\\"), wsp__), Alternative(hline, ZeroOrMore(cline)))
    tabular = Series(Series(Token("\\begin{tabular}"), wsp__), tabular_config, ZeroOrMore(tabular_row), Series(Token("\\end{tabular}"), wsp__), mandatory=3)
    verbatim = Series(Series(Token("\\begin{verbatim}"), wsp__), sequence, Series(Token("\\end{verbatim}"), wsp__), mandatory=2)
    quotation = Alternative(Series(Series(Token("\\begin{quotation}"), wsp__), sequence, Series(Token("\\end{quotation}"), wsp__), mandatory=2), Series(Series(Token("\\begin{quote}"), wsp__), sequence, Series(Token("\\end{quote}"), wsp__), mandatory=2))
    figure = Series(Series(Token("\\begin{figure}"), wsp__), sequence, Series(Token("\\end{figure}"), wsp__), mandatory=2)
    item = Series(Series(Token("\\item"), wsp__), sequence)
    enumerate = Series(Series(Token("\\begin{enumerate}"), wsp__), Option(WSPC), ZeroOrMore(item), Series(Token("\\end{enumerate}"), wsp__), mandatory=3)
    itemize = Series(Series(Token("\\begin{itemize}"), wsp__), Option(WSPC), ZeroOrMore(item), Series(Token("\\end{itemize}"), wsp__), mandatory=3)
    end_generic_block.set(Series(Lookbehind(LB), end_environment, LFF))
    begin_generic_block.set(Series(Lookbehind(LB), begin_environment, LFF))
    generic_block = Series(begin_generic_block, sequence, end_generic_block, mandatory=2)
    known_environment = Alternative(itemize, enumerate, figure, tabular, quotation, verbatim)
    block_environment.set(Alternative(known_environment, generic_block))
    # Document structure: headings nest from chapter down to subparagraph.
    heading = Synonym(block)
    Index = Series(Option(WSPC), Series(Token("\\printindex"), wsp__))
    Bibliography = Series(Option(WSPC), Series(Token("\\bibliography"), wsp__), heading)
    SubParagraph = Series(Series(Token("\\subparagraph"), wsp__), heading, Option(sequence))
    SubParagraphs = OneOrMore(Series(Option(WSPC), SubParagraph))
    Paragraph = Series(Series(Token("\\paragraph"), wsp__), heading, ZeroOrMore(Alternative(sequence, SubParagraphs)))
    Paragraphs = OneOrMore(Series(Option(WSPC), Paragraph))
    SubSubSection = Series(Series(Token("\\subsubsection"), wsp__), heading, ZeroOrMore(Alternative(sequence, Paragraphs)))
    SubSubSections = OneOrMore(Series(Option(WSPC), SubSubSection))
    SubSection = Series(Series(Token("\\subsection"), wsp__), heading, ZeroOrMore(Alternative(sequence, SubSubSections)))
    SubSections = OneOrMore(Series(Option(WSPC), SubSection))
    Section = Series(Series(Token("\\section"), wsp__), heading, ZeroOrMore(Alternative(sequence, SubSections)))
    Sections = OneOrMore(Series(Option(WSPC), Section))
    Chapter = Series(Series(Token("\\chapter"), wsp__), heading, ZeroOrMore(Alternative(sequence, Sections)))
    Chapters = OneOrMore(Series(Option(WSPC), Chapter))
    frontpages = Synonym(sequence)
    document = Series(Option(WSPC), Series(Token("\\begin{document}"), wsp__), frontpages, Alternative(Chapters, Sections), Option(Bibliography), Option(Index), Option(WSPC), Series(Token("\\end{document}"), wsp__), Option(WSPC), EOF, mandatory=9)
    preamble = OneOrMore(Series(Option(WSPC), command))
    latexdoc = Series(preamble, document)
    # presumably the parser with which parsing starts - confirm against DHParser
    root__ = latexdoc
    
def get_grammar() -> LaTeXGrammar:
    """Returns the shared ``LaTeXGrammar`` instance, creating it on first use.

    The instance is cached as attribute ``LaTeX_1_grammar_singleton`` of
    ``GLOBALS``. If this function object carries a ``python_src__``
    attribute, it is copied onto a newly created grammar object.
    """
    # No ``global`` statement is needed: GLOBALS is only read and has
    # attributes set on it; the name itself is never rebound.
    try:
        grammar = GLOBALS.LaTeX_1_grammar_singleton
    except AttributeError:
        # First call: the attribute does not exist yet.
        GLOBALS.LaTeX_1_grammar_singleton = LaTeXGrammar()
        if hasattr(get_grammar, 'python_src__'):
            GLOBALS.LaTeX_1_grammar_singleton.python_src__ = get_grammar.python_src__
        grammar = GLOBALS.LaTeX_1_grammar_singleton
    return grammar
165
166
167
168
169
170
171
172
173


#######################################################################
#
# AST SECTION - Can be edited. Changes will be preserved.
#
#######################################################################


174
def streamline_whitespace(context):
    """Normalizes the content of the whitespace node at the end of ``context``.

    ``context`` is a sequence of nodes whose last element is the node to be
    processed and whose second to last element (if any) is its parent.
    The node's ``result`` is replaced in place:

    - by a single line feed, if the content contains a comment (``%``)
      or a line feed,
    - by a single blank, if it contains any other (non-empty) whitespace,
    - by the empty string, if it is empty.

    Whitespace directly below a node with parser type ``":_Token"`` is
    left untouched. Returns ``None``.
    """
    # A context of length one has no parent node that could be a token.
    if len(context) > 1 and context[-2].parser.ptype == ":_Token":
        return
    node = context[-1]
    assert node.tag_name in ['WSPC', ':Whitespace']
    s = node.content
    if '%' in s or '\n' in s:
        # Comments are dropped; whitespace containing one collapses to a
        # line feed just like whitespace that spans a line break.
        node.result = '\n'
    else:
        node.result = ' ' if s else ''
189
190


191
192
193
def watch(node):
    """Debugging helper: prints the S-expression serialization of ``node``."""
    sxpr = node.as_sxpr()
    print(sxpr)

eckhart's avatar
eckhart committed
194
195
# Flattening transformation (applied recursively) for anonymous nodes as well
# as for the container nodes of the document structure and for sequences.
flatten_structure = flatten(lambda context: is_anonymous(context) or is_one_of(
    context, {"Chapters", "Sections", "SubSections", "SubSubSections", "Paragraphs",
              "SubParagraphs", "sequence"}), recursive=True)
eckhart's avatar
eckhart committed
197
198
199


def is_commandname(context):
    """Returns True, if the last node in the context represents a
    (potentially unknown) LaTeX-command.

    This is the case if the node is a token, its parent has more than one
    child and the token's text is the lower-cased tag name of the parent
    written as a command, i.e. ``\\name``, ``\\begin{name}`` or
    ``\\end{name}``.
    """
    node = context[-1]
    if node.parser.ptype != TOKEN_PTYPE:
        return False
    parent = context[-2]
    if len(parent.children) <= 1:
        return False
    parent_name = parent.tag_name.lower()
    content = str(node)
    return (content == '\\' + parent_name
            or content == '\\begin{' + parent_name + '}'
            or content == '\\end{' + parent_name + '}')


# Removes those children that are empty, that are separators ('PARSEP',
# 'WSPC') or that merely repeat the name of the command (see is_commandname).
drop_expendables = remove_children_if(
    lambda context: (is_empty(context)
                     or is_one_of(context, {'PARSEP', 'WSPC'})
                     or is_commandname(context)))

219

220
221
# Maps tag names (or comma separated lists of tag names) to the
# transformation, or list of transformations, that LaTeXTransform() passes
# to `traverse` as processing table. The keys "<" and "*" are special
# entries of DHParser's table format (see the DHParser documentation).
LaTeX_AST_transformation_table = {
    # AST Transformations for the LaTeX-grammar
    "<": [drop_expendables, flatten_structure],
    "latexdoc": [],
    "preamble": [traverse_locally({'<': remove_whitespace, 'block': replace_by_single_child})],
    "document": [flatten_structure],
    "pdfinfo": [],
    "frontpages": reduce_single_child,
    "Chapters, Sections, SubSections, SubSubSections, Paragraphs, SubParagraphs": [],
    "Chapter, Section, SubSection, SubSubSection, Paragraph, SubParagraph": [],
    "heading": reduce_single_child,
    "Bibliography": [],
    "Index": [],
    "block_environment": replace_by_single_child,
    "known_environment": replace_by_single_child,
    "generic_block": [],
    "begin_generic_block, end_generic_block": [remove_nodes('NEW_LINE'), replace_by_single_child],
    "itemize, enumerate": [remove_brackets, flatten],
    "item": [],
    "figure": [],
    "quotation": [reduce_single_child, remove_brackets],
    "verbatim": [],
    "tabular": [],
    "tabular_config, block_of_paragraphs": [remove_brackets, reduce_single_child],
    # NOTE(review): '\\' is a single backslash, whereas the row terminator
    # in the grammar's tabular_row rule is Token("\\\\"), i.e. two
    # backslashes - confirm that this removes the intended token.
    "tabular_row": [flatten, remove_tokens('&', '\\')],
    "tabular_cell": [flatten, remove_whitespace],
    "multicolumn": [remove_tokens('{', '}')],
    "hline": [remove_whitespace, reduce_single_child],
    "sequence": [flatten],
    "paragraph": [flatten],
    "text_element": replace_by_single_child,
    "line_element": replace_by_single_child,
    "inline_environment": replace_by_single_child,
    "known_inline_env": replace_by_single_child,
    "generic_inline_env": [],
    "begin_inline_env, end_inline_env": [replace_by_single_child],
    "begin_environment, end_environment": [remove_brackets, reduce_single_child],
    "inline_math": [remove_brackets, reduce_single_child],
    "command": replace_by_single_child,
    "known_command": replace_by_single_child,
    "text_command": [],
    "generic_command": [flatten],
    "citet, citep": [],
    "footnote": [],
    "includegraphics": [],
    "caption": [],
    "config": [remove_brackets, reduce_single_child],
    "block": [remove_brackets, flatten, replace_by_single_child],
    "text": collapse,
    "no_command, blockcmd": [],
    "structural": [],
    "CMDNAME": [remove_whitespace, reduce_single_child],
    "TXTCOMMAND": [remove_whitespace, reduce_single_child],
    "NAME": [reduce_single_child, remove_whitespace, reduce_single_child],
    # Strips the leading backslash from escaped characters.
    "ESCAPED": [replace_content(lambda node: str(node)[1:])],
    "BRACKETS": [],
    "TEXTCHUNK": [],
    "LF": [],
    "PARSEP": replace_content(lambda node: '\n\n'),
    "GAP": [],
    "LB": [],
    "BACKSLASH": [],
    "EOF": [],
    # "PARSEP": [replace_content_by('\n\n')],
    # "WSPC": [replace_content_by(' ')],
    ":Whitespace": streamline_whitespace,
    "*": replace_by_single_child
}

289

290
291
def LaTeXTransform() -> TransformationFunc:
    """Creates a transformation function for LaTeX syntax trees.

    Returns:
        ``traverse`` with its ``processing_table`` argument pre-bound to a
        (shallow) copy of ``LaTeX_AST_transformation_table``. Because the
        table is copied, a change of the table made within one transformer
        does not affect the module-level table.
    """
    # The result is a callable (functools.partial), not a table; hence the
    # return type is TransformationFunc rather than TransformationDict.
    return partial(traverse, processing_table=LaTeX_AST_transformation_table.copy())
292

293

294
def get_transformer() -> TransformationFunc:
    """Returns the shared AST-transformation function, creating it on first use.

    The function is cached in the module-level variable
    ``thread_local_LaTeX_transformer_singleton``.
    NOTE(review): despite its name, that variable is an ordinary module
    global - confirm whether per-thread instances are actually required.
    """
    global thread_local_LaTeX_transformer_singleton
    try:
        transformer = thread_local_LaTeX_transformer_singleton
    except NameError:
        # First call: the global has not been assigned yet.
        thread_local_LaTeX_transformer_singleton = LaTeXTransform()
        transformer = thread_local_LaTeX_transformer_singleton
    return transformer

303
304
305
306
307
308
309
310


#######################################################################
#
# COMPILER SECTION - Can be edited. Changes will be preserved.
#
#######################################################################

311
312
313
314
315
316

def empty_defaultdict():
    """Creates an arbitrarily nestable dictionary: a defaultdict whose
    missing keys are filled with further dictionaries of the same kind."""
    nested = defaultdict(empty_defaultdict)
    return nested


317
318
319
class LaTeXCompiler(Compiler):
    """Compiler for the abstract-syntax-tree of a LaTeX source file.

    So far, only the document class and the document language are
    extracted (see ``on_documentclass``) and stored in ``self.metadata``.
    The commented-out ``on_...`` stubs below are templates for further
    node handlers.
    """
    KNOWN_DOCUMENT_CLASSES = {'book', 'article'}
    KNOWN_LANGUAGES = {'english', 'german'}

    def __init__(self):
        super().__init__()
        # Arbitrarily nestable dictionary for data gathered during compilation.
        self.metadata = defaultdict(empty_defaultdict)

    # def on_latexdoc(self, node):
    #     self.compile(node['preamble'])
    #     self.compile(node['document'])
    #     return node

    # def on_preamble(self, node):
    #     return node

    # def on_document(self, node):
    #     return node

    # def on_frontpages(self, node):
    #     return node

    # def on_Chapters(self, node):
    #     return node

    # def on_Chapter(self, node):
    #     return node

    # def on_Sections(self, node):
    #     return node

    # def on_Section(self, node):
    #     return node

    # def on_SubSections(self, node):
    #     return node

    # def on_SubSection(self, node):
    #     return node

    # def on_SubSubSections(self, node):
    #     return node

    # def on_SubSubSection(self, node):
    #     return node

    # def on_Paragraphs(self, node):
    #     return node

    # def on_Paragraph(self, node):
    #     return node

    # def on_SubParagraphs(self, node):
    #     return node

    # def on_SubParagraph(self, node):
    #     return node

    # def on_Bibliography(self, node):
    #     return node

    # def on_Index(self, node):
    #     return node

    # def on_heading(self, node):
    #     return node

    # def on_block_environment(self, node):
    #     return node

    # def on_known_environment(self, node):
    #     return node

    # def on_generic_block(self, node):
    #     return node

    # def on_begin_generic_block(self, node):
    #     return node

    # def on_end_generic_block(self, node):
    #     return node

    # def on_itemize(self, node):
    #     return node

    # def on_enumerate(self, node):
    #     return node

    # def on_item(self, node):
    #     return node

    # def on_figure(self, node):
    #     return node

    # def on_quotation(self, node):
    #     return node

    # def on_verbatim(self, node):
    #     return node

    # def on_tabular(self, node):
    #     return node

    # def on_tabular_row(self, node):
    #     return node

    # def on_tabular_cell(self, node):
    #     return node

    # def on_tabular_config(self, node):
    #     return node

    # def on_block_of_paragraphs(self, node):
    #     return node

    # def on_sequence(self, node):
    #     return node

    # def on_paragraph(self, node):
    #     return node

    # def on_text_element(self, node):
    #     return node

    # def on_line_element(self, node):
    #     return node

    # def on_inline_environment(self, node):
    #     return node

    # def on_known_inline_env(self, node):
    #     return node

    # def on_generic_inline_env(self, node):
    #     return node

    # def on_begin_inline_env(self, node):
    #     return node

    # def on_end_inline_env(self, node):
    #     return node

    # def on_begin_environment(self, node):
    #     return node

    # def on_end_environment(self, node):
    #     return node

    # def on_inline_math(self, node):
    #     return node

    # def on_command(self, node):
    #     return node

    # def on_known_command(self, node):
    #     return node

    # def on_text_command(self, node):
    #     return node

    # def on_generic_command(self, node):
    #     return node

    # def on_footnote(self, node):
    #     return node

    # def on_includegraphics(self, node):
    #     return node

    # def on_caption(self, node):
    #     return node

    # def on_multicolumn(self, node):
    #     return node

    # def on_hline(self, node):
    #     return node

    # def on_cline(self, node):
    #     return node

    def on_documentclass(self, node):
        """
        Saves the documentclass (if known) and the language (if given)
        in the metadata dictionary.

        The comma separated options of the ``config`` child (if present)
        are examined in the order in which they are written. The first
        known language is stored as ``self.metadata['language']``; for
        every further known language a warning is added to the tree.
        The content of the ``text`` child (if present) is stored as
        ``self.metadata['documentclass']`` if it is a known document class.

        Returns the unchanged ``node``.
        """
        if 'config' in node:
            # dict.fromkeys removes duplicates but - unlike a set - keeps
            # the order of the options, so that the outcome is deterministic.
            options = dict.fromkeys(
                part.strip() for part in node['config'].content.split(','))
            for it in options:
                if it in self.KNOWN_LANGUAGES:
                    # The membership test does not create the key in the
                    # defaultdict; only an item access would do that.
                    if 'language' not in self.metadata:
                        self.metadata['language'] = it
                    else:
                        self.tree.new_error(node, 'Only one document language supported. '
                                            'Using %s, ignoring %s.'
                                            % (self.metadata['language'], it), Error.WARNING)
        if 'text' in node:
            # Compare the text content, not the node object, with the
            # (string) names of the known document classes.
            document_class = node['text'].content
            if document_class in self.KNOWN_DOCUMENT_CLASSES:
                self.metadata['documentclass'] = document_class
        return node

    def on_pdfinfo(self, node):
        """Returns the ``pdfinfo`` node unchanged."""
        return node

    # def on_config(self, node):
    #     return node

    # def on_cfg_text(self, node):
    #     return node

    # def on_block(self, node):
    #     return node

    # def on_text(self, node):
    #     return node

    # def on_no_command(self, node):
    #     return node

    # def on_blockcmd(self, node):
    #     return node

    # def on_structural(self, node):
    #     return node

    # def on_CMDNAME(self, node):
    #     return node

    # def on_TXTCOMMAND(self, node):
    #     return node

    # def on_ESCAPED(self, node):
    #     return node

    # def on_SPECIAL(self, node):
    #     return node

    # def on_BRACKETS(self, node):
    #     return node

    # def on_LINEFEED(self, node):
    #     return node

    # def on_NAME(self, node):
    #     return node

    # def on_INTEGER(self, node):
    #     return node

    # def on_TEXTCHUNK(self, node):
    #     return node

    # def on_LF(self, node):
    #     return node

    # def on_LFF(self, node):
    #     return node

    # def on_PARSEP(self, node):
    #     return node

    # def on_WSPC(self, node):
    #     return node

    # def on_GAP(self, node):
    #     return node

    # def on_NEW_LINE(self, node):
    #     return node

    # def on_LB(self, node):
    #     return node

    # def on_BACKSLASH(self, node):
    #     return node

    # def on_EOF(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
595

596

eckhart's avatar
eckhart committed
597
def get_compiler() -> LaTeXCompiler:
    """Returns the shared ``LaTeXCompiler`` instance, creating it on first use.

    The instance is cached in the module-level variable
    ``thread_local_LaTeX_compiler_singleton``.
    NOTE(review): despite its name, that variable is an ordinary module
    global - confirm whether per-thread instances are actually required.
    """
    global thread_local_LaTeX_compiler_singleton
    try:
        compiler = thread_local_LaTeX_compiler_singleton
    except NameError:
        # First call: the global has not been assigned yet.
        thread_local_LaTeX_compiler_singleton = LaTeXCompiler()
        compiler = thread_local_LaTeX_compiler_singleton
    return compiler
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620


#######################################################################
#
# END OF DHPARSER-SECTIONS
#
#######################################################################


def compile_src(source):
    """Compiles ``source`` and returns (result, errors, ast).

    ``source`` is handed to ``compile_source`` together with the
    preprocessor, grammar, transformer and compiler of this module. The
    compilation runs inside the ``logging("LOGS")`` context.
    """
    # The original also derived a ``log_file_name`` from ``source`` and
    # the compiler's class name here. That value was never used, so the
    # computation has been dropped.
    with logging("LOGS"):
        compiler = get_compiler()
        result = compile_source(source, get_preprocessor(),
                                get_grammar(),
                                get_transformer(), compiler)
    return result


if __name__ == "__main__":
    # Command line use: compile the file given as first argument. Print
    # either the errors (exit code 1) or the compilation result.
    if len(sys.argv) > 1:
        result, errors, _ = compile_src(sys.argv[1])
        if errors:
            for error in errors:
                print(error)
            sys.exit(1)
        else:
            # Nodes are serialized as XML; any other result is printed as is.
            print(result.as_xml() if isinstance(result, Node) else result)
    else:
        print("Usage: LaTeXCompiler.py [FILENAME]")