#!/usr/bin/python

#######################################################################
#
# SYMBOLS SECTION - Can be edited. Changes will be preserved.
#
#######################################################################


from collections import defaultdict
import os
import sys
from functools import partial

try:
    import regex as re
except ImportError:
    import re
from DHParser import is_filename, Grammar, Compiler, Lookbehind, Alternative, Pop, \
    Synonym, Whitespace, Token, \
    Option, NegativeLookbehind, OneOrMore, RegExp, Series, Capture, \
    ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
    PreprocessorFunc, TransformationDict, \
    Node, TransformationFunc, traverse, remove_children_if, is_anonymous, \
    reduce_single_child, replace_by_single_child, remove_whitespace, \
    flatten, is_empty, collapse, replace_content, replace_content_by, remove_brackets, \
    is_one_of, traverse_locally, remove_tokens, remove_nodes, TOKEN_PTYPE, Error, GLOBALS
from DHParser.log import logging


#######################################################################
#
# PREPROCESSOR SECTION - Can be edited. Changes will be preserved.
#
#######################################################################

def LaTeXPreprocessor(text):
    """Preprocesses LaTeX source text before it is parsed.

    No preprocessing is performed at present: the text is handed back
    exactly as it was received.

    :param text: the source text
    :returns: ``text``, unchanged
    """
    return text

Eckhart Arnold's avatar
Eckhart Arnold committed
40
41
def get_preprocessor() -> PreprocessorFunc:
    """Returns the preprocessor function to be applied to LaTeX sources."""
    preprocessor = LaTeXPreprocessor
    return preprocessor


#######################################################################
#
# PARSER SECTION - Don't edit! CHANGES WILL BE OVERWRITTEN!
#
#######################################################################

class LaTeXGrammar(Grammar):
    r"""Parser for a LaTeX source file.

    The parser definitions below belong to the PARSER SECTION of this
    file, which is marked as being overwritten, so they are kept exactly
    as they were. The entry point is ``root__`` (``latexdoc``): a preamble
    consisting of commands, followed by the document environment.
    """
    # Forward declarations for parsers that are referenced before they
    # are defined; they are filled in with ``.set(...)`` further below.
    begin_generic_block = Forward()
    block_environment = Forward()
    block_of_paragraphs = Forward()
    end_generic_block = Forward()
    paragraph = Forward()
    tabular_config = Forward()
    text_element = Forward()
    source_hash__ = "e09808ecd485c07b3455c3a2bf4eada3"
    parser_initialization__ = "upon instantiation"
    resume_rules__ = {}
    # Comments start with '%'; whitespace may contain at most one linefeed,
    # i.e. it never spans an empty line.
    COMMENT__ = r'%.*'
    WHITESPACE__ = r'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?'
    WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
    wsp__ = Whitespace(WSP_RE__)
    # Primitive (regular expression based) parsers
    EOF = RegExp('(?!.)')
    BACKSLASH = RegExp('[\\\\]')
    LB = RegExp('\\s*?\\n|$')
    NEW_LINE = Series(RegExp('[ \\t]*'), Option(RegExp(COMMENT__)), RegExp('\\n'))
    GAP = Series(RegExp('[ \\t]*(?:\\n[ \\t]*)+\\n'), wsp__)
    WSPC = OneOrMore(Alternative(RegExp(COMMENT__), RegExp('\\s+')))
    PARSEP = Series(ZeroOrMore(Series(RegExp(WHITESPACE__), RegExp(COMMENT__))), GAP, Option(WSPC))
    LFF = Series(NEW_LINE, Option(WSPC))
    LF = Series(NEW_LINE, ZeroOrMore(Series(RegExp(COMMENT__), RegExp(WHITESPACE__))))
    TEXTCHUNK = RegExp('[^\\\\%$&\\{\\}\\[\\]\\s\\n]+')
    INTEGER = Series(RegExp('\\d+'), wsp__)
    NAME = Capture(Series(RegExp('\\w+'), wsp__))
    LINEFEED = RegExp('[\\\\][\\\\]')
    BRACKETS = RegExp('[\\[\\]]')
    SPECIAL = RegExp('[$&_\\\\\\\\/]')
    ESCAPED = RegExp('\\\\[%$&_/{}]')
    TXTCOMMAND = RegExp('\\\\text\\w+')
    CMDNAME = Series(RegExp('\\\\(?:(?!_)\\w)+'), wsp__)
    # Commands
    structural = Alternative(Series(Token("subsection"), wsp__), Series(Token("section"), wsp__), Series(Token("chapter"), wsp__), Series(Token("subsubsection"), wsp__), Series(Token("paragraph"), wsp__), Series(Token("subparagraph"), wsp__), Series(Token("item"), wsp__))
    blockcmd = Series(BACKSLASH, Alternative(Series(Alternative(Series(Token("begin{"), wsp__), Series(Token("end{"), wsp__)), Alternative(Series(Token("enumerate"), wsp__), Series(Token("itemize"), wsp__), Series(Token("figure"), wsp__), Series(Token("quote"), wsp__), Series(Token("quotation"), wsp__), Series(Token("tabular"), wsp__)), Series(Token("}"), wsp__)), structural, begin_generic_block, end_generic_block))
    no_command = Alternative(Series(Token("\\begin{"), wsp__), Series(Token("\\end"), wsp__), Series(BACKSLASH, structural))
    text = Series(TEXTCHUNK, ZeroOrMore(Series(RegExp(''), wsp__, TEXTCHUNK)))
    block = Series(RegExp('{'), RegExp(''), wsp__, ZeroOrMore(Series(NegativeLookahead(blockcmd), text_element, RegExp(''), wsp__)), RegExp('}'), mandatory=4)
    cfg_text = ZeroOrMore(Alternative(Series(Option(Series(RegExp(''), wsp__)), text), CMDNAME, SPECIAL))
    config = Series(Series(Token("["), wsp__), cfg_text, Series(Token("]"), wsp__), mandatory=2)
    pdfinfo = Series(Series(Token("\\pdfinfo"), wsp__), block)
    documentclass = Series(Series(Token("\\documentclass"), wsp__), Option(config), block)
    cline = Series(Series(Token("\\cline{"), wsp__), INTEGER, Series(Token("-"), wsp__), INTEGER, Series(Token("}"), wsp__))
    hline = Series(Token("\\hline"), wsp__)
    multicolumn = Series(Series(Token("\\multicolumn"), wsp__), Series(Token("{"), wsp__), INTEGER, Series(Token("}"), wsp__), tabular_config, block_of_paragraphs)
    caption = Series(Series(Token("\\caption"), wsp__), block)
    includegraphics = Series(Series(Token("\\includegraphics"), wsp__), Option(config), block)
    footnote = Series(Series(Token("\\footnote"), wsp__), block_of_paragraphs)
    citep = Series(Alternative(Series(Token("\\citep"), wsp__), Series(Token("\\cite"), wsp__)), Option(config), block)
    citet = Series(Series(Token("\\citet"), wsp__), Option(config), block)
    generic_command = Series(NegativeLookahead(no_command), CMDNAME, Option(Series(Option(Series(RegExp(''), wsp__, config)), RegExp(''), wsp__, block)))
    text_command = Alternative(TXTCOMMAND, ESCAPED, BRACKETS)
    known_command = Alternative(citet, citep, footnote, includegraphics, caption, multicolumn, hline, cline, documentclass, pdfinfo)
    command = Alternative(known_command, text_command, generic_command)
    # Inline environments and paragraphs
    inline_math = Series(RegExp('\\$'), RegExp('[^$]*'), RegExp('\\$'), mandatory=2)
    end_environment = Series(RegExp('\\\\end{'), Pop(NAME), RegExp('}'), mandatory=1)
    begin_environment = Series(RegExp('\\\\begin{'), NAME, RegExp('}'), mandatory=1)
    end_inline_env = Synonym(end_environment)
    begin_inline_env = Alternative(Series(NegativeLookbehind(LB), begin_environment), Series(begin_environment, NegativeLookahead(LFF)))
    generic_inline_env = Series(begin_inline_env, RegExp(''), wsp__, paragraph, end_inline_env, mandatory=4)
    known_inline_env = Synonym(inline_math)
    inline_environment = Alternative(known_inline_env, generic_inline_env)
    line_element = Alternative(text, block, inline_environment, command)
    text_element.set(Alternative(line_element, LINEFEED))
    paragraph.set(OneOrMore(Series(NegativeLookahead(blockcmd), text_element, RegExp(''), wsp__)))
    sequence = Series(Option(WSPC), OneOrMore(Series(Alternative(paragraph, block_environment), Option(PARSEP))))
    # Block environments
    block_of_paragraphs.set(Series(Series(Token("{"), wsp__), Option(sequence), Series(Token("}"), wsp__), mandatory=2))
    tabular_config.set(Series(Series(Token("{"), wsp__), RegExp('[lcr|]+'), wsp__, Series(Token("}"), wsp__), mandatory=3))
    tabular_cell = ZeroOrMore(Series(line_element, RegExp(''), wsp__))
    tabular_row = Series(Alternative(multicolumn, tabular_cell), ZeroOrMore(Series(Series(Token("&"), wsp__), Alternative(multicolumn, tabular_cell))), Series(Token("\\\\"), wsp__), Alternative(hline, ZeroOrMore(cline)))
    tabular = Series(Series(Token("\\begin{tabular}"), wsp__), tabular_config, ZeroOrMore(tabular_row), Series(Token("\\end{tabular}"), wsp__), mandatory=3)
    verbatim = Series(Series(Token("\\begin{verbatim}"), wsp__), sequence, Series(Token("\\end{verbatim}"), wsp__), mandatory=2)
    quotation = Alternative(Series(Series(Token("\\begin{quotation}"), wsp__), sequence, Series(Token("\\end{quotation}"), wsp__), mandatory=2), Series(Series(Token("\\begin{quote}"), wsp__), sequence, Series(Token("\\end{quote}"), wsp__), mandatory=2))
    figure = Series(Series(Token("\\begin{figure}"), wsp__), sequence, Series(Token("\\end{figure}"), wsp__), mandatory=2)
    item = Series(Series(Token("\\item"), wsp__), sequence)
    enumerate = Series(Series(Token("\\begin{enumerate}"), wsp__), Option(WSPC), ZeroOrMore(item), Series(Token("\\end{enumerate}"), wsp__), mandatory=3)
    itemize = Series(Series(Token("\\begin{itemize}"), wsp__), Option(WSPC), ZeroOrMore(item), Series(Token("\\end{itemize}"), wsp__), mandatory=3)
    end_generic_block.set(Series(Lookbehind(LB), end_environment, LFF))
    begin_generic_block.set(Series(Lookbehind(LB), begin_environment, LFF))
    generic_block = Series(begin_generic_block, sequence, end_generic_block, mandatory=2)
    known_environment = Alternative(itemize, enumerate, figure, tabular, quotation, verbatim)
    block_environment.set(Alternative(known_environment, generic_block))
    # Document structure
    heading = Synonym(block)
    Index = Series(Option(WSPC), Series(Token("\\printindex"), wsp__))
    Bibliography = Series(Option(WSPC), Series(Token("\\bibliography"), wsp__), heading)
    SubParagraph = Series(Series(Token("\\subparagraph"), wsp__), heading, Option(sequence))
    SubParagraphs = OneOrMore(Series(Option(WSPC), SubParagraph))
    Paragraph = Series(Series(Token("\\paragraph"), wsp__), heading, ZeroOrMore(Alternative(sequence, SubParagraphs)))
    Paragraphs = OneOrMore(Series(Option(WSPC), Paragraph))
    SubSubSection = Series(Series(Token("\\subsubsection"), wsp__), heading, ZeroOrMore(Alternative(sequence, Paragraphs)))
    SubSubSections = OneOrMore(Series(Option(WSPC), SubSubSection))
    SubSection = Series(Series(Token("\\subsection"), wsp__), heading, ZeroOrMore(Alternative(sequence, SubSubSections)))
    SubSections = OneOrMore(Series(Option(WSPC), SubSection))
    Section = Series(Series(Token("\\section"), wsp__), heading, ZeroOrMore(Alternative(sequence, SubSections)))
    Sections = OneOrMore(Series(Option(WSPC), Section))
    Chapter = Series(Series(Token("\\chapter"), wsp__), heading, ZeroOrMore(Alternative(sequence, Sections)))
    Chapters = OneOrMore(Series(Option(WSPC), Chapter))
    frontpages = Synonym(sequence)
    document = Series(Option(WSPC), Series(Token("\\begin{document}"), wsp__), frontpages, Alternative(Chapters, Sections), Option(Bibliography), Option(Index), Option(WSPC), Series(Token("\\end{document}"), wsp__), Option(WSPC), EOF, mandatory=9)
    preamble = OneOrMore(Series(Option(WSPC), command))
    latexdoc = Series(preamble, document)
    root__ = latexdoc
    
def get_grammar() -> LaTeXGrammar:
    """Returns the shared LaTeXGrammar instance, creating it on first use.

    The instance is stored as attribute ``LaTeX_1_grammar_singleton`` of
    the ``GLOBALS`` object imported from DHParser, so that later calls
    receive the same object.
    """
    try:
        grammar = GLOBALS.LaTeX_1_grammar_singleton
    except AttributeError:
        # first call: no grammar object has been stored yet
        GLOBALS.LaTeX_1_grammar_singleton = LaTeXGrammar()
        grammar = GLOBALS.LaTeX_1_grammar_singleton
    return grammar


#######################################################################
#
# AST SECTION - Can be edited. Changes will be preserved.
#
#######################################################################


def streamline_whitespace(context):
    """Normalizes the content of a whitespace node in place.

    Nothing is changed if the parser type of the parent node is
    ":_Token". Otherwise the node's result is replaced by

    - a single linefeed, if the whitespace contains a comment sign ('%')
      or a linefeed,
    - a single blank, if it is non-empty,
    - the empty string, if it is empty.

    :param context: a sequence of nodes; ``context[-1]`` is the whitespace
        node (tag name 'WSPC' or ':Whitespace'), ``context[-2]`` its parent.
    """
    if context[-2].parser.ptype == ":_Token":
        return
    node = context[-1]
    assert node.tag_name in ['WSPC', ':Whitespace']
    s = node.content
    if '%' in s or '\n' in s:
        # Whitespace containing a comment is treated like whitespace
        # containing a linefeed; the comment text itself is dropped.
        node.result = '\n'
    else:
        node.result = ' ' if s else ''
186
187


188
189
190
def watch(node):
    """Debugging aid: prints the S-expression representation of ``node``."""
    sxpr = node.as_sxpr()
    print(sxpr)

eckhart's avatar
eckhart committed
191
192
# Transformation built with ``flatten(..., recursive=True)``. Its condition
# holds for anonymous nodes and for the container nodes named in the set
# below (presumably the children of such nodes are merged into their
# parent - confirm against the DHParser documentation of ``flatten``).
flatten_structure = flatten(
    lambda context: is_anonymous(context) or is_one_of(
        context, {"Chapters", "Sections", "SubSections", "SubSubSections",
                  "Paragraphs", "SubParagraphs", "sequence"}),
    recursive=True)
eckhart's avatar
eckhart committed
194
195
196


def is_commandname(context):
    """Returns True, if the last node in the context represents a
    (potentially unknown) LaTeX-command.

    This is the case if the node is a token, its parent (``context[-2]``)
    has more than one child, and the token's text is the command form of
    the parent's lower-cased tag name, i.e. one of ``\\name``,
    ``\\begin{name}`` or ``\\end{name}``.

    :param context: a sequence of nodes; ``context[-1]`` is the node under
        examination, ``context[-2]`` its parent.
    :returns: True, if the node merely repeats the name of its parent as
        a LaTeX command, False otherwise.
    """
    node = context[-1]
    if node.parser.ptype != TOKEN_PTYPE:
        return False
    parent = context[-2]
    if len(parent.children) <= 1:
        return False
    parent_name = parent.tag_name.lower()
    return str(node) in ('\\' + parent_name,
                         '\\begin{' + parent_name + '}',
                         '\\end{' + parent_name + '}')


def _is_expendable(context):
    """Condition for ``drop_expendables``: holds for empty nodes, for
    'PARSEP' and 'WSPC' nodes and for tokens that merely repeat the
    command name of their parent."""
    return (is_empty(context)
            or is_one_of(context, {'PARSEP', 'WSPC'})
            or is_commandname(context))


drop_expendables = remove_children_if(_is_expendable)

216

217
218
# Maps tag names to the transformation (or list of transformations) applied
# to nodes with that tag name; a key may list several comma-separated names.
# NOTE(review): "<", "*" and ":Whitespace" look like special keys of
# DHParser's ``traverse`` - confirm their exact meaning in its documentation.
LaTeX_AST_transformation_table = {
    # AST Transformations for the LaTeX-grammar
    "<": [drop_expendables, flatten_structure],
    "latexdoc": [],
    "preamble": [traverse_locally({'<': remove_whitespace, 'block': replace_by_single_child})],
    "document": [flatten_structure],
    "pdfinfo": [],
    "frontpages": reduce_single_child,
    "Chapters, Sections, SubSections, SubSubSections, Paragraphs, SubParagraphs": [],
    "Chapter, Section, SubSection, SubSubSection, Paragraph, SubParagraph": [],
    "heading": reduce_single_child,
    "Bibliography": [],
    "Index": [],
    "block_environment": replace_by_single_child,
    "known_environment": replace_by_single_child,
    "generic_block": [],
    "begin_generic_block, end_generic_block": [remove_nodes('NEW_LINE'), replace_by_single_child],
    "itemize, enumerate": [remove_brackets, flatten],
    "item": [],
    "figure": [],
    "quotation": [reduce_single_child, remove_brackets],
    "verbatim": [],
    "tabular": [],
    "tabular_config, block_of_paragraphs": [remove_brackets, reduce_single_child],
    # NOTE(review): '\\' is a single backslash, whereas the row terminator
    # in the grammar is Token("\\\\"), i.e. two backslashes - confirm
    # whether the terminator is meant to be removed here.
    "tabular_row": [flatten, remove_tokens('&', '\\')],
    "tabular_cell": [flatten, remove_whitespace],
    "multicolumn": [remove_tokens('{', '}')],
    "hline": [remove_whitespace, reduce_single_child],
    "sequence": [flatten],
    "paragraph": [flatten],
    "text_element": replace_by_single_child,
    "line_element": replace_by_single_child,
    "inline_environment": replace_by_single_child,
    "known_inline_env": replace_by_single_child,
    "generic_inline_env": [],
    "begin_inline_env, end_inline_env": [replace_by_single_child],
    "begin_environment, end_environment": [remove_brackets, reduce_single_child],
    "inline_math": [remove_brackets, reduce_single_child],
    "command": replace_by_single_child,
    "known_command": replace_by_single_child,
    "text_command": [],
    "generic_command": [flatten],
    "citet, citep": [],
    "footnote": [],
    "includegraphics": [],
    "caption": [],
    "config": [remove_brackets, reduce_single_child],
    "block": [remove_brackets, flatten, replace_by_single_child],
    "text": collapse,
    "no_command, blockcmd": [],
    "structural": [],
    "CMDNAME": [remove_whitespace, reduce_single_child],
    "TXTCOMMAND": [remove_whitespace, reduce_single_child],
    "NAME": [reduce_single_child, remove_whitespace, reduce_single_child],
    # strips the leading backslash from an escaped character
    "ESCAPED": [replace_content(lambda node: str(node)[1:])],
    "BRACKETS": [],
    "TEXTCHUNK": [],
    "LF": [],
    "PARSEP": replace_content(lambda node: '\n\n'),
    "GAP": [],
    "LB": [],
    "BACKSLASH": [],
    "EOF": [],
    # "PARSEP": [replace_content_by('\n\n')],
    # "WSPC": [replace_content_by(' ')],
    ":Whitespace": streamline_whitespace,
    "*": replace_by_single_child
}

286

287
288
def LaTeXTransform() -> TransformationFunc:
    """Creates the AST transformation function for LaTeX syntax trees.

    :returns: ``traverse`` with its ``processing_table`` argument bound to
        a (shallow) copy of ``LaTeX_AST_transformation_table``, so the
        returned callable does not share the module-level dictionary.
    """
    return partial(traverse, processing_table=LaTeX_AST_transformation_table.copy())
289

290

291
def get_transformer() -> TransformationFunc:
    """Returns the shared AST transformation function, creating it with
    ``LaTeXTransform()`` on first use.

    The function object is cached in the module-level variable
    ``thread_local_LaTeX_transformer_singleton``.
    """
    global thread_local_LaTeX_transformer_singleton
    try:
        transformer = thread_local_LaTeX_transformer_singleton
    except NameError:
        # first call: the module-level variable does not exist yet
        thread_local_LaTeX_transformer_singleton = LaTeXTransform()
        transformer = thread_local_LaTeX_transformer_singleton
    return transformer



#######################################################################
#
# COMPILER SECTION - Can be edited. Changes will be preserved.
#
#######################################################################

def empty_defaultdict():
    """Creates an empty defaultdict whose missing keys are filled with
    further empty defaultdicts of the same kind, to any depth."""
    nested = defaultdict(empty_defaultdict)
    return nested


314
315
316
class LaTeXCompiler(Compiler):
    """Compiler for the abstract-syntax-tree of a LaTeX source file.

    Only ``on_documentclass`` does real work so far: it records the
    document class and language in ``self.metadata``. The commented-out
    ``on_...`` methods are placeholders for node types that are not
    handled specially yet.
    """
    # document classes and languages that are recorded in the metadata
    KNOWN_DOCUMENT_CLASSES = {'book', 'article'}
    KNOWN_LANGUAGES = {'english', 'german'}

    def __init__(self):
        """Initializes the compiler with an empty metadata store."""
        super().__init__()
        # Missing keys yield (arbitrarily deeply nested) empty defaultdicts.
        # Therefore, use ``key in self.metadata`` to test for the presence
        # of an entry: merely reading ``self.metadata[key]`` creates one.
        self.metadata = defaultdict(empty_defaultdict)

    # def on_latexdoc(self, node):
    #     self.compile(node['preamble'])
    #     self.compile(node['document'])
    #     return node

    # def on_preamble(self, node):
    #     return node

    # def on_document(self, node):
    #     return node

    # def on_frontpages(self, node):
    #     return node

    # def on_Chapters(self, node):
    #     return node

    # def on_Chapter(self, node):
    #     return node

    # def on_Sections(self, node):
    #     return node

    # def on_Section(self, node):
    #     return node

    # def on_SubSections(self, node):
    #     return node

    # def on_SubSection(self, node):
    #     return node

    # def on_SubSubSections(self, node):
    #     return node

    # def on_SubSubSection(self, node):
    #     return node

    # def on_Paragraphs(self, node):
    #     return node

    # def on_Paragraph(self, node):
    #     return node

    # def on_SubParagraphs(self, node):
    #     return node

    # def on_SubParagraph(self, node):
    #     return node

    # def on_Bibliography(self, node):
    #     return node

    # def on_Index(self, node):
    #     return node

    # def on_heading(self, node):
    #     return node

    # def on_block_environment(self, node):
    #     return node

    # def on_known_environment(self, node):
    #     return node

    # def on_generic_block(self, node):
    #     return node

    # def on_begin_generic_block(self, node):
    #     return node

    # def on_end_generic_block(self, node):
    #     return node

    # def on_itemize(self, node):
    #     return node

    # def on_enumerate(self, node):
    #     return node

    # def on_item(self, node):
    #     return node

    # def on_figure(self, node):
    #     return node

    # def on_quotation(self, node):
    #     return node

    # def on_verbatim(self, node):
    #     return node

    # def on_tabular(self, node):
    #     return node

    # def on_tabular_row(self, node):
    #     return node

    # def on_tabular_cell(self, node):
    #     return node

    # def on_tabular_config(self, node):
    #     return node

    # def on_block_of_paragraphs(self, node):
    #     return node

    # def on_sequence(self, node):
    #     return node

    # def on_paragraph(self, node):
    #     return node

    # def on_text_element(self, node):
    #     return node

    # def on_line_element(self, node):
    #     return node

    # def on_inline_environment(self, node):
    #     return node

    # def on_known_inline_env(self, node):
    #     return node

    # def on_generic_inline_env(self, node):
    #     return node

    # def on_begin_inline_env(self, node):
    #     return node

    # def on_end_inline_env(self, node):
    #     return node

    # def on_begin_environment(self, node):
    #     return node

    # def on_end_environment(self, node):
    #     return node

    # def on_inline_math(self, node):
    #     return node

    # def on_command(self, node):
    #     return node

    # def on_known_command(self, node):
    #     return node

    # def on_text_command(self, node):
    #     return node

    # def on_generic_command(self, node):
    #     return node

    # def on_footnote(self, node):
    #     return node

    # def on_includegraphics(self, node):
    #     return node

    # def on_caption(self, node):
    #     return node

    # def on_multicolumn(self, node):
    #     return node

    # def on_hline(self, node):
    #     return node

    # def on_cline(self, node):
    #     return node

    def on_documentclass(self, node):
        """
        Saves the documentclass (if known) and the language (if given)
        in the metadata dictionary.

        The options in the 'config' child are split at commas. The first
        option that is a known language is stored as
        ``self.metadata['language']``; for every further known language a
        warning is added to the tree. The content of the 'text' child is
        stored as ``self.metadata['documentclass']`` if it is one of the
        known document classes.

        :param node: the 'documentclass' node
        :returns: the node itself
        """
        if 'config' in node:
            options = [part.strip() for part in node['config'].content.split(',')]
            # dict.fromkeys() drops duplicates, but - unlike a set - keeps
            # the order in which the options appear in the document
            for it in dict.fromkeys(options):
                if it in self.KNOWN_LANGUAGES:
                    # test the metadata with ``in``: reading a missing key
                    # of the defaultdict would already create an entry
                    if 'language' not in self.metadata:
                        self.metadata['language'] = it
                    else:
                        self.tree.new_error(node, 'Only one document language supported. '
                                            'Using %s, ignoring %s.'
                                            % (self.metadata['language'], it), Error.WARNING)
        if 'text' in node:
            # compare the string content, not the node object, with the
            # names of the known document classes
            documentclass = node['text'].content
            if documentclass in self.KNOWN_DOCUMENT_CLASSES:
                self.metadata['documentclass'] = documentclass
        return node

    def on_pdfinfo(self, node):
        """Leaves 'pdfinfo' nodes unchanged."""
        return node

    # def on_config(self, node):
    #     return node

    # def on_cfg_text(self, node):
    #     return node

    # def on_block(self, node):
    #     return node

    # def on_text(self, node):
    #     return node

    # def on_no_command(self, node):
    #     return node

    # def on_blockcmd(self, node):
    #     return node

    # def on_structural(self, node):
    #     return node

    # def on_CMDNAME(self, node):
    #     return node

    # def on_TXTCOMMAND(self, node):
    #     return node

    # def on_ESCAPED(self, node):
    #     return node

    # def on_SPECIAL(self, node):
    #     return node

    # def on_BRACKETS(self, node):
    #     return node

    # def on_LINEFEED(self, node):
    #     return node

    # def on_NAME(self, node):
    #     return node

    # def on_INTEGER(self, node):
    #     return node

    # def on_TEXTCHUNK(self, node):
    #     return node

    # def on_LF(self, node):
    #     return node

    # def on_LFF(self, node):
    #     return node

    # def on_PARSEP(self, node):
    #     return node

    # def on_WSPC(self, node):
    #     return node

    # def on_GAP(self, node):
    #     return node

    # def on_NEW_LINE(self, node):
    #     return node

    # def on_LB(self, node):
    #     return node

    # def on_BACKSLASH(self, node):
    #     return node

    # def on_EOF(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
592

593

eckhart's avatar
eckhart committed
594
def get_compiler() -> LaTeXCompiler:
    """Returns the shared LaTeXCompiler instance, creating it on first use.

    The instance is cached in the module-level variable
    ``thread_local_LaTeX_compiler_singleton``.
    """
    global thread_local_LaTeX_compiler_singleton
    try:
        compiler = thread_local_LaTeX_compiler_singleton
    except NameError:
        # first call: the module-level variable does not exist yet
        thread_local_LaTeX_compiler_singleton = LaTeXCompiler()
        compiler = thread_local_LaTeX_compiler_singleton
    return compiler


#######################################################################
#
# END OF DHPARSER-SECTIONS
#
#######################################################################


def compile_src(source):
    """Compiles ``source`` and returns (result, errors, ast).

    The source is passed through the preprocessor, the grammar, the AST
    transformation and the compiler of this module, with logging set up
    by ``logging("LOGS")`` for the duration of the compilation.

    :param source: the LaTeX source, handed on to ``compile_source``
    :returns: whatever ``compile_source`` returns
    """
    with logging("LOGS"):
        # the compiler is fetched first, as in earlier versions of this
        # function, to keep the order in which the singletons are created
        compiler = get_compiler()
        result = compile_source(source, get_preprocessor(),
                                get_grammar(),
                                get_transformer(), compiler)
    return result


if __name__ == "__main__":
    # Compile the file named by the first command line argument. Errors are
    # printed and lead to exit code 1; otherwise the result is printed
    # (as XML, if it is a Node).
    if len(sys.argv) > 1:
        result, errors, _ = compile_src(sys.argv[1])
        if errors:
            for error in errors:
                print(error)
            sys.exit(1)
        else:
            print(result.as_xml() if isinstance(result, Node) else result)
    else:
        print("Usage: LaTeXCompiler.py [FILENAME]")