#!/usr/bin/python3

#######################################################################
#
# SYMBOLS SECTION - Can be edited. Changes will be preserved.
#
#######################################################################


10
from collections import defaultdict
11 12
import os
import sys
Eckhart Arnold's avatar
Eckhart Arnold committed
13 14
from functools import partial

15 16 17 18
try:
    import regex as re
except ImportError:
    import re
19
from DHParser import is_filename, Grammar, Compiler, Lookbehind, Alternative, Pop, \
di68kap's avatar
di68kap committed
20 21
    Synonym, Whitespace, Token, \
    Option, NegativeLookbehind, OneOrMore, RegExp, Series, Capture, \
22
    ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
23
    PreprocessorFunc, TransformationDict, \
Eckhart Arnold's avatar
Eckhart Arnold committed
24
    Node, TransformationFunc, traverse, remove_children_if, is_anonymous, \
25
    reduce_single_child, replace_by_single_child, remove_whitespace, flatten_anonymous_nodes, \
26
    flatten, is_empty, collapse, replace_content, replace_content_by, remove_brackets, \
eckhart's avatar
eckhart committed
27
    is_one_of, traverse_locally, remove_tokens, remove_nodes, TOKEN_PTYPE, Error, GLOBALS
28
from DHParser.log import logging
29 30 31 32


#######################################################################
#
Eckhart Arnold's avatar
Eckhart Arnold committed
33
# PREPROCESSOR SECTION - Can be edited. Changes will be preserved.
34 35 36
#
#######################################################################

Eckhart Arnold's avatar
Eckhart Arnold committed
37
def LaTeXPreprocessor(text):
    """Return the source text unchanged.

    This is a placeholder preprocessor: no preprocessing is applied to
    LaTeX sources, so ``text`` is passed through as it is.
    """
    return text

Eckhart Arnold's avatar
Eckhart Arnold committed
40 41
def get_preprocessor() -> PreprocessorFunc:
    """Return the preprocessor function used for LaTeX sources."""
    preprocessor = LaTeXPreprocessor
    return preprocessor
42 43 44 45 46 47 48 49 50


#######################################################################
#
# PARSER SECTION - Don't edit! CHANGES WILL BE OVERWRITTEN!
#
#######################################################################

class LaTeXGrammar(Grammar):
    r"""Parser for a LaTeX source file.
    """
    # Forward declarations: these parsers are referenced before they are
    # defined and receive their definition further below via ``.set()``.
    begin_generic_block = Forward()
    block_environment = Forward()
    block_of_paragraphs = Forward()
    end_generic_block = Forward()
    paragraph = Forward()
    tabular_config = Forward()
    text_element = Forward()

    # Bookkeeping attributes written by the parser generator.
    source_hash__ = "30f9fd1ad9257035ba83975dd2f46856"
    static_analysis_pending__ = [True]
    parser_initialization__ = ["upon instantiation"]
    resume_rules__ = {}

    # Comment and insignificant-whitespace definitions.
    COMMENT__ = r'%.*'
    WHITESPACE__ = r'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?'
    WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
    wsp__ = Whitespace(WSP_RE__)

    # Low-level (regular-expression based) parsers.
    EOF = RegExp('(?!.)')
    BACKSLASH = RegExp('[\\\\]')
    LB = RegExp('\\s*?\\n|$')
    NEW_LINE = Series(RegExp('[ \\t]*'), Option(RegExp(COMMENT__)), RegExp('\\n'))
    GAP = Series(RegExp('[ \\t]*(?:\\n[ \\t]*)+\\n'), wsp__)
    WSPC = OneOrMore(Alternative(RegExp(COMMENT__), RegExp('\\s+')))
    PARSEP = Series(ZeroOrMore(Series(RegExp(WHITESPACE__), RegExp(COMMENT__))), GAP, Option(WSPC))
    LFF = Series(NEW_LINE, Option(WSPC))
    LF = Series(NEW_LINE, ZeroOrMore(Series(RegExp(COMMENT__), RegExp(WHITESPACE__))))
    TEXTCHUNK = RegExp('[^\\\\%$&\\{\\}\\[\\]\\s\\n]+')
    INTEGER = Series(RegExp('\\d+'), wsp__)
    NAME = Capture(Series(RegExp('\\w+'), wsp__))
    LINEFEED = RegExp('[\\\\][\\\\]')
    BRACKETS = RegExp('[\\[\\]]')
    SPECIAL = RegExp('[$&_/\\\\\\\\]')
    ESCAPED = RegExp('\\\\[%$&_/{}]')
    TXTCOMMAND = RegExp('\\\\text\\w+')
    CMDNAME = Series(RegExp('\\\\(?:(?!_)\\w)+'), wsp__)

    # Commands, blocks and configuration arguments.
    structural = Alternative(Series(Token("subsection"), wsp__), Series(Token("section"), wsp__), Series(Token("chapter"), wsp__), Series(Token("subsubsection"), wsp__), Series(Token("paragraph"), wsp__), Series(Token("subparagraph"), wsp__), Series(Token("item"), wsp__))
    blockcmd = Series(BACKSLASH, Alternative(Series(Alternative(Series(Token("begin{"), wsp__), Series(Token("end{"), wsp__)), Alternative(Series(Token("enumerate"), wsp__), Series(Token("itemize"), wsp__), Series(Token("figure"), wsp__), Series(Token("quote"), wsp__), Series(Token("quotation"), wsp__), Series(Token("tabular"), wsp__)), Series(Token("}"), wsp__)), structural, begin_generic_block, end_generic_block))
    no_command = Alternative(Series(Token("\\begin{"), wsp__), Series(Token("\\end"), wsp__), Series(BACKSLASH, structural))
    text = Series(TEXTCHUNK, ZeroOrMore(Series(RegExp(''), wsp__, TEXTCHUNK)))
    block = Series(RegExp('{'), RegExp(''), wsp__, ZeroOrMore(Series(NegativeLookahead(blockcmd), text_element, RegExp(''), wsp__)), RegExp('}'), mandatory=4)
    cfg_text = ZeroOrMore(Alternative(Series(Option(Series(RegExp(''), wsp__)), text), CMDNAME, SPECIAL))
    config = Series(Series(Token("["), wsp__), cfg_text, Series(Token("]"), wsp__), mandatory=2)
    pdfinfo = Series(Series(Token("\\pdfinfo"), wsp__), block)
    documentclass = Series(Series(Token("\\documentclass"), wsp__), Option(config), block)
    cline = Series(Series(Token("\\cline{"), wsp__), INTEGER, Series(Token("-"), wsp__), INTEGER, Series(Token("}"), wsp__))
    hline = Series(Token("\\hline"), wsp__)
    multicolumn = Series(Series(Token("\\multicolumn"), wsp__), Series(Token("{"), wsp__), INTEGER, Series(Token("}"), wsp__), tabular_config, block_of_paragraphs)
    caption = Series(Series(Token("\\caption"), wsp__), block)
    includegraphics = Series(Series(Token("\\includegraphics"), wsp__), Option(config), block)
    footnote = Series(Series(Token("\\footnote"), wsp__), block_of_paragraphs)
    citep = Series(Alternative(Series(Token("\\citep"), wsp__), Series(Token("\\cite"), wsp__)), Option(config), block)
    citet = Series(Series(Token("\\citet"), wsp__), Option(config), block)
    generic_command = Series(NegativeLookahead(no_command), CMDNAME, Option(Series(Option(Series(RegExp(''), wsp__, config)), RegExp(''), wsp__, block)))
    text_command = Alternative(TXTCOMMAND, ESCAPED, BRACKETS)
    known_command = Alternative(citet, citep, footnote, includegraphics, caption, multicolumn, hline, cline, documentclass, pdfinfo)
    command = Alternative(known_command, text_command, generic_command)

    # Inline environments and paragraph-level elements.
    inline_math = Series(RegExp('\\$'), RegExp('[^$]*'), RegExp('\\$'), mandatory=2)
    end_environment = Series(RegExp('\\\\end{'), Pop(NAME), RegExp('}'), mandatory=1)
    begin_environment = Series(RegExp('\\\\begin{'), NAME, RegExp('}'), mandatory=1)
    end_inline_env = Synonym(end_environment)
    begin_inline_env = Alternative(Series(NegativeLookbehind(LB), begin_environment), Series(begin_environment, NegativeLookahead(LFF)))
    generic_inline_env = Series(begin_inline_env, RegExp(''), wsp__, paragraph, end_inline_env, mandatory=4)
    known_inline_env = Synonym(inline_math)
    inline_environment = Alternative(known_inline_env, generic_inline_env)
    line_element = Alternative(text, block, inline_environment, command)
    text_element.set(Alternative(line_element, LINEFEED))
    paragraph.set(OneOrMore(Series(NegativeLookahead(blockcmd), text_element, RegExp(''), wsp__)))
    sequence = Series(Option(WSPC), OneOrMore(Series(Alternative(paragraph, block_environment), Option(PARSEP))))
    block_of_paragraphs.set(Series(Series(Token("{"), wsp__), Option(sequence), Series(Token("}"), wsp__), mandatory=2))

    # Block environments.
    tabular_config.set(Series(Series(Token("{"), wsp__), RegExp('[lcr|]+'), wsp__, Series(Token("}"), wsp__), mandatory=3))
    tabular_cell = ZeroOrMore(Series(line_element, RegExp(''), wsp__))
    tabular_row = Series(Alternative(multicolumn, tabular_cell), ZeroOrMore(Series(Series(Token("&"), wsp__), Alternative(multicolumn, tabular_cell))), Series(Token("\\\\"), wsp__), Alternative(hline, ZeroOrMore(cline)))
    tabular = Series(Series(Token("\\begin{tabular}"), wsp__), tabular_config, ZeroOrMore(tabular_row), Series(Token("\\end{tabular}"), wsp__), mandatory=3)
    verbatim = Series(Series(Token("\\begin{verbatim}"), wsp__), sequence, Series(Token("\\end{verbatim}"), wsp__), mandatory=2)
    quotation = Alternative(Series(Series(Token("\\begin{quotation}"), wsp__), sequence, Series(Token("\\end{quotation}"), wsp__), mandatory=2), Series(Series(Token("\\begin{quote}"), wsp__), sequence, Series(Token("\\end{quote}"), wsp__), mandatory=2))
    figure = Series(Series(Token("\\begin{figure}"), wsp__), sequence, Series(Token("\\end{figure}"), wsp__), mandatory=2)
    item = Series(Series(Token("\\item"), wsp__), sequence)
    enumerate = Series(Series(Token("\\begin{enumerate}"), wsp__), Option(WSPC), ZeroOrMore(item), Series(Token("\\end{enumerate}"), wsp__), mandatory=3)
    itemize = Series(Series(Token("\\begin{itemize}"), wsp__), Option(WSPC), ZeroOrMore(item), Series(Token("\\end{itemize}"), wsp__), mandatory=3)
    end_generic_block.set(Series(Lookbehind(LB), end_environment, LFF))
    begin_generic_block.set(Series(Lookbehind(LB), begin_environment, LFF))
    generic_block = Series(begin_generic_block, sequence, end_generic_block, mandatory=2)
    known_environment = Alternative(itemize, enumerate, figure, tabular, quotation, verbatim)
    block_environment.set(Alternative(known_environment, generic_block))

    # Document structure, from the innermost heading level outwards.
    heading = Synonym(block)
    Index = Series(Option(WSPC), Series(Token("\\printindex"), wsp__))
    Bibliography = Series(Option(WSPC), Series(Token("\\bibliography"), wsp__), heading)
    SubParagraph = Series(Series(Token("\\subparagraph"), wsp__), heading, Option(sequence))
    SubParagraphs = OneOrMore(Series(Option(WSPC), SubParagraph))
    Paragraph = Series(Series(Token("\\paragraph"), wsp__), heading, ZeroOrMore(Alternative(sequence, SubParagraphs)))
    Paragraphs = OneOrMore(Series(Option(WSPC), Paragraph))
    SubSubSection = Series(Series(Token("\\subsubsection"), wsp__), heading, ZeroOrMore(Alternative(sequence, Paragraphs)))
    SubSubSections = OneOrMore(Series(Option(WSPC), SubSubSection))
    SubSection = Series(Series(Token("\\subsection"), wsp__), heading, ZeroOrMore(Alternative(sequence, SubSubSections)))
    SubSections = OneOrMore(Series(Option(WSPC), SubSection))
    Section = Series(Series(Token("\\section"), wsp__), heading, ZeroOrMore(Alternative(sequence, SubSections)))
    Sections = OneOrMore(Series(Option(WSPC), Section))
    Chapter = Series(Series(Token("\\chapter"), wsp__), heading, ZeroOrMore(Alternative(sequence, Sections)))
    Chapters = OneOrMore(Series(Option(WSPC), Chapter))
    frontpages = Synonym(sequence)
    document = Series(Option(WSPC), Series(Token("\\begin{document}"), wsp__), frontpages, Alternative(Chapters, Sections), Option(Bibliography), Option(Index), Option(WSPC), Series(Token("\\end{document}"), wsp__), Option(WSPC), EOF, mandatory=9)
    preamble = OneOrMore(Series(Option(WSPC), command))
    latexdoc = Series(preamble, document)

    # Entry point of the grammar.
    root__ = latexdoc
    
def get_grammar() -> LaTeXGrammar:
    """Return the cached ``LaTeXGrammar`` instance, creating it on first use.

    The instance is stored as the attribute
    ``LaTeX_00000001_grammar_singleton`` of ``GLOBALS``. If this function
    carries a ``python_src__`` attribute, it is copied to a newly created
    grammar object.
    """
    try:
        grammar = GLOBALS.LaTeX_00000001_grammar_singleton
    except AttributeError:
        # First call: no grammar has been stored on GLOBALS yet.
        grammar = GLOBALS.LaTeX_00000001_grammar_singleton = LaTeXGrammar()
        if hasattr(get_grammar, 'python_src__'):
            grammar.python_src__ = get_grammar.python_src__
    return grammar
166 167 168 169 170 171 172 173 174


#######################################################################
#
# AST SECTION - Can be edited. Changes will be preserved.
#
#######################################################################


175
def streamline_whitespace(context):
    """Normalize the content of a whitespace node in place.

    ``context[-1]`` is the node that is rewritten; nothing is done if
    ``context[-2]`` (presumably the parent node) is a token node.

    The node's result becomes:

    * ``'\\n'`` if its content contains a comment marker (``%``) or a
      line break,
    * ``' '`` if it contains any other non-empty whitespace,
    * ``''`` if it is empty.
    """
    if context[-2].tag_name == TOKEN_PTYPE:
        return
    node = context[-1]
    assert node.tag_name in ['WSPC', ':Whitespace']
    content = node.content
    if '%' in content or '\n' in content:
        node.result = '\n'
    else:
        node.result = ' ' if content else ''
190 191


192 193 194
def watch(node):
    """Debugging aid: print the S-expression form of ``node``."""
    sxpr = node.as_sxpr()
    print(sxpr)

eckhart's avatar
eckhart committed
195
# Transformation built with ``flatten`` (recursive) whose condition selects
# the plural container nodes of the document structure as well as
# "sequence" nodes.
flatten_structure = flatten(lambda context: is_one_of(
    context, {"Chapters", "Sections", "SubSections", "SubSubSections", "Paragraphs",
              "SubParagraphs", "sequence"}), recursive=True)
eckhart's avatar
eckhart committed
198 199 200


def is_commandname(context):
    """Returns True, if the last node in the context represents a
    (potentially unknown) LaTeX-command.

    This is the case when the node is a token node, its parent
    (``context[-2]``) has more than one child, and the token's text is
    ``\\name``, ``\\begin{name}`` or ``\\end{name}``, where ``name`` is
    the parent's tag name in lower case.
    """
    node = context[-1]
    if node.tag_name != TOKEN_PTYPE:
        return False
    parent = context[-2]
    if len(parent.children) <= 1:
        return False
    parent_name = parent.tag_name.lower()
    command_forms = ('\\' + parent_name,
                     '\\begin{' + parent_name + '}',
                     '\\end{' + parent_name + '}')
    return str(node) in command_forms


def _is_expendable(context):
    """Condition for ``drop_expendables``: empty nodes, 'PARSEP' and 'WSPC'
    nodes, and tokens that merely repeat the command name."""
    if is_empty(context):
        return True
    if is_one_of(context, {'PARSEP', 'WSPC'}):
        return True
    return is_commandname(context)


drop_expendables = remove_children_if(_is_expendable)

220

221 222
LaTeX_AST_transformation_table = {
    # AST Transformations for the LaTeX-grammar
    "<": [flatten_anonymous_nodes, flatten_structure],
    "latexdoc": [],
    "preamble": [traverse_locally({'<': remove_whitespace, 'block': replace_by_single_child})],
    "document": [flatten_structure],
    "pdfinfo": [],
    "frontpages": reduce_single_child,
    "Chapters, Sections, SubSections, SubSubSections, Paragraphs, SubParagraphs": [],
    "Chapter, Section, SubSection, SubSubSection, Paragraph, SubParagraph": [],
    "heading": reduce_single_child,
    "Bibliography": [],
    "Index": [],
    "block_environment": replace_by_single_child,
    "known_environment": replace_by_single_child,
    "generic_block": [],
    "begin_generic_block, end_generic_block": [remove_nodes('NEW_LINE'), replace_by_single_child],
    "itemize, enumerate": [remove_brackets, flatten],
    "item": [],
    "figure": [],
    "quotation": [reduce_single_child, remove_brackets],
    "verbatim": [],
    "tabular": [],
    "tabular_config, block_of_paragraphs": [remove_brackets, reduce_single_child],
    # NOTE(review): the grammar's row terminator is Token("\\\\"), i.e. two
    # backslash characters, while '\\' below is a single backslash -- confirm
    # whether the terminator token is meant to be removed here.
    "tabular_row": [flatten, remove_tokens('&', '\\')],
    "tabular_cell": [flatten, remove_whitespace],
    "multicolumn": [remove_tokens('{', '}')],
    "hline": [remove_whitespace, reduce_single_child],
    "sequence": [flatten],
    "paragraph": [flatten],
    "text_element": replace_by_single_child,
    "line_element": replace_by_single_child,
    "inline_environment": replace_by_single_child,
    "known_inline_env": replace_by_single_child,
    "generic_inline_env": [],
    "begin_inline_env, end_inline_env": [replace_by_single_child],
    "begin_environment, end_environment": [remove_brackets, reduce_single_child],
    "inline_math": [remove_brackets, reduce_single_child],
    "command": replace_by_single_child,
    "known_command": replace_by_single_child,
    "text_command": [],
    "generic_command": [flatten],
    "citet, citep": [],
    "footnote": [],
    "includegraphics": [],
    "caption": [],
    "config": [remove_brackets, reduce_single_child],
    "block": [remove_brackets, flatten, replace_by_single_child],
    "text": collapse,
    "no_command, blockcmd": [],
    "structural": [],
    "CMDNAME": [remove_whitespace, reduce_single_child],
    "TXTCOMMAND": [remove_whitespace, reduce_single_child],
    "NAME": [reduce_single_child, remove_whitespace, reduce_single_child],
    # Strips the first character of the node's text (the pattern ESCAPED
    # always starts with a backslash).
    "ESCAPED": [replace_content(lambda node: str(node)[1:])],
    "BRACKETS": [],
    "TEXTCHUNK": [],
    "LF": [],
    "PARSEP": replace_content(lambda node: '\n\n'),
    "GAP": [],
    "LB": [],
    "BACKSLASH": [],
    "EOF": [],
    # "PARSEP": [replace_content_by('\n\n')],
    # "WSPC": [replace_content_by(' ')],
    ":Whitespace": streamline_whitespace,
    "*": replace_by_single_child
}

290

291 292
def LaTeXTransform() -> TransformationFunc:
    """Return a new AST transformation function for the LaTeX grammar.

    The result is ``traverse`` with its ``processing_table`` argument
    bound to a shallow copy of ``LaTeX_AST_transformation_table``, so
    each call gets its own top-level table.
    """
    return partial(traverse, processing_table=LaTeX_AST_transformation_table.copy())
293

294

295
def get_transformer() -> TransformationFunc:
    """Return the cached AST transformer, creating it on first use.

    Despite its name, ``thread_local_LaTeX_transformer_singleton`` is an
    ordinary module-level global; it does not exist until the first
    call, which is detected via ``NameError``.
    """
    global thread_local_LaTeX_transformer_singleton
    try:
        transformer = thread_local_LaTeX_transformer_singleton
    except NameError:
        transformer = thread_local_LaTeX_transformer_singleton = LaTeXTransform()
    return transformer

304 305 306 307 308 309 310 311


#######################################################################
#
# COMPILER SECTION - Can be edited. Changes will be preserved.
#
#######################################################################

312 313 314 315 316 317

def empty_defaultdict():
    """Create an empty, arbitrarily nestable defaultdict.

    Missing keys are filled with a further dictionary of the same kind.
    """
    nested = defaultdict(empty_defaultdict)
    return nested


318 319 320
class LaTeXCompiler(Compiler):
    """Compiler for the abstract-syntax-tree of a LaTeX source file.

    Only ``on_documentclass`` and ``on_pdfinfo`` are implemented; the
    commented-out ``on_<name>`` stubs are kept as templates for handlers
    of the remaining node types.
    """
    KNOWN_DOCUMENT_CLASSES = {'book', 'article'}
    KNOWN_LANGUAGES = {'english', 'german'}

    def __init__(self):
        super().__init__()
        # Nested defaultdict, so metadata[key][subkey]... never raises KeyError.
        self.metadata = defaultdict(empty_defaultdict)

    # def on_latexdoc(self, node):
    #     self.compile(node['preamble'])
    #     self.compile(node['document'])
    #     return node

    # def on_preamble(self, node):
    #     return node

    # def on_document(self, node):
    #     return node

    # def on_frontpages(self, node):
    #     return node

    # def on_Chapters(self, node):
    #     return node

    # def on_Chapter(self, node):
    #     return node

    # def on_Sections(self, node):
    #     return node

    # def on_Section(self, node):
    #     return node

    # def on_SubSections(self, node):
    #     return node

    # def on_SubSection(self, node):
    #     return node

    # def on_SubSubSections(self, node):
    #     return node

    # def on_SubSubSection(self, node):
    #     return node

    # def on_Paragraphs(self, node):
    #     return node

    # def on_Paragraph(self, node):
    #     return node

    # def on_SubParagraphs(self, node):
    #     return node

    # def on_SubParagraph(self, node):
    #     return node

    # def on_Bibliography(self, node):
    #     return node

    # def on_Index(self, node):
    #     return node

    # def on_heading(self, node):
    #     return node

    # def on_block_environment(self, node):
    #     return node

    # def on_known_environment(self, node):
    #     return node

    # def on_generic_block(self, node):
    #     return node

    # def on_begin_generic_block(self, node):
    #     return node

    # def on_end_generic_block(self, node):
    #     return node

    # def on_itemize(self, node):
    #     return node

    # def on_enumerate(self, node):
    #     return node

    # def on_item(self, node):
    #     return node

    # def on_figure(self, node):
    #     return node

    # def on_quotation(self, node):
    #     return node

    # def on_verbatim(self, node):
    #     return node

    # def on_tabular(self, node):
    #     return node

    # def on_tabular_row(self, node):
    #     return node

    # def on_tabular_cell(self, node):
    #     return node

    # def on_tabular_config(self, node):
    #     return node

    # def on_block_of_paragraphs(self, node):
    #     return node

    # def on_sequence(self, node):
    #     return node

    # def on_paragraph(self, node):
    #     return node

    # def on_text_element(self, node):
    #     return node

    # def on_line_element(self, node):
    #     return node

    # def on_inline_environment(self, node):
    #     return node

    # def on_known_inline_env(self, node):
    #     return node

    # def on_generic_inline_env(self, node):
    #     return node

    # def on_begin_inline_env(self, node):
    #     return node

    # def on_end_inline_env(self, node):
    #     return node

    # def on_begin_environment(self, node):
    #     return node

    # def on_end_environment(self, node):
    #     return node

    # def on_inline_math(self, node):
    #     return node

    # def on_command(self, node):
    #     return node

    # def on_known_command(self, node):
    #     return node

    # def on_text_command(self, node):
    #     return node

    # def on_generic_command(self, node):
    #     return node

    # def on_footnote(self, node):
    #     return node

    # def on_includegraphics(self, node):
    #     return node

    # def on_caption(self, node):
    #     return node

    # def on_multicolumn(self, node):
    #     return node

    # def on_hline(self, node):
    #     return node

    # def on_cline(self, node):
    #     return node

    def on_documentclass(self, node):
        """
        Saves the documentclass (if known) and the language (if given)
        in the metadata dictionary.

        The comma-separated options of the optional ``config`` child are
        scanned in source order. The first option that names a known
        language is stored under ``metadata['language']``; every further,
        different known language only produces a warning. The class name
        is stored under ``metadata['documentclass']`` if it is one of
        ``KNOWN_DOCUMENT_CLASSES``.

        Returns the unchanged node.
        """
        if 'config' in node:
            # A list (not a set) keeps the source order, so that the language
            # that wins is deterministic.
            options = [part.strip() for part in node['config'].content.split(',')]
            for option in options:
                if option not in self.KNOWN_LANGUAGES:
                    continue
                # .get() avoids creating an empty entry in the defaultdict.
                current = self.metadata.get('language')
                if current is None:
                    self.metadata['language'] = option
                elif option != current:
                    self.tree.new_error(node, 'Only one document language supported. '
                                        'Using %s, ignoring %s.'
                                        % (current, option), Error.WARNING)
        if 'text' in node:
            # Compare the textual content; the node object itself is never a
            # member of a set of strings.
            document_class = node['text'].content
            if document_class in self.KNOWN_DOCUMENT_CLASSES:
                self.metadata['documentclass'] = document_class
        return node

    def on_pdfinfo(self, node):
        return node

    # def on_config(self, node):
    #     return node

    # def on_cfg_text(self, node):
    #     return node

    # def on_block(self, node):
    #     return node

    # def on_text(self, node):
    #     return node

    # def on_no_command(self, node):
    #     return node

    # def on_blockcmd(self, node):
    #     return node

    # def on_structural(self, node):
    #     return node

    # def on_CMDNAME(self, node):
    #     return node

    # def on_TXTCOMMAND(self, node):
    #     return node

    # def on_ESCAPED(self, node):
    #     return node

    # def on_SPECIAL(self, node):
    #     return node

    # def on_BRACKETS(self, node):
    #     return node

    # def on_LINEFEED(self, node):
    #     return node

    # def on_NAME(self, node):
    #     return node

    # def on_INTEGER(self, node):
    #     return node

    # def on_TEXTCHUNK(self, node):
    #     return node

    # def on_LF(self, node):
    #     return node

    # def on_LFF(self, node):
    #     return node

    # def on_PARSEP(self, node):
    #     return node

    # def on_WSPC(self, node):
    #     return node

    # def on_GAP(self, node):
    #     return node

    # def on_NEW_LINE(self, node):
    #     return node

    # def on_LB(self, node):
    #     return node

    # def on_BACKSLASH(self, node):
    #     return node

    # def on_EOF(self, node):
    #     return node
Eckhart Arnold's avatar
Eckhart Arnold committed
596

597

eckhart's avatar
eckhart committed
598
def get_compiler() -> LaTeXCompiler:
    """Return the cached ``LaTeXCompiler`` instance, creating it on first use.

    Despite its name, ``thread_local_LaTeX_compiler_singleton`` is an
    ordinary module-level global; it does not exist until the first
    call, which is detected via ``NameError``.
    """
    global thread_local_LaTeX_compiler_singleton
    try:
        compiler = thread_local_LaTeX_compiler_singleton
    except NameError:
        compiler = thread_local_LaTeX_compiler_singleton = LaTeXCompiler()
    return compiler
606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621


#######################################################################
#
# END OF DHPARSER-SECTIONS
#
#######################################################################


def compile_src(source):
    """Compiles ``source`` and returns (result, errors, ast).

    The whole pipeline (preprocessor, grammar, AST transformer and
    compiler) is run by ``compile_source`` inside a ``logging("LOGS")``
    context, and its return value is passed on unchanged.
    """
    with logging("LOGS"):
        compiler = get_compiler()
        result = compile_source(source, get_preprocessor(),
                                get_grammar(),
                                get_transformer(), compiler)
    return result


# Script entry point: compile the file named by the first command-line
# argument and print either the errors (exit status 1) or the result.
if __name__ == "__main__":
    if len(sys.argv) > 1:
        result, errors, _ast = compile_src(sys.argv[1])
        if errors:
            for error in errors:
                print(error)
            sys.exit(1)
        else:
            # Node results are printed as XML, anything else as it is.
            print(result.as_xml() if isinstance(result, Node) else result)
    else:
        print("Usage: LaTeXCompiler.py [FILENAME]")