Commit efb8af4d authored by Eckhart Arnold
Browse files

- LaTeX ebnf zusätzlich erweitert

parent 968f1acc
......@@ -10,8 +10,8 @@ preamble = { command }+
document = [PARSEP] "\begin{document}" [PARSEP]
frontpages [PARSEP]
(chapters | sections) [PARSEP]
[bibliography] [index] [PARSEP]
(Chapters | Sections) [PARSEP]
[Bibliography] [Index] [PARSEP]
"\end{document}" [PARSEP] §EOF
frontpages = sequence
......@@ -31,7 +31,7 @@ Section = "\Section" block [PARSEP] { sequence | SubSections }
SubSections = { SubSection [PARSEP] }+
SubSection = "\SubSection" block [PARSEP] { sequence | SubSubSections }
SubSubsections = { SubSubSection [PARSEP] }+
SubSubSections = { SubSubSection [PARSEP] }+
SubSubSection = "\SubSubSection" block [PARSEP] { sequence | Paragraphs }
Paragraphs = { Paragraph [PARSEP] }+
......@@ -40,8 +40,8 @@ Paragraph = "\paragraph" block [PARSEP] { sequence | SubParagraphs }
SubParagraphs = { SubParagraph [PARSEP] }+
SubParagraph = "\subparagpaph" block [PARSEP] { sequence }
bibliography = "\bibliography" block [PARSEP]
index = "\printindex" [PARSEP
Bibliography = "\bibliography" block [PARSEP]
Index = "\printindex" [PARSEP]
#######################################################################
......@@ -54,37 +54,45 @@ index = "\printindex" [PARSEP
#### block environments ####
block_enrivonment = known_enrivonment | generic_enrivonment
known_enrivonment = itemize | enumerate | figure | table
known_enrivonment = itemize | enumerate | figure | table | quotation
| verbatim
generic_enrivonment = begin_enrivonment sequence §end_enrivonment
itemize = "\begin{itemize}" [PARSEP] { item } §"\end{itemize}"
enumerate = "\begin{enumerate}" [PARSEP] {item } §"end{enumerate}"
item = "\item" [PARSEP] sequence
figure = "\begin{figure}" sequence "\end{figure}"
quotation = ("\begin{quotation}" sequence "\end{quotation}")
| ("\begin{quote}" sequence "\end{quote}")
verbatim = "\begin{verbatim}" sequence "\end{verbatim}"
table = "\begin{tabular}" table_config sequence "\end{tabular}"
table_config = "{" /[lcr|]+/~ "}"
#### paragraphs and sequences of paragraphs ####
block_of_paragraphs = /{/ sequence §/}/
sequence = { (paragraph | block_enrivonment ) [PARSEP] }+
paragraph = { !blockcmd textelements //~ }+
textelemts = command | text | block | inline_enrivonment
paragraph = { !blockcmd text_elements //~ }+
text_elements = command | text | block | inline_enrivonment
#### inline environments ####
inline_enrivonment = known_inline_env | generic_inline_env
known_inline_env = inline_math
generic_inline_env = begin_enrivonment { textelements }+ §end_enrivonment
generic_inline_env = begin_enrivonment { text_elements }+ §end_enrivonment
begin_enrivonment = "\begin{" §NAME §"}"
end_enrivonment = "\end{" §::NAME §"}"
inline_math = "$" math_Text "$"
inline_math = "$" MATH "$"
#### commands ####
command = knownd_command | generic_command
command = known_command | generic_command
known_command = footnote
generic_command = CMDNAME [[ //~ config ] //~ block ]
......@@ -98,15 +106,16 @@ footnote = "\footnote" block_of_paragraphs
#######################################################################
config = "[" cfgtext §"]"
block = /{/ { textelements } §/}/
block = /{/ { text_elements } §/}/
text = { cfgtext | (BRACKETS //~) }+
cfgtext = { word_sequence | (ESCAPED //~) }+
word_sequence = { TEXTCHUNK //~ }+
blockcmd = "\subsection" | "\section" | "\chapter" | "\subsubsection"
| "\paragraph" | "\subparagraph" | "\begin{enumerate}"
| "\begin{itemize}" | "\item" | "\begin{figure}"
| "\paragraph" | "\subparagraph" | "\item"
| "\begin{" ("enumerate" | "itemize" | "figure" | "quote"
| "quotation" | "tabular") "}"
#######################################################################
......@@ -117,6 +126,7 @@ blockcmd = "\subsection" | "\section" | "\chapter" | "\subsubsection"
CMDNAME = /\\(?:(?!_)\w)+/~
NAME = /\w+/~
MATH = /[\w_^{}[\]]*/~
ESCAPED = /\\[%$&_\/]/
BRACKETS = /[\[\]]/ # left or right square bracket: [ ]
......
......@@ -7,24 +7,23 @@
#######################################################################
from functools import partial
import os
import sys
from functools import partial
try:
import regex as re
except ImportError:
import re
from DHParser.toolkit import logging, is_filename, load_if_file
from DHParser.parsers import Grammar, Compiler, nil_scanner, \
Lookbehind, Lookahead, Alternative, Pop, Required, Token, Synonym, \
Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture, \
from DHParser.toolkit import logging, is_filename
from DHParser.parsers import Grammar, Compiler, Alternative, Pop, Required, Token, Synonym, \
Optional, OneOrMore, Series, RE, Capture, \
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
last_value, counterpart, accumulate, ScannerFunc
from DHParser.syntaxtree import Node, traverse, remove_brackets, keep_children, \
remove_children_if, reduce_single_child, replace_by_single_child, remove_whitespace, \
remove_expendables, remove_tokens, flatten, is_whitespace, is_expendable, join, \
collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, TransformationFunc, \
remove_empty, replace_parser, apply_if
ScannerFunc
from DHParser.syntaxtree import traverse, remove_brackets, reduce_single_child, replace_by_single_child, \
remove_expendables, flatten, join, \
collapse, replace_content, TransformationFunc, \
remove_empty
#######################################################################
......@@ -49,42 +48,135 @@ def get_scanner() -> ScannerFunc:
class LaTeXGrammar(Grammar):
r"""Parser for a LaTeX source file, with this grammar:
# latex Grammar
# LaTeX-Grammar for DHParser
@ testing = True
@ whitespace = /[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?/ # optional whitespace, including at most one linefeed
@ comment = /%.*(?:\n|$)/
latexdoc = preamble document
preamble = { command }+
document = [PARSEP] { [PARSEP] paragraph } §EOF
latexdoc = preamble document
preamble = { command }+
document = [PARSEP] "\begin{document}" [PARSEP]
frontpages [PARSEP]
(Chapters | Sections) [PARSEP]
[Bibliography] [Index] [PARSEP]
"\end{document}" [PARSEP] §EOF
frontpages = sequence
#######################################################################
#
# document structure
#
#######################################################################
Chapters = { Chapter [PARSEP] }+
Chapter = "\Chapter" block [PARSEP] { sequence | Sections }
Sections = { Section [PARSEP] }+
Section = "\Section" block [PARSEP] { sequence | SubSections }
SubSections = { SubSection [PARSEP] }+
SubSection = "\SubSection" block [PARSEP] { sequence | SubSubSections }
SubSubSections = { SubSubSection [PARSEP] }+
SubSubSection = "\SubSubSection" block [PARSEP] { sequence | Paragraphs }
Paragraphs = { Paragraph [PARSEP] }+
Paragraph = "\paragraph" block [PARSEP] { sequence | SubParagraphs }
SubParagraphs = { SubParagraph [PARSEP] }+
SubParagraph = "\subparagpaph" block [PARSEP] { sequence }
Bibliography = "\bibliography" block [PARSEP]
Index = "\printindex" [PARSEP]
#######################################################################
#
# document content
#
#######################################################################
#### block environments ####
blockenv = beginenv sequence §endenv
block_enrivonment = known_enrivonment | generic_enrivonment
known_enrivonment = itemize | enumerate | figure | table | quotation
| verbatim
generic_enrivonment = begin_enrivonment sequence §end_enrivonment
parblock = "{" sequence §"}"
itemize = "\begin{itemize}" [PARSEP] { item } §"\end{itemize}"
enumerate = "\begin{enumerate}" [PARSEP] {item } §"end{enumerate}"
item = "\item" [PARSEP] sequence
sequence = { paragraph [PARSEP] }+
paragraph = { !blockcmd (command | block | text) //~ }+
figure = "\begin{figure}" sequence "\end{figure}"
quotation = ("\begin{quotation}" sequence "\end{quotation}")
| ("\begin{quote}" sequence "\end{quote}")
verbatim = "\begin{verbatim}" sequence "\end{verbatim}"
table = "\begin{tabular}" table_config sequence "\end{tabular}"
table_config = "{" /[lcr|]+/~ "}"
inlineenv = beginenv { command | block | text }+ endenv
beginenv = "\begin{" §NAME §"}"
endenv = "\end{" §::NAME §"}"
command = CMDNAME [[ //~ config ] //~ block ]
#### paragraphs and sequences of paragraphs ####
block_of_paragraphs = /{/ sequence §/}/
sequence = { (paragraph | block_enrivonment ) [PARSEP] }+
paragraph = { !blockcmd text_elements //~ }+
text_elements = command | text | block | inline_enrivonment
#### inline environments ####
inline_enrivonment = known_inline_env | generic_inline_env
known_inline_env = inline_math
generic_inline_env = begin_enrivonment { text_elements }+ §end_enrivonment
begin_enrivonment = "\begin{" §NAME §"}"
end_enrivonment = "\end{" §::NAME §"}"
inline_math = "$" MATH "$"
#### commands ####
command = known_command | generic_command
known_command = footnote
generic_command = CMDNAME [[ //~ config ] //~ block ]
footnote = "\footnote" block_of_paragraphs
#######################################################################
#
# low-level text and character sequences
#
#######################################################################
config = "[" cfgtext §"]"
block = /{/ { command | text | block } §/}/
block = /{/ { text_elements } §/}/
text = { cfgtext | (BRACKETS //~) }+
cfgtext = { word_sequence | (ESCAPED //~) }+
word_sequence = { TEXTCHUNK //~ }+
blockcmd = "\subsection" | "\section" | "\chapter" | "\subsubsection"
| "\paragraph" | "\subparagraph" | "\begin{enumerate}"
| "\begin{itemize}" | "\item" | "\begin{figure}"
| "\paragraph" | "\subparagraph" | "\item"
| "\begin{" ("enumerate" | "itemize" | "figure" | "quote"
| "quotation" | "tabular") "}"
#######################################################################
#
# Primitives
#
#######################################################################
CMDNAME = /\\(?:(?!_)\w)+/~
NAME = /\w+/~
MATH = /[\w_^{}[\]]*/~
ESCAPED = /\\[%$&_\/]/
BRACKETS = /[\[\]]/ # left or right square bracket: [ ]
......@@ -94,12 +186,12 @@ class LaTeXGrammar(Grammar):
LF = !PARSEP /[ \t]*\n[ \t]*/ # LF but not an empty line
PARSEP = /[ \t]*(?:\n[ \t]*)+\n[ \t]*/ # at least one empty line, i.e.
# [whitespace] linefeed [whitespace] linefeed
EOF = !/./
"""
block = Forward()
command = Forward()
source_hash__ = "936e76e84dd027b0af532abfad617d15"
block_enrivonment = Forward()
block_of_paragraphs = Forward()
text_elements = Forward()
source_hash__ = "484ed98c05f7142c72f06d7c31e61089"
parser_initialization__ = "upon instantiation"
COMMENT__ = r'%.*(?:\n|$)'
WSP__ = mixin_comment(whitespace=r'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?', comment=r'%.*(?:\n|$)')
......@@ -112,23 +204,76 @@ class LaTeXGrammar(Grammar):
TEXTCHUNK = RE('[^\\\\%$&\\{\\}\\[\\]\\s\\n]+', wR='')
BRACKETS = RE('[\\[\\]]', wR='')
ESCAPED = RE('\\\\[%$&_/]', wR='')
MATH = RE('[\\w_^{}[\\]]*')
NAME = Capture(RE('\\w+'))
CMDNAME = RE('\\\\(?:(?!_)\\w)+')
blockcmd = Alternative(Token("\\subsection"), Token("\\section"), Token("\\chapter"), Token("\\subsubsection"), Token("\\paragraph"), Token("\\subparagraph"), Token("\\begin{enumerate}"), Token("\\begin{itemize}"), Token("\\item"), Token("\\begin{figure}"))
blockcmd = Alternative(Token("\\subsection"), Token("\\section"), Token("\\chapter"), Token("\\subsubsection"),
Token("\\paragraph"), Token("\\subparagraph"), Token("\\item"), Series(Token("\\begin{"),
Alternative(Token(
"enumerate"),
Token(
"itemize"),
Token(
"figure"),
Token(
"quote"),
Token(
"quotation"),
Token(
"tabular")),
Token("}")))
word_sequence = OneOrMore(Series(TEXTCHUNK, RE('')))
cfgtext = OneOrMore(Alternative(word_sequence, Series(ESCAPED, RE(''))))
text = OneOrMore(Alternative(cfgtext, Series(BRACKETS, RE(''))))
block.set(Series(RE('{', wR=''), ZeroOrMore(Alternative(command, text, block)), Required(RE('}', wR=''))))
block = Series(RE('{', wR=''), ZeroOrMore(text_elements), Required(RE('}', wR='')))
config = Series(Token("["), cfgtext, Required(Token("]")))
command.set(Series(CMDNAME, Optional(Series(Optional(Series(RE(''), config)), RE(''), block))))
endenv = Series(Token("\\end{"), Required(Pop(NAME)), Required(Token("}")))
beginenv = Series(Token("\\begin{"), Required(NAME), Required(Token("}")))
inlineenv = Series(beginenv, OneOrMore(Alternative(command, block, text)), endenv)
paragraph = OneOrMore(Series(NegativeLookahead(blockcmd), Alternative(command, block, text), RE('')))
sequence = OneOrMore(Series(paragraph, Optional(PARSEP)))
parblock = Series(Token("{"), sequence, Required(Token("}")))
blockenv = Series(beginenv, sequence, Required(endenv))
document = Series(Optional(PARSEP), ZeroOrMore(Series(Optional(PARSEP), paragraph)), Required(EOF))
footnote = Series(Token("\\footnote"), block_of_paragraphs)
generic_command = Series(CMDNAME, Optional(Series(Optional(Series(RE(''), config)), RE(''), block)))
known_command = Synonym(footnote)
command = Alternative(known_command, generic_command)
inline_math = Series(Token("$"), MATH, Token("$"))
end_enrivonment = Series(Token("\\end{"), Required(Pop(NAME)), Required(Token("}")))
begin_enrivonment = Series(Token("\\begin{"), Required(NAME), Required(Token("}")))
generic_inline_env = Series(begin_enrivonment, OneOrMore(text_elements), Required(end_enrivonment))
known_inline_env = Synonym(inline_math)
inline_enrivonment = Alternative(known_inline_env, generic_inline_env)
text_elements.set(Alternative(command, text, block, inline_enrivonment))
paragraph = OneOrMore(Series(NegativeLookahead(blockcmd), text_elements, RE('')))
sequence = OneOrMore(Series(Alternative(paragraph, block_enrivonment), Optional(PARSEP)))
block_of_paragraphs.set(Series(RE('{', wR=''), sequence, Required(RE('}', wR=''))))
table_config = Series(Token("{"), RE('[lcr|]+'), Token("}"))
table = Series(Token("\\begin{tabular}"), table_config, sequence, Token("\\end{tabular}"))
verbatim = Series(Token("\\begin{verbatim}"), sequence, Token("\\end{verbatim}"))
quotation = Alternative(Series(Token("\\begin{quotation}"), sequence, Token("\\end{quotation}")),
Series(Token("\\begin{quote}"), sequence, Token("\\end{quote}")))
figure = Series(Token("\\begin{figure}"), sequence, Token("\\end{figure}"))
item = Series(Token("\\item"), Optional(PARSEP), sequence)
enumerate = Series(Token("\\begin{enumerate}"), Optional(PARSEP), ZeroOrMore(item),
Required(Token("end{enumerate}")))
itemize = Series(Token("\\begin{itemize}"), Optional(PARSEP), ZeroOrMore(item), Required(Token("\\end{itemize}")))
generic_enrivonment = Series(begin_enrivonment, sequence, Required(end_enrivonment))
known_enrivonment = Alternative(itemize, enumerate, figure, table, quotation, verbatim)
block_enrivonment.set(Alternative(known_enrivonment, generic_enrivonment))
Index = Series(Token("\\printindex"), Optional(PARSEP))
Bibliography = Series(Token("\\bibliography"), block, Optional(PARSEP))
SubParagraph = Series(Token("\\subparagpaph"), block, Optional(PARSEP), ZeroOrMore(sequence))
SubParagraphs = OneOrMore(Series(SubParagraph, Optional(PARSEP)))
Paragraph = Series(Token("\\paragraph"), block, Optional(PARSEP), ZeroOrMore(Alternative(sequence, SubParagraphs)))
Paragraphs = OneOrMore(Series(Paragraph, Optional(PARSEP)))
SubSubSection = Series(Token("\\SubSubSection"), block, Optional(PARSEP),
ZeroOrMore(Alternative(sequence, Paragraphs)))
SubSubSections = OneOrMore(Series(SubSubSection, Optional(PARSEP)))
SubSection = Series(Token("\\SubSection"), block, Optional(PARSEP),
ZeroOrMore(Alternative(sequence, SubSubSections)))
SubSections = OneOrMore(Series(SubSection, Optional(PARSEP)))
Section = Series(Token("\\Section"), block, Optional(PARSEP), ZeroOrMore(Alternative(sequence, SubSections)))
Sections = OneOrMore(Series(Section, Optional(PARSEP)))
Chapter = Series(Token("\\Chapter"), block, Optional(PARSEP), ZeroOrMore(Alternative(sequence, Sections)))
Chapters = OneOrMore(Series(Chapter, Optional(PARSEP)))
frontpages = Synonym(sequence)
document = Series(Optional(PARSEP), Token("\\begin{document}"), Optional(PARSEP), frontpages, Optional(PARSEP),
Alternative(Chapters, Sections), Optional(PARSEP), Optional(Bibliography), Optional(Index),
Optional(PARSEP), Token("\\end{document}"), Optional(PARSEP), Required(EOF))
preamble = OneOrMore(command)
latexdoc = Series(preamble, document)
root__ = latexdoc
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment