Commit e3c33e3d authored by eckhart's avatar eckhart
Browse files

- small changes to documentation and LaTeX example

parent bebeac02
......@@ -132,6 +132,9 @@ of our "example.dsl"-document according to the grammar specified in "poetry.ebnf"
If you see the pseudo-XML on screen, the setup of the new DHParser-project
has been successful.
Developing a DHParser-project
=============================
Understanding how compilation of DSL-documents with DHParser works
------------------------------------------------------------------
......
......@@ -48,187 +48,7 @@ def get_preprocessor() -> PreprocessorFunc:
#######################################################################
class LaTeXGrammar(Grammar):
r"""Parser for a LaTeX source file, with this grammar:
# LaTeX-Grammar for DHParser
# preamble
@ whitespace = /[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?/ # optional whitespace, including at most one linefeed
@ comment = /%.*/
########################################################################
#
# outer document structure
#
########################################################################
latexdoc = preamble document
preamble = { [WSPC] command }+
document = [WSPC] "\begin{document}"
frontpages
(Chapters | Sections)
[Bibliography] [Index] [WSPC]
"\end{document}" [WSPC] §EOF
frontpages = sequence
#######################################################################
#
# document structure
#
#######################################################################
Chapters = { [WSPC] Chapter }+
Chapter = "\chapter" heading { sequence | Sections }
Sections = { [WSPC] Section }+
Section = "\section" heading { sequence | SubSections }
SubSections = { [WSPC] SubSection }+
SubSection = "\subsection" heading { sequence | SubSubSections }
SubSubSections = { [WSPC] SubSubSection }+
SubSubSection = "\subsubsection" heading { sequence | Paragraphs }
Paragraphs = { [WSPC] Paragraph }+
Paragraph = "\paragraph" heading { sequence | SubParagraphs }
SubParagraphs = { [WSPC] SubParagraph }+
SubParagraph = "\subparagraph" heading [ sequence ]
Bibliography = [WSPC] "\bibliography" heading
Index = [WSPC] "\printindex"
heading = block
#######################################################################
#
# document content
#
#######################################################################
#### block environments ####
block_environment = known_environment | generic_block
known_environment = itemize | enumerate | figure | tabular | quotation
| verbatim
generic_block = begin_generic_block sequence §end_generic_block
begin_generic_block = -&LB begin_environment LFF
end_generic_block = -&LB end_environment LFF
itemize = "\begin{itemize}" [WSPC] { item } §"\end{itemize}"
enumerate = "\begin{enumerate}" [WSPC] {item } §"\end{enumerate}"
item = "\item" sequence
figure = "\begin{figure}" sequence §"\end{figure}"
quotation = ("\begin{quotation}" sequence §"\end{quotation}")
| ("\begin{quote}" sequence §"\end{quote}")
verbatim = "\begin{verbatim}" sequence §"\end{verbatim}"
tabular = "\begin{tabular}" tabular_config { tabular_row } §"\end{tabular}"
tabular_row = (multicolumn | tabular_cell) { "&" (multicolumn | tabular_cell) }
"\\" ( hline | { cline } )
tabular_cell = { line_element //~ }
tabular_config = "{" /[lcr|]+/~ §"}"
#### paragraphs and sequences of paragraphs ####
block_of_paragraphs = "{" [sequence] §"}"
sequence = [WSPC] { (paragraph | block_environment ) [PARSEP] }+
paragraph = { !blockcmd text_element //~ }+
text_element = line_element | LINEFEED
line_element = text | block | inline_environment | command
#### inline environments ####
inline_environment = known_inline_env | generic_inline_env
known_inline_env = inline_math
generic_inline_env = begin_inline_env //~ paragraph §end_inline_env
begin_inline_env = (-!LB begin_environment) | (begin_environment !LFF)
end_inline_env = end_environment
## (-!LB end_environment) | (end_environment !LFF) # ambiguity with generic_block when EOF
begin_environment = /\\begin{/ §NAME /}/
end_environment = /\\end{/ §::NAME /}/
inline_math = /\$/ /[^$]*/ §/\$/
#### commands ####
command = known_command | text_command | generic_command
known_command = citet | citep | footnote | includegraphics | caption
| multicolumn | hline | cline | documentclass | pdfinfo
text_command = TXTCOMMAND | ESCAPED | BRACKETS
generic_command = !no_command CMDNAME [[ //~ config ] //~ block ]
citet = "\citet" [config] block
citep = ("\citep" | "\cite") [config] block
footnote = "\footnote" block_of_paragraphs
includegraphics = "\includegraphics" [ config ] block
caption = "\caption" block
multicolumn = "\multicolumn" "{" INTEGER "}" tabular_config block_of_paragraphs
hline = "\hline"
cline = "\cline{" INTEGER "-" INTEGER "}"
documentclass = "\documentclass" [ config ] block
pdfinfo = "\pdfinfo" block
#######################################################################
#
# low-level text and character sequences
#
#######################################################################
config = "[" cfg_text §"]"
cfg_text = { ([//~] text) | CMDNAME | SPECIAL }
block = /{/ //~ { !blockcmd text_element //~ } §/}/
text = TEXTCHUNK { //~ TEXTCHUNK }
no_command = "\begin{" | "\end" | BACKSLASH structural
blockcmd = BACKSLASH ( ( "begin{" | "end{" )
( "enumerate" | "itemize" | "figure" | "quote"
| "quotation" | "tabular") "}"
| structural | begin_generic_block | end_generic_block )
structural = "subsection" | "section" | "chapter" | "subsubsection"
| "paragraph" | "subparagraph" | "item"
#######################################################################
#
# primitives
#
#######################################################################
CMDNAME = /\\(?:(?!_)\w)+/~
TXTCOMMAND = /\\text\w+/
ESCAPED = /\\[%$&_\/{}]/
SPECIAL = /[$&_\\\\\/]/
BRACKETS = /[\[\]]/ # left or right square bracket: [ ]
LINEFEED = /[\\][\\]/
NAME = /\w+/~
INTEGER = /\d+/~
TEXTCHUNK = /[^\\%$&\{\}\[\]\s\n]+/ # some piece of text excluding whitespace,
# linefeed and special characters
LF = NEW_LINE { COMMENT__ WHITESPACE__ } # linefeed but not an empty line
LFF = NEW_LINE [ WSPC ] # at least one linefeed
PARSEP = { WHITESPACE__ COMMENT__ } GAP [WSPC] # paragraph separator
WSPC = { COMMENT__ | /\s+/ }+ # arbitrary horizontal or vertical whitespace
GAP = /[ \t]*(?:\n[ \t]*)+\n/~ # at least one empty line, i.e.
# [whitespace] linefeed [whitespace] linefeed
NEW_LINE = /[ \t]*/ [COMMENT__] /\n/
LB = /\s*?\n|$/ # backwards line break for Lookbehind-Operator
# beginning of text marker '$' added for test code
BACKSLASH = /[\\]/
EOF = /(?!.)/ # End-Of-File
r"""Parser for a LaTeX source file.
"""
begin_generic_block = Forward()
block_environment = Forward()
......@@ -435,6 +255,7 @@ LaTeX_AST_transformation_table = {
"known_command": replace_by_single_child,
"text_command": [],
"generic_command": [flatten],
"citet, citep": [],
"footnote": [],
"includegraphics": [],
"caption": [],
......
......@@ -32,7 +32,7 @@ import DHParser.log
from DHParser.log import log_parsing_history
LOGGING = False
LOGGING = True
if not DHParser.dsl.recompile_grammar('LaTeX.ebnf', force=False): # recompiles Grammar only if it has changed
print('\nErrors while recompiling "LaTeX.ebnf":\n--------------------------------------\n\n')
......@@ -71,7 +71,7 @@ def tst_func():
if DHParser.log.is_logging():
print('Saving CST')
with open('REPORT/' + file[:-4] + '.cst', 'w', encoding='utf-8') as f:
f.write(result.as_sxpr(compact=False))
f.write(result.as_sxpr(compact=True))
print('Saving parsing history')
log_parsing_history(parser, os.path.basename(file), html=True)
......@@ -82,7 +82,7 @@ def tst_func():
if DHParser.log.is_logging():
print('Saving AST')
with open('LOGS/' + file[:-4] + '.ast', 'w', encoding='utf-8') as f:
f.write(result.as_sxpr(compact=False))
f.write(result.as_sxpr(compact=True))
print('\nCompiling document: "%s"' % file)
output = compiler(result)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment