Commit ce40ba38 authored by eckhart
Browse files

- LaTeX tree transformations extended

parent e4813a41
...@@ -437,7 +437,7 @@ class EBNFCompiler(Compiler): ...@@ -437,7 +437,7 @@ class EBNFCompiler(Compiler):
if rule.startswith('Alternative'): if rule.startswith('Alternative'):
transformations = '[replace_or_reduce]' transformations = '[replace_or_reduce]'
elif rule.startswith('Synonym'): elif rule.startswith('Synonym'):
transformations = '[replace_by_single_child]' transformations = '[reduce_single_child]'
transtable.append(' "' + name + '": %s,' % transformations) transtable.append(' "' + name + '": %s,' % transformations)
transtable.append(' ":Token, :RE": reduce_single_child,') transtable.append(' ":Token, :RE": reduce_single_child,')
transtable += [' "*": replace_by_single_child', '}', ''] transtable += [' "*": replace_by_single_child', '}', '']
......
...@@ -384,7 +384,7 @@ class Parser(ParserBase): ...@@ -384,7 +384,7 @@ class Parser(ParserBase):
There are two different types of parsers: There are two different types of parsers:
1. *Named parsers* for which a name is set in field parser.name. 1. *Named parsers* for which a name is set in field `parser.name`.
The results produced by these parsers can later be retrieved in The results produced by these parsers can later be retrieved in
the AST by the parser name. the AST by the parser name.
......
...@@ -28,26 +28,27 @@ frontpages = sequence ...@@ -28,26 +28,27 @@ frontpages = sequence
####################################################################### #######################################################################
Chapters = { Chapter [WSPC] }+ Chapters = { Chapter [WSPC] }+
Chapter = "\chapter" block [WSPC] { sequence | Sections } Chapter = "\chapter" heading [WSPC] { sequence | Sections }
Sections = { Section [WSPC] }+ Sections = { Section [WSPC] }+
Section = "\section" block [WSPC] { sequence | SubSections } Section = "\section" heading [WSPC] { sequence | SubSections }
SubSections = { SubSection [WSPC] }+ SubSections = { SubSection [WSPC] }+
SubSection = "\subsection" block [WSPC] { sequence | SubSubSections } SubSection = "\subsection" heading [WSPC] { sequence | SubSubSections }
SubSubSections = { SubSubSection [WSPC] }+ SubSubSections = { SubSubSection [WSPC] }+
SubSubSection = "\subsubsection" block [WSPC] { sequence | Paragraphs } SubSubSection = "\subsubsection" heading [WSPC] { sequence | Paragraphs }
Paragraphs = { Paragraph [WSPC] }+ Paragraphs = { Paragraph [WSPC] }+
Paragraph = "\paragraph" block [WSPC] { sequence | SubParagraphs } Paragraph = "\paragraph" heading [WSPC] { sequence | SubParagraphs }
SubParagraphs = { SubParagraph [WSPC] }+ SubParagraphs = { SubParagraph [WSPC] }+
SubParagraph = "\subparagraph" block [WSPC] [ sequence ] SubParagraph = "\subparagraph" heading [WSPC] [ sequence ]
Bibliography = "\bibliography" block [WSPC] Bibliography = "\bibliography" heading [WSPC]
Index = "\printindex" [WSPC] Index = "\printindex" [WSPC]
heading = block
####################################################################### #######################################################################
# #
......
...@@ -22,7 +22,8 @@ from DHParser import logging, is_filename, Grammar, Compiler, Lookbehind, Altern ...@@ -22,7 +22,8 @@ from DHParser import logging, is_filename, Grammar, Compiler, Lookbehind, Altern
PreprocessorFunc, TransformationDict, \ PreprocessorFunc, TransformationDict, \
Node, TransformationFunc, traverse, remove_children_if, is_anonymous, \ Node, TransformationFunc, traverse, remove_children_if, is_anonymous, \
reduce_single_child, replace_by_single_child, remove_whitespace, \ reduce_single_child, replace_by_single_child, remove_whitespace, \
flatten, is_empty, collapse, replace_content, remove_brackets, is_one_of, remove_first flatten, is_empty, collapse, replace_content, remove_brackets, is_one_of, remove_first, \
remove_parser
####################################################################### #######################################################################
...@@ -77,26 +78,27 @@ class LaTeXGrammar(Grammar): ...@@ -77,26 +78,27 @@ class LaTeXGrammar(Grammar):
####################################################################### #######################################################################
Chapters = { Chapter [WSPC] }+ Chapters = { Chapter [WSPC] }+
Chapter = "\chapter" block [WSPC] { sequence | Sections } Chapter = "\chapter" heading [WSPC] { sequence | Sections }
Sections = { Section [WSPC] }+ Sections = { Section [WSPC] }+
Section = "\section" block [WSPC] { sequence | SubSections } Section = "\section" heading [WSPC] { sequence | SubSections }
SubSections = { SubSection [WSPC] }+ SubSections = { SubSection [WSPC] }+
SubSection = "\subsection" block [WSPC] { sequence | SubSubSections } SubSection = "\subsection" heading [WSPC] { sequence | SubSubSections }
SubSubSections = { SubSubSection [WSPC] }+ SubSubSections = { SubSubSection [WSPC] }+
SubSubSection = "\subsubsection" block [WSPC] { sequence | Paragraphs } SubSubSection = "\subsubsection" heading [WSPC] { sequence | Paragraphs }
Paragraphs = { Paragraph [WSPC] }+ Paragraphs = { Paragraph [WSPC] }+
Paragraph = "\paragraph" block [WSPC] { sequence | SubParagraphs } Paragraph = "\paragraph" heading [WSPC] { sequence | SubParagraphs }
SubParagraphs = { SubParagraph [WSPC] }+ SubParagraphs = { SubParagraph [WSPC] }+
SubParagraph = "\subparagraph" block [WSPC] [ sequence ] SubParagraph = "\subparagraph" heading [WSPC] [ sequence ]
Bibliography = "\bibliography" block [WSPC] Bibliography = "\bibliography" heading [WSPC]
Index = "\printindex" [WSPC] Index = "\printindex" [WSPC]
heading = block
####################################################################### #######################################################################
# #
...@@ -228,7 +230,7 @@ class LaTeXGrammar(Grammar): ...@@ -228,7 +230,7 @@ class LaTeXGrammar(Grammar):
paragraph = Forward() paragraph = Forward()
tabular_config = Forward() tabular_config = Forward()
text_element = Forward() text_element = Forward()
source_hash__ = "70184539c7bcb0d72fcd390a8aade40b" source_hash__ = "fafffa29d26d712fde61c15c1a92dce8"
parser_initialization__ = "upon instantiation" parser_initialization__ = "upon instantiation"
COMMENT__ = r'%.*' COMMENT__ = r'%.*'
WHITESPACE__ = r'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?' WHITESPACE__ = r'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?'
...@@ -298,19 +300,20 @@ class LaTeXGrammar(Grammar): ...@@ -298,19 +300,20 @@ class LaTeXGrammar(Grammar):
generic_block = Series(begin_generic_block, sequence, end_generic_block, mandatory=2) generic_block = Series(begin_generic_block, sequence, end_generic_block, mandatory=2)
known_environment = Alternative(itemize, enumerate, figure, tabular, quotation, verbatim) known_environment = Alternative(itemize, enumerate, figure, tabular, quotation, verbatim)
block_environment.set(Alternative(known_environment, generic_block)) block_environment.set(Alternative(known_environment, generic_block))
heading = Synonym(block)
Index = Series(Token("\\printindex"), Option(WSPC)) Index = Series(Token("\\printindex"), Option(WSPC))
Bibliography = Series(Token("\\bibliography"), block, Option(WSPC)) Bibliography = Series(Token("\\bibliography"), heading, Option(WSPC))
SubParagraph = Series(Token("\\subparagraph"), block, Option(WSPC), Option(sequence)) SubParagraph = Series(Token("\\subparagraph"), heading, Option(WSPC), Option(sequence))
SubParagraphs = OneOrMore(Series(SubParagraph, Option(WSPC))) SubParagraphs = OneOrMore(Series(SubParagraph, Option(WSPC)))
Paragraph = Series(Token("\\paragraph"), block, Option(WSPC), ZeroOrMore(Alternative(sequence, SubParagraphs))) Paragraph = Series(Token("\\paragraph"), heading, Option(WSPC), ZeroOrMore(Alternative(sequence, SubParagraphs)))
Paragraphs = OneOrMore(Series(Paragraph, Option(WSPC))) Paragraphs = OneOrMore(Series(Paragraph, Option(WSPC)))
SubSubSection = Series(Token("\\subsubsection"), block, Option(WSPC), ZeroOrMore(Alternative(sequence, Paragraphs))) SubSubSection = Series(Token("\\subsubsection"), heading, Option(WSPC), ZeroOrMore(Alternative(sequence, Paragraphs)))
SubSubSections = OneOrMore(Series(SubSubSection, Option(WSPC))) SubSubSections = OneOrMore(Series(SubSubSection, Option(WSPC)))
SubSection = Series(Token("\\subsection"), block, Option(WSPC), ZeroOrMore(Alternative(sequence, SubSubSections))) SubSection = Series(Token("\\subsection"), heading, Option(WSPC), ZeroOrMore(Alternative(sequence, SubSubSections)))
SubSections = OneOrMore(Series(SubSection, Option(WSPC))) SubSections = OneOrMore(Series(SubSection, Option(WSPC)))
Section = Series(Token("\\section"), block, Option(WSPC), ZeroOrMore(Alternative(sequence, SubSections))) Section = Series(Token("\\section"), heading, Option(WSPC), ZeroOrMore(Alternative(sequence, SubSections)))
Sections = OneOrMore(Series(Section, Option(WSPC))) Sections = OneOrMore(Series(Section, Option(WSPC)))
Chapter = Series(Token("\\chapter"), block, Option(WSPC), ZeroOrMore(Alternative(sequence, Sections))) Chapter = Series(Token("\\chapter"), heading, Option(WSPC), ZeroOrMore(Alternative(sequence, Sections)))
Chapters = OneOrMore(Series(Chapter, Option(WSPC))) Chapters = OneOrMore(Series(Chapter, Option(WSPC)))
frontpages = Synonym(sequence) frontpages = Synonym(sequence)
document = Series(Option(WSPC), Token("\\begin{document}"), Option(WSPC), frontpages, Option(WSPC), Alternative(Chapters, Sections), Option(WSPC), Option(Bibliography), Option(Index), Option(WSPC), Token("\\end{document}"), Option(WSPC), EOF, mandatory=12) document = Series(Option(WSPC), Token("\\begin{document}"), Option(WSPC), frontpages, Option(WSPC), Alternative(Chapters, Sections), Option(WSPC), Option(Bibliography), Option(Index), Option(WSPC), Token("\\end{document}"), Option(WSPC), EOF, mandatory=12)
...@@ -362,24 +365,16 @@ LaTeX_AST_transformation_table = { ...@@ -362,24 +365,16 @@ LaTeX_AST_transformation_table = {
"preamble": [], "preamble": [],
"document": [], "document": [],
"frontpages": reduce_single_child, "frontpages": reduce_single_child,
"Chapters": [], "Chapters, Sections, SubSections, SubSubSections, Paragraphs, SubParagraphs": [],
"Chapter": [], "Chapter, Section, SubSection, SubSubSection, Paragraph, SubParagraph":
"Sections": [], [remove_first, flatten(is_anonymous, False)],
"Section": [], "heading": reduce_single_child,
"SubSections": [],
"SubSection": [],
"SubSubSections": [],
"SubSubSection": [],
"Paragraphs": [],
"Paragraph": [],
"SubParagraphs": [],
"SubParagraph": [],
"Bibliography": [], "Bibliography": [],
"Index": [], "Index": [],
"block_environment": replace_by_single_child, "block_environment": replace_by_single_child,
"known_environment": replace_by_single_child, "known_environment": replace_by_single_child,
"generic_block": [], "generic_block": [],
"begin_generic_block, end_generic_block": replace_by_single_child, "begin_generic_block, end_generic_block": [remove_parser('NEW_LINE'), replace_by_single_child],
"itemize, enumerate": [remove_brackets, flatten], "itemize, enumerate": [remove_brackets, flatten],
"item": [remove_first], "item": [remove_first],
"figure": [], "figure": [],
...@@ -405,8 +400,8 @@ LaTeX_AST_transformation_table = { ...@@ -405,8 +400,8 @@ LaTeX_AST_transformation_table = {
"footnote": [], "footnote": [],
"includegraphics": [], "includegraphics": [],
"caption": [], "caption": [],
"config": [remove_brackets], "config": [remove_brackets, reduce_single_child],
"block": [remove_brackets, flatten], "block": [remove_brackets, flatten, replace_by_single_child],
"text": collapse, "text": collapse,
"no_command, blockcmd": [], "no_command, blockcmd": [],
"structural": [], "structural": [],
......
...@@ -20,6 +20,7 @@ limitations under the License. ...@@ -20,6 +20,7 @@ limitations under the License.
""" """
import cProfile as profile import cProfile as profile
import fnmatch
import os import os
import pstats import pstats
import sys import sys
...@@ -54,7 +55,7 @@ def tst_func(): ...@@ -54,7 +55,7 @@ def tst_func():
files = os.listdir('testdata') files = os.listdir('testdata')
files.sort() files.sort()
for file in files: for file in files:
if file.lower().endswith('.tex') and file.lower().find('error') < 0: if fnmatch.fnmatch(file, '*1.tex') and file.lower().find('error') < 0:
with open(os.path.join('testdata', file), 'r', encoding='utf-8') as f: with open(os.path.join('testdata', file), 'r', encoding='utf-8') as f:
doc = f.read() doc = f.read()
print('\n\nParsing document: "%s"\n' % file) print('\n\nParsing document: "%s"\n' % file)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment