Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Open sidebar
badw-it
DHParser
Commits
db9e1654
Commit
db9e1654
authored
Jul 08, 2017
by
Eckhart Arnold
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
- Extended LaTeX Grammar and tests
parent
e8c626df
Changes
11
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
427 additions
and
97 deletions
+427
-97
DHParser/dsl.py
DHParser/dsl.py
+1
-1
DHParser/ebnf.py
DHParser/ebnf.py
+3
-4
DHParser/parsers.py
DHParser/parsers.py
+4
-6
DHParser/syntaxtree.py
DHParser/syntaxtree.py
+17
-25
DHParser/testing.py
DHParser/testing.py
+6
-9
DHParser/toolkit.py
DHParser/toolkit.py
+24
-41
examples/LaTeX/LaTeX.ebnf
examples/LaTeX/LaTeX.ebnf
+1
-1
examples/LaTeX/LaTeXCompiler.py
examples/LaTeX/LaTeXCompiler.py
+352
-0
examples/LaTeX/grammar_tests/test_paragraph.ini
examples/LaTeX/grammar_tests/test_paragraph.ini
+10
-5
examples/LaTeX/tst_grammar.py
examples/LaTeX/tst_grammar.py
+5
-2
test/test_parsers.py
test/test_parsers.py
+4
-3
No files found.
DHParser/dsl.py
View file @
db9e1654
...
...
@@ -85,7 +85,7 @@ from DHParser.syntaxtree import Node, traverse, remove_children_if, \\
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace,
\\
is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE,
\\
TransformationFunc, remove_parser, remove_content, remove_brackets,
\\
keep_children, has_name, has_content
keep_children, has_name, has_content
, apply_if
'''
...
...
DHParser/ebnf.py
View file @
db9e1654
...
...
@@ -71,8 +71,7 @@ def get_ebnf_scanner() -> ScannerFunc:
#
########################################################################
# TODO: Introduce dummy/rename-parser, for simple assignments (e.g. jahr = JAHRESZAHL) or substitution!
# TODO: Raise Error for unconnected parsers!
class
EBNFGrammar
(
Grammar
):
r
"""Parser for an EBNF source file, with this grammar:
...
...
@@ -194,8 +193,6 @@ def get_ebnf_grammar() -> EBNFGrammar:
########################################################################
#TODO: Add Capture and Retrieve Validation: A variable mustn't be captured twice before retrieval?!?
EBNF_transformation_table
=
{
# AST Transformations for EBNF-grammar
"+"
:
...
...
@@ -300,6 +297,8 @@ class EBNFCompilerError(Exception):
pass
#TODO: Add Capture and Retrieve Validation: A variable mustn't be captured twice before retrieval?!? Is this possible at compile time?
class
EBNFCompiler
(
Compiler
):
"""Generates a Parser from an abstract syntax tree of a grammar specified
in EBNF-Notation.
...
...
DHParser/parsers.py
View file @
db9e1654
...
...
@@ -188,7 +188,7 @@ def add_parser_guard(parser_func):
# if location has already been visited by the current parser,
# return saved result
if
location
in
parser
.
visited
:
return
parser
.
visited
[
location
]
# TODO: might not work with Capture-Retrieve-Pop-Parsers!!!
return
parser
.
visited
[
location
]
# break left recursion at the maximum allowed depth
if
parser
.
recursion_counter
.
setdefault
(
location
,
0
)
>
LEFT_RECURSION_DEPTH
:
return
None
,
text
...
...
@@ -200,7 +200,8 @@ def add_parser_guard(parser_func):
if
node
is
not
None
:
# in case of a recursive call saves the result of the first
# (or left-most) call that matches; but not for variable manipulating parsers,
# (or left-most) call that matches
# variable manipulating parsers will be excluded, though,
# because caching would interfere with changes of variable state
if
grammar
.
last_rb__loc__
>
location
:
parser
.
visited
[
location
]
=
(
node
,
rest
)
...
...
@@ -970,13 +971,10 @@ class Alternative(NaryOperator):
self
.
been_here
=
dict
()
# type: Dict[int, int]
def
__call__
(
self
,
text
:
str
)
->
Tuple
[
Node
,
str
]:
location
=
len
(
text
)
pindex
=
self
.
been_here
.
setdefault
(
location
,
0
)
for
parser
in
self
.
parsers
[
pindex
:]:
for
parser
in
self
.
parsers
:
node
,
text_
=
parser
(
text
)
if
node
:
return
Node
(
self
,
node
),
text_
# self.been_here[location] += 1
return
None
,
text
def
__repr__
(
self
):
...
...
DHParser/syntaxtree.py
View file @
db9e1654
...
...
@@ -52,6 +52,7 @@ __all__ = ['WHITESPACE_PTYPE',
'collapse'
,
'join'
,
'replace_content'
,
'apply_if'
,
'is_whitespace'
,
'is_empty'
,
'is_expendable'
,
...
...
@@ -758,6 +759,14 @@ def join(node, tag_names: List[str]):
# ------------------------------------------------
@transformation_factory
def replace_content(node, func: Callable):  # Callable[[ResultType], ResultType]
    """Replaces the content of the node in place.

    ``func`` is called with the node's current ``result`` (not with the
    node itself) and whatever it returns becomes the new ``result``.
    Nothing is returned.
    """
    mapped_result = func(node.result)
    node.result = mapped_result
def
is_whitespace
(
node
):
"""Removes whitespace and comments defined with the
``@comment``-directive."""
...
...
@@ -787,6 +796,14 @@ def has_content(node, contents: AbstractSet[str]) -> bool:
return
str
(
node
)
in
contents
@transformation_factory
def apply_if(node, transformation: Callable, condition: Callable):
    """Runs ``transformation(node)`` only when ``condition(node)`` is truthy.

    Both callables receive the node itself. The return value of the
    transformation is discarded; this function always returns ``None``.
    """
    if not condition(node):
        return
    transformation(node)
@
transformation_factory
def
keep_children
(
node
,
section
:
slice
=
slice
(
None
,
None
,
None
),
condition
=
lambda
node
:
True
):
"""Keeps only the nodes which fall into a slice of the result field
...
...
@@ -809,23 +826,6 @@ remove_empty = remove_children_if(is_empty)
remove_expendables
=
remove_children_if
(
is_expendable
)
# partial(remove_children_if, condition=is_expendable)
remove_brackets
=
keep_children
(
slice
(
1
,
-
1
))
# @transformation_factory(Callable)
# def remove_first(node, condition=lambda node: True):
# """Removes the first child if the condition is met.
# Otherwise does nothing."""
# if node.children:
# if condition(node.children[0]):
# node.result = node.result[1:]
#
#
# @transformation_factory(Callable)
# def remove_last(node, condition=lambda node: True):
# """Removes the last child if the condition is met.
# Otherwise does nothing."""
# if node.children:
# if condition(node.children[-1]):
# node.result = node.result[:-1]
@
transformation_factory
def
remove_tokens
(
node
,
tokens
:
AbstractSet
[
str
]
=
frozenset
()):
...
...
@@ -847,14 +847,6 @@ def remove_content(node, contents: AbstractSet[str]):
remove_children_if
(
node
,
partial
(
has_content
,
contents
=
contents
))
@transformation_factory
def replace_content(node, func: Callable):  # Callable[[ResultType], ResultType]
    """Replaces the content of the node in place.

    ``func`` is called with the node's current ``result`` (not with the
    node itself) and whatever it returns becomes the new ``result``.
    Nothing is returned.
    """
    mapped_result = func(node.result)
    node.result = mapped_result
########################################################################
#
# AST semantic validation functions
...
...
DHParser/testing.py
View file @
db9e1654
...
...
@@ -38,11 +38,7 @@ def mock_syntax_tree(sexpr):
Example:
>>> mock_syntax_tree("(a (b c))").as_sxpr()
(a
(b
"c"
)
)
'(a
\\
n (b
\\
n "c"
\\
n )
\\
n)'
"""
def
next_block
(
s
):
s
=
s
.
strip
()
...
...
@@ -109,6 +105,7 @@ def recompile_grammar(ebnf_filename, force=False) -> bool:
base
,
ext
=
os
.
path
.
splitext
(
ebnf_filename
)
compiler_name
=
base
+
'Compiler.py'
error_file_name
=
base
+
'_ebnf_ERRORS.txt'
errors
=
[]
if
(
not
os
.
path
.
exists
(
compiler_name
)
or
force
or
grammar_changed
(
compiler_name
,
ebnf_filename
)):
...
...
@@ -116,14 +113,14 @@ def recompile_grammar(ebnf_filename, force=False) -> bool:
errors
=
compile_on_disk
(
ebnf_filename
)
if
errors
:
# print("Errors while compiling: " + ebnf_filename + '!')
with
open
(
base
+
'_errors.txt'
,
'w'
)
as
f
:
with
open
(
error_file_name
,
'w'
)
as
f
:
for
e
in
errors
:
f
.
write
(
e
)
f
.
write
(
'
\n
'
)
return
False
if
not
errors
and
os
.
path
.
exists
(
base
+
'_errors.txt'
):
os
.
remove
(
base
+
'_errors.txt'
)
if
not
errors
and
os
.
path
.
exists
(
error_file_name
):
os
.
remove
(
error_file_name
)
return
True
...
...
@@ -133,7 +130,7 @@ UNIT_STAGES = {'match', 'fail', 'ast', 'cst', '__ast__', '__cst__'}
def
unit_from_configfile
(
config_filename
):
"""Reads a grammar unit test from a config file.
"""
cfg
=
configparser
.
ConfigParser
()
cfg
=
configparser
.
ConfigParser
(
interpolation
=
None
)
cfg
.
read
(
config_filename
)
OD
=
collections
.
OrderedDict
unit
=
OD
()
...
...
DHParser/toolkit.py
View file @
db9e1654
...
...
@@ -154,9 +154,7 @@ def is_logging() -> bool:
def
repr_call
(
f
,
parameter_list
)
->
str
:
"""Turns a list of items into a string resembling the parameter
list of a function call by omitting default values at the end:
>>> def(a, b=1):
print(a, b)
>>> def f(a, b=1): print(a, b)
>>> repr_call(f, (5,1))
'f(5)'
>>> repr_call(f, (5,2))
...
...
@@ -206,8 +204,8 @@ def compact_sexpr(s) -> str:
whitespace.
Example:
>>> compact_sexpr(
"
(a
\n
(b
\n
c
\n
)
\n
)
\
n
"
)
(a (b c))
>>> compact_sexpr(
'
(a
\
\
n (b
\
\
n c
\
\
n )
\
\
n)
\
\
n'
)
'
(a (b c))
'
"""
return
re
.
sub
(
'\s(?=\))'
,
''
,
re
.
sub
(
'\s+'
,
' '
,
s
)).
strip
()
...
...
@@ -306,26 +304,29 @@ def smart_list(arg) -> list:
If the argument is a string, it will be interpreted as a list of
comma separated values, trying ';', ',', ' ' as possible delimiters
in this order, e.g.
>>> smart_list("1; 2, 3; 4")
["1", "2, 3", "4"]
>>> smart_list("2, 3")
["2", "3"]
>>> smart_list("a b cd")
["a", "b", "cd"]
>>> smart_list('1; 2, 3; 4')
['1', '2, 3', '4']
>>> smart_list('2, 3')
['2', '3']
>>> smart_list('a b cd')
['a', 'b', 'cd']
If the argument is a collection other than a string, it will be
returned as is, e.g.
>>> smart_list((1, 2, 3))
(1, 2, 3)
>>> smart_list({1, 2, 3})
{1, 2, 3}
>>> smart_list((1, 2, 3))
(1, 2, 3)
>>> smart_list({1, 2, 3})
{1, 2, 3}
If the argument is another iterable than a collection, it will
be converted into a list, e.g.
>>> smart_list(i for i in {1,2,3})
[1, 2, 3]
>>> smart_list(i for i in {1,2,3})
[1, 2, 3]
Finally, if none of the above is true, the argument will be
wrapped in a list and returned, e.g.
>>> smart_list(125)
[125]
>>> smart_list(125)
[125]
"""
if
isinstance
(
arg
,
str
):
for
delimiter
in
(
';'
,
','
):
...
...
@@ -333,8 +334,8 @@ def smart_list(arg) -> list:
if
len
(
lst
)
>
1
:
return
[
s
.
strip
()
for
s
in
lst
]
return
[
s
.
strip
()
for
s
in
arg
.
strip
().
split
(
' '
)]
#
elif isinstance(arg, collections.abc.
Sequence): # python 3.6: collections.abc.Collection
#
return arg
elif
isinstance
(
arg
,
collections
.
abc
.
Container
):
return
arg
elif
isinstance
(
arg
,
collections
.
abc
.
Iterable
):
return
list
(
arg
)
else
:
...
...
@@ -346,8 +347,8 @@ def expand_table(compact_table):
containing comma separated words into single keyword entries with
the same values. Returns the expanded table.
Example:
>>> expand_table({"a, b": 1,
"b": 1,
('d','e','f'):5, "c":3})
{'a': 1, 'b': 1,
'c': 3,
'd': 5, 'e': 5, 'f': 5}
>>> expand_table({"a, b": 1, ('d','e','f'):5, "c":3})
{'a': 1, 'b': 1, 'd': 5, 'e': 5, 'f': 5
, 'c': 3
}
"""
expanded_table
=
{}
keys
=
list
(
compact_table
.
keys
())
...
...
@@ -359,24 +360,6 @@ def expand_table(compact_table):
expanded_table
[
k
]
=
value
return
expanded_table
# # commented, because this approach is too error-prone in connection with smart_list
# def as_partial(partial_ellipsis) -> functools.partial:
# """Transforms ``partial_ellipsis`` into a partial function
# application, i.e. string "remove_tokens({'(', ')'})" will be
# transformed into the partial "partial(remove_tokens, {'(', ')'})".
# Partial ellipsises can be considered as a short hand notation for
# partials, which look like function, calls but aren't. Plain
# function names are returned as is. Also, if ``partial_ellipsis``
# already is a callable, it will be returned as is.
# """
# if callable(partial_ellipsis):
# return partial_ellipsis
# m = re.match('\s*(\w+)(?:\(([^)]*)\))?\s*$', partial_ellipsis)
# if m:
# fname, fargs = m.groups()
# return eval("functools.partial(%s, %s)" % (fname, fargs)) if fargs else eval(fname)
# raise SyntaxError(partial_ellipsis + " does not resemble a partial function ellipsis!")
def
sane_parser_name
(
name
)
->
bool
:
"""Checks whether given name is an acceptable parser name. Parser names
...
...
examples/LaTeX/LaTeX.ebnf
View file @
db9e1654
# latex Grammar
@ testing = True
@ whitespace = /[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?/ # optional whitespace, including at most one linefeed
@ whitespace = /[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?/
# optional whitespace, including at most one linefeed
@ comment = /%.*(?:\n|$)/
latexdoc = preamble document
...
...
examples/LaTeX/LaTeXCompiler.py
0 → 100644
View file @
db9e1654
#!/usr/bin/python
#######################################################################
#
# SYMBOLS SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
from
functools
import
partial
import
os
import
sys
try
:
import
regex
as
re
except
ImportError
:
import
re
from
DHParser.toolkit
import
logging
,
is_filename
,
load_if_file
from
DHParser.parsers
import
Grammar
,
Compiler
,
nil_scanner
,
\
Lookbehind
,
Lookahead
,
Alternative
,
Pop
,
Required
,
Token
,
Synonym
,
\
Optional
,
NegativeLookbehind
,
OneOrMore
,
RegExp
,
Retrieve
,
Series
,
RE
,
Capture
,
\
ZeroOrMore
,
Forward
,
NegativeLookahead
,
mixin_comment
,
compile_source
,
\
last_value
,
counterpart
,
accumulate
,
ScannerFunc
from
DHParser.syntaxtree
import
Node
,
traverse
,
remove_brackets
,
keep_children
,
\
remove_children_if
,
reduce_single_child
,
replace_by_single_child
,
remove_whitespace
,
\
remove_expendables
,
remove_tokens
,
flatten
,
is_whitespace
,
is_expendable
,
join
,
\
collapse
,
replace_content
,
WHITESPACE_PTYPE
,
TOKEN_PTYPE
,
TransformationFunc
,
\
remove_empty
,
replace_parser
,
apply_if
#######################################################################
#
# SCANNER SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
def LaTeXScanner(text):
    """Scanner stage for LaTeX sources: hands ``text`` back unchanged."""
    unchanged = text
    return unchanged
def get_scanner() -> ScannerFunc:
    """Returns the scanner function used for LaTeX sources,
    i.e. ``LaTeXScanner``.
    """
    scanner = LaTeXScanner
    return scanner
#######################################################################
#
# PARSER SECTION - Don't edit! CHANGES WILL BE OVERWRITTEN!
#
#######################################################################
class
LaTeXGrammar
(
Grammar
):
r
"""Parser for a LaTeX source file, with this grammar:
# latex Grammar
@ testing = True
@ whitespace = /[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?/ # optional whitespace, including at most one linefeed
@ comment = /%.*(?:\n|$)/
latexdoc = preamble document
preamble = { command }+
document = [PARSEP] { [PARSEP] paragraph } §EOF
blockenv = beginenv sequence §endenv
parblock = "{" sequence §"}"
sequence = { paragraph [PARSEP] }+
paragraph = { !blockcmd (command | block | text) //~ }+
inlineenv = beginenv { command | block | text }+ endenv
beginenv = "\begin{" §NAME §"}"
endenv = "\end{" §::NAME §"}"
command = CMDNAME [[ //~ config ] //~ block ]
config = "[" cfgtext §"]"
block = /{/ { command | text | block } §/}/
text = { cfgtext | (BRACKETS //~) }+
cfgtext = { word_sequence | (ESCAPED //~) }+
word_sequence = { TEXTCHUNK //~ }+
blockcmd = "\subsection" | "\section" | "\chapter" | "\subsubsection"
| "\paragraph" | "\subparagraph" | "\begin{enumerate}"
| "\begin{itemize}" | "\item" | "\begin{figure}"
CMDNAME = /\\(?:(?!_)\w)+/~
NAME = /\w+/~
ESCAPED = /\\[%$&_\/]/
BRACKETS = /[\[\]]/ # left or right square bracket: [ ]
TEXTCHUNK = /[^\\%$&\{\}\[\]\s\n]+/ # some piece of text excluding whitespace,
# linefeed and special characters
WSPC = /[ \t]+/ # (horizontal) whitespace
LF = !PARSEP /[ \t]*\n[ \t]*/ # LF but not an empty line
PARSEP = /[ \t]*(?:\n[ \t]*)+\n[ \t]*/ # at least one empty line, i.e.
# [whitespace] linefeed [whitespace] linefeed
EOF = !/./
"""
block
=
Forward
()
command
=
Forward
()
source_hash__
=
"936e76e84dd027b0af532abfad617d15"
parser_initialization__
=
"upon instantiation"
COMMENT__
=
r
'%.*(?:\n|$)'
WSP__
=
mixin_comment
(
whitespace
=
r
'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?'
,
comment
=
r
'%.*(?:\n|$)'
)
wspL__
=
''
wspR__
=
WSP__
EOF
=
NegativeLookahead
(
RE
(
'.'
,
wR
=
''
))
PARSEP
=
RE
(
'[
\\
t]*(?:
\\
n[
\\
t]*)+
\\
n[
\\
t]*'
,
wR
=
''
)
LF
=
Series
(
NegativeLookahead
(
PARSEP
),
RE
(
'[
\\
t]*
\\
n[
\\
t]*'
,
wR
=
''
))
WSPC
=
RE
(
'[
\\
t]+'
,
wR
=
''
)
TEXTCHUNK
=
RE
(
'[^
\\\\
%$&
\\
{
\\
}
\\
[
\\
]
\\
s
\\
n]+'
,
wR
=
''
)
BRACKETS
=
RE
(
'[
\\
[
\\
]]'
,
wR
=
''
)
ESCAPED
=
RE
(
'
\\\\
[%$&_/]'
,
wR
=
''
)
NAME
=
Capture
(
RE
(
'
\\
w+'
))
CMDNAME
=
RE
(
'
\\\\
(?:(?!_)
\\
w)+'
)
blockcmd
=
Alternative
(
Token
(
"
\\
subsection"
),
Token
(
"
\\
section"
),
Token
(
"
\\
chapter"
),
Token
(
"
\\
subsubsection"
),
Token
(
"
\\
paragraph"
),
Token
(
"
\\
subparagraph"
),
Token
(
"
\\
begin{enumerate}"
),
Token
(
"
\\
begin{itemize}"
),
Token
(
"
\\
item"
),
Token
(
"
\\
begin{figure}"
))
word_sequence
=
OneOrMore
(
Series
(
TEXTCHUNK
,
RE
(
''
)))
cfgtext
=
OneOrMore
(
Alternative
(
word_sequence
,
Series
(
ESCAPED
,
RE
(
''
))))
text
=
OneOrMore
(
Alternative
(
cfgtext
,
Series
(
BRACKETS
,
RE
(
''
))))
block
.
set
(
Series
(
RE
(
'{'
,
wR
=
''
),
ZeroOrMore
(
Alternative
(
command
,
text
,
block
)),
Required
(
RE
(
'}'
,
wR
=
''
))))
config
=
Series
(
Token
(
"["
),
cfgtext
,
Required
(
Token
(
"]"
)))
command
.
set
(
Series
(
CMDNAME
,
Optional
(
Series
(
Optional
(
Series
(
RE
(
''
),
config
)),
RE
(
''
),
block
))))
endenv
=
Series
(
Token
(
"
\\
end{"
),
Required
(
Pop
(
NAME
)),
Required
(
Token
(
"}"
)))
beginenv
=
Series
(
Token
(
"
\\
begin{"
),
Required
(
NAME
),
Required
(
Token
(
"}"
)))
inlineenv
=
Series
(
beginenv
,
OneOrMore
(
Alternative
(
command
,
block
,
text
)),
endenv
)
paragraph
=
OneOrMore
(
Series
(
NegativeLookahead
(
blockcmd
),
Alternative
(
command
,
block
,
text
),
RE
(
''
)))
sequence
=
OneOrMore
(
Series
(
paragraph
,
Optional
(
PARSEP
)))
parblock
=
Series
(
Token
(
"{"
),
sequence
,
Required
(
Token
(
"}"
)))
blockenv
=
Series
(
beginenv
,
sequence
,
Required
(
endenv
))
document
=
Series
(
Optional
(
PARSEP
),
ZeroOrMore
(
Series
(
Optional
(
PARSEP
),
paragraph
)),
Required
(
EOF
))
preamble
=
OneOrMore
(
command
)
latexdoc
=
Series
(
preamble
,
document
)
root__
=
latexdoc
def get_grammar() -> LaTeXGrammar:
    """Returns the shared ``LaTeXGrammar`` instance, creating it on the
    first call.

    The instance is kept in the global name
    ``thread_local_LaTeX_grammar_singleton``; a ``NameError`` on reading
    that name is taken to mean that no instance has been created yet.
    NOTE(review): despite its name, the variable is declared with a plain
    ``global`` statement here -- confirm whether real thread-locality
    is intended.
    """
    global thread_local_LaTeX_grammar_singleton
    try:
        cached = thread_local_LaTeX_grammar_singleton
    except NameError:
        cached = thread_local_LaTeX_grammar_singleton = LaTeXGrammar()
    return cached
#######################################################################
#
# AST SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
def streamline_whitespace(node):
    """Normalizes the content of a 'WSPC' or ':Whitespace' node in place.

    Without a '%' in the content, the result becomes a single linefeed if
    the content contains a linefeed, and a single blank otherwise.
    With a '%', the result is everything from the first '%' onwards
    (trailing tabs stripped), prefixed by a linefeed if a linefeed occurs
    before the '%', or by a blank otherwise.
    """
    assert node.tag_name in ['WSPC', ':Whitespace']
    content = str(node)
    comment_pos = content.find('%')
    newline_pos = content.find('\n')

    if comment_pos < 0:
        # no comment inside the whitespace: collapse to one character
        node.result = '\n' if newline_pos >= 0 else ' '
        return

    if newline_pos < 0 or newline_pos >= comment_pos:
        lead = ' '
    else:
        lead = '\n'
    # NOTE(review): the strip set is reproduced as shown; verify whether
    # a blank was meant to be stripped as well.
    node.result = lead + content[comment_pos:].rstrip('\t')
# Maps node names of the LaTeX grammar to the transformation (or list of
# transformations) that is handed to ``traverse`` as ``processing_table``.
# An empty list means that no transformation is registered for that name.
# NOTE(review): the keys "+" and "*" are presumably special entries
# interpreted by ``traverse`` -- confirm their meaning in
# DHParser.syntaxtree.
LaTeX_AST_transformation_table = {
    # AST Transformations for the LaTeX-grammar
    "+": remove_empty,
    "latexdoc": [],
    "preamble": [],
    "document": [],
    "blockenv": [],
    "parblock": [],
    "sequence": flatten,
    "paragraph":
        # the flatten-condition holds for nodes whose parser has no name
        # or is named "text"
        [flatten(lambda node: not node.parser.name or node.parser.name == "text"),
         join('text', ':Whitespace')],
    "inlineenv": [],
    "beginenv": [],
    "endenv": [],
    "command": [],
    "config": [],
    "block": [remove_brackets, reduce_single_child],
    "text": [reduce_single_child, join('text', 'word_sequence', ':Whitespace')],
    "cfgtext": [flatten, reduce_single_child],
    "word_sequence": [collapse],
    "blockcmd": [],
    "CMDNAME": [remove_expendables, reduce_single_child],
    "NAME": [],
    "ESCAPED": [reduce_single_child],
    "BRACKETS": [],
    "TEXTCHUNK": [],
    # NOTE(review): a comma separated key presumably stands for several
    # node names sharing one entry -- confirm that the table is expanded
    # before lookup.
    "WSPC, :Whitespace": streamline_whitespace,
    # replace_content calls its function with the node's result; the
    # lambdas ignore that argument and return a constant
    "LF": replace_content(lambda node: '\n'),
    "PARSEP": replace_content(lambda node: '\n\n'),
    "EOF": [],
    "*": replace_by_single_child,
}

# ``traverse`` with the table above pre-bound as ``processing_table``
LaTeXTransform = partial(traverse, processing_table=LaTeX_AST_transformation_table)
# LaTeXTransform = lambda tree : 1
def get_transformer() -> TransformationFunc:
    """Returns the AST transformation function for LaTeX syntax trees,
    i.e. ``LaTeXTransform``.
    """
    transformer = LaTeXTransform
    return transformer
#######################################################################
#
# COMPILER SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
class LaTeXCompiler(Compiler):
    """Compiler for the abstract-syntax-tree of a LaTeX source file.

    Only ``on_latexdoc`` produces output so far; every other ``on_*``
    method is an empty placeholder that returns ``None``.
    NOTE(review): the ``on_<name>`` methods are presumably selected by
    the ``Compiler`` base class according to the node's name -- confirm
    against DHParser.parsers.
    """

    def __init__(self, grammar_name="LaTeX", grammar_source=""):
        """Initializes the base ``Compiler`` and checks the grammar name.

        Args:
            grammar_name: name of the grammar; must start with one or
                more word characters reaching to the end of the string.
            grammar_source: passed through to the base class unchanged.

        Raises:
            AssertionError: if ``grammar_name`` does not match.
        """
        super().__init__(grammar_name, grammar_source)
        # raw string, so that the backslashes reach the regex engine
        # instead of being read as (invalid) string escape sequences
        assert re.match(r'\w+\Z', grammar_name)

    def on_latexdoc(self, node):
        """Returns the S-expression serialization of ``node``."""
        # NOTE(review): the doctest in DHParser.testing calls
        # ``as_sxpr()``; verify that ``as_sexpr`` is the method name
        # that Node provides.
        return node.as_sexpr()

    # The remaining handlers are not implemented yet.

    def on_preamble(self, node):
        pass

    def on_document(self, node):
        pass

    def on_blockenv(self, node):
        pass

    def on_parblock(self, node):
        pass

    def on_sequence(self, node):
        pass

    def on_paragraph(self, node):
        pass

    def on_inlineenv(self, node):
        pass

    def on_beginenv(self, node):
        pass

    def on_endenv(self, node):
        pass

    def on_command(self, node):
        pass

    def on_config(self, node):
        pass

    def on_block(self, node):
        pass

    def on_text(self, node):
        pass

    def on_cfgtext(self, node):
        pass

    def on_word_sequence(self, node):
        pass

    def on_blockcmd(self, node):
        pass

    def on_CMDNAME(self, node):
        pass

    def on_NAME(self, node):
        pass

    def on_ESCAPED(self, node):
        pass

    def on_BRACKETS(self, node):
        pass

    def on_TEXTCHUNK(self, node):
        pass

    def on_WSPC(self, node):
        pass

    def on_LF(self, node):
        pass

    def on_PARSEP(self, node):
        pass

    def on_EOF(self, node):
        pass
def
get_compiler
(
grammar_name
=
"LaTeX"
,
grammar_source
=
""
)
->
LaTeXCompiler
: