Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
3978f82c
Commit
3978f82c
authored
Aug 06, 2017
by
Eckhart Arnold
Browse files
- improvements to AST transformations and LaTeX-example
parent
a100f00d
Changes
13
Expand all
Hide whitespace changes
Inline
Side-by-side
DHParser/dsl.py
View file @
3978f82c
...
...
@@ -80,13 +80,13 @@ from DHParser import logging, is_filename, load_if_file, \\
Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture,
\\
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source,
\\
last_value, counterpart, accumulate, PreprocessorFunc,
\\
Node, TransformationFunc,
\\
traverse, remove_children_if,
join
_children,
\\
Node, TransformationFunc,
TRUE_CONDITION,
\\
traverse, remove_children_if,
merge
_children,
is_anonymous,
\\
reduce_single_child, replace_by_single_child, remove_whitespace,
\\
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace,
\\
is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE,
\\
remove_parser, remove_content, remove_brackets,
\\
keep_children,
has_name
, has_content, apply_if, remove_first, remove_last
remove_parser, remove_content, remove_brackets,
replace_parser,
\\
keep_children,
is_one_of
, has_content, apply_if, remove_first, remove_last
'''
...
...
DHParser/ebnf.py
View file @
3978f82c
...
...
@@ -35,7 +35,7 @@ from DHParser.parser import Grammar, mixin_comment, nil_preprocessor, Forward, R
from
DHParser.syntaxtree
import
WHITESPACE_PTYPE
,
TOKEN_PTYPE
,
Node
,
TransformationFunc
from
DHParser.transform
import
traverse
,
remove_brackets
,
\
reduce_single_child
,
replace_by_single_child
,
remove_expendables
,
\
remove_tokens
,
flatten
,
forbid
,
assert_content
,
key_tag_name
remove_tokens
,
flatten
,
forbid
,
assert_content
,
key_tag_name
,
remove_infix_operator
from
DHParser.versionnumber
import
__version__
__all__
=
(
'get_ebnf_preprocessor'
,
...
...
@@ -200,7 +200,7 @@ EBNF_transformation_table = {
"directive, definition"
:
remove_tokens
(
'@'
,
'='
),
"expression"
:
[
replace_by_single_child
,
flatten
,
remove_tokens
(
'|'
)],
[
replace_by_single_child
,
flatten
,
remove_tokens
(
'|'
)],
# remove_infix_operator],
"term"
:
[
replace_by_single_child
,
flatten
],
# supports both idioms: "{ factor }+" and "factor { factor }"
"factor, flowmarker, retrieveop"
:
...
...
@@ -215,7 +215,7 @@ EBNF_transformation_table = {
(
TOKEN_PTYPE
,
WHITESPACE_PTYPE
):
reduce_single_child
,
"list_"
:
[
flatten
,
remove_
tokens
(
','
)
],
[
flatten
,
remove_
infix_operator
],
"*"
:
replace_by_single_child
}
...
...
@@ -353,7 +353,7 @@ class EBNFCompiler(Compiler):
for
name
in
self
.
rules
:
transtable
.
append
(
' "'
+
name
+
'": [],'
)
transtable
.
append
(
' ":Token, :RE": reduce_single_child,'
)
transtable
+=
[
' "*": replace_by_single_child'
,
'}'
,
''
,
tf_name
+
transtable
+=
[
'
#
"*": replace_by_single_child'
,
'}'
,
''
,
tf_name
+
' = partial(traverse, processing_table=%s)'
%
tt_name
,
''
]
transtable
+=
[
TRANSFORMER_FACTORY
.
format
(
NAME
=
self
.
grammar_name
)]
return
'
\n
'
.
join
(
transtable
)
...
...
@@ -719,7 +719,7 @@ class EBNFCompiler(Compiler):
def
on_literal
(
self
,
node
)
->
str
:
return
'Token('
+
str
(
node
).
replace
(
'
\\
'
,
r
'\\'
)
+
')'
# return 'Token(' + ', '.
join
_children([node.result]) + ')' ?
return
'Token('
+
str
(
node
).
replace
(
'
\\
'
,
r
'\\'
)
+
')'
# return 'Token(' + ', '.
merge
_children([node.result]) + ')' ?
def
on_regexp
(
self
,
node
:
Node
)
->
str
:
...
...
DHParser/syntaxtree.py
View file @
3978f82c
...
...
@@ -34,6 +34,7 @@ except ImportError:
from
DHParser.toolkit
import
is_logging
,
log_dir
,
line_col
,
identity
__all__
=
(
'WHITESPACE_PTYPE'
,
'MockParser'
,
'TOKEN_PTYPE'
,
'ZOMBIE_PARSER'
,
'ParserBase'
,
...
...
DHParser/toolkit.py
View file @
3978f82c
...
...
@@ -151,7 +151,7 @@ def is_logging() -> bool:
# if i < 0:
# parameter_list = parameter_list[:i]
# name = f.__self__.__class__.__name__ if f.__name__ == '__init__' else f.__name__
# return "%s(%s)" % (name, ", ".
join
_children(repr(item) for item in parameter_list))
# return "%s(%s)" % (name, ", ".
merge
_children(repr(item) for item in parameter_list))
def
line_col
(
text
:
str
,
pos
:
int
)
->
Tuple
[
int
,
int
]:
...
...
DHParser/transform.py
View file @
3978f82c
...
...
@@ -43,14 +43,15 @@ __all__ = ('transformation_factory',
'reduce_single_child'
,
'replace_parser'
,
'collapse'
,
'
join
_children'
,
'
merge
_children'
,
'replace_content'
,
'apply_if'
,
'is_anonymous'
,
'is_whitespace'
,
'is_empty'
,
'is_expendable'
,
'is_token'
,
'
has_name
'
,
'
is_one_of
'
,
'has_content'
,
'remove_children_if'
,
'remove_parser'
,
...
...
@@ -61,12 +62,17 @@ __all__ = ('transformation_factory',
'remove_empty'
,
'remove_expendables'
,
'remove_brackets'
,
'remove_infix_operator'
,
'remove_single_child'
,
'remove_tokens'
,
'keep_children'
,
'flatten'
,
'forbid'
,
'require'
,
'assert_content'
)
'assert_content'
,
'assert_condition'
,
'assert_has_children'
,
'TRUE_CONDITION'
)
def
transformation_factory
(
t
=
None
):
...
...
@@ -225,12 +231,18 @@ def traverse(root_node, processing_table, key_func=key_tag_name) -> None:
# ------------------------------------------------
def
replace_by_single_child
(
node
):
"""Remove single branch node, replacing it by its immediate descendant.
def
TRUE_CONDITION
(
node
):
return
True
@
transformation_factory
(
Callable
)
def
replace_by_single_child
(
node
,
condition
=
TRUE_CONDITION
):
"""Remove single branch node, replacing it by its immediate descendant
if and only if the condition on the descendant is true.
(In case the descendant's name is empty (i.e. anonymous) the
name of this node's parser is kept.)
"""
if
node
.
children
and
len
(
node
.
result
)
==
1
:
if
node
.
children
and
len
(
node
.
result
)
==
1
and
condition
(
node
.
children
[
0
])
:
if
not
node
.
result
[
0
].
parser
.
name
:
node
.
result
[
0
].
parser
.
name
=
node
.
parser
.
name
node
.
parser
=
node
.
result
[
0
].
parser
...
...
@@ -238,11 +250,14 @@ def replace_by_single_child(node):
node
.
result
=
node
.
result
[
0
].
result
def
reduce_single_child
(
node
):
@
transformation_factory
(
Callable
)
def
reduce_single_child
(
node
,
condition
=
TRUE_CONDITION
):
"""Reduce a single branch node, by transferring the result of its
immediate descendant to this node, but keeping this node's parser entry.
If the condition evaluates to false on the descendant, it will not
be reduced.
"""
if
node
.
children
and
len
(
node
.
result
)
==
1
:
if
node
.
children
and
len
(
node
.
result
)
==
1
and
condition
(
node
.
children
[
0
])
:
node
.
_errors
.
extend
(
node
.
result
[
0
].
_errors
)
node
.
result
=
node
.
result
[
0
].
result
...
...
@@ -288,19 +303,20 @@ def flatten(node, condition=lambda node: not node.parser.name, recursive=True):
def
collapse
(
node
):
"""Collapses all sub-nodes by replacing the
node's result
with
it's
string representation.
"""Collapses all sub-nodes
of a node
by replacing the
m
with
the
string representation
of the node
.
"""
node
.
result
=
str
(
node
)
@
transformation_factory
def
join
_children
(
node
,
tag_names
:
List
[
str
]):
def
merge
_children
(
node
,
tag_names
:
List
[
str
]):
"""Joins all children next to each other and with particular tag-
names into a single child node with mock parser 'parser_name'.
names into a single child node with mock parser with the name of
the first tag name in the list.
"""
result
=
[]
name
,
ptype
=
(
tag_names
[
0
]
.
split
(
':'
)
+
[
''
])[:
2
]
name
,
ptype
=
(
''
,
tag_names
[
0
]
)
if
tag_names
[
0
][:
1
]
==
':'
else
(
tag_names
[
0
],
''
)
if
node
.
children
:
i
=
0
L
=
len
(
node
.
children
)
...
...
@@ -356,9 +372,14 @@ def is_token(node, tokens: AbstractSet[str] = frozenset()) -> bool:
return
node
.
parser
.
ptype
==
TOKEN_PTYPE
and
(
not
tokens
or
node
.
result
in
tokens
)
def
has_name
(
node
,
regexp
:
str
)
->
bool
:
"""Checks a node's tag name against a regular expression."""
return
bool
(
re
.
match
(
regexp
,
node
.
tag_name
))
def
is_anonymous
(
node
):
return
not
node
.
parser
.
name
def
is_one_of
(
node
,
tag_name_set
:
AbstractSet
[
str
])
->
bool
:
"""Returns true, if the node's tag_name is one of the
given tag names."""
return
node
.
tag_name
in
tag_name_set
def
has_content
(
node
,
regexp
:
str
)
->
bool
:
...
...
@@ -395,9 +416,11 @@ def remove_children_if(node, condition: Callable, section: slice = slice(None)):
remove_whitespace
=
remove_children_if
(
is_whitespace
)
# partial(remove_children_if, condition=is_whitespace)
remove_empty
=
remove_children_if
(
is_empty
)
remove_expendables
=
remove_children_if
(
is_expendable
)
# partial(remove_children_if, condition=is_expendable)
remove_first
=
keep_children
(
slice
(
1
,
None
))
remove_last
=
keep_children
(
slice
(
None
,
-
1
))
remove_brackets
=
keep_children
(
slice
(
1
,
-
1
))
remove_first
=
apply_if
(
keep_children
(
slice
(
1
,
None
)),
lambda
nd
:
len
(
nd
.
children
)
>
1
)
remove_last
=
apply_if
(
keep_children
(
slice
(
None
,
-
1
)),
lambda
nd
:
len
(
nd
.
children
)
>
1
)
remove_brackets
=
apply_if
(
keep_children
(
slice
(
1
,
-
1
)),
lambda
nd
:
len
(
nd
.
children
)
>=
2
)
remove_infix_operator
=
keep_children
(
slice
(
0
,
None
,
2
))
remove_single_child
=
apply_if
(
keep_children
(
slice
(
0
)),
lambda
nd
:
len
(
nd
.
children
)
==
1
)
@
transformation_factory
...
...
@@ -411,7 +434,7 @@ def remove_tokens(node, tokens: AbstractSet[str] = frozenset()):
@
transformation_factory
def
remove_parser
(
node
,
regexp
:
str
):
"""Removes children by 'tag name'."""
remove_children_if
(
node
,
partial
(
has_name
,
regexp
=
regexp
))
remove_children_if
(
node
,
partial
(
is_one_of
,
regexp
=
regexp
))
@
transformation_factory
...
...
@@ -451,7 +474,7 @@ def assert_content(node, regexp: str):
#
# @transformation_factory
# def assert_name(node, regexp: str):
# if not
has_name
(node, regexp):
# if not
is_one_of
(node, regexp):
# node.add_error('Element name "%s" does not match %s' % (node.tag_name), str(regexp))
#
#
...
...
DevScripts/create_standalone.py
View file @
3978f82c
...
...
@@ -112,7 +112,7 @@ def selftest() -> bool:
if errors:
print("Selftest FAILED :-(")
print("\n\n".
join
_children(errors))
print("\n\n".
merge
_children(errors))
return False
print(generated_ebnf_parser)
print("\n\nSTAGE 2: Selfhosting-test: Trying to compile EBNF-Grammar with generated parser...\n")
...
...
Introduction.md
View file @
3978f82c
...
...
@@ -369,8 +369,8 @@ scrool down to the AST section, you'll see something like this:
"bibliographisches": [remove_parser('NZ'), remove_tokens],
"autor, werk, untertitel, ort": [],
"jahr": [reduce_single_child],
"wortfolge": [flatten(
has_name
('WORT'), recursive=False), remove_last(is_whitespace), collapse],
"namenfolge": [flatten(
has_name
('NAME'), recursive=False), remove_last(is_whitespace), collapse],
"wortfolge": [flatten(
is_one_of
('WORT'), recursive=False), remove_last(is_whitespace), collapse],
"namenfolge": [flatten(
is_one_of
('NAME'), recursive=False), remove_last(is_whitespace), collapse],
"verknüpfung": [remove_tokens('<', '>'), reduce_single_child],
"ziel": reduce_single_child,
"gedicht, strophe, text": [flatten, remove_parser('LEERZEILE'), remove_parser('NZ')],
...
...
dhparser.py
View file @
3978f82c
...
...
@@ -47,7 +47,7 @@ from DHParser.toolkit import logging
# transformer, compiler)
# print(result)
# if errors:
# print('\n\n'.
join
_children(errors))
# print('\n\n'.
merge
_children(errors))
# sys.exit(1)
# else:
# # compile the grammar again using the result of the previous
...
...
examples/LaTeX/LaTeX.ebnf
View file @
3978f82c
...
...
@@ -77,17 +77,18 @@ table_config = "{" /[lcr|]+/~ "}"
block_of_paragraphs = /{/ sequence §/}/
sequence = { (paragraph | block_environment ) [PARSEP] }+
paragraph = { !blockcmd text_element
s
//~ }+
text_element
s
= command | text | block | inline_environment
paragraph = { !blockcmd text_element //~ }+
text_element = command | text | block | inline_environment
#### inline enivronments ####
inline_environment = known_inline_env | generic_inline_env
known_inline_env = inline_math
generic_inline_env = (begin_inline_env { text_element
s
}+ §end_
environm
en
t
)
generic_inline_env = (begin_inline_env { text_element }+ §end_
inline_
en
v
)
begin_inline_env = (-!LB begin_environment) | (begin_environment -!LB)
# end_inline_env = (-!LB end_environment) | (end_environment -!LB) # ambiguity with genric_block when EOF
end_inline_env = end_environment
# (-!LB end_environment) | (end_environment -!LB) # ambiguity with genric_block when EOF
begin_environment = "\begin{" §NAME §"}"
end_environment = "\end{" §::NAME §"}"
...
...
@@ -113,7 +114,7 @@ caption = "\caption" block
config = "[" cfgtext §"]"
block = /{/ { text_element
s
} §/}/
block = /{/ { text_element } §/}/
text = { cfgtext | (BRACKETS //~) }+
cfgtext = { word_sequence | (ESCAPED //~) }+
...
...
@@ -143,7 +144,6 @@ ESCAPED = /\\[%$&_\/]/
BRACKETS = /[\[\]]/ # left or right square bracket: [ ]
TEXTCHUNK = /[^\\%$&\{\}\[\]\s\n]+/ # some piece of text excluding whitespace,
# linefeed and special characters
WSPC = /[ \t]+/ # (horizontal) whitespace
LF = !PARSEP /[ \t]*\n[ \t]*/ # linefeed but not an empty line
PARSEP = /[ \t]*(?:\n[ \t]*)+\n[ \t]*/ # at least one empty line, i.e.
# [whitespace] linefeed [whitespace] linefeed
...
...
examples/LaTeX/LaTeXCompiler.py
View file @
3978f82c
...
...
@@ -7,23 +7,26 @@
#######################################################################
from
functools
import
partial
import
os
import
sys
from
functools
import
partial
try
:
import
regex
as
re
except
ImportError
:
import
re
from
DHParser
import
logging
,
is_filename
,
Grammar
,
Compiler
,
Lookbehind
,
Alternative
,
Pop
,
\
Required
,
Token
,
Synonym
,
\
Optional
,
NegativeLookbehind
,
OneOrMore
,
RegExp
,
Series
,
RE
,
Capture
,
\
from
DHParser
import
logging
,
is_filename
,
load_if_file
,
\
Grammar
,
Compiler
,
nil_preprocessor
,
\
Lookbehind
,
Lookahead
,
Alternative
,
Pop
,
Required
,
Token
,
Synonym
,
\
Optional
,
NegativeLookbehind
,
OneOrMore
,
RegExp
,
Retrieve
,
Series
,
RE
,
Capture
,
\
ZeroOrMore
,
Forward
,
NegativeLookahead
,
mixin_comment
,
compile_source
,
\
PreprocessorFunc
,
\
Node
,
TransformationFunc
,
\
traverse
,
join_children
,
remove_whitespace
,
remove_parser
,
\
reduce_single_child
,
replace_by_single_child
,
remove_expendables
,
remove_empty
,
flatten
,
\
collapse
,
replace_content
,
remove_brackets
,
remove_first
last_value
,
counterpart
,
accumulate
,
PreprocessorFunc
,
\
Node
,
TransformationFunc
,
MockParser
,
\
traverse
,
remove_children_if
,
merge_children
,
TRUE_CONDITION
,
is_anonymous
,
\
reduce_single_child
,
replace_by_single_child
,
remove_whitespace
,
\
remove_expendables
,
remove_empty
,
remove_tokens
,
flatten
,
is_whitespace
,
\
is_empty
,
is_expendable
,
collapse
,
replace_content
,
WHITESPACE_PTYPE
,
TOKEN_PTYPE
,
\
remove_parser
,
remove_content
,
remove_brackets
,
replace_parser
,
\
keep_children
,
is_one_of
,
has_content
,
apply_if
,
remove_first
,
remove_last
#######################################################################
...
...
@@ -35,7 +38,6 @@ from DHParser import logging, is_filename, Grammar, Compiler, Lookbehind, Altern
def
LaTeXPreprocessor
(
text
):
return
text
def
get_preprocessor
()
->
PreprocessorFunc
:
return
LaTeXPreprocessor
...
...
@@ -128,17 +130,18 @@ class LaTeXGrammar(Grammar):
block_of_paragraphs = /{/ sequence §/}/
sequence = { (paragraph | block_environment ) [PARSEP] }+
paragraph = { !blockcmd text_element
s
//~ }+
text_element
s
= command | text | block | inline_environment
paragraph = { !blockcmd text_element //~ }+
text_element = command | text | block | inline_environment
#### inline enivronments ####
inline_environment = known_inline_env | generic_inline_env
known_inline_env = inline_math
generic_inline_env = (begin_inline_env { text_element
s
}+ §end_
environm
en
t
)
generic_inline_env = (begin_inline_env { text_element }+ §end_
inline_
en
v
)
begin_inline_env = (-!LB begin_environment) | (begin_environment -!LB)
# end_inline_env = (-!LB end_environment) | (end_environment -!LB) # ambiguity with genric_block when EOF
end_inline_env = end_environment
# (-!LB end_environment) | (end_environment -!LB) # ambiguity with genric_block when EOF
begin_environment = "\begin{" §NAME §"}"
end_environment = "\end{" §::NAME §"}"
...
...
@@ -164,7 +167,7 @@ class LaTeXGrammar(Grammar):
config = "[" cfgtext §"]"
block = /{/ { text_element
s
} §/}/
block = /{/ { text_element } §/}/
text = { cfgtext | (BRACKETS //~) }+
cfgtext = { word_sequence | (ESCAPED //~) }+
...
...
@@ -194,7 +197,6 @@ class LaTeXGrammar(Grammar):
BRACKETS = /[\[\]]/ # left or right square bracket: [ ]
TEXTCHUNK = /[^\\%$&\{\}\[\]\s\n]+/ # some piece of text excluding whitespace,
# linefeed and special characters
WSPC = /[ \t]+/ # (horizontal) whitespace
LF = !PARSEP /[ \t]*\n[ \t]*/ # linefeed but not an empty line
PARSEP = /[ \t]*(?:\n[ \t]*)+\n[ \t]*/ # at least one empty line, i.e.
# [whitespace] linefeed [whitespace] linefeed
...
...
@@ -208,8 +210,8 @@ class LaTeXGrammar(Grammar):
block_environment
=
Forward
()
block_of_paragraphs
=
Forward
()
end_generic_block
=
Forward
()
text_element
s
=
Forward
()
source_hash__
=
"
06385bac4dd7cb009bd29712a8fc692c
"
text_element
=
Forward
()
source_hash__
=
"
4e001b31490278efccfe43953bfdcf58
"
parser_initialization__
=
"upon instantiation"
COMMENT__
=
r
'%.*(?:\n|$)'
WSP__
=
mixin_comment
(
whitespace
=
r
'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?'
,
comment
=
r
'%.*(?:\n|$)'
)
...
...
@@ -220,7 +222,6 @@ class LaTeXGrammar(Grammar):
LB
=
RegExp
(
'
\\
s*?
\\
n|$'
)
PARSEP
=
RegExp
(
'[
\\
t]*(?:
\\
n[
\\
t]*)+
\\
n[
\\
t]*'
)
LF
=
Series
(
NegativeLookahead
(
PARSEP
),
RegExp
(
'[
\\
t]*
\\
n[
\\
t]*'
))
WSPC
=
RegExp
(
'[
\\
t]+'
)
TEXTCHUNK
=
RegExp
(
'[^
\\\\
%$&
\\
{
\\
}
\\
[
\\
]
\\
s
\\
n]+'
)
BRACKETS
=
RegExp
(
'[
\\
[
\\
]]'
)
ESCAPED
=
RegExp
(
'
\\\\
[%$&_/]'
)
...
...
@@ -232,7 +233,7 @@ class LaTeXGrammar(Grammar):
word_sequence
=
OneOrMore
(
Series
(
TEXTCHUNK
,
RE
(
''
)))
cfgtext
=
OneOrMore
(
Alternative
(
word_sequence
,
Series
(
ESCAPED
,
RE
(
''
))))
text
=
OneOrMore
(
Alternative
(
cfgtext
,
Series
(
BRACKETS
,
RE
(
''
))))
block
=
Series
(
RegExp
(
'{'
),
ZeroOrMore
(
text_element
s
),
Required
(
RegExp
(
'}'
)))
block
=
Series
(
RegExp
(
'{'
),
ZeroOrMore
(
text_element
),
Required
(
RegExp
(
'}'
)))
config
=
Series
(
Token
(
"["
),
cfgtext
,
Required
(
Token
(
"]"
)))
caption
=
Series
(
Token
(
"
\\
caption"
),
block
)
includegraphics
=
Series
(
Token
(
"
\\
includegraphics"
),
config
,
block
)
...
...
@@ -243,12 +244,13 @@ class LaTeXGrammar(Grammar):
inline_math
=
Series
(
Token
(
"$"
),
RegExp
(
'[^$]*'
),
Token
(
"$"
))
end_environment
=
Series
(
Token
(
"
\\
end{"
),
Required
(
Pop
(
NAME
)),
Required
(
Token
(
"}"
)))
begin_environment
=
Series
(
Token
(
"
\\
begin{"
),
Required
(
NAME
),
Required
(
Token
(
"}"
)))
end_inline_env
=
Synonym
(
end_environment
)
begin_inline_env
=
Alternative
(
Series
(
NegativeLookbehind
(
LB
),
begin_environment
),
Series
(
begin_environment
,
NegativeLookbehind
(
LB
)))
generic_inline_env
=
Series
(
begin_inline_env
,
OneOrMore
(
text_element
s
),
Required
(
end_
environm
en
t
))
generic_inline_env
=
Series
(
begin_inline_env
,
OneOrMore
(
text_element
),
Required
(
end_
inline_
en
v
))
known_inline_env
=
Synonym
(
inline_math
)
inline_environment
=
Alternative
(
known_inline_env
,
generic_inline_env
)
text_element
s
.
set
(
Alternative
(
command
,
text
,
block
,
inline_environment
))
paragraph
=
OneOrMore
(
Series
(
NegativeLookahead
(
blockcmd
),
text_element
s
,
RE
(
''
)))
text_element
.
set
(
Alternative
(
command
,
text
,
block
,
inline_environment
))
paragraph
=
OneOrMore
(
Series
(
NegativeLookahead
(
blockcmd
),
text_element
,
RE
(
''
)))
sequence
=
OneOrMore
(
Series
(
Alternative
(
paragraph
,
block_environment
),
Optional
(
PARSEP
)))
block_of_paragraphs
.
set
(
Series
(
RegExp
(
'{'
),
sequence
,
Required
(
RegExp
(
'}'
))))
table_config
=
Series
(
Token
(
"{"
),
RE
(
'[lcr|]+'
),
Token
(
"}"
))
...
...
@@ -307,66 +309,93 @@ def streamline_whitespace(node):
c
=
s
.
find
(
'%'
)
n
=
s
.
find
(
'
\n
'
)
if
c
>=
0
:
node
.
result
=
(
' '
if
(
n
>=
c
)
or
(
n
<
0
)
else
'
\n
'
)
+
s
[
c
:].
rstrip
(
'
\t
'
)
node
.
result
=
'
\n
'
# node.result = (' ' if (n >= c) or (n < 0) else '\n')+ s[c:].rstrip(' \t')
# node.parser = MockParser('COMMENT', '')
elif
s
.
find
(
'
\n
'
)
>=
0
:
node
.
result
=
'
\n
'
else
:
node
.
result
=
' '
def
watch
(
node
):
print
(
node
.
as_sxpr
())
LaTeX_AST_transformation_table
=
{
# AST Transformations for the LaTeX-grammar
"+"
:
remove_empty
,
"+"
:
remove_empty
,
"latexdoc"
:
[],
"preamble"
:
[],
"document"
:
[],
"blockenv"
:
[],
"parblock"
:
[],
"sequence"
:
[
flatten
,
remove_parser
(
'PARSEP'
),
replace_by_single_child
],
"enumerate, itemize"
:
[
remove_brackets
,
remove_parser
(
'PARSEP'
),
reduce_single_child
],
"item"
:
[
remove_first
,
remove_parser
(
'PARSEP'
)],
"paragraph"
:
[
flatten
(
lambda
node
:
not
node
.
parser
.
name
or
node
.
parser
.
name
==
"text"
),
join_children
(
'text'
,
':Whitespace'
)],
"quotation, generic_bloc, generic_inline_env, inline_math"
:
[
remove_brackets
],
"inline_environment"
:
[],
"begin_environment"
:
[],
"end_environment"
:
[],
# "command": [],
"generic_command"
:
[],
"config, block"
:
[
remove_brackets
,
reduce_single_child
],
"text"
:
[
reduce_single_child
,
join_children
(
'text'
,
'word_sequence'
,
':Whitespace'
)],
"cfgtext"
:
[
flatten
,
reduce_single_child
],
"word_sequence"
:
[
collapse
],
"frontpages"
:
[],
"Chapters"
:
[],
"Chapter"
:
[],
"Sections"
:
[],
"Section"
:
[],
"SubSections"
:
[],
"SubSection"
:
[],
"SubSubSections"
:
[],
"SubSubSection"
:
[],
"Paragraphs"
:
[],
"Paragraph"
:
[],
"SubParagraphs"
:
[],
"SubParagraph"
:
[],
"Bibliography"
:
[],
"Index"
:
[],
"block_environment"
:
replace_by_single_child
,
"known_environment"
:
replace_by_single_child
,
"generic_block"
:
[],
"begin_generic_block, end_generic_block"
:
reduce_single_child
,
"itemize, enumerate"
:
[
remove_brackets
,
flatten
],
"item"
:
[
remove_first
],
"figure"
:
[],
"quotation"
:
[
reduce_single_child
,
remove_brackets
],
"verbatim"
:
[],
"table"
:
[],
"table_config"
:
[],
"block_of_paragraphs"
:
[],
"sequence"
:
[
flatten
],
"paragraph"
:
[
flatten
],
"text_element"
:
[],
"inline_environment"
:
replace_by_single_child
,
"known_inline_env"
:
replace_by_single_child
,
"generic_inline_env"
:
[],
"begin_inline_env, end_inline_env"
:
[
reduce_single_child
],
"begin_environment, end_environment"
:
[
remove_brackets
,
reduce_single_child
],
"inline_math"
:
[
remove_brackets
,
reduce_single_child
],
"command"
:
[],
"known_command"
:
[],
"generic_command"
:
[
flatten
],
"footnote"
:
[],
"includegraphics"
:
[],
"caption"
:
[],
"config"
:
[
remove_brackets
],
"block"
:
[
remove_brackets
,
reduce_single_child
(
is_anonymous
)],
"text"
:
[
reduce_single_child
,
merge_children
(
'word_sequence'
,
':Whitespace'
,
'TEXTCHUNK'
)],
"cfgtext"
:
[
flatten
,
reduce_single_child
,
replace_parser
(
'text'
)],
"word_sequence"
:
collapse
,
# [flatten, merge_children('TEXTCHUNK', ':Whitespace'), reduce_single_child],
"no_command"
:
[],
"blockcmd"
:
[],
"
CMDNAME"
:
[
remove_expendables
,
reduce_single_child
],
"NAME"
:
[
reduce_single_child
],
"ESCAPED"
:
[
re
duce_single_child
],
"
structural"
:
[],
"CMDNAME"
:
[
remove_whitespace
,
reduce_single_child
(
is_anonymous
)
],
"NAME"
:
[
reduce_single_child
,
remove_whitespace
,
reduce_single_child
],
"ESCAPED"
:
[
re
place_content
(
lambda
node
:
str
(
node
)[
1
:])
],
"BRACKETS"
:
[],
"TEXTCHUNK"
:
[],
"WSPC, :Whitespace"
:
[],
# streamline_whitespace, # whitespace will be removed anyway
"LF"
:
replace_content
(
lambda
node
:
'
\n
'
),
"PARSEP"
:
[],
# replace_content(lambda node: '\n\n'),
"LF"
:
[],
"PARSEP"
:
replace_content
(
lambda
node
:
'
\n\n
'
),
"LB"
:
[],
"BACKSLASH"
:
[],
"EOF"
:
[],
":Token"
:
[],
# [remove_whitespace, reduce_single_child], # Tokens will be removed anyway?
"
*"
:
replace_by_single_child
,
":Token"
:
[],
":RE"
:
replace_by_single_child
,
"
:Whitespace"
:
streamline_whitespace
,
"*"
:
replace_by_single_child
}
LaTeXTransform
=
partial
(
traverse
,
processing_table
=
LaTeX_AST_transformation_table
)
# LaTeXTransform = lambda tree : 1
def
get_transformer
()
->
TransformationFunc
:
return
LaTeXTransform
...
...
@@ -488,7 +517,7 @@ class LaTeXCompiler(Compiler):
def
on_paragraph
(
self
,
node
):
pass
def
on_text_element
s
(
self
,
node
):
def
on_text_element
(
self
,
node
):
pass
def
on_inline_environment
(
self
,
node
):
...
...
@@ -503,9 +532,6 @@ class LaTeXCompiler(Compiler):
def
on_begin_inline_env
(
self
,
node
):
pass
def
on_end_inline_env
(
self
,
node
):
pass
def
on_begin_environment
(
self
,
node
):
pass
...
...
@@ -548,6 +574,9 @@ class LaTeXCompiler(Compiler):
def
on_word_sequence
(
self
,
node
):
pass
def
on_no_command
(
self
,
node
):
pass
def
on_blockcmd
(
self
,
node
):
pass
...
...
@@ -560,9 +589,6 @@ class LaTeXCompiler(Compiler):
def
on_NAME
(
self
,
node
):
pass
def
on_MATH
(
self
,
node
):
pass
def
on_ESCAPED
(
self
,
node
):
pass
...
...
@@ -581,13 +607,13 @@ class LaTeXCompiler(Compiler):
def
on_PARSEP
(
self
,
node
):
pass
def
on_
EOF
(
self
,
node
):
def
on_
LB
(
self
,
node
):
pass
def
on_
SUCC_LB
(
self
,
node
):
def
on_
BACKSLASH
(
self
,
node
):
pass
def
on_
PRED_LB
(
self
,
node
):
def
on_
EOF
(
self
,
node
):
pass