Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
813bebe5
Commit
813bebe5
authored
Jul 30, 2017
by
Eckhart Arnold
Browse files
- bugfixes
parent
821cb67c
Changes
12
Hide whitespace changes
Inline
Side-by-side
DHParser/dsl.py
View file @
813bebe5
...
...
@@ -81,7 +81,7 @@ from DHParser import logging, is_filename, load_if_file, \\
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source,
\\
last_value, counterpart, accumulate, PreprocessorFunc,
\\
Node, TransformationFunc,
\\
traverse, remove_children_if,
\\
traverse, remove_children_if,
join,
\\
reduce_single_child, replace_by_single_child, remove_whitespace,
\\
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace,
\\
is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE,
\\
...
...
DHParser/parser.py
View file @
813bebe5
...
...
@@ -132,7 +132,7 @@ PreprocessorFunc = Union[Callable[[str], str], partial]
LEFT_RECURSION_DEPTH
=
8
# type: int
# because of python's recursion depth limit, this value ought not to be
# set too high. PyPy allows higher values than CPython
MAX_DROPOUTS
=
5
# type: int
MAX_DROPOUTS
=
3
# type: int
# stop trying to recover parsing after so many errors
...
...
@@ -231,7 +231,8 @@ def add_parser_guard(parser_func):
# in case of left recursion, the first recursive step that
# matches will store its result in the cache
parser
.
visited
[
location
]
=
(
node
,
rest
)
grammar
.
last_node__
=
node
# store last node for Lookbehind parser
# store last non-empty node for Lookbehind parser
if
len
(
rest
)
<
location
:
grammar
.
last_node__
=
node
parser
.
recursion_counter
[
location
]
-=
1
...
...
@@ -293,6 +294,15 @@ class Parser(ParserBase, metaclass=ParserMetaClass):
2. *Anonymous parsers* where the name-field just contains the empty
string. AST-transformation of Anonymous parsers can be hooked
only to their class name, and not to the individual parser.
Parser objects are callable and parsing is done by calling a parser
object with the text to parse. If the parser matches it returns
a tuple consisting of a node representing the root of the concrete
syntax tree resulting from the match as well as the substring
`text[i:]` where i is the length of matched text (which can be
zero in the case of parsers like `ZeroOrMore` or `Optional`).
If `i > 0` then the parser has "moved forward". If the parser does
not match it returns `(None, text)`.
"""
ApplyFunc
=
Callable
[[
'Parser'
],
None
]
...
...
@@ -304,15 +314,27 @@ class Parser(ParserBase, metaclass=ParserMetaClass):
self
.
reset
()
def
__deepcopy__
(
self
,
memo
):
"""Deepcopy method of the parser. Upon instantiation of a Grammar-
object, parsers will be deep-copied to the Grammar object. If a
derived parser-class changes the signature of the constructor,
the `__deepcopy__`-method must be replaced (i.e. overridden without
calling the same method from the superclass) by the derived class.
"""
return
self
.
__class__
(
self
.
name
)
def
reset
(
self
):
"""Initializes or resets any parser variables. If overridden,
the `reset()`-method of the parent class must be called from the
`reset()`-method of the derived class."""
self
.
visited
=
dict
()
# type: Dict[int, Tuple[Node, str]]
self
.
recursion_counter
=
dict
()
# type: Dict[int, int]
self
.
cycle_detection
=
set
()
# type: Set[Callable]
return
self
def
__call__
(
self
,
text
:
str
)
->
Tuple
[
Node
,
str
]:
"""Applies the parser to the given `text` and returns a node with
the results or None as well as the text at the position right behind
the matching string."""
return
None
,
text
# default behaviour: don't match
def
__add__
(
self
,
other
:
'Parser'
)
->
'Series'
:
...
...
@@ -332,10 +354,12 @@ class Parser(ParserBase, metaclass=ParserMetaClass):
@
grammar
.
setter
def
grammar
(
self
,
grammar
:
'Grammar'
):
assert
self
.
_grammar
is
None
or
self
.
_grammar
==
grammar
,
\
"Parser has already been assigned to a Grammar object!"
self
.
_grammar
=
grammar
self
.
_grammar_assigned_notifier
()
if
self
.
_grammar
is
None
:
self
.
_grammar
=
grammar
self
.
_grammar_assigned_notifier
()
else
:
assert
self
.
_grammar
==
grammar
,
\
"Parser has already been assigned to a different Grammar object!"
def
_grammar_assigned_notifier
(
self
):
"""A function that notifies the parser object that it has been
...
...
@@ -345,7 +369,7 @@ class Parser(ParserBase, metaclass=ParserMetaClass):
def
apply
(
self
,
func
:
ApplyFunc
):
"""
Applies function `func(parser)` recursively to this parser and all
descendant
s of the tree of parsers
. The same function can never
descendant
parsers if any exist
. The same function can never
be applied twice between calls of the ``reset()``-method!
"""
if
func
in
self
.
cycle_detection
:
...
...
@@ -387,7 +411,7 @@ class Grammar:
>>> number = RE('\d+') + RE('\.') + RE('\d+') | RE('\d+')
>>> number_parser = Grammar(number)
>>> number_parser("3.1416").
show
()
>>> number_parser("3.1416").
content
()
'3.1416'
Collecting the parsers that define a grammar in a descendant class of
...
...
@@ -518,7 +542,7 @@ class Grammar:
# parsers not connected to the root object will be copied later
# on demand (see Grammar.__getitem__()). Usually, the need to
# do so only arises during testing.
self
.
root__
=
root
if
root
else
copy
.
deepcopy
(
self
.
__class__
.
root__
)
self
.
root__
=
copy
.
deepcopy
(
root
)
if
root
else
copy
.
deepcopy
(
self
.
__class__
.
root__
)
if
self
.
wspL__
:
self
.
wsp_left_parser__
=
Whitespace
(
self
.
wspL__
)
# type: ParserBase
...
...
@@ -556,7 +580,7 @@ class Grammar:
self
.
rollback__
=
[]
# type: List[Tuple[int, Callable]]
self
.
last_rb__loc__
=
-
1
# type: int
# previously parsed node, needed by Lookbehind parser
self
.
last_node__
=
No
ne
# type: Node
self
.
last_node__
=
No
de
(
ZOMBIE_PARSER
,
''
)
# type: Node
# support for call stack tracing
self
.
call_stack__
=
[]
# type: List[Parser]
# snapshots of call stacks
...
...
@@ -807,13 +831,20 @@ class PreprocessorToken(Parser):
class
RegExp
(
Parser
):
"""
Regular expression parser.
"""Regular expression parser.
The RegExp-parser parses text that matches a regular expression.
RegExp can also be considered as the "atomic parser", because all
other parsers delegate part of the parsing job to other parsers,
but do not match text directly.
Example:
>>> word = RegExp(r'\w+')
>>> Grammar(word)("Haus").content()
'Haus'
EBNF-Notation: `/ ... /`
EBNF-Example: `word = /\w+/`
"""
def
__init__
(
self
,
regexp
,
name
:
str
=
''
)
->
None
:
...
...
@@ -856,6 +887,21 @@ class RE(Parser):
string, e.g. use r'\s*' or r'[
\t
]+', but not r'\s+'. If the
respective parameters in the constructor are set to ``None`` the
default whitespace expression from the Grammar object will be used.
Example (allowing whitespace on the right hand side, but not on
the left hand side of a regular expression):
>>> word = RE(r'\w+', wR=r'\s*')
>>> parser = Grammar(word)
>>> result = parser('Haus ')
>>> result.content()
'Haus '
>>> result.structure()
'(:RE (:RegExp "Haus") (:Whitespace " "))'
>>> parser(' Haus').content()
' <<< Error on " Haus" | Parser did not match! Invalid source file? >>> '
EBNF-Notation: `/ ... /~` or `~/ ... /` or `~/ ... /~`
EBNF-Example: `word = /\w+/~`
"""
def
__init__
(
self
,
regexp
,
wL
=
None
,
wR
=
None
,
name
=
''
):
"""Constructor for class RE.
...
...
@@ -1004,6 +1050,30 @@ class NaryOperator(Parser):
class
Optional
(
UnaryOperator
):
"""
Parser `Optional` always matches, even if its child-parser
did not match.
If the child-parser did not match `Optional` returns a node
with no content and does not move forward in the text.
If the child-parser did match, `Optional` returns a node
with the node returned by the child-parser as its single
child and the text at the position where the child-parser
left it.
Examples:
>>> number = Optional(Token('-')) + RegExp(r'\d+') + Optional(RegExp(r'\.\d+'))
>>> Grammar(number)('3.14159').content()
'3.14159'
>>> Grammar(number)('3.14159').structure()
'(:Series (:Optional) (:RegExp "3") (:Optional (:RegExp ".14159")))'
>>> Grammar(number)('-1').content()
'-1'
EBNF-Notation: `[ ... ]`
EBNF-Example: `number = ["-"] /\d+/ [ /\.\d+/ ]`
"""
def
__init__
(
self
,
parser
:
Parser
,
name
:
str
=
''
)
->
None
:
super
(
Optional
,
self
).
__init__
(
parser
,
name
)
# assert isinstance(parser, Parser)
...
...
@@ -1024,6 +1094,7 @@ class Optional(UnaryOperator):
return
'['
+
(
self
.
parser
.
repr
[
1
:
-
1
]
if
isinstance
(
self
.
parser
,
Alternative
)
and
not
self
.
parser
.
name
else
self
.
parser
.
repr
)
+
']'
class
ZeroOrMore
(
Optional
):
def
__call__
(
self
,
text
:
str
)
->
Tuple
[
Node
,
str
]:
results
=
()
# type: Tuple[Node, ...]
...
...
@@ -1120,12 +1191,12 @@ class Alternative(NaryOperator):
# the order of the sub-expression matters!
>>> number = RE('\d+') | RE('\d+') + RE('\.') + RE('\d+')
>>> Grammar(number)("3.1416").
show
()
>>> Grammar(number)("3.1416").
content
()
'3 <<< Error on ".1416" | Parser stopped before end! trying to recover... >>> '
# the most selective expression should be put first:
>>> number = RE('\d+') + RE('\.') + RE('\d+') | RE('\d+')
>>> Grammar(number)("3.1416").
show
()
>>> Grammar(number)("3.1416").
content
()
'3.1416'
"""
...
...
@@ -1246,7 +1317,6 @@ class Lookbehind(FlowOperator):
assert
isinstance
(
p
,
RegExp
),
str
(
type
(
p
))
self
.
regexp
=
p
.
main
.
regexp
if
isinstance
(
p
,
RE
)
else
p
.
regexp
super
(
Lookbehind
,
self
).
__init__
(
parser
,
name
)
print
(
"WARNING: Lookbehind Operator is experimental!"
)
def
__call__
(
self
,
text
:
str
)
->
Tuple
[
Node
,
str
]:
if
self
.
sign
(
self
.
condition
()):
...
...
@@ -1262,7 +1332,10 @@ class Lookbehind(FlowOperator):
def
condition
(
self
):
node
=
self
.
grammar
.
last_node__
return
node
and
self
.
regexp
.
match
(
str
(
node
))
assert
node
is
not
None
# can be removed
s
=
str
(
node
)
assert
s
or
node
.
parser
.
name
==
'__ZOMBIE__'
,
str
(
node
.
parser
)
return
self
.
regexp
.
match
(
s
)
class
NegativeLookbehind
(
Lookbehind
):
...
...
DHParser/syntaxtree.py
View file @
813bebe5
...
...
@@ -132,6 +132,17 @@ StrictResultType = Union[ChildrenType, str]
ResultType
=
Union
[
ChildrenType
,
'Node'
,
str
,
None
]
def
oneliner_sxpr
(
sxpr
:
str
)
->
str
:
"""Returns S-expression `sxpr` as a one liner without unnecessary
whitespace.
Example:
>>> oneliner_sxpr('(a
\\
n (b
\\
n c
\\
n )
\\
n)
\\
n')
'(a (b c))'
"""
return
re
.
sub
(
'\s(?=\))'
,
''
,
re
.
sub
(
'\s+'
,
' '
,
sxpr
)).
strip
()
class
Node
:
"""
Represents a node in the concrete or abstract syntax tree.
...
...
@@ -259,13 +270,34 @@ class Node:
def
errors
(
self
)
->
List
[
Error
]:
return
[
Error
(
self
.
pos
,
err
)
for
err
in
self
.
_errors
]
def
show
(
self
)
->
str
:
"""Returns content as string, inserting error messages where
errors occurred.
def
add_error
(
self
,
error_str
)
->
'Node'
:
self
.
_errors
.
append
(
error_str
)
self
.
error_flag
=
True
return
self
def
propagate_error_flags
(
self
)
->
None
:
"""Recursively propagates error flags set on child nodes to its
parents. This can be used if errors are added to descendant
nodes after syntaxtree construction, i.e. in the compile phase.
"""
s
=
""
.
join
(
child
.
show
()
for
child
in
self
.
children
)
if
self
.
children
\
else
str
(
self
.
result
)
return
(
' <<< Error on "%s" | %s >>> '
%
(
s
,
'; '
.
join
(
self
.
_errors
)))
if
self
.
_errors
else
s
for
child
in
self
.
children
:
child
.
propagate_error_flags
()
self
.
error_flag
=
self
.
error_flag
or
child
.
error_flag
def
collect_errors
(
self
,
clear_errors
=
False
)
->
List
[
Error
]:
"""
Returns all errors of this node or any child node in the form
of a list of tuples (position, error_message), where position
is always relative to this node.
"""
errors
=
self
.
errors
if
clear_errors
:
self
.
_errors
=
[]
self
.
error_flag
=
False
if
self
.
children
:
for
child
in
self
.
children
:
errors
.
extend
(
child
.
collect_errors
(
clear_errors
))
return
errors
def
_tree_repr
(
self
,
tab
,
openF
,
closeF
,
dataF
=
identity
,
density
=
0
)
->
str
:
"""
...
...
@@ -363,39 +395,20 @@ class Node:
return
self
.
_tree_repr
(
' '
,
opening
,
closing
,
density
=
1
)
def
add_error
(
self
,
error_str
)
->
'Node'
:
self
.
_errors
.
append
(
error_str
)
self
.
error_flag
=
True
return
self
def
propagate_error_flags
(
self
)
->
None
:
"""Recursively propagates error flags set on child nodes to its
parents. This can be used if errors are added to descendant
nodes after syntaxtree construction, i.e. in the compile phase.
"""
for
child
in
self
.
children
:
child
.
propagate_error_flags
()
self
.
error_flag
=
self
.
error_flag
or
child
.
error_flag
def
structure
(
self
)
->
str
:
"""Return structure (and content) as S-expression on a single line
without any line breaks."""
return
oneliner_sxpr
(
self
.
as_sxpr
())
def
co
llect_errors
(
self
,
clear_errors
=
False
)
->
List
[
Error
]
:
def
co
ntent
(
self
)
->
str
:
"""
Returns all errors of this node or any child node in the form
of a set of tuples (position, error_message), where position
is always relative to this node.
Returns content as string, inserting error messages where
errors occurred.
"""
errors
=
self
.
errors
if
clear_errors
:
self
.
_errors
=
[]
self
.
error_flag
=
False
if
self
.
children
:
for
child
in
self
.
children
:
errors
.
extend
(
child
.
collect_errors
(
clear_errors
))
return
errors
def
log
(
self
,
log_file_name
):
st_file_name
=
log_file_name
with
open
(
os
.
path
.
join
(
log_dir
(),
st_file_name
),
"w"
,
encoding
=
"utf-8"
)
as
f
:
f
.
write
(
self
.
as_sxpr
())
s
=
""
.
join
(
child
.
content
()
for
child
in
self
.
children
)
if
self
.
children
\
else
str
(
self
.
result
)
return
(
' <<< Error on "%s" | %s >>> '
%
(
s
,
'; '
.
join
(
self
.
_errors
)))
if
self
.
_errors
else
s
def
find
(
self
,
match_function
:
Callable
)
->
Iterator
[
'Node'
]:
"""Finds nodes in the tree that match a specific criterion.
...
...
@@ -458,6 +471,11 @@ class Node:
# return self.result,
# return nav(path.split('/'))
def
log
(
self
,
log_file_name
):
st_file_name
=
log_file_name
with
open
(
os
.
path
.
join
(
log_dir
(),
st_file_name
),
"w"
,
encoding
=
"utf-8"
)
as
f
:
f
.
write
(
self
.
as_sxpr
())
def
mock_syntax_tree
(
sxpr
):
"""
...
...
@@ -511,17 +529,6 @@ def mock_syntax_tree(sxpr):
return
Node
(
MockParser
(
name
,
':'
+
class_name
),
result
)
def
compact_sxpr
(
s
)
->
str
:
"""Returns S-expression ``s`` as a one liner without unnecessary
whitespace.
Example:
>>> compact_sxpr('(a
\\
n (b
\\
n c
\\
n )
\\
n)
\\
n')
'(a (b c))'
"""
return
re
.
sub
(
'\s(?=\))'
,
''
,
re
.
sub
(
'\s+'
,
' '
,
s
)).
strip
()
TransformationFunc
=
Union
[
Callable
[[
Node
],
Any
],
partial
]
...
...
DHParser/testing.py
View file @
813bebe5
...
...
@@ -28,7 +28,7 @@ except ImportError:
from
DHParser
import
error_messages
from
DHParser.toolkit
import
is_logging
from
DHParser.syntaxtree
import
mock_syntax_tree
,
compact
_sxpr
from
DHParser.syntaxtree
import
mock_syntax_tree
,
oneliner
_sxpr
__all__
=
(
'unit_from_configfile'
,
'unit_from_json'
,
...
...
@@ -171,8 +171,8 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
errata
.
append
(
'Abstract syntax tree test "%s" for parser "%s" failed:'
'
\n\t
Expr.: %s
\n\t
Expected: %s
\n\t
Received: %s'
%
(
test_name
,
parser_name
,
'
\n\t
'
.
join
(
test_code
.
split
(
'
\n
'
)),
compact
_sxpr
(
compare
.
as_sxpr
()),
compact
_sxpr
(
ast
.
as_sxpr
())))
oneliner
_sxpr
(
compare
.
as_sxpr
()),
oneliner
_sxpr
(
ast
.
as_sxpr
())))
tests
.
setdefault
(
'__err__'
,
{})[
test_name
]
=
errata
[
-
1
]
if
verbose
:
print
(
infostr
+
(
"OK"
if
len
(
errata
)
==
errflag
else
"FAIL"
))
...
...
examples/LaTeX/LaTeX.ebnf
View file @
813bebe5
...
...
@@ -57,8 +57,8 @@ block_environment = known_environment | generic_block
known_environment = itemize | enumerate | figure | table | quotation
| verbatim
generic_block = begin_generic_block sequence §end_generic_block
begin_generic_block = -&SUCC_LB begin_environment
&PRED
_LB
end_generic_block = -&SUCC_LB end_environment
&PRED
_LB
begin_generic_block = -&SUCC_LB begin_environment
-&SUCC
_LB
end_generic_block = -&SUCC_LB end_environment
-&SUCC
_LB
itemize = "\begin{itemize}" [PARSEP] { item } §"\end{itemize}"
enumerate = "\begin{enumerate}" [PARSEP] {item } §"\end{enumerate}"
...
...
@@ -86,8 +86,8 @@ text_elements = command | text | block | inline_environment
inline_environment = known_inline_env | generic_inline_env
known_inline_env = inline_math
generic_inline_env = begin_inline_env { text_elements }+ §end_inline_env
begin_inline_env = (-!SUCC_LB begin_environment) | (begin_environment
!PRED
_LB)
end_inline_env = (-!SUCC_LB end_environment) | (end_environment
!PRED
_LB)
begin_inline_env = (-!SUCC_LB begin_environment) | (begin_environment
-!SUCC
_LB)
end_inline_env = (-!SUCC_LB end_environment) | (end_environment
-!SUCC
_LB)
begin_environment = "\begin{" §NAME §"}"
end_environment = "\end{" §::NAME §"}"
...
...
@@ -144,7 +144,7 @@ WSPC = /[ \t]+/ # (horizontal) whitespace
LF = !PARSEP /[ \t]*\n[ \t]*/ # linefeed but not an empty line
PARSEP = /[ \t]*(?:\n[ \t]*)+\n[ \t]*/ # at least one empty line, i.e.
# [whitespace] linefeed [whitespace] linefeed
EOF =
!/.
/
EOF =
/(?!.)
/
SUCC_LB = /(?:.*\n)+\s*$/
# linebreak succeeding an arbitrary chunk of text
PRED_LB = /\s*
?\n/
# linebreak preceding any text
SUCC_LB = /(?
!.)|(?
:.*\n)+\s*$/ # linebreak succeeding an arbitrary chunk of text
#
PRED_LB = /\s*
(?!.)|\s*?\n/
# linebreak preceding any text
examples/LaTeX/LaTeXCompiler.py
View file @
813bebe5
...
...
@@ -15,29 +15,29 @@ try:
import
regex
as
re
except
ImportError
:
import
re
from
DHParser
.toolkit
import
logging
,
is_filename
from
DHParser.parser
import
Grammar
,
Compiler
,
Alternative
,
Pop
,
Required
,
Token
,
Synonym
,
\
Optional
,
OneOrMore
,
Series
,
RE
,
Capture
,
\
from
DHParser
import
logging
,
is_filename
,
Grammar
,
Compiler
,
Lookbehind
,
Alternative
,
Pop
,
\
Required
,
Token
,
Synonym
,
\
Optional
,
NegativeLookbehind
,
OneOrMore
,
RegExp
,
Series
,
RE
,
Capture
,
\
ZeroOrMore
,
Forward
,
NegativeLookahead
,
mixin_comment
,
compile_source
,
\
PreprocessorFunc
from
DHParser.syntaxtree
import
traverse
,
remove_brackets
,
reduce_single_child
,
replace_by_single_child
,
\
remove_expendables
,
flatten
,
join
,
\
collapse
,
replace_content
,
TransformationFunc
,
\
remove_empty
PreprocessorFunc
,
\
Node
,
TransformationFunc
,
\
traverse
,
join
,
\
reduce_single_child
,
replace_by_single_child
,
remove_expendables
,
remove_empty
,
flatten
,
\
collapse
,
replace_content
,
remove_brackets
#######################################################################
#
#
SCANNE
R SECTION - Can be edited. Changes will be preserved.
#
PREPROCESSO
R SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
def
LaTeX
Scanne
r
(
text
):
def
LaTeX
Preprocesso
r
(
text
):
return
text
def
get_
scanne
r
()
->
PreprocessorFunc
:
return
LaTeX
Scanne
r
def
get_
preprocesso
r
()
->
PreprocessorFunc
:
return
LaTeX
Preprocesso
r
#######################################################################
...
...
@@ -104,12 +104,12 @@ class LaTeXGrammar(Grammar):
#### block environments ####
# TODO: ambiguity between generic block environments and generic inline environments
block_environment = known_environment | generic_environment
block_environment = known_environment | generic_block
known_environment = itemize | enumerate | figure | table | quotation
| verbatim
generic_environment = begin_environment sequence §end_environment
generic_block = begin_generic_block sequence §end_generic_block
begin_generic_block = -&SUCC_LB begin_environment -&SUCC_LB
end_generic_block = -&SUCC_LB end_environment -&SUCC_LB
itemize = "\begin{itemize}" [PARSEP] { item } §"\end{itemize}"
enumerate = "\begin{enumerate}" [PARSEP] {item } §"\end{enumerate}"
...
...
@@ -136,7 +136,9 @@ class LaTeXGrammar(Grammar):
inline_environment = known_inline_env | generic_inline_env
known_inline_env = inline_math
generic_inline_env = begin_environment { text_elements }+ §end_environment
generic_inline_env = begin_inline_env { text_elements }+ §end_inline_env
begin_inline_env = (-!SUCC_LB begin_environment) | (begin_environment -!SUCC_LB)
end_inline_env = (-!SUCC_LB end_environment) | (end_environment -!SUCC_LB)
begin_environment = "\begin{" §NAME §"}"
end_environment = "\end{" §::NAME §"}"
...
...
@@ -190,41 +192,45 @@ class LaTeXGrammar(Grammar):
TEXTCHUNK = /[^\\%$&\{\}\[\]\s\n]+/ # some piece of text excluding whitespace,
# linefeed and special characters
WSPC = /[ \t]+/ # (horizontal) whitespace
LF = !PARSEP /[ \t]*\n[ \t]*/ #
LF
but not an empty line
LF = !PARSEP /[ \t]*\n[ \t]*/ #
linefeed
but not an empty line
PARSEP = /[ \t]*(?:\n[ \t]*)+\n[ \t]*/ # at least one empty line, i.e.
# [whitespace] linefeed [whitespace] linefeed
EOF = !/./
EOF = /(?!.)/
SUCC_LB = /(?!.)|(?:.*\n)+\s*$/ # linebreak succeeding an arbitrary chunk of text
# PRED_LB = /\s*(?!.)|\s*?\n/ # linebreak preceding any text
"""
block_environment
=
Forward
()
block_of_paragraphs
=
Forward
()
text_elements
=
Forward
()
source_hash__
=
"
9a8cba2b425d276af78e141d7dda162c
"
source_hash__
=
"
eb91cd592f8a8c60a796ba705a121b72
"
parser_initialization__
=
"upon instantiation"
COMMENT__
=
r
'%.*(?:\n|$)'
WSP__
=
mixin_comment
(
whitespace
=
r
'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?'
,
comment
=
r
'%.*(?:\n|$)'
)
wspL__
=
''
wspR__
=
WSP__
EOF
=
NegativeLookahead
(
RE
(
'.'
,
wR
=
''
))
PARSEP
=
RE
(
'[
\\
t]*(?:
\\
n[
\\
t]*)+
\\
n[
\\
t]*'
,
wR
=
''
)
LF
=
Series
(
NegativeLookahead
(
PARSEP
),
RE
(
'[
\\
t]*
\\
n[
\\
t]*'
,
wR
=
''
))
WSPC
=
RE
(
'[
\\
t]+'
,
wR
=
''
)
TEXTCHUNK
=
RE
(
'[^
\\\\
%$&
\\
{
\\
}
\\
[
\\
]
\\
s
\\
n]+'
,
wR
=
''
)
BRACKETS
=
RE
(
'[
\\
[
\\
]]'
,
wR
=
''
)
ESCAPED
=
RE
(
'
\\\\
[%$&_/]'
,
wR
=
''
)
SUCC_LB
=
RegExp
(
'(?!.)|(?:.*
\\
n)+
\\
s*$'
)
EOF
=
RegExp
(
'(?!.)'
)
PARSEP
=
RegExp
(
'[
\\
t]*(?:
\\
n[
\\
t]*)+
\\
n[
\\
t]*'
)
LF
=
Series
(
NegativeLookahead
(
PARSEP
),
RegExp
(
'[
\\
t]*
\\
n[
\\
t]*'
))
WSPC
=
RegExp
(
'[
\\
t]+'
)
TEXTCHUNK
=
RegExp
(
'[^
\\\\
%$&
\\
{
\\
}
\\
[
\\
]
\\
s
\\
n]+'
)
BRACKETS
=
RegExp
(
'[
\\
[
\\
]]'
)
ESCAPED
=
RegExp
(
'
\\\\
[%$&_/]'
)
MATH
=
RE
(
'[
\\
w_^{}[
\\
]]*'
)
NAME
=
Capture
(
RE
(
'
\\
w+'
))
CMDNAME
=
RE
(
'
\\\\
(?:(?!_)
\\
w)+'
)
structural
=
Alternative
(
Token
(
"subsection"
),
Token
(
"section"
),
Token
(
"chapter"
),
Token
(
"subsubsection"
),
Token
(
"paragraph"
),
Token
(
"subparagraph"
),
Token
(
"item"
))
blockcmd
=
Series
(
RE
(
'[
\\\\
]'
,
wR
=
''
),
Alternative
(
Series
(
Alternative
(
Token
(
"begin{"
),
Token
(
"end{
"
))
,
Alternative
(
Token
(
"enumerate"
),
Token
(
"itemize"
),
Token
(
"
figure
"
),
Token
(
"
quote"
),
Token
(
"quotation"
),
Token
(
"tabular"
)
),
Token
(
"}"
)),
structural
))
structural
=
Alternative
(
Token
(
"subsection"
),
Token
(
"section"
),
Token
(
"chapter"
),
Token
(
"subsubsection"
),
Token
(
"paragraph"
),
Token
(
"subparagraph"
),
Token
(
"item
"
))
blockcmd
=
Series
(
RegExp
(
'[
\\\\
]'
),
Alternative
(
Series
(
Alternative
(
Token
(
"
begin{
"
),
Token
(
"
end{"
)
),
Alternative
(
Token
(
"enumerate"
),
Token
(
"itemize"
),
Token
(
"figure"
),
Token
(
"quote"
),
Token
(
"quotation"
),
Token
(
"tabular"
)),
Token
(
"}"
)),
structural
))
word_sequence
=
OneOrMore
(
Series
(
TEXTCHUNK
,
RE
(
''
)))
cfgtext
=
OneOrMore
(
Alternative
(
word_sequence
,
Series
(
ESCAPED
,
RE
(
''
))))
text
=
OneOrMore
(
Alternative
(
cfgtext
,
Series
(
BRACKETS
,
RE
(
''
))))
block
=
Series
(
R
E
(
'{'
,
wR
=
'
'
),
ZeroOrMore
(
text_elements
),
Required
(
R
E
(
'}'
,
wR
=
'
'
)))
block
=
Series
(
R
egExp
(
'{
'
),
ZeroOrMore
(
text_elements
),
Required
(
R
egExp
(
'}
'
)))
config
=
Series
(
Token
(
"["
),
cfgtext
,
Required
(
Token
(
"]"
)))
caption
=
Series
(
Token
(
"
\\
caption"
),
block
)
includegraphics
=
Series
(
Token
(
"
\\
includegraphics"
),
config
,
block
)
...
...
@@ -235,13 +241,18 @@ class LaTeXGrammar(Grammar):
inline_math
=
Series
(
Token
(
"$"
),
MATH
,
Token
(
"$"
))
end_environment
=
Series
(
Token
(
"
\\
end{"
),
Required
(
Pop
(
NAME
)),
Required
(
Token
(
"}"
)))
begin_environment
=
Series
(
Token
(
"
\\
begin{"
),
Required
(
NAME
),
Required
(
Token
(
"}"
)))
generic_inline_env
=
Series
(
begin_environment
,
OneOrMore
(
text_elements
),
Required
(
end_environment
))
end_inline_env
=
Alternative
(
Series
(
NegativeLookbehind
(
SUCC_LB
),
end_environment
),
Series
(
end_environment
,
NegativeLookbehind
(
SUCC_LB
)))
begin_inline_env
=
Alternative
(
Series
(
NegativeLookbehind
(
SUCC_LB
),
begin_environment
),
Series
(
begin_environment
,
NegativeLookbehind
(
SUCC_LB
)))
generic_inline_env
=
Series
(
begin_inline_env
,
OneOrMore
(
text_elements
),
Required
(
end_inline_env
))
known_inline_env
=
Synonym
(
inline_math
)
inline_environment
=
Alternative
(
known_inline_env
,
generic_inline_env
)
text_elements
.
set
(
Alternative
(
command
,
text
,
block
,
inline_environment
))
paragraph
=
OneOrMore
(
Series
(
NegativeLookahead
(
blockcmd
),
text_elements
,
RE
(
''
)))
sequence
=
OneOrMore
(
Series
(
Alternative
(
paragraph
,
block_environment
),
Optional
(
PARSEP
)))
block_of_paragraphs
.
set
(
Series
(
R
E
(
'{'
,
wR
=
'
'
),
sequence
,
Required
(
R
E
(
'}'
,
wR
=
'
'
))))
block_of_paragraphs
.
set
(
Series
(
R
egExp
(
'{
'
),
sequence
,
Required
(
R
egExp
(
'}
'
))))
table_config
=
Series
(
Token
(
"{"
),
RE
(
'[lcr|]+'
),
Token
(
"}"
))
table
=
Series
(
Token
(
"
\\
begin{tabular}"
),
table_config
,
sequence
,
Token
(
"
\\
end{tabular}"
))
verbatim
=
Series
(
Token
(
"
\\
begin{verbatim}"
),
sequence
,
Token
(
"
\\
end{verbatim}"
))
...
...
@@ -251,9 +262,11 @@ class LaTeXGrammar(Grammar):
enumerate
=
Series
(
Token
(
"
\\
begin{enumerate}"
),
Optional
(
PARSEP
),
ZeroOrMore
(
item
),
Required
(
Token
(
"
\\
end{enumerate}"
)))
itemize
=
Series
(
Token
(
"
\\
begin{itemize}"
),
Optional
(
PARSEP
),
ZeroOrMore
(
item
),
Required
(
Token
(
"
\\
end{itemize}"
)))
generic_environment
=
Series
(
begin_environment
,
sequence
,
Required
(
end_environment
))
end_generic_block
=
Series
(
Lookbehind
(
SUCC_LB
),
end_environment
,
Lookbehind
(
SUCC_LB
))
begin_generic_block
=
Series
(
Lookbehind
(
SUCC_LB
),
begin_environment
,
Lookbehind
(
SUCC_LB
))
generic_block
=
Series
(
begin_generic_block
,
sequence
,
Required
(
end_generic_block
))
known_environment
=
Alternative
(
itemize
,
enumerate
,
figure
,
table
,
quotation
,
verbatim
)
block_environment
.
set
(
Alternative
(
known_environment
,
generic_
environment
))
block_environment
.
set
(
Alternative
(
known_environment
,
generic_
block
))
Index
=
Series
(
Token
(
"
\\
printindex"
),
Optional
(
PARSEP
))
Bibliography
=
Series
(
Token
(
"
\\
bibliography"
),
block
,
Optional
(
PARSEP
))
SubParagraph
=
Series
(
Token
(
"
\\
subparagpaph"
),
block
,
Optional
(
PARSEP
),
ZeroOrMore
(
sequence
))
...
...
@@ -369,7 +382,7 @@ class LaTeXCompiler(Compiler):
assert
re
.
match
(
'\w+\Z'
,
grammar_name
)
def
on_latexdoc
(
self
,
node
):
return
node
.
as_sexpr
()
return
node
def
on_preamble
(
self
,
node
):
pass
...
...
@@ -377,10 +390,91 @@ class LaTeXCompiler(Compiler):
def
on_document
(
self
,
node
):