Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
D
DHParser
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Locked Files
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Iterations
Merge Requests
0
Merge Requests
0
Requirements
Requirements
List
Security & Compliance
Security & Compliance
Dependency List
License Compliance
Operations
Operations
Incidents
Analytics
Analytics
Code Review
Insights
Issue
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
badw-it
DHParser
Commits
a4bed6e6
Commit
a4bed6e6
authored
Aug 08, 2017
by
Eckhart Arnold
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
- Refactoring LaTeX.ebnf
parent
a765df39
Changes
12
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
202 additions
and
144 deletions
+202
-144
DHParser/syntaxtree.py
DHParser/syntaxtree.py
+29
-7
DHParser/testing.py
DHParser/testing.py
+6
-9
examples/LaTeX/LaTeX.ebnf
examples/LaTeX/LaTeX.ebnf
+23
-21
examples/LaTeX/LaTeXCompiler.py
examples/LaTeX/LaTeXCompiler.py
+57
-51
examples/LaTeX/OLDSTUFF/LaTeX.ebnf
examples/LaTeX/OLDSTUFF/LaTeX.ebnf
+0
-36
examples/LaTeX/grammar_tests/01_test_text.ini
examples/LaTeX/grammar_tests/01_test_text.ini
+28
-0
examples/LaTeX/grammar_tests/02_test_paragraph.ini
examples/LaTeX/grammar_tests/02_test_paragraph.ini
+5
-0
examples/LaTeX/grammar_tests/03_test_environment.ini
examples/LaTeX/grammar_tests/03_test_environment.ini
+11
-5
examples/LaTeX/grammar_tests/PENDING/test_tabular.ini
examples/LaTeX/grammar_tests/PENDING/test_tabular.ini
+36
-0
examples/LaTeX/grammar_tests/test_text.ini
examples/LaTeX/grammar_tests/test_text.ini
+0
-8
examples/LaTeX/tst_LaTeX_grammar.py
examples/LaTeX/tst_LaTeX_grammar.py
+1
-1
test/test_testing.py
test/test_testing.py
+6
-6
No files found.
DHParser/syntaxtree.py
View file @
a4bed6e6
...
...
@@ -133,12 +133,12 @@ StrictResultType = Union[ChildrenType, str]
ResultType
=
Union
[
ChildrenType
,
'Node'
,
str
,
None
]
def
oneliner
_sxpr
(
sxpr
:
str
)
->
str
:
"""Returns S-expression `sxpr` as a one
liner without unnecessary
def
flatten
_sxpr
(
sxpr
:
str
)
->
str
:
"""Returns S-expression `sxpr` as a one
-
liner without unnecessary
whitespace.
Example:
>>>
oneliner
_sxpr('(a
\\
n (b
\\
n c
\\
n )
\\
n)
\\
n')
>>>
flatten
_sxpr('(a
\\
n (b
\\
n c
\\
n )
\\
n)
\\
n')
'(a (b c))'
"""
return
re
.
sub
(
'\s(?=\))'
,
''
,
re
.
sub
(
'\s+'
,
' '
,
sxpr
)).
strip
()
...
...
@@ -199,11 +199,13 @@ class Node:
self
.
_pos
=
-
1
# type: int
self
.
parser
=
parser
or
ZOMBIE_PARSER
def
__str__
(
self
):
if
self
.
children
:
return
""
.
join
(
str
(
child
)
for
child
in
self
.
children
)
return
str
(
self
.
result
)
def
__repr__
(
self
):
mpargs
=
{
'name'
:
self
.
parser
.
name
,
'ptype'
:
self
.
parser
.
ptype
}
parg
=
"MockParser({name}, {ptype})"
.
format
(
**
mpargs
)
...
...
@@ -211,30 +213,35 @@ class Node:
"("
+
", "
.
join
(
repr
(
child
)
for
child
in
self
.
children
)
+
")"
return
"Node(%s, %s)"
%
(
parg
,
rarg
)
def
__eq__
(
self
,
other
):
# return str(self.parser) == str(other.parser) and self.result == other.result
return
self
.
tag_name
==
other
.
tag_name
and
self
.
result
==
other
.
result
def
__hash__
(
self
):
return
hash
(
self
.
tag_name
)
def
__deepcopy__
(
self
,
memodict
=
{}):
result
=
copy
.
deepcopy
(
self
.
result
)
other
=
Node
(
self
.
parser
,
result
)
other
.
_pos
=
self
.
_pos
return
other
@
property
# this needs to be a (dynamic) property, in case sef.parser gets updated
def
tag_name
(
self
)
->
str
:
return
self
.
parser
.
name
or
self
.
parser
.
ptype
@
property
def
result
(
self
)
->
StrictResultType
:
return
self
.
_result
@
result
.
setter
def
result
(
self
,
result
:
ResultType
):
# # made obsolete by static type checking with mypy
is done
# # made obsolete by static type checking with mypy
# assert ((isinstance(result, tuple) and all(isinstance(child, Node) for child in result))
# or isinstance(result, Node)
# or isinstance(result, str)), str(result)
...
...
@@ -244,15 +251,18 @@ class Node:
self
.
error_flag
=
any
(
r
.
error_flag
for
r
in
self
.
_children
)
\
if
self
.
_children
else
False
# type: bool
@
property
def
children
(
self
)
->
ChildrenType
:
return
self
.
_children
@
property
def
len
(
self
)
->
int
:
# DEBUGGING: print(self.tag_name, str(self.pos), str(self._len), str(self)[:10].replace('\n','.'))
return
self
.
_len
@
property
def
pos
(
self
)
->
int
:
assert
self
.
_pos
>=
0
,
"position value not initialized!"
...
...
@@ -267,16 +277,19 @@ class Node:
child
.
pos
=
pos
+
offset
offset
+=
child
.
len
@
property
def
errors
(
self
)
->
List
[
Error
]:
return
[
Error
(
self
.
pos
,
err
)
for
err
in
self
.
_errors
]
def
add_error
(
self
,
error_str
:
str
)
->
'Node'
:
assert
isinstance
(
error_str
,
str
)
self
.
_errors
.
append
(
error_str
)
self
.
error_flag
=
True
return
self
def
propagate_error_flags
(
self
)
->
None
:
"""Recursively propagates error flags set on child nodes to its
parents. This can be used if errors are added to descendant
...
...
@@ -286,6 +299,7 @@ class Node:
child
.
propagate_error_flags
()
self
.
error_flag
=
self
.
error_flag
or
child
.
error_flag
def
collect_errors
(
self
,
clear_errors
=
False
)
->
List
[
Error
]:
"""
Returns all errors of this node or any child node in the form
...
...
@@ -301,6 +315,7 @@ class Node:
errors
.
extend
(
child
.
collect_errors
(
clear_errors
))
return
errors
def
_tree_repr
(
self
,
tab
,
openF
,
closeF
,
dataF
=
identity
,
density
=
0
)
->
str
:
"""
Generates a tree representation of this node and its children
...
...
@@ -346,6 +361,7 @@ class Node:
else
:
return
head
+
'
\n
'
.
join
([
tab
+
dataF
(
s
)
for
s
in
res
.
split
(
'
\n
'
)])
+
tail
.
lstrip
(
D
)
def
as_sxpr
(
self
,
src
:
str
=
None
)
->
str
:
"""
Returns content as S-expression, i.e. in lisp-like form.
...
...
@@ -373,6 +389,7 @@ class Node:
return
self
.
_tree_repr
(
' '
,
opening
,
lambda
node
:
'
\n
)'
,
pretty
,
density
=
0
)
def
as_xml
(
self
,
src
:
str
=
None
)
->
str
:
"""
Returns content as XML-tree.
...
...
@@ -397,10 +414,12 @@ class Node:
return
self
.
_tree_repr
(
' '
,
opening
,
closing
,
density
=
1
)
def
structure
(
self
)
->
str
:
"""Return structure (and content) as S-expression on a single line
without any line breaks."""
return
oneliner_sxpr
(
self
.
as_sxpr
())
return
flatten_sxpr
(
self
.
as_sxpr
())
def
content
(
self
)
->
str
:
"""
...
...
@@ -412,6 +431,7 @@ class Node:
return
(
' <<< Error on "%s" | %s >>> '
%
(
s
,
'; '
.
join
(
self
.
_errors
)))
if
self
.
_errors
else
s
def
find
(
self
,
match_function
:
Callable
)
->
Iterator
[
'Node'
]:
"""Finds nodes in the tree that match a specific criterion.
...
...
@@ -433,6 +453,7 @@ class Node:
for
nd
in
child
.
find
(
match_function
):
yield
nd
# def range(self, match_first, match_last):
# """Iterates over the range of nodes, starting from the first
# node for which ``match_first`` becomes True until the first node
...
...
@@ -473,13 +494,14 @@ class Node:
# return self.result,
# return nav(path.split('/'))
def
log
(
self
,
log_file_name
):
if
is_logging
():
st_file_name
=
log_file_name
with
open
(
os
.
path
.
join
(
log_dir
(),
st_file_name
),
"w"
,
encoding
=
"utf-8"
)
as
f
:
with
open
(
os
.
path
.
join
(
log_dir
(),
log_file_name
),
"w"
,
encoding
=
"utf-8"
)
as
f
:
f
.
write
(
self
.
as_sxpr
())
def
mock_syntax_tree
(
sxpr
):
"""
Generates a tree of nodes from an S-expression.
...
...
DHParser/testing.py
View file @
a4bed6e6
...
...
@@ -27,8 +27,7 @@ except ImportError:
import
re
from
DHParser
import
error_messages
from
DHParser.toolkit
import
is_logging
from
DHParser.syntaxtree
import
mock_syntax_tree
,
oneliner_sxpr
from
DHParser.syntaxtree
import
mock_syntax_tree
,
flatten_sxpr
__all__
=
(
'unit_from_configfile'
,
'unit_from_json'
,
...
...
@@ -150,15 +149,15 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
infostr
=
' match-test "'
+
test_name
+
'" ... '
errflag
=
len
(
errata
)
cst
=
parser
(
test_code
,
parser_name
)
cst
.
log
(
"
match_%s_%s.cst"
%
(
parser_name
,
test_name
))
cst
.
log
(
"
%s_match_%s_%s.cst"
%
(
unit_name
,
parser_name
,
test_name
))
tests
.
setdefault
(
'__cst__'
,
{})[
test_name
]
=
cst
if
"ast"
in
tests
or
report
:
ast
=
copy
.
deepcopy
(
cst
)
transform
(
ast
)
tests
.
setdefault
(
'__ast__'
,
{})[
test_name
]
=
ast
ast
.
log
(
"
match_%s_%s.ast"
%
(
parser_name
,
test_name
))
ast
.
log
(
"
%s_match_%s_%s.ast"
%
(
unit_name
,
parser_name
,
test_name
))
if
cst
.
error_flag
:
errata
.
append
(
'Match test "%s" for parser "%s" failed:
\n\t
Expr.: %s
\n\n\t
%s'
%
errata
.
append
(
'Match test "%s" for parser "%s" failed:
\n\t
Expr.: %s
\n\n\t
%s
\n\n
'
%
(
test_name
,
parser_name
,
'
\n\t
'
.
join
(
test_code
.
split
(
'
\n
'
)),
'
\n\t
'
.
join
(
m
.
replace
(
'
\n
'
,
'
\n\t\t
'
)
for
m
in
error_messages
(
test_code
,
cst
.
collect_errors
()))))
...
...
@@ -174,8 +173,8 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
errata
.
append
(
'Abstract syntax tree test "%s" for parser "%s" failed:'
'
\n\t
Expr.: %s
\n\t
Expected: %s
\n\t
Received: %s'
%
(
test_name
,
parser_name
,
'
\n\t
'
.
join
(
test_code
.
split
(
'
\n
'
)),
oneliner
_sxpr
(
compare
.
as_sxpr
()),
oneliner
_sxpr
(
ast
.
as_sxpr
())))
flatten
_sxpr
(
compare
.
as_sxpr
()),
flatten
_sxpr
(
ast
.
as_sxpr
())))
tests
.
setdefault
(
'__err__'
,
{})[
test_name
]
=
errata
[
-
1
]
if
verbose
:
print
(
infostr
+
(
"OK"
if
len
(
errata
)
==
errflag
else
"FAIL"
))
...
...
@@ -187,8 +186,6 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
infostr
=
' fail-test "'
+
test_name
+
'" ... '
errflag
=
len
(
errata
)
cst
=
parser
(
test_code
,
parser_name
)
# doesn't make sense to write cst for fail-tests
# cst.log("fail_%s_%s.cst" % (parser_name, test_name))
if
not
cst
.
error_flag
:
errata
.
append
(
'Fail test "%s" for parser "%s" yields match instead of '
'expected failure!'
%
(
test_name
,
parser_name
))
...
...
examples/LaTeX/LaTeX.ebnf
View file @
a4bed6e6
...
...
@@ -54,11 +54,11 @@ Index = "\printindex" [PARSEP]
#### block environments ####
block_environment = known_environment | generic_block
known_environment = itemize | enumerate | figure | tab
le
| quotation
known_environment = itemize | enumerate | figure | tab
ular
| quotation
| verbatim
generic_block = begin_generic_block sequence §end_generic_block
begin_generic_block = -&LB begin_environment
-&LB
end_generic_block = -&LB end_environment
-&LB
begin_generic_block = -&LB begin_environment
LFF
end_generic_block = -&LB end_environment
LFF
itemize = "\begin{itemize}" [PARSEP] { item } §"\end{itemize}"
enumerate = "\begin{enumerate}" [PARSEP] {item } §"\end{enumerate}"
...
...
@@ -68,13 +68,13 @@ figure = "\begin{figure}" sequence §"\end{figure}"
quotation = ("\begin{quotation}" sequence §"\end{quotation}")
| ("\begin{quote}" sequence §"\end{quote}")
verbatim = "\begin{verbatim}" sequence §"\end{verbatim}"
tab
le = "\begin{tabular}" table
_config sequence §"\end{tabular}"
tab
le_config
= "{" /[lcr|]+/~ §"}"
tab
ular = "\begin{tabular}" tabular
_config sequence §"\end{tabular}"
tab
ular_config
= "{" /[lcr|]+/~ §"}"
#### paragraphs and sequences of paragraphs ####
block_of_paragraphs = /{/ sequence §/}/
block_of_paragraphs = /{/
~
sequence §/}/
sequence = { (paragraph | block_environment ) [PARSEP] }+
paragraph = { !blockcmd text_element //~ }+
...
...
@@ -85,20 +85,21 @@ text_element = command | text | block | inline_environment
inline_environment = known_inline_env | generic_inline_env
known_inline_env = inline_math
generic_inline_env =
(begin_inline_env { text_element }+ §end_inline_env)
begin_inline_env = (-!LB begin_environment) | (begin_environment
-!LB
)
generic_inline_env =
begin_inline_env //~ paragraph §end_inline_env
begin_inline_env = (-!LB begin_environment) | (begin_environment
!LFF
)
end_inline_env = end_environment
#
(-!LB end_environment) | (end_environment -!LB
) # ambiguity with genric_block when EOF
begin_environment =
"\begin{" §NAME §"}"
end_environment =
"\end{" §::NAME §"}"
#
# (-!LB end_environment) | (end_environment !LFF
) # ambiguity with genric_block when EOF
begin_environment =
/\\begin{/ §NAME §/}/
end_environment =
/\\end{/ §::NAME §/}/
inline_math =
"$" /[^$]*/ §"$"
inline_math =
/\$/ /[^$]*/ §/\$/
#### commands ####
command = known_command | generic_command
command = known_command |
text_command |
generic_command
known_command = footnote | includegraphics | caption
text_command = TXTCOMMAND | ESCAPED | BRACKETS
generic_command = !no_command CMDNAME [[ //~ config ] //~ block ]
footnote = "\footnote" block_of_paragraphs
...
...
@@ -113,12 +114,9 @@ caption = "\caption" block
#######################################################################
config = "[" cfgtext §"]"
block = /{/ { text_element } §/}/
text = { cfgtext | (BRACKETS //~) }+
cfgtext = { word_sequence | (ESCAPED //~) }+
word_sequence = { TEXTCHUNK //~ }+
config = "[" text §"]"
block = /{/ //~ { !blockcmd text_element //~ } §/}/
text = TEXTCHUNK { //~ TEXTCHUNK }
no_command = "\begin{" | "\end" | BACKSLASH structural
blockcmd = BACKSLASH ( ( "begin{" | "end{" )
...
...
@@ -138,13 +136,17 @@ structural = "subsection" | "section" | "chapter" | "subsubsection"
CMDNAME = /\\(?:(?!_)\w)+/~
TXTCOMMAND = /\\text\w+/
ESCAPED = /\\[%$&_\/{}]/
BRACKETS = /[\[\]]/ # left or right square bracket: [ ]
NAME = /\w+/~
ESCAPED = /\\[%$&_\/]/
BRACKETS = /[\[\]]/ # left or right square bracket: [ ]
TEXTCHUNK = /[^\\%$&\{\}\[\]\s\n]+/ # some piece of text excluding whitespace,
# linefeed and special characters
LF = !GAP /[ \t]*\n[ \t]*/ # linefeed but not an empty line
LFF = //~ -&LB WSPC # at least one linefeed
WSPC = { ~/\s+/~ } # arbitrary horizontal or vertical whitespace
PARSEP = { GAP }+ # paragraph separator
GAP = /[ \t]*(?:\n[ \t]*)+\n/~ # at least one empty line, i.e.
# [whitespace] linefeed [whitespace] linefeed
...
...
examples/LaTeX/LaTeXCompiler.py
View file @
a4bed6e6
...
...
@@ -107,27 +107,27 @@ class LaTeXGrammar(Grammar):
#### block environments ####
block_environment = known_environment | generic_block
known_environment = itemize | enumerate | figure | tab
le
| quotation
known_environment = itemize | enumerate | figure | tab
ular
| quotation
| verbatim
generic_block = begin_generic_block sequence §end_generic_block
begin_generic_block = -&LB begin_environment
-&LB
end_generic_block = -&LB end_environment
-&LB
begin_generic_block = -&LB begin_environment
LFF
end_generic_block = -&LB end_environment
LFF
itemize = "\begin{itemize}" [PARSEP] { item } §"\end{itemize}"
enumerate = "\begin{enumerate}" [PARSEP] {item } §"\end{enumerate}"
item = "\item" [PARSEP] sequence
figure = "\begin{figure}" sequence "\end{figure}"
quotation = ("\begin{quotation}" sequence "\end{quotation}")
| ("\begin{quote}" sequence "\end{quote}")
verbatim = "\begin{verbatim}" sequence "\end{verbatim}"
tab
le = "\begin{tabular}" table_config sequence
"\end{tabular}"
tab
le_config = "{" /[lcr|]+/~
"}"
figure = "\begin{figure}" sequence
§
"\end{figure}"
quotation = ("\begin{quotation}" sequence
§
"\end{quotation}")
| ("\begin{quote}" sequence
§
"\end{quote}")
verbatim = "\begin{verbatim}" sequence
§
"\end{verbatim}"
tab
ular = "\begin{tabular}" tabular_config sequence §
"\end{tabular}"
tab
ular_config = "{" /[lcr|]+/~ §
"}"
#### paragraphs and sequences of paragraphs ####
block_of_paragraphs = /{/ sequence §/}/
block_of_paragraphs = /{/
~
sequence §/}/
sequence = { (paragraph | block_environment ) [PARSEP] }+
paragraph = { !blockcmd text_element //~ }+
...
...
@@ -138,20 +138,21 @@ class LaTeXGrammar(Grammar):
inline_environment = known_inline_env | generic_inline_env
known_inline_env = inline_math
generic_inline_env =
(begin_inline_env { text_element }+ §end_inline_env)
begin_inline_env = (-!LB begin_environment) | (begin_environment
-!LB
)
generic_inline_env =
begin_inline_env //~ paragraph §end_inline_env
begin_inline_env = (-!LB begin_environment) | (begin_environment
!LFF
)
end_inline_env = end_environment
#
(-!LB end_environment) | (end_environment -!LB
) # ambiguity with genric_block when EOF
begin_environment =
"\begin{" §NAME §"}"
end_environment =
"\end{" §::NAME §"}"
#
# (-!LB end_environment) | (end_environment !LFF
) # ambiguity with genric_block when EOF
begin_environment =
/\\begin{/ §NAME §/}/
end_environment =
/\\end{/ §::NAME §/}/
inline_math =
"$" /[^$]*/ "$"
inline_math =
/\$/ /[^$]*/ §/\$/
#### commands ####
command = known_command | generic_command
command = known_command |
text_command |
generic_command
known_command = footnote | includegraphics | caption
text_command = TXTCOMMAND | ESCAPED | BRACKETS
generic_command = !no_command CMDNAME [[ //~ config ] //~ block ]
footnote = "\footnote" block_of_paragraphs
...
...
@@ -166,12 +167,9 @@ class LaTeXGrammar(Grammar):
#######################################################################
config = "[" cfgtext §"]"
block = /{/ { text_element } §/}/
text = { cfgtext | (BRACKETS //~) }+
cfgtext = { word_sequence | (ESCAPED //~) }+
word_sequence = { TEXTCHUNK //~ }+
config = "[" text §"]"
block = /{/ //~ { !blockcmd text_element //~ } §/}/
text = TEXTCHUNK { //~ TEXTCHUNK }
no_command = "\begin{" | "\end" | BACKSLASH structural
blockcmd = BACKSLASH ( ( "begin{" | "end{" )
...
...
@@ -191,13 +189,17 @@ class LaTeXGrammar(Grammar):
CMDNAME = /\\(?:(?!_)\w)+/~
TXTCOMMAND = /\\text\w+/
ESCAPED = /\\[%$&_\/{}]/
BRACKETS = /[\[\]]/ # left or right square bracket: [ ]
NAME = /\w+/~
ESCAPED = /\\[%$&_\/]/
BRACKETS = /[\[\]]/ # left or right square bracket: [ ]
TEXTCHUNK = /[^\\%$&\{\}\[\]\s\n]+/ # some piece of text excluding whitespace,
# linefeed and special characters
LF = !GAP /[ \t]*\n[ \t]*/ # linefeed but not an empty line
LFF = //~ -&LB WSPC # at least one linefeed
WSPC = { ~/\s+/~ } # arbitrary horizontal or vertical whitespace
PARSEP = { GAP }+ # paragraph separator
GAP = /[ \t]*(?:\n[ \t]*)+\n/~ # at least one empty line, i.e.
# [whitespace] linefeed [whitespace] linefeed
...
...
@@ -211,8 +213,9 @@ class LaTeXGrammar(Grammar):
block_environment
=
Forward
()
block_of_paragraphs
=
Forward
()
end_generic_block
=
Forward
()
paragraph
=
Forward
()
text_element
=
Forward
()
source_hash__
=
"
9cdeab7d908861b396d3667373fdcb9a
"
source_hash__
=
"
b06aca9481c1e5bd756caadb8b707dff
"
parser_initialization__
=
"upon instantiation"
COMMENT__
=
r'%.*(?:\n|$)'
WSP__
=
mixin_comment
(
whitespace
=
r'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?'
,
comment
=
r'%.*(?:\n|$)'
)
...
...
@@ -223,50 +226,52 @@ class LaTeXGrammar(Grammar):
LB
=
RegExp
(
'
\\
s*?
\\
n|$'
)
GAP
=
RE
(
'[
\\
t]*(?:
\\
n[
\\
t]*)+
\\
n'
)
PARSEP
=
OneOrMore
(
GAP
)
WSPC
=
ZeroOrMore
(
RE
(
'
\\
s+'
,
wL
=
WSP__
))
LFF
=
Series
(
RE
(
''
),
Lookbehind
(
LB
),
WSPC
)
LF
=
Series
(
NegativeLookahead
(
GAP
),
RegExp
(
'[
\\
t]*
\\
n[
\\
t]*'
))
TEXTCHUNK
=
RegExp
(
'[^
\\\\
%$&
\\
{
\\
}
\\
[
\\
]
\\
s
\\
n]+'
)
BRACKETS
=
RegExp
(
'[
\\
[
\\
]]'
)
ESCAPED
=
RegExp
(
'
\\\\
[%$&_/]'
)
NAME
=
Capture
(
RE
(
'
\\
w+'
))
BRACKETS
=
RegExp
(
'[
\\
[
\\
]]'
)
ESCAPED
=
RegExp
(
'
\\\\
[%$&_/{}]'
)
TXTCOMMAND
=
RegExp
(
'
\\\\
text
\\
w+'
)
CMDNAME
=
RE
(
'
\\\\
(?:(?!_)
\\
w)+'
)
structural
=
Alternative
(
Token
(
"subsection"
),
Token
(
"section"
),
Token
(
"chapter"
),
Token
(
"subsubsection"
),
Token
(
"paragraph"
),
Token
(
"subparagraph"
),
Token
(
"item"
))
blockcmd
=
Series
(
BACKSLASH
,
Alternative
(
Series
(
Alternative
(
Token
(
"begin{"
),
Token
(
"end{"
)),
Alternative
(
Token
(
"enumerate"
),
Token
(
"itemize"
),
Token
(
"figure"
),
Token
(
"quote"
),
Token
(
"quotation"
),
Token
(
"tabular"
)),
Token
(
"}"
)),
structural
,
begin_generic_block
,
end_generic_block
))
no_command
=
Alternative
(
Token
(
"
\\
begin{"
),
Token
(
"
\\
end"
),
Series
(
BACKSLASH
,
structural
))
word_sequence
=
OneOrMore
(
Series
(
TEXTCHUNK
,
RE
(
''
)))
cfgtext
=
OneOrMore
(
Alternative
(
word_sequence
,
Series
(
ESCAPED
,
RE
(
''
))))
text
=
OneOrMore
(
Alternative
(
cfgtext
,
Series
(
BRACKETS
,
RE
(
''
))))
block
=
Series
(
RegExp
(
'{'
),
ZeroOrMore
(
text_element
),
Required
(
RegExp
(
'}'
)))
config
=
Series
(
Token
(
"["
),
cfgtext
,
Required
(
Token
(
"]"
)))
text
=
Series
(
TEXTCHUNK
,
ZeroOrMore
(
Series
(
RE
(
''
),
TEXTCHUNK
)))
block
=
Series
(
RegExp
(
'{'
),
RE
(
''
),
ZeroOrMore
(
Series
(
NegativeLookahead
(
blockcmd
),
text_element
,
RE
(
''
))),
Required
(
RegExp
(
'}'
)))
config
=
Series
(
Token
(
"["
),
text
,
Required
(
Token
(
"]"
)))
caption
=
Series
(
Token
(
"
\\
caption"
),
block
)
includegraphics
=
Series
(
Token
(
"
\\
includegraphics"
),
Optional
(
config
),
block
)
footnote
=
Series
(
Token
(
"
\\
footnote"
),
block_of_paragraphs
)
generic_command
=
Series
(
NegativeLookahead
(
no_command
),
CMDNAME
,
Optional
(
Series
(
Optional
(
Series
(
RE
(
''
),
config
)),
RE
(
''
),
block
)))
text_command
=
Alternative
(
TXTCOMMAND
,
ESCAPED
,
BRACKETS
)
known_command
=
Alternative
(
footnote
,
includegraphics
,
caption
)
command
=
Alternative
(
known_command
,
generic_command
)
inline_math
=
Series
(
Token
(
"$"
),
RegExp
(
'[^$]*'
),
Token
(
"$"
))
end_environment
=
Series
(
Token
(
"
\\
end{"
),
Required
(
Pop
(
NAME
)),
Required
(
Token
(
"}"
)))
begin_environment
=
Series
(
Token
(
"
\\
begin{"
),
Required
(
NAME
),
Required
(
Token
(
"}"
)))
command
=
Alternative
(
known_command
,
text_command
,
generic_command
)
inline_math
=
Series
(
RegExp
(
'
\\
$'
),
RegExp
(
'[^$]*'
),
Required
(
RegExp
(
'
\\
$'
)
))
end_environment
=
Series
(
RegExp
(
'
\\\\
end{'
),
Required
(
Pop
(
NAME
)),
Required
(
RegExp
(
'}'
)))
begin_environment
=
Series
(
RegExp
(
'
\\\\
begin{'
),
Required
(
NAME
),
Required
(
RegExp
(
'}'
)))
end_inline_env
=
Synonym
(
end_environment
)
begin_inline_env
=
Alternative
(
Series
(
NegativeLookbehind
(
LB
),
begin_environment
),
Series
(
begin_environment
,
NegativeLook
behind
(
LB
)))
generic_inline_env
=
Series
(
begin_inline_env
,
OneOrMore
(
text_element
)
,
Required
(
end_inline_env
))
begin_inline_env
=
Alternative
(
Series
(
NegativeLookbehind
(
LB
),
begin_environment
),
Series
(
begin_environment
,
NegativeLook
ahead
(
LFF
)))
generic_inline_env
=
Series
(
begin_inline_env
,
RE
(
''
),
paragraph
,
Required
(
end_inline_env
))
known_inline_env
=
Synonym
(
inline_math
)
inline_environment
=
Alternative
(
known_inline_env
,
generic_inline_env
)
text_element
.
set
(
Alternative
(
command
,
text
,
block
,
inline_environment
))
paragraph
=
OneOrMore
(
Series
(
NegativeLookahead
(
blockcmd
),
text_element
,
RE
(
''
)))
paragraph
.
set
(
OneOrMore
(
Series
(
NegativeLookahead
(
blockcmd
),
text_element
,
RE
(
''
)
)))
sequence
=
OneOrMore
(
Series
(
Alternative
(
paragraph
,
block_environment
),
Optional
(
PARSEP
)))
block_of_paragraphs
.
set
(
Series
(
R
egExp
(
'{'
),
sequence
,
Required
(
RegExp
(
'}'
))))
tab
le_config
=
Series
(
Token
(
"{"
),
RE
(
'[lcr|]+'
),
Token
(
"}"
))
tab
le
=
Series
(
Token
(
"
\\
begin{tabular}"
),
table_config
,
sequence
,
Token
(
"
\\
end{tabular}"
))
verbatim
=
Series
(
Token
(
"
\\
begin{verbatim}"
),
sequence
,
Token
(
"
\\
end{verbatim}"
))
quotation
=
Alternative
(
Series
(
Token
(
"
\\
begin{quotation}"
),
sequence
,
Token
(
"
\\
end{quotation}"
)),
Series
(
Token
(
"
\\
begin{quote}"
),
sequence
,
Token
(
"
\\
end{quote}"
)))
figure
=
Series
(
Token
(
"
\\
begin{figure}"
),
sequence
,
Token
(
"
\\
end{figure}"
))
block_of_paragraphs
.
set
(
Series
(
R
E
(
'{'
),
sequence
,
Required
(
RegExp
(
'}'
))))
tab
ular_config
=
Series
(
Token
(
"{"
),
RE
(
'[lcr|]+'
),
Required
(
Token
(
"}"
)
))
tab
ular
=
Series
(
Token
(
"
\\
begin{tabular}"
),
tabular_config
,
sequence
,
Required
(
Token
(
"
\\
end{tabular}"
)
))
verbatim
=
Series
(
Token
(
"
\\
begin{verbatim}"
),
sequence
,
Required
(
Token
(
"
\\
end{verbatim}"
)
))
quotation
=
Alternative
(
Series
(
Token
(
"
\\
begin{quotation}"
),
sequence
,
Required
(
Token
(
"
\\
end{quotation}"
))),
Series
(
Token
(
"
\\
begin{quote}"
),
sequence
,
Required
(
Token
(
"
\\
end{quote}"
)
)))
figure
=
Series
(
Token
(
"
\\
begin{figure}"
),
sequence
,
Required
(
Token
(
"
\\
end{figure}"
)
))
item
=
Series
(
Token
(
"
\\
item"
),
Optional
(
PARSEP
),
sequence
)
enumerate
=
Series
(
Token
(
"
\\
begin{enumerate}"
),
Optional
(
PARSEP
),
ZeroOrMore
(
item
),
Required
(
Token
(
"
\\
end{enumerate}"
)))
itemize
=
Series
(
Token
(
"
\\
begin{itemize}"
),
Optional
(
PARSEP
),
ZeroOrMore
(
item
),
Required
(
Token
(
"
\\
end{itemize}"
)))
end_generic_block
.
set
(
Series
(
Lookbehind
(
LB
),
end_environment
,
L
ookbehind
(
LB
)
))
begin_generic_block
.
set
(
Series
(
Lookbehind
(
LB
),
begin_environment
,
L
ookbehind
(
LB
)
))
end_generic_block
.
set
(
Series
(
Lookbehind
(
LB
),
end_environment
,
L
FF
))
begin_generic_block
.
set
(
Series
(
Lookbehind
(
LB
),
begin_environment
,
L
FF
))
generic_block
=
Series
(
begin_generic_block
,
sequence
,
Required
(
end_generic_block
))
known_environment
=
Alternative
(
itemize
,
enumerate
,
figure
,
tab
le
,
quotation
,
verbatim
)
known_environment
=
Alternative
(
itemize
,
enumerate
,
figure
,
tab
ular
,
quotation
,
verbatim
)
block_environment
.
set
(
Alternative
(
known_environment
,
generic_block
))
Index
=
Series
(
Token
(
"
\\
printindex"
),
Optional
(
PARSEP
))
Bibliography
=
Series
(
Token
(
"
\\
bibliography"
),
block
,
Optional
(
PARSEP
))
...
...
@@ -369,17 +374,18 @@ LaTeX_AST_transformation_table = {
"inline_math"
:
[
remove_brackets
,
reduce_single_child
],
"command"
:
[],
"known_command"
:
[],
"text_command"
:
[],
"generic_command"
:
[
flatten
],
"footnote"
:
[],
"includegraphics"
:
[],
"caption"
:
[],
"config"
:
[
remove_brackets
],
"block"
:
[
remove_brackets
,
reduce_single_child
(
is_anonymous
)
],
"block"
:
[
remove_brackets
,
flatten
],
"text"
:
collapse
,
"cfgtext, word_sequence"
:
[],
"no_command, blockcmd"
:
[],
"structural"
:
[],
"CMDNAME"
:
[
remove_whitespace
,
reduce_single_child
(
is_anonymous
)],
"TXTCOMMAND"
:
[
remove_whitespace
,
reduce_single_child
(
is_anonymous
)],
"NAME"
:
[
reduce_single_child
,
remove_whitespace
,
reduce_single_child
],
"ESCAPED"
:
[
replace_content
(
lambda
node
:
str
(
node
)[
1
:])],
"BRACKETS"
:
[],
...
...
examples/LaTeX/OLDSTUFF/LaTeX.ebnf
deleted
100644 → 0
View file @
a765df39
# latex Grammar
@ whitespace = /[ \t]*\n?(?!\s*\n)[ \t]*/ # whitespace, including at most one linefeed
@ comment = /%.*(?:\n|$)/
latexdoc = preamble document
preamble = { command }+
genericenv = beginenv sequence §endenv
beginenv = "\begin" §( "{" name "}" )
endenv = "\end" §( "{" ::name "}" )
name = /\w+/~
comand = cmdname [ config ] block
cmdname = /\\\w+/
config = "[" cfgtext §"]"
sequence = { partext | parblock }
parblock = "{" { partext | parblock } §"}"
block = "{" { text | block } §"}"
partext = text | PARSEP
text = cfgtext | brackets
cfgtext = chunk | escaped | WSPC
ESCAPED = /\\[%$&]/
BRACKET = /[\[\]]/ # left or right square bracket: [ ]
TEXTCHUNK = /[^\\%$&\{\}\[\]\s\n]+/ # some piece of text excluding whitespace,
# linefeed and special characters
WSPC = /[ \t]*\n?(?!\s*\n)[ \t]*/ # whitespace, including at most one linefeed
LF = /[ \t]*\n(?!\s*\n)/ # a linefeed, but not an empty line (i.e. par)
PARSEP = /\s*\n\s*\n/ # at least one empty line, i.e.
# [whitespace] linefeed [whitespace] linefeed
examples/LaTeX/grammar_tests/01_test_text.ini
0 → 100644
View file @
a4bed6e6
[match:text]
1
:
Some
plain
text
[fail:text]
1
:
Low-level
text
must
not
contain
\&
escaped
characters.
2
:
Low-level
text
must
not
contain
]
[
brackets.
3
:
Low-level
text
must
not
contain
{
environments
}.
4
:
Low-level
text
must
not
contain
any
\commands.
[match:text_element]
1
:
\command
2
:
\textbackslash
3
:
\footnote{footnote}
4
:
[
5
:
\begin{generic}
unknown
inline
environment
\end{generic}
6
:
\begin{small}
known
inline
environment
\end{small}
7:
{\em
block}
examples/LaTeX/grammar_tests/test_paragraph.ini
→
examples/LaTeX/grammar_tests/
02_
test_paragraph.ini
View file @
a4bed6e6
...
...
@@ -26,6 +26,11 @@
8
:
Unknwon
\xy
commands
within
paragraphs
may
be
simple
or
\xy{complex}.
9
:
paragraphs
may
contain
all
of
these:
\{
escaped
\}
characters,
{\bf
blocks},
[ brackets ]
,
\begin{tiny}
environments
\end{tiny}
and
\textbackslash
text-commands
or
other
commands
like
this
\footnote{footnote}
[fail:paragraph]
1
:
\begin{enumerate}
...
...
examples/LaTeX/grammar_tests/test_environment.ini
→
examples/LaTeX/grammar_tests/
03_
test_environment.ini
View file @
a4bed6e6
...
...
@@ -34,20 +34,27 @@
[match:inline_environment]
1
:
"""\begin{generic}inline
environment\end{generic}
"""
1
:
"""\begin{generic}inline
environment\end{generic}"""
2
:
"""\begin{generic}inline
environment
\end{generic}
"""
\end{generic}"""
3
:
"$
inline
math
$"
[fail:inline_environment]
3
:
"""\begin{generic}