badw-it / DHParser
Commit 40fd996b, authored Apr 01, 2018 by eckhart
Commit message: - log.py: Better html-output; LaTeX-example continued
Parent: 426f5db8
Changes: 17 files
DHParser/compile.py
@@ -44,7 +44,7 @@ from DHParser.parse import Grammar
 from DHParser.error import adjust_error_locations, is_error, Error
 from DHParser.log import log_parsing_history, log_ST, is_logging, logfile_basename
 from DHParser.toolkit import typing, sane_parser_name, load_if_file
-from typing import Any, Optional, Tuple, List
+from typing import Any, Optional, Tuple, List, Callable


 __all__ = ('CompilerError', 'Compiler', 'compile_source')
@@ -186,9 +186,8 @@ class Compiler:
             result = compiler(node)
             self.context.pop()
             if result is None:
-                raise CompilerError(('Method %s returned `None` instead of a '
-                                     'valid compilation result!') % str(compiler))
+                raise CompilerError('Method on_%s returned `None` instead of a '
+                                    'valid compilation result!' % elem)
             # # the following statement makes sure that the error_flag
             # # is propagated early on. Otherwise it is redundant, because
             # # the __call__ method globally propagates the node's error_flag
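
For context: the reworded error message reflects how Compiler dispatches on node types, namely to one on_<tag_name>() method per node, each of which must return a node rather than None. A minimal sketch, not part of this commit; the on_word method and its node type are invented for illustration:

    from DHParser import Compiler

    class MyCompiler(Compiler):
        # Compiler calls on_<tag_name>() for each node type it encounters;
        # returning None from such a method now raises
        # CompilerError("Method on_word returned `None` ...").
        def on_word(self, node):
            return node
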
DHParser/log.py
@@ -204,20 +204,22 @@ class HistoryRecord:
     Snapshot = collections.namedtuple('Snapshot', ['line', 'column', 'stack', 'status', 'text'])

     COLGROUP = '<colgroup>\n<col style="width:2%"/><col style="width:2%"/><col style="width:75"/>' \
-               '<col style="width:6%"/><col style="width:15%"/>\n</colgroup>\n'
+               '<col style="width:6%"/><col style="width:15%"/>\n</colgroup>'
+    HEADINGS = ('<tr><th>L</th><th>C</th><th>parser calling sequence</th>'
+                '<th>success</th><th>text to parse</th></tr>')
     HTML_LEAD_IN = ('<!DOCTYPE html>\n'
                     '<html>\n<head>\n<meta charset="utf-8"/>\n<style>\n'
-                    'td.line, td.column{font-family:monospace;color:darkgrey}\n'
-                    'td.stack{font-family:monospace}\n'
-                    'td.status{font-family:monospace;font-weight:bold}\n'
-                    'td.text{font-family:monospace;color:darkblue}\n'
-                    'table{border-spacing: 0px; border: thin solid darkgrey; width:100%}\n'
-                    'td{border-right: thin solid grey; border-bottom: thin solid grey}\n'
-                    'span.delimiter{color:grey;}\nspan.match{color:darkgreen}\n'
-                    'span.fail{color:darkgrey}\nspan.error{color:red}\n'
-                    'span.matchstack{font-weight:bold;color:darkred}'
-                    '\n</style>\n</head>\n<body>\n<table>\n'
-                    + COLGROUP)
-    HTML_LEAD_OUT = '\n</table>\n</body>\n</html>\n'
+                    'td,th {font-family:monospace; '
+                    'border-right: thin solid grey; border-bottom: thin solid grey}\n'
+                    'td.line, td.column {color:darkgrey}\n'
+                    # 'td.stack {}\n'
+                    'td.status {font-weight:bold}\n'
+                    'td.text {color:darkblue}\n'
+                    'table {border-spacing: 0px; border: thin solid darkgrey; width:100%}\n'
+                    'span {color:grey;}\nspan.match {color:darkgreen}\n'
+                    'span.fail {color:darkgrey}\nspan.error {color:red}\n'
+                    'span.matchstack {font-weight:bold;color:darkred}'
+                    '\n</style>\n</head>\n<body>\n')
+    HTML_LEAD_OUT = '\n</body>\n</html>\n'

     def __init__(self, call_stack: List['Parser'], node: Node, text: StringView) -> None:
         # copy call stack, dropping uninformative Forward-Parsers
@@ -252,7 +254,7 @@ class HistoryRecord:
         Returns history record formatted as an html table row.
         """
         stack = html.escape(self.stack).replace(
-            '->', '<span class="delimiter">­-></span>')
+            '->', '<span>­-></span>')
         status = html.escape(self.status)
         excerpt = html.escape(self.excerpt)
         if status == self.MATCH:
@@ -372,6 +374,10 @@ def log_ST(syntax_tree, log_file_name):
         f.write(syntax_tree.as_sxpr())


+LOG_SIZE_THRESHOLD = 100000  # maximum number of history records to log
+LOG_TAIL_THRESHOLD = 500     # maximum number of history records for "tail log"
+
+
 def log_parsing_history(grammar, log_file_name: str = '', html: bool = False) -> None:
     """
     Writes a log of the parsing history of the most recently parsed document.
@@ -395,9 +401,9 @@ def log_parsing_history(grammar, log_file_name: str = '', html: bool=False) -> N
         if history:
             with open(path, "w", encoding="utf-8") as f:
                 if html:
-                    f.write(HistoryRecord.HTML_LEAD_IN)
+                    f.write(HistoryRecord.HTML_LEAD_IN + '\n')
                     f.write("\n".join(history))
-                    f.write(HistoryRecord.HTML_LEAD_OUT)
+                    f.write('\n</table>\n' + HistoryRecord.HTML_LEAD_OUT)
                 else:
                     f.write("\n".join(history))
@@ -406,8 +412,8 @@ def log_parsing_history(grammar, log_file_name: str = '', html: bool=False) -> N
         table every 100 rows to allow browser to speed up rendering.
         Does this really work...?"""
         log.append(line)
-        if html and len(log) % 100 == 0:
-            log.append('\n</table>\n<table>\n' + HistoryRecord.COLGROUP)
+        if html and len(log) % 50 == 0:
+            log.append('\n'.join(['</table>\n<table>', HistoryRecord.COLGROUP]))

     if not is_logging():
         raise AssertionError("Cannot log history when logging is turned off!")
@@ -418,10 +424,26 @@ def log_parsing_history(grammar, log_file_name: str = '', html: bool=False) -> N
             log_file_name = name[:-7] if name.lower().endswith('grammar') else name
     elif log_file_name.lower().endswith('.log'):
         log_file_name = log_file_name[:-4]

-    full_history = []   # type: List[str]
-    match_history = []  # type: List[str]
-    errors_only = []    # type: List[str]
-    for record in grammar.history__:
+    full_history = ['<h1>Full parsing history of "%s"</h1>' % log_file_name]    # type: List[str]
+    match_history = ['<h1>Match history of parsing "%s"</h1>' % log_file_name]  # type: List[str]
+    errors_only = ['<h1>Errors when parsing "%s"</h1>' % log_file_name]         # type: List[str]
+
+    if len(grammar.history__) > LOG_SIZE_THRESHOLD:
+        warning = ('Sorry, man, %iK history records is just too many! '
+                   'Only looking at the last %iK records.'
+                   % (len(grammar.history__) // 1000, LOG_SIZE_THRESHOLD // 1000))
+        html_warning = '<p><strong>' + warning + '</strong></p>'
+        full_history.append(html_warning)
+        match_history.append(html_warning)
+        errors_only.append(html_warning)
+
+    lead_in = '\n'.join(['<table>', HistoryRecord.COLGROUP, HistoryRecord.HEADINGS])
+    full_history.append(lead_in)
+    match_history.append(lead_in)
+    errors_only.append(lead_in)
+
+    for record in grammar.history__[-LOG_SIZE_THRESHOLD:]:
         line = record.as_html_tr() if html else str(record)
         append_line(full_history, line)
         if record.node and record.node.parser.ptype != WHITESPACE_PTYPE:
@@ -429,7 +451,10 @@ def log_parsing_history(grammar, log_file_name: str = '', html: bool=False) -> N
             if record.node.error_flag:
                 append_line(errors_only, line)

     write_log(full_history, log_file_name + '_full')
-    if len(full_history) > 500:
-        write_log(full_history[-500:], log_file_name + '_full.tail')
+    if len(full_history) > LOG_TAIL_THRESHOLD + 10:
+        heading = '<h1>Last 500 records of parsing history of "%s"</h1>' % log_file_name + lead_in
+        write_log([heading] + full_history[-LOG_TAIL_THRESHOLD:], log_file_name + '_full.tail')
     write_log(match_history, log_file_name + '_match')
-    write_log(errors_only, log_file_name + '_errors')
+    if (len(errors_only) > 3 or (len(grammar.history__) <= LOG_SIZE_THRESHOLD
+                                 and len(errors_only) > 2)):
+        write_log(errors_only, log_file_name + '_errors')
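
Taken together, the new class constants let a caller assemble the same page structure that log_parsing_history() now writes. A minimal sketch, not part of this commit: render_history_page and its records argument are hypothetical, while HistoryRecord, its constants and as_html_tr() are the names used above:

    from DHParser.log import HistoryRecord

    def render_history_page(records) -> str:
        # records: an iterable of HistoryRecord instances (hypothetical input)
        lead_in = '\n'.join(['<table>', HistoryRecord.COLGROUP, HistoryRecord.HEADINGS])
        rows = '\n'.join(record.as_html_tr() for record in records)
        return (HistoryRecord.HTML_LEAD_IN + '\n' + lead_in + '\n'
                + rows + '\n</table>\n' + HistoryRecord.HTML_LEAD_OUT)
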
DHParser/parse.py
@@ -30,6 +30,7 @@ for an example.
"""
from
collections
import
defaultdict
import
copy
from
DHParser.error
import
Error
,
linebreaks
...
...
@@ -39,7 +40,7 @@ from DHParser.stringview import StringView, EMPTY_STRING_VIEW
 from DHParser.syntaxtree import Node, ParserBase, WHITESPACE_PTYPE, \
     TOKEN_PTYPE, ZOMBIE_PARSER
 from DHParser.toolkit import sane_parser_name, escape_control_characters, re, typing
-from typing import Callable, cast, Dict, List, Set, Tuple, Union, Optional
+from typing import Callable, cast, Dict, DefaultDict, List, Set, Tuple, Union, Optional


 __all__ = ('Parser',
@@ -114,7 +115,7 @@ def add_parser_guard(parser_func):
             # break left recursion at the maximum allowed depth
             if grammar.left_recursion_handling__:
-                if parser.recursion_counter.setdefault(location, 0) > LEFT_RECURSION_DEPTH:
+                if parser.recursion_counter[location] > LEFT_RECURSION_DEPTH:
                     grammar.recursion_locations__.add(location)
                     return None, text
                 parser.recursion_counter[location] += 1
@@ -124,6 +125,7 @@ def add_parser_guard(parser_func):
             if grammar.left_recursion_handling__:
                 parser.recursion_counter[location] -= 1
+                # don't clear recursion_locations__ !!!

             if node is None:
                 # retrieve an earlier match result (from left recursion) if it exists
@@ -259,9 +261,9 @@ class Parser(ParserBase):
"""Initializes or resets any parser variables. If overwritten,
the `reset()`-method of the parent class must be called from the
`reset()`-method of the derived class."""
self
.
visited
=
dict
()
# type: Dict[int, Tuple[Optional[Node], StringView]]
self
.
recursion_counter
=
d
ict
(
)
# type: Dict[int, int]
self
.
cycle_detection
=
set
()
# type: Set[Callable]
self
.
visited
=
dict
()
# type: Dict[int, Tuple[Optional[Node], StringView]]
self
.
recursion_counter
=
d
efaultdict
(
lambda
:
0
)
# type:
Default
Dict[int, int]
self
.
cycle_detection
=
set
()
# type: Set[Callable]
def
__call__
(
self
,
text
:
StringView
)
->
Tuple
[
Optional
[
Node
],
StringView
]:
"""Applies the parser to the given `text` and returns a node with
...
...
@@ -624,7 +626,7 @@ class Grammar:
         self.document_length__ = 0  # type: int
         self.document_lbreaks__ = []  # type: List[int]
         # variables stored and recalled by Capture and Retrieve parsers
-        self.variables__ = dict()  # type: Dict[str, List[str]]
+        self.variables__ = defaultdict(lambda: [])  # type: DefaultDict[str, List[str]]
         self.rollback__ = []  # type: List[Tuple[int, Callable]]
         self.last_rb__loc__ = -1  # type: int
         # support for call stack tracing
@@ -1677,7 +1679,7 @@ class Capture(UnaryOperator):
         node, text_ = self.parser(text)
         if node:
             assert self.name, """Tried to apply an unnamed capture-parser!"""
-            stack = self.grammar.variables__.setdefault(self.name, [])
+            stack = self.grammar.variables__[self.name]
             stack.append(node.content)
             location = self.grammar.document_length__ - len(text)
             self.grammar.push_rollback__(location, lambda: stack.pop())
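
The last three parse.py hunks all make the same move: containers that were read with dict().setdefault() become defaultdicts, so missing keys receive their default on first access. A stand-alone sketch of the difference, not from the commit; the 'loc' key is made up:

    from collections import defaultdict

    recursion_counter = {}                      # old style: default supplied at every call site
    if recursion_counter.setdefault('loc', 0) > 2:
        pass

    recursion_counter = defaultdict(lambda: 0)  # new style: the factory supplies the default
    if recursion_counter['loc'] > 2:            # a missing key silently becomes 0
        pass
    recursion_counter['loc'] += 1
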
DHParser/syntaxtree.py
@@ -232,7 +232,7 @@ class Node(collections.abc.Sized):
         At any rate, it should only be reassigned during the parsing
         stage and never during or after the AST-transformation.

-        xml_attr (dict): An optional dictionary of XML-attributes. This
+        attributes (dict): An optional dictionary of XML-attributes. This
             dictionary is created lazily upon first usage. The attributes
             will only be shown in the XML-Representation, not in the
             S-Expression-output.
@@ -356,7 +356,7 @@ class Node(collections.abc.Sized):
                     return True
             return False
         raise ValueError('Leave node cannot contain other nodes')
-        # generator = self.select_tags(tag_name, False)
+        # generator = self.select_by_tag(tag_name, False)
         # try:
         #     generator.__next__()
         #     return True
@@ -604,7 +604,7 @@ class Node(collections.abc.Sized):
     @property
-    def xml_attr(self):
+    def attributes(self):
         """Returns a dictionary of XML-Attributes attached to the Node."""
         if not hasattr(self, '_xml_attr'):
             self._xml_attr = dict()
@@ -626,7 +626,7 @@ class Node(collections.abc.Sized):
             txt = ['<', node.tag_name]
             # s += ' pos="%i"' % node.pos
             if hasattr(node, '_xml_attr'):
-                txt.extend(' %s="%s"' % (k, v) for k, v in node.xml_attr.items())
+                txt.extend(' %s="%s"' % (k, v) for k, v in node.attributes.items())
             if src:
                 txt.append(' line="%i" col="%i"' % line_col(line_breaks, node.pos))
             if showerrors and node.errors:
@@ -648,7 +648,9 @@ class Node(collections.abc.Sized):
         `select` is a generator that yields all nodes for which the
         given `match_function` evaluates to True. The tree is
-        traversed pre-order, depth last.
+        traversed pre-order.
+
+        See function `Node.select_by_tag` for some examples.

         Args:
             match_function (function): A function that takes as Node
@@ -667,22 +669,25 @@ class Node(collections.abc.Sized):
                 yield node

-    def select_tags(self, tag_names: Union[str, Set[str]], include_root: bool = True) -> Iterator['Node']:
+    def select_by_tag(self, tag_names: Union[str, Set[str]], include_root: bool = True) -> Iterator['Node']:
         """
-        Returns an iterator that runs through all descendants that have the
-        given tag name.
+        Returns an iterator that runs through all descendants that have one
+        of the given tag names.

-        Example::
+        Examples::

             >>> tree = mock_syntax_tree('(a (b "X") (X (c "d")) (e (X "F")))')
-            >>> list(flatten_sxpr(item.as_sxpr()) for item in tree.select_tags("X", False))
+            >>> list(flatten_sxpr(item.as_sxpr()) for item in tree.select_by_tag("X", False))
             ['(X (c "d"))', '(X "F")']
-            >>> list(flatten_sxpr(item.as_sxpr()) for item in tree.select_tags({"X", "b"}, False))
+            >>> list(flatten_sxpr(item.as_sxpr()) for item in tree.select_by_tag({"X", "b"}, False))
             ['(b "X")', '(X (c "d"))', '(X "F")']
-            >>> any(tree.select_tags('a', False))
+            >>> any(tree.select_by_tag('a', False))
             False
-            >>> list(flatten_sxpr(item.as_sxpr()) for item in tree.select_tags('a', True))
+            >>> list(flatten_sxpr(item.as_sxpr()) for item in tree.select_by_tag('a', True))
             ['(a (b "X") (X (c "d")) (e (X "F")))']
+            >>> flatten_sxpr(next(tree.select_by_tag("X", False)).as_sxpr())
+            '(X (c "d"))'

         Args:
             tag_name(set): A tag name or set of tag names that is being
@@ -697,6 +702,21 @@ class Node(collections.abc.Sized):
         return self.select(lambda node: node.tag_name in tag_names, include_root)

+    def pick(self, tag_names: Union[str, Set[str]]) -> Optional['Node']:
+        """
+        Picks the first descendant with one of the given tag_names.
+
+        This function is just syntactic sugar for
+        ``next(node.select_by_tag(tag_names, False))``. However, rather than
+        raising a StopIterationError if no descendant with the given tag-name
+        exists, it returns None.
+        """
+        try:
+            return next(self.select_by_tag(tag_names, False))
+        except StopIteration:
+            return None
+
     def tree_size(self) -> int:
         """
         Recursively counts the number of nodes in the tree including the root node.
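
The renamed select_by_tag() and the new pick() are companions: pick() returns the first match or None instead of raising StopIteration. A small usage sketch, not part of the commit, assuming that mock_syntax_tree from the doctests above is importable from DHParser.syntaxtree:

    from DHParser.syntaxtree import mock_syntax_tree   # assumption: same helper as in the doctests

    tree = mock_syntax_tree('(a (b "X") (X (c "d")) (e (X "F")))')
    first_x = tree.pick("X")                  # first descendant tagged "X"
    assert first_x is not None and first_x.tag_name == "X"
    assert tree.pick("no_such_tag") is None   # no StopIteration, just None
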
DHParser/testing.py
@@ -50,9 +50,11 @@ __all__ = ('unit_from_configfile',
            'get_report',
            'grammar_unit',
            'grammar_suite',
+           'reset_unit',
            'runner')

-UNIT_STAGES = {'match*', 'match', 'fail', 'ast', 'cst', '__ast__', '__cst__'}
+UNIT_STAGES = {'match*', 'match', 'fail', 'ast', 'cst'}
+RESULT_STAGES = {'__cst__', '__ast__', '__err__'}


 # def unit_from_configfile(config_filename):
 #     """
@@ -261,7 +263,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
         _, unit_name = os.path.split(os.path.splitext(test_unit)[0])
         test_unit = unit_from_file(test_unit)
     else:
-        unit_name = str(id(test_unit))
+        unit_name = 'unit_test_' + str(id(test_unit))
     if verbose:
         print("\nUnit: " + unit_name)
     errata = []
@@ -269,7 +271,12 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
     transform = transformer_factory()

     for parser_name, tests in test_unit.items():
         assert parser_name, "Missing parser name in test %s!" % unit_name
-        assert set(tests.keys()).issubset(UNIT_STAGES)
+        assert not any(test_type in RESULT_STAGES for test_type in tests), \
+            ("Test %s in %s already has results. Use reset_unit() before running again!"
+             % (parser_name, unit_name))
+        assert set(tests.keys()).issubset(UNIT_STAGES), \
+            'Unknown test-types: %s ! Must be one of %s' \
+            % (set(tests.keys()) - UNIT_STAGES, UNIT_STAGES)
         if verbose:
             print('  Match-Tests for parser "' + parser_name + '"')
         match_tests = set(tests['match'].keys()) if 'match' in tests else set()
@@ -357,6 +364,18 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
     return errata


+def reset_unit(test_unit):
+    """Resets the tests in ``test_unit`` by removing all results and
+    error messages."""
+    for parser, tests in test_unit.items():
+        for key in list(tests.keys()):
+            if key not in UNIT_STAGES:
+                if key not in RESULT_STAGES:
+                    print('Removing unknown component %s from test %s' % (key, parser))
+                del tests[key]
+
+
 def grammar_suite(directory, parser_factory, transformer_factory,
                   fn_patterns=['*test*'],
                   ignore_unknown_filetypes=False,
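
reset_unit() exists so that a test unit that already carries result stages ('__cst__', '__ast__', '__err__') can be fed to grammar_unit() again without tripping the new assertion. A sketch of the expected unit layout, not from the commit; the 'word' rule and its test strings are invented:

    from DHParser.testing import reset_unit

    unit = {'word': {'match': {'1': 'hello'},
                     'fail': {'2': '*!?'}}}
    # ... after a grammar_unit() run, result stages such as '__ast__' are stored in the unit ...
    reset_unit(unit)   # strips '__cst__', '__ast__' and '__err__' so the unit can be re-run
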
examples/LaTeX/LaTeX.ebnf
@@ -107,16 +107,21 @@ inline_math = /\$/ /[^$]*/ §/\$/
 #### commands ####

 command          = known_command | text_command | generic_command
-known_command    = footnote | includegraphics | caption | multicolumn | hline | cline
+known_command    = citet | citep | footnote | includegraphics | caption
+                 | multicolumn | hline | cline | documentclass | pdfinfo
 text_command     = TXTCOMMAND | ESCAPED | BRACKETS
 generic_command  = !no_command CMDNAME [[ //~ config ] //~ block ]

+citet            = "\citet" [config] block
+citep            = ("\citep" | "\cite") [config] block
 footnote         = "\footnote" block_of_paragraphs
 includegraphics  = "\includegraphics" [ config ] block
 caption          = "\caption" block
 multicolumn      = "\multicolumn" "{" INTEGER "}" tabular_config block_of_paragraphs
 hline            = "\hline"
 cline            = "\cline{" INTEGER "-" INTEGER "}"
+documentclass    = "\documentclass" [ config ] block
+pdfinfo          = "\pdfinfo" block

 #######################################################################
examples/LaTeX/LaTeXCompiler.py
@@ -7,6 +7,7 @@
 #######################################################################

+from collections import defaultdict
 import os
 import sys
 from functools import partial
@@ -23,7 +24,7 @@ from DHParser import is_filename, Grammar, Compiler, Lookbehind, Alternative, Po
     Node, TransformationFunc, traverse, remove_children_if, is_anonymous, \
     reduce_single_child, replace_by_single_child, remove_whitespace, \
     flatten, is_empty, collapse, replace_content, remove_brackets, is_one_of, remove_first, \
-    remove_tokens, remove_nodes, TOKEN_PTYPE
+    traverse_locally, remove_tokens, remove_nodes, TOKEN_PTYPE, Error
 from DHParser.log import logging
@@ -158,16 +159,21 @@ class LaTeXGrammar(Grammar):
     #### commands ####

     command          = known_command | text_command | generic_command
-    known_command    = footnote | includegraphics | caption | multicolumn | hline | cline
+    known_command    = citet | citep | footnote | includegraphics | caption
+                     | multicolumn | hline | cline | documentclass | pdfinfo
     text_command     = TXTCOMMAND | ESCAPED | BRACKETS
     generic_command  = !no_command CMDNAME [[ //~ config ] //~ block ]

+    citet            = "\citet" [config] block
+    citep            = ("\citep" | "\cite") [config] block
     footnote         = "\footnote" block_of_paragraphs
     includegraphics  = "\includegraphics" [ config ] block
     caption          = "\caption" block
     multicolumn      = "\multicolumn" "{" INTEGER "}" tabular_config block_of_paragraphs
     hline            = "\hline"
     cline            = "\cline{" INTEGER "-" INTEGER "}"
+    documentclass    = "\documentclass" [ config ] block
+    pdfinfo          = "\pdfinfo" block

     #######################################################################
@@ -231,7 +237,7 @@ class LaTeXGrammar(Grammar):
     paragraph = Forward()
     tabular_config = Forward()
     text_element = Forward()
-    source_hash__ = "96b3c5ce2f75505a279d4d27f7712323"
+    source_hash__ = "8dcbc88ac7db7a9bee51f440394aaa18"
     parser_initialization__ = "upon instantiation"
     COMMENT__ = r'%.*'
     WHITESPACE__ = r'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?'
@@ -263,15 +269,19 @@ class LaTeXGrammar(Grammar):
     block = Series(RegExp('{'), RE(''), ZeroOrMore(Series(NegativeLookahead(blockcmd), text_element, RE(''))), RegExp('}'), mandatory=3)
     cfg_text = ZeroOrMore(Alternative(Series(Option(RE('')), text), CMDNAME, SPECIAL))
     config = Series(Token("["), cfg_text, Token("]"), mandatory=2)
+    pdfinfo = Series(Token("\\pdfinfo"), block)
+    documentclass = Series(Token("\\documentclass"), Option(config), block)
     cline = Series(Token("\\cline{"), INTEGER, Token("-"), INTEGER, Token("}"))
     hline = Token("\\hline")
     multicolumn = Series(Token("\\multicolumn"), Token("{"), INTEGER, Token("}"), tabular_config, block_of_paragraphs)
     caption = Series(Token("\\caption"), block)
     includegraphics = Series(Token("\\includegraphics"), Option(config), block)
     footnote = Series(Token("\\footnote"), block_of_paragraphs)
+    citep = Series(Alternative(Token("\\citep"), Token("\\cite")), Option(config), block)
+    citet = Series(Token("\\citet"), Option(config), block)
     generic_command = Series(NegativeLookahead(no_command), CMDNAME, Option(Series(Option(Series(RE(''), config)), RE(''), block)))
     text_command = Alternative(TXTCOMMAND, ESCAPED, BRACKETS)
-    known_command = Alternative(footnote, includegraphics, caption, multicolumn, hline, cline)
+    known_command = Alternative(citet, citep, footnote, includegraphics, caption, multicolumn, hline, cline, documentclass, pdfinfo)
     command = Alternative(known_command, text_command, generic_command)
     inline_math = Series(RegExp('\\$'), RegExp('[^$]*'), RegExp('\\$'), mandatory=2)
     end_environment = Series(RegExp('\\\\end{'), Pop(NAME), RegExp('}'), mandatory=1)
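
The generated parser objects above can be exercised rule by rule, which is how the grammar test reports further down were produced. A minimal sketch, not part of this diff, assuming that a Grammar instance can be called with the name of a start rule, as DHParser's grammar tests do; the citation snippet itself is invented:

    parser = LaTeXGrammar()                       # class generated above
    tree = parser('\\citep{Heine1826}', 'citep')  # assumption: second argument selects the start rule
    print(tree.as_sxpr())                         # S-expression view of the resulting syntax tree
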
@@ -361,7 +371,7 @@ def watch(node):
 flatten_structure = flatten(lambda context: is_anonymous(context) or is_one_of(
     context, {"Chapters", "Sections", "SubSections", "SubSubSections", "Paragraphs",
-              "SubParagraphs", "sequence"}), True)
+              "SubParagraphs", "sequence"}), recursive=True)


 def is_commandname(context):
@@ -387,7 +397,7 @@ LaTeX_AST_transformation_table = {
     # AST Transformations for the LaTeX-grammar
     "+": [drop_expendables, flatten_structure],
     "latexdoc": [],
-    "preamble": [],
+    "preamble": [traverse_locally({'+': remove_whitespace, 'block': replace_by_single_child})],
     "document": [flatten_structure],
     "frontpages": reduce_single_child,
     "Chapters, Sections, SubSections, SubSubSections, Paragraphs, SubParagraphs": [],
@@ -455,6 +465,7 @@ LaTeX_AST_transformation_table = {
 def LaTeXTransform() -> TransformationDict:
     return partial(traverse, processing_table=LaTeX_AST_transformation_table.copy())


 def get_transformer() -> TransformationFunc:
     global thread_local_LaTeX_transformer_singleton
     try:
@@ -472,21 +483,30 @@ def get_transformer() -> TransformationFunc:
 #
 #######################################################################


+def empty_defaultdict():
+    """Returns a defaultdict with an empty defaultdict as default value."""
+    return defaultdict(empty_defaultdict)
+
+
 class LaTeXCompiler(Compiler):
     """Compiler for the abstract-syntax-tree of a LaTeX source file.
     """
+    KNOWN_DOCUMENT_CLASSES = {'book', 'article'}
+    KNOWN_LANGUAGES = {'english', 'german'}

     def __init__(self, grammar_name="LaTeX", grammar_source=""):
         super(LaTeXCompiler, self).__init__(grammar_name, grammar_source)
         assert re.match('\w+\Z', grammar_name)
+        self.metadata = defaultdict(empty_defaultdict)

-    def on_latexdoc(self, node):
-        self.compile(node['preamble'])
-        self.compile(node['document'])
-        return node
+    # def on_latexdoc(self, node):
+    #     self.compile(node['preamble'])
+    #     self.compile(node['document'])
+    #     return node

-    def on_preamble(self, node):
-        return node
+    # def on_preamble(self, node):
+    #     return node

     # def on_document(self, node):
     #     return node
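
empty_defaultdict() makes self.metadata an arbitrarily nestable mapping: intermediate levels spring into existence on assignment. A short illustration, not part of the commit; the keys and values are made up:

    from collections import defaultdict

    def empty_defaultdict():
        # same helper as above: the default value is another empty defaultdict
        return defaultdict(empty_defaultdict)

    metadata = defaultdict(empty_defaultdict)
    metadata['pdfinfo']['author'] = 'Heinrich Heine'   # nested level created on the fly
    metadata['documentclass'] = 'book'                 # flat keys work as well
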
@@ -653,6 +673,23 @@ class LaTeXCompiler(Compiler):
     # def on_cline(self, node):
     #     return node

+    def on_documentclass(self, node):
+        if 'config' in node:
+            for it in {part.strip() for part in node['config'].content.split(',')}:
+                if it in self.KNOWN_LANGUAGES:
+                    if 'language' in node.attributes:
+                        self.metadata['language'] = it
+                    else:
+                        node.add_error('Only one document language supported. '
+                                       'Using %s, ignoring %s.'
+                                       % (self.metadata['language'], it), Error.WARNING)
+        if node['text'] in self.KNOWN_DOCUMENT_CLASSES:
+            self.metadata['documentclass'] = node['text']
+        return node
+
+    def on_pdfinfo(self, node):
+        return node
+
     # def on_config(self, node):
     #     return node
examples/LaTeX/grammar_tests/REPORT/01_test_text.md
@@ -82,9 +82,6 @@ Match-test "3"
         (text
             "footnote"
         )
-        (:Whitespace
-            " "
-        )
     )
 )
@@ -163,21 +160,12 @@ Match-test "7"
 ### AST

     (block
-        (:Whitespace
-            " "
-        )
         (generic_command
             (CMDNAME
                 "\em"
             )
         )
-        (:Whitespace
-            " "
-        )
         (text
             "block"
         )
-        (:Whitespace
-            " "
-        )
     )
\ No newline at end of file
examples/LaTeX/grammar_tests/REPORT/02_test_paragraph.md
@@ -19,9 +19,6 @@ Match-test "1"
"Professoren, Philister und Vieh; welche vier Stände doch nichts weniger"
"als streng geschieden sind. Der Viehstand ist der bedeutendste."
)
(:Whitespace
" "
)
)
Match-test "2"
...
...
@@ -40,23 +37,14 @@ Match-test "2"
" "
)
(block
(:Whitespace
" "
)
(generic_command
(CMDNAME
"
\e
m"
)
)
(:Whitespace
" "
)
(text
"inline blocks"
)
(:Whitespace
" "
)
)
(:Whitespace
" "
...
...
@@ -71,19 +59,8 @@ Match-test "2"
             (CMDNAME
                 "\emph"
             )
-            (:Whitespace
-                " "
-            )
             (block
                 (:Whitespace