Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
37399df4
Commit
37399df4
authored
May 19, 2021
by
Eckhart Arnold
Browse files
preprocess.py: Fehlerkorrekturen
parent
a7817711
Changes
8
Hide whitespace changes
Inline
Side-by-side
DHParser/preprocess.py
View file @
37399df4
...
@@ -30,6 +30,7 @@ cannot completely be described entirely with context-free grammars.
...
@@ -30,6 +30,7 @@ cannot completely be described entirely with context-free grammars.
import
bisect
import
bisect
import
functools
import
functools
import
os
from
typing
import
Union
,
Optional
,
Callable
,
Tuple
,
NamedTuple
,
List
,
Any
from
typing
import
Union
,
Optional
,
Callable
,
Tuple
,
NamedTuple
,
List
,
Any
from
DHParser.toolkit
import
re
,
dataclasses
from
DHParser.toolkit
import
re
,
dataclasses
...
@@ -42,6 +43,7 @@ __all__ = ('RX_TOKEN_NAME',
...
@@ -42,6 +43,7 @@ __all__ = ('RX_TOKEN_NAME',
'SourceMap'
,
'SourceMap'
,
'SourceMapFunc'
,
'SourceMapFunc'
,
'PreprocessorFunc'
,
'PreprocessorFunc'
,
'Preprocessed'
,
'PreprocessorResult'
,
'PreprocessorResult'
,
'make_token'
,
'make_token'
,
'strip_tokens'
,
'strip_tokens'
,
...
@@ -51,7 +53,9 @@ __all__ = ('RX_TOKEN_NAME',
...
@@ -51,7 +53,9 @@ __all__ = ('RX_TOKEN_NAME',
'neutral_mapping'
,
'neutral_mapping'
,
'tokenized_to_original_mapping'
,
'tokenized_to_original_mapping'
,
'source_map'
,
'source_map'
,
'with_source_mapping'
)
'with_source_mapping'
,
'gen_find_include_func'
,
'preprocess_includes'
)
#######################################################################
#######################################################################
...
@@ -96,8 +100,7 @@ class IncludeMap(SourceMap):
...
@@ -96,8 +100,7 @@ class IncludeMap(SourceMap):
file_names
:
List
[
str
]
# list of file_names to which the source locations relate
file_names
:
List
[
str
]
# list of file_names to which the source locations relate
def
has_includes
(
self
)
->
bool
:
def
has_includes
(
self
)
->
bool
:
L
=
len
(
self
.
file_names
)
return
any
(
fname
!=
self
.
source_name
for
fname
in
self
.
file_names
)
return
L
>
1
or
(
L
==
1
and
self
.
file_names
[
0
]
!=
self
.
source_name
)
class
IncludeInfo
(
NamedTuple
):
class
IncludeInfo
(
NamedTuple
):
...
@@ -111,7 +114,7 @@ PreprocessorResult = Union[str, Preprocessed]
...
@@ -111,7 +114,7 @@ PreprocessorResult = Union[str, Preprocessed]
FindIncludeFunc
=
Union
[
Callable
[[
str
,
int
],
IncludeInfo
],
# (document: str, start: int)
FindIncludeFunc
=
Union
[
Callable
[[
str
,
int
],
IncludeInfo
],
# (document: str, start: int)
functools
.
partial
]
functools
.
partial
]
PreprocessorFunc
=
Union
[
Callable
[[
str
,
str
],
PreprocessorResult
],
PreprocessorFunc
=
Union
[
Callable
[[
str
,
str
],
PreprocessorResult
],
# text: str, filename: str
functools
.
partial
]
functools
.
partial
]
...
@@ -302,8 +305,16 @@ def with_source_mapping(result: PreprocessorResult) -> Preprocessed:
...
@@ -302,8 +305,16 @@ def with_source_mapping(result: PreprocessorResult) -> Preprocessed:
"""
"""
if
isinstance
(
result
,
str
):
if
isinstance
(
result
,
str
):
srcmap
=
tokenized_to_original_mapping
(
result
)
srcmap
=
tokenized_to_original_mapping
(
result
)
mapping_func
=
functools
.
partial
(
source_map
,
srcmap
=
srcmap
)
token_mapping
=
functools
.
partial
(
source_map
,
srcmap
=
srcmap
)
return
Preprocessed
(
result
,
mapping_func
)
return
Preprocessed
(
result
,
token_mapping
)
# else: # DOES NOT WORK, because there is no way to reliably find out whether
# # token back-mapping has already been done by the provided mapping
# text, mapping = cast(Preprocessed, result)
# if not (hasattr(mapping, 'func') and mapping.func == source_map):
# srcmap = tokenized_to_original_mapping(text)
# token_mapping = functools.partial(source_map, srcmap=srcmap)
# return Preprocessed(
# text, functools.partial(_apply_mappings, mappings=[token_mapping, mapping]))
return
result
return
result
...
@@ -314,8 +325,8 @@ def with_source_mapping(result: PreprocessorResult) -> Preprocessed:
...
@@ -314,8 +325,8 @@ def with_source_mapping(result: PreprocessorResult) -> Preprocessed:
#######################################################################
#######################################################################
def
gen
erate
_find_include_func
(
rx
:
Union
[
str
,
Any
],
def
gen_find_include_func
(
rx
:
Union
[
str
,
Any
],
comment_rx
:
Optional
[
Union
[
str
,
Any
]]
=
None
)
->
FindIncludeFunc
:
comment_rx
:
Optional
[
Union
[
str
,
Any
]]
=
None
)
->
FindIncludeFunc
:
if
isinstance
(
rx
,
str
):
rx
=
re
.
compile
(
rx
)
if
isinstance
(
rx
,
str
):
rx
=
re
.
compile
(
rx
)
if
isinstance
(
comment_rx
,
str
):
comment_rx
=
re
.
compile
(
comment_rx
)
if
isinstance
(
comment_rx
,
str
):
comment_rx
=
re
.
compile
(
comment_rx
)
...
@@ -362,11 +373,13 @@ def generate_include_map(source_name: str,
...
@@ -362,11 +373,13 @@ def generate_include_map(source_name: str,
raise
ValueError
(
f
'Circular include of
{
source_name
}
detected!'
)
raise
ValueError
(
f
'Circular include of
{
source_name
}
detected!'
)
file_names
.
add
(
source_name
)
file_names
.
add
(
source_name
)
dirname
=
os
.
path
.
dirname
(
source_name
)
source_pointer
=
0
source_pointer
=
0
source_offset
=
0
source_offset
=
0
result_pointer
=
0
result_pointer
=
0
last_begin
=
-
1
last_begin
=
-
1
begin
,
length
,
include_name
=
find_next
(
source_text
,
0
)
begin
,
length
,
include_name
=
find_next
(
source_text
,
0
)
include_name
=
os
.
path
.
join
(
dirname
,
include_name
)
while
begin
>=
0
:
while
begin
>=
0
:
assert
begin
>
last_begin
assert
begin
>
last_begin
source_delta
=
begin
-
source_pointer
source_delta
=
begin
-
source_pointer
...
@@ -396,6 +409,7 @@ def generate_include_map(source_name: str,
...
@@ -396,6 +409,7 @@ def generate_include_map(source_name: str,
source_offset
+=
length
-
inner_length
source_offset
+=
length
-
inner_length
map
.
offsets
.
append
(
source_offset
)
map
.
offsets
.
append
(
source_offset
)
begin
,
length
,
include_name
=
find_next
(
source_text
,
source_pointer
)
begin
,
length
,
include_name
=
find_next
(
source_text
,
source_pointer
)
include_name
=
os
.
path
.
join
(
dirname
,
include_name
)
rest
=
source_text
[
source_pointer
:]
rest
=
source_text
[
source_pointer
:]
if
rest
:
if
rest
:
result
.
append
(
rest
)
result
.
append
(
rest
)
...
@@ -417,8 +431,8 @@ def srcmap_includes(position: int, inclmap: IncludeMap) -> SourceLocation:
...
@@ -417,8 +431,8 @@ def srcmap_includes(position: int, inclmap: IncludeMap) -> SourceLocation:
raise
ValueError
raise
ValueError
def
preprocess_includes
(
source_
name
:
str
,
def
preprocess_includes
(
source_
text
:
Optional
[
str
]
,
source_
text
:
Optional
[
str
]
,
source_
name
:
str
,
find_next_include
:
FindIncludeFunc
)
->
Preprocessed
:
find_next_include
:
FindIncludeFunc
)
->
Preprocessed
:
if
not
source_text
:
if
not
source_text
:
with
open
(
source_name
,
'r'
,
encoding
=
'utf-8'
)
as
f
:
with
open
(
source_name
,
'r'
,
encoding
=
'utf-8'
)
as
f
:
...
...
examples/LaTeX/LaTeX.ebnf
View file @
37399df4
...
@@ -3,7 +3,7 @@
...
@@ -3,7 +3,7 @@
# preamble
# preamble
@ literalws = right
@ literalws = right
@ whitespace = /[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?/ # insignificant whitespace, including at most one linefeed
@ whitespace = /[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?/ # insignificant whitespace, including at most one linefeed
@ comment = /%.*/
@ comment = /%.*/
# note: trailing linefeed is not part of the comment proper
@ reduction = merge_treetops
@ reduction = merge_treetops
@ disposable = _WSPC, _GAP, _LB, _PARSEP, _LETTERS, _NAME, INTEGER, FRAC,
@ disposable = _WSPC, _GAP, _LB, _PARSEP, _LETTERS, _NAME, INTEGER, FRAC,
_QUALIFIED, TEXT_NOPAR, TEXT, _block_content,
_QUALIFIED, TEXT_NOPAR, TEXT, _block_content,
...
@@ -18,11 +18,11 @@
...
@@ -18,11 +18,11 @@
#
#
########################################################################
########################################################################
latexdoc = preamble document
latexdoc = preamble
§
document
preamble = { [_WSPC] command }+
preamble = { [_WSPC] command }+
document = [_WSPC] "\begin{document}"
document = [_WSPC] "\begin{document}"
frontpages
§
frontpages
(Chapters | Sections)
(Chapters | Sections)
[Bibliography] [Index] [_WSPC]
[Bibliography] [Index] [_WSPC]
"\end{document}" [_WSPC] §EOF
"\end{document}" [_WSPC] §EOF
...
@@ -115,11 +115,12 @@ inline_math = /\$/ /[^$]*/ §/\$/
...
@@ -115,11 +115,12 @@ inline_math = /\$/ /[^$]*/ §/\$/
#### commands ####
#### commands ####
command = known_command | text_command | generic_command
command = known_command | text_command | generic_command
known_command = citet | citep | footnote | includegraphics | caption
known_command = citet | citep | footnote | includegraphics | caption
| multicolumn | hline | cline | documentclass | pdfinfo
| multicolumn | hline | cline | documentclass | pdfinfo
| hypersetup
| hypersetup
text_command = TXTCOMMAND | ESCAPED | BRACKETS
text_command = TXTCOMMAND | ESCAPED | BRACKETS
generic_command = !no_command CMDNAME [[ ~ config ] ~ block ]
generic_command = !no_command CMDNAME [[ ~ config ]
{
~ block
}+
]
| `{` CMDNAME _block_content §`}`
| `{` CMDNAME _block_content §`}`
citet = "\citet" [config] block
citet = "\citet" [config] block
...
@@ -195,10 +196,9 @@ WARN_Komma = ","
...
@@ -195,10 +196,9 @@ WARN_Komma = ","
#
#
#######################################################################
#######################################################################
CMDNAME = /\\(?:(?![\d_])\w)+/~
CMDNAME = /\\(?:(?![\d_])\w)+/~
TXTCOMMAND = /\\text\w+/
TXTCOMMAND = /\\text\w+/
ESCAPED = /\\[%$&_\/{}]/
ESCAPED = /\\[%$&_\/{}
]/
SPECIAL = /[$&_\/\\\\]/
SPECIAL = /[$&_\/\\\\]/
BRACKETS = /[\[\]]/ # left or right square bracket: [ ]
BRACKETS = /[\[\]]/ # left or right square bracket: [ ]
LINEFEED = /[\\][\\]/
LINEFEED = /[\\][\\]/
...
...
examples/LaTeX/LaTeXParser.py
View file @
37399df4
...
@@ -48,7 +48,8 @@ from DHParser import start_logging, suspend_logging, resume_logging, is_filename
...
@@ -48,7 +48,8 @@ from DHParser import start_logging, suspend_logging, resume_logging, is_filename
trace_history
,
has_descendant
,
neg
,
has_ancestor
,
optional_last_value
,
insert
,
\
trace_history
,
has_descendant
,
neg
,
has_ancestor
,
optional_last_value
,
insert
,
\
positions_of
,
replace_tag_names
,
add_attributes
,
delimit_children
,
merge_connected
,
\
positions_of
,
replace_tag_names
,
add_attributes
,
delimit_children
,
merge_connected
,
\
has_attr
,
has_parent
,
ThreadLocalSingletonFactory
,
Error
,
canonical_error_strings
,
\
has_attr
,
has_parent
,
ThreadLocalSingletonFactory
,
Error
,
canonical_error_strings
,
\
has_errors
,
apply_unless
,
WARNING
,
ERROR
,
FATAL
,
EMPTY_NODE
,
TreeReduction
,
CombinedParser
has_errors
,
apply_unless
,
WARNING
,
ERROR
,
FATAL
,
EMPTY_NODE
,
TreeReduction
,
CombinedParser
,
\
Preprocessed
,
neutral_mapping
,
preprocess_includes
,
gen_find_include_func
,
flatten_sxpr
#######################################################################
#######################################################################
...
@@ -57,12 +58,13 @@ from DHParser import start_logging, suspend_logging, resume_logging, is_filename
...
@@ -57,12 +58,13 @@ from DHParser import start_logging, suspend_logging, resume_logging, is_filename
#
#
#######################################################################
#######################################################################
def
nop
(
arg
):
return
arg
RX_TEX_INPUT
=
r
'\\input{(?P<name>.*)}'
def
LaTeXPreprocessor
(
text
):
return
text
,
nop
def
LaTeXPreprocessor
(
text
:
str
,
file_name
:
str
)
->
Preprocessed
:
find_includes
=
gen_find_include_func
(
RX_TEX_INPUT
,
LaTeXGrammar
.
comment_rx__
)
return
preprocess_includes
(
text
,
file_name
,
find_includes
)
def
get_preprocessor
()
->
PreprocessorFunc
:
def
get_preprocessor
()
->
PreprocessorFunc
:
...
@@ -83,7 +85,7 @@ class LaTeXGrammar(Grammar):
...
@@ -83,7 +85,7 @@ class LaTeXGrammar(Grammar):
paragraph
=
Forward
()
paragraph
=
Forward
()
param_block
=
Forward
()
param_block
=
Forward
()
text_element
=
Forward
()
text_element
=
Forward
()
source_hash__
=
"
74b31b1a6754004694c1d25e614d7f32
"
source_hash__
=
"
49543176de36a2f3271970b00b62761d
"
disposable__
=
re
.
compile
(
'_WSPC$|_GAP$|_LB$|_PARSEP$|_LETTERS$|_NAME$|INTEGER$|FRAC$|_QUALIFIED$|TEXT_NOPAR$|TEXT$|_block_content$|block_environment$|known_environment$|text_element$|line_element$|inline_environment$|known_inline_env$|info_block$|begin_inline_env$|end_inline_env$|command$|known_command$'
)
disposable__
=
re
.
compile
(
'_WSPC$|_GAP$|_LB$|_PARSEP$|_LETTERS$|_NAME$|INTEGER$|FRAC$|_QUALIFIED$|TEXT_NOPAR$|TEXT$|_block_content$|block_environment$|known_environment$|text_element$|line_element$|inline_environment$|known_inline_env$|info_block$|begin_inline_env$|end_inline_env$|command$|known_command$'
)
static_analysis_pending__
=
[]
# type: List[bool]
static_analysis_pending__
=
[]
# type: List[bool]
parser_initialization__
=
[
"upon instantiation"
]
parser_initialization__
=
[
"upon instantiation"
]
...
@@ -120,7 +122,7 @@ class LaTeXGrammar(Grammar):
...
@@ -120,7 +122,7 @@ class LaTeXGrammar(Grammar):
LINEFEED
=
RegExp
(
'[
\\\\
][
\\\\
]'
)
LINEFEED
=
RegExp
(
'[
\\\\
][
\\\\
]'
)
BRACKETS
=
RegExp
(
'[
\\
[
\\
]]'
)
BRACKETS
=
RegExp
(
'[
\\
[
\\
]]'
)
SPECIAL
=
RegExp
(
'[$&_/
\\\\\\\\
]'
)
SPECIAL
=
RegExp
(
'[$&_/
\\\\\\\\
]'
)
ESCAPED
=
RegExp
(
'
\\\\
[%$&_/{}]'
)
ESCAPED
=
RegExp
(
'
\\\\
[%$&_/{}
]'
)
TXTCOMMAND
=
RegExp
(
'
\\\\
text
\\
w+'
)
TXTCOMMAND
=
RegExp
(
'
\\\\
text
\\
w+'
)
CMDNAME
=
Series
(
RegExp
(
'
\\\\
(?:(?![
\\
d_])
\\
w)+'
),
dwsp__
)
CMDNAME
=
Series
(
RegExp
(
'
\\\\
(?:(?![
\\
d_])
\\
w)+'
),
dwsp__
)
WARN_Komma
=
Series
(
Text
(
","
),
dwsp__
)
WARN_Komma
=
Series
(
Text
(
","
),
dwsp__
)
...
@@ -169,7 +171,7 @@ class LaTeXGrammar(Grammar):
...
@@ -169,7 +171,7 @@ class LaTeXGrammar(Grammar):
generic_inline_env
=
Series
(
begin_inline_env
,
dwsp__
,
paragraph
,
end_inline_env
,
mandatory
=
3
)
generic_inline_env
=
Series
(
begin_inline_env
,
dwsp__
,
paragraph
,
end_inline_env
,
mandatory
=
3
)
known_inline_env
=
Synonym
(
inline_math
)
known_inline_env
=
Synonym
(
inline_math
)
inline_environment
=
Alternative
(
known_inline_env
,
generic_inline_env
)
inline_environment
=
Alternative
(
known_inline_env
,
generic_inline_env
)
generic_command
=
Alternative
(
Series
(
NegativeLookahead
(
no_command
),
CMDNAME
,
Option
(
Series
(
Option
(
Series
(
dwsp__
,
config
)),
dwsp__
,
block
))),
Series
(
Drop
(
Text
(
"{"
)),
CMDNAME
,
_block_content
,
Drop
(
Text
(
"}"
)),
mandatory
=
3
))
generic_command
=
Alternative
(
Series
(
NegativeLookahead
(
no_command
),
CMDNAME
,
Option
(
Series
(
Option
(
Series
(
dwsp__
,
config
)),
OneOrMore
(
Series
(
dwsp__
,
block
)))
))
,
Series
(
Drop
(
Text
(
"{"
)),
CMDNAME
,
_block_content
,
Drop
(
Text
(
"}"
)),
mandatory
=
3
))
SubParagraph
=
Series
(
Series
(
Drop
(
Text
(
"
\\
subparagraph"
)),
dwsp__
),
heading
,
Option
(
sequence
))
SubParagraph
=
Series
(
Series
(
Drop
(
Text
(
"
\\
subparagraph"
)),
dwsp__
),
heading
,
Option
(
sequence
))
SubParagraphs
=
OneOrMore
(
Series
(
Option
(
_WSPC
),
SubParagraph
))
SubParagraphs
=
OneOrMore
(
Series
(
Option
(
_WSPC
),
SubParagraph
))
frontpages
=
Synonym
(
sequence
)
frontpages
=
Synonym
(
sequence
)
...
@@ -201,13 +203,13 @@ class LaTeXGrammar(Grammar):
...
@@ -201,13 +203,13 @@ class LaTeXGrammar(Grammar):
Sections
=
OneOrMore
(
Series
(
Option
(
_WSPC
),
Section
))
Sections
=
OneOrMore
(
Series
(
Option
(
_WSPC
),
Section
))
Chapter
=
Series
(
Series
(
Drop
(
Text
(
"
\\
chapter"
)),
dwsp__
),
heading
,
ZeroOrMore
(
Alternative
(
sequence
,
Sections
)))
Chapter
=
Series
(
Series
(
Drop
(
Text
(
"
\\
chapter"
)),
dwsp__
),
heading
,
ZeroOrMore
(
Alternative
(
sequence
,
Sections
)))
Chapters
=
OneOrMore
(
Series
(
Option
(
_WSPC
),
Chapter
))
Chapters
=
OneOrMore
(
Series
(
Option
(
_WSPC
),
Chapter
))
document
=
Series
(
Option
(
_WSPC
),
Series
(
Drop
(
Text
(
"
\\
begin{document}"
)),
dwsp__
),
frontpages
,
Alternative
(
Chapters
,
Sections
),
Option
(
Bibliography
),
Option
(
Index
),
Option
(
_WSPC
),
Series
(
Drop
(
Text
(
"
\\
end{document}"
)),
dwsp__
),
Option
(
_WSPC
),
EOF
,
mandatory
=
9
)
document
=
Series
(
Option
(
_WSPC
),
Series
(
Drop
(
Text
(
"
\\
begin{document}"
)),
dwsp__
),
frontpages
,
Alternative
(
Chapters
,
Sections
),
Option
(
Bibliography
),
Option
(
Index
),
Option
(
_WSPC
),
Series
(
Drop
(
Text
(
"
\\
end{document}"
)),
dwsp__
),
Option
(
_WSPC
),
EOF
,
mandatory
=
2
)
param_block
.
set
(
Series
(
Series
(
Drop
(
Text
(
"{"
)),
dwsp__
),
Option
(
parameters
),
Series
(
Drop
(
Text
(
"}"
)),
dwsp__
)))
param_block
.
set
(
Series
(
Series
(
Drop
(
Text
(
"{"
)),
dwsp__
),
Option
(
parameters
),
Series
(
Drop
(
Text
(
"}"
)),
dwsp__
)))
block
.
set
(
Series
(
Series
(
Drop
(
Text
(
"{"
)),
dwsp__
),
_block_content
,
Drop
(
Text
(
"}"
)),
mandatory
=
2
))
block
.
set
(
Series
(
Series
(
Drop
(
Text
(
"{"
)),
dwsp__
),
_block_content
,
Drop
(
Text
(
"}"
)),
mandatory
=
2
))
text_element
.
set
(
Alternative
(
line_element
,
LINEFEED
))
text_element
.
set
(
Alternative
(
line_element
,
LINEFEED
))
paragraph
.
set
(
OneOrMore
(
Series
(
NegativeLookahead
(
blockcmd
),
text_element
,
Option
(
S
))))
paragraph
.
set
(
OneOrMore
(
Series
(
NegativeLookahead
(
blockcmd
),
text_element
,
Option
(
S
))))
block_environment
.
set
(
Alternative
(
known_environment
,
generic_block
))
block_environment
.
set
(
Alternative
(
known_environment
,
generic_block
))
latexdoc
=
Series
(
preamble
,
document
)
latexdoc
=
Series
(
preamble
,
document
,
mandatory
=
1
)
root__
=
TreeReduction
(
latexdoc
,
CombinedParser
.
MERGE_TREETOPS
)
root__
=
TreeReduction
(
latexdoc
,
CombinedParser
.
MERGE_TREETOPS
)
...
@@ -267,12 +269,15 @@ def transform_generic_command(context: List[Node]):
...
@@ -267,12 +269,15 @@ def transform_generic_command(context: List[Node]):
def
transform_generic_block
(
context
:
List
[
Node
]):
def
transform_generic_block
(
context
:
List
[
Node
]):
node
=
context
[
-
1
]
node
=
context
[
-
1
]
# assert node.children[0].tag_name == "begin_generic_block"
if
not
node
.
children
or
not
node
.
children
[
0
].
children
:
# assert node.children[0].children[0].tag_name == "begin_environment"
context
[
0
].
new_error
(
node
,
'unknown kind of block: '
+
flatten_sxpr
(
node
.
as_sxpr
()))
# assert node.children[-1].tag_name == "end_generic_block"
else
:
# assert node.children[-1].children[0].tag_name == "end_environment"
# assert node.children[0].tag_name == "begin_generic_block"
node
.
tag_name
=
'env_'
+
node
.
children
[
0
].
children
[
0
].
content
.
lstrip
(
'
\\
'
)
# assert node.children[0].children[0].tag_name == "begin_environment"
node
.
result
=
node
.
children
[
1
:
-
1
]
# assert node.children[-1].tag_name == "end_generic_block"
# assert node.children[-1].children[0].tag_name == "end_environment"
node
.
tag_name
=
'env_'
+
node
.
children
[
0
].
children
[
0
].
content
.
lstrip
(
'
\\
'
)
node
.
result
=
node
.
children
[
1
:
-
1
]
def
is_expendable
(
context
:
List
[
Node
]):
def
is_expendable
(
context
:
List
[
Node
]):
...
@@ -346,6 +351,7 @@ LaTeX_AST_transformation_table = {
...
@@ -346,6 +351,7 @@ LaTeX_AST_transformation_table = {
"structural"
:
[],
"structural"
:
[],
"CMDNAME"
:
[
remove_whitespace
,
reduce_single_child
],
"CMDNAME"
:
[
remove_whitespace
,
reduce_single_child
],
"TXTCOMMAND"
:
[
remove_whitespace
,
reduce_single_child
],
"TXTCOMMAND"
:
[
remove_whitespace
,
reduce_single_child
],
"NO_CMD"
:
[
add_error
(
"unknown kind of command"
)],
"NAME"
:
[
reduce_single_child
,
remove_whitespace
,
reduce_single_child
],
"NAME"
:
[
reduce_single_child
,
remove_whitespace
,
reduce_single_child
],
"ESCAPED"
:
[
transform_content
(
lambda
node
:
str
(
node
)[
1
:])],
"ESCAPED"
:
[
transform_content
(
lambda
node
:
str
(
node
)[
1
:])],
"BRACKETS"
:
[],
"BRACKETS"
:
[],
...
@@ -940,7 +946,8 @@ if __name__ == "__main__":
...
@@ -940,7 +946,8 @@ if __name__ == "__main__":
if
errors
:
if
errors
:
for
err_str
in
canonical_error_strings
(
errors
,
file_names
[
0
]):
for
err_str
in
canonical_error_strings
(
errors
,
file_names
[
0
]):
print
(
err_str
)
print
(
err_str
)
sys
.
exit
(
1
)
if
has_errors
(
errors
,
ERROR
):
else
:
sys
.
exit
(
1
)
print
(
result
.
serialize
(
how
=
'default'
if
args
.
xml
is
None
else
'xml'
)
if
isinstance
(
result
,
Node
)
else
result
)
print
(
result
.
serialize
(
how
=
'default'
if
args
.
xml
is
None
else
'xml'
)
if
isinstance
(
result
,
Node
)
else
result
)
examples/LaTeX/test_grammar/00_test_primitives.ini
View file @
37399df4
...
@@ -92,6 +92,15 @@
...
@@ -92,6 +92,15 @@
"""
"""
8:
"""
%\title{Vorlesung:
Grundlagen
des
Entscheidens
I}
%\author{Eckhart
Arnold}
%\date{Stand:
6.
Juli
2009}
%\maketitle
"""
[fail:_WSPC]
[fail:_WSPC]
10:
"X"
10:
"X"
...
...
examples/LaTeX/test_grammar/06_test_commands.ini
View file @
37399df4
...
@@ -41,4 +41,8 @@
...
@@ -41,4 +41,8 @@
/Keywords
(Computer
Simulations,
Validation
of
Simulations)
/Keywords
(Computer
Simulations,
Validation
of
Simulations)
}"""
}"""
13*:
"""\usepackage
[pdftex]
{hyperref}"""
13:
"""\usepackage
[pdftex]
{hyperref}"""
14:
"""\numberwithin{equation}{section}"""
15:
"""\newcommand{\marginline}{\marginnote}"""
16:
"""\renewcommand{\marginfont}{\scriptsize}"""
17:
"""\
"""
examples/LaTeX/test_grammar/99_test_playground.ini
0 → 100644
View file @
37399df4
[match:paragraph]
M1:
"""\
{
}"""
[match:frontpages]
M1:
"""
%\title{Vorlesung:
Grundlagen
des
Entscheidens
I}
%\author{Eckhart
Arnold}
%\date{Stand:
6.
Juli
2009}
%\maketitle
\begin{titlepage}
\begin{center}
\
{
}
\vspace{0.5cm}
{\Large
Vorlesung:
Grundlagen
des
Entscheidens
I}
\vspace{0.75cm}
Sommersemester
2009
\vspace{0.5cm}
Stand:
6.
Juli
2009
\\~\
\
Hinweis:
Das
Skript
wurde
bisher
noch
wenig
Korrektur
gelesen
und
das
letzte
Kapitel
fehlt
leider
ganz.
Es
enthält
jedem
Menge
Tippfehler
und
auch
vereinzelte
sachliche
Fehler
können
nicht
ganz
ausgeschlossen
werden.
Trotzdem:
Viel
Spaß
beim
Durcharbeiten!
\vspace{0.5cm}
Dozent:
Dr.
Eckhart
Arnold
\vspace{1cm}
\includegraphics
[width=6cm]
{Grafiken/pe_logo.eps}
\vspace{0.25cm}
{\Large
Universität
Bayreuth}
\vspace{1.75cm}
\includegraphics
[width=2.5cm]
{Grafiken/CC-BY-SA.eps}
\vspace{0.5cm}
\begin{small}
Dieses
Material
ist
frei
zugänglich
und
darf
unter
den
Bedingungen
der
Creative-Commons-Lizenz
BY-SA
4.0
weiter
gegeben
werden.
\vspace{0.5cm}
Die
Klausel
BY-SA
besagt:
Der
Name
des
Autors
muss
bei
abgeleiteten
Werken
genannt
werden,
und
abgeleitete
Werke
oder
Kopien
müssen
ebenfalls
unter
dieser
Lizenz
weitergegeben
werden.
\end{small}
\end{center}
\end{titlepage}
\tableofcontents
\newpage
\setlength{\marginparwidth}{2cm}
"""
\ No newline at end of file
examples/LaTeX/tst_LaTeX_docs.py
View file @
37399df4
...
@@ -48,8 +48,9 @@ if not DHParser.dsl.recompile_grammar(grammar_path, force=False):
...
@@ -48,8 +48,9 @@ if not DHParser.dsl.recompile_grammar(grammar_path, force=False):
sys
.
exit
(
1
)
sys
.
exit
(
1
)
from
LaTeXParser
import
get_grammar
,
get_transformer
,
get_compiler
from
LaTeXParser
import
get_preprocessor
,
get_grammar
,
get_transformer
,
get_compiler
preprocessor
=
get_preprocessor
()
parser
=
get_grammar
()
parser
=
get_grammar
()
transformer
=
get_transformer
()
transformer
=
get_transformer
()
compiler
=
get_compiler
()
compiler
=
get_compiler
()
...
@@ -81,8 +82,10 @@ def tst_func():
...
@@ -81,8 +82,10 @@ def tst_func():
with
open
(
filepath
,
'r'
,
encoding
=
'utf-8'
)
as
f
:
with
open
(
filepath
,
'r'
,
encoding
=
'utf-8'
)
as
f
:
doc
=
f
.
read
()
doc
=
f
.
read
()
print
(
'
\n\n
Parsing document: "%s"'
%
file
)
print
(
f
'
\n\n
Preprocessing document: "
{
file
}
"'
)
result
=
parser
(
doc
)
preprocessed
,
source_mapper
=
preprocessor
(
doc
,
file
)
print
(
f
'
\n\n
Parsing document: "
{
file
}
"'
)
result
=
parser
(
preprocessed
)
print
(
"Number of CST-nodes: "
+
str
(
tree_size
(
result
)))
print
(
"Number of CST-nodes: "
+
str
(
tree_size
(
result
)))
# print("Number of empty nodes: " + str(count_nodes(result,
# print("Number of empty nodes: " + str(count_nodes(result,
# lambda n: not bool(n.result))))
# lambda n: not bool(n.result))))
...
...
tests/test_preprocess.py
View file @
37399df4
...
@@ -36,7 +36,7 @@ from DHParser.dsl import grammar_provider
...
@@ -36,7 +36,7 @@ from DHParser.dsl import grammar_provider
from
DHParser
import
compile_source
from
DHParser
import
compile_source
from
DHParser.preprocess
import
make_token
,
tokenized_to_original_mapping
,
source_map
,
\
from
DHParser.preprocess
import
make_token
,
tokenized_to_original_mapping
,
source_map
,
\
BEGIN_TOKEN
,
END_TOKEN
,
TOKEN_DELIMITER
,
SourceMapFunc
,
SourceMap
,
chain_preprocessors
,
\
BEGIN_TOKEN
,
END_TOKEN
,
TOKEN_DELIMITER
,
SourceMapFunc
,
SourceMap
,
chain_preprocessors
,
\
strip_tokens
,
gen
erate
_find_include_func
,
preprocess_includes
,
IncludeInfo
strip_tokens
,
gen_find_include_func
,
preprocess_includes
,
IncludeInfo
from
DHParser.toolkit
import
lstrip_docstring
,
typing
,
re
from
DHParser.toolkit
import
lstrip_docstring
,
typing
,
re
from
DHParser.testing
import
TFFN
from
DHParser.testing
import
TFFN
from
typing
import
Tuple
,
Dict
from
typing
import
Tuple
,
Dict
...
@@ -219,14 +219,14 @@ class TestTokenParsing:
...
@@ -219,14 +219,14 @@ class TestTokenParsing:
class
TestHelpers
:
class
TestHelpers
:
def
test_generate_find_include_func
(
self
):
def
test_generate_find_include_func
(
self
):
rx
=
re
.
compile
(
r
'include\((?P<name>[^)\n]*)\)'
)
rx
=
re
.
compile
(
r
'include\((?P<name>[^)\n]*)\)'
)
find
=
gen
erate
_find_include_func
(
rx
)
find
=
gen_find_include_func
(
rx
)
info
=
find
(
'''321include(sub.txt)xyz'''
,
0
)
info
=
find
(
'''321include(sub.txt)xyz'''
,
0
)
assert
info
==
IncludeInfo
(
3
,
16
,
'sub.txt'
)
assert
info
==
IncludeInfo
(
3
,
16
,
'sub.txt'
)
def
test_generate_find_include_w_comments
(
self
):
def
test_generate_find_include_w_comments
(
self
):
rx
=
re
.
compile
(
r
'include\((?P<name>[^)\n]*)\)'
)
rx
=
re
.
compile
(
r
'include\((?P<name>[^)\n]*)\)'
)
comment_rx
=
re
.
compile
(
r
'#.*(?:\n|$)'
)
comment_rx
=
re
.
compile
(
r
'#.*(?:\n|$)'
)
find
=
gen
erate
_find_include_func
(
rx
,
comment_rx
)
find
=
gen_find_include_func
(
rx
,
comment_rx
)
test
=
'''a
test
=
'''a
b # include(alpha)
b # include(alpha)
c include(beta)
c include(beta)
...
@@ -275,8 +275,8 @@ class TestIncludes:
...
@@ -275,8 +275,8 @@ class TestIncludes:
def
test_simple_include
(
self
):
def
test_simple_include
(
self
):
def
perform
(
main
,
sub
):
def
perform
(
main
,
sub
):
self
.
create_files
({
'main.txt'
:
main
,
'sub.txt'
:
sub
})
self
.
create_files
({
'main.txt'
:
main
,
'sub.txt'
:
sub
})
find_func
=
gen
erate
_find_include_func
(
r
'include\((?P<name>[^)\n]*)\)'
)
find_func
=
gen_find_include_func
(
r
'include\((?P<name>[^)\n]*)\)'
)
text
,
mapping
=
preprocess_includes
(
'main.txt'
,
None
,
find_func
)
text
,
mapping
=
preprocess_includes
(
None
,
'main.txt'
,
find_func
)
# print(mapping)
# print(mapping)
assert
text
==
main
.
replace
(
'include(sub.txt)'
,
'abc'
),
text
assert
text
==
main
.
replace
(
'include(sub.txt)'
,
'abc'
),
text
for
i
in
range
(
len
(
text
)):
for
i
in
range
(
len
(
text
)):
...
@@ -298,8 +298,8 @@ class TestIncludes:
...
@@ -298,8 +298,8 @@ class TestIncludes:
def
test_complex_include
(
self
):
def
test_complex_include
(
self
):
def
perform
(
**
ensemble
):
def
perform
(
**
ensemble
):
self
.
create_files
(
ensemble
)
self
.
create_files
(
ensemble
)
find_func
=
gen
erate
_find_include_func
(
r
'#include\((?P<name>[^)\n]*)\)'
)
find_func
=
gen_find_include_func
(
r
'#include\((?P<name>[^)\n]*)\)'
)
text
,
mapping
=
preprocess_includes
(
'main'
,
None
,
find_func
)
text
,
mapping
=
preprocess_includes
(
None
,
'main'
,
find_func
)
# print(mapping)
# print(mapping)
substrings
=
{}
substrings
=
{}
for
k
,
v
in
reversed
(
ensemble
.
items
()):
for
k
,
v
in
reversed
(
ensemble
.
items
()):
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment