Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
67fc5023
Commit
67fc5023
authored
May 26, 2021
by
di68kap
Browse files
preprocess.py: Refactoring
parent
c148e5ef
Changes
5
Hide whitespace changes
Inline
Side-by-side
examples/LaTeX/LaTeX.ebnf
View file @
67fc5023
...
...
@@ -125,12 +125,13 @@ inline_math = /\$/ /[^$]*/ §/\$/
#### commands ####
command = known_command | text_command | generic_command
command = known_command | text_command |
assignment |
generic_command
known_command = citet | citep | footnote | includegraphics | caption
| multicolumn | hline | cline | documentclass | pdfinfo
| hypersetup | label | ref | href | url
text_command = TXTCOMMAND | ESCAPED | BRACKETS
assignment = !no_command CMDNAME "=" (number [UNIT] | block | CHARS)
generic_command = !no_command CMDNAME [[ ~ config ] { ~ block }+ ]
| `{` CMDNAME _block_content §`}`
...
...
@@ -163,10 +164,6 @@ href = "\href{" urlstring "}" block
block = "{" _block_content §`}`
_block_content = { (block_environment | text_element | paragraph) [_PARSEP|S] }
info_block = "{" §{ info_assoc } "}"
info_assoc = info_key ~ [ "(" §info_value ")" ]
info_key = `/` _NAME
info_value = TEXT_NOPAR { S TEXT_NOPAR } # text without parentheses
# text = CHARS { (S | trennung) CHARS }
# text = LINE { (S | trennung) LINE }
text = TEXT { (S | trennung) TEXT }
...
...
@@ -189,12 +186,19 @@ structural = "subsection" | "section" | "chapter" | "subsubsection"
#######################################################################
config = "[" § (parameters &"]" | cfg_text) "]"
param_block = "{" [parameters] "}"
parameters = (association | flag) { "," (association | flag) } [ WARN_Komma ]
association = key~ "=" value~
flag = _QUALIFIED | magnitude
key = _QUALIFIED
value = magnitude | _LETTERS | CMDNAME | param_block | block
info_block = "{" §{ info_assoc } "}"
info_assoc = info_key ~ [ "(" §info_value ")" ]
info_key = `/` _NAME
info_value = TEXT_NOPAR { S TEXT_NOPAR } # text without parentheses
magnitude = number [UNIT]
number = INTEGER [FRAC]
cfg_text = { (~ text) | CMDNAME | SPECIAL }
...
...
examples/LaTeX/LaTeXParser.py
View file @
67fc5023
...
...
@@ -86,7 +86,7 @@ class LaTeXGrammar(Grammar):
paragraph
=
Forward
()
param_block
=
Forward
()
text_element
=
Forward
()
source_hash__
=
"
4a60dc317415b6953ba8665a77571119
"
source_hash__
=
"
c73ecd46ffeee31f3ed6a42cd981c3b0
"
disposable__
=
re
.
compile
(
'_WSPC$|_GAP$|_LB$|_PARSEP$|_LETTERS$|_NAME$|INTEGER$|FRAC$|_QUALIFIED$|TEXT_NOPAR$|TEXT$|_block_content$|PATH$|PATHSEP$|HASH$|COLON$|TAG$|block_environment$|known_environment$|text_element$|line_element$|inline_environment$|known_inline_env$|info_block$|begin_inline_env$|end_inline_env$|command$|known_command$'
)
static_analysis_pending__
=
[]
# type: List[bool]
parser_initialization__
=
[
"upon instantiation"
]
...
...
@@ -135,6 +135,10 @@ class LaTeXGrammar(Grammar):
trennung
=
Text
(
"
\\
-"
)
number
=
Series
(
INTEGER
,
Option
(
FRAC
))
magnitude
=
Series
(
number
,
Option
(
UNIT
))
info_value
=
Series
(
TEXT_NOPAR
,
ZeroOrMore
(
Series
(
S
,
TEXT_NOPAR
)))
info_key
=
Series
(
Drop
(
Text
(
"/"
)),
_NAME
)
info_assoc
=
Series
(
info_key
,
dwsp__
,
Option
(
Series
(
Series
(
Drop
(
Text
(
"("
)),
dwsp__
),
info_value
,
Series
(
Drop
(
Text
(
")"
)),
dwsp__
),
mandatory
=
1
)))
info_block
=
Series
(
Series
(
Drop
(
Text
(
"{"
)),
dwsp__
),
ZeroOrMore
(
info_assoc
),
Series
(
Drop
(
Text
(
"}"
)),
dwsp__
),
mandatory
=
1
)
value
=
Alternative
(
magnitude
,
_LETTERS
,
CMDNAME
,
param_block
,
block
)
key
=
Synonym
(
_QUALIFIED
)
flag
=
Alternative
(
_QUALIFIED
,
magnitude
)
...
...
@@ -148,10 +152,6 @@ class LaTeXGrammar(Grammar):
no_command
=
Alternative
(
Series
(
Drop
(
Text
(
"
\\
begin{"
)),
dwsp__
),
Series
(
Drop
(
Text
(
"
\\
end"
)),
dwsp__
),
Series
(
BACKSLASH
,
structural
))
cfg_text
=
ZeroOrMore
(
Alternative
(
Series
(
dwsp__
,
text
),
CMDNAME
,
SPECIAL
))
config
=
Series
(
Series
(
Drop
(
Text
(
"["
)),
dwsp__
),
Alternative
(
Series
(
parameters
,
Lookahead
(
Series
(
Drop
(
Text
(
"]"
)),
dwsp__
))),
cfg_text
),
Series
(
Drop
(
Text
(
"]"
)),
dwsp__
),
mandatory
=
1
)
info_value
=
Series
(
TEXT_NOPAR
,
ZeroOrMore
(
Series
(
S
,
TEXT_NOPAR
)))
info_key
=
Series
(
Drop
(
Text
(
"/"
)),
_NAME
)
info_assoc
=
Series
(
info_key
,
dwsp__
,
Option
(
Series
(
Series
(
Drop
(
Text
(
"("
)),
dwsp__
),
info_value
,
Series
(
Drop
(
Text
(
")"
)),
dwsp__
),
mandatory
=
1
)))
info_block
=
Series
(
Series
(
Drop
(
Text
(
"{"
)),
dwsp__
),
ZeroOrMore
(
info_assoc
),
Series
(
Drop
(
Text
(
"}"
)),
dwsp__
),
mandatory
=
1
)
_block_content
=
ZeroOrMore
(
Series
(
Alternative
(
block_environment
,
text_element
,
paragraph
),
Option
(
Alternative
(
_PARSEP
,
S
))))
hide_from_toc
=
Series
(
Text
(
"*"
),
dwsp__
)
target
=
Series
(
PATH
,
ZeroOrMore
(
Series
(
NegativeLookbehind
(
Drop
(
RegExp
(
's?ptth'
))),
COLON
,
PATH
)),
Option
(
Series
(
Alternative
(
HASH
,
Series
(
NegativeLookbehind
(
Drop
(
RegExp
(
's?ptth'
))),
COLON
)),
TAG
)))
...
...
@@ -174,6 +174,7 @@ class LaTeXGrammar(Grammar):
citep
=
Series
(
Alternative
(
Series
(
Drop
(
Text
(
"
\\
citep"
)),
dwsp__
),
Series
(
Drop
(
Text
(
"
\\
cite"
)),
dwsp__
)),
Option
(
config
),
block
)
citet
=
Series
(
Series
(
Drop
(
Text
(
"
\\
citet"
)),
dwsp__
),
Option
(
config
),
block
)
generic_command
=
Alternative
(
Series
(
NegativeLookahead
(
no_command
),
CMDNAME
,
Option
(
Series
(
Option
(
Series
(
dwsp__
,
config
)),
OneOrMore
(
Series
(
dwsp__
,
block
))))),
Series
(
Drop
(
Text
(
"{"
)),
CMDNAME
,
_block_content
,
Drop
(
Text
(
"}"
)),
mandatory
=
3
))
assignment
=
Series
(
NegativeLookahead
(
no_command
),
CMDNAME
,
Series
(
Drop
(
Text
(
"="
)),
dwsp__
),
Alternative
(
Series
(
number
,
Option
(
UNIT
)),
block
,
CHARS
))
text_command
=
Alternative
(
TXTCOMMAND
,
ESCAPED
,
BRACKETS
)
cfg_unit
=
Series
(
Drop
(
Text
(
"{"
)),
number
,
UNIT
,
Drop
(
Text
(
"}"
)))
cfg_separator
=
Text
(
"|"
)
...
...
@@ -193,7 +194,7 @@ class LaTeXGrammar(Grammar):
TBCFG_VALUE
=
Series
(
RegExp
(
'[lcr|]+'
),
dwsp__
)
multicolumn
=
Series
(
Series
(
Drop
(
Text
(
"
\\
multicolumn"
)),
dwsp__
),
Series
(
Drop
(
Text
(
"{"
)),
dwsp__
),
INTEGER
,
Series
(
Drop
(
Text
(
"}"
)),
dwsp__
),
tabular_config
,
block_of_paragraphs
)
known_command
=
Alternative
(
citet
,
citep
,
footnote
,
includegraphics
,
caption
,
multicolumn
,
hline
,
cline
,
documentclass
,
pdfinfo
,
hypersetup
,
label
,
ref
,
href
,
url
)
command
=
Alternative
(
known_command
,
text_command
,
generic_command
)
command
=
Alternative
(
known_command
,
text_command
,
assignment
,
generic_command
)
line_element
=
Alternative
(
text
,
inline_environment
,
command
,
block
)
rb_down
=
Series
(
Series
(
Drop
(
Text
(
"["
)),
dwsp__
),
number
,
UNIT
,
dwsp__
,
Series
(
Drop
(
Text
(
"]"
)),
dwsp__
))
rb_up
=
Series
(
Series
(
Drop
(
Text
(
"["
)),
dwsp__
),
number
,
UNIT
,
dwsp__
,
Series
(
Drop
(
Text
(
"]"
)),
dwsp__
))
...
...
examples/LaTeX/test_grammar/02_test_text.ini
View file @
67fc5023
...
...
@@ -34,6 +34,8 @@
9:
.
M10:
"1\,Mos"
[match:citep]
1*:
"\cite
[ch. 7]
{Schmidt:2009}"
examples/LaTeX/test_grammar/06_test_commands.ini
View file @
67fc5023
...
...
@@ -56,6 +56,8 @@ M19: \label{name}
M20:
\ref{name}
M21:
\pageref{name}
[match:assignment]
M1:
"\
overfullrule
=
1mm"
[match:protocol]
M1:
"https://"
...
...
examples/LaTeX/tex2utf8.py
0 → 100644
View file @
67fc5023
#!/usr/bin/env python3
"""tex2utf8 - converts all .tex files in a directory to utf-8."""
import
sys
,
os
def convert(root_path: str) -> None:
    """Re-encode every non-UTF-8 ``.tex`` file below *root_path* as UTF-8.

    The directory tree is walked recursively with ``os.walk``. Each file
    whose name ends in ``.tex`` is read as raw bytes and test-decoded as
    strict UTF-8:

    * If decoding succeeds, the file is left untouched and a message is
      printed.
    * If decoding fails, the bytes are decoded as cp1252 and re-encoded as
      UTF-8. The original file is renamed to ``<name>.tex.cp1252`` and the
      UTF-8 bytes are written under the original name.

    Args:
        root_path: Directory at which the recursive walk starts.

    Raises:
        UnicodeDecodeError: If a file is neither valid UTF-8 nor decodable
            as cp1252.
        OSError: If a file cannot be read, renamed or written.
    """
    # Walk the directory passed by the caller. The previous version walked the
    # module-level global ``root``, which only exists when run as a script.
    for dirpath, _dirnames, filenames in os.walk(root_path):
        for fname in filenames:
            if not fname.endswith('.tex'):
                continue
            fpath = os.path.join(dirpath, fname)
            with open(fpath, 'rb') as f:
                data = f.read()
            try:
                # Strict decoding is used purely as a validity check.
                data.decode('utf-8', errors='strict')
            except UnicodeDecodeError:
                converted = data.decode('cp1252').encode('utf-8')
                # Keep the original bytes as a backup next to the new file.
                os.rename(fpath, fpath + '.cp1252')
                with open(fpath, 'wb') as f:
                    f.write(converted)
                print(fpath + ' converted to unicode')
            else:
                print(fpath + ' was already unicode')
if __name__ == "__main__":
    # Directory to process: the first command-line argument if one is given,
    # otherwise the current working directory.
    root = sys.argv[1] if len(sys.argv) > 1 else './'
    # Validate explicitly rather than with `assert`: assertions are removed
    # when Python runs with -O. isdir() is also False for a non-existent path,
    # so one check covers both original assertions.
    if not os.path.isdir(root):
        sys.exit('tex2utf8: "%s" is not an existing directory' % root)
    convert(root)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment