badw-it / DHParser · Commits

Commit 309c7376, authored Dec 26, 2017 by Eckhart Arnold

- proper source mapping of destination characters that are mapped onto the same source

Parent: 663e5268
Changes: 4 files
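For orientation: a source map in DHParser's preprocess module is a pair of parallel sorted arrays. positions holds the starts of runs in the tokenized text, and offsets holds what must be added to a position in that run to recover the original source position. Because an inserted preprocessor token is longer than the source text it stands for, several tokenized positions collapse onto the same source position, and the mapping has to be clamped rather than extrapolated. A minimal, self-contained sketch of the idea (the toy data and the map_pos helper are illustrative, not DHParser's actual code):

import bisect
from collections import namedtuple

SourceMap = namedtuple('SourceMap', ['positions', 'offsets'])

# Toy data: a 1-char source character was replaced by a 5-char token, so
# tokenized positions 0..4 belong to a single source position, and every
# tokenized position from 5 onwards lies 4 characters after its source.
srcmap = SourceMap(positions=[0, 5], offsets=[0, -4])

def map_pos(pos: int, sm: SourceMap) -> int:
    """Map a position in the tokenized text back to the original source."""
    i = bisect.bisect_right(sm.positions, pos)
    if i < len(sm.positions):
        # Clamp, so positions inside a token cannot overshoot the next
        # run's source position -- the behaviour this commit establishes.
        return min(pos + sm.offsets[i - 1], sm.positions[i] + sm.offsets[i])
    return pos + sm.offsets[i - 1]

# All five token characters map onto the same source position:
assert [map_pos(p, srcmap) for p in range(7)] == [0, 1, 1, 1, 1, 1, 2]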
DHParser/parse.py

@@ -1127,8 +1127,8 @@ class RegExp(Parser):
         if match:
             capture = match.group(0)
             end = text.index(match.end())
-            # regular expresseion must never match preprocessor-tokens!
-            # TODO: Find a better solution here, e.g. static checking/re-mangling at compile time
+            # regular expression must never match preprocessor-tokens!
+            # TODO: Find a better solution here? e.g. static checking/re-mangling at compile time
             i = capture.find(BEGIN_TOKEN)
             if i >= 0:
                 capture = capture[:i]
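Apart from the comment fixes, the context lines show the guard that comment refers to: a regular-expression match is cut off at the first preprocessor sentinel, so RegExp never consumes token material. Schematically (with a hypothetical one-character stand-in for DHParser's BEGIN_TOKEN):

BEGIN_TOKEN = '\x1b'   # hypothetical stand-in for DHParser's sentinel character

capture = 'foo bar' + BEGIN_TOKEN + 'INDENT'
i = capture.find(BEGIN_TOKEN)
if i >= 0:
    capture = capture[:i]     # truncate the match before the token
assert capture == 'foo bar'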
DHParser/preprocess.py

@@ -106,14 +106,17 @@ def tokenized_to_original_mapping(tokenized_source: str) -> SourceMap:
         positions.extend([d + 1, e + 1])
         offsets.extend([o + 1, o])
         i = tokenized_source.find(BEGIN_TOKEN, e + 1)
+    if e + 1 < len(tokenized_source):
+        positions.append(len(tokenized_source))
+        offsets.append(offsets[-1])
 
     # post conditions
     assert len(positions) == len(offsets), '\n' + str(positions) + '\n' + str(offsets)
     assert positions[0] == 0
     assert all(positions[i] < positions[i + 1] for i in range(len(positions) - 1))
-    assert all(offsets[i] > offsets[i + 1] for i in range(len(offsets) - 1))
-    return SourceMap(positions, offsets)
+    assert all(offsets[i] >= offsets[i + 1] for i in range(len(offsets) - 1))
+    return SourceMap(positions, offsets, len(positions))
 
 
 def source_map(position: int, srcmap: SourceMap) -> int:
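Two things happen in this hunk: a final (position, offset) entry is appended for the stretch of text after the last token, and because that entry repeats the previous offset (offsets.append(offsets[-1])), the post-condition on offsets had to be relaxed from strictly decreasing (>) to non-increasing (>=). The relaxed invariant, spelled out on toy data:

# Toy offsets list as it might look after the appended tail entry; the
# duplicated -9 would fail the old strict check but passes the new one.
offsets = [0, -4, -9, -9]

assert not all(offsets[i] > offsets[i + 1] for i in range(len(offsets) - 1))
assert all(offsets[i] >= offsets[i + 1] for i in range(len(offsets) - 1))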
@@ -128,7 +131,10 @@ def source_map(position: int, srcmap: SourceMap) -> int:
     Returns:
         the mapped position
     """
-    i = bisect.bisect_right(srcmap[0], position)
+    i = bisect.bisect_right(srcmap.positions, position)
     if i:
-        return position + srcmap[1][i - 1]
+        return min(position + srcmap.offsets[i - 1], srcmap.positions[i] + srcmap.offsets[i])
     raise ValueError
+
+# TODO: allow preprocessors to return their own source map (really a map or a function (easier)?)
+# TODO: apply source maps in sequence.
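source_map now addresses the map by field name rather than by tuple index, and tokenized_to_original_mapping above passes a third constructor argument. The definition of SourceMap is not part of this diff; judging from the call sites it is presumably a namedtuple along these lines (the field names positions and offsets appear in the diff, the name of the third slot is a guess):

from collections import namedtuple

# 'length' for the new third field is conjecture, inferred only from
# len(positions) being passed as the third argument in this commit.
SourceMap = namedtuple('SourceMap', ['positions', 'offsets', 'length'])

srcmap = SourceMap(positions=[0, 5, 12], offsets=[0, -4, -9], length=3)
# Named access and the old index access reach the same fields:
assert srcmap.positions is srcmap[0] and srcmap.offsets is srcmap[1]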
examples/LaTeX/tst_LaTeX_docs.py

@@ -62,12 +62,12 @@ def tst_func():
             doc = f.read()
         print('\n\nParsing document: "%s"\n' % file)
         result = parser(doc)
-        with open('REPORT/' + file[:-4] + '.cst', 'w', encoding='utf-8') as f:
-            f.write(result.as_sxpr(compact=False))
-        transformer(result)
-        with open('REPORT/' + file[:-4] + '.ast', 'w', encoding='utf-8') as f:
-            f.write(result.as_sxpr(compact=False))
-        if toolkit.is_logging():
+        if toolkit.is_logging():
+            with open('REPORT/' + file[:-4] + '.cst', 'w', encoding='utf-8') as f:
+                f.write(result.as_sxpr(compact=False))
+            transformer(result)
+            with open('REPORT/' + file[:-4] + '.ast', 'w', encoding='utf-8') as f:
+                f.write(result.as_sxpr(compact=False))
             parser.log_parsing_history__()
         fail_on_error(doc, result)
         transformer(result)
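This hunk is a pure refactoring: writing the CST/AST report files (and the extra transformer pass) is now gated behind toolkit.is_logging(), so plain test runs skip the file output. The pattern, reduced to a runnable stub (is_logging and the S-expression string are stand-ins for DHParser's toolkit and parser result):

def is_logging() -> bool:          # stand-in for DHParser's toolkit.is_logging()
    return False

result_sxpr = '(document (word "example"))'   # stand-in for result.as_sxpr()

if is_logging():                   # reports are written only when logging is on
    with open('example.cst', 'w', encoding='utf-8') as f:
        f.write(result_sxpr)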
test/test_preprocess.py

@@ -60,7 +60,7 @@ class TestSourceMapping:
         assert len(positions) == len(offsets)
         assert positions[0] == 0
         assert all(positions[i] < positions[i + 1] for i in range(len(positions) - 1))
-        assert all(offsets[i] > offsets[i + 1] for i in range(len(offsets) - 1))
+        assert all(offsets[i] >= offsets[i + 1] for i in range(len(offsets) - 1))
         assert self.tokenized.find('AND') == self.code.find('AND') + len('CONJUNCTION') + 2
@@ -124,13 +124,22 @@ class TestTokenParsing:
         # print()
         assert not cst.error_flag
 
-    def test_source_mapping(self):
+    def test_source_mapping_1(self):
         self.verify_mapping("def func", self.code, self.tokenized)
         self.verify_mapping("x > 0:", self.code, self.tokenized)
         self.verify_mapping("if y > 0:", self.code, self.tokenized)
         self.verify_mapping("print(x)", self.code, self.tokenized)
         self.verify_mapping("print(y)", self.code, self.tokenized)
 
+    def test_source_mapping_2(self):
+        previous_index = 0
+        L = len(self.code)
+        for mapped_index in range(len(self.tokenized)):
+            index = source_map(mapped_index, self.srcmap)
+            assert previous_index <= index <= L, \
+                "%i <= %i <= %i violated" % (previous_index, index, L)
+            previous_index = index
+
 
 if __name__ == "__main__":
     # tp = TestTokenParsing()
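The new test_source_mapping_2 pins down exactly the property that the min() clamp in source_map establishes: walking the tokenized text from left to right, mapped positions never decrease and never exceed the length of the original code. The same check, restated standalone against the toy map from the sketch near the top of this page:

import bisect
from collections import namedtuple

SourceMap = namedtuple('SourceMap', ['positions', 'offsets'])
srcmap = SourceMap(positions=[0, 5], offsets=[0, -4])   # toy map from above

def map_pos(pos: int, sm: SourceMap) -> int:
    i = bisect.bisect_right(sm.positions, pos)
    if i < len(sm.positions):
        return min(pos + sm.offsets[i - 1], sm.positions[i] + sm.offsets[i])
    return pos + sm.offsets[i - 1]

L = 3                            # length of the toy original source
previous_index = 0
for mapped_index in range(7):    # 7 == length of the toy tokenized text
    index = map_pos(mapped_index, srcmap)
    assert previous_index <= index <= L, \
        "%i <= %i <= %i violated" % (previous_index, index, L)
    previous_index = index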