badw-it / DHParser · Commits

Commit 05e4860e
authored May 18, 2021 by Eckhart Arnold

preprocess.py: includes: work in progress

parent e867a83d

Changes: 2 files
DHParser/preprocess.py
@@ -30,7 +30,7 @@ cannot completely be described entirely with context-free grammars.
 import bisect
 import functools
-from typing import Union, Optional, Callable, Tuple, NamedTuple, List
+from typing import Union, Optional, Callable, Tuple, NamedTuple, List, Any
 from DHParser.toolkit import re, dataclasses
@@ -126,7 +126,7 @@ def nil_preprocessor(source_text: str, source_name: str) -> Preprocessed:
     """
     A preprocessor that does nothing, i.e. just returns the input.
     """
-    return source_text, lambda i: SourceLocation(source_name, i)
+    return Preprocessed(source_text, lambda i: SourceLocation(source_name, i))


 def _apply_mappings(position: int, mappings: List[SourceMapFunc]) -> SourceLocation:
@@ -136,6 +136,7 @@ def _apply_mappings(position: int, mappings: List[SourceMapFunc]) -> SourceLocat
     position within a preprocessed source text and mappings should therefore
     be a list of reverse-mappings in reversed order.
     """
+    filename = ''
     for mapping in mappings:
         filename, position = mapping(position)
     return SourceLocation(filename, position)
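_apply_mappings threads a position through each reverse mapping in turn, which is why the docstring asks for the mappings "in reversed order": the mapping of the last preprocessing stage must be undone first. A minimal, self-contained sketch of that composition (the file name and the two lambdas are made-up stand-ins, not DHParser code):

# Toy illustration (not DHParser code) of how a chain of reverse mappings
# composes: each mapping takes a position in its stage's *output* and returns
# a (file name, position) pair in its stage's *input*, mirroring SourceMapFunc.

def apply_mappings(position, mappings):
    """Thread `position` through the reverse mappings, last stage first."""
    filename = ''
    for mapping in mappings:
        filename, position = mapping(position)
    return filename, position

# hypothetical stages: stage 2 prepended 4 characters, stage 1 prepended 2
undo_stage2 = lambda pos: ('main.txt', pos - 4)
undo_stage1 = lambda pos: ('main.txt', pos - 2)

# mappings are passed in reversed order: undo the last stage first
assert apply_mappings(10, [undo_stage2, undo_stage1]) == ('main.txt', 4)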
@@ -156,7 +157,7 @@ def _apply_preprocessors(source_text: str, source_name: str,
         processed, mapping_func = with_source_mapping(prep(processed, source_name))
         mapping_chain.append(mapping_func)
     mapping_chain.reverse()
-    return processed, functools.partial(_apply_mappings, mappings=mapping_chain)
+    return Preprocessed(processed, functools.partial(_apply_mappings, mappings=mapping_chain))


 def chain_preprocessors(*preprocessors) -> PreprocessorFunc:
@@ -315,6 +316,22 @@ def with_source_mapping(result: PreprocessorResult) -> Preprocessed:
 #######################################################################


+def generate_find_include_func(rx: Union[str, Any]) -> FindIncludeFunc:
+    if isinstance(rx, str):
+        rx = re.compile(rx)
+
+    def find_include(text: str, begin: int) -> IncludeInfo:
+        nonlocal rx
+        iterator = rx.finditer(text, begin)
+        try:
+            m = next(iterator)
+            begin = m.start()
+            return IncludeInfo(begin, m.end() - begin, m.group('name'))
+        except StopIteration:
+            return IncludeInfo(-1, 0, '')
+    return find_include
+
+
 def generate_include_map(source_name: str,
                          source_text: str,
                          find_next_include: FindIncludeFunc) -> Tuple[IncludeMap, str]:
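The new generate_find_include_func compiles the regular expression (when given as a string) and returns a closure that reports the next include directive as an IncludeInfo(begin, length, name) triple, or IncludeInfo(-1, 0, '') once no directive is left. A usage sketch modelled on the test added in this commit; the include(...) directive syntax is merely the convention used by that test, not something DHParser prescribes:

from DHParser.preprocess import generate_find_include_func, IncludeInfo

# directive syntax used by the new tests: include(<file name>)
find_include = generate_find_include_func(r'include\((?P<name>[^)\n]*)\)')

text = '321include(sub.txt)xyz'
begin, length, name = find_include(text, 0)
assert (begin, length, name) == (3, 16, 'sub.txt')   # directive starts at 3, spans 16 chars

# searching past the only directive yields the "not found" sentinel
assert find_include(text, begin + length) == IncludeInfo(-1, 0, '')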
@@ -324,49 +341,58 @@ def generate_include_map(source_name: str,
         nonlocal file_names
         map: IncludeMap = IncludeMap(source_name, [0], [0], [source_name])
         text_chunks: List[str] = []
+        source_offset: int = 0

         if source_name in file_names:
             raise ValueError(f'Circular include of {source_name} detected!')
         file_names.add(source_name)

         last_begin = -1
         last_end = 0
+        lengths = 0
         begin, length, include_name = find_next(source_text, 0)
         while begin >= 0:
             assert begin > last_begin
             with open(include_name, 'r', encoding='utf-8') as f:
                 include_text = f.read()
             inner_map, inner_text = generate_map(include_name, include_text, find_next)
-            inner_map.positions = [pos + begin for pos in inner_map.positions]
-            inner_map.offsets = [offset - begin for offset in inner_map.offsets]
-            if begin == map.positions[-1]:
-                map.file_names = map.file_names[:-1] + inner_map.file_names
-                map.positions = map.positions[:-1] + inner_map.positions
-                map.offsets = map.offsets[:-1] + inner_map.offsets
+            inner_map.positions = [pos + begin - lengths + source_offset
+                                   for pos in inner_map.positions]
+            inner_map.offsets = [offset - (source_offset + begin - lengths)
+                                 for offset in inner_map.offsets]
+            if begin == map.positions[-1]:  # FEHLER!
+                map.file_names = map.file_names[:-1] + inner_map.file_names[:-1]
+                map.positions = map.positions[:-1] + inner_map.positions[:-1]
+                map.offsets = map.offsets[:-1] + inner_map.offsets[:-1]
                 text_chunks.append(inner_text)
             else:
                 text_chunks.append(source_text[last_end:begin])
-                map.file_names.append(include_name)
-                map.positions += inner_map.positions
-                map.offsets += inner_map.offsets
+                source_offset += begin - last_end
+                map.file_names += inner_map.file_names[:-1]
+                map.positions += inner_map.positions[:-1]
+                map.offsets += inner_map.offsets[:-1]
                 text_chunks.append(inner_text)
+            lengths += length
             map.file_names.append(source_name)
-            map.positions.append(begin + inner_map.positions[-1])
-            map.offsets.append(map.offsets[-1] - inner_map.positions[-1] + length)
+            map.positions.append(inner_map.positions[-1])
+            map.offsets.append(source_offset + lengths - inner_map.positions[-1])
             last_end = begin + length
             last_begin = begin
             begin, length, include_name = find_next(source_text, last_end)
-        text_chunks.append(source_text[last_end:])
+        rest = source_text[last_end:]
+        if rest:
+            text_chunks.append(rest)
+            map.positions.append(map.positions[-1] + len(rest))
+            map.offsets.append(map.offsets[-1])
+            map.file_names.append(source_name)
         file_names.remove(source_name)
         return map, ''.join(text_chunks)

     return generate_map(source_name, source_text, find_next_include)


-def includes_map(position: int, inclmap: IncludeMap) -> SourceLocation:
+def srcmap_includes(position: int, inclmap: IncludeMap) -> SourceLocation:
     i = bisect.bisect_right(inclmap.positions, position)
     if i:
-        return SourceLocation(inclmap.file_names[i],
-                              # min(position + inclmap.offsets[i - 1], inclmap.positions[i] + inclmap.offsets[i])
-                              inclmap.positions[i] + inclmap.offsets[i])
+        return SourceLocation(inclmap.file_names[i - 1],
+                              position + inclmap.offsets[i - 1])
     raise ValueError
@@ -377,5 +403,7 @@ def preprocess_includes(source_name: str,
     with open(source_name, 'r', encoding='utf-8') as f:
         source_text = f.read()
     include_map, result = generate_include_map(source_name, source_text, find_next_include)
-    mapping_func = functools.partial(includes_map, inclmap=include_map)
+    mapping_func = functools.partial(srcmap_includes, inclmap=include_map)
     return Preprocessed(result, mapping_func)
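The include map built above consists of three parallel lists: positions marks the points in the preprocessed text where the originating file changes, offsets holds the shift that takes a preprocessed position back into that file, and file_names names the file for each segment, so srcmap_includes only needs a bisect_right to locate the right segment. A self-contained sketch of that lookup; the concrete numbers are chosen to match the simplest case exercised by the new test (main text 'include(sub.txt)xyz' including a file containing 'abc'), not read off from DHParser internals:

import bisect
from typing import List, NamedTuple

class SourceLocation(NamedTuple):   # same two-field shape as DHParser's SourceLocation
    file_name: str
    position: int

# preprocessed text 'abcxyz': positions 0-2 stem from sub.txt,
# positions 3-5 stem from main.txt, shifted by +13 (3 -> 16, the 'x' after the directive)
positions:  List[int] = [0, 3]
offsets:    List[int] = [0, 13]
file_names: List[str] = ['sub.txt', 'main.txt']

def lookup(position: int) -> SourceLocation:
    """Find the segment the position falls into and undo its include offset."""
    i = bisect.bisect_right(positions, position)
    return SourceLocation(file_names[i - 1], position + offsets[i - 1])

assert lookup(1) == SourceLocation('sub.txt', 1)     # 'b' comes from sub.txt
assert lookup(4) == SourceLocation('main.txt', 17)   # 'y' comes from main.txt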
tests/test_preprocess.py
@@ -20,7 +20,11 @@ limitations under the License.
 """

 import os
+import platform
+import shutil
+import subprocess
 import sys
+import time

 scriptpath = os.path.dirname(__file__) or '.'
 sys.path.append(os.path.abspath(os.path.join(scriptpath, '..')))
@@ -32,9 +36,10 @@ from DHParser.dsl import grammar_provider
 from DHParser import compile_source
 from DHParser.preprocess import make_token, tokenized_to_original_mapping, source_map, \
     BEGIN_TOKEN, END_TOKEN, TOKEN_DELIMITER, SourceMapFunc, SourceMap, chain_preprocessors, \
-    strip_tokens
-from DHParser.toolkit import lstrip_docstring, typing
-from typing import Tuple
+    strip_tokens, generate_find_include_func, preprocess_includes, IncludeInfo
+from DHParser.toolkit import lstrip_docstring, typing, re
+from DHParser.testing import TFFN
+from typing import Tuple, Dict


 class TestMakeToken:
@@ -211,6 +216,72 @@ class TestTokenParsing:
             assert False, "wrong error positions"


+class TestHelpers:
+    def test_generate_find_include_func(self):
+        rx = re.compile(r'include\((?P<name>[^)\n]*)\)')
+        find = generate_find_include_func(rx)
+        info = find('''321include(sub.txt)xyz''', 0)
+        assert info == IncludeInfo(3, 16, 'sub.txt')
+
+
+def system(s: str) -> int:
+    # return os.system(s)
+    return subprocess.call(s, shell=True)
+
+
+class TestIncludes:
+    def setup(self):
+        self.cwd = os.getcwd()
+        os.chdir(scriptpath)
+        # avoid race-condition
+        counter = 10
+        while counter > 0:
+            try:
+                self.dirname = TFFN('test_preprocess_data')
+                os.mkdir(TFFN('test_preprocess_data'))
+                counter = 0
+            except FileExistsError:
+                time.sleep(1)
+                counter -= 1
+        os.chdir(os.path.join(scriptpath, self.dirname))
+
+    def teardown(self):
+        os.chdir(scriptpath)
+        if os.path.exists(self.dirname) and os.path.isdir(self.dirname):
+            shutil.rmtree(self.dirname)
+        if os.path.exists(self.dirname) and not os.listdir(self.dirname):
+            os.rmdir(self.dirname)
+        os.chdir(self.cwd)
+
+    def create_files(self, files: Dict[str, str]):
+        for name, content in files.items():
+            with open(name, 'w', encoding='utf-8') as f:
+                f.write(content)
+
+    def test_simple_include(self):
+        def perform(main, sub):
+            self.create_files({'main.txt': main, 'sub.txt': sub})
+            find_func = generate_find_include_func(r'include\((?P<name>[^)\n]*)\)')
+            text, mapping = preprocess_includes('main.txt', None, find_func)
+            print(mapping)
+            assert text == main.replace('include(sub.txt)', 'abc'), text
+            for i in range(len(text)):
+                name, k = mapping(i)
+                print(i, k, name)
+                txt = main if name == 'main.txt' else sub
+                assert text[i] == txt[k], f'{i}: {text[i]} != {txt[k]} in {name}'
+        perform('include(sub.txt)xyz', 'abc')
+        perform('012include(sub.txt)xyz', 'abc')
+        perform('012xyzinclude(sub.txt)', 'abc')
+        perform('01include(sub.txt)2xyz', 'abc')
+        perform('012include(sub.txt)xyzinclude(sub.txt)hij', 'abc')
+        perform('012include(sub.txt)include(sub.txt)hij', 'abc')
+        perform('include(sub.txt)include(sub.txt)hijinclude(sub.txt)', 'abc')
+
+
 if __name__ == "__main__":
     # tp = TestTokenParsing()
     # tp.setup()