Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
vadere
vadere
Commits
36865d52
Commit
36865d52
authored
Jul 12, 2018
by
Benedikt Zoennchen
Browse files
documentation (adding comments), the hardware parameters are no longer hardcoded for the sorting.
parent
fdc22cfc
Changes
6
Show whitespace changes
Inline
Side-by-side
VadereUtils/resources/BitonicSort.cl
View file @
36865d52
...
@@ -8,9 +8,6 @@
...
@@ -8,9 +8,6 @@
*
is
strictly
prohibited.
*
is
strictly
prohibited.
*
*
*/
*/
#
define
LOCAL_SIZE_LIMIT
16U
inline
void
ComparatorPrivate
(
inline
void
ComparatorPrivate
(
uint
*keyA,
uint
*keyA,
uint
*valA,
uint
*valA,
...
@@ -48,10 +45,11 @@ __kernel void bitonicSortLocal(
...
@@ -48,10 +45,11 @@ __kernel void bitonicSortLocal(
__global
uint
*d_SrcKey,
__global
uint
*d_SrcKey,
__global
uint
*d_SrcVal,
__global
uint
*d_SrcVal,
uint
arrayLength,
uint
arrayLength,
uint
dir
uint
dir,
__local
uint
*l_key,
__local
uint
*l_val
)
{
)
{
__local
uint
l_key[LOCAL_SIZE_LIMIT]
;
uint
LOCAL_SIZE_LIMIT
=
get_local_size
(
0
)
*
2
;
__local
uint
l_val[LOCAL_SIZE_LIMIT]
;
//Offset
to
the
beginning
of
subbatch
and
load
data
//Offset
to
the
beginning
of
subbatch
and
load
data
d_SrcKey
+=
get_group_id
(
0
)
*
LOCAL_SIZE_LIMIT
+
get_local_id
(
0
)
;
d_SrcKey
+=
get_group_id
(
0
)
*
LOCAL_SIZE_LIMIT
+
get_local_id
(
0
)
;
...
@@ -108,11 +106,11 @@ __kernel void bitonicSortLocal1(
...
@@ -108,11 +106,11 @@ __kernel void bitonicSortLocal1(
__global
uint
*d_DstKey,
__global
uint
*d_DstKey,
__global
uint
*d_DstVal,
__global
uint
*d_DstVal,
__global
uint
*d_SrcKey,
__global
uint
*d_SrcKey,
__global
uint
*d_SrcVal
__global
uint
*d_SrcVal,
__local
uint
*l_key,
__local
uint
*l_val
)
{
)
{
__local
uint
l_key[LOCAL_SIZE_LIMIT]
;
uint
LOCAL_SIZE_LIMIT
=
get_local_size
(
0
)
*
2
;
__local
uint
l_val[LOCAL_SIZE_LIMIT]
;
//Offset
to
the
beginning
of
subarray
and
load
data
//Offset
to
the
beginning
of
subarray
and
load
data
d_SrcKey
+=
get_group_id
(
0
)
*
LOCAL_SIZE_LIMIT
+
get_local_id
(
0
)
;
d_SrcKey
+=
get_group_id
(
0
)
*
LOCAL_SIZE_LIMIT
+
get_local_id
(
0
)
;
d_SrcVal
+=
get_group_id
(
0
)
*
LOCAL_SIZE_LIMIT
+
get_local_id
(
0
)
;
d_SrcVal
+=
get_group_id
(
0
)
*
LOCAL_SIZE_LIMIT
+
get_local_id
(
0
)
;
...
@@ -206,11 +204,11 @@ __kernel void bitonicMergeLocal(
...
@@ -206,11 +204,11 @@ __kernel void bitonicMergeLocal(
uint
arrayLength,
uint
arrayLength,
uint
stride,
uint
stride,
uint
size,
uint
size,
uint
dir
uint
dir,
__local
uint
*l_key,
__local
uint
*l_val
)
{
)
{
__local
uint
l_key[LOCAL_SIZE_LIMIT]
;
uint
LOCAL_SIZE_LIMIT
=
get_local_size
(
0
)
*
2
;
__local
uint
l_val[LOCAL_SIZE_LIMIT]
;
d_SrcKey
+=
get_group_id
(
0
)
*
LOCAL_SIZE_LIMIT
+
get_local_id
(
0
)
;
d_SrcKey
+=
get_group_id
(
0
)
*
LOCAL_SIZE_LIMIT
+
get_local_id
(
0
)
;
d_SrcVal
+=
get_group_id
(
0
)
*
LOCAL_SIZE_LIMIT
+
get_local_id
(
0
)
;
d_SrcVal
+=
get_group_id
(
0
)
*
LOCAL_SIZE_LIMIT
+
get_local_id
(
0
)
;
d_DstKey
+=
get_group_id
(
0
)
*
LOCAL_SIZE_LIMIT
+
get_local_id
(
0
)
;
d_DstKey
+=
get_group_id
(
0
)
*
LOCAL_SIZE_LIMIT
+
get_local_id
(
0
)
;
...
...
VadereUtils/resources/Particles.cl
View file @
36865d52
...
@@ -63,8 +63,6 @@ typedef struct {
...
@@ -63,8 +63,6 @@ typedef struct {
float
stepLength
;
float
stepLength
;
}
pedestrian
;
}
pedestrian
;
#
define
LOCAL_SIZE_LIMIT
16U
inline
void
ComparatorPrivate
(
inline
void
ComparatorPrivate
(
uint
*keyA,
uint
*keyA,
uint
*valA,
uint
*valA,
...
@@ -329,10 +327,11 @@ __kernel void bitonicSortLocal(
...
@@ -329,10 +327,11 @@ __kernel void bitonicSortLocal(
__global
uint
*d_SrcKey,
__global
uint
*d_SrcKey,
__global
uint
*d_SrcVal,
__global
uint
*d_SrcVal,
uint
arrayLength,
uint
arrayLength,
uint
dir
uint
dir,
__local
uint
*l_key,
__local
uint
*l_val
)
{
)
{
__local
uint
l_key[LOCAL_SIZE_LIMIT]
;
uint
LOCAL_SIZE_LIMIT
=
get_local_size
(
0
)
*
2
;
__local
uint
l_val[LOCAL_SIZE_LIMIT]
;
//Offset
to
the
beginning
of
subbatch
and
load
data
//Offset
to
the
beginning
of
subbatch
and
load
data
d_SrcKey
+=
get_group_id
(
0
)
*
LOCAL_SIZE_LIMIT
+
get_local_id
(
0
)
;
d_SrcKey
+=
get_group_id
(
0
)
*
LOCAL_SIZE_LIMIT
+
get_local_id
(
0
)
;
...
@@ -389,11 +388,11 @@ __kernel void bitonicSortLocal1(
...
@@ -389,11 +388,11 @@ __kernel void bitonicSortLocal1(
__global
uint
*d_DstKey,
__global
uint
*d_DstKey,
__global
uint
*d_DstVal,
__global
uint
*d_DstVal,
__global
uint
*d_SrcKey,
__global
uint
*d_SrcKey,
__global
uint
*d_SrcVal
__global
uint
*d_SrcVal,
__local
uint
*l_key,
__local
uint
*l_val
)
{
)
{
__local
uint
l_key[LOCAL_SIZE_LIMIT]
;
uint
LOCAL_SIZE_LIMIT
=
get_local_size
(
0
)
*
2
;
__local
uint
l_val[LOCAL_SIZE_LIMIT]
;
//Offset
to
the
beginning
of
subarray
and
load
data
//Offset
to
the
beginning
of
subarray
and
load
data
d_SrcKey
+=
get_group_id
(
0
)
*
LOCAL_SIZE_LIMIT
+
get_local_id
(
0
)
;
d_SrcKey
+=
get_group_id
(
0
)
*
LOCAL_SIZE_LIMIT
+
get_local_id
(
0
)
;
d_SrcVal
+=
get_group_id
(
0
)
*
LOCAL_SIZE_LIMIT
+
get_local_id
(
0
)
;
d_SrcVal
+=
get_group_id
(
0
)
*
LOCAL_SIZE_LIMIT
+
get_local_id
(
0
)
;
...
@@ -487,11 +486,11 @@ __kernel void bitonicMergeLocal(
...
@@ -487,11 +486,11 @@ __kernel void bitonicMergeLocal(
uint
arrayLength,
uint
arrayLength,
uint
stride,
uint
stride,
uint
size,
uint
size,
uint
dir
uint
dir,
__local
uint
*l_key,
__local
uint
*l_val
)
{
)
{
__local
uint
l_key[LOCAL_SIZE_LIMIT]
;
uint
LOCAL_SIZE_LIMIT
=
get_local_size
(
0
)
*
2
;
__local
uint
l_val[LOCAL_SIZE_LIMIT]
;
d_SrcKey
+=
get_group_id
(
0
)
*
LOCAL_SIZE_LIMIT
+
get_local_id
(
0
)
;
d_SrcKey
+=
get_group_id
(
0
)
*
LOCAL_SIZE_LIMIT
+
get_local_id
(
0
)
;
d_SrcVal
+=
get_group_id
(
0
)
*
LOCAL_SIZE_LIMIT
+
get_local_id
(
0
)
;
d_SrcVal
+=
get_group_id
(
0
)
*
LOCAL_SIZE_LIMIT
+
get_local_id
(
0
)
;
d_DstKey
+=
get_group_id
(
0
)
*
LOCAL_SIZE_LIMIT
+
get_local_id
(
0
)
;
d_DstKey
+=
get_group_id
(
0
)
*
LOCAL_SIZE_LIMIT
+
get_local_id
(
0
)
;
...
...
VadereUtils/src/org/vadere/util/opencl/CLBitonicSort.java
View file @
36865d52
...
@@ -12,17 +12,15 @@ import org.lwjgl.system.MemoryUtil;
...
@@ -12,17 +12,15 @@ import org.lwjgl.system.MemoryUtil;
import
java.io.IOException
;
import
java.io.IOException
;
import
java.nio.ByteBuffer
;
import
java.nio.ByteBuffer
;
import
java.nio.FloatBuffer
;
import
java.nio.IntBuffer
;
import
java.nio.IntBuffer
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_CONTEXT_PLATFORM
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_CONTEXT_PLATFORM
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_DEVICE_MAX_WORK_GROUP_SIZE
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_DEVICE_NAME
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_DEVICE_NAME
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_DEVICE_TYPE_GPU
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_DEVICE_TYPE_GPU
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_MEM_ALLOC_HOST_PTR
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_MEM_ALLOC_HOST_PTR
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_MEM_COPY_HOST_PTR
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_MEM_COPY_HOST_PTR
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_MEM_READ_ONLY
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_MEM_READ_WRITE
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_MEM_READ_WRITE
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_MEM_WRITE_ONLY
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_PROGRAM_BUILD_STATUS
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_PROGRAM_BUILD_STATUS
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_SUCCESS
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_SUCCESS
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clBuildProgram
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clBuildProgram
;
...
@@ -33,15 +31,16 @@ import static org.lwjgl.opencl.CL10.clCreateKernel;
...
@@ -33,15 +31,16 @@ import static org.lwjgl.opencl.CL10.clCreateKernel;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clCreateProgramWithSource
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clCreateProgramWithSource
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clEnqueueNDRangeKernel
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clEnqueueNDRangeKernel
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clEnqueueReadBuffer
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clEnqueueReadBuffer
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clEnqueueWriteBuffer
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clFinish
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clFinish
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clGetDeviceIDs
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clGetDeviceIDs
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clGetDeviceInfo
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clGetPlatformIDs
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clGetPlatformIDs
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clReleaseCommandQueue
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clReleaseCommandQueue
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clReleaseContext
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clReleaseContext
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clReleaseKernel
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clReleaseKernel
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clReleaseMemObject
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clReleaseMemObject
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clReleaseProgram
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clReleaseProgram
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clSetKernelArg
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clSetKernelArg1i
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clSetKernelArg1i
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clSetKernelArg1p
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clSetKernelArg1p
;
import
static
org
.
lwjgl
.
system
.
MemoryStack
.
stackPush
;
import
static
org
.
lwjgl
.
system
.
MemoryStack
.
stackPush
;
...
@@ -50,6 +49,8 @@ import static org.lwjgl.system.MemoryUtil.memUTF8;
...
@@ -50,6 +49,8 @@ import static org.lwjgl.system.MemoryUtil.memUTF8;
/**
/**
* @author Benedikt Zoennchen
* @author Benedikt Zoennchen
*
* This class implements the bitonic sort using the GPU via OpenCL.
*/
*/
public
class
CLBitonicSort
{
public
class
CLBitonicSort
{
private
static
Logger
log
=
LogManager
.
getLogger
(
CLBitonicSort
.
class
);
private
static
Logger
log
=
LogManager
.
getLogger
(
CLBitonicSort
.
class
);
...
@@ -93,7 +94,7 @@ public class CLBitonicSort {
...
@@ -93,7 +94,7 @@ public class CLBitonicSort {
private
int
[]
resultKeys
;
private
int
[]
resultKeys
;
//Note: logically shared with BitonicSort.cl!
//Note: logically shared with BitonicSort.cl!
private
static
final
int
LOCAL_SIZE_LIMIT
=
16
;
private
int
max_work_group_size
=
16
;
private
boolean
debug
=
false
;
private
boolean
debug
=
false
;
...
@@ -154,7 +155,7 @@ public class CLBitonicSort {
...
@@ -154,7 +155,7 @@ public class CLBitonicSort {
// small sorts
// small sorts
if
(
keys
.
length
<=
LOCAL_SIZE_LIMIT
)
if
(
keys
.
length
<=
max_work_group_size
)
{
{
CLInfo
.
checkCLError
(
clSetKernelArg1p
(
clBitonicSortLocal
,
0
,
clOutKeys
));
CLInfo
.
checkCLError
(
clSetKernelArg1p
(
clBitonicSortLocal
,
0
,
clOutKeys
));
CLInfo
.
checkCLError
(
clSetKernelArg1p
(
clBitonicSortLocal
,
1
,
clOutValues
));
CLInfo
.
checkCLError
(
clSetKernelArg1p
(
clBitonicSortLocal
,
1
,
clOutValues
));
...
@@ -162,6 +163,8 @@ public class CLBitonicSort {
...
@@ -162,6 +163,8 @@ public class CLBitonicSort {
CLInfo
.
checkCLError
(
clSetKernelArg1p
(
clBitonicSortLocal
,
3
,
clInValues
));
CLInfo
.
checkCLError
(
clSetKernelArg1p
(
clBitonicSortLocal
,
3
,
clInValues
));
CLInfo
.
checkCLError
(
clSetKernelArg1i
(
clBitonicSortLocal
,
4
,
keys
.
length
));
CLInfo
.
checkCLError
(
clSetKernelArg1i
(
clBitonicSortLocal
,
4
,
keys
.
length
));
CLInfo
.
checkCLError
(
clSetKernelArg1i
(
clBitonicSortLocal
,
5
,
1
));
CLInfo
.
checkCLError
(
clSetKernelArg1i
(
clBitonicSortLocal
,
5
,
1
));
CLInfo
.
checkCLError
(
clSetKernelArg
(
clBitonicSortLocal
,
6
,
keys
.
length
*
4
));
// local memory
CLInfo
.
checkCLError
(
clSetKernelArg
(
clBitonicSortLocal
,
7
,
keys
.
length
*
4
));
// local memory
clGlobalWorkSize
.
put
(
0
,
keys
.
length
/
2
);
clGlobalWorkSize
.
put
(
0
,
keys
.
length
/
2
);
clLocalWorkSize
.
put
(
0
,
keys
.
length
/
2
);
clLocalWorkSize
.
put
(
0
,
keys
.
length
/
2
);
...
@@ -175,20 +178,22 @@ public class CLBitonicSort {
...
@@ -175,20 +178,22 @@ public class CLBitonicSort {
CLInfo
.
checkCLError
(
clSetKernelArg1p
(
clBitonicSortLocal1
,
1
,
clOutValues
));
CLInfo
.
checkCLError
(
clSetKernelArg1p
(
clBitonicSortLocal1
,
1
,
clOutValues
));
CLInfo
.
checkCLError
(
clSetKernelArg1p
(
clBitonicSortLocal1
,
2
,
clInKeys
));
CLInfo
.
checkCLError
(
clSetKernelArg1p
(
clBitonicSortLocal1
,
2
,
clInKeys
));
CLInfo
.
checkCLError
(
clSetKernelArg1p
(
clBitonicSortLocal1
,
3
,
clInValues
));
CLInfo
.
checkCLError
(
clSetKernelArg1p
(
clBitonicSortLocal1
,
3
,
clInValues
));
CLInfo
.
checkCLError
(
clSetKernelArg
(
clBitonicSortLocal1
,
4
,
max_work_group_size
*
4
));
// local memory
CLInfo
.
checkCLError
(
clSetKernelArg
(
clBitonicSortLocal1
,
5
,
max_work_group_size
*
4
));
// local memory
clGlobalWorkSize
=
stack
.
callocPointer
(
1
);
clGlobalWorkSize
=
stack
.
callocPointer
(
1
);
clLocalWorkSize
=
stack
.
callocPointer
(
1
);
clLocalWorkSize
=
stack
.
callocPointer
(
1
);
clGlobalWorkSize
.
put
(
0
,
keys
.
length
/
2
);
clGlobalWorkSize
.
put
(
0
,
keys
.
length
/
2
);
clLocalWorkSize
.
put
(
0
,
LOCAL_SIZE_LIMIT
/
2
);
clLocalWorkSize
.
put
(
0
,
max_work_group_size
/
2
);
CLInfo
.
checkCLError
(
clEnqueueNDRangeKernel
(
clQueue
,
clBitonicSortLocal1
,
1
,
null
,
clGlobalWorkSize
,
clLocalWorkSize
,
null
,
null
));
CLInfo
.
checkCLError
(
clEnqueueNDRangeKernel
(
clQueue
,
clBitonicSortLocal1
,
1
,
null
,
clGlobalWorkSize
,
clLocalWorkSize
,
null
,
null
));
CLInfo
.
checkCLError
(
clFinish
(
clQueue
));
CLInfo
.
checkCLError
(
clFinish
(
clQueue
));
for
(
int
size
=
2
*
LOCAL_SIZE_LIMIT
;
size
<=
keys
.
length
;
size
<<=
1
)
for
(
int
size
=
2
*
max_work_group_size
;
size
<=
keys
.
length
;
size
<<=
1
)
{
{
for
(
int
stride
=
size
/
2
;
stride
>
0
;
stride
>>=
1
)
for
(
int
stride
=
size
/
2
;
stride
>
0
;
stride
>>=
1
)
{
{
if
(
stride
>=
LOCAL_SIZE_LIMIT
)
if
(
stride
>=
max_work_group_size
)
{
{
//Launch bitonicMergeGlobal
//Launch bitonicMergeGlobal
CLInfo
.
checkCLError
(
clSetKernelArg1p
(
clBitonicMergeGlobal
,
0
,
clOutKeys
));
CLInfo
.
checkCLError
(
clSetKernelArg1p
(
clBitonicMergeGlobal
,
0
,
clOutKeys
));
...
@@ -204,7 +209,7 @@ public class CLBitonicSort {
...
@@ -204,7 +209,7 @@ public class CLBitonicSort {
clGlobalWorkSize
=
stack
.
callocPointer
(
1
);
clGlobalWorkSize
=
stack
.
callocPointer
(
1
);
clLocalWorkSize
=
stack
.
callocPointer
(
1
);
clLocalWorkSize
=
stack
.
callocPointer
(
1
);
clGlobalWorkSize
.
put
(
0
,
keys
.
length
/
2
);
clGlobalWorkSize
.
put
(
0
,
keys
.
length
/
2
);
clLocalWorkSize
.
put
(
0
,
LOCAL_SIZE_LIMIT
/
4
);
clLocalWorkSize
.
put
(
0
,
max_work_group_size
/
4
);
CLInfo
.
checkCLError
(
clEnqueueNDRangeKernel
(
clQueue
,
clBitonicMergeGlobal
,
1
,
null
,
clGlobalWorkSize
,
clLocalWorkSize
,
null
,
null
));
CLInfo
.
checkCLError
(
clEnqueueNDRangeKernel
(
clQueue
,
clBitonicMergeGlobal
,
1
,
null
,
clGlobalWorkSize
,
clLocalWorkSize
,
null
,
null
));
CLInfo
.
checkCLError
(
clFinish
(
clQueue
));
CLInfo
.
checkCLError
(
clFinish
(
clQueue
));
...
@@ -221,11 +226,13 @@ public class CLBitonicSort {
...
@@ -221,11 +226,13 @@ public class CLBitonicSort {
CLInfo
.
checkCLError
(
clSetKernelArg1i
(
clBitonicMergeLocal
,
5
,
stride
));
CLInfo
.
checkCLError
(
clSetKernelArg1i
(
clBitonicMergeLocal
,
5
,
stride
));
CLInfo
.
checkCLError
(
clSetKernelArg1i
(
clBitonicMergeLocal
,
6
,
size
));
CLInfo
.
checkCLError
(
clSetKernelArg1i
(
clBitonicMergeLocal
,
6
,
size
));
CLInfo
.
checkCLError
(
clSetKernelArg1i
(
clBitonicMergeLocal
,
7
,
dir
));
CLInfo
.
checkCLError
(
clSetKernelArg1i
(
clBitonicMergeLocal
,
7
,
dir
));
CLInfo
.
checkCLError
(
clSetKernelArg
(
clBitonicMergeLocal
,
8
,
max_work_group_size
*
4
));
// local memory
CLInfo
.
checkCLError
(
clSetKernelArg
(
clBitonicMergeLocal
,
9
,
max_work_group_size
*
4
));
// local memory
clGlobalWorkSize
=
stack
.
callocPointer
(
1
);
clGlobalWorkSize
=
stack
.
callocPointer
(
1
);
clLocalWorkSize
=
stack
.
callocPointer
(
1
);
clLocalWorkSize
=
stack
.
callocPointer
(
1
);
clGlobalWorkSize
.
put
(
0
,
keys
.
length
/
2
);
clGlobalWorkSize
.
put
(
0
,
keys
.
length
/
2
);
clLocalWorkSize
.
put
(
0
,
LOCAL_SIZE_LIMIT
/
2
);
clLocalWorkSize
.
put
(
0
,
max_work_group_size
/
2
);
CLInfo
.
checkCLError
(
clEnqueueNDRangeKernel
(
clQueue
,
clBitonicMergeLocal
,
1
,
null
,
clGlobalWorkSize
,
clLocalWorkSize
,
null
,
null
));
CLInfo
.
checkCLError
(
clEnqueueNDRangeKernel
(
clQueue
,
clBitonicMergeLocal
,
1
,
null
,
clGlobalWorkSize
,
clLocalWorkSize
,
null
,
null
));
CLInfo
.
checkCLError
(
clFinish
(
clQueue
));
CLInfo
.
checkCLError
(
clFinish
(
clQueue
));
...
@@ -368,6 +375,9 @@ public class CLBitonicSort {
...
@@ -368,6 +375,9 @@ public class CLBitonicSort {
clBitonicMergeLocal
=
clCreateKernel
(
clProgram
,
"bitonicMergeLocal"
,
errcode_ret
);
clBitonicMergeLocal
=
clCreateKernel
(
clProgram
,
"bitonicMergeLocal"
,
errcode_ret
);
CLInfo
.
checkCLError
(
errcode_ret
);
CLInfo
.
checkCLError
(
errcode_ret
);
PointerBuffer
pp
=
stack
.
mallocPointer
(
1
);
clGetDeviceInfo
(
clDevice
,
CL_DEVICE_MAX_WORK_GROUP_SIZE
,
pp
,
null
);
max_work_group_size
=
(
int
)
pp
.
get
(
0
);
}
}
}
}
...
...
VadereUtils/src/org/vadere/util/opencl/CL
UniformHashedGrid
.java
→
VadereUtils/src/org/vadere/util/opencl/CL
LinkedCell
.java
View file @
36865d52
...
@@ -19,7 +19,6 @@ import java.nio.IntBuffer;
...
@@ -19,7 +19,6 @@ import java.nio.IntBuffer;
import
java.util.List
;
import
java.util.List
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_CONTEXT_PLATFORM
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_CONTEXT_PLATFORM
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_DEVICE_ADDRESS_BITS
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_DEVICE_MAX_WORK_GROUP_SIZE
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_DEVICE_MAX_WORK_GROUP_SIZE
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_DEVICE_NAME
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_DEVICE_NAME
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_DEVICE_TYPE_GPU
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_DEVICE_TYPE_GPU
;
...
@@ -57,9 +56,12 @@ import static org.lwjgl.system.MemoryUtil.memUTF8;
...
@@ -57,9 +56,12 @@ import static org.lwjgl.system.MemoryUtil.memUTF8;
/**
/**
* @author Benedikt Zoennchen
* @author Benedikt Zoennchen
*
* This class offers the methods to compute an array based linked-cell which contains 2D-coordinates i.e. {@link VPoint}
* using the GPU (see. green-2007 Building the Grid using Sorting).
*/
*/
public
class
CL
UniformHashedGrid
{
public
class
CL
LinkedCell
{
private
static
Logger
log
=
LogManager
.
getLogger
(
CL
UniformHashedGrid
.
class
);
private
static
Logger
log
=
LogManager
.
getLogger
(
CL
LinkedCell
.
class
);
// CL ids
// CL ids
private
long
clPlatform
;
private
long
clPlatform
;
...
@@ -122,12 +124,9 @@ public class CLUniformHashedGrid {
...
@@ -122,12 +124,9 @@ public class CLUniformHashedGrid {
private
int
[]
resultValues
;
private
int
[]
resultValues
;
private
int
[]
resultKeys
;
private
int
[]
resultKeys
;
//Note: logically shared with BitonicSort.cl!
private
static
final
Logger
logger
=
LogManager
.
getLogger
(
CLLinkedCell
.
class
);
private
static
final
int
LOCAL_SIZE_LIMIT
=
16
;
private
static
final
Logger
logger
=
LogManager
.
getLogger
(
CLUniformHashedGrid
.
class
);
private
int
max_work_group_size
;
private
long
max_work_group_size
;
private
boolean
debug
=
false
;
private
boolean
debug
=
false
;
...
@@ -138,7 +137,16 @@ public class CLUniformHashedGrid {
...
@@ -138,7 +137,16 @@ public class CLUniformHashedGrid {
NonSeparate
NonSeparate
}
}
public
CLUniformHashedGrid
(
final
int
numberOfElements
,
final
VRectangle
bound
,
final
double
cellSize
)
throws
OpenCLException
{
/**
* Default constructor.
*
* @param numberOfElements the number of positions contained in the linked cell.
* @param bound the spatial bound of the linked cell.
* @param cellSize the cellSize (in x and y direction) of the linked cell.
*
* @throws OpenCLException
*/
public
CLLinkedCell
(
final
int
numberOfElements
,
final
VRectangle
bound
,
final
double
cellSize
)
throws
OpenCLException
{
this
.
numberOfElements
=
numberOfElements
;
this
.
numberOfElements
=
numberOfElements
;
this
.
iGridSize
=
new
int
[]{
(
int
)
Math
.
ceil
(
bound
.
getWidth
()
/
cellSize
),
(
int
)
Math
.
ceil
(
bound
.
getHeight
()
/
cellSize
)};
this
.
iGridSize
=
new
int
[]{
(
int
)
Math
.
ceil
(
bound
.
getWidth
()
/
cellSize
),
(
int
)
Math
.
ceil
(
bound
.
getHeight
()
/
cellSize
)};
this
.
numberOfGridCells
=
this
.
iGridSize
[
0
]
*
this
.
iGridSize
[
1
];
this
.
numberOfGridCells
=
this
.
iGridSize
[
0
]
*
this
.
iGridSize
[
1
];
...
@@ -153,16 +161,53 @@ public class CLUniformHashedGrid {
...
@@ -153,16 +161,53 @@ public class CLUniformHashedGrid {
init
();
init
();
}
}
public
class
GridCells
{
/**
* The data structure representing the linked cell. The elements of cell i
* between (reorderedPositions[cellStart[i]*2], reorderedPositions[cellStart[i]*2+1])
* and (reorderedPositions[(cellEnds[i]-1)*2], reorderedPositions[(cellEnds[i]-1)*2+1]).
*/
public
class
LinkedCell
{
/**
* the starting index at which the cell starts, i.e. cell i starts at cellStart[i].
*/
public
int
[]
cellStarts
;
public
int
[]
cellStarts
;
/**
* the ending index at which the cell starts, i.e. cell i ends at cellStart[i].
*/
public
int
[]
cellEnds
;
public
int
[]
cellEnds
;
/**
* the ordered 2D-coordinates.
*/
public
float
[]
reorderedPositions
;
public
float
[]
reorderedPositions
;
/**
* the mapping between the unordered (original) positions and the reorderedPositions,
* i.e. reorderedPositions[i] == positions[indices[i]]
*/
public
int
[]
indices
;
public
int
[]
indices
;
/**
* the hashes i.e. the cell of the positions, i.e. hashes[i] is the cell of positions[i].
*/
public
int
[]
hashes
;
public
int
[]
hashes
;
/**
* the original positions in original order.
*/
public
float
[]
positions
;
public
float
[]
positions
;
}
}
public
GridCells
calcPositionsInCell
(
@NotNull
final
List
<
VPoint
>
positions
)
throws
OpenCLException
{
/**
* Computes the {@link LinkedCell} of the list of positions.
*
* @param positions a list of position contained in {@link CLLinkedCell#bound}.
* @return {@link LinkedCell} which is the linked list in an array based structure.
*
* @throws OpenCLException
*/
public
LinkedCell
calcLinkedCell
(
@NotNull
final
List
<
VPoint
>
positions
)
throws
OpenCLException
{
assert
positions
.
size
()
==
numberOfElements
;
assert
positions
.
size
()
==
numberOfElements
;
this
.
positionList
=
positions
;
this
.
positionList
=
positions
;
allocHostMemory
();
allocHostMemory
();
...
@@ -186,7 +231,7 @@ public class CLUniformHashedGrid {
...
@@ -186,7 +231,7 @@ public class CLUniformHashedGrid {
int
[]
aHashes
=
CLUtils
.
toIntArray
(
hashes
,
numberOfElements
);
int
[]
aHashes
=
CLUtils
.
toIntArray
(
hashes
,
numberOfElements
);
float
[]
aPositions
=
CLUtils
.
toFloatArray
(
this
.
positions
,
numberOfElements
*
2
);
float
[]
aPositions
=
CLUtils
.
toFloatArray
(
this
.
positions
,
numberOfElements
*
2
);
Gri
dCell
s
gridCells
=
new
Gri
dCell
s
();
Linke
dCell
gridCells
=
new
Linke
dCell
();
gridCells
.
cellEnds
=
aCellEnds
;
gridCells
.
cellEnds
=
aCellEnds
;
gridCells
.
cellStarts
=
aCellStarts
;
gridCells
.
cellStarts
=
aCellStarts
;
gridCells
.
reorderedPositions
=
aReorderedPositions
;
gridCells
.
reorderedPositions
=
aReorderedPositions
;
...
@@ -202,6 +247,14 @@ public class CLUniformHashedGrid {
...
@@ -202,6 +247,14 @@ public class CLUniformHashedGrid {
//clFindCellBoundsAndReorder(clCellStarts, clCellEnds, clReorderedPositions, clHashes, clIndices, clPositions, numberOfElements, numberOfGridCells);
//clFindCellBoundsAndReorder(clCellStarts, clCellEnds, clReorderedPositions, clHashes, clIndices, clPositions, numberOfElements, numberOfGridCells);
}
}
/**
* Computes all the hash values, i.e. cells of each position and sort these hashes and construct a mapping
* of the rearrangement. This method exists to test the bitonic sort algorithm on the GPU.
*
* @param positions the positions which will be hashed.
* @return the sorted hashes.
* @throws OpenCLException
*/
public
int
[]
calcSortedHashes
(
@NotNull
final
List
<
VPoint
>
positions
)
throws
OpenCLException
{
public
int
[]
calcSortedHashes
(
@NotNull
final
List
<
VPoint
>
positions
)
throws
OpenCLException
{
assert
positions
.
size
()
==
numberOfElements
;
assert
positions
.
size
()
==
numberOfElements
;
this
.
positionList
=
positions
;
this
.
positionList
=
positions
;
...
@@ -221,6 +274,14 @@ public class CLUniformHashedGrid {
...
@@ -221,6 +274,14 @@ public class CLUniformHashedGrid {
//clFindCellBoundsAndReorder(clCellStarts, clCellEnds, clReorderedPositions, clHashes, clIndices, clPositions, numberOfElements, numberOfGridCells);
//clFindCellBoundsAndReorder(clCellStarts, clCellEnds, clReorderedPositions, clHashes, clIndices, clPositions, numberOfElements, numberOfGridCells);
}
}
/**
* Computes all the hash values, i.e. cells of each position.
* This method exists to test the hash computation on the GPU.
*
* @param positions the positions which will be hashed.
* @return the (unsorted) hashes.
* @throws OpenCLException
*/
public
int
[]
calcHashes
(
@NotNull
final
List
<
VPoint
>
positions
)
throws
OpenCLException
{
public
int
[]
calcHashes
(
@NotNull
final
List
<
VPoint
>
positions
)
throws
OpenCLException
{
assert
positions
.
size
()
==
numberOfElements
;
assert
positions
.
size
()
==
numberOfElements
;
this
.
positionList
=
positions
;
this
.
positionList
=
positions
;
...
@@ -239,10 +300,21 @@ public class CLUniformHashedGrid {
...
@@ -239,10 +300,21 @@ public class CLUniformHashedGrid {
//clFindCellBoundsAndReorder(clCellStarts, clCellEnds, clReorderedPositions, clHashes, clIndices, clPositions, numberOfElements, numberOfGridCells);
//clFindCellBoundsAndReorder(clCellStarts, clCellEnds, clReorderedPositions, clHashes, clIndices, clPositions, numberOfElements, numberOfGridCells);
}
}
/**
* Returns the gridSizes of the linked cell, i.e. result[0] is the x and
* result[1] the y direction.
*
* @return the gridSizes (2D) stored in an array.
*/
public
int
[]
getGridSize
()
{
public
int
[]
getGridSize
()
{
return
new
int
[]{
iGridSize
[
0
],
iGridSize
[
1
]};
return
new
int
[]{
iGridSize
[
0
],
iGridSize
[
1
]};
}
}
/**
* Returns the gridSize which is equal in x and y direction.
*
* @return the gridSize
*/
public
float
getCellSize
()
{
public
float
getCellSize
()
{
return
iCellSize
;
return
iCellSize
;
}
}
...
@@ -299,7 +371,7 @@ public class CLUniformHashedGrid {
...
@@ -299,7 +371,7 @@ public class CLUniformHashedGrid {
return
resultValues
;
return
resultValues
;
}
}