Currently job artifacts in CI/CD pipelines on LRZ GitLab never expire. Starting from Wed 26.1.2022 the default expiration time will be 30 days (GitLab default). Currently existing artifacts in already completed jobs will not be affected by the change. The latest artifacts for all jobs in the latest successful pipelines will be kept. More information: https://gitlab.lrz.de/help/user/admin_area/settings/continuous_integration.html#default-artifacts-expiration

Commit 36865d52 authored by Benedikt Zoennchen's avatar Benedikt Zoennchen
Browse files

documentation (adding comments), the hardware parameters are no longer hardcoded for the sorting.

parent fdc22cfc
...@@ -8,9 +8,6 @@ ...@@ -8,9 +8,6 @@
* is strictly prohibited. * is strictly prohibited.
* *
*/ */
#define LOCAL_SIZE_LIMIT 16U
inline void ComparatorPrivate( inline void ComparatorPrivate(
uint *keyA, uint *keyA,
uint *valA, uint *valA,
...@@ -48,10 +45,11 @@ __kernel void bitonicSortLocal( ...@@ -48,10 +45,11 @@ __kernel void bitonicSortLocal(
__global uint *d_SrcKey, __global uint *d_SrcKey,
__global uint *d_SrcVal, __global uint *d_SrcVal,
uint arrayLength, uint arrayLength,
uint dir uint dir,
__local uint *l_key,
__local uint *l_val
){ ){
__local uint l_key[LOCAL_SIZE_LIMIT]; uint LOCAL_SIZE_LIMIT = get_local_size(0) * 2;
__local uint l_val[LOCAL_SIZE_LIMIT];
//Offset to the beginning of subbatch and load data //Offset to the beginning of subbatch and load data
d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0); d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
...@@ -108,11 +106,11 @@ __kernel void bitonicSortLocal1( ...@@ -108,11 +106,11 @@ __kernel void bitonicSortLocal1(
__global uint *d_DstKey, __global uint *d_DstKey,
__global uint *d_DstVal, __global uint *d_DstVal,
__global uint *d_SrcKey, __global uint *d_SrcKey,
__global uint *d_SrcVal __global uint *d_SrcVal,
__local uint *l_key,
__local uint *l_val
){ ){
__local uint l_key[LOCAL_SIZE_LIMIT]; uint LOCAL_SIZE_LIMIT = get_local_size(0) * 2;
__local uint l_val[LOCAL_SIZE_LIMIT];
//Offset to the beginning of subarray and load data //Offset to the beginning of subarray and load data
d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0); d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
d_SrcVal += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0); d_SrcVal += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
...@@ -206,11 +204,11 @@ __kernel void bitonicMergeLocal( ...@@ -206,11 +204,11 @@ __kernel void bitonicMergeLocal(
uint arrayLength, uint arrayLength,
uint stride, uint stride,
uint size, uint size,
uint dir uint dir,
__local uint *l_key,
__local uint *l_val
){ ){
__local uint l_key[LOCAL_SIZE_LIMIT]; uint LOCAL_SIZE_LIMIT = get_local_size(0) * 2;
__local uint l_val[LOCAL_SIZE_LIMIT];
d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0); d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
d_SrcVal += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0); d_SrcVal += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
d_DstKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0); d_DstKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
......
...@@ -63,8 +63,6 @@ typedef struct { ...@@ -63,8 +63,6 @@ typedef struct {
float stepLength; float stepLength;
} pedestrian; } pedestrian;
#define LOCAL_SIZE_LIMIT 16U
inline void ComparatorPrivate( inline void ComparatorPrivate(
uint *keyA, uint *keyA,
uint *valA, uint *valA,
...@@ -329,10 +327,11 @@ __kernel void bitonicSortLocal( ...@@ -329,10 +327,11 @@ __kernel void bitonicSortLocal(
__global uint *d_SrcKey, __global uint *d_SrcKey,
__global uint *d_SrcVal, __global uint *d_SrcVal,
uint arrayLength, uint arrayLength,
uint dir uint dir,
__local uint *l_key,
__local uint *l_val
){ ){
__local uint l_key[LOCAL_SIZE_LIMIT]; uint LOCAL_SIZE_LIMIT = get_local_size(0) * 2;
__local uint l_val[LOCAL_SIZE_LIMIT];
//Offset to the beginning of subbatch and load data //Offset to the beginning of subbatch and load data
d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0); d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
...@@ -389,11 +388,11 @@ __kernel void bitonicSortLocal1( ...@@ -389,11 +388,11 @@ __kernel void bitonicSortLocal1(
__global uint *d_DstKey, __global uint *d_DstKey,
__global uint *d_DstVal, __global uint *d_DstVal,
__global uint *d_SrcKey, __global uint *d_SrcKey,
__global uint *d_SrcVal __global uint *d_SrcVal,
__local uint *l_key,
__local uint *l_val
){ ){
__local uint l_key[LOCAL_SIZE_LIMIT]; uint LOCAL_SIZE_LIMIT = get_local_size(0) * 2;
__local uint l_val[LOCAL_SIZE_LIMIT];
//Offset to the beginning of subarray and load data //Offset to the beginning of subarray and load data
d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0); d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
d_SrcVal += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0); d_SrcVal += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
...@@ -487,11 +486,11 @@ __kernel void bitonicMergeLocal( ...@@ -487,11 +486,11 @@ __kernel void bitonicMergeLocal(
uint arrayLength, uint arrayLength,
uint stride, uint stride,
uint size, uint size,
uint dir uint dir,
__local uint *l_key,
__local uint *l_val
){ ){
__local uint l_key[LOCAL_SIZE_LIMIT]; uint LOCAL_SIZE_LIMIT = get_local_size(0) * 2;
__local uint l_val[LOCAL_SIZE_LIMIT];
d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0); d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
d_SrcVal += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0); d_SrcVal += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
d_DstKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0); d_DstKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
......
...@@ -12,17 +12,15 @@ import org.lwjgl.system.MemoryUtil; ...@@ -12,17 +12,15 @@ import org.lwjgl.system.MemoryUtil;
import java.io.IOException; import java.io.IOException;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.nio.FloatBuffer;
import java.nio.IntBuffer; import java.nio.IntBuffer;
import static org.lwjgl.opencl.CL10.CL_CONTEXT_PLATFORM; import static org.lwjgl.opencl.CL10.CL_CONTEXT_PLATFORM;
import static org.lwjgl.opencl.CL10.CL_DEVICE_MAX_WORK_GROUP_SIZE;
import static org.lwjgl.opencl.CL10.CL_DEVICE_NAME; import static org.lwjgl.opencl.CL10.CL_DEVICE_NAME;
import static org.lwjgl.opencl.CL10.CL_DEVICE_TYPE_GPU; import static org.lwjgl.opencl.CL10.CL_DEVICE_TYPE_GPU;
import static org.lwjgl.opencl.CL10.CL_MEM_ALLOC_HOST_PTR; import static org.lwjgl.opencl.CL10.CL_MEM_ALLOC_HOST_PTR;
import static org.lwjgl.opencl.CL10.CL_MEM_COPY_HOST_PTR; import static org.lwjgl.opencl.CL10.CL_MEM_COPY_HOST_PTR;
import static org.lwjgl.opencl.CL10.CL_MEM_READ_ONLY;
import static org.lwjgl.opencl.CL10.CL_MEM_READ_WRITE; import static org.lwjgl.opencl.CL10.CL_MEM_READ_WRITE;
import static org.lwjgl.opencl.CL10.CL_MEM_WRITE_ONLY;
import static org.lwjgl.opencl.CL10.CL_PROGRAM_BUILD_STATUS; import static org.lwjgl.opencl.CL10.CL_PROGRAM_BUILD_STATUS;
import static org.lwjgl.opencl.CL10.CL_SUCCESS; import static org.lwjgl.opencl.CL10.CL_SUCCESS;
import static org.lwjgl.opencl.CL10.clBuildProgram; import static org.lwjgl.opencl.CL10.clBuildProgram;
...@@ -33,15 +31,16 @@ import static org.lwjgl.opencl.CL10.clCreateKernel; ...@@ -33,15 +31,16 @@ import static org.lwjgl.opencl.CL10.clCreateKernel;
import static org.lwjgl.opencl.CL10.clCreateProgramWithSource; import static org.lwjgl.opencl.CL10.clCreateProgramWithSource;
import static org.lwjgl.opencl.CL10.clEnqueueNDRangeKernel; import static org.lwjgl.opencl.CL10.clEnqueueNDRangeKernel;
import static org.lwjgl.opencl.CL10.clEnqueueReadBuffer; import static org.lwjgl.opencl.CL10.clEnqueueReadBuffer;
import static org.lwjgl.opencl.CL10.clEnqueueWriteBuffer;
import static org.lwjgl.opencl.CL10.clFinish; import static org.lwjgl.opencl.CL10.clFinish;
import static org.lwjgl.opencl.CL10.clGetDeviceIDs; import static org.lwjgl.opencl.CL10.clGetDeviceIDs;
import static org.lwjgl.opencl.CL10.clGetDeviceInfo;
import static org.lwjgl.opencl.CL10.clGetPlatformIDs; import static org.lwjgl.opencl.CL10.clGetPlatformIDs;
import static org.lwjgl.opencl.CL10.clReleaseCommandQueue; import static org.lwjgl.opencl.CL10.clReleaseCommandQueue;
import static org.lwjgl.opencl.CL10.clReleaseContext; import static org.lwjgl.opencl.CL10.clReleaseContext;
import static org.lwjgl.opencl.CL10.clReleaseKernel; import static org.lwjgl.opencl.CL10.clReleaseKernel;
import static org.lwjgl.opencl.CL10.clReleaseMemObject; import static org.lwjgl.opencl.CL10.clReleaseMemObject;
import static org.lwjgl.opencl.CL10.clReleaseProgram; import static org.lwjgl.opencl.CL10.clReleaseProgram;
import static org.lwjgl.opencl.CL10.clSetKernelArg;
import static org.lwjgl.opencl.CL10.clSetKernelArg1i; import static org.lwjgl.opencl.CL10.clSetKernelArg1i;
import static org.lwjgl.opencl.CL10.clSetKernelArg1p; import static org.lwjgl.opencl.CL10.clSetKernelArg1p;
import static org.lwjgl.system.MemoryStack.stackPush; import static org.lwjgl.system.MemoryStack.stackPush;
...@@ -50,6 +49,8 @@ import static org.lwjgl.system.MemoryUtil.memUTF8; ...@@ -50,6 +49,8 @@ import static org.lwjgl.system.MemoryUtil.memUTF8;
/** /**
* @author Benedikt Zoennchen * @author Benedikt Zoennchen
*
* This class implements the bitonic sort using the GPU via OpenCL.
*/ */
public class CLBitonicSort { public class CLBitonicSort {
private static Logger log = LogManager.getLogger(CLBitonicSort.class); private static Logger log = LogManager.getLogger(CLBitonicSort.class);
...@@ -93,7 +94,7 @@ public class CLBitonicSort { ...@@ -93,7 +94,7 @@ public class CLBitonicSort {
private int[] resultKeys; private int[] resultKeys;
//Note: logically shared with BitonicSort.cl! //Note: logically shared with BitonicSort.cl!
private static final int LOCAL_SIZE_LIMIT = 16; private int max_work_group_size = 16;
private boolean debug = false; private boolean debug = false;
...@@ -154,7 +155,7 @@ public class CLBitonicSort { ...@@ -154,7 +155,7 @@ public class CLBitonicSort {
// small sorts // small sorts
if(keys.length <= LOCAL_SIZE_LIMIT) if(keys.length <= max_work_group_size)
{ {
CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal, 0, clOutKeys)); CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal, 0, clOutKeys));
CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal, 1, clOutValues)); CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal, 1, clOutValues));
...@@ -162,6 +163,8 @@ public class CLBitonicSort { ...@@ -162,6 +163,8 @@ public class CLBitonicSort {
CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal, 3, clInValues)); CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal, 3, clInValues));
CLInfo.checkCLError(clSetKernelArg1i(clBitonicSortLocal, 4, keys.length)); CLInfo.checkCLError(clSetKernelArg1i(clBitonicSortLocal, 4, keys.length));
CLInfo.checkCLError(clSetKernelArg1i(clBitonicSortLocal, 5, 1)); CLInfo.checkCLError(clSetKernelArg1i(clBitonicSortLocal, 5, 1));
CLInfo.checkCLError(clSetKernelArg(clBitonicSortLocal, 6, keys.length * 4)); // local memory
CLInfo.checkCLError(clSetKernelArg(clBitonicSortLocal, 7, keys.length * 4)); // local memory
clGlobalWorkSize.put(0, keys.length / 2); clGlobalWorkSize.put(0, keys.length / 2);
clLocalWorkSize.put(0, keys.length / 2); clLocalWorkSize.put(0, keys.length / 2);
...@@ -175,20 +178,22 @@ public class CLBitonicSort { ...@@ -175,20 +178,22 @@ public class CLBitonicSort {
CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal1, 1, clOutValues)); CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal1, 1, clOutValues));
CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal1, 2, clInKeys)); CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal1, 2, clInKeys));
CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal1, 3, clInValues)); CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal1, 3, clInValues));
CLInfo.checkCLError(clSetKernelArg(clBitonicSortLocal1, 4, max_work_group_size * 4)); // local memory
CLInfo.checkCLError(clSetKernelArg(clBitonicSortLocal1, 5, max_work_group_size * 4)); // local memory
clGlobalWorkSize = stack.callocPointer(1); clGlobalWorkSize = stack.callocPointer(1);
clLocalWorkSize = stack.callocPointer(1); clLocalWorkSize = stack.callocPointer(1);
clGlobalWorkSize.put(0, keys.length / 2); clGlobalWorkSize.put(0, keys.length / 2);
clLocalWorkSize.put(0, LOCAL_SIZE_LIMIT / 2); clLocalWorkSize.put(0, max_work_group_size / 2);
CLInfo.checkCLError(clEnqueueNDRangeKernel(clQueue, clBitonicSortLocal1, 1, null, clGlobalWorkSize, clLocalWorkSize, null, null)); CLInfo.checkCLError(clEnqueueNDRangeKernel(clQueue, clBitonicSortLocal1, 1, null, clGlobalWorkSize, clLocalWorkSize, null, null));
CLInfo.checkCLError(clFinish(clQueue)); CLInfo.checkCLError(clFinish(clQueue));
for(int size = 2 * LOCAL_SIZE_LIMIT; size <= keys.length; size <<= 1) for(int size = 2 * max_work_group_size; size <= keys.length; size <<= 1)
{ {
for(int stride = size / 2; stride > 0; stride >>= 1) for(int stride = size / 2; stride > 0; stride >>= 1)
{ {
if(stride >= LOCAL_SIZE_LIMIT) if(stride >= max_work_group_size)
{ {
//Launch bitonicMergeGlobal //Launch bitonicMergeGlobal
CLInfo.checkCLError(clSetKernelArg1p(clBitonicMergeGlobal, 0, clOutKeys)); CLInfo.checkCLError(clSetKernelArg1p(clBitonicMergeGlobal, 0, clOutKeys));
...@@ -204,7 +209,7 @@ public class CLBitonicSort { ...@@ -204,7 +209,7 @@ public class CLBitonicSort {
clGlobalWorkSize = stack.callocPointer(1); clGlobalWorkSize = stack.callocPointer(1);
clLocalWorkSize = stack.callocPointer(1); clLocalWorkSize = stack.callocPointer(1);
clGlobalWorkSize.put(0, keys.length / 2); clGlobalWorkSize.put(0, keys.length / 2);
clLocalWorkSize.put(0, LOCAL_SIZE_LIMIT / 4); clLocalWorkSize.put(0, max_work_group_size / 4);
CLInfo.checkCLError(clEnqueueNDRangeKernel(clQueue, clBitonicMergeGlobal, 1, null, clGlobalWorkSize, clLocalWorkSize, null, null)); CLInfo.checkCLError(clEnqueueNDRangeKernel(clQueue, clBitonicMergeGlobal, 1, null, clGlobalWorkSize, clLocalWorkSize, null, null));
CLInfo.checkCLError(clFinish(clQueue)); CLInfo.checkCLError(clFinish(clQueue));
...@@ -221,11 +226,13 @@ public class CLBitonicSort { ...@@ -221,11 +226,13 @@ public class CLBitonicSort {
CLInfo.checkCLError(clSetKernelArg1i(clBitonicMergeLocal, 5, stride)); CLInfo.checkCLError(clSetKernelArg1i(clBitonicMergeLocal, 5, stride));
CLInfo.checkCLError(clSetKernelArg1i(clBitonicMergeLocal, 6, size)); CLInfo.checkCLError(clSetKernelArg1i(clBitonicMergeLocal, 6, size));
CLInfo.checkCLError(clSetKernelArg1i(clBitonicMergeLocal, 7, dir)); CLInfo.checkCLError(clSetKernelArg1i(clBitonicMergeLocal, 7, dir));
CLInfo.checkCLError(clSetKernelArg(clBitonicMergeLocal, 8, max_work_group_size * 4 )); // local memory
CLInfo.checkCLError(clSetKernelArg(clBitonicMergeLocal, 9, max_work_group_size * 4)); // local memory
clGlobalWorkSize = stack.callocPointer(1); clGlobalWorkSize = stack.callocPointer(1);
clLocalWorkSize = stack.callocPointer(1); clLocalWorkSize = stack.callocPointer(1);
clGlobalWorkSize.put(0, keys.length / 2); clGlobalWorkSize.put(0, keys.length / 2);
clLocalWorkSize.put(0, LOCAL_SIZE_LIMIT / 2); clLocalWorkSize.put(0, max_work_group_size / 2);
CLInfo.checkCLError(clEnqueueNDRangeKernel(clQueue, clBitonicMergeLocal, 1, null, clGlobalWorkSize, clLocalWorkSize, null, null)); CLInfo.checkCLError(clEnqueueNDRangeKernel(clQueue, clBitonicMergeLocal, 1, null, clGlobalWorkSize, clLocalWorkSize, null, null));
CLInfo.checkCLError(clFinish(clQueue)); CLInfo.checkCLError(clFinish(clQueue));
...@@ -368,6 +375,9 @@ public class CLBitonicSort { ...@@ -368,6 +375,9 @@ public class CLBitonicSort {
clBitonicMergeLocal = clCreateKernel(clProgram, "bitonicMergeLocal", errcode_ret); clBitonicMergeLocal = clCreateKernel(clProgram, "bitonicMergeLocal", errcode_ret);
CLInfo.checkCLError(errcode_ret); CLInfo.checkCLError(errcode_ret);
PointerBuffer pp = stack.mallocPointer(1);
clGetDeviceInfo(clDevice, CL_DEVICE_MAX_WORK_GROUP_SIZE, pp, null);
max_work_group_size = (int)pp.get(0);
} }
} }
......
...@@ -19,7 +19,6 @@ import java.nio.IntBuffer; ...@@ -19,7 +19,6 @@ import java.nio.IntBuffer;
import java.util.List; import java.util.List;
import static org.lwjgl.opencl.CL10.CL_CONTEXT_PLATFORM; import static org.lwjgl.opencl.CL10.CL_CONTEXT_PLATFORM;
import static org.lwjgl.opencl.CL10.CL_DEVICE_ADDRESS_BITS;
import static org.lwjgl.opencl.CL10.CL_DEVICE_MAX_WORK_GROUP_SIZE; import static org.lwjgl.opencl.CL10.CL_DEVICE_MAX_WORK_GROUP_SIZE;
import static org.lwjgl.opencl.CL10.CL_DEVICE_NAME; import static org.lwjgl.opencl.CL10.CL_DEVICE_NAME;
import static org.lwjgl.opencl.CL10.CL_DEVICE_TYPE_GPU; import static org.lwjgl.opencl.CL10.CL_DEVICE_TYPE_GPU;
...@@ -57,9 +56,12 @@ import static org.lwjgl.system.MemoryUtil.memUTF8; ...@@ -57,9 +56,12 @@ import static org.lwjgl.system.MemoryUtil.memUTF8;
/** /**
* @author Benedikt Zoennchen * @author Benedikt Zoennchen
*
* This class offers the methods to compute an array based linked-cell which contains 2D-coordinates i.e. {@link VPoint}
* using the GPU (see. green-2007 Building the Grid using Sorting).
*/ */
public class CLUniformHashedGrid { public class CLLinkedCell {
private static Logger log = LogManager.getLogger(CLUniformHashedGrid.class); private static Logger log = LogManager.getLogger(CLLinkedCell.class);
// CL ids // CL ids
private long clPlatform; private long clPlatform;
...@@ -122,12 +124,9 @@ public class CLUniformHashedGrid { ...@@ -122,12 +124,9 @@ public class CLUniformHashedGrid {
private int[] resultValues; private int[] resultValues;
private int[] resultKeys; private int[] resultKeys;
//Note: logically shared with BitonicSort.cl! private static final Logger logger = LogManager.getLogger(CLLinkedCell.class);
private static final int LOCAL_SIZE_LIMIT = 16;
private static final Logger logger = LogManager.getLogger(CLUniformHashedGrid.class); private int max_work_group_size;
private long max_work_group_size;
private boolean debug = false; private boolean debug = false;
...@@ -138,7 +137,16 @@ public class CLUniformHashedGrid { ...@@ -138,7 +137,16 @@ public class CLUniformHashedGrid {
NonSeparate NonSeparate
} }
public CLUniformHashedGrid(final int numberOfElements, final VRectangle bound, final double cellSize) throws OpenCLException { /**
* Default constructor.
*
* @param numberOfElements the number of positions contained in the linked cell.
* @param bound the spatial bound of the linked cell.
* @param cellSize the cellSize (in x and y direction) of the linked cell.
*
* @throws OpenCLException
*/
public CLLinkedCell(final int numberOfElements, final VRectangle bound, final double cellSize) throws OpenCLException {
this.numberOfElements = numberOfElements; this.numberOfElements = numberOfElements;
this.iGridSize = new int[]{ (int)Math.ceil(bound.getWidth() / cellSize), (int)Math.ceil(bound.getHeight() / cellSize)}; this.iGridSize = new int[]{ (int)Math.ceil(bound.getWidth() / cellSize), (int)Math.ceil(bound.getHeight() / cellSize)};
this.numberOfGridCells = this.iGridSize[0] * this.iGridSize[1]; this.numberOfGridCells = this.iGridSize[0] * this.iGridSize[1];
...@@ -153,16 +161,53 @@ public class CLUniformHashedGrid { ...@@ -153,16 +161,53 @@ public class CLUniformHashedGrid {
init(); init();
} }
public class GridCells { /**
* The data structure representing the linked cell. The elements of cell i
* between (reorderedPositions[cellStart[i]*2], reorderedPositions[cellStart[i]*2+1])
* and (reorderedPositions[(cellEnds[i]-1)*2], reorderedPositions[(cellEnds[i]-1)*2+1]).
*/
public class LinkedCell {
/**
* the starting index at which the cell starts, i.e. cell i starts at cellStart[i].
*/
public int[] cellStarts; public int[] cellStarts;
public int[] cellEnds;
/**
* the ending index at which the cell starts, i.e. cell i ends at cellStart[i].
*/
public int[] cellEnds;
/**
* the ordered 2D-coordinates.
*/
public float[] reorderedPositions; public float[] reorderedPositions;
/**
* the mapping between the unordered (original) positions and the reorderedPositions,
* i.e. reorderedPositions[i] == positions[indices[i]]
*/
public int[] indices; public int[] indices;
public int[] hashes;
public float[] positions; /**
* the hashes i.e. the cell of the positions, i.e. hashes[i] is the cell of positions[i].
*/
public int[] hashes;
/**
* the original positions in original order.
*/
public float[] positions;
} }
public GridCells calcPositionsInCell(@NotNull final List<VPoint> positions) throws OpenCLException { /**
* Computes the {@link LinkedCell} of the list of positions.
*
* @param positions a list of position contained in {@link CLLinkedCell#bound}.
* @return {@link LinkedCell} which is the linked list in an array based structure.
*
* @throws OpenCLException
*/
public LinkedCell calcLinkedCell(@NotNull final List<VPoint> positions) throws OpenCLException {
assert positions.size() == numberOfElements; assert positions.size() == numberOfElements;
this.positionList = positions; this.positionList = positions;
allocHostMemory(); allocHostMemory();
...@@ -186,7 +231,7 @@ public class CLUniformHashedGrid { ...@@ -186,7 +231,7 @@ public class CLUniformHashedGrid {
int[] aHashes = CLUtils.toIntArray(hashes, numberOfElements); int[] aHashes = CLUtils.toIntArray(hashes, numberOfElements);
float[] aPositions = CLUtils.toFloatArray(this.positions, numberOfElements * 2); float[] aPositions = CLUtils.toFloatArray(this.positions, numberOfElements * 2);
GridCells gridCells = new GridCells(); LinkedCell gridCells = new LinkedCell();
gridCells.cellEnds = aCellEnds; gridCells.cellEnds = aCellEnds;
gridCells.cellStarts = aCellStarts; gridCells.cellStarts = aCellStarts;
gridCells.reorderedPositions = aReorderedPositions; gridCells.reorderedPositions = aReorderedPositions;
...@@ -202,6 +247,14 @@ public class CLUniformHashedGrid { ...@@ -202,6 +247,14 @@ public class CLUniformHashedGrid {
//clFindCellBoundsAndReorder(clCellStarts, clCellEnds, clReorderedPositions, clHashes, clIndices, clPositions, numberOfElements, numberOfGridCells); //clFindCellBoundsAndReorder(clCellStarts, clCellEnds, clReorderedPositions, clHashes, clIndices, clPositions, numberOfElements, numberOfGridCells);
} }
/**
* Computes all the hash values, i.e. cells of each position and sort these hashes and construct a mapping
* of the rearrangement. This method exists to test the bitonic sort algorithm on the GPU.
*
* @param positions the positions which will be hashed.
* @return the sorted hashes.
* @throws OpenCLException
*/
public int[] calcSortedHashes(@NotNull final List<VPoint> positions) throws OpenCLException { public int[] calcSortedHashes(@NotNull final List<VPoint> positions) throws OpenCLException {
assert positions.size() == numberOfElements; assert positions.size() == numberOfElements;
this.positionList = positions; this.positionList = positions;
...@@ -221,6 +274,14 @@ public class CLUniformHashedGrid { ...@@ -221,6 +274,14 @@ public class CLUniformHashedGrid {
//clFindCellBoundsAndReorder(clCellStarts, clCellEnds, clReorderedPositions, clHashes, clIndices, clPositions, numberOfElements, numberOfGridCells); //clFindCellBoundsAndReorder(clCellStarts, clCellEnds, clReorderedPositions, clHashes, clIndices, clPositions, numberOfElements, numberOfGridCells);
} }
/**
* Computes all the hash values, i.e. cells of each position.
* This method exists to test the hash computation on the GPU.
*
* @param positions the positions which will be hashed.
* @return the (unsorted) hashes.
* @throws OpenCLException
*/
public int[] calcHashes(@NotNull final List<VPoint> positions) throws OpenCLException { public int[] calcHashes(@NotNull final List<VPoint> positions) throws OpenCLException {
assert positions.size() == numberOfElements; assert positions.size() == numberOfElements;
this.positionList = positions; this.positionList = positions;
...@@ -239,11 +300,22 @@ public class CLUniformHashedGrid { ...@@ -239,11 +300,22 @@ public class CLUniformHashedGrid {
//clFindCellBoundsAndReorder(clCellStarts, clCellEnds, clReorderedPositions, clHashes, clIndices, clPositions, numberOfElements, numberOfGridCells); //clFindCellBoundsAndReorder(clCellStarts, clCellEnds, clReorderedPositions, clHashes, clIndices, clPositions, numberOfElements, numberOfGridCells);
} }
public int[] getGridSize() { /**
* Returns the gridSizes of the linked cell, i.e. result[0] is the x and
* result[1] the y direction.
*
* @return the gridSizes (2D) stored in an array.
*/
public int[] getGridSize() {
return new int[]{iGridSize[0], iGridSize[1]}; return new int[]{iGridSize[0], iGridSize[1]};
} }
public float getCellSize() { /**
* Returns the gridSize which is equal in x and y direction.
*
* @return the gridSize
*/
public float getCellSize() {
return iCellSize; return iCellSize;
} }
...@@ -299,7 +371,7 @@ public class CLUniformHashedGrid { ...@@ -299,7 +371,7 @@ public class CLUniformHashedGrid {
return resultValues; return resultValues;
} }