Currently job artifacts in CI/CD pipelines on LRZ GitLab never expire. Starting from Wed 26.1.2022 the default expiration time will be 30 days (GitLab default). Currently existing artifacts in already completed jobs will not be affected by the change. The latest artifacts for all jobs in the latest successful pipelines will be kept. More information: https://gitlab.lrz.de/help/user/admin_area/settings/continuous_integration.html#default-artifacts-expiration

Commit 36865d52 authored by Benedikt Zoennchen
Browse files

Add documentation (comments); the hardware parameters for the sorting are no longer hard-coded.

parent fdc22cfc
......@@ -8,9 +8,6 @@
* is strictly prohibited.
*
*/
#define LOCAL_SIZE_LIMIT 16U
inline void ComparatorPrivate(
uint *keyA,
uint *valA,
......@@ -48,10 +45,11 @@ __kernel void bitonicSortLocal(
__global uint *d_SrcKey,
__global uint *d_SrcVal,
uint arrayLength,
uint dir
uint dir,
__local uint *l_key,
__local uint *l_val
){
__local uint l_key[LOCAL_SIZE_LIMIT];
__local uint l_val[LOCAL_SIZE_LIMIT];
uint LOCAL_SIZE_LIMIT = get_local_size(0) * 2;
//Offset to the beginning of subbatch and load data
d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
......@@ -108,11 +106,11 @@ __kernel void bitonicSortLocal1(
__global uint *d_DstKey,
__global uint *d_DstVal,
__global uint *d_SrcKey,
__global uint *d_SrcVal
__global uint *d_SrcVal,
__local uint *l_key,
__local uint *l_val
){
__local uint l_key[LOCAL_SIZE_LIMIT];
__local uint l_val[LOCAL_SIZE_LIMIT];
uint LOCAL_SIZE_LIMIT = get_local_size(0) * 2;
//Offset to the beginning of subarray and load data
d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
d_SrcVal += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
......@@ -206,11 +204,11 @@ __kernel void bitonicMergeLocal(
uint arrayLength,
uint stride,
uint size,
uint dir
uint dir,
__local uint *l_key,
__local uint *l_val
){
__local uint l_key[LOCAL_SIZE_LIMIT];
__local uint l_val[LOCAL_SIZE_LIMIT];
uint LOCAL_SIZE_LIMIT = get_local_size(0) * 2;
d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
d_SrcVal += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
d_DstKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
......
......@@ -63,8 +63,6 @@ typedef struct {
float stepLength;
} pedestrian;
#define LOCAL_SIZE_LIMIT 16U
inline void ComparatorPrivate(
uint *keyA,
uint *valA,
......@@ -329,10 +327,11 @@ __kernel void bitonicSortLocal(
__global uint *d_SrcKey,
__global uint *d_SrcVal,
uint arrayLength,
uint dir
uint dir,
__local uint *l_key,
__local uint *l_val
){
__local uint l_key[LOCAL_SIZE_LIMIT];
__local uint l_val[LOCAL_SIZE_LIMIT];
uint LOCAL_SIZE_LIMIT = get_local_size(0) * 2;
//Offset to the beginning of subbatch and load data
d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
......@@ -389,11 +388,11 @@ __kernel void bitonicSortLocal1(
__global uint *d_DstKey,
__global uint *d_DstVal,
__global uint *d_SrcKey,
__global uint *d_SrcVal
__global uint *d_SrcVal,
__local uint *l_key,
__local uint *l_val
){
__local uint l_key[LOCAL_SIZE_LIMIT];
__local uint l_val[LOCAL_SIZE_LIMIT];
uint LOCAL_SIZE_LIMIT = get_local_size(0) * 2;
//Offset to the beginning of subarray and load data
d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
d_SrcVal += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
......@@ -487,11 +486,11 @@ __kernel void bitonicMergeLocal(
uint arrayLength,
uint stride,
uint size,
uint dir
uint dir,
__local uint *l_key,
__local uint *l_val
){
__local uint l_key[LOCAL_SIZE_LIMIT];
__local uint l_val[LOCAL_SIZE_LIMIT];
uint LOCAL_SIZE_LIMIT = get_local_size(0) * 2;
d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
d_SrcVal += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
d_DstKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
......
......@@ -12,17 +12,15 @@ import org.lwjgl.system.MemoryUtil;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.FloatBuffer;
import java.nio.IntBuffer;
import static org.lwjgl.opencl.CL10.CL_CONTEXT_PLATFORM;
import static org.lwjgl.opencl.CL10.CL_DEVICE_MAX_WORK_GROUP_SIZE;
import static org.lwjgl.opencl.CL10.CL_DEVICE_NAME;
import static org.lwjgl.opencl.CL10.CL_DEVICE_TYPE_GPU;
import static org.lwjgl.opencl.CL10.CL_MEM_ALLOC_HOST_PTR;
import static org.lwjgl.opencl.CL10.CL_MEM_COPY_HOST_PTR;
import static org.lwjgl.opencl.CL10.CL_MEM_READ_ONLY;
import static org.lwjgl.opencl.CL10.CL_MEM_READ_WRITE;
import static org.lwjgl.opencl.CL10.CL_MEM_WRITE_ONLY;
import static org.lwjgl.opencl.CL10.CL_PROGRAM_BUILD_STATUS;
import static org.lwjgl.opencl.CL10.CL_SUCCESS;
import static org.lwjgl.opencl.CL10.clBuildProgram;
......@@ -33,15 +31,16 @@ import static org.lwjgl.opencl.CL10.clCreateKernel;
import static org.lwjgl.opencl.CL10.clCreateProgramWithSource;
import static org.lwjgl.opencl.CL10.clEnqueueNDRangeKernel;
import static org.lwjgl.opencl.CL10.clEnqueueReadBuffer;
import static org.lwjgl.opencl.CL10.clEnqueueWriteBuffer;
import static org.lwjgl.opencl.CL10.clFinish;
import static org.lwjgl.opencl.CL10.clGetDeviceIDs;
import static org.lwjgl.opencl.CL10.clGetDeviceInfo;
import static org.lwjgl.opencl.CL10.clGetPlatformIDs;
import static org.lwjgl.opencl.CL10.clReleaseCommandQueue;
import static org.lwjgl.opencl.CL10.clReleaseContext;
import static org.lwjgl.opencl.CL10.clReleaseKernel;
import static org.lwjgl.opencl.CL10.clReleaseMemObject;
import static org.lwjgl.opencl.CL10.clReleaseProgram;
import static org.lwjgl.opencl.CL10.clSetKernelArg;
import static org.lwjgl.opencl.CL10.clSetKernelArg1i;
import static org.lwjgl.opencl.CL10.clSetKernelArg1p;
import static org.lwjgl.system.MemoryStack.stackPush;
......@@ -50,6 +49,8 @@ import static org.lwjgl.system.MemoryUtil.memUTF8;
/**
* @author Benedikt Zoennchen
*
* This class implements the bitonic sort using the GPU via OpenCL.
*/
public class CLBitonicSort {
private static Logger log = LogManager.getLogger(CLBitonicSort.class);
......@@ -93,7 +94,7 @@ public class CLBitonicSort {
private int[] resultKeys;
//Note: logically shared with BitonicSort.cl!
private static final int LOCAL_SIZE_LIMIT = 16;
private int max_work_group_size = 16;
private boolean debug = false;
......@@ -154,7 +155,7 @@ public class CLBitonicSort {
// small sorts
if(keys.length <= LOCAL_SIZE_LIMIT)
if(keys.length <= max_work_group_size)
{
CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal, 0, clOutKeys));
CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal, 1, clOutValues));
......@@ -162,6 +163,8 @@ public class CLBitonicSort {
CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal, 3, clInValues));
CLInfo.checkCLError(clSetKernelArg1i(clBitonicSortLocal, 4, keys.length));
CLInfo.checkCLError(clSetKernelArg1i(clBitonicSortLocal, 5, 1));
CLInfo.checkCLError(clSetKernelArg(clBitonicSortLocal, 6, keys.length * 4)); // local memory
CLInfo.checkCLError(clSetKernelArg(clBitonicSortLocal, 7, keys.length * 4)); // local memory
clGlobalWorkSize.put(0, keys.length / 2);
clLocalWorkSize.put(0, keys.length / 2);
......@@ -175,20 +178,22 @@ public class CLBitonicSort {
CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal1, 1, clOutValues));
CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal1, 2, clInKeys));
CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal1, 3, clInValues));
CLInfo.checkCLError(clSetKernelArg(clBitonicSortLocal1, 4, max_work_group_size * 4)); // local memory
CLInfo.checkCLError(clSetKernelArg(clBitonicSortLocal1, 5, max_work_group_size * 4)); // local memory
clGlobalWorkSize = stack.callocPointer(1);
clLocalWorkSize = stack.callocPointer(1);
clGlobalWorkSize.put(0, keys.length / 2);
clLocalWorkSize.put(0, LOCAL_SIZE_LIMIT / 2);
clLocalWorkSize.put(0, max_work_group_size / 2);
CLInfo.checkCLError(clEnqueueNDRangeKernel(clQueue, clBitonicSortLocal1, 1, null, clGlobalWorkSize, clLocalWorkSize, null, null));
CLInfo.checkCLError(clFinish(clQueue));
for(int size = 2 * LOCAL_SIZE_LIMIT; size <= keys.length; size <<= 1)
for(int size = 2 * max_work_group_size; size <= keys.length; size <<= 1)
{
for(int stride = size / 2; stride > 0; stride >>= 1)
{
if(stride >= LOCAL_SIZE_LIMIT)
if(stride >= max_work_group_size)
{
//Launch bitonicMergeGlobal
CLInfo.checkCLError(clSetKernelArg1p(clBitonicMergeGlobal, 0, clOutKeys));
......@@ -204,7 +209,7 @@ public class CLBitonicSort {
clGlobalWorkSize = stack.callocPointer(1);
clLocalWorkSize = stack.callocPointer(1);
clGlobalWorkSize.put(0, keys.length / 2);
clLocalWorkSize.put(0, LOCAL_SIZE_LIMIT / 4);
clLocalWorkSize.put(0, max_work_group_size / 4);
CLInfo.checkCLError(clEnqueueNDRangeKernel(clQueue, clBitonicMergeGlobal, 1, null, clGlobalWorkSize, clLocalWorkSize, null, null));
CLInfo.checkCLError(clFinish(clQueue));
......@@ -221,11 +226,13 @@ public class CLBitonicSort {
CLInfo.checkCLError(clSetKernelArg1i(clBitonicMergeLocal, 5, stride));
CLInfo.checkCLError(clSetKernelArg1i(clBitonicMergeLocal, 6, size));
CLInfo.checkCLError(clSetKernelArg1i(clBitonicMergeLocal, 7, dir));
CLInfo.checkCLError(clSetKernelArg(clBitonicMergeLocal, 8, max_work_group_size * 4 )); // local memory
CLInfo.checkCLError(clSetKernelArg(clBitonicMergeLocal, 9, max_work_group_size * 4)); // local memory
clGlobalWorkSize = stack.callocPointer(1);
clLocalWorkSize = stack.callocPointer(1);
clGlobalWorkSize.put(0, keys.length / 2);
clLocalWorkSize.put(0, LOCAL_SIZE_LIMIT / 2);
clLocalWorkSize.put(0, max_work_group_size / 2);
CLInfo.checkCLError(clEnqueueNDRangeKernel(clQueue, clBitonicMergeLocal, 1, null, clGlobalWorkSize, clLocalWorkSize, null, null));
CLInfo.checkCLError(clFinish(clQueue));
......@@ -368,6 +375,9 @@ public class CLBitonicSort {
clBitonicMergeLocal = clCreateKernel(clProgram, "bitonicMergeLocal", errcode_ret);
CLInfo.checkCLError(errcode_ret);
PointerBuffer pp = stack.mallocPointer(1);
clGetDeviceInfo(clDevice, CL_DEVICE_MAX_WORK_GROUP_SIZE, pp, null);
max_work_group_size = (int)pp.get(0);
}
}
......
......@@ -19,7 +19,6 @@ import java.nio.IntBuffer;
import java.util.List;
import static org.lwjgl.opencl.CL10.CL_CONTEXT_PLATFORM;
import static org.lwjgl.opencl.CL10.CL_DEVICE_ADDRESS_BITS;
import static org.lwjgl.opencl.CL10.CL_DEVICE_MAX_WORK_GROUP_SIZE;
import static org.lwjgl.opencl.CL10.CL_DEVICE_NAME;
import static org.lwjgl.opencl.CL10.CL_DEVICE_TYPE_GPU;
......@@ -57,9 +56,12 @@ import static org.lwjgl.system.MemoryUtil.memUTF8;
/**
* @author Benedikt Zoennchen
*
* This class offers methods to compute an array-based linked cell containing 2D-coordinates, i.e. {@link VPoint},
* using the GPU (see Green 2007, "Building the Grid using Sorting").
*/
public class CLUniformHashedGrid {
private static Logger log = LogManager.getLogger(CLUniformHashedGrid.class);
public class CLLinkedCell {
private static Logger log = LogManager.getLogger(CLLinkedCell.class);
// CL ids
private long clPlatform;
......@@ -122,12 +124,9 @@ public class CLUniformHashedGrid {
private int[] resultValues;
private int[] resultKeys;
//Note: logically shared with BitonicSort.cl!
private static final int LOCAL_SIZE_LIMIT = 16;
private static final Logger logger = LogManager.getLogger(CLLinkedCell.class);
private static final Logger logger = LogManager.getLogger(CLUniformHashedGrid.class);
private long max_work_group_size;
private int max_work_group_size;
private boolean debug = false;
......@@ -138,7 +137,16 @@ public class CLUniformHashedGrid {
NonSeparate
}
public CLUniformHashedGrid(final int numberOfElements, final VRectangle bound, final double cellSize) throws OpenCLException {
/**
* Default constructor.
*
* @param numberOfElements the number of positions contained in the linked cell.
* @param bound the spatial bound of the linked cell.
* @param cellSize the cellSize (in x and y direction) of the linked cell.
*
* @throws OpenCLException
*/
public CLLinkedCell(final int numberOfElements, final VRectangle bound, final double cellSize) throws OpenCLException {
this.numberOfElements = numberOfElements;
this.iGridSize = new int[]{ (int)Math.ceil(bound.getWidth() / cellSize), (int)Math.ceil(bound.getHeight() / cellSize)};
this.numberOfGridCells = this.iGridSize[0] * this.iGridSize[1];
......@@ -153,16 +161,53 @@ public class CLUniformHashedGrid {
init();
}
public class GridCells {
/**
* The data structure representing the linked cell. The elements of cell i lie
* between (reorderedPositions[cellStarts[i]*2], reorderedPositions[cellStarts[i]*2+1])
* and (reorderedPositions[(cellEnds[i]-1)*2], reorderedPositions[(cellEnds[i]-1)*2+1]),
* i.e. each position occupies two consecutive floats (x, then y) in reorderedPositions.
*/
public class LinkedCell {
/**
* the starting index of each cell, i.e. cell i starts at cellStarts[i].
*/
public int[] cellStarts;
/**
* the ending index of each cell, i.e. cell i ends at cellEnds[i]; the last
* element of cell i has index cellEnds[i]-1.
*/
public int[] cellEnds;
/**
* the ordered 2D-coordinates.
*/
public float[] reorderedPositions;
/**
* the mapping between the unordered (original) positions and the reorderedPositions,
* i.e. reorderedPositions[i] == positions[indices[i]]
*/
public int[] indices;
/**
* the hashes, i.e. the cells of the positions: hashes[i] is the cell of positions[i].
*/
public int[] hashes;
/**
* the original positions in original order.
*/
public float[] positions;
}
public GridCells calcPositionsInCell(@NotNull final List<VPoint> positions) throws OpenCLException {
/**
* Computes the {@link LinkedCell} of the list of positions.
*
* @param positions a list of positions contained in {@link CLLinkedCell#bound}.
* @return {@link LinkedCell} which is the linked list in an array based structure.
*
* @throws OpenCLException
*/
public LinkedCell calcLinkedCell(@NotNull final List<VPoint> positions) throws OpenCLException {
assert positions.size() == numberOfElements;
this.positionList = positions;
allocHostMemory();
......@@ -186,7 +231,7 @@ public class CLUniformHashedGrid {
int[] aHashes = CLUtils.toIntArray(hashes, numberOfElements);
float[] aPositions = CLUtils.toFloatArray(this.positions, numberOfElements * 2);
GridCells gridCells = new GridCells();
LinkedCell gridCells = new LinkedCell();
gridCells.cellEnds = aCellEnds;
gridCells.cellStarts = aCellStarts;
gridCells.reorderedPositions = aReorderedPositions;
......@@ -202,6 +247,14 @@ public class CLUniformHashedGrid {
//clFindCellBoundsAndReorder(clCellStarts, clCellEnds, clReorderedPositions, clHashes, clIndices, clPositions, numberOfElements, numberOfGridCells);
}
/**
* Computes all the hash values, i.e. the cells of each position, sorts these hashes and constructs a mapping
* of the rearrangement. This method exists to test the bitonic sort algorithm on the GPU.
*
* @param positions the positions which will be hashed.
* @return the sorted hashes.
* @throws OpenCLException
*/
public int[] calcSortedHashes(@NotNull final List<VPoint> positions) throws OpenCLException {
assert positions.size() == numberOfElements;
this.positionList = positions;
......@@ -221,6 +274,14 @@ public class CLUniformHashedGrid {
//clFindCellBoundsAndReorder(clCellStarts, clCellEnds, clReorderedPositions, clHashes, clIndices, clPositions, numberOfElements, numberOfGridCells);
}
/**
* Computes all the hash values, i.e. cells of each position.
* This method exists to test the hash computation on the GPU.
*
* @param positions the positions which will be hashed.
* @return the (unsorted) hashes.
* @throws OpenCLException
*/
public int[] calcHashes(@NotNull final List<VPoint> positions) throws OpenCLException {
assert positions.size() == numberOfElements;
this.positionList = positions;
......@@ -239,10 +300,21 @@ public class CLUniformHashedGrid {
//clFindCellBoundsAndReorder(clCellStarts, clCellEnds, clReorderedPositions, clHashes, clIndices, clPositions, numberOfElements, numberOfGridCells);
}
/**
* Returns a fresh copy of the grid dimensions of the linked cell, so that callers
* cannot modify the internal array.
*
* @return a new two-element array: index 0 holds the grid size in x direction,
*         index 1 the grid size in y direction.
*/
public int[] getGridSize() {
    final int[] gridSizeCopy = new int[2];
    gridSizeCopy[0] = iGridSize[0];
    gridSizeCopy[1] = iGridSize[1];
    return gridSizeCopy;
}
/**
* Returns the cell size of the linked cell (the value of the iCellSize field).
* NOTE(review): presumably the edge length of a cell, equal in x and y direction,
* as passed to the constructor - confirm.
*
* @return the cell size
*/
public float getCellSize() {
return iCellSize;
}
......@@ -299,7 +371,7 @@ public class CLUniformHashedGrid {
return resultValues;
}
public void init() throws OpenCLException {
private void init() throws OpenCLException {
initCallbacks();
initCL();
buildProgram();
......@@ -373,7 +445,7 @@ public class CLUniformHashedGrid {
IntBuffer errcode_ret = stack.callocInt(1);
// small sorts
if (numberOfElements <= LOCAL_SIZE_LIMIT) {
if (numberOfElements <= max_work_group_size) {
CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal, 0, clKeysOut));
CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal, 1, clValuesOut));
CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal, 2, clKeysIn));
......@@ -381,6 +453,8 @@ public class CLUniformHashedGrid {
CLInfo.checkCLError(clSetKernelArg1i(clBitonicSortLocal, 4, numberOfElements));
//TODO: check the hard coded 1, and the waiting of the queue
CLInfo.checkCLError(clSetKernelArg1i(clBitonicSortLocal, 5, 1));
CLInfo.checkCLError(clSetKernelArg(clBitonicSortLocal, 6, keys.length * 4)); // local memory
CLInfo.checkCLError(clSetKernelArg(clBitonicSortLocal, 7, keys.length * 4)); // local memory
clGlobalWorkSize.put(0, numberOfElements / 2);
clLocalWorkSize.put(0, numberOfElements / 2);
......@@ -393,18 +467,20 @@ public class CLUniformHashedGrid {
CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal1, 1, clValuesOut));
CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal1, 2, clKeysIn));
CLInfo.checkCLError(clSetKernelArg1p(clBitonicSortLocal1, 3, clValuesIn));
CLInfo.checkCLError(clSetKernelArg(clBitonicSortLocal1, 4, max_work_group_size * 4)); // local memory
CLInfo.checkCLError(clSetKernelArg(clBitonicSortLocal1, 5, max_work_group_size * 4)); // local memory
clGlobalWorkSize = stack.callocPointer(1);
clLocalWorkSize = stack.callocPointer(1);
clGlobalWorkSize.put(0, numberOfElements / 2);
clLocalWorkSize.put(0, LOCAL_SIZE_LIMIT / 2);
clLocalWorkSize.put(0, max_work_group_size / 2);
CLInfo.checkCLError(clEnqueueNDRangeKernel(clQueue, clBitonicSortLocal1, 1, null, clGlobalWorkSize, clLocalWorkSize, null, null));
CLInfo.checkCLError(clFinish(clQueue));
for (int size = 2 * LOCAL_SIZE_LIMIT; size <= numberOfElements; size <<= 1) {
for (int size = 2 * max_work_group_size; size <= numberOfElements; size <<= 1) {
for (int stride = size / 2; stride > 0; stride >>= 1) {
if (stride >= LOCAL_SIZE_LIMIT) {
if (stride >= max_work_group_size) {
//Launch bitonicMergeGlobal
CLInfo.checkCLError(clSetKernelArg1p(clBitonicMergeGlobal, 0, clKeysOut));
CLInfo.checkCLError(clSetKernelArg1p(clBitonicMergeGlobal, 1, clValuesOut));
......@@ -419,7 +495,7 @@ public class CLUniformHashedGrid {
clGlobalWorkSize = stack.callocPointer(1);
clLocalWorkSize = stack.callocPointer(1);
clGlobalWorkSize.put(0, numberOfElements / 2);
clLocalWorkSize.put(0, LOCAL_SIZE_LIMIT / 4);
clLocalWorkSize.put(0, max_work_group_size / 4);
CLInfo.checkCLError(clEnqueueNDRangeKernel(clQueue, clBitonicMergeGlobal, 1, null, clGlobalWorkSize, clLocalWorkSize, null, null));
CLInfo.checkCLError(clFinish(clQueue));
......@@ -434,11 +510,13 @@ public class CLUniformHashedGrid {
CLInfo.checkCLError(clSetKernelArg1i(clBitonicMergeLocal, 5, stride));
CLInfo.checkCLError(clSetKernelArg1i(clBitonicMergeLocal, 6, size));
CLInfo.checkCLError(clSetKernelArg1i(clBitonicMergeLocal, 7, dir));
CLInfo.checkCLError(clSetKernelArg(clBitonicMergeLocal, 8, max_work_group_size * 4)); // local memory
CLInfo.checkCLError(clSetKernelArg(clBitonicMergeLocal, 9, max_work_group_size * 4)); // local memory
clGlobalWorkSize = stack.callocPointer(1);
clLocalWorkSize = stack.callocPointer(1);
clGlobalWorkSize.put(0, numberOfElements / 2);
clLocalWorkSize.put(0, LOCAL_SIZE_LIMIT / 2);
clLocalWorkSize.put(0, max_work_group_size / 2);
CLInfo.checkCLError(clEnqueueNDRangeKernel(clQueue, clBitonicMergeLocal, 1, null, clGlobalWorkSize, clLocalWorkSize, null, null));
CLInfo.checkCLError(clFinish(clQueue));
......@@ -594,7 +672,7 @@ public class CLUniformHashedGrid {
PointerBuffer pp = stack.mallocPointer(1);
clGetDeviceInfo(clDevice, CL_DEVICE_MAX_WORK_GROUP_SIZE, pp, null);
max_work_group_size = pp.get(0);
max_work_group_size = (int)pp.get(0);
logger.info("CL_DEVICE_MAX_WORK_GROUP_SIZE = " + max_work_group_size);
}
......
......@@ -28,8 +28,8 @@ public class TestBitonicSort {
@Test
public void testLocalSort() throws IOException, OpenCLException {
int[] keys = randomArray(256);
int[] values = randomArray(256);
int[] keys = randomArray(32);
int[] values = randomArray(32);
CLBitonicSort clBitonicSort = new CLBitonicSort();
clBitonicSort.sort(keys, values);
......
......@@ -6,7 +6,7 @@ import org.junit.Before;
import org.junit.Test;
import org.vadere.util.geometry.shapes.VPoint;
import org.vadere.util.geometry.shapes.VRectangle;
import org.vadere.util.opencl.CLUniformHashedGrid;
import org.vadere.util.opencl.CLLinkedCell;
import org.vadere.util.opencl.OpenCLException;
import java.io.IOException;
......@@ -19,7 +19,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Benedikt Zoennchen
*/
public class TestCellGridSort {
public class TestCLLinkedList {
private static Logger logger = LogManager.getLogger(TestConvolution.class);
......@@ -30,7 +30,7 @@ public class TestCellGridSort {
@Test
public void testCalcHash() throws IOException, OpenCLException {
CLUniformHashedGrid clUniformHashedGrid = new CLUniformHashedGrid(1024, new VRectangle(0, 0, 10, 10), 1);
CLLinkedCell clUniformHashedGrid = new CLLinkedCell(1024, new VRectangle(0, 0, 10, 10), 1);
ArrayList<VPoint> positions = new ArrayList<>();
for(int i = 0; i < 1024; i++) {
positions.add(new VPoint(random.nextDouble() * 10,random.nextDouble() * 10));
......@@ -49,7 +49,7 @@ public class TestCellGridSort {
@Test
public void testCalcAndSortHash() throws IOException, OpenCLException {
CLUniformHashedGrid clUniformHashedGrid = new CLUniformHashedGrid(1024, new VRectangle(0, 0, 10, 10), 1);
CLLinkedCell clUniformHashedGrid = new CLLinkedCell(1024, new VRectangle(0, 0, 10, 10), 1);
ArrayList<VPoint> positions = new ArrayList<>();
for(int i = 0; i < 1024; i++) {
positions.add(new VPoint(random.nextDouble() * 10,random.nextDouble() * 10));
......@@ -72,12 +72,12 @@ public class TestCellGridSort {
@Test
public void testGridCell() throws IOException, OpenCLException {
CLUniformHashedGrid clUniformHashedGrid = new CLUniformHashedGrid(1024, new VRectangle(0, 0, 10, 10), 1);
CLLinkedCell clUniformHashedGrid = new CLLinkedCell(1024, new VRectangle(0, 0, 10, 10), 1);
ArrayList<VPoint> positions = new ArrayList<>();
for(int i = 0; i < 1024; i++) {
positions.add(new VPoint(random.nextDouble() * 10,random.nextDouble() * 10));
}
CLUniformHashedGrid.GridCells gridCells = clUniformHashedGrid.calcPositionsInCell(positions);
CLLinkedCell.LinkedCell gridCells = clUniformHashedGrid.calcLinkedCell(positions);
int numberOfCells = clUniformHashedGrid.getGridSize()[0] * clUniformHashedGrid.getGridSize()[1];
for(int cell = 0; cell < numberOfCells; cell++) {
int cellStart = gridCells.cellStarts[cell];
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment