Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
vadere
vadere
Commits
a58b9135
Commit
a58b9135
authored
Jul 12, 2018
by
Benedikt Zoennchen
Browse files
Implement the linked-cell (uniform grid) data structure on the GPU.
parent
9cad2a18
Changes
4
Hide whitespace changes
Inline
Side-by-side
VadereUtils/resources/Particles.cl
0 → 100644
View file @
a58b9135
/*
*
Copyright
1993-2010
NVIDIA
Corporation.
All
rights
reserved.
*
*
Please
refer
to
the
NVIDIA
end
user
license
agreement
(
EULA
)
associated
*
with
this
source
code
for
terms
and
conditions
that
govern
your
use
of
*
this
software.
Any
use,
reproduction,
disclosure,
or
distribution
of
*
this
software
and
related
documentation
outside
the
terms
of
the
EULA
*
is
strictly
prohibited.
*
*/
////////////////////////////////////////////////////////////////////////////////
//
Common
definitions
////////////////////////////////////////////////////////////////////////////////
// Unsigned multiply-add: a * b + c. Arguments and the whole expansion are
// parenthesized so the macro is safe inside larger expressions; note each
// argument is still evaluated exactly once.
#define UMAD(a, b, c)  ( (a) * (b) + (c) )
// Plain scalar 3-vector of floats. A struct of three scalars is used instead
// of the OpenCL vector type float3 — presumably to match the host-side memory
// layout (float3 has 16-byte alignment/padding); confirm against the Java side.
typedef struct{
    float x;
    float y;
    float z;
} Float3;
// Plain scalar 3-vector of unsigned ints (host-layout-compatible, see Float3).
typedef struct{
    uint x;
    uint y;
    uint z;
}Uint3;
// Plain scalar 3-vector of signed ints. Currently unused in the visible code
// of this file; kept for parity with Float3/Uint3.
typedef struct{
    int x;
    int y;
    int z;
}Int3;
// Simulation parameter block mirroring the NVIDIA particles sample.
// Field meanings below are taken from how the same names are used in
// collideSpheres()/collide() in this file; the collider* and grid* fields
// match the (currently commented-out) collision code.
// NOTE(review): this struct is not referenced by any active kernel in this
// file — parameters are instead passed individually (cellSize, worldOrigin,
// gridSize, ...); confirm whether it is still needed.
typedef struct{
    Float3 colliderPos;         // collider sphere center
    float colliderRadius;       // collider sphere radius
    Float3 gravity;             // gravity vector
    float globalDamping;        // global velocity damping factor
    float particleRadius;       // radius of every particle
    Uint3 gridSize;             // number of cells per axis
    uint numCells;              // total cell count
    Float3 worldOrigin;         // world-space origin of the grid
    Float3 cellSize;            // edge lengths of one grid cell
    uint numBodies;             // number of particles
    uint maxParticlesPerCell;   // capacity bound per cell
    float spring;               // collision spring coefficient
    float damping;              // collision normal damping coefficient
    float shear;                // collision tangential (shear) coefficient
    float attraction;           // collision attraction coefficient
    float boundaryDamping;      // damping applied at world boundaries
} simParams_t;
// A single pedestrian: 2D position plus its desired step length.
// NOTE(review): not referenced by any kernel in this file yet.
typedef struct {
    float2 position;
    float stepLength;
} pedestrian;
// Number of key/value elements a work-group sorts in local memory.
// Each work-item handles two elements, so the bitonic-sort kernels are
// presumably launched with a local size of LOCAL_SIZE_LIMIT / 2 — confirm
// against the host code. Must be a power of two.
#define LOCAL_SIZE_LIMIT 16U
// Conditionally exchange two (key, value) pairs held in private memory so
// that their order agrees with 'dir' (dir selects ascending/descending).
inline void ComparatorPrivate(
    uint *keyA,
    uint *valA,
    uint *keyB,
    uint *valB,
    uint dir
){
    const uint outOfOrder = (*keyA > *keyB) ? 1u : 0u;
    if (outOfOrder == dir) {
        const uint tmpKey = *keyA;
        const uint tmpVal = *valA;
        *keyA = *keyB;
        *valA = *valB;
        *keyB = tmpKey;
        *valB = tmpVal;
    }
}
// Same conditional exchange as ComparatorPrivate, but for pairs that live in
// __local memory (used by the work-group-wide bitonic sort stages).
inline void ComparatorLocal(
    __local uint *keyA,
    __local uint *valA,
    __local uint *keyB,
    __local uint *valB,
    uint dir
){
    const uint outOfOrder = (*keyA > *keyB) ? 1u : 0u;
    if (outOfOrder == dir) {
        const uint tmpKey = *keyA;
        const uint tmpVal = *valA;
        *keyA = *keyB;
        *valA = *valB;
        *keyB = tmpKey;
        *valB = tmpVal;
    }
}
////////////////////////////////////////////////////////////////////////////////
//
Save
particle
grid
cell
hashes
and
indices
////////////////////////////////////////////////////////////////////////////////
// Map a world-space position to integer grid-cell coordinates.
// NOTE(review): the floor() result is cast to int and stored into a uint2,
// so positions left of / below the world origin wrap to huge unsigned
// values; callers appear to rely on the power-of-two masking performed in
// getGridHash to make that harmless — confirm.
uint2 getGridPos(float2 p, __constant float* cellSize, __constant float2* worldOrigin){
    const float2 origin = (*worldOrigin);
    const float size = (*cellSize);
    uint2 cell;
    cell.x = (int) floor((p.x - origin.x) / size);
    cell.y = (int) floor((p.y - origin.y) / size);
    return cell;
}
//Calculate address in grid from position (clamping to edges)
// Returns the row-major linear cell index y * gridSize.x + x after wrapping
// both coordinates. Wrap addressing assumes power-of-two grid dimensions.
uint getGridHash(uint2 gridPos, __constant uint2* gridSize){
    //Wrap addressing, assume power-of-two grid dimensions
    gridPos.x = gridPos.x & ((*gridSize).x - 1);
    gridPos.y = gridPos.y & ((*gridSize).y - 1);
    // BUGFIX: the previous code passed the float literal 1.0 as the first
    // UMAD operand, i.e. UMAD(UMAD(1.0, gridSize.y, gridPos.y), gridSize.x,
    // gridPos.x) = (gridSize.y + gridPos.y) * gridSize.x + gridPos.x.
    // That offsets every hash by gridSize.x * gridSize.y (== the total cell
    // count) and would index past the end of the d_CellStart / d_CellEnd
    // arrays. The 3D original uses gridPos.z here; in 2D it must be 0.
    return UMAD(gridPos.y, (*gridSize).x, gridPos.x);
}
//Calculate grid hash value for each particle
// One work-item per particle: writes the particle's cell hash to d_Hash and
// the identity index to d_Index (the pair is subsequently sorted by hash).
__kernel void calcHash(
    __global uint *d_Hash,          //output: cell hash per particle
    __global uint *d_Index,         //output: particle index (identity)
    __global const float2 *d_Pos,   //input: positions
    __constant float* cellSize,
    __constant float2* worldOrigin,
    __constant uint2* gridSize,
    uint numParticles
){
    const uint gid = get_global_id(0);
    if(gid < numParticles){
        const float2 position = d_Pos[gid];
        const uint2 cell = getGridPos(position, cellSize, worldOrigin);
        //Store grid hash and particle index
        d_Hash[gid] = getGridHash(cell, gridSize);
        d_Index[gid] = gid;
    }
}
////////////////////////////////////////////////////////////////////////////////
//
Find
cell
bounds
and
reorder
positions+velocities
by
sorted
indices
////////////////////////////////////////////////////////////////////////////////
// Fill the first N elements of d_Data with the constant val
// (used to reset the cell tables before findCellBoundsAndReorder).
__kernel void Memset(
    __global uint *d_Data,
    uint val,
    uint N
){
    const uint gid = get_global_id(0);
    if(gid >= N)
        return;
    d_Data[gid] = val;
}
// Given hashes sorted ascending (with d_Index carrying the matching original
// particle indices), find for every occupied cell its [start, end) range in
// the sorted arrays and gather positions into hash order.
// Each work-item handles one sorted slot; a cell boundary exists wherever a
// slot's hash differs from its left neighbor's.
// NOTE(review): d_ReorderedVel is declared but never written here — the
// velocity reorder of the original NVIDIA sample was removed; confirm the
// host does not read it.
__kernel void findCellBoundsAndReorder(
    __global uint *d_CellStart,        //output: cell start index
    __global uint *d_CellEnd,          //output: cell end index
    __global float2 *d_ReorderedPos,   //output: reordered by cell hash positions
    __global float2 *d_ReorderedVel,   //output: reordered by cell hash velocities
    __global const uint *d_Hash,       //input: sorted grid hashes
    __global const uint *d_Index,      //input: particle indices sorted by hash
    __global const float2 *d_Pos,      //input: positions array sorted by hash
    __local uint *localHash,           //get_group_size(0) + 1 elements
    uint numParticles
){
    uint hash;
    const uint index = get_global_id(0);

    //Handle case when no. of particles not multiple of block size
    if(index < numParticles){
        hash = d_Hash[index];

        //Load hash data into local memory so that we can look
        //at neighboring particle's hash value without loading
        //two hash values per thread
        // (slot i+1 holds this item's hash; slot 0 holds the predecessor's)
        localHash[get_local_id(0) + 1] = hash;

        //First thread in block must load neighbor particle hash
        if(index > 0 && get_local_id(0) == 0)
            localHash[0] = d_Hash[index - 1];
    }

    // All local hashes must be visible before neighbor comparison below.
    barrier(CLK_LOCAL_MEM_FENCE);

    if(index < numParticles){
        //Border case: the very first sorted slot always starts its cell.
        if(index == 0)
            d_CellStart[hash] = 0;
        //Main case: a hash change marks the end of the previous cell and
        //the start of this one.
        else{
            if(hash != localHash[get_local_id(0)])
                d_CellEnd[localHash[get_local_id(0)]] = d_CellStart[hash] = index;
        };
        //Another border case: the last slot always ends its cell.
        if(index == numParticles - 1)
            d_CellEnd[hash] = numParticles;

        //Now use the sorted index to reorder the pos and vel arrays
        uint sortedIndex = d_Index[index];
        float2 pos = d_Pos[sortedIndex];
        d_ReorderedPos[index] = pos;
    }
}
////////////////////////////////////////////////////////////////////////////////
//
Process
collisions
(
calculate
accelerations
)
////////////////////////////////////////////////////////////////////////////////
// Spring/damper sphere-sphere collision response (DEM model from the NVIDIA
// particles sample). Returns the force acting on sphere A due to sphere B;
// zero when the spheres do not overlap.
float4 collideSpheres(
    float4 posA, float4 posB,
    float4 velA, float4 velB,
    float radiusA, float radiusB,
    float spring, float damping, float shear, float attraction
){
    //Calculate relative position
    float4 relPos = (float4)(posB.x - posA.x, posB.y - posA.y, posB.z - posA.z, 0);
    float dist = sqrt(relPos.x * relPos.x + relPos.y * relPos.y + relPos.z * relPos.z);
    float collideDist = radiusA + radiusB;

    float4 force = (float4)(0, 0, 0, 0);
    // BUGFIX: also require dist > 0 — when the two centers coincide the
    // original divided by zero below and returned NaN forces.
    if(dist > 0.0f && dist < collideDist){
        // Unit collision normal from A towards B.
        float4 norm = (float4)(relPos.x / dist, relPos.y / dist, relPos.z / dist, 0);

        //Relative velocity
        float4 relVel = (float4)(velB.x - velA.x, velB.y - velA.y, velB.z - velA.z, 0);

        //Relative tangential velocity (relVel minus its normal component)
        float relVelDotNorm = relVel.x * norm.x + relVel.y * norm.y + relVel.z * norm.z;
        float4 tanVel = (float4)(
            relVel.x - relVelDotNorm * norm.x,
            relVel.y - relVelDotNorm * norm.y,
            relVel.z - relVelDotNorm * norm.z,
            0
        );

        //Spring force (potential): repels proportionally to the overlap.
        float springFactor = -spring * (collideDist - dist);
        force = (float4)(
            springFactor * norm.x + damping * relVel.x + shear * tanVel.x + attraction * relPos.x,
            springFactor * norm.y + damping * relVel.y + shear * tanVel.y + attraction * relPos.y,
            springFactor * norm.z + damping * relVel.z + shear * tanVel.z + attraction * relPos.z,
            0
        );
    }

    return force;
}
// For every particle, walk the 3x3 neighborhood of its grid cell using the
// cell tables built by findCellBoundsAndReorder and accumulate a collision
// force, then write the updated velocity back to the particle's original
// (unsorted) slot. The per-pair force computation is currently commented
// out, so 'force' stays (0, 0) and d_Vel receives the unchanged velocity.
__kernel void collide(
    __global float2 *d_Vel,                  //output: new velocity
    __global const float2 *d_ReorderedPos,   //input: reordered positions
    __global const float2 *d_ReorderedVel,   //input: reordered velocities
    __global const uint *d_Index,            //input: reordered particle indices
    __global const uint *d_CellStart,        //input: cell boundaries
    __global const uint *d_CellEnd,
    __constant float* cellSize,
    __constant float2* worldOrigin,
    __constant uint2* gridSize,
    uint numParticles
){
    uint index = get_global_id(0);
    if(index >= numParticles)
        return;

    float2 pos = d_ReorderedPos[index];
    float2 vel = d_ReorderedVel[index];
    float2 force = (float2)(0, 0);

    //Get address in grid
    uint2 gridPos = getGridPos(pos, cellSize, worldOrigin);

    //Accumulate surrounding cells (3x3 neighborhood — the grid is 2D).
    // BUGFIX: removed the enclosing "for(int z = -1; z <= 1; z++)" loop left
    // over from the 3D original: z was never used in the body, so every
    // neighbor cell was visited three times, tripling the memory traffic
    // (and any accumulated force, once the collision code is re-enabled).
    for(int y = -1; y <= 1; y++)
        for(int x = -1; x <= 1; x++){
            //Get start particle index for this cell
            // (negative offsets wrap in uint arithmetic; getGridHash masks
            // them back into range for power-of-two grids)
            uint hash = getGridHash(gridPos + (uint2)(x, y), gridSize);
            uint startI = d_CellStart[hash];

            //Skip empty cell
            if(startI == 0xFFFFFFFFU)
                continue;

            //Iterate over particles in this cell
            uint endI = d_CellEnd[hash];
            for(uint j = startI; j < endI; j++){
                if(j == index)
                    continue;

                float2 pos2 = d_ReorderedPos[j];
                float2 vel2 = d_ReorderedVel[j];

                //Collide two spheres
                /*force += collideSpheres(
                    pos, pos2, vel, vel2,
                    params->particleRadius, params->particleRadius,
                    params->spring, params->damping, params->shear, params->attraction
                );*/
            }
        }

    //Collide with cursor sphere
    /*force += collideSpheres(
        pos,
        (float4)(params->colliderPos.x, params->colliderPos.y, params->colliderPos.z, 0),
        vel,
        (float4)(0, 0, 0, 0),
        params->particleRadius, params->colliderRadius,
        params->spring, params->damping, params->shear, params->attraction
    );*/

    //Write new velocity back to original unsorted location
    d_Vel[d_Index[index]] = vel + force;
}
////////////////////////////////////////////////////////////////////////////////
//
Monolithic
bitonic
sort
kernel
for
short
arrays
fitting
into
local
memory
////////////////////////////////////////////////////////////////////////////////
// Monolithic bitonic sort for arrays small enough to fit in local memory.
// Each work-group sorts one LOCAL_SIZE_LIMIT-element subbatch; every
// work-item owns two elements (slots lid and lid + LOCAL_SIZE_LIMIT/2).
// 'dir' selects the final sort direction.
// NOTE(review): assumes local size == LOCAL_SIZE_LIMIT / 2 and
// arrayLength a power of two <= LOCAL_SIZE_LIMIT — confirm host launch.
__kernel void bitonicSortLocal(
    __global uint *d_DstKey,
    __global uint *d_DstVal,
    __global uint *d_SrcKey,
    __global uint *d_SrcVal,
    uint arrayLength,
    uint dir
){
    __local uint l_key[LOCAL_SIZE_LIMIT];
    __local uint l_val[LOCAL_SIZE_LIMIT];

    //Offset to the beginning of subbatch and load data
    // (pointers are rebased so [0] and [LOCAL_SIZE_LIMIT/2] address this
    // work-item's two elements)
    d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
    d_SrcVal += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
    d_DstKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
    d_DstVal += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
    l_key[get_local_id(0) + 0] = d_SrcKey[0];
    l_val[get_local_id(0) + 0] = d_SrcVal[0];
    l_key[get_local_id(0) + (LOCAL_SIZE_LIMIT / 2)] = d_SrcKey[(LOCAL_SIZE_LIMIT / 2)];
    l_val[get_local_id(0) + (LOCAL_SIZE_LIMIT / 2)] = d_SrcVal[(LOCAL_SIZE_LIMIT / 2)];

    // Build bitonic sequences of growing size; alternate sub-sequence
    // direction via ddd.
    for(uint size = 2; size < arrayLength; size <<= 1){
        //Bitonic merge
        uint ddd = dir ^ ((get_local_id(0) & (size / 2)) != 0);
        for(uint stride = size / 2; stride > 0; stride >>= 1){
            barrier(CLK_LOCAL_MEM_FENCE);
            // Index of this work-item's comparator for the current stride.
            uint pos = 2 * get_local_id(0) - (get_local_id(0) & (stride - 1));
            ComparatorLocal(
                &l_key[pos + 0], &l_val[pos + 0],
                &l_key[pos + stride], &l_val[pos + stride],
                ddd
            );
        }
    }

    //ddd == dir for the last bitonic merge step
    {
        for(uint stride = arrayLength / 2; stride > 0; stride >>= 1){
            barrier(CLK_LOCAL_MEM_FENCE);
            uint pos = 2 * get_local_id(0) - (get_local_id(0) & (stride - 1));
            ComparatorLocal(
                &l_key[pos + 0], &l_val[pos + 0],
                &l_key[pos + stride], &l_val[pos + stride],
                dir
            );
        }
    }

    // Ensure all comparators finished before writing results back.
    barrier(CLK_LOCAL_MEM_FENCE);
    d_DstKey[0] = l_key[get_local_id(0) + 0];
    d_DstVal[0] = l_val[get_local_id(0) + 0];
    d_DstKey[(LOCAL_SIZE_LIMIT / 2)] = l_key[get_local_id(0) + (LOCAL_SIZE_LIMIT / 2)];
    d_DstVal[(LOCAL_SIZE_LIMIT / 2)] = l_val[get_local_id(0) + (LOCAL_SIZE_LIMIT / 2)];
}
////////////////////////////////////////////////////////////////////////////////
//
Bitonic
sort
kernel
for
large
arrays
(
not
fitting
into
local
memory
)
////////////////////////////////////////////////////////////////////////////////
//Bottom-level
bitonic
sort
//Almost
the
same
as
bitonicSortLocal
with
the
only
exception
//of
even
/
odd
subarrays
(
of
LOCAL_SIZE_LIMIT
points
)
being
//sorted
in
opposite
directions
// Bottom level of the large-array bitonic sort: like bitonicSortLocal, but
// even/odd subarrays of LOCAL_SIZE_LIMIT elements are sorted in opposite
// directions (get_group_id(0) & 1), producing the bitonic input required by
// the global merge kernels.
__kernel void bitonicSortLocal1(
    __global uint *d_DstKey,
    __global uint *d_DstVal,
    __global uint *d_SrcKey,
    __global uint *d_SrcVal
){
    __local uint l_key[LOCAL_SIZE_LIMIT];
    __local uint l_val[LOCAL_SIZE_LIMIT];

    //Offset to the beginning of subarray and load data
    d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
    d_SrcVal += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
    d_DstKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
    d_DstVal += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
    l_key[get_local_id(0) + 0] = d_SrcKey[0];
    l_val[get_local_id(0) + 0] = d_SrcVal[0];
    l_key[get_local_id(0) + (LOCAL_SIZE_LIMIT / 2)] = d_SrcKey[(LOCAL_SIZE_LIMIT / 2)];
    l_val[get_local_id(0) + (LOCAL_SIZE_LIMIT / 2)] = d_SrcVal[(LOCAL_SIZE_LIMIT / 2)];

    // This work-item's comparator index within its subarray.
    uint comparatorI = get_global_id(0) & ((LOCAL_SIZE_LIMIT / 2) - 1);

    for(uint size = 2; size < LOCAL_SIZE_LIMIT; size <<= 1){
        //Bitonic merge
        uint ddd = (comparatorI & (size / 2)) != 0;
        for(uint stride = size / 2; stride > 0; stride >>= 1){
            barrier(CLK_LOCAL_MEM_FENCE);
            uint pos = 2 * get_local_id(0) - (get_local_id(0) & (stride - 1));
            ComparatorLocal(
                &l_key[pos + 0], &l_val[pos + 0],
                &l_key[pos + stride], &l_val[pos + stride],
                ddd
            );
        }
    }

    //Odd / even arrays of LOCAL_SIZE_LIMIT elements
    //sorted in opposite directions
    {
        uint ddd = (get_group_id(0) & 1);
        for(uint stride = LOCAL_SIZE_LIMIT / 2; stride > 0; stride >>= 1){
            barrier(CLK_LOCAL_MEM_FENCE);
            uint pos = 2 * get_local_id(0) - (get_local_id(0) & (stride - 1));
            ComparatorLocal(
                &l_key[pos + 0], &l_val[pos + 0],
                &l_key[pos + stride], &l_val[pos + stride],
                ddd
            );
        }
    }

    // Ensure all comparators finished before writing results back.
    barrier(CLK_LOCAL_MEM_FENCE);
    d_DstKey[0] = l_key[get_local_id(0) + 0];
    d_DstVal[0] = l_val[get_local_id(0) + 0];
    d_DstKey[(LOCAL_SIZE_LIMIT / 2)] = l_key[get_local_id(0) + (LOCAL_SIZE_LIMIT / 2)];
    d_DstVal[(LOCAL_SIZE_LIMIT / 2)] = l_val[get_local_id(0) + (LOCAL_SIZE_LIMIT / 2)];
}
//Bitonic
merge
iteration
for
'stride
'
>=
LOCAL_SIZE_LIMIT
// One bitonic merge iteration over global memory, used for
// stride >= LOCAL_SIZE_LIMIT. Each work-item performs a single
// compare-and-swap between elements pos and pos + stride.
__kernel void bitonicMergeGlobal(
    __global uint *d_DstKey,
    __global uint *d_DstVal,
    __global uint *d_SrcKey,
    __global uint *d_SrcVal,
    uint arrayLength,
    uint size,
    uint stride,
    uint dir
){
    uint global_comparatorI = get_global_id(0);
    // Comparator index within the current bitonic sequence.
    uint comparatorI = global_comparatorI & (arrayLength / 2 - 1);

    //Bitonic merge
    // Direction flips for the second half of each size-sized sequence.
    uint ddd = dir ^ ((comparatorI & (size / 2)) != 0);
    uint pos = 2 * global_comparatorI - (global_comparatorI & (stride - 1));

    uint keyA = d_SrcKey[pos + 0];
    uint valA = d_SrcVal[pos + 0];
    uint keyB = d_SrcKey[pos + stride];
    uint valB = d_SrcVal[pos + stride];

    ComparatorPrivate(&keyA, &valA, &keyB, &valB, ddd);

    d_DstKey[pos + 0] = keyA;
    d_DstVal[pos + 0] = valA;
    d_DstKey[pos + stride] = keyB;
    d_DstVal[pos + stride] = valB;
}
//Combined
bitonic
merge
steps
for
//
'size
'
>
LOCAL_SIZE_LIMIT
and
'stride
'
=
[1
..
LOCAL_SIZE_LIMIT
/
2]
// Combined bitonic merge steps for 'size' > LOCAL_SIZE_LIMIT and
// 'stride' in [1 .. LOCAL_SIZE_LIMIT / 2]: loads one LOCAL_SIZE_LIMIT block
// into local memory and finishes all remaining (small-stride) merge stages
// there, avoiding one global round-trip per stage.
__kernel void bitonicMergeLocal(
    __global uint *d_DstKey,
    __global uint *d_DstVal,
    __global uint *d_SrcKey,
    __global uint *d_SrcVal,
    uint arrayLength,
    uint stride,
    uint size,
    uint dir
){
    __local uint l_key[LOCAL_SIZE_LIMIT];
    __local uint l_val[LOCAL_SIZE_LIMIT];

    // Rebase pointers to this work-item's two elements and stage them in
    // local memory.
    d_SrcKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
    d_SrcVal += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
    d_DstKey += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
    d_DstVal += get_group_id(0) * LOCAL_SIZE_LIMIT + get_local_id(0);
    l_key[get_local_id(0) + 0] = d_SrcKey[0];
    l_val[get_local_id(0) + 0] = d_SrcVal[0];
    l_key[get_local_id(0) + (LOCAL_SIZE_LIMIT / 2)] = d_SrcKey[(LOCAL_SIZE_LIMIT / 2)];
    l_val[get_local_id(0) + (LOCAL_SIZE_LIMIT / 2)] = d_SrcVal[(LOCAL_SIZE_LIMIT / 2)];

    //Bitonic merge
    uint comparatorI = get_global_id(0) & ((arrayLength / 2) - 1);
    // Merge direction for this comparator's position in the sequence.
    uint ddd = dir ^ ((comparatorI & (size / 2)) != 0);
    for(; stride > 0; stride >>= 1){
        barrier(CLK_LOCAL_MEM_FENCE);
        uint pos = 2 * get_local_id(0) - (get_local_id(0) & (stride - 1));
        ComparatorLocal(
            &l_key[pos + 0], &l_val[pos + 0],
            &l_key[pos + stride], &l_val[pos + stride],
            ddd
        );
    }

    // Ensure all comparators finished before writing results back.
    barrier(CLK_LOCAL_MEM_FENCE);
    d_DstKey[0] = l_key[get_local_id(0) + 0];
    d_DstVal[0] = l_val[get_local_id(0) + 0];
    d_DstKey[(LOCAL_SIZE_LIMIT / 2)] = l_key[get_local_id(0) + (LOCAL_SIZE_LIMIT / 2)];
    d_DstVal[(LOCAL_SIZE_LIMIT / 2)] = l_val[get_local_id(0) + (LOCAL_SIZE_LIMIT / 2)];
}
\ No newline at end of file
VadereUtils/src/org/vadere/util/opencl/CLUniformHashedGrid.java
0 → 100644
View file @
a58b9135
package
org.vadere.util.opencl
;
import
org.apache.log4j.LogManager
;
import
org.apache.log4j.Logger
;
import
org.jetbrains.annotations.NotNull
;
import
org.lwjgl.PointerBuffer
;
import
org.lwjgl.opencl.CLContextCallback
;
import
org.lwjgl.opencl.CLProgramCallback
;
import
org.lwjgl.system.Configuration
;
import
org.lwjgl.system.MemoryStack
;
import
org.lwjgl.system.MemoryUtil
;
import
org.vadere.util.geometry.shapes.VPoint
;
import
org.vadere.util.geometry.shapes.VRectangle
;
import
java.io.IOException
;
import
java.nio.ByteBuffer
;
import
java.nio.FloatBuffer
;
import
java.nio.IntBuffer
;
import
java.util.List
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_CONTEXT_PLATFORM
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_DEVICE_ADDRESS_BITS
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_DEVICE_MAX_WORK_GROUP_SIZE
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_DEVICE_NAME
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_DEVICE_TYPE_GPU
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_MEM_ALLOC_HOST_PTR
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_MEM_COPY_HOST_PTR
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_MEM_READ_ONLY
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_MEM_READ_WRITE
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_PROGRAM_BUILD_STATUS
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
CL_SUCCESS
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clBuildProgram
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clCreateBuffer
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clCreateCommandQueue
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clCreateContext
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clCreateKernel
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clCreateProgramWithSource
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clEnqueueNDRangeKernel
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clEnqueueReadBuffer
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clEnqueueWaitForEvents
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clEnqueueWriteBuffer
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clFinish
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clGetDeviceIDs
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clGetPlatformIDs
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clReleaseCommandQueue
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clReleaseContext
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clReleaseKernel
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clReleaseMemObject
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clReleaseProgram
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clSetKernelArg1i
;
import
static
org
.
lwjgl
.
opencl
.
CL10
.
clSetKernelArg1p
;
import
static
org
.
lwjgl
.
system
.
MemoryStack
.
stackPush
;
import
static
org
.
lwjgl
.
system
.
MemoryUtil
.
NULL
;
import
static
org
.
lwjgl
.
system
.
MemoryUtil
.
memUTF8
;
/**
* @author Benedikt Zoennchen
*/
public
class
CLUniformHashedGrid
{
private
static
Logger
log
=
LogManager
.
getLogger
(
CLUniformHashedGrid
.
class
);
// CL ids
private
long
clPlatform
;
private
long
clDevice
;
private
long
clContext
;
private
long
clQueue
;
private
long
clProgram
;
// CL Memory
private
long
clHashes
;
private
long
clIndices
;
private
long
clCellStarts
;
private
long
clCellEnds
;
private
long
clReorderedPositions
;
private
long
clPositions
;
private
long
clCellSize
;