DistMeshHE.cl 15.5 KB
Newer Older
1
/**
2
3
 * The implementation of DistMesh using the half-edge data structure.
 * Forces are computed for each vertex in parallel instead of for each
4
5
6
7
8
9
 * edge in parallel. Edge flips are done via a three stages (for each edge in parallel):
 * (1) Lock the first triangle A for edge e
 * (2) Lock the second triangle B for edge e if e holds the lock of A
 * (3) Flip edge e if e holds the lock for A and B
 */

10
11
12
13
14
15
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#pragma OPENCL EXTENSION cl_amd_printf : enable
#define LOCK(a) atomic_cmpxchg(a, 0, 1)
#define UNLOCK(a) atomic_xchg(a, 0)

// helper methods!
16
17
inline float2 getCircumcenter(float2 p1, float2 p2, float2 p3) {
    float d = 2 * (p1.x * (p2.y - p3.y) + p2.x * (p3.y - p1.y) + p3.x * (p1.y - p2.y));
18

19
    float x = ((p1.x * p1.x + p1.y * p1.y) * (p2.y - p3.y)
20
21
    				+ (p2.x * p2.x + p2.y * p2.y) * (p3.y - p1.y)
    				+ (p3.x * p3.x + p3.y * p3.y) * (p1.y - p2.y)) / d;
22
    float y = ((p1.x * p1.x + p1.y * p1.y) * (p3.x - p2.x)
23
24
    				+ (p2.x * p2.x + p2.y * p2.y) * (p1.x - p3.x)
    				+ (p3.x * p3.x + p3.y * p3.y) * (p2.x - p1.x)) / d;
25
    return (float2)(x, y);
26
27
}

28
inline bool isCCW(float2 q, float2 p, float2 r) {
29
30
31
    return ((p.y - q.y) * (r.x - p.x) - (p.x - q.x) * (r.y - p.y)) < 0;
}

32
33
34
35
inline float quality(float2 p, float2 q, float2 r) {
    float a = length(p-q);
    float b = length(p-r);
    float c = length(q-r);
36
    return (float)((b + c - a) * (c + a - b) * (a + b - c)) / (a * b * c);
37
38
}

39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
inline bool isInCircle(float2 a, float2 b, float2 c, float x , float y) {
    float eps = 0.00001f;
    float adx = a.x - x;
    float ady = a.y - y;
    float bdx = b.x - x;
    float bdy = b.y - y;
    float cdx = c.x - x;
    float cdy = c.y - y;

    float abdet = adx * bdy - bdx * ady;
    float bcdet = bdx * cdy - cdx * bdy;
    float cadet = cdx * ady - adx * cdy;
    float alift = adx * adx + ady * ady;
    float blift = bdx * bdx + bdy * bdy;
    float clift = cdx * cdx + cdy * cdy;

    float disc = alift * bcdet + blift * cadet + clift * abdet;
56
57
58
59
    bool ccw = isCCW(a, b, c);
    return (disc-eps > 0 && ccw) || (disc+eps < 0 && !ccw);
}

60
inline bool isEdgeAlive(int4 edge) {
61
62
63
    return edge.s0 != -1;
}

64
inline bool isFaceAlive(int2 face) {
65
66
67
    return face.s0 != -1;
}

68
inline float dabs(float d) {if(d < 0){ return -d; }else{ return d;}}
69

70
71
inline bool isAtBoundary(int4 edge, __global int4* edges){
    return getFace(edge) == -1 || getFace(edges[getTwin(edge)]) == -1;
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
}

inline int getVertex(int4 edge) {
    return edge.s0;
}

inline int getNext(int4 edge) {
    return edge.s1;
}

inline int getTwin(int4 edge) {
    return edge.s2;
}

inline int getFace(int4 edge) {
    return edge.s3;
}

90
inline float dist(float2 v) {
91
    return dabs(7.0f - length(v))-3.0f;
92
93
94
95
}
// end helper

// remove low quality triangles on the boundary
96
kernel void removeTriangles(__global float2* vertices,
97
98
                            __global int4* edges,
                            __global int3* triangles) {
99
    int edgeId = get_global_id(0);
100
101

    // edge is alive?
102
    if(isEdgeAlive(edges[edgeId])){
103
        float eps = 0.00001f;
104
105
106
107
108
109
110
111

        int ta = edges[edgeId].s2;
        int tb = edges[edgeId].s3;

        // edge is a boundary edge
        if(ta == -1 || tb == -1) {
            ta = max(ta, tb);
            int3 tri = triangles[ta];
112
113
114
            float2 v0 = vertices[tri.s0];
            float2 v1 = vertices[tri.s1];
            float2 v2 = vertices[tri.s2];
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130

            if(quality(v0, v1, v2) < 0.2f) {
                // destroy edge i.e. disconnect edge
                edges[edgeId].s0 = -1;
                edges[edgeId].s1 = -1;

                // destroy triangle
                tri.s0 = -1;
                tri.s1 = -1;
                tri.s2 = -1;
            }
        }
    }
}

// for each edge in parallel, label illegal edges
131
kernel void label(__global float2* vertices,
132
133
134
135
136
137
138
139
140
                  __global int4* edges,
                  __global int* labeledEdges,
                  __global int* illegalEdge) {
    /**
     * edge.s0 = edge.vertexId
     * edge.s1 = edge.nextId
     * edge.s2 = edge.twinId
     * edge.s3 = edge.faceId
     */
141
	 int edgeId = get_global_id(0);
142
    int4 edge = edges[edgeId];
143
144
    if(isEdgeAlive(edge) && !isAtBoundary(edge, edges)){
        float eps = 0.0001f;
145

146
        int v0 = getVertex(edge);
147
148
149
150
151
152
153
154
155
		int4 nextEdge = edges[getNext(edge)];
		int4 prefEdge = edges[getNext(nextEdge)];
		int4 twinEdge = edges[getTwin(edge)];

		int p = getVertex(edges[getNext(twinEdge)]);
		int v1 = getVertex(nextEdge);
		int v2 = getVertex(prefEdge);

		// test delaunay criteria
Benedikt Zoennchen's avatar
Benedikt Zoennchen committed
156
157
158
159
160
161
162
163
164
		//float2 c = getCircumcenter(vertices[v0], vertices[v1], vertices[v2]);
		float2 pp = vertices[p];
        //double2 c = getCircumcenter(vertices[v0], vertices[v1], vertices[v2]);
         // require a flip?
        if(isInCircle(vertices[v0], vertices[v1], vertices[v2], pp.x, pp.y)) {
            labeledEdges[edgeId] = 1;
            *illegalEdge = 1;
        } else {
             labeledEdges[edgeId] = 0;
165
        }
166
167
    }
    else {
168
169
        labeledEdges[edgeId] = 0;
    }
170
171
}

172
173
// for each edge in parallel, re-label illegal edges
kernel void updateLabel(__global float2* vertices,
174
175
176
177
178
179
180
181
182
                        __global int4* edges,
                        __global int* labeledEdges,
                        __global int* illegalEdge) {
    /**
     * edge.s0 = edge.vertexId
     * edge.s1 = edge.nextId
     * edge.s2 = edge.twinId
     * edge.s3 = edge.faceId
     */
183
	int edgeId = get_global_id(0);
184
185
    int4 edge = edges[edgeId];

186
    if(isEdgeAlive(edge) && labeledEdges[edgeId] == 1 && !isAtBoundary(edge, edges)){
187
        float eps = 0.0001f;
188
        int v0 = getVertex(edge);
189
190
191
192
193
194
195
196
197
		int4 nextEdge = edges[getNext(edge)];
		int4 prefEdge = edges[getNext(nextEdge)];
		int4 twinEdge = edges[getTwin(edge)];

		int p = getVertex(edges[getNext(twinEdge)]);
		int v1 = getVertex(nextEdge);
		int v2 = getVertex(prefEdge);

		// test delaunay criteria
Benedikt Zoennchen's avatar
Benedikt Zoennchen committed
198
199
200
201
202
203
		float2 pp = vertices[p];
        //double2 c = getCircumcenter(vertices[v0], vertices[v1], vertices[v2]);
         // require a flip?
        if(isInCircle(vertices[v0], vertices[v1], vertices[v2], pp.x, pp.y)) {
            labeledEdges[edgeId] = 1;
            *illegalEdge = 1;
204
        } else {
Benedikt Zoennchen's avatar
Benedikt Zoennchen committed
205
             labeledEdges[edgeId] = 0;
206
        }
207
    }
208
209
210
    else {
        labeledEdges[edgeId] = 0;
    }
211
212
}

213
// lock triangle A
214
215
216
kernel void flipStage1(__global int4* edges,
                       __global int* labeledEdges,
                       __global int2* faces) {
217
    int edgeId = get_global_id(0);
218
219
220
221

    // is the edge illegal
    if(labeledEdges[edgeId] == 1) {
        int4 edge = edges[edgeId];
222
223
224
225
226
227
        int face = getFace(edges[edgeId]);

        // to avoid the twin from locking
        if(edgeId < getTwin(edge)) {
            faces[face].s1 = edgeId;
        }
228
229
230
    }
}

231
// lock triangle B
232
233
234
235
kernel void flipStage2(__global int4* edges,
                       __global int* labeledEdges,
                       __global int2* faces) {

236
    int edgeId = get_global_id(0);
237
238

    // is the edge illegal
239
    if(labeledEdges[edgeId] == 1) {
240
241
        int4 edge = edges[edgeId];
        int4 twinEdge = edges[getTwin(edge)];
242
        int faceId = getFace(edges[edgeId]);
243

244
        // to avoid the twin from locking
245
246
247
        if(faces[faceId].s1 == edgeId) {
            faces[getFace(twinEdge)].s1 = edgeId;
        }
248
249
250
    }
}

251
// flip
252
253
kernel void flipStage3(__global float2* vertices,
                       __global int* vertexToEdge,
254
255
256
                       __global int4* edges,
                       __global int* labeledEdges,
                       __global int2* faces) {
257
    int edgeId = get_global_id(0);
258

259
    if(labeledEdges[edgeId] == 1) {
260
        int4 edge = edges[edgeId];
261
262
        int4 twinEdge = edges[getTwin(edge)];

263
264
265
        // swap if both triangles are locked by ta, i.e. by this thread
        if(faces[getFace(edge)].s1 == edgeId && faces[getFace(twinEdge)].s1 == edgeId) {
            // flip and repair ds
266

267
268
269
            // 1. gather all the references required
            int a0Id = edgeId;
            int4 a0 = edge;
270
            int a1Id = getNext(a0);
271
            int4 a1 = edges[a1Id];
272
273
            int a2Id = getNext(a1);
            int4 a2 = edges[getNext(a1)];
274
275
276

            int b0Id = getTwin(edge);
            int4 b0 = edges[b0Id];
277
            int b1Id = getNext(b0);
278
            int4 b1 = edges[b1Id];
279
            int b2Id = getNext(b1);
280
281
282
283
284
285
286
287
288
289
290
            int4 b2 = edges[b2Id];

            int faId = getFace(a0);
            int fbId = getFace(b0);

            int va1Id = getVertex(a1);
            int vb1Id = getVertex(b1);

            int va0Id = getVertex(a0);
            int vb0Id = getVertex(b0);

291
            if(faces[fbId].s0 == b1Id) {
292
				// setEdge(fb, a1);
293
                faces[fbId].s0 = a1Id;
294
            }
295

296
            if(faces[faId].s0 == a1Id) {
297
				// setEdge(fa, b1);
298
                faces[faId].s0 = b1Id;
299
            }
300

301
            if(vertexToEdge[va0Id] == a0Id) {
302
				// setEdge(va0, b2);
303
                vertexToEdge[va0Id] = b2Id;
304
            }
305

306
            if(vertexToEdge[vb0Id] == b0Id) {
307
				// setEdge(vb0,a2);
308
                vertexToEdge[vb0Id] = a2Id;
309
            }
310

311
312
313
314
315
316
			/**
			 * edge.s0 = edge.vertexId
			 * edge.s1 = edge.nextId
			 * edge.s2 = edge.twinId
			 * edge.s3 = edge.faceId
			 */
317
318
            a0.s0 = va1Id;
            b0.s0 = vb1Id;
319

320
321
322
            a0.s1 = a2Id;
            a2.s1 = b1Id;
            b1.s1 = a0Id;
323

324
325
326
            b0.s1 = b2Id;
            b2.s1 = a1Id;
            a1.s1 = b0Id;
327

328
            a1.s3 = fbId;
329

330
331
            //setFace(b1, faId);
            b1.s3 = faId;
332
333
334
335
336
337
338
339
340
341

            // copy to global mem
            edges[a0Id] = a0;
            edges[a1Id] = a1;
            edges[a2Id] = a2;

            edges[b0Id] = b0;
            edges[b1Id] = b1;
            edges[b2Id] = b2;

342
343
            labeledEdges[a0Id] = 0;
            labeledEdges[b0Id] = 0;
344
345
346
347
        }
    }
}

348
349
// for each triangle: remove all locks
kernel void unlockFaces(__global int2* faces) {
350
351
352
353
    int faceId = get_global_id(0);
    faces[faceId].s1 = -1;
}

354
355
// for each triangle: test its legality TODO
 kernel void checkTriangles(__global float2* vertices,
356
357
358
359
360
                            __global int4* edges,
                            __global int2* faces,
                            __global int* illegalTri) {
    int faceId = get_global_id(0);
    int2 face = faces[faceId];
361
    if(isFaceAlive(face)) {
362
363
364
365
366
367
368
369
370
371
        int edgeId = face.s0;

        int4 edge = edges[edgeId];
        int4 nextEdge = edges[getNext(edge)];
        int4 prefEdge = edges[getNext(nextEdge)];

        int v0 = getVertex(edge);
        int v1 = getVertex(nextEdge);
        int v2 = getVertex(prefEdge);

372
373
374
        float2 p0 = vertices[v0];
        float2 p1 = vertices[v1];
        float2 p2 = vertices[v2];
375
376
377
378
379
380
381
382
383

        // triangle is illegal => re-triangulation is necessary!
        if(*illegalTri != 1 && !isCCW(p0, p1, p2)) {
            *illegalTri = 1;
        }
    }
}


384
385
// for each vertex in parallel
kernel void computeForces(__global float2* vertices,
386
                          __global int4* edges,
387
                          __global int* vertexToEdge,
388
                          __global float2* lengths,
389
                          __global float* scalingFactor,
390
                          __global float2* forces)
391
{
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
    int vertexId = get_global_id(0);
    int edgeId = vertexToEdge[vertexId];
    if(edgeId != -1){
        int4 edge = edges[edgeId];
        int twinId = edgeId;
        int4 twinEdge = edge;
        bool first = true;
        float2 force = (float2)(0.0f, 0.0f);
        float2 p0 = vertices[vertexId];

        while(first || (edgeId != twinId)){
            first = false;

            // compute force
            int nextId = getNext(twinEdge);
            int4 nextEdge = edges[nextId];
            float2 p1 = vertices[getVertex(nextEdge)];

            float2 v = normalize(p0-p1);
            float len = lengths[nextId].s0;
            float desiredLen = lengths[nextId].s1 * (*scalingFactor) * 1.2f;
            float lenDiff = (desiredLen - len);
            lenDiff = max(lenDiff, 0.0f);
            float2 partialForce = v * lenDiff;
            force = force + partialForce;

            // go on
            twinId = getTwin(nextEdge);
            twinEdge = edges[twinId];
        }

        forces[vertexId] = force;
    }
425
426
427
}

// for each vertex
428
kernel void moveVertices(__global float2* vertices,
429
                         __global int* borderVertices,
430
431
                         __global float2* forces,
                         const float delta) {
432
    int vertexId = get_global_id(0);
433
    float deps = 0.00001f;
434
435
    float2 force = forces[vertexId];
    float2 v = vertices[vertexId];
436

437
    v = v + (force * 0.3f);
438
439

    // project back if necessary
440
    float distance = dist(v);
441
    if(distance > 0.0f || borderVertices[vertexId] == 1) {
442
443
444
445
        float2 dX = (float2)(deps, 0.0f);
        float2 dY = (float2)(0.0f, deps);
        float2 vx = (float2)(v + dX);
        float2 vy = (float2)(v + dY);
446
447
448
        float dGradPX = (dist(vx) - distance) / deps;
        float dGradPY = (dist(vy) - distance) / deps;
        float2 projection = (float2)(dGradPX * distance, dGradPY * distance);
449
450
451
        v = v - projection;
    }

452
    forces[vertexId] = (float2)(0.0f, 0.0f);
453
454
455
456
457
    vertices[vertexId] = v;
}

// computation of the scaling factor:
kernel void computeLengths(
458
    __global float2* vertices,
459
    __global int4* edges,
460
461
    __global float2* lengths,
    __global float2* qLengths)
462
{
463
    int edgeId = get_global_id(0);
464
465
466
    int4 edge = edges[edgeId];
    int4 twinEdge = edges[getTwin(edge)];

467
468
469
    float2 p0 = vertices[getVertex(twinEdge)];
    float2 p1 = vertices[getVertex(edge)];
    float2 v = p0-p1;
470
471

    //TODO: desiredLenfunction required
472
473
    float desiredLen = 1.0f;
    float2 len = (float2)(length(v), desiredLen);
474
475
    lengths[edgeId] = len;
    qLengths[edgeId] = (float2)(length(v)*length(v), desiredLen*desiredLen);
476
477
478
479
}


// kernel for multiple work-groups
480
kernel void computePartialSF(__const int size, __global float2* qlengths, __local float2* partialSums, __global float2* output) {
481
482
483
484
485
    int gid = get_global_id(0);
    int lid = get_local_id(0);

    if(gid < size){
        int global_index = gid;
486
        float2 accumulator = (0.0f, 0.0f);
487
488
        // Loop sequentially over chunks of input vector
        while (global_index < size) {
489
            float2 element = qlengths[global_index];
490
491
492
493
494
            accumulator += element;
            global_index += get_global_size(0);
        }

        int group_size = get_local_size(0);
495
        float2 len = accumulator;
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
        partialSums[lid] = len;

        barrier(CLK_LOCAL_MEM_FENCE);
        // group_size has to be a power of 2!
        for(int i = group_size/2; i > 0; i>>=1){
            if(lid < i && gid + i < size) {
                partialSums[lid] += partialSums[lid + i];
            }
            barrier(CLK_LOCAL_MEM_FENCE);
        }

        if(lid == 0){
            output[get_group_id(0)] = partialSums[0];
        }
    }
    else {
        if(lid == 0){
513
            output[get_group_id(0)] = (float2)(0.0f, 0.0f);
514
515
516
517
518
        }
    }
}

// kernel for 1 work-group
519
kernel void computeCompleteSF(__const int size, __global float2* qlengths, __local float2* partialSums, __global float* scaleFactor) {
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
    int gid = get_global_id(0);
    int lid = get_local_id(0);
    if(lid < size){
        int group_size = get_local_size(0);
        partialSums[lid] = qlengths[lid];

        barrier(CLK_LOCAL_MEM_FENCE);

        // group_size has to be a power of 2!
        for(int i = group_size/2; i > 0; i>>=1){
            if(lid < i && lid + i < size) {
                partialSums[lid] += partialSums[lid + i];
            }
            barrier(CLK_LOCAL_MEM_FENCE);
        }


        if(lid == 0) {
          *scaleFactor = sqrt(partialSums[0].s0 / partialSums[0].s1);
        }
    }
}