10.12., 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit 154792b2 authored by Jean-Matthieu Gallard's avatar Jean-Matthieu Gallard

KernelGen - fix dudx_T name + minor improv

parent eab26b0a
......@@ -93,16 +93,16 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral(
const double inverseDt = 1.0 / dt;
const double dtBydx = inverseDx * dt; //Assume dx[0] == dx[1] == dx[2]
{% if useNCP or (useFlux and useCERKGuess) or useViscousFlux %}
double dudxT_by_dx[{{nDof*nDofPad}}] __attribute__((aligned(ALIGNMENT)));
double dudx_T_by_dx[{{nDof*nDofPad}}] __attribute__((aligned(ALIGNMENT)));
// 0. precompute 1/dx * dudx_T. Assume dx[0] == dx[1] == dx[2]
#pragma omp simd aligned(dudxT_by_dx,dudx_T:ALIGNMENT)
#pragma omp simd aligned(dudx_T_by_dx,dudx_T:ALIGNMENT)
for(int it=0;it<{{nDof*nDofPad}};it++) {
dudxT_by_dx[it] = inverseDx * dudx_T[it];
dudx_T_by_dx[it] = inverseDx * dudx_T[it];
}
{% if useLibxsmm %}
#if defined(USE_IPO) && ! defined(UNSAFE_IPO)
volatile double doNotOptimizeAway_dudx_by_dt = dudxT_by_dx[0]; //used to prevent the compiler from optimizing temp array away. Needs to be volatile
volatile double doNotOptimizeAway_dudx_by_dt = dudx_T_by_dx[0]; //used to prevent the compiler from optimizing temp array away. Needs to be volatile
#endif
{% endif %}
{% endif %}
......@@ -227,20 +227,20 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral(
// Compute the "derivatives" (contributions of the stiffness matrix)
// x direction (independent from the y and z derivatives)
for (int zy = 0; zy < {{nDof3D*nDof}}; zy++) {
{{ m.matmul('gradQ_x', 'lQi', 'dudxT_by_dx', 'gradQ', idxLQi(t,0,zy,0,0), '0', idxGradQ(0,0,zy,0,0)) | indent(8) }}{##}
{{ m.matmul('gradQ_x', 'lQi', 'dudx_T_by_dx', 'gradQ', idxLQi(t,0,zy,0,0), '0', idxGradQ(0,0,zy,0,0)) | indent(8) }}{##}
}
// y direction (independent from the x and z derivatives)
for (int z = 0; z < {{nDof3D}}; z++) {
for (int x = 0; x < {{nDof}}; x++) {
{{ m.matmul('gradQ_y', 'lQi', 'dudxT_by_dx', 'gradQ', idxLQi(t,z,0,x,0), '0', idxGradQ(1,z,0,x,0)) | indent(10) }}{##}
{{ m.matmul('gradQ_y', 'lQi', 'dudx_T_by_dx', 'gradQ', idxLQi(t,z,0,x,0), '0', idxGradQ(1,z,0,x,0)) | indent(10) }}{##}
}
}
{% if nDim==3 %}
// z direction (independent from the x and y derivatives)
for (int yx = 0; yx < {{nDof*nDof}}; yx++) {
{{ m.matmul('gradQ_z', 'lQi', 'dudxT_by_dx', 'gradQ', idxLQi(t,0,0,yx,0), '0', idxGradQ(2,0,0,yx,0)) | indent(8) }}{##}
{{ m.matmul('gradQ_z', 'lQi', 'dudx_T_by_dx', 'gradQ', idxLQi(t,0,0,yx,0), '0', idxGradQ(2,0,0,yx,0)) | indent(8) }}{##}
}
{% endif %}
{% endif %}{# useNCP or useViscousFlux #}
......@@ -406,20 +406,20 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral(
// Compute the "derivatives" (contributions of the stiffness matrix)
// x direction (independent from the y and z derivatives)
for (int zy = 0; zy < {{nDof3D*nDof}}; zy++) {
{{ m.matmul('gradQ_x', 'lQi', 'dudxT_by_dx', 'gradQ', idxLQi(t,0,zy,0,0), '0', idxGradQ(0,0,zy,0,0)) | indent(8) }}{##}
{{ m.matmul('gradQ_x', 'lQi', 'dudx_T_by_dx', 'gradQ', idxLQi(t,0,zy,0,0), '0', idxGradQ(0,0,zy,0,0)) | indent(8) }}{##}
}
// y direction (independent from the x and z derivatives)
for (int z = 0; z < {{nDof3D}}; z++) {
for (int x = 0; x < {{nDof}}; x++) {
{{ m.matmul('gradQ_y', 'lQi', 'dudxT_by_dx', 'gradQ', idxLQi(t,z,0,x,0), '0', idxGradQ(1,z,0,x,0)) | indent(10) }}{##}
{{ m.matmul('gradQ_y', 'lQi', 'dudx_T_by_dx', 'gradQ', idxLQi(t,z,0,x,0), '0', idxGradQ(1,z,0,x,0)) | indent(10) }}{##}
}
}
{% if nDim==3 %}
// z direction (independent from the x and y derivatives)
for (int yx = 0; yx < {{nDof*nDof}}; yx++) {
{{ m.matmul('gradQ_z', 'lQi', 'dudxT_by_dx', 'gradQ', idxLQi(t,0,0,yx,0), '0', idxGradQ(2,0,0,yx,0)) | indent(8) }}{##}
{{ m.matmul('gradQ_z', 'lQi', 'dudx_T_by_dx', 'gradQ', idxLQi(t,0,0,yx,0), '0', idxGradQ(2,0,0,yx,0)) | indent(8) }}{##}
}
{% endif %}
{% endif %}
......@@ -623,20 +623,20 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral(
// Compute the "derivatives" (contributions of the stiffness matrix)
// x direction (independent from the y and z derivatives)
for (int zy = 0; zy < {{nDof3D*nDof}}; zy++) {
{{ m.matmul('gradQ_x', 'lQhi', 'dudxT_by_dx', 'gradQ', idxLQhi(0,zy,0,0), '0', idxGradQ(0,0,zy,0,0)) | indent(4) }}{##}
{{ m.matmul('gradQ_x', 'lQhi', 'dudx_T_by_dx', 'gradQ', idxLQhi(0,zy,0,0), '0', idxGradQ(0,0,zy,0,0)) | indent(4) }}{##}
}
// y direction (independent from the x and z derivatives)
for (int z = 0; z < {{nDof3D}}; z++) {
for (int x = 0; x < {{nDof}}; x++) {
{{ m.matmul('gradQ_y', 'lQhi', 'dudxT_by_dx', 'gradQ', idxLQhi(z,0,x,0), '0', idxGradQ(1,z,0,x,0)) | indent(6) }}{##}
{{ m.matmul('gradQ_y', 'lQhi', 'dudx_T_by_dx', 'gradQ', idxLQhi(z,0,x,0), '0', idxGradQ(1,z,0,x,0)) | indent(6) }}{##}
}
}
{% if nDim==3 %}
// z direction (independent from the x and y derivatives)
for (int yx = 0; yx < {{nDof*nDof}}; yx++) {
{{ m.matmul('gradQ_z', 'lQhi', 'dudxT_by_dx', 'gradQ', idxLQhi(0,0,yx,0), '0', idxGradQ(2,0,0,yx,0)) | indent(4) }}{##}
{{ m.matmul('gradQ_z', 'lQhi', 'dudx_T_by_dx', 'gradQ', idxLQhi(0,0,yx,0), '0', idxGradQ(2,0,0,yx,0)) | indent(4) }}{##}
}
{% endif %}
......
......@@ -75,6 +75,8 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral(
__assume_aligned(lFhi, ALIGNMENT);
{% endif %}
__assume_aligned(FLCoeff, ALIGNMENT); // == F0
__assume_aligned(dudx, ALIGNMENT);
__assume_aligned(dudx_T, ALIGNMENT);
__assume_aligned(Kxi, ALIGNMENT);
__assume_aligned(Kxi_T, ALIGNMENT);
__assume_aligned(iK1_T, ALIGNMENT);
......@@ -91,22 +93,23 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral(
#endif
// 0. Allocate local variable
constexpr int MaxIterations = {{2*nDof+1}};
double new_lQi_slice[{{nDof*nVar*nDofPad}}] __attribute__((aligned(ALIGNMENT))) = {0.}; //for step 4 (computing new lQi value), doesn't update parameters
const double inverseDt = 1.0 / dt;
const double dtBydx = inverseDx * dt; //Assume dx[0] == dx[1] == dx[2]
{% if useNCP or (useFlux and useCERKGuess) or useViscousFlux %}
double dudxT_by_dx[{{nDof*nDofPad}}] __attribute__((aligned(ALIGNMENT)));
double dudx_T_by_dx[{{nDof*nDofPad}}] __attribute__((aligned(ALIGNMENT)));
double dudx_by_dx[{{nDof*nDofPad}}] __attribute__((aligned(ALIGNMENT)));
// 0. precompute 1/dx * dudx_T. Assume dx[0] == dx[1] == dx[2]
#pragma omp simd aligned(dudxT_by_dx,dudx_T:ALIGNMENT)
#pragma omp simd aligned(dudx_T_by_dx,dudx_T,dudx:ALIGNMENT)
for(int it=0;it<{{nDof*nDofPad}};it++) {
dudxT_by_dx[it] = inverseDx * dudx_T[it];
dudx_T_by_dx[it] = inverseDx * dudx_T[it];
dudx_by_dx[it] = inverseDx * dudx[it];
}
{% if useLibxsmm %}
#if defined(USE_IPO) && ! defined(UNSAFE_IPO)
volatile double doNotOptimizeAway_dudxT_by_dt = dudxT_by_dx[0]; //used to prevent the compiler from optimizing temp array away. Needs to be volatile
volatile double doNotOptimizeAway_dudx_T_by_dt = dudx_T_by_dx[0]; //used to prevent the compiler from optimizing temp array away. Needs to be volatile
volatile double doNotOptimizeAway_dudx_by_dt = dudx_by_dx[0]; //used to prevent the compiler from optimizing temp array away. Needs to be volatile
#endif
{% endif %}
......@@ -122,7 +125,7 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral(
}
double rhsCoeff_T[{{nDof*nDofPad}}] __attribute__((aligned(ALIGNMENT)));
for (int i = 0; i < {{nDof}}; i++) {
#pragma omp simd aligned(rhsCoeff,Kxi,iweights1:ALIGNMENT)
#pragma omp simd aligned(rhsCoeff_T,Kxi_T,iweights1:ALIGNMENT)
for (int j = 0; j < {{nDofPad}}; j++) {
rhsCoeff_T[i*{{nDofPad}}+j] = -inverseDx * Kxi_T[i*{{nDofPad}}+j] * iweights1[j];
}
......@@ -233,7 +236,6 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral(
// 2. Discrete Picard iterations
constexpr int MaxIterations = {% if useCERKGuess %}{% if nDof-3 <= 1 %}1; //cannot be lower than 1{% else %}{{nDof-3}}; //nDof-3{% endif %}{% else %}{{2*nDof+1}};{% endif %}
int iter = 0;
for (; iter < MaxIterations; iter++) {
......@@ -252,12 +254,12 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral(
// y direction (independent from the x and z derivatives), fuse nx
for (int z = 0; z < {{nDof3D}}; z++) {
{{ m.matmul('gradQ_y', 'lQi', 'dudxT_by_dx', 'gradQ', idxLQi(t,z,0,0,0), '0', idxGradQ(1,z,0,0,0)) | indent(10) }}{##}
{{ m.matmul('gradQ_y', 'lQi', 'dudx_T_by_dx', 'gradQ', idxLQi(t,z,0,0,0), '0', idxGradQ(1,z,0,0,0)) | indent(10) }}{##}
}
{% if nDim==3 %}
// z direction (independent from the x and y derivatives), fuse ynx
{{ m.matmul('gradQ_z', 'lQi', 'dudxT_by_dx', 'gradQ', idxLQi(t,0,0,0,0), '0', idxGradQ(2,0,0,0,0)) | indent(8) }}{##}
{{ m.matmul('gradQ_z', 'lQi', 'dudx_T_by_dx', 'gradQ', idxLQi(t,0,0,0,0), '0', idxGradQ(2,0,0,0,0)) | indent(8) }}{##}
{% endif %}
{% endif %}{# useNCP or useViscousFlux #}
......@@ -425,12 +427,12 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral(
// y direction (independent from the x and z derivatives), fuse nx
for (int z = 0; z < {{nDof3D}}; z++) {
{{ m.matmul('gradQ_y', 'lQi', 'dudxT_by_dx', 'gradQ', idxLQi(t,z,0,0,0), '0', idxGradQ(1,z,0,0,0)) | indent(6) }}{##}
{{ m.matmul('gradQ_y', 'lQi', 'dudx_T_by_dx', 'gradQ', idxLQi(t,z,0,0,0), '0', idxGradQ(1,z,0,0,0)) | indent(6) }}{##}
}
{% if nDim==3 %}
// z direction (independent from the x and y derivatives), fuse ynx
{{ m.matmul('gradQ_z', 'lQi', 'dudxT_by_dx', 'gradQ', idxLQi(t,0,0,0,0), '0', idxGradQ(2,0,0,0,0)) | indent(4) }}{##}
{{ m.matmul('gradQ_z', 'lQi', 'dudx_T_by_dx', 'gradQ', idxLQi(t,0,0,0,0), '0', idxGradQ(2,0,0,0,0)) | indent(4) }}{##}
{% endif %}
{% endif %}
......@@ -639,12 +641,12 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral(
// y direction (independent from the x and z derivatives), fuse nx
for (int z = 0; z < {{nDof3D}}; z++) {
{{ m.matmul('gradQ_y', 'lQhi', 'dudxT_by_dx', 'gradQ', idxLQhi(z,0,0,0), '0', idxGradQ(1,z,0,0,0)) | indent(4) }}{##}
{{ m.matmul('gradQ_y', 'lQhi', 'dudx_T_by_dx', 'gradQ', idxLQhi(z,0,0,0), '0', idxGradQ(1,z,0,0,0)) | indent(4) }}{##}
}
{% if nDim==3 %}
// z direction (independent from the x and y derivatives), fuse ynx
{{ m.matmul('gradQ_z', 'lQhi', 'dudxT_by_dx', 'gradQ', idxLQhi(0,0,0,0), '0', idxGradQ(2,0,0,0,0)) | indent(2) }}{##}
{{ m.matmul('gradQ_z', 'lQhi', 'dudx_T_by_dx', 'gradQ', idxLQhi(0,0,0,0), '0', idxGradQ(2,0,0,0,0)) | indent(2) }}{##}
{% endif %}
std::memset(lGradQhbnd, 0, {{2*nDim*nVarPad*nDof*nDof3D*nDim}} * sizeof(double));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment