Commit 2e3a132b authored by Jean-Matthieu Gallard
Browse files

KernelGen SplitCK - add missing pragma simd if multiplyMatPar is used (the...

KernelGen SplitCK - add the missing `#pragma omp simd` on the accumulation loops that follow a multiplyMaterialParameterMatrix call (the compiler should already have vectorized these loops; the pragma is added for consistency)
parent 6217a5df
......@@ -231,6 +231,7 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral{{nameSuffix}}(
{{ m.matmul('flux_x_sck_aosoa2', 'negativeDudx_by_dx', 'lFhi', 'gradQ', '0', idx(0,n,0,0), idx(0,n,0,0)) | indent(8) }}{##}
}
solver.{{solverName}}::multiplyMaterialParameterMatrix_vect(lPi+{{idxLPi(z,0,0,0)}}, gradQ);
#pragma omp simd aligned(lQi_next,gradQ:ALIGNMENT)
for (int nyx = 0; nyx < {{nVar*nDof2Pad}} ; nyx++){
lQi_next[{{idx(z,0,0,nyx)}}] += gradQ[{{idx(0,0,0,nyx)}}];
}
......@@ -267,6 +268,7 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral{{nameSuffix}}(
{{ m.matmul('flux_y_sck_aosoa2', 'lFhi', 'negativeDudxT_by_dx', 'gradQ', idx(0,n,0,0), '0', idx(0,n,0,0)) | indent(8) }}{##}
}
solver.{{solverName}}::multiplyMaterialParameterMatrix_vect(lPi+{{idxLPi(z,0,0,0)}}, gradQ);
#pragma omp simd aligned(lQi_next,gradQ:ALIGNMENT)
for (int nyx = 0; nyx < {{nVar*nDof2Pad}} ; nyx++){
lQi_next[{{idx(z,0,0,nyx)}}] += gradQ[{{idx(0,0,0,nyx)}}];
}
......@@ -304,6 +306,7 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral{{nameSuffix}}(
{{ m.matmul('flux_z_sck_aosoa2', 'lFhi', 'negativeDudxT_by_dx', 'gradQ', '0', '0', '0') | indent(4) }}{##}
for (int z = 0; z < {{nDof}} ; z++){
solver.{{solverName}}::multiplyMaterialParameterMatrix_vect(lPi+{{idxLPi(z,0,0,0)}}, gradQ+{{idx(z,0,0,0)}});
#pragma omp simd aligned(lQi_next,gradQ:ALIGNMENT)
for (int nyx = 0; nyx < {{nVar*nDof2Pad}} ; nyx++){
lQi_next[{{idx(z,0,0,nyx)}}] += gradQ[{{idx(z,0,0,nyx)}}];
}
......@@ -530,6 +533,7 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral{{nameSuffix}}(
{{ m.matmul('flux_x_sck_aosoa2', 'dudx_by_dx', 'lQi_next', 'gradQ', '0', idx(0,n,0,0), idx(0,n,0,0)) | indent(6) }}{##}
}
solver.{{solverName}}::multiplyMaterialParameterMatrix_vect(lPi+{{idxLPi(z,0,0,0)}}, gradQ);
#pragma omp simd aligned(lFhi,gradQ:ALIGNMENT)
for (int nyx = 0; nyx < {{nVar*nDof2Pad}} ; nyx++){
lFhi[{{idx(z,0,0,nyx)}}] += gradQ[{{idx(0,0,0,nyx)}}];
}
......@@ -620,6 +624,7 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral{{nameSuffix}}(
{{ m.matmul('flux_y_sck_aosoa2', 'lQi_next', 'dudxT_by_dx', 'gradQ', idx(0,n,0,0), '0', idx(0,n,0,0)) | indent(6) }}{##}
}
solver.{{solverName}}::multiplyMaterialParameterMatrix_vect(lPi+{{idxLPi(z,0,0,0)}}, gradQ);
#pragma omp simd aligned(lFhi,gradQ:ALIGNMENT)
for (int nyx = 0; nyx < {{nVar*nDof2Pad}} ; nyx++){
lFhi[{{idx(z,0,0,nyx)}}] += gradQ[{{idx(0,0,0,nyx)}}];
}
......@@ -681,6 +686,7 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral{{nameSuffix}}(
{{ m.matmul('flux_z_sck_aosoa2', 'lQi_next', 'dudxT_by_dx', 'gradQ', '0', '0', '0') | indent(2) }}{##}
for (int z = 0; z < {{nDof}} ; z++){
solver.{{solverName}}::multiplyMaterialParameterMatrix_vect(lPi+{{idxLPi(z,0,0,0)}}, gradQ+{{idx(z,0,0,0)}});
#pragma omp simd aligned(lFhi,gradQ:ALIGNMENT)
for (int nyx = 0; nyx < {{nVar*nDof2Pad}} ; nyx++){
lFhi[{{idx(z,0,0,nyx)}}] += gradQ[{{idx(z,0,0,nyx)}}];
}
......
......@@ -105,8 +105,8 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral{{nameSuffix}}(
double negativeDudxT_by_dx[{{nDof*nDofPad}}] __attribute__((aligned(ALIGNMENT)));
{% if useLibxsmm %}
#if defined(USE_IPO) && ! defined(UNSAFE_IPO)
double doNotOptimizeAway1 = dudxT_by_dx[0]; // used to prevent the compiler from optimizing dudxT_by_dx away
double doNotOptimizeAway2 = negativeDudxT_by_dx[0]; // used to prevent the compiler from optimizing dudxT_by_dx away
volatile double doNotOptimizeAway1 = dudxT_by_dx[0]; // used to prevent the compiler from optimizing dudxT_by_dx away
volatile double doNotOptimizeAway2 = negativeDudxT_by_dx[0]; // used to prevent the compiler from optimizing negativeDudxT_by_dx away
#endif
{% endif %}
......@@ -218,6 +218,7 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral{{nameSuffix}}(
{{ m.matmul('gradQ_x_sck', 'tmpArray', 'negativeDudxT_by_dx', 'gradQ', '0', '0', '0') | indent(6) }}{##}
for (int x = 0; x < {{nDof}} ; x++){
solver.{{solverName}}::multiplyMaterialParameterMatrix_scalar(lPi+{{idxLPi(0,yz,x,0)}}, gradQ+x*{{nVarPad}});
#pragma omp simd aligned(lQi_next,gradQ:ALIGNMENT)
for (int n = 0; n < {{nVarPad}} ; n++){
lQi_next[{{idx(0,yz,x,n)}}] += gradQ[{{idx(0,0,x,n)}}];
}
......@@ -255,6 +256,7 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral{{nameSuffix}}(
{{ m.matmul('gradQ_x_sck', 'tmpArray', 'negativeDudxT_by_dx', 'gradQ', '0', '0', '0') | indent(6) }}{##}
for (int y = 0; y < {{nDof}} ; y++){
solver.{{solverName}}::multiplyMaterialParameterMatrix_scalar(lPi+{{idxLPi(z,y,x,0)}}, gradQ+y*{{nVarPad}});
#pragma omp simd aligned(lQi_next,gradQ:ALIGNMENT)
for (int n = 0; n < {{nVarPad}} ; n++){
lQi_next[{{idx(z,y,x,n)}}] += gradQ[{{idx(0,0,y,n)}}];
}
......@@ -295,6 +297,7 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral{{nameSuffix}}(
{{ m.matmul('gradQ_x_sck', 'tmpArray', 'negativeDudxT_by_dx', 'gradQ', '0', '0', '0') | indent(6) }}{##}
for (int z = 0; z < {{nDof}} ; z++){
solver.{{solverName}}::multiplyMaterialParameterMatrix_scalar(lPi+{{idxLPi(z,0,xy,0)}}, gradQ+z*{{nVarPad}});
#pragma omp simd aligned(lQi_next,gradQ:ALIGNMENT)
for (int n = 0; n < {{nVarPad}} ; n++){
lQi_next[{{idx(z,0,xy,n)}}] += gradQ[{{idx(0,0,z,n)}}];
}
......@@ -480,6 +483,7 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral{{nameSuffix}}(
{{ m.matmul('gradQ_x_sck', 'tmpArray', 'dudxT_by_dx', 'gradQ', '0', '0', '0') | indent(6) }}{##}
for (int x = 0; x < {{nDof}} ; x++){
solver.{{solverName}}::multiplyMaterialParameterMatrix_scalar(lPi+{{idxLPi(0,yz,x,0)}}, gradQ+x*{{nVarPad}});
#pragma omp simd aligned(lFhi,gradQ:ALIGNMENT)
for (int n = 0; n < {{nVarPad}} ; n++){
lFhi[{{idx(0,yz,x,n)}}] += gradQ[{{idx(0,0,x,n)}}];
}
......@@ -538,8 +542,9 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral{{nameSuffix}}(
{{ m.matmul('gradQ_x_sck', 'tmpArray', 'dudxT_by_dx', 'gradQ', '0', '0', '0') | indent(6) }}{##}
for (int y = 0; y < {{nDof}} ; y++){
solver.{{solverName}}::multiplyMaterialParameterMatrix_scalar(lPi+{{idxLPi(z,y,x,0)}}, gradQ+y*{{nVarPad}});
#pragma omp simd aligned(lFhi,gradQ:ALIGNMENT)
for (int n = 0; n < {{nVarPad}} ; n++){
lFhi[{{idx(z,y,x,n)}}] += gradQ[{{idx(0,0,y,n)}}];
lFhi[{{idx(z,y,x,n)}}] += gradQ[{{idx(0,0,y,n)}}];
}
}
{% else %}
......@@ -600,6 +605,7 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral{{nameSuffix}}(
{{ m.matmul('gradQ_x_sck', 'tmpArray', 'dudxT_by_dx', 'gradQ', '0', '0', '0') | indent(6) }}{##}
for (int z = 0; z < {{nDof}} ; z++){
solver.{{solverName}}::multiplyMaterialParameterMatrix_scalar(lPi+{{idxLPi(z,0,xy,0)}}, gradQ+z*{{nVarPad}});
#pragma omp simd aligned(lFhi,gradQ:ALIGNMENT)
for (int n = 0; n < {{nVarPad}} ; n++){
lFhi[{{idx(z,0,xy,n)}}] += gradQ[{{idx(0,0,z,n)}}];
}
......
......@@ -234,6 +234,7 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral{{nameSuffix}}(
// store M*rhs in gradQ [n][x]
{{ m.matmul('flux_x_sck_vect', 'negativeDudx_by_dx', 'lFhi', 'gradQ', '0', '0', '0') | indent(6) }}{##}
solver.{{solverName}}::multiplyMaterialParameterMatrix_vect(lPi+{{idxLPi(0,yz,0,0)}}, gradQ);
#pragma omp simd aligned(lQi_next,gradQ:ALIGNMENT)
for (int nx = 0; nx < {{nVar*nDofPad}} ; nx++){
lQi_next[{{idx(0,yz,0,nx)}}] += gradQ[{{idx(0,0,0,nx)}}];
}
......@@ -268,6 +269,7 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral{{nameSuffix}}(
{{ m.matmul('flux_y_or_z_sck_vect', 'lFhi', 'negativeDudxT_by_dx', 'gradQ', '0', '0', '0') | indent(6) }}{##}
for (int y = 0; y < {{nDof}} ; y++){
solver.{{solverName}}::multiplyMaterialParameterMatrix_vect(lPi+{{idxLPi(z,y,0,0)}}, gradQ+y*{{nDofPad*nVar}});
#pragma omp simd aligned(lQi_next,gradQ:ALIGNMENT)
for (int nx = 0; nx < {{nVar*nDofPad}} ; nx++){
lQi_next[{{idx(z,y,0,nx)}}] += gradQ[{{idx(0,y,0,nx)}}];
}
......@@ -306,6 +308,7 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral{{nameSuffix}}(
{{ m.matmul('flux_y_or_z_sck_vect', 'lFhi', 'negativeDudxT_by_dx', 'gradQ', '0', '0', '0') | indent(6) }}{##}
for (int z = 0; z < {{nDof}} ; z++){
solver.{{solverName}}::multiplyMaterialParameterMatrix_vect(lPi+{{idxLPi(z,y,0,0)}}, gradQ+z*{{nDofPad*nVar}});
#pragma omp simd aligned(lQi_next,gradQ:ALIGNMENT)
for (int nx = 0; nx < {{nVar*nDofPad}} ; nx++){
lQi_next[{{idx(z,y,0,nx)}}] += gradQ[{{idx(0,z,0,nx)}}];
}
......@@ -528,6 +531,7 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral{{nameSuffix}}(
// store M*rhs in gradQ [n][x]
{{ m.matmul('flux_x_sck_vect', 'dudx_by_dx', 'lQi_next', 'gradQ', '0', '0', '0') | indent(4) }}{##}
solver.{{solverName}}::multiplyMaterialParameterMatrix_vect(lPi+{{idxLPi(0,yz,0,0)}}, gradQ);
#pragma omp simd aligned(lFhi,gradQ:ALIGNMENT)
for (int nx = 0; nx < {{nVar*nDofPad}} ; nx++){
lFhi[{{idx(0,yz,0,nx)}}] += gradQ[{{idx(0,0,0,nx)}}];
}
......@@ -608,6 +612,7 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral{{nameSuffix}}(
{{ m.matmul('flux_y_or_z_sck_vect', 'lQi_next', 'dudxT_by_dx', 'gradQ', '0', '0', '0') | indent(4) }}{##}
for (int y = 0; y < {{nDof}} ; y++){
solver.{{solverName}}::multiplyMaterialParameterMatrix_vect(lPi+{{idxLPi(z,y,0,0)}}, gradQ+y*{{nDofPad*nVar}});
#pragma omp simd aligned(lFhi,gradQ:ALIGNMENT)
for (int nx = 0; nx < {{nVar*nDofPad}} ; nx++){
lFhi[{{idx(z,y,0,nx)}}] += gradQ[{{idx(0,y,0,nx)}}];
}
......@@ -668,6 +673,7 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral{{nameSuffix}}(
{{ m.matmul('flux_y_or_z_sck_vect', 'lQi_next', 'dudxT_by_dx', 'gradQ', '0', '0', '0') | indent(4) }}{##}
for (int z = 0; z < {{nDof}} ; z++){
solver.{{solverName}}::multiplyMaterialParameterMatrix_vect(lPi+{{idxLPi(z,y,0,0)}}, gradQ+z*{{nDofPad*nVar}});
#pragma omp simd aligned(lFhi,gradQ:ALIGNMENT)
for (int nx = 0; nx < {{nVar*nDofPad}} ; nx++){
lFhi[{{idx(z,y,0,nx)}}] += gradQ[{{idx(0,z,0,nx)}}];
}
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment