Commit 94bf42ac authored by Jean-Matthieu Gallard
Browse files

KernelGen Nonlinear STP - AoSoA2 version (tested on mixedEuler)

parent 29a99c87
......@@ -111,16 +111,28 @@ class ConfigurationParametersModel(AbstractModelBaseClass):
# nonlinear
if self.context["predictorRecompute"]:
if self.context["useVectPDE"]:
self.context["lQiSize"] = nDofPad*nVar*(nDof**nDim)
self.context["lQhiSize"] = nDofPad*nVar*nDof*nDof3D
if nPar > 0:
self.context["lPiSize"] = nDofPad*nPar*nDof*nDof3D
if self.context["useFlux"]:
self.context["lFhiSize"] = nDofPad*nVar*nDof*nDof3D*nDim
if self.context["useSource"] or self.context["useNCP"]:
self.context["lShiSize"] = nDofPad*nVar*nDof*nDof3D
if self.context["useNCP"] or self.context["useViscousFlux"]:
self.context["gradQSize"] = nDofPad*nVar*nDof*nDof3D*nDim
if self.context["useAoSoA2"]:
self.context["lQiSize"] = nDof2Pad*nVar*nDof*nDof3D
self.context["lQhiSize"] = nDof2Pad*nVar*nDof3D
if nPar > 0:
self.context["lPiSize"] = nDof2Pad*nPar*nDof3D
if self.context["useFlux"]:
self.context["lFhiSize"] = nDof2Pad*nVar*nDof3D*nDim
if self.context["useSource"] or self.context["useNCP"]:
self.context["lShiSize"] = nDof2Pad*nVar*nDof3D
if self.context["useNCP"] or self.context["useViscousFlux"]:
self.context["gradQSize"] = nDof2Pad*nVar*nDof3D*nDim
else:
self.context["lQiSize"] = nDofPad*nVar*(nDof**nDim)
self.context["lQhiSize"] = nDofPad*nVar*nDof*nDof3D
if nPar > 0:
self.context["lPiSize"] = nDofPad*nPar*nDof*nDof3D
if self.context["useFlux"]:
self.context["lFhiSize"] = nDofPad*nVar*nDof*nDof3D*nDim
if self.context["useSource"] or self.context["useNCP"]:
self.context["lShiSize"] = nDofPad*nVar*nDof*nDof3D
if self.context["useNCP"] or self.context["useViscousFlux"]:
self.context["gradQSize"] = nDofPad*nVar*nDof*nDof3D*nDim
else: #scalar predictorRecompute
self.context["lQiSize"] = nVarPad*(nDof**(nDim+1))
self.context["lQhiSize"] = nVarPad*(nDof**nDim)
......
......@@ -74,7 +74,10 @@ class FusedSpaceTimePredictorVolumeIntegralModel(AbstractModelBaseClass):
self.context["j_seq"] = range(0,self.context["nDof"]) if (self.context["nDim"] >= 3) else [0]
if self.context["predictorRecompute"]:
if self.context["useVectPDE"]:
self.render(("aderdg", "fusedSPTVI_nonlinear_mem_vect_cpp.template"), "fusedSpaceTimePredictorVolumeIntegral.cpp")
if self.context["useAoSoA2"]:
self.render(("aderdg", "fusedSPTVI_nonlinear_mem_aosoa2_cpp.template"), "fusedSpaceTimePredictorVolumeIntegral.cpp")
else:
self.render(("aderdg", "fusedSPTVI_nonlinear_mem_vect_cpp.template"), "fusedSpaceTimePredictorVolumeIntegral.cpp")
else:
self.render(("aderdg", "fusedSPTVI_nonlinear_mem_cpp.template"), "fusedSpaceTimePredictorVolumeIntegral.cpp")
else:
......@@ -151,24 +154,42 @@ class FusedSpaceTimePredictorVolumeIntegralModel(AbstractModelBaseClass):
self.context["matmulConfigs"]["gradQ_y"] = MatmulConfig(nVar, nDof, nDof, nDataPad*nDof , nDofPad, nVarPad*nDof , 1, 1, 1, 1, 1, "gradQ_y")
if self.context["nDim"]>=3:
self.context["matmulConfigs"]["gradQ_z"] = MatmulConfig(nVar, nDof, nDof, nDataPad*nDof2, nDofPad, nVarPad*nDof2, 1, 1, 1, 1, 1, "gradQ_z")
else: #NonLinear
if self.context["predictorRecompute"]: # TODO JMG matmuls for gradQ, rhs and lduh are exactly the same...
if self.context["useVectPDE"]:
if self.context["useFlux"]:
self.context["matmulConfigs"]["rhs_x"] = MatmulConfig(nDofPad, nVar, nDof, nDofPad, nDofPad, nDofPad , 1, 1, 1, 1, 1, "rhs_x", prefetchInput="B", prefetchOutput="C")
self.context["matmulConfigs"]["rhs_y"] = MatmulConfig(nDofPad*nVar, nDof, nDof, nDofPad*nVar, nDofPad, nDofPad*nVar , 1, 1, 1, 1, 1, "rhs_y", prefetchInput="A", prefetchOutput="C")
if self.context["nDim"]>=3:
self.context["matmulConfigs"]["rhs_z"] = MatmulConfig(nDofPad*nVar*nDof, nDof, nDof, nDofPad*nVar*nDof, nDofPad, nDofPad*nVar*nDof , 1, 1, 1, 1, 1, "rhs_z")
self.context["matmulConfigs"]["lduh_x"] = MatmulConfig(nDofPad, nVar, nDof, nDofPad, nDofPad, nDofPad , 1, 1, 1, 1, 1, "lduh_x", prefetchInput="B", prefetchOutput="C")
self.context["matmulConfigs"]["lduh_y"] = MatmulConfig(nDofPad*nVar, nDof, nDof, nDofPad*nVar, nDofPad, nDofPad*nVar , 1, 1, 1, 1, 1, "lduh_y", prefetchInput="A", prefetchOutput="C")
if self.context["nDim"]>=3:
self.context["matmulConfigs"]["lduh_z"] = MatmulConfig(nDofPad*nVar*nDof, nDof, nDof, nDofPad*nVar*nDof, nDofPad, nDofPad*nVar*nDof, 1, 1, 1, 1, 1, "lduh_z")
if self.context["useNCP"] or self.context['useViscousFlux']:
self.context["matmulConfigs"]["gradQ_x"] = MatmulConfig(nDofPad, nVar, nDof, nDofPad, nDofPad, nDofPad , 1, 1, 1, 1, 1, "gradQ_x")
self.context["matmulConfigs"]["gradQ_y"] = MatmulConfig(nDofPad*nVar, nDof, nDof, nDofPad*nVar, nDofPad, nDofPad*nVar , 1, 1, 1, 1, 1, "gradQ_y")
if self.context["nDim"]>=3:
self.context["matmulConfigs"]["gradQ_z"] = MatmulConfig(nDofPad*nVar*nDof, nDof, nDof, nDofPad*nVar*nDof, nDofPad, nDofPad*nVar*nDof, 1, 1, 1, 1, 1, "gradQ_z")
self.context["matmulConfigs"]["lqi"] = MatmulConfig(nDofPad*nVar, nDof, nDof, nDofPad*nVar*nDof*nDof3D, nDofPad, nDofPad*nVar, 1, 0, 1, 1, 1, "lqi") # beta, 0 => overwrite C
if self.context["useAoSoA2"]: # aosoa2
if self.context["useFlux"]:
self.context["matmulConfigs"]["rhs_x"] = MatmulConfig(nDof, nDof, nDof, nDofPad, nDof, nDof, 1, 1, 1, 1, 1, "rhs_x", prefetchInput="B", prefetchOutput="C")
self.context["matmulConfigs"]["rhs_y"] = MatmulConfig(nDof, nDof, nDof, nDof, nDofPad, nDof, 1, 1, 1, 1, 1, "rhs_y", prefetchInput="A", prefetchOutput="C")
if self.context["nDim"]>=3:
self.context["matmulConfigs"]["rhs_z"] = MatmulConfig(nDof2Pad*nVar, nDof, nDof, nDof2Pad*nVar, nDofPad, nDof2Pad*nVar, 1, 1, 1, 1, 1, "rhs_z")
self.context["matmulConfigs"]["lduh_x"] = MatmulConfig(nDof, nDof, nDof, nDofPad, nDof, nDof, 1, 1, 1, 1, 1, "lduh_x", prefetchInput="B", prefetchOutput="C")
self.context["matmulConfigs"]["lduh_y"] = MatmulConfig(nDof, nDof, nDof, nDof, nDofPad, nDof, 1, 1, 1, 1, 1, "lduh_y", prefetchInput="A", prefetchOutput="C")
if self.context["nDim"]>=3:
self.context["matmulConfigs"]["lduh_z"] = MatmulConfig(nDof2Pad*nVar, nDof, nDof, nDof2Pad*nVar, nDofPad, nDof2Pad*nVar, 1, 1, 1, 1, 1, "lduh_z")
if self.context["useNCP"] or self.context['useViscousFlux']:
self.context["matmulConfigs"]["gradQ_x"] = MatmulConfig(nDof, nDof, nDof, nDofPad, nDof, nDof, 1, 1, 1, 1, 1, "gradQ_x")
self.context["matmulConfigs"]["gradQ_y"] = MatmulConfig(nDof, nDof, nDof, nDof, nDofPad, nDof, 1, 1, 1, 1, 1, "gradQ_y")
if self.context["nDim"]>=3:
self.context["matmulConfigs"]["gradQ_z"] = MatmulConfig(nDof2Pad*nVar, nDof, nDof, nDof2Pad*nVar, nDofPad, nDof2Pad*nVar, 1, 1, 1, 1, 1, "gradQ_z")
self.context["matmulConfigs"]["lqi"] = MatmulConfig(nDof2Pad, nDof, nDof, nDof2Pad*nVar*nDof3D, nDofPad, nDof2Pad, 1, 0, 1, 1, 1, "lqi") # beta, 0 => overwrite C
else:
if self.context["useFlux"]:
self.context["matmulConfigs"]["rhs_x"] = MatmulConfig(nDofPad, nVar, nDof, nDofPad, nDofPad, nDofPad , 1, 1, 1, 1, 1, "rhs_x", prefetchInput="B", prefetchOutput="C")
self.context["matmulConfigs"]["rhs_y"] = MatmulConfig(nDofPad*nVar, nDof, nDof, nDofPad*nVar, nDofPad, nDofPad*nVar , 1, 1, 1, 1, 1, "rhs_y", prefetchInput="A", prefetchOutput="C")
if self.context["nDim"]>=3:
self.context["matmulConfigs"]["rhs_z"] = MatmulConfig(nDofPad*nVar*nDof, nDof, nDof, nDofPad*nVar*nDof, nDofPad, nDofPad*nVar*nDof , 1, 1, 1, 1, 1, "rhs_z")
self.context["matmulConfigs"]["lduh_x"] = MatmulConfig(nDofPad, nVar, nDof, nDofPad, nDofPad, nDofPad , 1, 1, 1, 1, 1, "lduh_x", prefetchInput="B", prefetchOutput="C")
self.context["matmulConfigs"]["lduh_y"] = MatmulConfig(nDofPad*nVar, nDof, nDof, nDofPad*nVar, nDofPad, nDofPad*nVar , 1, 1, 1, 1, 1, "lduh_y", prefetchInput="A", prefetchOutput="C")
if self.context["nDim"]>=3:
self.context["matmulConfigs"]["lduh_z"] = MatmulConfig(nDofPad*nVar*nDof, nDof, nDof, nDofPad*nVar*nDof, nDofPad, nDofPad*nVar*nDof, 1, 1, 1, 1, 1, "lduh_z")
if self.context["useNCP"] or self.context['useViscousFlux']:
self.context["matmulConfigs"]["gradQ_x"] = MatmulConfig(nDofPad, nVar, nDof, nDofPad, nDofPad, nDofPad , 1, 1, 1, 1, 1, "gradQ_x")
self.context["matmulConfigs"]["gradQ_y"] = MatmulConfig(nDofPad*nVar, nDof, nDof, nDofPad*nVar, nDofPad, nDofPad*nVar , 1, 1, 1, 1, 1, "gradQ_y")
if self.context["nDim"]>=3:
self.context["matmulConfigs"]["gradQ_z"] = MatmulConfig(nDofPad*nVar*nDof, nDof, nDof, nDofPad*nVar*nDof, nDofPad, nDofPad*nVar*nDof, 1, 1, 1, 1, 1, "gradQ_z")
self.context["matmulConfigs"]["lqi"] = MatmulConfig(nDofPad*nVar, nDof, nDof, nDofPad*nVar*nDof*nDof3D, nDofPad, nDofPad*nVar, 1, 0, 1, 1, 1, "lqi") # beta, 0 => overwrite C
else: #scalar predictor recompute
if self.context["useFlux"]:
self.context["matmulConfigs"]["rhs_x"] = MatmulConfig(nVarPad, nDof, nDof, nVarPad , nDofPad, nVarPad , 1, 1, 1, 1, 1, "rhs_x")
......
......@@ -562,7 +562,7 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral(
std::memset(lFhbnd, 0, {{2*nDim*nVarPad*nDof*nDof3D}} * sizeof(double));
/*
// x-direction: face 1 (left) and face 2 (right)
for (int zy = 0; zy < {{nDof*nDof3D}}; zy++) {
// Matrix-Vector Products
......@@ -585,6 +585,41 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral(
}
}
}
*/
// with reduction
// x-direction: face 1 (left) and face 2 (right)
for (int zy = 0; zy < {{nDof*nDof3D}}; zy++) {
// Matrix-Vector Products
for (int n = 0; n < {{nVar}}; n++) {
double tmpL = 0.;
double tmpR = 0.;
#pragma omp simd aligned(lQhi,FLCoeff,FRCoeff:ALIGNMENT) reduction(+:tmpL,tmpR)
for (int x = 0; x < {{nDof}}; x++) {
// Fortran: lQhbnd(:,j,i,1) = lQhi(:,:,j,i) * FLCoeff(:)
tmpL += lQhi[{{idxLQhi(0,zy,n,x)}}] * FLCoeff[x];
// Fortran: lQhbnd(:,j,i,2) = lQhi(:,:,j,i) * FRCoeff(:)
tmpR += lQhi[{{idxLQhi(0,zy,n,x)}}] * FRCoeff[x];
}
lQhbnd[{{idxLQhbnd(0,0,zy,n)}}] = tmpL;
lQhbnd[{{idxLQhbnd(1,0,zy,n)}}] = tmpR;
{% if useFlux %}
tmpL = 0.;
tmpR = 0.;
#pragma omp simd aligned(lFhi,FLCoeff,FRCoeff:ALIGNMENT) reduction(+:tmpL,tmpR)
for (int x = 0; x < {{nDof}}; x++) {
// Fortran: lFhbnd(:,j,i,1) = lFhi_x(:,:,j,i) * FLCoeff(:)
tmpL += lFhi[{{idxLFhi(0,0,zy,n,x)}}] * FLCoeff[x];
// Fortran: lFhbnd(:,j,i,2) = lFhi_x(:,:,j,i) * FRCoeff(:)
tmpR += lFhi[{{idxLFhi(0,0,zy,n,x)}}] * FRCoeff[x];
}
lFhbnd[{{idxLFhbnd(0,0,zy,n)}}] = tmpL;
lFhbnd[{{idxLFhbnd(1,0,zy,n)}}] = tmpR;
{% endif %}{# useFlux #}
}
}
// y-direction: face 3 (left) and face 4 (right)
for (int z = 0; z < {{nDof3D}}; z++) {
......@@ -831,7 +866,7 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral(
for (int zy = 0; zy < {{nDof3D*nDof}}; zy++) {
for (int n = 0; n < {{nVar}}; n++) {
for (int x = 0; x < {{nDof}}; x++) {
lduh[{{idxLduh(0,zy,x,n)}}] = weights2[zy]*weights1[x]*lQi[{{idxLQhi(0,zy,n,x)}}];
lduh[{{idxLduh(0,zy,x,n)}}] = weights3[zy*{{nDof}}+x]*lQi[{{idxLQhi(0,zy,n,x)}}];
}
}
}
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment