Commit b43f016f authored by Jean-Matthieu Gallard
Browse files

KernelGen - WiP (not working): SP linear AoSoA2

parent 77873b72
......@@ -103,8 +103,9 @@ class Controller:
"useAoSoA2" : args["useAoSoA2"],
"predictorRecompute" : args["predictorRecompute"],
"advancedStopCriterion" : False, #TODO JMG put as proper toolkit arg
"initialGuess" : "mixedPicard" #TODO JMG put as proper toolkit arg
#"initialGuess" : "default" #TODO JMG put as proper toolkit arg
#"initialGuess" : "mixedPicard", #TODO JMG put as proper toolkit arg
"initialGuess" : "default", #TODO JMG put as proper toolkit arg
"useSinglePrecision" : False # TODO JMG test, only supported by linear splitCK aosoa2
})
self.config["useSourceOrNCP"] = self.config["useSource"] or self.config["useNCP"]
elif self.config["kernelType"] == "limiter":
......
......@@ -41,7 +41,10 @@ class FusedSpaceTimePredictorVolumeIntegralModel(AbstractModelBaseClass):
if self.context["useSplitCK"]:
if self.context["useVectPDE"]:
if self.context["useAoSoA2"]:
template = "fusedSPTVI_linear_split_ck_aosoa2_cpp.template"
if self.context["useSinglePrecision"]:
template = "SP_fusedSPTVI_linear_split_ck_aosoa2_cpp.template"
else:
template = "fusedSPTVI_linear_split_ck_aosoa2_cpp.template"
else:
template = "fusedSPTVI_linear_split_ck_vect_cpp.template"
else:
......@@ -100,26 +103,39 @@ class FusedSpaceTimePredictorVolumeIntegralModel(AbstractModelBaseClass):
nDof3 = nDof2*nDof
nDof3D = self.context["nDof3D"]
nDofPad = self.context["nDofPad"]
nDof2Pad = self.context["nDof2Pad"]
nDof2Pad = self.context["nDof2Pad"]
nDim = self.context["nDim"]
prec = "DP" if not self.context["useSinglePrecision"] else "SP" # only used for Linear SplitCK AoSoA2
# LINEAR
if self.context["isLinear"]:
if self.context["useSplitCK"]:
if self.context["useVectPDE"]:
if self.context["useAoSoA2"]: #split_ck aosoa2
# Linear SplitCK AoSoA2
if self.context["useFlux"]:
if self.context["useMaterialParam"]:
self.context["matmulConfigs"]["flux_x_sck_aosoa2"] = MatmulConfig(nDof, nDof, nDof, nDofPad , nDof, nDof , 1, 0, 1, 1, 1, "flux_x_sck_aosoa2") # beta, 0 => overwrite C
self.context["matmulConfigs"]["flux_y_sck_aosoa2"] = MatmulConfig(nDof, nDof, nDof, nDof , nDofPad, nDof , 1, 0, 1, 1, 1, "flux_y_sck_aosoa2") # beta, 0 => overwrite C
self.context["matmulConfigs"]["flux_z_sck_aosoa2"] = MatmulConfig(nDof2Pad*nVar, nDof, nDof, nDof2Pad*nVar , nDofPad, nDof2Pad*nVar , 1, 0, 1, 1, 1, "flux_z_sck_aosoa2") # beta, 0 => overwrite C
self.context["matmulConfigs"]["flux_x_sck_aosoa2"] = MatmulConfig(nDof, nDof, nDof, nDofPad , nDof, nDof , 1, 0, 1, 1, 1, "flux_x_sck_aosoa2", precision=prec) # beta, 0 => overwrite C
self.context["matmulConfigs"]["flux_y_sck_aosoa2"] = MatmulConfig(nDof, nDof, nDof, nDof , nDofPad, nDof , 1, 0, 1, 1, 1, "flux_y_sck_aosoa2", precision=prec) # beta, 0 => overwrite C
if self.context["nDim"]>=3:
self.context["matmulConfigs"]["flux_z_sck_aosoa2"] = MatmulConfig(nDof2Pad*nVar, nDof, nDof, nDof2Pad*nVar , nDofPad, nDof2Pad*nVar , 1, 0, 1, 1, 1, "flux_z_sck_aosoa2", precision=prec) # beta, 0 => overwrite C
else:
self.context["matmulConfigs"]["flux_x_sck_aosoa2"] = MatmulConfig(nDof, nDof, nDof, nDofPad , nDof, nDof , 1, 1, 1, 1, 1, "flux_x_sck_aosoa2")
self.context["matmulConfigs"]["flux_y_sck_aosoa2"] = MatmulConfig(nDof, nDof, nDof, nDof , nDofPad, nDof , 1, 1, 1, 1, 1, "flux_y_sck_aosoa2")
self.context["matmulConfigs"]["flux_z_sck_aosoa2"] = MatmulConfig(nDof2Pad*nVar, nDof, nDof, nDof2Pad*nVar , nDofPad, nDof2Pad*nVar , 1, 1, 1, 1, 1, "flux_z_sck_aosoa2")
self.context["matmulConfigs"]["gradQ_x_sck_aosoa2"] = MatmulConfig(nDof, nDof, nDof, nDofPad , nDof, nDof , 1, 0, 1, 1, 1, "gradQ_x_sck_aosoa2") # beta, 0 => overwrite C
self.context["matmulConfigs"]["gradQ_y_sck_aosoa2"] = MatmulConfig(nDof, nDof, nDof, nDof , nDofPad, nDof , 1, 0, 1, 1, 1, "gradQ_y_sck_aosoa2") # beta, 0 => overwrite C
self.context["matmulConfigs"]["gradQ_z_sck_aosoa2"] = MatmulConfig(nDof2Pad*nVar, nDof, nDof, nDof2Pad*nVar, nDofPad, nDof2Pad*nVar, 1, 0, 1, 1, 1, "gradQ_z_sck_aosoa2") # beta, 0 => overwrite C
else:# split_ck vect
self.context["matmulConfigs"]["flux_x_sck_aosoa2"] = MatmulConfig(nDof, nDof, nDof, nDofPad , nDof, nDof , 1, 1, 1, 1, 1, "flux_x_sck_aosoa2", precision=prec)
self.context["matmulConfigs"]["flux_y_sck_aosoa2"] = MatmulConfig(nDof, nDof, nDof, nDof , nDofPad, nDof , 1, 1, 1, 1, 1, "flux_y_sck_aosoa2", precision=prec)
if self.context["nDim"]>=3:
self.context["matmulConfigs"]["flux_z_sck_aosoa2"] = MatmulConfig(nDof2Pad*nVar, nDof, nDof, nDof2Pad*nVar , nDofPad, nDof2Pad*nVar , 1, 1, 1, 1, 1, "flux_z_sck_aosoa2", precision=prec)
self.context["matmulConfigs"]["gradQ_x_sck_aosoa2"] = MatmulConfig(nDof, nDof, nDof, nDofPad , nDof, nDof , 1, 0, 1, 1, 1, "gradQ_x_sck_aosoa2", precision=prec) # beta, 0 => overwrite C
self.context["matmulConfigs"]["gradQ_y_sck_aosoa2"] = MatmulConfig(nDof, nDof, nDof, nDof , nDofPad, nDof , 1, 0, 1, 1, 1, "gradQ_y_sck_aosoa2", precision=prec) # beta, 0 => overwrite C
if self.context["nDim"]>=3:
self.context["matmulConfigs"]["gradQ_z_sck_aosoa2"] = MatmulConfig(nDof2Pad*nVar, nDof, nDof, nDof2Pad*nVar, nDofPad, nDof2Pad*nVar, 1, 0, 1, 1, 1, "gradQ_z_sck_aosoa2", precision=prec) # beta, 0 => overwrite C
# Linear SplitCK vect (AoSoA)
else:
if self.context["useFlux"]:
if self.context["useMaterialParam"]:
self.context["matmulConfigs"]["flux_x_sck_vect"] = MatmulConfig(nDofPad, nVar, nDof, nDofPad , nDofPad, nDofPad , 1, 0, 1, 1, 1, "flux_x_sck_vect") # beta, 0 => overwrite C
......@@ -133,7 +149,10 @@ class FusedSpaceTimePredictorVolumeIntegralModel(AbstractModelBaseClass):
self.context["matmulConfigs"]["gradQ_y_sck_vect"] = MatmulConfig(nDofPad*nVar, nDof, nDof, nDofPad*nVar , nDofPad, nDofPad*nVar , 1, 0, 1, 1, 1, "gradQ_y_sck_vect") # beta, 0 => overwrite C
if self.context["nDim"]>=3:
self.context["matmulConfigs"]["gradQ_z_sck_vect"] = MatmulConfig(nDofPad*nVar*nDof, nDof, nDof, nDofPad*nVar*nDof, nDofPad, nDofPad*nVar*nDof, 1, 0, 1, 1, 1, "gradQ_z_sck_vect") # beta, 0 => overwrite C
else: # split_ck scalar
# Linear SplitCK scalar
else:
if self.context["useFlux"]:
self.context["matmulConfigs"]["flux_x_sck"] = MatmulConfig(nVarPad, nDof, nDof, nVarPad , nDofPad, nVarPad , 1, 1, 1, 1, 1, "flux_x_sck")
self.context["matmulConfigs"]["flux_y_sck"] = MatmulConfig(nVarPad, nDof, nDof, nVarPad , nDofPad, nVarPad*nDof , 1, 1, 1, 1, 1, "flux_y_sck")
......@@ -143,7 +162,10 @@ class FusedSpaceTimePredictorVolumeIntegralModel(AbstractModelBaseClass):
self.context["matmulConfigs"]["gradQ_y_sck"] = MatmulConfig(nVarPad, nDof, nDof, nVarPad*nDof , nDofPad, nVarPad*nDof , 1, 0, 1, 1, 1, "gradQ_y_sck") # beta, 0 => overwrite C
if self.context["nDim"]>=3:
self.context["matmulConfigs"]["gradQ_z_sck"] = MatmulConfig(nVarPad, nDof, nDof, nVarPad*nDof2, nDofPad, nVarPad*nDof2, 1, 0, 1, 1, 1, "gradQ_z_sck", "nopf", "gemm") # beta, 0 => overwrite C
else: # default linear
# Linear default
else:
if self.context["useFlux"]:
self.context["matmulConfigs"]["flux_x"] = MatmulConfig(nVarPad, nDof, nDof, nVarPad , nDofPad, nVarPad, 1, 0, 1, 1, 1, "flux_x") # beta, 0 => overwrite C
self.context["matmulConfigs"]["flux_y"] = MatmulConfig(nVarPad, nDof, nDof, nVarPad*nDof , nDofPad, nVarPad, 1, 0, 1, 1, 1, "flux_y") # beta, 0 => overwrite C
......@@ -155,10 +177,15 @@ class FusedSpaceTimePredictorVolumeIntegralModel(AbstractModelBaseClass):
if self.context["nDim"]>=3:
self.context["matmulConfigs"]["gradQ_z"] = MatmulConfig(nVar, nDof, nDof, nDataPad*nDof2, nDofPad, nVarPad*nDof2, 1, 1, 1, 1, 1, "gradQ_z")
else: #NonLinear
# NONLINEAR
else:
if self.context["predictorRecompute"]: # TODO JMG matmuls for gradQ, rhs and lduh are exactly the same...
if self.context["useVectPDE"]:
if self.context["useAoSoA2"]: # aosoa2
# Nonlinear PredictorRecompute AoSoA2
if self.context["useFlux"]:
self.context["matmulConfigs"]["rhs_x"] = MatmulConfig(nDof, nDof, nDof, nDofPad, nDof, nDof, 1, 1, 1, 1, 1, "rhs_x", prefetchInput="B", prefetchOutput="C")
self.context["matmulConfigs"]["rhs_y"] = MatmulConfig(nDof, nDof, nDof, nDof, nDofPad, nDof, 1, 1, 1, 1, 1, "rhs_y", prefetchInput="A", prefetchOutput="C")
......@@ -174,6 +201,9 @@ class FusedSpaceTimePredictorVolumeIntegralModel(AbstractModelBaseClass):
if self.context["nDim"]>=3:
self.context["matmulConfigs"]["gradQ_z"] = MatmulConfig(nDof2Pad*nVar, nDof, nDof, nDof2Pad*nVar, nDofPad, nDof2Pad*nVar, 1, 1, 1, 1, 1, "gradQ_z")
self.context["matmulConfigs"]["lqi"] = MatmulConfig(nDof2Pad, nDof, nDof, nDof2Pad*nVar*nDof3D, nDofPad, nDof2Pad, 1, 0, 1, 1, 1, "lqi") # beta, 0 => overwrite C
# Nonlinear PredictorRecompute vect (AoSoA)
else:
if self.context["useFlux"]:
self.context["matmulConfigs"]["rhs_x"] = MatmulConfig(nDofPad, nVar, nDof, nDofPad, nDofPad, nDofPad , 1, 1, 1, 1, 1, "rhs_x", prefetchInput="B", prefetchOutput="C")
......@@ -190,7 +220,10 @@ class FusedSpaceTimePredictorVolumeIntegralModel(AbstractModelBaseClass):
if self.context["nDim"]>=3:
self.context["matmulConfigs"]["gradQ_z"] = MatmulConfig(nDofPad*nVar*nDof, nDof, nDof, nDofPad*nVar*nDof, nDofPad, nDofPad*nVar*nDof, 1, 1, 1, 1, 1, "gradQ_z")
self.context["matmulConfigs"]["lqi"] = MatmulConfig(nDofPad*nVar, nDof, nDof, nDofPad*nVar*nDof*nDof3D, nDofPad, nDofPad*nVar, 1, 0, 1, 1, 1, "lqi") # beta, 0 => overwrite C
else: #scalar predictor recompute
# Nonlinear PredictorRecompute scalar
else:
if self.context["useFlux"]:
self.context["matmulConfigs"]["rhs_x"] = MatmulConfig(nVarPad, nDof, nDof, nVarPad , nDofPad, nVarPad , 1, 1, 1, 1, 1, "rhs_x")
self.context["matmulConfigs"]["rhs_y"] = MatmulConfig(nVarPad, nDof, nDof, nVarPad*nDof , nDofPad, nVarPad*nDof , 1, 1, 1, 1, 1, "rhs_y")
......@@ -206,7 +239,10 @@ class FusedSpaceTimePredictorVolumeIntegralModel(AbstractModelBaseClass):
if self.context["nDim"]>=3:
self.context["matmulConfigs"]["gradQ_z"] = MatmulConfig(nVarPad, nDof, nDof, nVarPad*nDof2, nDofPad, nVarPad*nDof2, 1, 1, 1, 1, 1, "gradQ_z")
self.context["matmulConfigs"]["lqi"] = MatmulConfig(nVarPad, nDof, nDof, nVarPad*(nDof**nDim), nDofPad, nVarPad, 1, 0, 1, 1, 1, "lqi") # beta, 0 => overwrite C
else: # default nonlinear
# Nonlinear default
else:
if self.context["useFlux"]:
self.context["matmulConfigs"]["rhs_x"] = MatmulConfig(nVarPad, nDof, nDof, nVarPad , nDofPad, nVarPad , 1, 1, 1, 1, 1, "rhs_x")
self.context["matmulConfigs"]["rhs_y"] = MatmulConfig(nVarPad, nDof, nDof, nVarPad*nDof , nDofPad, nVarPad*nDof , 1, 1, 1, 1, 1, "rhs_y")
......
......@@ -26,6 +26,18 @@ double* {{codeNamespace}}::FRCoeff;
double** {{codeNamespace}}::fineGridProjector1d;
double** {{codeNamespace}}::fineGridProjector1d_T_weighted;
{% if useSinglePrecision %}
// Single-precision (float) definitions of the DG matrix pointers, identified by the _SP suffix.
// This section is only emitted when the useSinglePrecision template flag is set.
float* {{codeNamespace}}::Kxi_SP;
float* {{codeNamespace}}::Kxi_T_SP;
float* {{codeNamespace}}::iK1_T_SP;
float* {{codeNamespace}}::dudx_SP;
float* {{codeNamespace}}::dudx_T_SP;
float* {{codeNamespace}}::FLCoeff_SP;
float* {{codeNamespace}}::FRCoeff_SP;
// Arrays of per-index pointers (float**), in contrast to the flat float* buffers above.
float** {{codeNamespace}}::fineGridProjector1d_SP;
float** {{codeNamespace}}::fineGridProjector1d_T_weighted_SP;
{% endif %}
void {{codeNamespace}}::freeDGMatrices() {
_mm_free(FLCoeff);
......@@ -45,6 +57,26 @@ void {{codeNamespace}}::freeDGMatrices() {
_mm_free(fineGridProjector1d_T_weighted[1]);
_mm_free(fineGridProjector1d_T_weighted[2]);
delete [] fineGridProjector1d_T_weighted;
{% if useSinglePrecision %}
_mm_free(FLCoeff_SP);
_mm_free(FRCoeff_SP);
_mm_free(dudx_SP);
_mm_free(dudx_T_SP);
_mm_free(iK1_T_SP);
_mm_free(Kxi_SP);
_mm_free(Kxi_T_SP);
_mm_free(fineGridProjector1d_SP[0]);
_mm_free(fineGridProjector1d_SP[1]);
_mm_free(fineGridProjector1d_SP[2]);
delete [] fineGridProjector1d_SP;
_mm_free(fineGridProjector1d_T_weighted_SP[0]);
_mm_free(fineGridProjector1d_T_weighted_SP[1]);
_mm_free(fineGridProjector1d_T_weighted_SP[2]);
delete [] fineGridProjector1d_T_weighted_SP;
{% endif %}
}
......@@ -115,4 +147,72 @@ void {{codeNamespace}}::initDGMatrices() {
fineGridProjector1d_T_weighted[2][{{i}}] = {{"{:.15e}".format(fineGridProjector1d_T_weighted_2[i])}};
{% endfor %}
{% if useSinglePrecision %}
FLCoeff_SP = (float *) _mm_malloc(sizeof(float)*{{nDofPad}}, ALIGNMENT);
FRCoeff_SP = (float *) _mm_malloc(sizeof(float)*{{nDofPad}}, ALIGNMENT);
//note: FLCoeff is also F0
dudx_SP = (float *) _mm_malloc(sizeof(float)*{{nDofPad*nDof}}, ALIGNMENT);
dudx_T_SP = (float *) _mm_malloc(sizeof(float)*{{nDofPad*nDof}}, ALIGNMENT);
iK1_T_SP = (float *) _mm_malloc(sizeof(float)*{{nDofPad*nDof}}, ALIGNMENT);
Kxi_SP = (float *) _mm_malloc(sizeof(float)*{{nDofPad*nDof}}, ALIGNMENT);
Kxi_T_SP = (float *) _mm_malloc(sizeof(float)*{{nDofPad*nDof}}, ALIGNMENT);
fineGridProjector1d_SP = new float* [3];
fineGridProjector1d_T_weighted_SP = new float* [3];
for(int i=0; i<3; i++) {
fineGridProjector1d_SP[i] = (float *) _mm_malloc(sizeof(float)*{{nDofPad*nDof}}, ALIGNMENT);
fineGridProjector1d_T_weighted_SP[i] = (float *) _mm_malloc(sizeof(float)*{{nDofPad*nDof}}, ALIGNMENT);
}
{% for i in nDofPad_seq %}
FLCoeff_SP[{{i}}] = {{"{:.15e}".format(FLCoeff[i])}};
{% endfor %}
{% for i in nDofPad_seq %}
FRCoeff_SP[{{i}}] = {{"{:.15e}".format(FRCoeff[i])}};
{% endfor %}
{% for i in nDofPadTimesnDof_seq %}
dudx_SP[{{i}}] = {{"{:.15e}".format(dudx[i])}};
{% endfor %}
{% for i in nDofPadTimesnDof_seq %}
dudx_T_SP[{{i}}] = {{"{:.15e}".format(dudx_T[i])}};
{% endfor %}
{% for i in nDofPadTimesnDof_seq %}
iK1_T_SP[{{i}}] = {{"{:.15e}".format(iK1_T[i])}};
{% endfor %}
{% for i in nDofPadTimesnDof_seq %}
Kxi_SP[{{i}}] = {{"{:.15e}".format(Kxi[i])}};
{% endfor %}
{% for i in nDofPadTimesnDof_seq %}
Kxi_T_SP[{{i}}] = {{"{:.15e}".format(Kxi_T[i])}};
{% endfor %}
{% for i in nDofPadTimesnDof_seq %}
fineGridProjector1d_SP[0][{{i}}] = {{"{:.15e}".format(fineGridProjector1d_0[i])}};
{% endfor %}
{% for i in nDofPadTimesnDof_seq %}
fineGridProjector1d_SP[1][{{i}}] = {{"{:.15e}".format(fineGridProjector1d_1[i])}};
{% endfor %}
{% for i in nDofPadTimesnDof_seq %}
fineGridProjector1d_SP[2][{{i}}] = {{"{:.15e}".format(fineGridProjector1d_2[i])}};
{% endfor %}
{% for i in nDofPadTimesnDof_seq %}
fineGridProjector1d_T_weighted_SP[0][{{i}}] = {{"{:.15e}".format(fineGridProjector1d_T_weighted_0[i])}};
{% endfor %}
{% for i in nDofPadTimesnDof_seq %}
fineGridProjector1d_T_weighted_SP[1][{{i}}] = {{"{:.15e}".format(fineGridProjector1d_T_weighted_1[i])}};
{% endfor %}
{% for i in nDofPadTimesnDof_seq %}
fineGridProjector1d_T_weighted_SP[2][{{i}}] = {{"{:.15e}".format(fineGridProjector1d_T_weighted_2[i])}};
{% endfor %}
{% endif %}{# useSinglePrecision #}
}
......@@ -34,6 +34,19 @@ extern double *FRCoeff;
extern double ** fineGridProjector1d;
extern double ** fineGridProjector1d_T_weighted; // [k][i*nDof+j] = fineGridProjector1d[k][j*nDof+i] * weight[j] / weight[i] / 3.0
{% if useSinglePrecision %}
// Single-precision (float) extern declarations, identified by the _SP suffix.
// This section is only emitted when the useSinglePrecision template flag is set.
extern float *Kxi_SP;
extern float *Kxi_T_SP;
extern float *iK1_T_SP; //note: the generic version of iK1 is actually transposed
extern float *dudx_SP;
extern float *dudx_T_SP;
extern float *FLCoeff_SP;
extern float *FRCoeff_SP;
extern float ** fineGridProjector1d_SP;
extern float ** fineGridProjector1d_T_weighted_SP; // [k][i*nDof+j] = fineGridProjector1d[k][j*nDof+i] * weight[j] / weight[i] / 3.0
{% endif %}
{% for namespaceName in codeNamespaceList %}
}
{% endfor %}
......
......@@ -23,6 +23,18 @@ double* {{codeNamespace}}::weights4;
double* {{codeNamespace}}::iweights1;
double* {{codeNamespace}}::iweights3;
double* {{codeNamespace}}::nodes;
{% if useSinglePrecision %}
// Single-precision (float) definitions of the quadrature weight and node pointers, identified by the _SP suffix.
// This section is only emitted when the useSinglePrecision template flag is set.
float* {{codeNamespace}}::weights1_SP;
float* {{codeNamespace}}::weights2_SP;
float* {{codeNamespace}}::weights3_SP;
float* {{codeNamespace}}::weights4_SP;
float* {{codeNamespace}}::iweights1_SP;
float* {{codeNamespace}}::iweights3_SP;
float* {{codeNamespace}}::nodes_SP;
{% endif %}
{% if kernelType=="limiter" %}
double* {{codeNamespace}}::uh2lob;
double* {{codeNamespace}}::dg2fv;
......@@ -37,6 +49,17 @@ void {{codeNamespace}}::freeQuadratureNodesAndWeights() {
_mm_free(iweights1);
_mm_free(iweights3);
_mm_free(nodes);
{% if useSinglePrecision %}
_mm_free(weights1_SP);
_mm_free(weights2_SP);
_mm_free(weights3_SP);
_mm_free(weights4_SP);
_mm_free(iweights1_SP);
_mm_free(iweights3_SP);
_mm_free(nodes_SP);
{% endif %}
{% if kernelType=="limiter" %}
_mm_free(uh2lob);
_mm_free(dg2fv);
......@@ -52,52 +75,97 @@ void {{codeNamespace}}::initQuadratureNodesAndWeights() {
iweights1 = (double *) _mm_malloc(sizeof(double)*{{w1Size }}, ALIGNMENT); //nDofPad
iweights3 = (double *) _mm_malloc(sizeof(double)*{{w3Size }}, ALIGNMENT); //2D: (nDof*nDof)Pad (== w1[i]*w1[j]), 3D: (nDof*nDof*nDof)Pad (== w1[i]*w1[j]*w1[k])
nodes = (double *) _mm_malloc(sizeof(double)*{{nDofPad}}, ALIGNMENT);
{% for i in w1_seq %}
weights1[{{i}}] = {{"{:.17e}".format(weights1[i])}};
{% endfor %}
{% for i in w2_seq %}
weights2[{{i}}] = {{"{:.17e}".format(weights2[i])}};
{% endfor %}
{% for i in w3_seq %}
weights3[{{i}}] = {{"{:.17e}".format(weights3[i])}};
{% endfor %}
{% for i in w4_seq %}
weights4[{{i}}] = {{"{:.17e}".format(weights4[i])}};
{% endfor %}
{% for i in w1_seq %}
iweights1[{{i}}] = {{"{:.17e}".format(iweights1[i])}};
{% endfor %}
{% for i in w3_seq %}
iweights3[{{i}}] = {{"{:.17e}".format(iweights3[i])}};
{% endfor %}
{% for i in quadrature_seq %}
nodes[{{i}}] = {{"{:.17e}".format(QuadratureNodes[i])}};
{% endfor %}
{% if useSinglePrecision %}
weights1_SP = (float *) _mm_malloc(sizeof(float)*{{w1Size }}, ALIGNMENT); //nDofPad
weights2_SP = (float *) _mm_malloc(sizeof(float)*{{w2Size }}, ALIGNMENT); //2D: nDofPad (==weight1), 3D: (nDof*nDof)Pad (== w1[i]*w1[j])
weights3_SP = (float *) _mm_malloc(sizeof(float)*{{w3Size }}, ALIGNMENT); //2D: (nDof*nDof)Pad (== w1[i]*w1[j]), 3D: (nDof*nDof*nDof)Pad (== w1[i]*w1[j]*w1[k])
weights4_SP = (float *) _mm_malloc(sizeof(float)*{{w4Size }}, ALIGNMENT); //2D: (nDof*nDof*nDof)Pad (== w1[i]*w1[j]*w1[k]), 3D: (nDof*nDof*nDof*nDof)Pad (== w1[i]*w1[j]*w1[k]*w1[l])
iweights1_SP = (float *) _mm_malloc(sizeof(float)*{{w1Size }}, ALIGNMENT); //nDofPad
iweights3_SP = (float *) _mm_malloc(sizeof(float)*{{w3Size }}, ALIGNMENT); //2D: (nDof*nDof)Pad (== w1[i]*w1[j]), 3D: (nDof*nDof*nDof)Pad (== w1[i]*w1[j]*w1[k])
nodes_SP = (float *) _mm_malloc(sizeof(float)*{{nDofPad}}, ALIGNMENT);
{% if kernelType=="limiter" %}
uh2lob = (double *) _mm_malloc(sizeof(double)*{{uh2lobSize}}, ALIGNMENT); //nDof*nDofPad
dg2fv = (double *) _mm_malloc(sizeof(double)*{{dg2fvSize }}, ALIGNMENT); //nDof*nDofLimPad
fv2dg = (double *) _mm_malloc(sizeof(double)*{{fv2dgSize }}, ALIGNMENT); //nDofLim*nDofPad
uh2lob_SP = (float *) _mm_malloc(sizeof(float)*{{uh2lobSize}}, ALIGNMENT); //nDof*nDofPad
dg2fv_SP = (float *) _mm_malloc(sizeof(float)*{{dg2fvSize }}, ALIGNMENT); //nDof*nDofLimPad
fv2dg_SP = (float *) _mm_malloc(sizeof(float)*{{fv2dgSize }}, ALIGNMENT); //nDofLim*nDofPad
{% endif %}
{% for i in w1_seq %}
weights1[{{i}}] = {{"{:.15e}".format(weights1[i])}};
weights1_SP[{{i}}] = {{"{:.17e}".format(weights1[i])}};
{% endfor %}
{% for i in w2_seq %}
weights2[{{i}}] = {{"{:.15e}".format(weights2[i])}};
weights2_SP[{{i}}] = {{"{:.17e}".format(weights2[i])}};
{% endfor %}
{% for i in w3_seq %}
weights3[{{i}}] = {{"{:.15e}".format(weights3[i])}};
weights3_SP[{{i}}] = {{"{:.17e}".format(weights3[i])}};
{% endfor %}
{% for i in w4_seq %}
weights4[{{i}}] = {{"{:.15e}".format(weights4[i])}};
weights4_SP[{{i}}] = {{"{:.17e}".format(weights4[i])}};
{% endfor %}
{% for i in w1_seq %}
iweights1[{{i}}] = {{"{:.15e}".format(iweights1[i])}};
iweights1_SP[{{i}}] = {{"{:.17e}".format(iweights1[i])}};
{% endfor %}
{% for i in w3_seq %}
iweights3[{{i}}] = {{"{:.15e}".format(iweights3[i])}};
iweights3_SP[{{i}}] = {{"{:.17e}".format(iweights3[i])}};
{% endfor %}
{% for i in quadrature_seq %}
nodes[{{i}}] = {{"{:.15e}".format(QuadratureNodes[i])}};
nodes_SP[{{i}}] = {{"{:.17e}".format(QuadratureNodes[i])}};
{% endfor %}
{% endif %}{# useSinglePrecision #}
{% if kernelType=="limiter" %}
uh2lob = (double *) _mm_malloc(sizeof(double)*{{uh2lobSize}}, ALIGNMENT); //nDof*nDofPad
dg2fv = (double *) _mm_malloc(sizeof(double)*{{dg2fvSize }}, ALIGNMENT); //nDof*nDofLimPad
fv2dg = (double *) _mm_malloc(sizeof(double)*{{fv2dgSize }}, ALIGNMENT); //nDofLim*nDofPad
{% endif %}
{% if kernelType=="limiter" %}
{% for i in uh2lob_seq %}
uh2lob[{{i}}] = {{"{:.15e}".format(uh2lob[i])}};
uh2lob[{{i}}] = {{"{:.17e}".format(uh2lob[i])}};
{% endfor %}
{% for i in dg2fv_seq %}
dg2fv[{{i}}] = {{"{:.15e}".format(dg2fv[i])}};
dg2fv[{{i}}] = {{"{:.17e}".format(dg2fv[i])}};
{% endfor %}
{% for i in fv2dg_seq %}
fv2dg[{{i}}] = {{"{:.15e}".format(fv2dg[i])}};
fv2dg[{{i}}] = {{"{:.17e}".format(fv2dg[i])}};
{% endfor %}
{% endif %}
{% endif %}{# limiter #}
}
......@@ -31,6 +31,17 @@ extern double *weights4;
extern double *iweights1;
extern double *iweights3;
{% if useSinglePrecision %}
// Single-precision (float) extern declarations of the quadrature nodes and weights, identified by the _SP suffix.
// This section is only emitted when the useSinglePrecision template flag is set.
extern float *nodes_SP;
extern float *weights1_SP;
extern float *weights2_SP;
extern float *weights3_SP;
extern float *weights4_SP;
extern float *iweights1_SP;
extern float *iweights3_SP;
{% endif %}
{% if kernelType=="limiter" %}
// limiter projection matrices
extern double* uh2lob;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment