In January 2021 we will introduce a 10 GB quota for project repositories. Higher limits for individual projects will be available on request. Please see https://doku.lrz.de/display/PUBLIC/GitLab for more information.

Commit 8c428607 authored by Jean-Matthieu Gallard's avatar Jean-Matthieu Gallard

KernelGen - modify weights behavior in kernel_recompute scheme + add WiP vect...

KernelGen - modify weights behavior in kernel_recompute scheme + add WiP vect scheme (compile but error)
parent e2733dd2
......@@ -62,22 +62,23 @@ class ArgumentParser:
("architecture", ArgType.MandatoryString, "the microarchitecture of the target device"),
# optional arguments
("useFlux", ArgType.OptionalBool, "enable flux"),
("useFluxVect", ArgType.OptionalBool, "enable vectorized flux (include useFlux)"),
("useFluxVect", ArgType.OptionalBool, "enable vectorized flux (include useFlux)"), #TODO JMG: legacy, use usePDEVect instead
("useViscousFlux", ArgType.OptionalBool, "enable viscous flux"),
("useNCP", ArgType.OptionalBool, "enable non conservative product"),
("useNCPVect", ArgType.OptionalBool, "enable vectorized non conservative product (include useNCP)"),
("useNCPVect", ArgType.OptionalBool, "enable vectorized non conservative product (include useNCP)"), #TODO JMG: legacy, use usePDEVect instead
("useSource", ArgType.OptionalBool, "enable source terms"),
("useSourceVect", ArgType.OptionalBool, "enable vectorized source terms (include useSource)"),
("useSourceVect", ArgType.OptionalBool, "enable vectorized source terms (include useSource)"), #TODO JMG: legacy, use usePDEVect instead
("useFusedSource", ArgType.OptionalBool, "enable fused source terms (include useSource)"),
("useFusedSourceVect", ArgType.OptionalBool, "enable vectorized fused source terms (include useFusedSource and useSourceVect)"),
("useFusedSourceVect", ArgType.OptionalBool, "enable vectorized fused source terms (include useFusedSource and useSourceVect)"), #TODO JMG: legacy, use usePDEVect instead
("useMaterialParam", ArgType.OptionalBool, "enable material parameters"),
("useMaterialParamVect",ArgType.OptionalBool, "enable vectorized material parameters"),
("useMaterialParamVect",ArgType.OptionalBool, "enable vectorized material parameters"), #TODO JMG: legacy, use usePDEVect instead
("usePointSources", ArgType.OptionalInt , "enable numberOfPointSources point sources", -1, "numberOfPointSources"),
("useCERKGuess", ArgType.OptionalBool, "use CERK for SpaceTimePredictor inital guess (nonlinear only)"),
("useSplitCKScalar", ArgType.OptionalBool, "use split Cauchy–Kowalevski formulation (linear only)"),
("useSplitCKVect", ArgType.OptionalBool, "use split Cauchy–Kowalevski formulation with vect PDE (linear only)"),
("useSplitCKScalar", ArgType.OptionalBool, "use split Cauchy–Kowalevski formulation (linear only)"), #TODO JMG: legacy, merge with vect and use usePDEVect
("useSplitCKVect", ArgType.OptionalBool, "use split Cauchy–Kowalevski formulation with vect PDE (linear only)"), #TODO JMG: legacy, merge with scalar and use usePDEVect
("useGaussLobatto", ArgType.OptionalBool, "use Gauss Lobatto Quadrature instead of Gauss Legendre"),
("predictorRecompute", ArgType.OptionalBool, "predictor step will recompute the PDE instead of relying on stored values from the picard loop (nonlinear only)"),
("useVectPDE", ArgType.OptionalBool, "use vectorized PDE terms (applies when present to: Flux, NCP, Source, FusedSource and MaterialParam)"),
("tempVarsOnStack", ArgType.OptionalBool, "put the big scratch arrays on the stack instead of the heap (you can use ulimit -s to increase the stack size)")
]
......
......@@ -100,6 +100,7 @@ class Controller:
"useCERKGuess" : args["useCERKGuess"],
"useSplitCKScalar" : args["useSplitCKScalar"],
"useSplitCKVect" : args["useSplitCKVect"],
"useVectPDE" : args["useVectPDE"],
"predictorRecompute" : args["predictorRecompute"]
})
self.config["useSourceOrNCP"] = self.config["useSource"] or self.config["useNCP"]
......
......@@ -98,17 +98,29 @@ class ConfigurationParametersModel(AbstractModelBaseClass):
else:
# nonlinear
if self.context["predictorRecompute"]:
self.context["lQiSize"] = nVarPad*(nDof**(nDim+1))
self.context["lQhiSize"] = nVarPad*(nDof**nDim)
if nPar > 0:
self.context["lPiSize"] = nParPad*(nDof**nDim)
if self.context["useFlux"]:
self.context["lFhiSize"] = nVarPad*(nDof**nDim)*nDim
if self.context["useSource"] or self.context["useNCP"]:
self.context["lShiSize"] = nVarPad*(nDof**nDim)
if self.context["useNCP"] or self.context["useViscousFlux"]:
self.context["gradQSize"] = nVarPad*(nDof**nDim)*nDim
else:
if self.context["useVectPDE"]:
self.context["lQiSize"] = nDofPad*nVar*(nDof**nDim)
self.context["lQhiSize"] = nDofPad*nVar*nDof*nDof3D
if nPar > 0:
self.context["lPiSize"] = nDofPad*nPar*nDof*nDof3D
if self.context["useFlux"]:
self.context["lFhiSize"] = nDofPad*nVar*nDof*nDof3D*nDim
if self.context["useSource"] or self.context["useNCP"]:
self.context["lShiSize"] = nDofPad*nVar*nDof*nDof3D
if self.context["useNCP"] or self.context["useViscousFlux"]:
self.context["gradQSize"] = nDofPad*nVar*nDof*nDof3D*nDim
else: #scalar predictorRecompute
self.context["lQiSize"] = nVarPad*(nDof**(nDim+1))
self.context["lQhiSize"] = nVarPad*(nDof**nDim)
if nPar > 0:
self.context["lPiSize"] = nParPad*(nDof**nDim)
if self.context["useFlux"]:
self.context["lFhiSize"] = nVarPad*(nDof**nDim)*nDim
if self.context["useSource"] or self.context["useNCP"]:
self.context["lShiSize"] = nVarPad*(nDof**nDim)
if self.context["useNCP"] or self.context["useViscousFlux"]:
self.context["gradQSize"] = nVarPad*(nDof**nDim)*nDim
else: # default nonlinear
self.context["lQiSize"] = nDataPad*(nDof**(nDim+1))
self.context["lQhiSize"] = nDataPad*(nDof**nDim)
if self.context["useFlux"]:
......
......@@ -38,12 +38,16 @@ class QuadratureModel(AbstractModelBaseClass):
OtherQuadratureWeights, OtherQuadratureNodes = MathsUtils.getGaussLobatto(self.context["nDof"])
else:
raise ValueError("Quadrature type "+self.context["quadratureType"]+" not supported." )
weightsVector = MathsUtils.vectorPad(QuadratureWeights, self.context["nDofPad"] - self.context["nDof"])
self.context["weights1"] = weightsVector
self.context["w1Size"] = len(self.context["weights1"])
self.context["w1_seq"] = range(self.context["w1Size"])
#inverse of weights1
self.context["iweights1"] = [1.0/self.context["weights1"][i] if self.context["weights1"][i] != 0. else 0. for i in self.context["w1_seq"]]
if(self.context["nDim"] == 2):
# weightsVector is QuadratureWeights itself
weightsVector = MathsUtils.vectorPad(QuadratureWeights, self.controller.getPadSize(len(QuadratureWeights)))
......@@ -57,6 +61,13 @@ class QuadratureModel(AbstractModelBaseClass):
self.context["weights3"] = weightsVector
self.context["w3Size"] = len(self.context["weights3"])
self.context["w3_seq"] = range(self.context["w3Size"])
# all combinations of three weights, written as an 1D array
weightsVector = [QuadratureWeights[i] * QuadratureWeights[j] * QuadratureWeights[k] for i in range(self.context["nDof"]) for j in range(self.context["nDof"]) for k in range(self.context["nDof"])]
weightsVector = MathsUtils.vectorPad(weightsVector, self.controller.getPadSize(len(weightsVector)))
self.context["weights4"] = weightsVector
self.context["w4Size"] = len(self.context["weights4"])
self.context["w4_seq"] = range(self.context["w4Size"])
elif(self.context["nDim"] == 3):
# all combinations of two weights, written as an 1D array
......@@ -72,28 +83,35 @@ class QuadratureModel(AbstractModelBaseClass):
self.context["weights3"] = weightsVector
self.context["w3Size"] = len(self.context["weights3"])
self.context["w3_seq"] = range(self.context["w3Size"])
# all combination of four weights, written as an 1D array
weightsVector = [QuadratureWeights[i] * QuadratureWeights[j] * QuadratureWeights[k] * QuadratureWeights[l] for i in range(self.context["nDof"]) for j in range(self.context["nDof"]) for k in range(self.context["nDof"]) for l in range(self.context["nDof"])]
weightsVector = MathsUtils.vectorPad(weightsVector, self.controller.getPadSize(len(weightsVector)))
self.context["weights4"] = weightsVector
self.context["w4Size"] = len(self.context["weights4"])
self.context["w4_seq"] = range(self.context["w4Size"])
else:
print("QuadratureModel: nDim not supported")
# inverse of weight3
self.context["iweights3"] = [1.0/self.context["weights3"][i] if self.context["weights3"][i] != 0. else 0. for i in self.context["w3_seq"]]
self.context["QuadratureWeights"], self.context["QuadratureNodes"] = QuadratureWeights, QuadratureNodes
self.context["quadrature_seq"] = range(self.context["nDof"])
if(self.context["kernelType"]=="limiter"):
l_padSize = self.context["nDofPad"] - self.context["nDof"]
uh2lob = MathsUtils.assembleQuadratureConversion(QuadratureNodes, OtherQuadratureNodes, self.context["nDof"]) #TODO JMG adapt when allowing Lobatto as node
self.context["uh2lob"] = MathsUtils.matrixPadAndFlatten_ColMajor(uh2lob,l_padSize)
self.context["uh2lobSize"] = len(self.context["uh2lob"])
self.context["uh2lob_seq"] = range(self.context["uh2lobSize"])
l_padSize = self.context["nDofPad"] - self.context["nDof"]
dg2fv = MathsUtils.assembleDGToFV(QuadratureNodes, QuadratureWeights, self.context["nDof"], self.context["nDofLim"])
self.context["dg2fv"] = MathsUtils.matrixPadAndFlatten_ColMajor(dg2fv,l_padSize)
self.context["dg2fvSize"] = len(self.context["dg2fv"])
self.context["dg2fv_seq"] = range(self.context["dg2fvSize"])
l_padSize = self.context["nDofLimPad"] - self.context["nDofLim"]
fv2dg = MathsUtils.assembleFVToDG(dg2fv, QuadratureWeights, self.context["nDof"], self.context["nDofLim"])
self.context["fv2dg"] = MathsUtils.matrixPadAndFlatten_ColMajor(fv2dg,l_padSize)
......
......@@ -19,6 +19,8 @@
double* {{codeNamespace}}::weights1;
double* {{codeNamespace}}::weights2;
double* {{codeNamespace}}::weights3;
double* {{codeNamespace}}::weights4;
double* {{codeNamespace}}::iweights1;
double* {{codeNamespace}}::iweights3;
double* {{codeNamespace}}::nodes;
{% if kernelType=="limiter" %}
......@@ -31,6 +33,8 @@ void {{codeNamespace}}::freeQuadratureNodesAndWeights() {
_mm_free(weights1);
_mm_free(weights2);
_mm_free(weights3);
_mm_free(weights4);
_mm_free(iweights1);
_mm_free(iweights3);
_mm_free(nodes);
{% if kernelType=="limiter" %}
......@@ -44,6 +48,8 @@ void {{codeNamespace}}::initQuadratureNodesAndWeights() {
weights1 = (double *) _mm_malloc(sizeof(double)*{{w1Size }}, ALIGNMENT); //nDofPad
weights2 = (double *) _mm_malloc(sizeof(double)*{{w2Size }}, ALIGNMENT); //2D: nDofPad (==weight1), 3D: (nDof*nDof)Pad (== w1[i]*w1[j])
weights3 = (double *) _mm_malloc(sizeof(double)*{{w3Size }}, ALIGNMENT); //2D: (nDof*nDof)Pad (== w1[i]*w1[j]), 3D: (nDof*nDof*nDof)Pad (== w1[i]*w1[j]*w1[k])
weights4 = (double *) _mm_malloc(sizeof(double)*{{w4Size }}, ALIGNMENT); //2D: (nDof*nDof*nDof)Pad (== w1[i]*w1[j]*w1[k]), 3D: (nDof*nDof*nDof*nDof)Pad (== w1[i]*w1[j]*w1[k]*w1[l])
iweights1 = (double *) _mm_malloc(sizeof(double)*{{w1Size }}, ALIGNMENT); //nDofPad
iweights3 = (double *) _mm_malloc(sizeof(double)*{{w3Size }}, ALIGNMENT); //2D: (nDof*nDof)Pad (== w1[i]*w1[j]), 3D: (nDof*nDof*nDof)Pad (== w1[i]*w1[j]*w1[k])
nodes = (double *) _mm_malloc(sizeof(double)*{{nDofPad}}, ALIGNMENT);
{% if kernelType=="limiter" %}
......@@ -64,6 +70,14 @@ void {{codeNamespace}}::initQuadratureNodesAndWeights() {
weights3[{{i}}] = {{"{:.15e}".format(weights3[i])}};
{% endfor %}
{% for i in w4_seq %}
weights4[{{i}}] = {{"{:.15e}".format(weights4[i])}};
{% endfor %}
{% for i in w1_seq %}
iweights1[{{i}}] = {{"{:.15e}".format(iweights1[i])}};
{% endfor %}
{% for i in w3_seq %}
iweights3[{{i}}] = {{"{:.15e}".format(iweights3[i])}};
{% endfor %}
......
......@@ -27,6 +27,8 @@ extern double *nodes;
extern double *weights1;
extern double *weights2;
extern double *weights3;
extern double *weights4;
extern double *iweights1;
extern double *iweights3;
{% if kernelType=="limiter" %}
......
......@@ -694,6 +694,12 @@
"available-for" : ["optimised"],
"title" : "Recompute on-the-fly the PDE outputs (e.g. flux, source, ncp) during the predictor step to avoid storing them all during the picard iteration and saving memory requirement at the cost of more flops (nonlinear only).",
"default" : false
},
"vectorise_terms" : {
"type" : "boolean",
"available-for" : ["optimised"],
"title" : "WiP: Use vectorised user function formulations (SoA data layout) for PDE functions related to the terms' options: flux, source, ncp, viscous_flux and fusedsource. Only available for optimised kernels with either split_ck (linear) or predictor_recompute (nonlinear)",
"default" : false
}
}
},
......
......@@ -50,21 +50,22 @@ class KernelgeneratorModel:
"architecture" : solverContext["architecture"],
# Optional bool parameters (may set redundant flags and default false flag)
"useFlux" : solverContext["useFlux"],
"useFluxVect" : solverContext["useFluxVect"],
"useFluxVect" : solverContext["useFluxVect"], #TODO JMG: legacy, use usePDEVect instead
"useViscousFlux" : solverContext["useViscousFlux"],
"useNCP" : solverContext["useNCP"],
"useNCPVect" : solverContext["useNCPVect"],
"useNCPVect" : solverContext["useNCPVect"], #TODO JMG: legacy, use usePDEVect instead
"useSource" : solverContext["useSource"],
"useSourceVect" : solverContext["useSourceVect"],
"useSourceVect" : solverContext["useSourceVect"], #TODO JMG: legacy, use usePDEVect instead
"useFusedSource" : solverContext["useFusedSource"],
"useFusedSourceVect" : solverContext["useFusedSourceVect"],
"useFusedSourceVect" : solverContext["useFusedSourceVect"], #TODO JMG: legacy, use usePDEVect instead
"useMaterialParam" : solverContext["useMaterialParameters"],
"useMaterialParamVect" : solverContext["useMaterialParametersVect"],
"useMaterialParamVect" : solverContext["useMaterialParametersVect"], #TODO JMG: legacy
"useCERKGuess" : solverContext["useCERK"],
"useSplitCKScalar" : solverContext["useSplitCKScalar"],
"useSplitCKVect" : solverContext["useSplitCKVect"],
"useSplitCKScalar" : solverContext["useSplitCKScalar"], #TODO JMG: legacy, merge with vect and use usePDEVect
"useSplitCKVect" : solverContext["useSplitCKVect"], #TODO JMG: legacy, merge with scalar and use usePDEVect
"useGaussLobatto" : solverContext["basis"] == "lobatto",
"predictorRecompute" : solverContext["predictorRecompute"],
"useVectPDE" : solverContext["useVectPDE"],
# Optional int parameters (may set redundant flags)
"usePointSources" : solverContext["numberOfPointSources"] if solverContext["numberOfPointSources"] > 0 else -1,
"tempVarsOnStack" : solverContext["tempVarsOnStack"]
......
......@@ -223,6 +223,7 @@ class SolverController:
context["noTimeAveraging"] = kernel.get("space_time_predictor",{}).get("notimeavg",False)
context["noTimeAveraging_s"] = "true" if kernel.get("space_time_predictor",{}).get("notimeavg",False) else "false"
context["predictorRecompute"] = kernel.get("space_time_predictor",{}).get("predictor_recompute",False)
context["useVectPDE"] = kernel.get("space_time_predictor",{}).get("vectorise_terms",False)
context.update(self.buildKernelTermsContext(kernel["terms"]))
return context
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment