Commit 66f0ff2f authored by Jean-Matthieu Gallard
Browse files

KernelGen - add SinglePrecision_STP option (WiP: template not finished)

parent 4274526c
......@@ -79,7 +79,8 @@ class ArgumentParser:
("predictorRecompute", ArgType.OptionalBool, "predictor step will recompute the PDE instead of relying on stored values from the picard loop (nonlinear only)"),
("useVectPDE", ArgType.OptionalBool, "use vectorized PDE terms (applies when present to: Flux, NCP, Source, FusedSource and MaterialParam)"),
("useAoSoA2", ArgType.OptionalBool, "use AoSoA[2] data layout in SpaceTimePredictor kernel (WiP: linear only), requires useVectPDE"), #TODO JMG: WiP
("tempVarsOnStack", ArgType.OptionalBool, "put the big scratch arrays on the stack instead of the heap (you can use ulimit -s to increase the stack size)")
("tempVarsOnStack", ArgType.OptionalBool, "put the big scratch arrays on the stack instead of the heap (you can use ulimit -s to increase the stack size)"),
("singlePrecisionSTP", ArgType.OptionalBool, "EXPERIMENTAL: use single precision inside SpaceTime-Predictor kernel, only supported by linear AoSoA2, requires _SP variant of user functions")
]
# Limiter args
......
......@@ -105,7 +105,8 @@ class Controller:
"advancedStopCriterion" : False, #TODO JMG put as proper toolkit arg
#"initialGuess" : "mixedPicard", #TODO JMG put as proper toolkit arg
"initialGuess" : "default", #TODO JMG put as proper toolkit arg
"useSinglePrecision" : False # TODO JMG test, only supported by linear splitCK aosoa2
"singlePrecisionSTP" : args["singlePrecisionSTP"], # experiment, not supported by every kernel
"useSinglePrecision" : args["singlePrecisionSTP"] # should be enabled if single precision coeff matrices are required
})
self.config["useSourceOrNCP"] = self.config["useSource"] or self.config["useNCP"]
elif self.config["kernelType"] == "limiter":
......@@ -145,6 +146,10 @@ class Controller:
self.validateConfig(Configuration.simdWidth.keys())
self.config["vectSize"] = Configuration.simdWidth[self.config["architecture"]] #only initialize once architecture has been validated
self.config["cachelineSize"] = Configuration.cachelineSize[self.config["architecture"]] #only initialize once architecture has been validated
# if single precision is used, multiply SIMD and cache values by 2 (TODO JMG: WiP, this affects all the code instead of only the single precision kernels)
if self.config["useSinglePrecision"]:
self.config["vectSize"] *= 2
self.config["cachelineSize"] *= 2
self.baseContext = self.generateBaseContext() # default context build from config
self.matmulList = [] #list to store the tupple (fileName, matmulConfig) of all requested Matmul (used for gemmsGeneratorModel)
self.gemmList = [] #list to store the name of all generated gemms (used for gemmsCPPModel)
......
......@@ -41,7 +41,7 @@ class FusedSpaceTimePredictorVolumeIntegralModel(AbstractModelBaseClass):
if self.context["useSplitCK"]:
if self.context["useVectPDE"]:
if self.context["useAoSoA2"]:
if self.context["useSinglePrecision"]:
if self.context["singlePrecisionSTP"]:
template = "SP_fusedSPTVI_linear_split_ck_aosoa2_cpp.template"
else:
template = "fusedSPTVI_linear_split_ck_aosoa2_cpp.template"
......
......@@ -29,5 +29,10 @@ class KernelsHeaderModel(AbstractModelBaseClass):
def generateCode(self):
self.context["solverNamespace"] = self.context["solverName"].split("::")[0]
self.context["solverClass"] = self.context["solverName"].split("::")[1]
if self.context["singlePrecisionSTP"]:
self.context["STP_Precision"] = "float"
else:
self.context["STP_Precision"] = "double"
self.render((self.context["kernelType"], "Kernels_h.template"), "Kernels.h")
......@@ -37,13 +37,13 @@ namespace {{namespaceName}} {
double* restrict lQhbnd,
double* restrict lFhbnd,
{% if useSplitCK %}
double* restrict lQi,
double* restrict lQiNext,
double* restrict lPi, // for NCP or Source
double* restrict lQhi,
double* restrict lFhi,
double* restrict gradQ, // for NCP or Source
double* restrict PSi, // for pointSource
{{STP_Precision}}* restrict lQi,
{{STP_Precision}}* restrict lQiNext,
{{STP_Precision}}* restrict lPi, // for NCP or Source
{{STP_Precision}}* restrict lQhi,
{{STP_Precision}}* restrict lFhi,
{{STP_Precision}}* restrict gradQ, // for NCP or Source
{{STP_Precision}}* restrict PSi, // for pointSource
{% else %}
double* restrict lQi,
double* restrict lFi,
......@@ -69,13 +69,13 @@ namespace {{namespaceName}} {
double* restrict lQhbnd,
double* restrict lFhbnd,
{% if useSplitCK %}
double* restrict lQi,
double* restrict lQiNext,
double* restrict lPi, // for NCP or Source
double* restrict lQhi,
double* restrict lFhi,
double* restrict gradQ, // for NCP or Source
double* restrict PSi, // for pointSource
{{STP_Precision}}* restrict lQi,
{{STP_Precision}}* restrict lQiNext,
{{STP_Precision}}* restrict lPi, // for NCP or Source
{{STP_Precision}}* restrict lQhi,
{{STP_Precision}}* restrict lFhi,
{{STP_Precision}}* restrict gradQ, // for NCP or Source
{{STP_Precision}}* restrict PSi, // for pointSource
{% else %}
double* restrict lQi,
double* restrict lFi,
......
......@@ -705,6 +705,12 @@
"available-for" : ["optimised"],
"title" : "WiP: use AoSoA[2] data layout, requires vectorise_terms true",
"default" : false
},
"SinglePrecision_STP" : {
"type" : "boolean",
"available-for" : ["optimised"],
"title" : "WiP: compute STP with single precision only, requires linear and AoSoA2_layout",
"default" : false
}
}
},
......
......@@ -66,6 +66,7 @@ class KernelgeneratorModel:
"predictorRecompute" : solverContext["predictorRecompute"],
"useVectPDE" : solverContext["useVectPDE"],
"useAoSoA2" : solverContext["useAoSoA2"],
"singlePrecisionSTP" : solverContext["singlePrecisionSTP"],
# Optional int parameters (may set redundant flags)
"usePointSources" : solverContext["numberOfPointSources"] if solverContext["numberOfPointSources"] > 0 else -1,
"tempVarsOnStack" : solverContext["tempVarsOnStack"]
......
......@@ -224,6 +224,8 @@ class SolverController:
context["predictorRecompute"] = kernel.get("space_time_predictor",{}).get("predictor_recompute",False)
context["useVectPDE"] = kernel.get("space_time_predictor",{}).get("vectorise_terms",False)
context["useAoSoA2"] = kernel.get("space_time_predictor",{}).get("AoSoA2_layout",False)
context["singlePrecisionSTP"] = kernel.get("space_time_predictor",{}).get("SinglePrecision_STP",False)
context["STP_Precision"] = "float" if kernel.get("space_time_predictor",{}).get("SinglePrecision_STP",False) else "double"
context.update(self.buildKernelTermsContext(kernel["terms"]))
return context
......
......@@ -160,7 +160,7 @@ int {{project}}::{{abstractSolver}}::fusedSpaceTimePredictorVolumeIntegral(doubl
constexpr int totalSize = {{optNamespace}}::getFusedSTPVISize();
{% if tempVarsOnStack %}
double memory[totalSize] __attribute__((aligned(ALIGNMENT)));
{{STP_Precision}} memory[totalSize] __attribute__((aligned(ALIGNMENT)));
{% else %}
//const int index = exahype::DataHeap::getInstance().createData(totalSize,totalSize,exahype::DataHeap::Allocation::UseRecycledEntriesIfPossibleCreateNewEntriesIfRequired);
//double* memory = exahype::DataHeap::getInstance().getData(index).data();
......@@ -170,37 +170,37 @@ int {{project}}::{{abstractSolver}}::fusedSpaceTimePredictorVolumeIntegral(doubl
{% if isLinear %}
{% if useSplitCK %}
{% set SPTbufferSignature="lQi, lQi_next, lPi, lQhi, lFhi, gradQ, PSi" %}
double* lQi = memory + {{optNamespace}}::getlQiShift();
double* lQi_next= memory + {{optNamespace}}::getlQiNextShift();
double* lPi = memory + {{optNamespace}}::getlPiShift();
double* lQhi = memory + {{optNamespace}}::getlQhiShift();
double* lFhi = memory + {{optNamespace}}::getlFhiShift();
double* gradQ = memory + {{optNamespace}}::getgradQShift();
double* PSi = memory + {{optNamespace}}::getPSiShift();
{{STP_Precision}}* lQi = memory + {{optNamespace}}::getlQiShift();
{{STP_Precision}}* lQi_next= memory + {{optNamespace}}::getlQiNextShift();
{{STP_Precision}}* lPi = memory + {{optNamespace}}::getlPiShift();
{{STP_Precision}}* lQhi = memory + {{optNamespace}}::getlQhiShift();
{{STP_Precision}}* lFhi = memory + {{optNamespace}}::getlFhiShift();
{{STP_Precision}}* gradQ = memory + {{optNamespace}}::getgradQShift();
{{STP_Precision}}* PSi = memory + {{optNamespace}}::getPSiShift();
{% else %}
{% set SPTbufferSignature="lQi, lFi, lSi, lQhi, lFhi, lShi, gradQ, PSi, PSderivatives" %}
double* lQi = memory + {{optNamespace}}::getlQiShift();
double* lQhi = memory + {{optNamespace}}::getlQhiShift();
double* lFi = memory + {{optNamespace}}::getlFiShift();
double* lFhi = memory + {{optNamespace}}::getlFhiShift();
{{STP_Precision}}* lQi = memory + {{optNamespace}}::getlQiShift();
{{STP_Precision}}* lQhi = memory + {{optNamespace}}::getlQhiShift();
{{STP_Precision}}* lFi = memory + {{optNamespace}}::getlFiShift();
{{STP_Precision}}* lFhi = memory + {{optNamespace}}::getlFhiShift();
{% if useSource %}
double* lSi = memory + {{optNamespace}}::getlSiShift();
double* lShi = memory + {{optNamespace}}::getlShiShift();
{{STP_Precision}}* lSi = memory + {{optNamespace}}::getlSiShift();
{{STP_Precision}}* lShi = memory + {{optNamespace}}::getlShiShift();
{% else %}
constexpr double* lSi = nullptr;
constexpr double* lShi = nullptr;
constexpr {{STP_Precision}}* lSi = nullptr;
constexpr {{STP_Precision}}* lShi = nullptr;
{% endif %}
{% if useNCP %}
double* gradQ = memory + {{optNamespace}}::getgradQShift();
{{STP_Precision}}* gradQ = memory + {{optNamespace}}::getgradQShift();
{% else %}
constexpr double* gradQ = nullptr;
constexpr {{STP_Precision}}* gradQ = nullptr;
{% endif %}
{% if usePointSources %}
double* PSi = memory + {{optNamespace}}::getPSiShift();
double* PSderivatives = memory + {{optNamespace}}::getPSderivativesShift();
{{STP_Precision}}* PSi = memory + {{optNamespace}}::getPSiShift();
{{STP_Precision}}* PSderivatives = memory + {{optNamespace}}::getPSderivativesShift();
{% else %}
constexpr double* PSi = nullptr;
constexpr double* PSderivatives = nullptr;
constexpr {{STP_Precision}}* PSi = nullptr;
constexpr {{STP_Precision}}* PSderivatives = nullptr;
{% endif %}
{% endif %}{# useSplitCK #}
{% else %}{#
......@@ -208,43 +208,43 @@ int {{project}}::{{abstractSolver}}::fusedSpaceTimePredictorVolumeIntegral(doubl
nonlinear case
#}
double* lQi = memory + {{optNamespace}}::getlQiShift();
double* lQhi = memory + {{optNamespace}}::getlQhiShift();
{{STP_Precision}}* lQi = memory + {{optNamespace}}::getlQiShift();
{{STP_Precision}}* lQhi = memory + {{optNamespace}}::getlQhiShift();
{% if predictorRecompute %}
{% if numberOfMaterialParameters > 0 %}
double* lPi = memory + {{optNamespace}}::getlPiShift();
{{STP_Precision}}* lPi = memory + {{optNamespace}}::getlPiShift();
{% else %}
constexpr double* lPi = nullptr;
constexpr {{STP_Precision}}* lPi = nullptr;
{% endif %}
{% endif %}
double* rhs = memory + {{optNamespace}}::getrhsShift(); //same size as lQi
{{STP_Precision}}* rhs = memory + {{optNamespace}}::getrhsShift(); //same size as lQi
{% if useFlux %}
{% if not predictorRecompute %}
double* lFi = memory + {{optNamespace}}::getlFiShift();
{{STP_Precision}}* lFi = memory + {{optNamespace}}::getlFiShift();
{% endif %}
double* lFhi = memory + {{optNamespace}}::getlFhiShift();
{{STP_Precision}}* lFhi = memory + {{optNamespace}}::getlFhiShift();
{% else %}
constexpr double* lFi = nullptr;
constexpr double* lFhi = nullptr;
constexpr {{STP_Precision}}* lFi = nullptr;
constexpr {{STP_Precision}}* lFhi = nullptr;
{% endif %}
{% if useNCP or useSource %}
{% if not predictorRecompute %}
double* lSi = memory + {{optNamespace}}::getlSiShift();
{{STP_Precision}}* lSi = memory + {{optNamespace}}::getlSiShift();
{% endif %}
double* lShi = memory + {{optNamespace}}::getlShiShift();
{{STP_Precision}}* lShi = memory + {{optNamespace}}::getlShiShift();
{% else %}
constexpr double* lSi = nullptr;
constexpr double* lShi = nullptr;
constexpr {{STP_Precision}}* lSi = nullptr;
constexpr {{STP_Precision}}* lShi = nullptr;
{% endif %}
{% if useNCP or useViscousFlux %}
double* gradQ = memory + {{optNamespace}}::getgradQShift();
{{STP_Precision}}* gradQ = memory + {{optNamespace}}::getgradQShift();
{% else %}
constexpr double* gradQ = nullptr;
constexpr {{STP_Precision}}* gradQ = nullptr;
{% endif %}
{% if useViscousFlux and not predictorRecompute %}
double* gradQAvg = memory + {{optNamespace}}::getgradQAvgShift();
{{STP_Precision}}* gradQAvg = memory + {{optNamespace}}::getgradQAvgShift();
{% else %}
constexpr double* gradQAvg = nullptr;
constexpr {{STP_Precision}}* gradQAvg = nullptr;
{% endif %}
{% endif %}{# if case useNCP or useViscousFlux #}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment