Commit 40ed16ab authored by Jean-Matthieu Gallard's avatar Jean-Matthieu Gallard
Browse files

Merge branch 'jm/aosoa2' into 'master'

Jm/aosoa2

See merge request exahype/ExaHyPE-Engine!40
parents 31b5eb94 0babe09f
......@@ -78,6 +78,7 @@ class ArgumentParser:
("useGaussLobatto", ArgType.OptionalBool, "use Gauss Lobatto Quadrature instead of Gauss Legendre"),
("predictorRecompute", ArgType.OptionalBool, "predictor step will recompute the PDE instead of relying on stored values from the picard loop (nonlinear only)"),
("useVectPDE", ArgType.OptionalBool, "use vectorized PDE terms (applies when present to: Flux, NCP, Source, FusedSource and MaterialParam)"),
("useAoSoA2", ArgType.OptionalBool, "use AoSoA[2] data layout in SpaceTimePredictor kernel (WiP: linear only), requires useVectPDE"), #TODO JMG: WiP
("tempVarsOnStack", ArgType.OptionalBool, "put the big scratch arrays on the stack instead of the heap (you can use ulimit -s to increase the stack size)")
]
......
......@@ -98,6 +98,7 @@ class Controller:
"useCERKGuess" : args["useCERKGuess"],
"useSplitCK" : args["useSplitCK"],
"useVectPDE" : args["useVectPDE"],
"useAoSoA2" : args["useAoSoA2"],
"predictorRecompute" : args["predictorRecompute"],
"initialGuess" : "mixedPicard" #TODO JMG put as proper toolkit arg
#"initialGuess" : "default" #TODO JMG put as proper toolkit arg
......@@ -174,6 +175,7 @@ class Controller:
context["nDataPad"] = self.getSizeWithPadding(context["nData"])
context["nDofPad"] = self.getSizeWithPadding(context["nDof"])
context["nDof3D"] = 1 if context["nDim"] == 2 else context["nDof"]
context["nDof2Pad"] = self.getSizeWithPadding(context["nDof"]*context["nDof"])
context["solverHeader"] = context["solverName"].split("::")[1] + ".h"
context["codeNamespaceList"] = context["codeNamespace"].split("::")
context["guardNamespace"] = "_".join(context["codeNamespaceList"]).upper()
......
......@@ -37,6 +37,7 @@ class ConfigurationParametersModel(AbstractModelBaseClass):
nDataPad = self.context["nDataPad"]
nDof = self.context["nDof"]
nDofPad = self.context["nDofPad"]
nDof2Pad = self.context["nDof2Pad"]
nDof3D = self.context["nDof3D"]
nDim = self.context["nDim"]
......@@ -65,14 +66,24 @@ class ConfigurationParametersModel(AbstractModelBaseClass):
if self.context["isLinear"]:
if(self.context["useSplitCK"]):
if self.context["useVectPDE"]:
# Linear + split CK vect
self.context["lQiSize"] = nDof3D*nDof*nVar*nDofPad
self.context["lQiNextSize"] = nDof3D*nDof*nVar*nDofPad
self.context["lPiSize"] = nDof3D*nDof*nPar*nDofPad
self.context["lQhiSize"] = nDof3D*nDof*nVar*nDofPad
self.context["lFhiSize"] = nDof3D*nDof*nVar*nDofPad
self.context["gradQSize"] = nDof3D*nDof*nVar*nDofPad
self.context["PSiSize"] = nDof*nDof3D*nDof*nVar*nDofPad
if self.context["useAoSoA2"]:
# Linear + split CK vect AoSoA2
self.context["lQiSize"] = nDof3D*nVar*nDof2Pad
self.context["lQiNextSize"] = nDof3D*nVar*nDof2Pad
self.context["lPiSize"] = nDof3D*nPar*nDof2Pad
self.context["lQhiSize"] = nDof3D*nVar*nDof2Pad
self.context["lFhiSize"] = nDof3D*nVar*nDof2Pad
self.context["gradQSize"] = nDof3D*nVar*nDof2Pad
self.context["PSiSize"] = nDof*nDof3D*nVar*nDof2Pad
else:
# Linear + split CK vect (AoSoA)
self.context["lQiSize"] = nDof3D*nDof*nVar*nDofPad
self.context["lQiNextSize"] = nDof3D*nDof*nVar*nDofPad
self.context["lPiSize"] = nDof3D*nDof*nPar*nDofPad
self.context["lQhiSize"] = nDof3D*nDof*nVar*nDofPad
self.context["lFhiSize"] = nDof3D*nDof*nVar*nDofPad
self.context["gradQSize"] = nDof3D*nDof*nVar*nDofPad
self.context["PSiSize"] = nDof*nDof3D*nDof*nVar*nDofPad
else:
# Linear + split CK scalar
self.context["lQiSize"] = nVarPad*(nDof**nDim)
......
......@@ -40,7 +40,10 @@ class FusedSpaceTimePredictorVolumeIntegralModel(AbstractModelBaseClass):
if self.context["useSplitCK"]:
if self.context["useVectPDE"]:
template = "fusedSPTVI_linear_split_ck_vect_cpp.template"
if self.context["useAoSoA2"]:
template = "fusedSPTVI_linear_split_ck_aosoa2_cpp.template"
else:
template = "fusedSPTVI_linear_split_ck_vect_cpp.template"
else:
template = "fusedSPTVI_linear_split_ck_cpp.template"
......@@ -97,24 +100,39 @@ class FusedSpaceTimePredictorVolumeIntegralModel(AbstractModelBaseClass):
nDof3 = nDof2*nDof
nDof3D = self.context["nDof3D"]
nDofPad = self.context["nDofPad"]
nDof2Pad = self.context["nDof2Pad"]
nDim = self.context["nDim"]
if self.context["isLinear"]:
if self.context["useSplitCK"]:
if self.context["useVectPDE"]: # split_ck vect
if self.context["useFlux"]:
if self.context["useMaterialParam"]:
self.context["matmulConfigs"]["flux_x_sck_vect"] = MatmulConfig(nDofPad, nVar, nDof, nDofPad , nDofPad, nDofPad , 1, 0, 1, 1, "flux_x_sck_vect", "nopf", "gemm")
self.context["matmulConfigs"]["flux_y_or_z_sck_vect"] = MatmulConfig(nDofPad*nVar, nVar, nDof, nDofPad*nVar , nDofPad, nDofPad*nVar , 1, 0, 1, 1, "flux_y_or_z_sck_vect", "nopf", "gemm")
else:
self.context["matmulConfigs"]["flux_x_sck_vect"] = MatmulConfig(nDofPad, nVar, nDof, nDofPad , nDofPad, nDofPad , 1, 1, 1, 1, "flux_x_sck_vect", "nopf", "gemm")
self.context["matmulConfigs"]["flux_y_sck_vect"] = MatmulConfig(nDofPad*nVar, nDof, nDof, nDofPad*nVar , nDofPad, nDofPad*nVar , 1, 1, 1, 1, "flux_y_sck_vect", "nopf", "gemm")
if self.context["nDim"]>=3:
self.context["matmulConfigs"]["flux_z_sck_vect"] = MatmulConfig(nDofPad*nVar, nDof, nDof, nDofPad*nVar , nDofPad, nDofPad*nVar*nDof, 1, 1, 1, 1, "flux_z_sck_vect", "nopf", "gemm")
self.context["matmulConfigs"]["gradQ_x_sck_vect"] = MatmulConfig(nDofPad, nVar*nDof*nDof3D, nDof, nDofPad , nDofPad, nDofPad , 1, 0, 1, 1, "gradQ_x_sck_vect", "nopf", "gemm") # beta, 0 => overwrite C
self.context["matmulConfigs"]["gradQ_y_sck_vect"] = MatmulConfig(nDofPad*nVar, nDof, nDof, nDofPad*nVar , nDofPad, nDofPad*nVar , 1, 0, 1, 1, "gradQ_y_sck_vect", "nopf", "gemm") # beta, 0 => overwrite C
if self.context["nDim"]>=3:
self.context["matmulConfigs"]["gradQ_z_sck_vect"] = MatmulConfig(nDofPad*nVar*nDof, nDof, nDof, nDofPad*nVar*nDof, nDofPad, nDofPad*nVar*nDof, 1, 0, 1, 1, "gradQ_z_sck_vect", "nopf", "gemm") # beta, 0 => overwrite C
if self.context["useVectPDE"]:
if self.context["useAoSoA2"]: #split_ck aosoa2
if self.context["useFlux"]:
if self.context["useMaterialParam"]:
self.context["matmulConfigs"]["flux_x_sck_aosoa2"] = MatmulConfig(nDof, nDof, nDof, nDofPad , nDof, nDof , 1, 0, 1, 1, "flux_x_sck_aosoa2", "nopf", "gemm") # beta, 0 => overwrite C
self.context["matmulConfigs"]["flux_y_sck_aosoa2"] = MatmulConfig(nDof, nDof, nDof, nDof , nDofPad, nDof , 1, 0, 1, 1, "flux_y_sck_aosoa2", "nopf", "gemm") # beta, 0 => overwrite C
self.context["matmulConfigs"]["flux_z_sck_aosoa2"] = MatmulConfig(nDof2Pad*nVar, nDof, nDof, nDof2Pad*nVar , nDofPad, nDof2Pad*nVar , 1, 0, 1, 1, "flux_z_sck_aosoa2", "nopf", "gemm") # beta, 0 => overwrite C
else:
self.context["matmulConfigs"]["flux_x_sck_aosoa2"] = MatmulConfig(nDof, nDof, nDof, nDofPad , nDof, nDof , 1, 1, 1, 1, "flux_x_sck_aosoa2", "nopf", "gemm")
self.context["matmulConfigs"]["flux_y_sck_aosoa2"] = MatmulConfig(nDof, nDof, nDof, nDof , nDofPad, nDof , 1, 1, 1, 1, "flux_y_sck_aosoa2", "nopf", "gemm")
self.context["matmulConfigs"]["flux_z_sck_aosoa2"] = MatmulConfig(nDof2Pad*nVar, nDof, nDof, nDof2Pad*nVar , nDofPad, nDof2Pad*nVar , 1, 1, 1, 1, "flux_z_sck_aosoa2", "nopf", "gemm")
self.context["matmulConfigs"]["gradQ_x_sck_aosoa2"] = MatmulConfig(nDof, nDof, nDof, nDofPad , nDof, nDof , 1, 0, 1, 1, "gradQ_x_sck_aosoa2", "nopf", "gemm") # beta, 0 => overwrite C
self.context["matmulConfigs"]["gradQ_y_sck_aosoa2"] = MatmulConfig(nDof, nDof, nDof, nDof , nDofPad, nDof , 1, 0, 1, 1, "gradQ_y_sck_aosoa2", "nopf", "gemm") # beta, 0 => overwrite C
# The routine name is kept identical to the config key, as for every other aosoa2 entry.
self.context["matmulConfigs"]["gradQ_z_sck_aosoa2"] = MatmulConfig(nDof2Pad*nVar, nDof, nDof, nDof2Pad*nVar, nDofPad, nDof2Pad*nVar, 1, 0, 1, 1, "gradQ_z_sck_aosoa2", "nopf", "gemm") # beta, 0 => overwrite C
else:# split_ck vect
if self.context["useFlux"]:
if self.context["useMaterialParam"]:
self.context["matmulConfigs"]["flux_x_sck_vect"] = MatmulConfig(nDofPad, nVar, nDof, nDofPad , nDofPad, nDofPad , 1, 0, 1, 1, "flux_x_sck_vect", "nopf", "gemm") # beta, 0 => overwrite C
self.context["matmulConfigs"]["flux_y_or_z_sck_vect"] = MatmulConfig(nDofPad*nVar, nVar, nDof, nDofPad*nVar , nDofPad, nDofPad*nVar , 1, 0, 1, 1, "flux_y_or_z_sck_vect", "nopf", "gemm") # beta, 0 => overwrite C
else:
self.context["matmulConfigs"]["flux_x_sck_vect"] = MatmulConfig(nDofPad, nVar, nDof, nDofPad , nDofPad, nDofPad , 1, 1, 1, 1, "flux_x_sck_vect", "nopf", "gemm")
self.context["matmulConfigs"]["flux_y_sck_vect"] = MatmulConfig(nDofPad*nVar, nDof, nDof, nDofPad*nVar , nDofPad, nDofPad*nVar , 1, 1, 1, 1, "flux_y_sck_vect", "nopf", "gemm")
if self.context["nDim"]>=3:
self.context["matmulConfigs"]["flux_z_sck_vect"] = MatmulConfig(nDofPad*nVar, nDof, nDof, nDofPad*nVar , nDofPad, nDofPad*nVar*nDof, 1, 1, 1, 1, "flux_z_sck_vect", "nopf", "gemm")
self.context["matmulConfigs"]["gradQ_x_sck_vect"] = MatmulConfig(nDofPad, nVar*nDof*nDof3D, nDof, nDofPad , nDofPad, nDofPad , 1, 0, 1, 1, "gradQ_x_sck_vect", "nopf", "gemm") # beta, 0 => overwrite C
self.context["matmulConfigs"]["gradQ_y_sck_vect"] = MatmulConfig(nDofPad*nVar, nDof, nDof, nDofPad*nVar , nDofPad, nDofPad*nVar , 1, 0, 1, 1, "gradQ_y_sck_vect", "nopf", "gemm") # beta, 0 => overwrite C
if self.context["nDim"]>=3:
self.context["matmulConfigs"]["gradQ_z_sck_vect"] = MatmulConfig(nDofPad*nVar*nDof, nDof, nDof, nDofPad*nVar*nDof, nDofPad, nDofPad*nVar*nDof, 1, 0, 1, 1, "gradQ_z_sck_vect", "nopf", "gemm") # beta, 0 => overwrite C
else: # split_ck scalar
if self.context["useFlux"]:
self.context["matmulConfigs"]["flux_x_sck"] = MatmulConfig(nVarPad, nDof, nDof, nVarPad , nDofPad, nVarPad , 1, 1, 1, 1, "flux_x_sck", "nopf", "gemm")
......
......@@ -27,14 +27,15 @@ namespace {{namespaceName}} {
constexpr int getBasisSize() {return {{nDof}};}
constexpr int getBasisSizePadded() {return {{nDofPad}};}
constexpr int getBasisSquaredSizePadded() {return {{nDof2Pad}};}
constexpr int getDimension() {return {{nDim}};}
constexpr bool isLinear() {return {{isLinearCText}};}
// Vect methods configuration
constexpr int getVectStride() {return {{nDofPad}};}
constexpr int getVectLength() {return {{nDof}};}
constexpr int getVectStride() {return {{nDof2Pad if useAoSoA2 else nDofPad}};}
constexpr int getVectLength() {return {{nDof*nDof if useAoSoA2 else nDof}};}
// Array sizes
constexpr int getlQiSize() {return {{lQiSize}};}
......
......@@ -6,6 +6,14 @@
Will simplify 0 index values and fuse dimension if possible index_3(x1,0,x3,L2,L3) => x1*(L2*L3)+x3
The index_fixed macros assume that L2, L3, ... are the full multipliers of their index, i.e. each one already includes the lengths of the inner dimensions.
index_fixed_4(x1,x2,x3,x4,L2,L3,L4) => x1*L2+x2*L3+x3*L4+x4
Will simplify 0 index values (no need to fuse dimensions)
To use them without having to specify Lx every time, define a local macro in your template
Assuming the macros were imported with
{% import "subtemplates/index.template" as i with context %}
......@@ -17,6 +25,7 @@
B[(t*40+y)*2+i]
The index macros are defined recursively to correctly perform the dimension fusion
*/#}
{#
......@@ -45,7 +54,8 @@ Size 3 index
{% else %}
{{index_2("("~x1~"*"~L2~"+"~x2~")",x3,L3)}}{##}
{% endif %}
{% endmacro %}{#
{% endmacro %}
{#
Size 4 index
......@@ -100,4 +110,88 @@ Size 7 index
{% else %}
{{index_6("("~x1~"*"~L2~"+"~x2~")",x3,x4,x5,x6,x7,L3,L4,L5,L6,L7)}}{##}
{% endif %}
{% endmacro %}
{#
Size 2 index, fixed size
index_fixed_2(x1,x2,L2) => x1*L2+x2
L2 is emitted as given, as the multiplier of x1.
An index equal to 0 or "0" has its term left out of the emitted expression:
if x1 is zero only x2 is emitted, if x2 is zero only x1*L2 is emitted.
#}
{% macro index_fixed_2(x1, x2, L2) %}
{% if x1 == 0 or x1 == "0" %}
{{x2}}{##}
{% elif x2 == 0 or x2 == "0" %}{# x2 is zero: drop the trailing term #}
{{x1}}*{{L2}}{##}
{% else %}
{{x1}}*{{L2}}+{{x2}}{##}
{% endif %}
{% endmacro %}
{#
Size 3 index, fixed size
index_fixed_3(x1,x2,x3,L2,L3) => x1*L2+x2*L3+x3
L2 and L3 are emitted as given, as the multipliers of x1 and x2.
If x1 is 0 or "0" the result is index_fixed_2 of the remaining indices.
If all remaining indices are zero only x1*L2 is emitted.
Otherwise x1*L2 is emitted followed by "+" and index_fixed_2 of the remaining indices.
#}
{% macro index_fixed_3(x1, x2, x3, L2, L3) %}
{% if x1 == 0 or x1 == "0" %}
{{index_fixed_2(x2,x3,L3)}}{##}
{% elif (x2 == 0 or x2 == "0") and (x3 == 0 or x3 == "0") %}{# remove trailing 0 #}
{{x1}}*{{L2}}{##}
{% else %}
{{x1~"*"~L2~"+"~index_fixed_2(x2,x3,L3)}}{##}
{% endif %}
{% endmacro %}
{#
Size 4 index, fixed size
index_fixed_4(x1,x2,x3,x4,L2,L3,L4) => x1*L2+x2*L3+x3*L4+x4
L2, L3 and L4 are emitted as given, as the multipliers of x1, x2 and x3.
If x1 is 0 or "0" the result is index_fixed_3 of the remaining indices.
If all remaining indices are zero only x1*L2 is emitted.
Otherwise x1*L2 is emitted followed by "+" and index_fixed_3 of the remaining indices.
#}
{% macro index_fixed_4(x1, x2, x3, x4, L2, L3, L4) %}
{% if x1 == 0 or x1 == "0" %}
{{index_fixed_3(x2,x3,x4,L3,L4)}}{##}
{% elif (x2 == 0 or x2 == "0") and (x3 == 0 or x3 == "0") and (x4 == 0 or x4 == "0") %}{# remove trailing 0 #}
{{x1}}*{{L2}}{##}
{% else %}
{{x1~"*"~L2~"+"~index_fixed_3(x2,x3,x4,L3,L4)}}{##}
{% endif %}
{% endmacro %}
{#
Size 5 index, fixed size
index_fixed_5(x1,...,x5,L2,...,L5) => x1*L2+x2*L3+x3*L4+x4*L5+x5
L2 ... L5 are emitted as given, as the multipliers of x1 ... x4.
If x1 is 0 or "0" the result is index_fixed_4 of the remaining indices.
If all remaining indices are zero only x1*L2 is emitted.
Otherwise x1*L2 is emitted followed by "+" and index_fixed_4 of the remaining indices.
#}
{% macro index_fixed_5(x1, x2, x3, x4, x5, L2, L3, L4, L5) %}
{% if x1 == 0 or x1 == "0" %}
{{index_fixed_4(x2,x3,x4,x5,L3,L4,L5)}}{##}
{% elif (x2 == 0 or x2 == "0") and (x3 == 0 or x3 == "0") and (x4 == 0 or x4 == "0") and (x5 == 0 or x5 == "0") %}{# remove trailing 0 #}
{{x1}}*{{L2}}{##}
{% else %}
{{x1~"*"~L2~"+"~index_fixed_4(x2,x3,x4,x5,L3,L4,L5)}}{##}
{% endif %}
{% endmacro %}
{#
Size 6 index, fixed size
index_fixed_6(x1,...,x6,L2,...,L6) => x1*L2+x2*L3+x3*L4+x4*L5+x5*L6+x6
L2 ... L6 are emitted as given, as the multipliers of x1 ... x5.
If x1 is 0 or "0" the result is index_fixed_5 of the remaining indices.
If all remaining indices are zero only x1*L2 is emitted.
Otherwise x1*L2 is emitted followed by "+" and index_fixed_5 of the remaining indices.
#}
{% macro index_fixed_6(x1, x2, x3, x4, x5, x6, L2, L3, L4, L5, L6) %}
{% if x1 == 0 or x1 == "0" %}
{{index_fixed_5(x2,x3,x4,x5,x6,L3,L4,L5,L6)}}{##}
{% elif (x2 == 0 or x2 == "0") and (x3 == 0 or x3 == "0") and (x4 == 0 or x4 == "0") and (x5 == 0 or x5 == "0") and (x6 == 0 or x6 == "0") %}{# remove trailing 0 #}
{{x1}}*{{L2}}{##}
{% else %}
{{x1~"*"~L2~"+"~index_fixed_5(x2,x3,x4,x5,x6,L3,L4,L5,L6)}}{##}
{% endif %}
{% endmacro %}
{#
Size 7 index, fixed size
index_fixed_7(x1,...,x7,L2,...,L7) => x1*L2+x2*L3+x3*L4+x4*L5+x5*L6+x6*L7+x7
L2 ... L7 are emitted as given, as the multipliers of x1 ... x6.
If x1 is 0 or "0" the result is index_fixed_6 of the remaining indices.
If all remaining indices are zero only x1*L2 is emitted.
Otherwise x1*L2 is emitted followed by "+" and index_fixed_6 of the remaining indices.
#}
{% macro index_fixed_7(x1, x2, x3, x4, x5, x6, x7, L2, L3, L4, L5, L6, L7) %}
{% if x1 == 0 or x1 == "0" %}
{{index_fixed_6(x2,x3,x4,x5,x6,x7,L3,L4,L5,L6,L7)}}{##}
{% elif (x2 == 0 or x2 == "0") and (x3 == 0 or x3 == "0") and (x4 == 0 or x4 == "0") and (x5 == 0 or x5 == "0") and (x6 == 0 or x6 == "0") and (x7 == 0 or x7 == "0") %}{# remove trailing 0 #}
{{x1}}*{{L2}}{##}
{% else %}
{{x1~"*"~L2~"+"~index_fixed_6(x2,x3,x4,x5,x6,x7,L3,L4,L5,L6,L7)}}{##}
{% endif %}
{% endmacro %}
\ No newline at end of file
......@@ -23,6 +23,16 @@
{# /**************************************
**** Set up helper template values ****
**************************************/ #}
{# replace 0 with '0' #}
{% if A_shift == 0 %}
{% set A_shift = '0' %}
{% endif %}
{% if B_shift == 0 %}
{% set B_shift = '0' %}
{% endif %}
{% if C_shift == 0 %}
{% set C_shift = '0' %}
{% endif %}
{% set conf = matmulConfigs[matmulKey] %}
{% if overrideUseLibxsmm is not defined or overrideUseLibxsmm == "BoolNotDefined" %}
{% set overrideUseLibxsmm = useLibxsmm %}{# if no override then take the current value #}
......@@ -78,12 +88,14 @@ volatile {{fpFormat}} doNotOptimizeAway_{{B}} = {{B}}[0]; //used to prevent the
#}
{% elif useEigen %}
{# old direct mapper #}
{#{
{#
{
Eigen::Map<Eigen::Matrix<{{fpFormat}},{{conf.M}},{{conf.K}}>, Eigen::{{"Aligned"if conf.alignment_A == 1 else "Unaligned"}}, Eigen::OuterStride<{{conf.LDA}}> > {{Ap}}_m({{A}}{% if A_shift != '0' %}+{{A_shift}}{% endif %}); //A = {{A}}
Eigen::Map<Eigen::Matrix<{{fpFormat}},{{conf.K}},{{conf.N}}>, Eigen::Aligned, Eigen::OuterStride<{{conf.LDB}}> > {{Bp}}_m({{trueB}}{% if B_shift != '0' %}+{{B_shift}}{% endif %}); //B = {{B}}, assume it is aligned
Eigen::Map<Eigen::Matrix<{{fpFormat}},{{conf.M}},{{conf.N}}>, Eigen::{{"Aligned"if conf.alignment_C == 1 else "Unaligned"}}, Eigen::OuterStride<{{conf.LDC}}> > {{Cp}}_m({{C}}{% if C_shift != '0' %}+{{C_shift}}{% endif %}); //C = {{C}}
{{Cp}}_m.noalias() {{ '+' if conf.beta == 1 }}= {{ '-' if conf.alpha == -1 }}{{Ap}}_m * {{Bp}}_m {% if (useTrueB and not forceCoeffMatrix) %}* {{trueAlpha}}{% endif %};
}#}
}
#}
#pragma forceinline recursive
{
new (&{{conf.baseroutinename}}_A_map) Eigen::Map<Eigen::Matrix<{{fpFormat}},{{conf.M}},{{conf.K}}>, Eigen::{{"Aligned"if conf.alignment_A == 1 else "Unaligned"}}, Eigen::OuterStride<{{conf.LDA}}> >({{A}}{% if A_shift != '0' %}+{{A_shift}}{% endif %}); //{{conf.baseroutinename}}_A_map = {{A}}
......@@ -111,7 +123,7 @@ for (int it = 0; it < {{conf.LDB*conf.K}}; it++) {
for (int it_1 = 0; it_1 < {{conf.N}}; it_1++) {
#pragma omp simd aligned({{Cp}}:ALIGNMENT)
for (int it_3 = 0; it_3 < {{conf.M}}; it_3++) {
{{C}}[{{C_shift}}+it_1*{{conf.LDC}}+it_3] = 0.;
{{C}}[{% if C_shift != '0' %}{{C_shift}}+{% endif %}it_1*{{conf.LDC}}+it_3] = 0.;
}
}
{% endif %}
......@@ -119,7 +131,7 @@ for (int it_1 = 0; it_1 < {{conf.N}}; it_1++) {
for (int it_2 = 0; it_2 < {{conf.K}}; it_2++) {
#pragma omp simd aligned({{Cp}},{{Ap}},{{trueBp}}:ALIGNMENT)
for (int it_3 = 0; it_3 < {{conf.M}}; it_3++) {
{{C}}[{{C_shift}}+it_1*{{conf.LDC}}+it_3] {{ '+' if conf.alpha == 1 else '-' }}= {{A}}[{{A_shift}}+it_2*{{conf.LDA}}+it_3] * {% if (useTrueB and not forceCoeffMatrix) %}{{trueAlpha}}* {% endif %}{{trueB}}[{% if B_shift != '0' %}{{B_shift}}+{% endif %}it_1*{{conf.LDB}}+it_2];
{{C}}[{% if C_shift != '0' %}{{C_shift}}+{% endif %}it_1*{{conf.LDC}}+it_3] {{ '+' if conf.alpha == 1 else '-' }}= {{A}}[{% if A_shift != '0' %}{{A_shift}}+{% endif %}it_2*{{conf.LDA}}+it_3] * {% if (useTrueB and not forceCoeffMatrix) %}{{trueAlpha}}* {% endif %}{{trueB}}[{% if B_shift != '0' %}{{B_shift}}+{% endif %}it_1*{{conf.LDB}}+it_2];
}
}
}
......
......@@ -699,6 +699,12 @@
"available-for" : ["optimised"],
"title" : "WiP: Use vectorised user function formulations (SoA data layout) for PDE functions related to the terms' options: flux, source, ncp, viscous_flux and fusedsource. Only available for optimised kernels with either split_ck (linear) or predictor_recompute (nonlinear)",
"default" : false
},
"AoSoA2_layout" : {
"type" : "boolean",
"available-for" : ["optimised"],
"title" : "WiP: use AoSoA[2] data layout, requires vectorise_terms true",
"default" : false
}
}
},
......
......@@ -65,6 +65,7 @@ class KernelgeneratorModel:
"useGaussLobatto" : solverContext["basis"] == "lobatto",
"predictorRecompute" : solverContext["predictorRecompute"],
"useVectPDE" : solverContext["useVectPDE"],
"useAoSoA2" : solverContext["useAoSoA2"],
# Optional int parameters (may set redundant flags)
"usePointSources" : solverContext["numberOfPointSources"] if solverContext["numberOfPointSources"] > 0 else -1,
"tempVarsOnStack" : solverContext["tempVarsOnStack"]
......
......@@ -223,6 +223,7 @@ class SolverController:
context["noTimeAveraging_s"] = "true" if kernel.get("space_time_predictor",{}).get("notimeavg",False) else "false"
context["predictorRecompute"] = kernel.get("space_time_predictor",{}).get("predictor_recompute",False)
context["useVectPDE"] = kernel.get("space_time_predictor",{}).get("vectorise_terms",False)
context["useAoSoA2"] = kernel.get("space_time_predictor",{}).get("AoSoA2_layout",False)
context.update(self.buildKernelTermsContext(kernel["terms"]))
return context
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment