Commit 77873b72 authored by Jean-Matthieu Gallard
Browse files

KernelGen Linear STP - refactor LoG and test on-the-fly transpose WiP

parent 94bf42ac
......@@ -60,7 +60,7 @@ class FusedSpaceTimePredictorVolumeIntegralModel(AbstractModelBaseClass):
self.context["tmpArraySize"] = max((self.context["nDof"]*self.context["nVarPad"] if self.context["useFlux"] else 0), \
(self.context["nDim"]*self.context["nVarPad"] if self.context["useNCP"] else 0))
self.render(("aderdg", "fusedSPTVI_linear_v2_cpp.template"), "fusedSpaceTimePredictorVolumeIntegral.cpp")
self.render(("aderdg", "fusedSPTVI_linear_cpp.template"), "fusedSpaceTimePredictorVolumeIntegral.cpp")
if self.context["usePointSources"]:
self.context["usePointSources"] = False
......
{#
/**
 * Subtemplate to call the PDE flux for all nDof**nDim values of the xyz index.
 *
 * Read from the rendering context:
 *   nDof, nDim, nVarPad, chunkSize - sizes used for the loop bounds and buffers
 *   useFluxVect    - selects the chunked (transposing) or the entry-by-entry variant
 *   solverName     - class name qualifying the flux call
 *   m              - macro namespace providing allocateArray, transpose, transpose_rest
 *   idxLQi, idxLFi - index helpers producing the offsets into lQi and lFi
 *   t, xyz         - forwarded to the index helpers; presumably the names of the C++
 *                    loop variables, TODO confirm in the including template
 * The generated C++ refers to lQi, lFi and solver, which must be in scope where
 * this subtemplate is included.
 */
#}
{% with %}
{# /** Set up helper template values */ #}
{# chunkRest: number of entries left over once all complete chunks are processed #}
{% set chunkRest=((nDof**nDim)%chunkSize) %}
{# restStart: xyz index of the first entry of the incomplete last chunk #}
{% set restStart=nDof**nDim-chunkRest %}
{# /** Subtemplate */ #}
{% if useFluxVect %}{# Vectorized flux, require transposition from AoS to SoA and back #}
{ // Compute PDE fluxes in chunk
{# Fz is allocated for every nDim because F below always holds three pointers #}
{{m.allocateArray('Fx', nVarPad*chunkSize, forceStack=True) | indent(2) }}{##}
{{m.allocateArray('Fy', nVarPad*chunkSize, forceStack=True) | indent(2) }}{##}
{{m.allocateArray('Fz', nVarPad*chunkSize, forceStack=True) | indent(2) }}{##}
{{m.allocateArray('lQiT', nVarPad*chunkSize, forceStack=True) | indent(2) }}{##}
double* F[3] = {Fx, Fy, Fz};
for (int xyz = 0; xyz < {{restStart}}; xyz+={{chunkSize}}) {
{# lQi chunk [chunkSize][nVarPad] -> lQiT [nVarPad][chunkSize] #}
{{m.transpose('lQi', 'lQiT', idxLQi(t,0,0,xyz,0), 0, chunkSize, nVarPad) | indent(4) }}{##}
#ifdef USE_IPO
#pragma forceinline recursive
#endif
solver.{{solverName}}::flux(lQiT, F);
{# flux buffers [nVarPad][chunkSize] -> lFi chunk [chunkSize][nVarPad] #}
{{m.transpose('Fx', 'lFi', 0, idxLFi(0,t,0,0,xyz,0), nVarPad, chunkSize) | indent(4) }}{##}
{{m.transpose('Fy', 'lFi', 0, idxLFi(1,t,0,0,xyz,0), nVarPad, chunkSize) | indent(4) }}{##}
{% if nDim == 3 %}
{{m.transpose('Fz', 'lFi', 0, idxLFi(2,t,0,0,xyz,0), nVarPad, chunkSize) | indent(4) }}{##}
{% endif %}
}
{% if chunkRest > 0%}
{ // process the last non complete chunk
{# Only chunkRest entries of lQi remain. trueB must be nVarPad (was misspelled nVarpad, #}
{# an undefined name that left the generated loop bound empty). safe=True makes the     #}
{# macro fill the unused columns of lQiT with the values of the first remaining entry.  #}
{{m.transpose_rest('lQi', 'lQiT', idxLQi(t,0,0,restStart,0), 0, chunkSize, nVarPad, chunkRest, nVarPad, safe=True) | indent(4) }}{##}
#ifdef USE_IPO
#pragma forceinline recursive
#endif
solver.{{solverName}}::flux(lQiT, F);
{# Copy back only the chunkRest valid columns of each flux buffer #}
{{m.transpose_rest('Fx', 'lFi', 0, idxLFi(0,t,0,0,restStart,0), nVarPad, chunkSize, nVarPad, chunkRest) | indent(4) }}{##}
{{m.transpose_rest('Fy', 'lFi', 0, idxLFi(1,t,0,0,restStart,0), nVarPad, chunkSize, nVarPad, chunkRest) | indent(4) }}{##}
{% if nDim == 3 %}
{{m.transpose_rest('Fz', 'lFi', 0, idxLFi(2,t,0,0,restStart,0), nVarPad, chunkSize, nVarPad, chunkRest) | indent(4) }}{##}
{% endif %}
}
{% endif %}{# chunkRest > 0 #}
}
{% else %}{# useFluxVect #}
{#
/** Default scalar case: the flux writes straight into lFi, one xyz entry per call */
#}
{ // Compute the fluxes
double* F[{{nDim}}];
for (int xyz = 0; xyz < {{nDof**nDim}}; xyz++) {
// Call PDE fluxes
F[0] = lFi+{{idxLFi(0,t,0,0,xyz,0)}};
F[1] = lFi+{{idxLFi(1,t,0,0,xyz,0)}};
{% if nDim == 3 %}
F[2] = lFi+{{idxLFi(2,t,0,0,xyz,0)}};
{% endif %}
#ifdef USE_IPO
#pragma forceinline recursive
#endif
solver.{{solverName}}::flux(lQi+{{idxLQi(t,0,0,xyz,0)}}, F);
}
}
{% endif %}{# useFluxVect#}
{% endwith %}
......@@ -135,3 +135,38 @@ std::copy_n({{gradQY}}, {{incr}}, {{gradQ}}+{{1*incr}}); //y
std::copy_n({{gradQZ}}, {{incr}}, {{gradQ}}+{{2*incr}}); //z
{% endif %}
{% endmacro %}
{#
/**
test transpose macro
*/
#}
{% macro transpose(in, out, in_offset, out_offset, A, B) %}
{#
  Emits a C++ loop nest that transposes a row-major [A][B] matrix.
    in, out               - names of the C++ source and destination arrays
    in_offset, out_offset - index offsets added to every access of in / out
    A, B                  - row and column count of the input; the output is [B][A]
  The outer loop runs over the B input columns, the inner (simd) loop over the A rows.
#}
{% set aligned_ptrs = out ~ ',' ~ in %}
{% set dst = out ~ '[it_1*' ~ A ~ '+it_2+' ~ out_offset ~ ']' %}
{% set src = in ~ '[it_2*' ~ B ~ '+it_1+' ~ in_offset ~ ']' %}
for(int it_1=0; it_1<{{B}}; it_1++) {
#pragma omp simd aligned({{aligned_ptrs}}:ALIGNMENT)
for(int it_2=0; it_2<{{A}}; it_2++) {
{{dst}} = {{src}};
}
}
{% endmacro %}
{% macro transpose_rest(in, out, in_offset, out_offset, A, B, trueA, trueB, safe=False) %}
{#
  Emits a C++ loop nest that transposes only the leading [trueA][trueB] part of a
  matrix while keeping the strides of the full [A][B] layout.
    in, out               - names of the C++ source and destination arrays
    in_offset, out_offset - index offsets added to every access of in / out
    A, B                  - output row stride (A) and input row stride (B)
    trueA, trueB          - number of input rows and columns that are actually copied
    safe                  - when true, output positions trueA..A-1 of each written row
                            are filled with the value taken from input row 0
#}
{% set aligned_ptrs = out ~ ',' ~ in %}
{% set dst = out ~ '[it_1*' ~ A ~ '+it_2+' ~ out_offset ~ ']' %}
{% set src = in ~ '[it_2*' ~ B ~ '+it_1+' ~ in_offset ~ ']' %}
{% set src_first_row = in ~ '[it_1+' ~ in_offset ~ ']' %}
for(int it_1=0; it_1<{{trueB}}; it_1++) {
#pragma omp simd aligned({{aligned_ptrs}}:ALIGNMENT)
for(int it_2=0; it_2<{{trueA}}; it_2++) {
{{dst}} = {{src}};
}
{% if safe %}
#pragma omp simd aligned({{aligned_ptrs}}:ALIGNMENT)
for(int it_2={{trueA}}; it_2<{{A}}; it_2++) {
{{dst}} = {{src_first_row}}; // copy first entry to fill output
}
{% endif %}
}
{% endmacro %}
{#
/**
  Wrapper macro that renders the subtemplate 'subtemplates/flux_PDE_over_xyz.template'.
  NOTE(review): the parameters (inputQ, inputQ_dataSize, outputF, timeInterleaved,
  time_var) are not used by the macro body itself; whether the included subtemplate
  reads them cannot be told from here, confirm against that file.
*/
#}
{% macro callflux(inputQ, inputQ_dataSize, outputF, timeInterleaved, time_var) %}
{% include 'subtemplates/flux_PDE_over_xyz.template' %}
{% endmacro %}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment