Commit 3f9e4a9f authored by Jean-Matthieu Gallard

KernelGen - Fix viscous_flux + prefetch tests

parent c6e542fd
@@ -207,11 +207,13 @@ class FusedSpaceTimePredictorVolumeIntegralModel(AbstractModelBaseClass):
else:
if self.context["useFlux"]:
self.context["matmulConfigs"]["rhs_x"] = MatmulConfig(nDofPad, nVar, nDof, nDofPad, nDofPad, nDofPad , 1, 1, 1, 1, 1, "rhs_x", prefetchInput="B", prefetchOutput="C")
self.context["matmulConfigs"]["rhs_y"] = MatmulConfig(nDofPad*nVar, nDof, nDof, nDofPad*nVar, nDofPad, nDofPad*nVar , 1, 1, 1, 1, 1, "rhs_y", prefetchInput="A", prefetchOutput="C")
#self.context["matmulConfigs"]["rhs_y"] = MatmulConfig(nDofPad*nVar, nDof, nDof, nDofPad*nVar, nDofPad, nDofPad*nVar , 1, 1, 1, 1, 1, "rhs_y", prefetchInput="A", prefetchOutput="C")
self.context["matmulConfigs"]["rhs_y"] = MatmulConfig(nDofPad*nVar, nDof, nDof, nDofPad*nVar, nDofPad, nDofPad*nVar , 1, 1, 1, 1, 1, "rhs_y")
if self.context["nDim"]>=3:
self.context["matmulConfigs"]["rhs_z"] = MatmulConfig(nDofPad*nVar*nDof, nDof, nDof, nDofPad*nVar*nDof, nDofPad, nDofPad*nVar*nDof , 1, 1, 1, 1, 1, "rhs_z")
self.context["matmulConfigs"]["lduh_x"] = MatmulConfig(nDofPad, nVar, nDof, nDofPad, nDofPad, nDofPad , 1, 1, 1, 1, 1, "lduh_x", prefetchInput="B", prefetchOutput="C")
self.context["matmulConfigs"]["lduh_y"] = MatmulConfig(nDofPad*nVar, nDof, nDof, nDofPad*nVar, nDofPad, nDofPad*nVar , 1, 1, 1, 1, 1, "lduh_y", prefetchInput="A", prefetchOutput="C")
#self.context["matmulConfigs"]["lduh_y"] = MatmulConfig(nDofPad*nVar, nDof, nDof, nDofPad*nVar, nDofPad, nDofPad*nVar , 1, 1, 1, 1, 1, "lduh_y", prefetchInput="A", prefetchOutput="C")
self.context["matmulConfigs"]["lduh_y"] = MatmulConfig(nDofPad*nVar, nDof, nDof, nDofPad*nVar, nDofPad, nDofPad*nVar , 1, 1, 1, 1, 1, "lduh_y")
if self.context["nDim"]>=3:
self.context["matmulConfigs"]["lduh_z"] = MatmulConfig(nDofPad*nVar*nDof, nDof, nDof, nDofPad*nVar*nDof, nDofPad, nDofPad*nVar*nDof, 1, 1, 1, 1, 1, "lduh_z")
if self.context["useNCP"] or self.context['useViscousFlux']:
......
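Note on the hunk above: only the y-direction kernels (`rhs_y`, `lduh_y`) lose their prefetch hints; the x-direction configurations keep `prefetchInput`/`prefetchOutput`, which feed the `prefetchA`/`prefetchB`/`prefetchC` switches in the matmul macro template further down. A minimal sketch, assuming a hypothetical `use_prefetch_y` flag that is not part of the generator, of how the prefetching and plain variants could be toggled instead of kept as a commented duplicate:

```python
# Sketch only: "use_prefetch_y" is a hypothetical toggle, not an existing flag.
use_prefetch_y = False
prefetch_kwargs = {"prefetchInput": "A", "prefetchOutput": "C"} if use_prefetch_y else {}

# Inside the model this would replace the two rhs_y lines above
# (positional arguments copied verbatim from the hunk):
# self.context["matmulConfigs"]["rhs_y"] = MatmulConfig(
#     nDofPad*nVar, nDof, nDof, nDofPad*nVar, nDofPad, nDofPad*nVar,
#     1, 1, 1, 1, 1, "rhs_y", **prefetch_kwargs)
print(prefetch_kwargs)  # {} while the prefetch tests stay disabled
```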
@@ -37,8 +37,18 @@ class GemmsGeneratorModel(AbstractModelBaseClass):
return {"gemmList": gemmList}
def generateLIBXSMMgemm(self, outputFileName, matmul):
prefecthing = "nopf" # No native prefetching supported!
type = "dense" # for plain assembly code (rather than inline assembly) choose dense_asm
# if matmul.prefetchInput == "A":
# prefetching = "AL2"
# elif matmul.prefetchInput == "B":
# prefetching = "BL2viaC"
# elif matmul.prefetchInput == "AB":
# prefetching = "AL2_BL2viaC"
# else:
# prefetching = "nopf"
prefetching = "nopf"
commandLineArguments = " " + type + \
" " + os.path.join(self.context["pathToOutputDirectory"], outputFileName) + \
" " + self.context["codeNamespace"] + "::" + matmul.baseroutinename + \
@@ -53,10 +63,10 @@ class GemmsGeneratorModel(AbstractModelBaseClass):
" " + str(matmul.alignment_A) + \
" " + str(matmul.alignment_C) + \
" " + self.context["architecture"] + \
" " + prefecthing + \
" " + prefetching + \
" " + matmul.precision
bashCommand = self.context["pathToLibxsmmGemmGenerator"] + commandLineArguments
subprocess.call(bashCommand.split())
return (matmul.baseroutinename, matmul.precision)
return (matmul.baseroutinename, matmul.precision, prefetching != "nopf")
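The commented-out block in `generateLIBXSMMgemm` records how `MatmulConfig.prefetchInput` would map onto the libxsmm generator's prefetch strategies once the prefetch tests are re-enabled; for now the strategy is pinned to `nopf`, and the extra boolean in the return tuple tells the header template whether a kernel was built with prefetching. A small sketch of that mapping as a standalone helper (the function name is illustrative, not part of the generator):

```python
def libxsmm_prefetch_strategy(prefetch_input):
    """Map a MatmulConfig prefetch hint onto a libxsmm generator strategy string."""
    return {
        "A": "AL2",            # prefetch the A operand into L2
        "B": "BL2viaC",        # prefetch the B operand via the C accesses
        "AB": "AL2_BL2viaC",   # prefetch both operands
    }.get(prefetch_input, "nopf")  # default: no prefetching

print(libxsmm_prefetch_strategy("B"))  # -> BL2viaC

# The generator would append the strategy to the command line and report whether
# prefetching was requested, mirroring the new return statement above:
# return (matmul.baseroutinename, matmul.precision, strategy != "nopf")
```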
@@ -174,8 +174,8 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral(
lQi[{{idxLQi(0,z,n,0,yx)}}] = luh[{{idxLuh(z,0,yx,n)}}];
}
{% if nPar > 0 %}
for (int n = {{nVar}}; n < {{nData}}; n++) {
lPi[{{idxLPi(z,n,0,yx)}}] = luh[{{idxLuh(z,0,yx,n)}}];
for (int n = 0; n < {{nPar}}; n++) {
lPi[{{idxLPi(z,n,0,yx)}}] = luh[{{idxLuh(z,0,yx,"n+"~nVar)}}];
}
{% endif %}
}
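The template fix above changes how the material parameters are copied out of `luh`: the loop now runs over the `nPar` parameters directly and offsets the `luh` read by `nVar`, instead of running `n` from `nVar` to `nData` and indexing `lPi` past its parameter range. A small, self-contained Python illustration of the intended index mapping (plain lists stand in for the generated C arrays; the sizes are made up):

```python
nVar, nPar = 5, 2                       # illustrative sizes; nData = nVar + nPar
luh_point = list(range(nVar + nPar))    # one spatial point: variables, then parameters

# new loop: lPi holds only the nPar material parameters, read from luh at n + nVar
lPi_point = [luh_point[n + nVar] for n in range(nPar)]
assert lPi_point == luh_point[nVar:nVar + nPar]
```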
@@ -623,7 +623,7 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral(
// Compute the "derivatives" (contributions of the stiffness matrix)
// x direction (independent from the y and z derivatives)
for (int zy = 0; zn < {{nDof3D*nVar}}; zn++) {
for (int zn = 0; zn < {{nDof3D*nVar}}; zn++) {
{{ m.matmul('gradQ_x', 'dudx_by_dx', 'lQhi', 'gradQ', '0', idxLQhi(0,zn,0,0), idxGradQ(0,0,zn,0,0)) | indent(4) }}{##}
}
......
@@ -174,8 +174,8 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral(
lQi[{{idxLQi(0,0,zy,n,x)}}] = luh[{{idxLuh(0,zy,x,n)}}];
}
{% if nPar > 0 %}
for (int n = {{nVar}}; n < {{nData}}; n++) {
lPi[{{idxLPi(0,zy,n,x)}}] = luh[{{idxLuh(0,zy,x,n)}}];
for (int n = 0; n < {{nPar}}; n++) {
lPi[{{idxLPi(0,zy,n,x)}}] = luh[{{idxLuh(0,zy,x,"n+"~nVar)}}];
}
{% endif %}
}
@@ -313,7 +313,8 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral(
// y direction (independent from the x and z derivatives), fuse nx
for (int z = 0; z < {{nDof3D}}; z++) {
{{ m.matmul_prefetch('rhs_y', 'lFhi', 'rhsCoeff', 'rhs', idxLFhi(1,z,0,0,0), '0', idxRhs(t,z,0,0,0), idxLFhi(1,'(z+1)',0,0,0), '0', idxRhs(t,'(z+1)',0,0,0)) | indent(8) }}{##}
{#{{ m.matmul_prefetch('rhs_y', 'lFhi', 'rhsCoeff', 'rhs', idxLFhi(1,z,0,0,0), '0', idxRhs(t,z,0,0,0), idxLFhi(1,'(z+1)',0,0,0), '0', idxRhs(t,'(z+1)',0,0,0)) | indent(8) }}{##}
{{ m.matmul('rhs_y', 'lFhi', 'rhsCoeff', 'rhs', idxLFhi(1,z,0,0,0), '0', idxRhs(t,z,0,0,0)) | indent(8) }}{##}
}
{% if nDim==3 %}
@@ -845,7 +846,8 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral(
//y, fuse nx
for (int z = 0; z < {{nDof3D}}; z++) {
{{ m.matmul_prefetch('lduh_y', 'lFhi', 'coeffVolume', 'lQi', idxLFhi(1,z,0,0,0), '0', idxLQhi(z,0,0,0), idxLFhi(1,'(z+1)',0,0,0), '0', idxLQhi('(z+1)',0,0,0)) | indent(4) }}{##}
{#{{ m.matmul_prefetch('lduh_y', 'lFhi', 'coeffVolume', 'lQi', idxLFhi(1,z,0,0,0), '0', idxLQhi(z,0,0,0), idxLFhi(1,'(z+1)',0,0,0), '0', idxLQhi('(z+1)',0,0,0)) | indent(4) }}{##}
{{ m.matmul('lduh_y', 'lFhi', 'coeffVolume', 'lQi', idxLFhi(1,z,0,0,0), '0', idxLQhi(z,0,0,0)) | indent(4) }}{##}
}
{% if nDim == 3 %}
......
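The two y-direction hunks above replace `matmul_prefetch` calls with plain `matmul` calls, keeping the prefetching invocations as template comments. The only difference between the two macros is that `matmul_prefetch` also receives the operand offsets of iteration `z+1`, which the generated kernel would use purely as software-prefetch hints (matching `prefetchInput="A"`, `prefetchOutput="C"` on the dropped configurations). A minimal, self-contained Python illustration of that call pattern; all names are stand-ins for the template's index macros and the generated kernels:

```python
nDof3D = 4
stride = 16                                    # pretend slab size of one z layer

def gemm_rhs_y(a_off, c_off, a_next=None, c_next=None):
    # a_next / c_next correspond to the extra pointers the prefetch variant passes;
    # a real kernel would only issue prefetches for them, never read or write them.
    hint = f", prefetch hint A+{a_next} / C+{c_next}" if a_next is not None else ""
    print(f"gemm on A+{a_off} -> C+{c_off}{hint}")

for z in range(nDof3D):
    # matmul_prefetch style: also pass the offsets of iteration z+1 as hints
    gemm_rhs_y(z * stride, z * stride, a_next=(z + 1) * stride, c_next=(z + 1) * stride)
    # matmul style (what the hunk switches to): no hints
    # gemm_rhs_y(z * stride, z * stride)
```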
@@ -20,11 +20,13 @@
namespace {{namespaceName}} {
{% endfor %}
{% for gemm,precision in gemmList %}
{% for gemm,precision,prefetch in gemmList %}
{% if precision == "DP" %}
void {{gemm}}(const double* A, const double* B, double* C);
{#void {{gemm}}(const double* A, const double* B, double* C{% if prefetch %}, const double* A2, const double* B2, const double* C2{% endif %});#}
{% else %}
void {{gemm}}(const float* A, const float* B, float* C);
{#void {{gemm}}(const float* A, const float* B, float* C{% if prefetch %}, const double* A2, const double* B2, const double* C2{% endif %});#}
{% endif %}
{% endfor %}
......
@@ -45,6 +45,7 @@
{% set prefetchA = (prefetchLoGInputs and (conf.prefetchInput == "A" or conf.prefetchInput == "AB")) %}
{% set prefetchB = (prefetchLoGInputs and (conf.prefetchInput == "B" or conf.prefetchInput == "AB")) %}
{% set prefetchC = (prefetchLoGOutputs and conf.prefetchOutput == "C") %}
{% set prefetchGemm = prefetchA or prefetchB or prefetchC %}
{# /*******************
**** Sub macros ****
********************/ #}
@@ -82,6 +83,20 @@ _mm_prefetch({{array}}+{{offset}}{% if offsetLine != 0 %}+{{offsetLine}}{% endif
#pragma forceinline
#endif
{{conf.baseroutinename}}({{A}}{% if A_shift != '0' %}+{{A_shift}}{% endif %}, {{B}}{% if B_shift != '0' %}+{{B_shift}}{% endif %}, {{C}}{% if C_shift != '0' %}+{{C_shift}}{% endif %});
{% if false %}{# prefetch in gemm, disabled #}
#ifdef USE_IPO
#pragma forceinline
#endif
{% if prefetchGemm %}
{{conf.baseroutinename}}({{A}}{% if A_shift != '0' %}+{{A_shift}}{% endif %}, {{B}}{% if B_shift != '0' %}+{{B_shift}}{% endif %}, {{C}}{% if C_shift != '0' %}+{{C_shift}}{% endif %}, {{A}}{% if A_next != '0' %}+{{A_next}}{% endif %}, {{B}}{% if B_next != '0' %}+{{B_next}}{% endif %}, {{C}}{% if C_next != '0' %}+{{C_next}}{% endif %});
{% else %}
{{conf.baseroutinename}}({{A}}{% if A_shift != '0' %}+{{A_shift}}{% endif %}, {{B}}{% if B_shift != '0' %}+{{B_shift}}{% endif %}, {{C}}{% if C_shift != '0' %}+{{C_shift}}{% endif %});
{% endif %}
{% endif %}
{#
// Eigen case
......