Commit 92f88d59 authored by Jean-Matthieu Gallard

KernelGen - Eigen support (WiP)

parent 536ba89a
...@@ -38,13 +38,16 @@ class Configuration: ...@@ -38,13 +38,16 @@ class Configuration:
# path to the gemm generator from this file # path to the gemm generator from this file
pathToLibxsmmGemmGenerator = os.path.abspath(os.path.join(pathToExaHyPERoot, "Submodules", "libxsmm", "bin", "libxsmm_gemm_generator")) pathToLibxsmmGemmGenerator = os.path.abspath(os.path.join(pathToExaHyPERoot, "Submodules", "libxsmm", "bin", "libxsmm_gemm_generator"))
# path to eigen, will be symlinked into the kernels directory if eigen is used (see matmulLib)
pathToEigen = os.path.abspath(os.path.join(pathToExaHyPERoot, "Submodules", "eigen"))
# path to jinja2 # path to jinja2
pathToJinja2 = os.path.abspath(os.path.join(pathToExaHyPERoot, "Submodules", "jinja", "src")) pathToJinja2 = os.path.abspath(os.path.join(pathToExaHyPERoot, "Submodules", "jinja", "src"))
# path to markupsafe # path to markupsafe
pathToMarkupsafe = os.path.abspath(os.path.join(pathToExaHyPERoot, "Submodules", "markupsafe", "src")) pathToMarkupsafe = os.path.abspath(os.path.join(pathToExaHyPERoot, "Submodules", "markupsafe", "src"))
# simd size of the accepted architectures # simd size of the accepted architectures
simdWidth = { "noarch" : 1, simdWidth = { "noarch" : 1,
"wsm" : 2, "wsm" : 2,
...@@ -55,9 +58,11 @@ class Configuration: ...@@ -55,9 +58,11 @@ class Configuration:
"skx" : 8 "skx" : 8
} }
# set to false to use standard loops instead of libxsmm # choose the BLAS library for the matmul: "None" (= C++ loops), "Libxsmm" or "Eigen"
useLibxsmm = True; matmulLib = "Libxsmm";
#matmulLib = "Eigen";
#matmulLib = "None";
# set to true to print models runtime # set to true to print models runtime
runtimeDebug = False; runtimeDebug = False;
......
...@@ -67,7 +67,8 @@ class Controller: ...@@ -67,7 +67,8 @@ class Controller:
"codeNamespace" : args["namespace"], "codeNamespace" : args["namespace"],
"tempVarsOnStack" : args["tempVarsOnStack"], "tempVarsOnStack" : args["tempVarsOnStack"],
"architecture" : args["architecture"], "architecture" : args["architecture"],
"useLibxsmm" : Configuration.useLibxsmm, "useLibxsmm" : Configuration.matmulLib == "Libxsmm",
"useEigen" : Configuration.matmulLib == "Eigen",
"pathToLibxsmmGemmGenerator" : Configuration.pathToLibxsmmGemmGenerator, "pathToLibxsmmGemmGenerator" : Configuration.pathToLibxsmmGemmGenerator,
"runtimeDebug" : Configuration.runtimeDebug #for debug "runtimeDebug" : Configuration.runtimeDebug #for debug
} }
...@@ -209,12 +210,15 @@ class Controller: ...@@ -209,12 +210,15 @@ class Controller:
if exception.errno != errno.EEXIST: if exception.errno != errno.EEXIST:
raise raise
# remove all .cpp, .cpph, .c and .h files (we are in append mode!) # remove all .cpp, .cpph, .c and .h files (we are in append mode!) as well as previous symlink (see symlinkBLASlib())
for fileName in os.listdir(self.config["pathToOutputDirectory"]): for fileName in os.listdir(self.config["pathToOutputDirectory"]):
_ , ext = os.path.splitext(fileName) _ , ext = os.path.splitext(fileName)
if(ext in [".cpp", ".cpph", ".c", ".h"]): if ext in [".cpp", ".cpph", ".c", ".h"] or fileName in ["Eigen"]:
os.remove(self.config["pathToOutputDirectory"] + "/" + fileName) os.remove(self.config["pathToOutputDirectory"] + "/" + fileName)
# Symlink the BLAS library if needed
self.symlinkBLASlib()
# run the models new files # run the models new files
self.runModel( "kernelsHeader", kernelsHeaderModel.KernelsHeaderModel(self.baseContext)) self.runModel( "kernelsHeader", kernelsHeaderModel.KernelsHeaderModel(self.baseContext))
...@@ -287,3 +291,7 @@ class Controller: ...@@ -287,3 +291,7 @@ class Controller:
" " + matmul.precision " " + matmul.precision
bashCommand = self.config["pathToLibxsmmGemmGenerator"] + commandLineArguments bashCommand = self.config["pathToLibxsmmGemmGenerator"] + commandLineArguments
subprocess.call(bashCommand.split()) subprocess.call(bashCommand.split())
def symlinkBLASlib(self):
    """Symlink the selected BLAS library into the output directory if needed.

    Only the Eigen backend requires a link: when ``self.config["useEigen"]``
    is true, a symlink named ``Eigen`` that points at
    ``<Configuration.pathToEigen>/Eigen`` is created inside
    ``self.config["pathToOutputDirectory"]``. Nothing is done otherwise.

    Raises:
        OSError: if the link cannot be created, e.g. because a regular file
            or directory named ``Eigen`` already exists at the destination.
    """
    if not self.config["useEigen"]:
        return
    linkTarget = os.path.join(Configuration.pathToEigen, "Eigen")
    linkName = os.path.join(self.config["pathToOutputDirectory"], "Eigen")
    # os.symlink fails when linkName already exists. Drop a stale link from a
    # previous run so this method does not depend on the caller's cleanup.
    # Anything that is not a symlink is left untouched and still raises below.
    if os.path.islink(linkName):
        os.remove(linkName)
    os.symlink(linkTarget, linkName)
...@@ -18,9 +18,7 @@ ...@@ -18,9 +18,7 @@
#include "{{pathToOptKernel}}/DGMatrices.h" #include "{{pathToOptKernel}}/DGMatrices.h"
#include "{{pathToOptKernel}}/Quadrature.h" #include "{{pathToOptKernel}}/Quadrature.h"
{% if useLibxsmm %} {{ m.matmulInclude() }}{# include required headers for matmul #}
#include "{{pathToOptKernel}}/gemmsCPP.h"
{% endif %}
// local help function // local help function
inline int powOf3(int exp){ inline int powOf3(int exp){
...@@ -67,8 +65,8 @@ void {{codeNamespace}}::faceUnknownsProlongation( ...@@ -67,8 +65,8 @@ void {{codeNamespace}}::faceUnknownsProlongation(
{% endif %} {% endif %}
// read only input, start with the function input = coarse // read only input, start with the function input = coarse
const double* inputQ = lQhbndCoarse; double* inputQ = const_cast<double*>(lQhbndCoarse);
const double* inputF = lFhbndCoarse; double* inputF = const_cast<double*>(lFhbndCoarse);
// output pointer, ensures that the output of the last iteration points to the function output // output pointer, ensures that the output of the last iteration points to the function output
double* outputQ; double* outputQ;
...@@ -90,6 +88,13 @@ void {{codeNamespace}}::faceUnknownsProlongation( ...@@ -90,6 +88,13 @@ void {{codeNamespace}}::faceUnknownsProlongation(
int subintervalIndex_1; int subintervalIndex_1;
{% endif %} {% endif %}
{{ m.setupMatmul('face_Q_x')| indent(2) }}{##}
{{ m.setupMatmul('face_F_x')| indent(2) }}{##}
{% if nDim == 3 %}
{{ m.setupMatmul('face_Q_y')| indent(2) }}{##}
{{ m.setupMatmul('face_F_y')| indent(2) }}{##}
{% endif %}
// This loop decodes the elements of subfaceIndex into a tertiary basis // This loop decodes the elements of subfaceIndex into a tertiary basis
// starting with the highest significance 3^(levelDelta-1). // starting with the highest significance 3^(levelDelta-1).
// //
...@@ -170,7 +175,7 @@ void {{codeNamespace}}::faceFluxRestriction( ...@@ -170,7 +175,7 @@ void {{codeNamespace}}::faceFluxRestriction(
{% endif %} {% endif %}
// read only input, start with the function input = fine // read only input, start with the function input = fine
const double* inputF = lFhbndFine; double* inputF = const_cast<double*>(lFhbndFine);
// output pointer, ensures that the output of the last iteration points to the function output // output pointer, ensures that the output of the last iteration points to the function output
double* outputF; double* outputF;
...@@ -186,6 +191,11 @@ void {{codeNamespace}}::faceFluxRestriction( ...@@ -186,6 +191,11 @@ void {{codeNamespace}}::faceFluxRestriction(
int subfaceIndexCurrent_1 = subfaceIndex[1]; int subfaceIndexCurrent_1 = subfaceIndex[1];
int subintervalIndex_1; int subintervalIndex_1;
{% endif %} {% endif %}
{{ m.setupMatmul('face_F_x')| indent(2) }}{##}
{% if nDim == 3 %}
{{ m.setupMatmul('face_F_y')| indent(2) }}{##}
{% endif %}
// This loop decodes the indices of subfaceIndex into a tertiary basis // This loop decodes the indices of subfaceIndex into a tertiary basis
// starting with the lowest significance 3^0 (in contrast to the prolongation loop). // starting with the lowest significance 3^0 (in contrast to the prolongation loop).
...@@ -248,7 +258,7 @@ void {{codeNamespace}}::volumeUnknownsProlongation( ...@@ -248,7 +258,7 @@ void {{codeNamespace}}::volumeUnknownsProlongation(
{% endif %} {% endif %}
// read only input, start with the function input = coarse // read only input, start with the function input = coarse
const double* inputLuh = luhCoarse; double* inputLuh = const_cast<double*>(luhCoarse);
// output pointer, ensures that the output of the last iteration points to the function output // output pointer, ensures that the output of the last iteration points to the function output
double* outputLuh; double* outputLuh;
...@@ -270,6 +280,12 @@ void {{codeNamespace}}::volumeUnknownsProlongation( ...@@ -270,6 +280,12 @@ void {{codeNamespace}}::volumeUnknownsProlongation(
int subintervalIndex_2; int subintervalIndex_2;
{% endif %} {% endif %}
{{ m.setupMatmul('volume_x')| indent(2) }}{##}
{{ m.setupMatmul('volume_y')| indent(2) }}{##}
{% if nDim == 3 %}
{{ m.setupMatmul('volume_z')| indent(2) }}{##}
{% endif %}
// This loop step by step decodes the elements of subcellIndex into a tertiary basis // This loop step by step decodes the elements of subcellIndex into a tertiary basis
// starting with the highest significance 3^(levelDelta-1). // starting with the highest significance 3^(levelDelta-1).
// //
...@@ -343,7 +359,7 @@ void {{codeNamespace}}::volumeUnknownsRestriction( ...@@ -343,7 +359,7 @@ void {{codeNamespace}}::volumeUnknownsRestriction(
const int levelDelta = fineGridLevel - coarseGridLevel; const int levelDelta = fineGridLevel - coarseGridLevel;
// read only input, start with the function input = fine // read only input, start with the function input = fine
const double* inputLuh = luhFine; double* inputLuh = const_cast<double*>(luhFine);
int subintervalIndex_0 = subcellIndex[0]; int subintervalIndex_0 = subcellIndex[0];
int subintervalIndex_1 = subcellIndex[1]; int subintervalIndex_1 = subcellIndex[1];
{% if nDim==3 %} {% if nDim==3 %}
...@@ -359,6 +375,14 @@ void {{codeNamespace}}::volumeUnknownsRestriction( ...@@ -359,6 +375,14 @@ void {{codeNamespace}}::volumeUnknownsRestriction(
double* tmpLuh2; //allocated and freed only if levelDelta > 1 double* tmpLuh2; //allocated and freed only if levelDelta > 1
{% endif %} {% endif %}
{{ m.setupMatmul('volume_x')| indent(2) }}{##}
{{ m.setupMatmul('volume_y')| indent(2) }}{##}
{% if nDim == 2 %}
{{ m.setupMatmul('volume_y_add')| indent(2) }}{##}
{% else %}
{{ m.setupMatmul('volume_z')| indent(2) }}{##}
{{ m.setupMatmul('volume_z_add')| indent(2) }}{##}
{% endif %}
if(levelDelta > 1) { if(levelDelta > 1) {
{{m.allocateArray('tmpLuh', nDof3D*nDof*nDof*nDataPad, pointerExists=True ) | indent(2)}}{##} {{m.allocateArray('tmpLuh', nDof3D*nDof*nDof*nDataPad, pointerExists=True ) | indent(2)}}{##}
......
...@@ -18,9 +18,8 @@ ...@@ -18,9 +18,8 @@
#include "{{pathToOptKernel}}/Kernels.h" #include "{{pathToOptKernel}}/Kernels.h"
#include "{{pathToOptKernel}}/DGMatrices.h" #include "{{pathToOptKernel}}/DGMatrices.h"
#include "{{pathToOptKernel}}/Quadrature.h" #include "{{pathToOptKernel}}/Quadrature.h"
{% if useLibxsmm %}
#include "{{pathToOptKernel}}/gemmsCPP.h" {{ m.matmulInclude() }}{# include required headers for matmul #}
{% endif %}
#include "{{solverHeader}}" #include "{{solverHeader}}"
...@@ -54,6 +53,13 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral{{nameSuffix}}( ...@@ -54,6 +53,13 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral{{nameSuffix}}(
const double dt, const double dt,
std::vector<int>* pointSources // will be deleted in the end if set std::vector<int>* pointSources // will be deleted in the end if set
) { ) {
{{ m.setupMatmul('flux_x') | indent(2) }}{##}
{{ m.setupMatmul('flux_y') | indent(2) }}{##}
{{ m.setupMatmul('flux_z') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_x') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_y') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_z') | indent(2) }}{##}
const double invDt = 1. / dt; const double invDt = 1. / dt;
const double invDx = 1. / dx; const double invDx = 1. / dx;
......
...@@ -28,9 +28,8 @@ ...@@ -28,9 +28,8 @@
#include "{{pathToOptKernel}}/Kernels.h" #include "{{pathToOptKernel}}/Kernels.h"
#include "{{pathToOptKernel}}/DGMatrices.h" #include "{{pathToOptKernel}}/DGMatrices.h"
#include "{{pathToOptKernel}}/Quadrature.h" #include "{{pathToOptKernel}}/Quadrature.h"
{% if useLibxsmm %}
#include "{{pathToOptKernel}}/gemmsCPP.h" {{ m.matmulInclude() }}{# include required headers for matmul #}
{% endif %}
#include "{{solverHeader}}" #include "{{solverHeader}}"
...@@ -84,6 +83,12 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral{{nameSuffix}}( ...@@ -84,6 +83,12 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral{{nameSuffix}}(
__assume_aligned(iweights3,ALIGNMENT); __assume_aligned(iweights3,ALIGNMENT);
#endif #endif
{{ m.setupMatmul('flux_x_sck') | indent(2) }}{##}
{{ m.setupMatmul('flux_y_sck') | indent(2) }}{##}
{{ m.setupMatmul('flux_z_sck') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_x_sck') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_y_sck') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_z_sck') | indent(2) }}{##}
const double invDt = 1. / dt; const double invDt = 1. / dt;
const double invDx = 1. / dx; const double invDx = 1. / dx;
......
...@@ -29,9 +29,8 @@ ...@@ -29,9 +29,8 @@
#include "{{pathToOptKernel}}/Kernels.h" #include "{{pathToOptKernel}}/Kernels.h"
#include "{{pathToOptKernel}}/DGMatrices.h" #include "{{pathToOptKernel}}/DGMatrices.h"
#include "{{pathToOptKernel}}/Quadrature.h" #include "{{pathToOptKernel}}/Quadrature.h"
{% if useLibxsmm %}
#include "{{pathToOptKernel}}/gemmsCPP.h" {{ m.matmulInclude() }}{# include required headers for matmul #}
{% endif %}
#include "{{solverHeader}}" #include "{{solverHeader}}"
...@@ -85,6 +84,14 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral{{nameSuffix}}( ...@@ -85,6 +84,14 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral{{nameSuffix}}(
__assume_aligned(iweights3,ALIGNMENT); __assume_aligned(iweights3,ALIGNMENT);
#endif #endif
{{ m.setupMatmul('flux_x_sck_vect') | indent(2) }}{##}
{{ m.setupMatmul('flux_y_or_z_sck_vect') | indent(2) }}{##}
{{ m.setupMatmul('flux_y_sck_vect') | indent(2) }}{##}
{{ m.setupMatmul('flux_z_sck_vect') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_x_sck_vect')| indent(2) }}{##}
{{ m.setupMatmul('gradQ_y_sck_vect')| indent(2) }}{##}
{{ m.setupMatmul('gradQ_z_sck_vect')| indent(2) }}{##}
const double invDt = 1. / dt; const double invDt = 1. / dt;
const double invDx = 1. / dx; const double invDx = 1. / dx;
......
...@@ -18,9 +18,8 @@ ...@@ -18,9 +18,8 @@
#include "{{pathToOptKernel}}/Kernels.h" #include "{{pathToOptKernel}}/Kernels.h"
#include "{{pathToOptKernel}}/DGMatrices.h" #include "{{pathToOptKernel}}/DGMatrices.h"
#include "{{pathToOptKernel}}/Quadrature.h" #include "{{pathToOptKernel}}/Quadrature.h"
{% if useLibxsmm %}
#include "{{pathToOptKernel}}/gemmsCPP.h" {{ m.matmulInclude() }}{# include required headers for matmul #}
{% endif %}
#include "{{solverHeader}}" #include "{{solverHeader}}"
...@@ -72,9 +71,25 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral( ...@@ -72,9 +71,25 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral(
{% if useViscousFlux %} {% if useViscousFlux %}
__assume_aligned(gradQAvg, ALIGNMENT); __assume_aligned(gradQAvg, ALIGNMENT);
{% endif %} {% endif %}
#endif #endif
{{ m.setupMatmul('rhs_x') | indent(2) }}{##}
{{ m.setupMatmul('rhs_y') | indent(2) }}{##}
{{ m.setupMatmul('rhs_z') | indent(2) }}{##}
{{ m.setupMatmul('lduh_x') | indent(2) }}{##}
{{ m.setupMatmul('lduh_y') | indent(2) }}{##}
{{ m.setupMatmul('lduh_z') | indent(2) }}{##}
{{ m.setupMatmul('gradF_x_RKLoop') | indent(2) }}{##}
{{ m.setupMatmul('gradF_y_RKLoop') | indent(2) }}{##}
{{ m.setupMatmul('gradF_z_RKLoop') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_x') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_y') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_z') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_x_RKLoop') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_y_RKLoop') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_z_RKLoop') | indent(2) }}{##}
{{ m.setupMatmul('lqi') | indent(2) }}{##}
// 0. Allocate local variable // 0. Allocate local variable
{% if useFluxVect %} {% if useFluxVect %}
// transposed F slice for flux_vect // transposed F slice for flux_vect
......
...@@ -35,9 +35,8 @@ ...@@ -35,9 +35,8 @@
#include "{{pathToOptKernel}}/Kernels.h" #include "{{pathToOptKernel}}/Kernels.h"
#include "{{pathToOptKernel}}/DGMatrices.h" #include "{{pathToOptKernel}}/DGMatrices.h"
#include "{{pathToOptKernel}}/Quadrature.h" #include "{{pathToOptKernel}}/Quadrature.h"
{% if useLibxsmm %}
#include "{{pathToOptKernel}}/gemmsCPP.h" {{ m.matmulInclude() }}{# include required headers for matmul #}
{% endif %}
#include "{{solverHeader}}" #include "{{solverHeader}}"
...@@ -85,9 +84,19 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral( ...@@ -85,9 +84,19 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral(
{% if useNCP or useViscousFlux %} {% if useNCP or useViscousFlux %}
__assume_aligned(gradQ, ALIGNMENT); __assume_aligned(gradQ, ALIGNMENT);
{% endif %} {% endif %}
#endif #endif
{{ m.setupMatmul('rhs_x') | indent(2) }}{##}
{{ m.setupMatmul('rhs_y') | indent(2) }}{##}
{{ m.setupMatmul('rhs_z') | indent(2) }}{##}
{{ m.setupMatmul('lduh_x') | indent(2) }}{##}
{{ m.setupMatmul('lduh_y') | indent(2) }}{##}
{{ m.setupMatmul('lduh_z') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_x') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_y') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_z') | indent(2) }}{##}
{{ m.setupMatmul('lqi') | indent(2) }}{##}
// 0. Allocate local variable // 0. Allocate local variable
double new_lQi_slice[{{nDof*nVarPad}}] __attribute__((aligned(ALIGNMENT))); //for step 4 (computing new lQi value), doesn't update parameters double new_lQi_slice[{{nDof*nVarPad}}] __attribute__((aligned(ALIGNMENT))); //for step 4 (computing new lQi value), doesn't update parameters
const double inverseDt = 1.0 / dt; const double inverseDt = 1.0 / dt;
......
...@@ -35,9 +35,8 @@ ...@@ -35,9 +35,8 @@
#include "{{pathToOptKernel}}/Kernels.h" #include "{{pathToOptKernel}}/Kernels.h"
#include "{{pathToOptKernel}}/DGMatrices.h" #include "{{pathToOptKernel}}/DGMatrices.h"
#include "{{pathToOptKernel}}/Quadrature.h" #include "{{pathToOptKernel}}/Quadrature.h"
{% if useLibxsmm %}
#include "{{pathToOptKernel}}/gemmsCPP.h" {{ m.matmulInclude() }}{# include required headers for matmul #}
{% endif %}
#include "{{solverHeader}}" #include "{{solverHeader}}"
...@@ -89,9 +88,19 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral( ...@@ -89,9 +88,19 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral(
{% if useNCP or useViscousFlux %} {% if useNCP or useViscousFlux %}
__assume_aligned(gradQ, ALIGNMENT); __assume_aligned(gradQ, ALIGNMENT);
{% endif %} {% endif %}
#endif #endif
{{ m.setupMatmul('rhs_x') | indent(2) }}{##}
{{ m.setupMatmul('rhs_y') | indent(2) }}{##}
{{ m.setupMatmul('rhs_z') | indent(2) }}{##}
{{ m.setupMatmul('lduh_x') | indent(2) }}{##}
{{ m.setupMatmul('lduh_y') | indent(2) }}{##}
{{ m.setupMatmul('lduh_z') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_x') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_y') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_z') | indent(2) }}{##}
{{ m.setupMatmul('lqi') | indent(2) }}{##}
// 0. Allocate local variable // 0. Allocate local variable
constexpr int MaxIterations = {{2*nDof+1}}; constexpr int MaxIterations = {{2*nDof+1}};
double new_lQi_slice[{{nDof*nVar*nDofPad}}] __attribute__((aligned(ALIGNMENT))) = {0.}; //for step 4 (computing new lQi value), doesn't update parameters double new_lQi_slice[{{nDof*nVar*nDofPad}}] __attribute__((aligned(ALIGNMENT))) = {0.}; //for step 4 (computing new lQi value), doesn't update parameters
......
...@@ -18,15 +18,19 @@ ...@@ -18,15 +18,19 @@
#include "{{pathToOptKernel}}/Kernels.h" #include "{{pathToOptKernel}}/Kernels.h"
#include "{{pathToOptKernel}}/Quadrature.h" #include "{{pathToOptKernel}}/Quadrature.h"
{% if useLibxsmm %} {{ m.matmulInclude() }}{# include required headers for matmul #}
#include "{{pathToOptKernel}}/gemmsCPP.h"
{% endif %}
#include "{{solverHeader}}" #include "{{solverHeader}}"
//Fortran (Limiter.f90): GetSubcellData //Fortran (Limiter.f90): GetSubcellData
void {{codeNamespace}}::projectOnFVLimiterSpace(const double* const luh, double* const lim) { void {{codeNamespace}}::projectOnFVLimiterSpace(const double* const luh, double* const lim) {
{{ m.setupMatmul('dg2fv_x')| indent(2) }}{##}
{{ m.setupMatmul('dg2fv_y')| indent(2) }}{##}
{% if nDim == 3 %}
{{ m.setupMatmul('dg2fv_z')| indent(2) }}{##}
{% endif %}
//compact projection without ghostlayer //compact projection without ghostlayer
// x // x
{{m.allocateArray('tmpX', nDof3D*nDof*nDofLim*nDataPad) | indent(2)}}{##} {{m.allocateArray('tmpX', nDof3D*nDof*nDofLim*nDataPad) | indent(2)}}{##}
...@@ -74,6 +78,12 @@ void {{codeNamespace}}::projectOnFVLimiterSpace(const double* const luh, double* ...@@ -74,6 +78,12 @@ void {{codeNamespace}}::projectOnFVLimiterSpace(const double* const luh, double*
//Fortran (Limiter.f90): PutSubcellData //Fortran (Limiter.f90): PutSubcellData
void {{codeNamespace}}::projectOnDGSpace(const double* const lim, double* const luh) { void {{codeNamespace}}::projectOnDGSpace(const double* const lim, double* const luh) {
{{ m.setupMatmul('fv2dg_x')| indent(2) }}{##}
{{ m.setupMatmul('fv2dg_y')| indent(2) }}{##}
{% if nDim == 3 %}
{{ m.setupMatmul('fv2dg_z')| indent(2) }}{##}
{% endif %}
// x // x
// ignore and remove ghostlayers // ignore and remove ghostlayers
{{m.allocateArray('tmpX', nDofLim3D*nDofLim*nDof*nDataPad) | indent(2)}}{##} {{m.allocateArray('tmpX', nDofLim3D*nDofLim*nDof*nDataPad) | indent(2)}}{##}
...@@ -238,6 +248,14 @@ void {{codeNamespace}}::compareWithADERDGSolutionAtGaussLobattoNodes( ...@@ -238,6 +248,14 @@ void {{codeNamespace}}::compareWithADERDGSolutionAtGaussLobattoNodes(
double* max double* max
) { ) {
{{ m.setupMatmul('uh2lob_x')| indent(2) }}{##}
{% if nDim == 2 %}
{{ m.setupMatmul('uh2lob_y_slice')| indent(2) }}{##}
{% else %}
{{ m.setupMatmul('uh2lob_y')| indent(2) }}{##}
{{ m.setupMatmul('uh2lob_z_slice')| indent(2) }}{##}
{% endif %}
// x // x
{{m.allocateArray('tmpX', nDof3D*nDof*nDof*nDataPad) | indent(2)}}{##} {{m.allocateArray('tmpX', nDof3D*nDof*nDof*nDataPad) | indent(2)}}{##}
for (int zy = 0; zy < {{nDof3D*nDof}}; zy++) { for (int zy = 0; zy < {{nDof3D*nDof}}; zy++) {
...@@ -304,6 +322,14 @@ void {{codeNamespace}}::compareWithADERDGSolutionAtFVSubcellCenters( ...@@ -304,6 +322,14 @@ void {{codeNamespace}}::compareWithADERDGSolutionAtFVSubcellCenters(
double* max double* max
) { ) {
{{ m.setupMatmul('dg2fv_x')| indent(2) }}{##}
{% if nDim == 2 %}
{{ m.setupMatmul('dg2fv_y_slice')| indent(2) }}{##}
{% else %}
{{ m.setupMatmul('dg2fv_y')| indent(2) }}{##}
{{ m.setupMatmul('dg2fv_z_slice')| indent(2) }}{##}
{% endif %}
// x // x
{{m.allocateArray('tmpX', nDof3D*nDof*nDofLim*nDataPad) | indent(2)}}{##} {{m.allocateArray('tmpX', nDof3D*nDof*nDofLim*nDataPad) | indent(2)}}{##}
for (int zy = 0; zy < {{nDof3D*nDof}}; zy++) { for (int zy = 0; zy < {{nDof3D*nDof}}; zy++) {
......
...@@ -44,11 +44,54 @@ _mm_free({{name}}); ...@@ -44,11 +44,54 @@ _mm_free({{name}});
The gemm config (fetched through matmulKey) contains M, N, K, LDA, LDB, LDC, alpha and beta The gemm config (fetched through matmulKey) contains M, N, K, LDA, LDB, LDC, alpha and beta
*/ */
#} #}
{% macro matmul(matmulKey, A, B, C, A_shift, B_shift, C_shift, overrideUseLibxsmm="BoolNotDefined", trueAlpha="", trueB="", forceCoeffMatrix=False) %} {% macro matmul(matmulKey, A, B, C, A_shift, B_shift, C_shift, trueAlpha="", trueB="", forceCoeffMatrix=False) %}
{% include "subtemplates/matmul.template" %} {% include "subtemplates/matmul.template" %}
{% endmacro %} {% endmacro %}
{# {#
/**
Matmul include
*/
#}
{% macro matmulInclude() %}
{# Emit the include directives required by the selected matmul backend; emits nothing when neither useEigen nor useLibxsmm is set. #}
{% if useEigen %}
// include Eigen for matmul
{# quoted form, like gemmsCPP.h below: Eigen is symlinked into the kernels directory, it is not a system header #}
#include "{{pathToOptKernel}}/Eigen/Dense"
{% endif %}
{% if useLibxsmm %}
// include libxsmms' gemms for matmul
#include "{{pathToOptKernel}}/gemmsCPP.h"
{% endif %}
{% endmacro %}
{#
/**
Setup matmul
*/
#}
{% macro setupMatmul(matmulKey) %}
{#
// Emit the setup code for the matmul registered under matmulKey in matmulConfigs.
// Emits nothing if the key is unknown or if Eigen is not the selected backend.
#}
{% if matmulKey in matmulConfigs %}
{% with %}
{% set conf = matmulConfigs[matmulKey] %}
{# scalar type of the mapped matrices: "DP" selects double, any other precision float #}
{% set fpFormat = "double" if conf.precision == "DP" else "float" %}
{#
// Eigen case: declare one Map per operand (A: MxK, B: KxN, C: MxN), initially bound to nullptr
#}
{% if useEigen %}
// setup Map for {{conf.baseroutinename}}
Eigen::Map<Eigen::Matrix<{{fpFormat}},{{conf.M}},{{conf.K}}>, Eigen::{{"Aligned" if conf.alignment_A == 1 else "Unaligned"}}, Eigen::OuterStride<{{conf.LDA}}> > {{conf.baseroutinename}}_A_map(nullptr);
Eigen::Map<Eigen::Matrix<{{fpFormat}},{{conf.K}},{{conf.N}}>, Eigen::Aligned, Eigen::OuterStride<{{conf.LDB}}> > {{conf.baseroutinename}}_B_map(nullptr); // assume B is aligned
Eigen::Map<Eigen::Matrix<{{fpFormat}},{{conf.M}},{{conf.N}}>, Eigen::{{"Aligned" if conf.alignment_C == 1 else "Unaligned"}}, Eigen::OuterStride<{{conf.LDC}}> > {{conf.baseroutinename}}_C_map(nullptr);
{% endif %}
{% endwith %}
{% endif %}{# matmulKey in matmulConfigs #}
{% endmacro %}
{#