In January 2021 we will introduce a 10 GB quota for project repositories. Higher limits for individual projects will be available on request. Please see https://doku.lrz.de/display/PUBLIC/GitLab for more information.

Commit bca1fd3d authored by Jean-Matthieu Gallard

Merge branch 'jm/eigen' into 'master'

Jm/eigen

See merge request exahype/ExaHyPE-Engine!39
parents 3dea4387 f059aa80
......@@ -26,3 +26,6 @@
[submodule "Submodules/six"]
path = Submodules/six
url = https://github.com/benjaminp/six.git
[submodule "Submodules/eigen"]
path = Submodules/eigen
url = https://gitlab.com/libeigen/eigen.git
......@@ -38,13 +38,16 @@ class Configuration:
# path to the gemm generator from this file
pathToLibxsmmGemmGenerator = os.path.abspath(os.path.join(pathToExaHyPERoot, "Submodules", "libxsmm", "bin", "libxsmm_gemm_generator"))
# path to eigen, will be symlinked into the kernels directory if eigen is used (see matmulLib)
pathToEigen = os.path.abspath(os.path.join(pathToExaHyPERoot, "Submodules", "eigen"))
# path to jinja2
pathToJinja2 = os.path.abspath(os.path.join(pathToExaHyPERoot, "Submodules", "jinja", "src"))
# path to markupsafe
pathToMarkupsafe = os.path.abspath(os.path.join(pathToExaHyPERoot, "Submodules", "markupsafe", "src"))
# simd size of the accepted architectures
simdWidth = { "noarch" : 1,
"wsm" : 2,
......@@ -55,9 +58,11 @@ class Configuration:
"skx" : 8
}
# set to false to use standard loops instead of libxsmm
useLibxsmm = True;
# choose the BLAS library for the matmul: "None" (= C++ loops), "Libxsmm" or "Eigen"
matmulLib = "Libxsmm";
#matmulLib = "Eigen";
#matmulLib = "None";
# set to true to print models runtime
runtimeDebug = False;
......
......@@ -67,7 +67,8 @@ class Controller:
"codeNamespace" : args["namespace"],
"tempVarsOnStack" : args["tempVarsOnStack"],
"architecture" : args["architecture"],
"useLibxsmm" : Configuration.useLibxsmm,
"useLibxsmm" : Configuration.matmulLib == "Libxsmm",
"useEigen" : Configuration.matmulLib == "Eigen",
"pathToLibxsmmGemmGenerator" : Configuration.pathToLibxsmmGemmGenerator,
"runtimeDebug" : Configuration.runtimeDebug #for debug
}
......@@ -209,12 +210,15 @@ class Controller:
if exception.errno != errno.EEXIST:
raise
# remove all .cpp, .cpph, .c and .h files (we are in append mode!)
# remove all .cpp, .cpph, .c and .h files (we are in append mode!) as well as previous symlink (see symlinkBLASlib())
for fileName in os.listdir(self.config["pathToOutputDirectory"]):
_ , ext = os.path.splitext(fileName)
if(ext in [".cpp", ".cpph", ".c", ".h"]):
if ext in [".cpp", ".cpph", ".c", ".h"] or fileName in ["Eigen"]:
os.remove(self.config["pathToOutputDirectory"] + "/" + fileName)
# Symlink the BLAS library if needed
self.symlinkBLASlib()
# run the models new files
self.runModel( "kernelsHeader", kernelsHeaderModel.KernelsHeaderModel(self.baseContext))
......@@ -287,3 +291,7 @@ class Controller:
" " + matmul.precision
bashCommand = self.config["pathToLibxsmmGemmGenerator"] + commandLineArguments
subprocess.call(bashCommand.split())
def symlinkBLASlib(self):
    """Link the Eigen headers into the output directory when Eigen is selected.

    Does nothing unless ``self.config["useEigen"]`` is truthy. Otherwise a
    symlink named ``Eigen`` is created inside
    ``self.config["pathToOutputDirectory"]``, pointing at the ``Eigen``
    directory below ``Configuration.pathToEigen``.

    Raises:
        OSError: propagated from ``os.symlink`` (for instance when an entry
            named ``Eigen`` already exists in the output directory).
    """
    if not self.config["useEigen"]:
        return
    eigenHeaders = os.path.join(Configuration.pathToEigen, "Eigen")
    linkLocation = os.path.join(self.config["pathToOutputDirectory"], "Eigen")
    os.symlink(eigenHeaders, linkLocation)
......@@ -18,9 +18,7 @@
#include "{{pathToOptKernel}}/DGMatrices.h"
#include "{{pathToOptKernel}}/Quadrature.h"
{% if useLibxsmm %}
#include "{{pathToOptKernel}}/gemmsCPP.h"
{% endif %}
{{ m.matmulInclude() }}{# include required headers for matmul #}
// local help function
inline int powOf3(int exp){
......@@ -67,8 +65,8 @@ void {{codeNamespace}}::faceUnknownsProlongation(
{% endif %}
// read only input, start with the function input = coarse
const double* inputQ = lQhbndCoarse;
const double* inputF = lFhbndCoarse;
double* inputQ = const_cast<double*>(lQhbndCoarse);
double* inputF = const_cast<double*>(lFhbndCoarse);
// output pointer, ensures that the output of the last iteration points to the function output
double* outputQ;
......@@ -90,6 +88,13 @@ void {{codeNamespace}}::faceUnknownsProlongation(
int subintervalIndex_1;
{% endif %}
{{ m.setupMatmul('face_Q_x')| indent(2) }}{##}
{{ m.setupMatmul('face_F_x')| indent(2) }}{##}
{% if nDim == 3 %}
{{ m.setupMatmul('face_Q_y')| indent(2) }}{##}
{{ m.setupMatmul('face_F_y')| indent(2) }}{##}
{% endif %}
// This loop decodes the elements of subfaceIndex into a ternary basis
// starting with the highest significance 3^(levelDelta-1).
//
......@@ -170,7 +175,7 @@ void {{codeNamespace}}::faceFluxRestriction(
{% endif %}
// read only input, start with the function input = fine
const double* inputF = lFhbndFine;
double* inputF = const_cast<double*>(lFhbndFine);
// output pointer, ensures that the output of the last iteration points to the function output
double* outputF;
......@@ -186,6 +191,11 @@ void {{codeNamespace}}::faceFluxRestriction(
int subfaceIndexCurrent_1 = subfaceIndex[1];
int subintervalIndex_1;
{% endif %}
{{ m.setupMatmul('face_F_x')| indent(2) }}{##}
{% if nDim == 3 %}
{{ m.setupMatmul('face_F_y')| indent(2) }}{##}
{% endif %}
// This loop decodes the indices of subfaceIndex into a ternary basis
// starting with the lowest significance 3^0 (in contrast to the prolongation loop).
......@@ -248,7 +258,7 @@ void {{codeNamespace}}::volumeUnknownsProlongation(
{% endif %}
// read only input, start with the function input = coarse
const double* inputLuh = luhCoarse;
double* inputLuh = const_cast<double*>(luhCoarse);
// output pointer, ensures that the output of the last iteration points to the function output
double* outputLuh;
......@@ -270,6 +280,12 @@ void {{codeNamespace}}::volumeUnknownsProlongation(
int subintervalIndex_2;
{% endif %}
{{ m.setupMatmul('volume_x')| indent(2) }}{##}
{{ m.setupMatmul('volume_y')| indent(2) }}{##}
{% if nDim == 3 %}
{{ m.setupMatmul('volume_z')| indent(2) }}{##}
{% endif %}
// This loop step by step decodes the elements of subcellIndex into a ternary basis
// starting with the highest significance 3^(levelDelta-1).
//
......@@ -343,7 +359,7 @@ void {{codeNamespace}}::volumeUnknownsRestriction(
const int levelDelta = fineGridLevel - coarseGridLevel;
// read only input, start with the function input = fine
const double* inputLuh = luhFine;
double* inputLuh = const_cast<double*>(luhFine);
int subintervalIndex_0 = subcellIndex[0];
int subintervalIndex_1 = subcellIndex[1];
{% if nDim==3 %}
......@@ -359,6 +375,14 @@ void {{codeNamespace}}::volumeUnknownsRestriction(
double* tmpLuh2; //allocated and freed only if levelDelta > 1
{% endif %}
{{ m.setupMatmul('volume_x')| indent(2) }}{##}
{{ m.setupMatmul('volume_y')| indent(2) }}{##}
{% if nDim == 2 %}
{{ m.setupMatmul('volume_y_add')| indent(2) }}{##}
{% else %}
{{ m.setupMatmul('volume_z')| indent(2) }}{##}
{{ m.setupMatmul('volume_z_add')| indent(2) }}{##}
{% endif %}
if(levelDelta > 1) {
{{m.allocateArray('tmpLuh', nDof3D*nDof*nDof*nDataPad, pointerExists=True ) | indent(2)}}{##}
......
......@@ -18,9 +18,8 @@
#include "{{pathToOptKernel}}/Kernels.h"
#include "{{pathToOptKernel}}/DGMatrices.h"
#include "{{pathToOptKernel}}/Quadrature.h"
{% if useLibxsmm %}
#include "{{pathToOptKernel}}/gemmsCPP.h"
{% endif %}
{{ m.matmulInclude() }}{# include required headers for matmul #}
#include "{{solverHeader}}"
......@@ -54,6 +53,13 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral{{nameSuffix}}(
const double dt,
std::vector<int>* pointSources // will be deleted in the end if set
) {
{{ m.setupMatmul('flux_x') | indent(2) }}{##}
{{ m.setupMatmul('flux_y') | indent(2) }}{##}
{{ m.setupMatmul('flux_z') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_x') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_y') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_z') | indent(2) }}{##}
const double invDt = 1. / dt;
const double invDx = 1. / dx;
......
......@@ -28,9 +28,8 @@
#include "{{pathToOptKernel}}/Kernels.h"
#include "{{pathToOptKernel}}/DGMatrices.h"
#include "{{pathToOptKernel}}/Quadrature.h"
{% if useLibxsmm %}
#include "{{pathToOptKernel}}/gemmsCPP.h"
{% endif %}
{{ m.matmulInclude() }}{# include required headers for matmul #}
#include "{{solverHeader}}"
......@@ -84,6 +83,12 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral{{nameSuffix}}(
__assume_aligned(iweights3,ALIGNMENT);
#endif
{{ m.setupMatmul('flux_x_sck') | indent(2) }}{##}
{{ m.setupMatmul('flux_y_sck') | indent(2) }}{##}
{{ m.setupMatmul('flux_z_sck') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_x_sck') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_y_sck') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_z_sck') | indent(2) }}{##}
const double invDt = 1. / dt;
const double invDx = 1. / dx;
......
......@@ -29,9 +29,8 @@
#include "{{pathToOptKernel}}/Kernels.h"
#include "{{pathToOptKernel}}/DGMatrices.h"
#include "{{pathToOptKernel}}/Quadrature.h"
{% if useLibxsmm %}
#include "{{pathToOptKernel}}/gemmsCPP.h"
{% endif %}
{{ m.matmulInclude() }}{# include required headers for matmul #}
#include "{{solverHeader}}"
......@@ -85,6 +84,14 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral{{nameSuffix}}(
__assume_aligned(iweights3,ALIGNMENT);
#endif
{{ m.setupMatmul('flux_x_sck_vect') | indent(2) }}{##}
{{ m.setupMatmul('flux_y_or_z_sck_vect') | indent(2) }}{##}
{{ m.setupMatmul('flux_y_sck_vect') | indent(2) }}{##}
{{ m.setupMatmul('flux_z_sck_vect') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_x_sck_vect')| indent(2) }}{##}
{{ m.setupMatmul('gradQ_y_sck_vect')| indent(2) }}{##}
{{ m.setupMatmul('gradQ_z_sck_vect')| indent(2) }}{##}
const double invDt = 1. / dt;
const double invDx = 1. / dx;
......
......@@ -18,9 +18,8 @@
#include "{{pathToOptKernel}}/Kernels.h"
#include "{{pathToOptKernel}}/DGMatrices.h"
#include "{{pathToOptKernel}}/Quadrature.h"
{% if useLibxsmm %}
#include "{{pathToOptKernel}}/gemmsCPP.h"
{% endif %}
{{ m.matmulInclude() }}{# include required headers for matmul #}
#include "{{solverHeader}}"
......@@ -72,9 +71,25 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral(
{% if useViscousFlux %}
__assume_aligned(gradQAvg, ALIGNMENT);
{% endif %}
#endif
{{ m.setupMatmul('rhs_x') | indent(2) }}{##}
{{ m.setupMatmul('rhs_y') | indent(2) }}{##}
{{ m.setupMatmul('rhs_z') | indent(2) }}{##}
{{ m.setupMatmul('lduh_x') | indent(2) }}{##}
{{ m.setupMatmul('lduh_y') | indent(2) }}{##}
{{ m.setupMatmul('lduh_z') | indent(2) }}{##}
{{ m.setupMatmul('gradF_x_RKLoop') | indent(2) }}{##}
{{ m.setupMatmul('gradF_y_RKLoop') | indent(2) }}{##}
{{ m.setupMatmul('gradF_z_RKLoop') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_x') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_y') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_z') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_x_RKLoop') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_y_RKLoop') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_z_RKLoop') | indent(2) }}{##}
{{ m.setupMatmul('lqi') | indent(2) }}{##}
// 0. Allocate local variable
{% if useFluxVect %}
// transposed F slice for flux_vect
......
......@@ -35,9 +35,8 @@
#include "{{pathToOptKernel}}/Kernels.h"
#include "{{pathToOptKernel}}/DGMatrices.h"
#include "{{pathToOptKernel}}/Quadrature.h"
{% if useLibxsmm %}
#include "{{pathToOptKernel}}/gemmsCPP.h"
{% endif %}
{{ m.matmulInclude() }}{# include required headers for matmul #}
#include "{{solverHeader}}"
......@@ -85,9 +84,19 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral(
{% if useNCP or useViscousFlux %}
__assume_aligned(gradQ, ALIGNMENT);
{% endif %}
#endif
{{ m.setupMatmul('rhs_x') | indent(2) }}{##}
{{ m.setupMatmul('rhs_y') | indent(2) }}{##}
{{ m.setupMatmul('rhs_z') | indent(2) }}{##}
{{ m.setupMatmul('lduh_x') | indent(2) }}{##}
{{ m.setupMatmul('lduh_y') | indent(2) }}{##}
{{ m.setupMatmul('lduh_z') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_x') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_y') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_z') | indent(2) }}{##}
{{ m.setupMatmul('lqi') | indent(2) }}{##}
// 0. Allocate local variable
double new_lQi_slice[{{nDof*nVarPad}}] __attribute__((aligned(ALIGNMENT))); //for step 4 (computing new lQi value), doesn't update parameters
const double inverseDt = 1.0 / dt;
......
......@@ -35,9 +35,8 @@
#include "{{pathToOptKernel}}/Kernels.h"
#include "{{pathToOptKernel}}/DGMatrices.h"
#include "{{pathToOptKernel}}/Quadrature.h"
{% if useLibxsmm %}
#include "{{pathToOptKernel}}/gemmsCPP.h"
{% endif %}
{{ m.matmulInclude() }}{# include required headers for matmul #}
#include "{{solverHeader}}"
......@@ -89,9 +88,19 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral(
{% if useNCP or useViscousFlux %}
__assume_aligned(gradQ, ALIGNMENT);
{% endif %}
#endif
{{ m.setupMatmul('rhs_x') | indent(2) }}{##}
{{ m.setupMatmul('rhs_y') | indent(2) }}{##}
{{ m.setupMatmul('rhs_z') | indent(2) }}{##}
{{ m.setupMatmul('lduh_x') | indent(2) }}{##}
{{ m.setupMatmul('lduh_y') | indent(2) }}{##}
{{ m.setupMatmul('lduh_z') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_x') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_y') | indent(2) }}{##}
{{ m.setupMatmul('gradQ_z') | indent(2) }}{##}
{{ m.setupMatmul('lqi') | indent(2) }}{##}
// 0. Allocate local variable
constexpr int MaxIterations = {{2*nDof+1}};
double new_lQi_slice[{{nDof*nVar*nDofPad}}] __attribute__((aligned(ALIGNMENT))) = {0.}; //for step 4 (computing new lQi value), doesn't update parameters
......
......@@ -18,15 +18,19 @@
#include "{{pathToOptKernel}}/Kernels.h"
#include "{{pathToOptKernel}}/Quadrature.h"
{% if useLibxsmm %}
#include "{{pathToOptKernel}}/gemmsCPP.h"
{% endif %}
{{ m.matmulInclude() }}{# include required headers for matmul #}
#include "{{solverHeader}}"
//Fortran (Limiter.f90): GetSubcellData
void {{codeNamespace}}::projectOnFVLimiterSpace(const double* const luh, double* const lim) {
{{ m.setupMatmul('dg2fv_x')| indent(2) }}{##}
{{ m.setupMatmul('dg2fv_y')| indent(2) }}{##}
{% if nDim == 3 %}
{{ m.setupMatmul('dg2fv_z')| indent(2) }}{##}
{% endif %}
//compact projection without ghostlayer
// x
{{m.allocateArray('tmpX', nDof3D*nDof*nDofLim*nDataPad) | indent(2)}}{##}
......@@ -74,6 +78,12 @@ void {{codeNamespace}}::projectOnFVLimiterSpace(const double* const luh, double*
//Fortran (Limiter.f90): PutSubcellData
void {{codeNamespace}}::projectOnDGSpace(const double* const lim, double* const luh) {
{{ m.setupMatmul('fv2dg_x')| indent(2) }}{##}
{{ m.setupMatmul('fv2dg_y')| indent(2) }}{##}
{% if nDim == 3 %}
{{ m.setupMatmul('fv2dg_z')| indent(2) }}{##}
{% endif %}
// x
// ignore and remove ghostlayers
{{m.allocateArray('tmpX', nDofLim3D*nDofLim*nDof*nDataPad) | indent(2)}}{##}
......@@ -238,6 +248,14 @@ void {{codeNamespace}}::compareWithADERDGSolutionAtGaussLobattoNodes(
double* max
) {
{{ m.setupMatmul('uh2lob_x')| indent(2) }}{##}
{% if nDim == 2 %}
{{ m.setupMatmul('uh2lob_y_slice')| indent(2) }}{##}
{% else %}
{{ m.setupMatmul('uh2lob_y')| indent(2) }}{##}
{{ m.setupMatmul('uh2lob_z_slice')| indent(2) }}{##}
{% endif %}
// x
{{m.allocateArray('tmpX', nDof3D*nDof*nDof*nDataPad) | indent(2)}}{##}
for (int zy = 0; zy < {{nDof3D*nDof}}; zy++) {
......@@ -304,6 +322,14 @@ void {{codeNamespace}}::compareWithADERDGSolutionAtFVSubcellCenters(
double* max
) {
{{ m.setupMatmul('dg2fv_x')| indent(2) }}{##}
{% if nDim == 2 %}
{{ m.setupMatmul('dg2fv_y_slice')| indent(2) }}{##}
{% else %}
{{ m.setupMatmul('dg2fv_y')| indent(2) }}{##}
{{ m.setupMatmul('dg2fv_z_slice')| indent(2) }}{##}
{% endif %}
// x
{{m.allocateArray('tmpX', nDof3D*nDof*nDofLim*nDataPad) | indent(2)}}{##}
for (int zy = 0; zy < {{nDof3D*nDof}}; zy++) {
......
......@@ -44,11 +44,54 @@ _mm_free({{name}});
The gemm config (fetched through matmulKey) contains M, N, K, LDA, LDB, LDC, alpha and beta
*/
#}
{% macro matmul(matmulKey, A, B, C, A_shift, B_shift, C_shift, overrideUseLibxsmm="BoolNotDefined", trueAlpha="", trueB="", forceCoeffMatrix=False) %}
{% macro matmul(matmulKey, A, B, C, A_shift, B_shift, C_shift, trueAlpha="", trueB="", forceCoeffMatrix=False) %}
{% include "subtemplates/matmul.template" %}
{% endmacro %}
{#
/**
Matmul include

Emits the include directives required by the selected matmul backend.
Takes no argument; reads the template context variables useEigen,
useLibxsmm and pathToOptKernel.
- useEigen set: includes Eigen/Dense, located below pathToOptKernel
- useLibxsmm set: includes gemmsCPP.h, located below pathToOptKernel
- neither set: emits nothing (plain C++ loops need no extra header)
*/
#}
{% macro matmulInclude() %}
{% if useEigen %}
// include Eigen for matmul
#include <{{pathToOptKernel}}/Eigen/Dense>
{% endif %}
{% if useLibxsmm %}
// include libxsmms' gemms for matmul
#include "{{pathToOptKernel}}/gemmsCPP.h"
{% endif %}
{% endmacro %}
{#
/**
Setup matmul

Emits the per-function setup code a matmul configuration needs before its
first use.

String matmulKey : key of the gemm config in matmulConfigs; a key that is
                   not present is silently ignored (nothing is emitted)

Eigen case: declares three Eigen::Map objects named
<baseroutinename>_A_map, _B_map and _C_map, all initialised with nullptr.
Their scalar type follows conf.precision ("DP" gives double, anything else
float), their sizes come from M, N, K and their outer strides from LDA,
LDB, LDC. A and C are Aligned only if conf.alignment_A / conf.alignment_C
equals 1; B is always assumed to be aligned.
Other cases: nothing is emitted.
*/
#}
{% macro setupMatmul(matmulKey) %}
{% if matmulKey in matmulConfigs %}
{% with %}
{% set conf = matmulConfigs[matmulKey] %}
{# scalar type of the matrices; an if block does not open a new scope, so fpFormat stays visible below #}
{% if conf.precision == "DP" %}
{% set fpFormat = "double" %}
{% else %}
{% set fpFormat = "float" %}
{% endif %}
{#
// Eigen case
#}
{% if useEigen %}
// setup Map for {{conf.baseroutinename}}
Eigen::Map<Eigen::Matrix<{{fpFormat}},{{conf.M}},{{conf.K}}>, Eigen::{{"Aligned" if conf.alignment_A == 1 else "Unaligned"}}, Eigen::OuterStride<{{conf.LDA}}> > {{conf.baseroutinename}}_A_map(nullptr);
Eigen::Map<Eigen::Matrix<{{fpFormat}},{{conf.K}},{{conf.N}}>, Eigen::Aligned, Eigen::OuterStride<{{conf.LDB}}> > {{conf.baseroutinename}}_B_map(nullptr); // assume B is aligned
Eigen::Map<Eigen::Matrix<{{fpFormat}},{{conf.M}},{{conf.N}}>, Eigen::{{"Aligned" if conf.alignment_C == 1 else "Unaligned"}}, Eigen::OuterStride<{{conf.LDC}}> > {{conf.baseroutinename}}_C_map(nullptr);
{% endif %}
{% endwith %}
{% endif %}{# matmulKey in matmulConfigs #}
{% endmacro %}
{#
/**
Allocate stack tmp arrays for vect PDE.
Subarray will be allocated too
......
......@@ -13,7 +13,6 @@
String C_shift : shift to the zero of C
optional
bool overrideUseLibxsmm : force locally useLibxsmm to take this value if set
String trueB : true array B, B must be a true matrix, not a tensor slice
String trueAlpha : true value of the coefficient alpha (note: it will be multiplied by the configuration alpha, /!\ beware of sign errors)
bool forceCoeffMatrix : only when using trueB, trueAlpha, force the no libxsmm case to also generate the coeff matrix
......@@ -37,11 +36,16 @@
{% if forceCoeffMatrix is not defined %}
{% set forceCoeffMatrix = False %}
{% endif %}
{# set arrays' name for pragma by removing eventual index #}
{# set the arrays' names for the pragma and the Eigen map by stripping any index suffix #}
{% set Ap = (A.split("["))[0] %}
{% set Bp = (B.split("["))[0] %}
{% set Cp = (C.split("["))[0] %}
{% set trueBp = (trueB.split("["))[0] %}
{% if conf.precision == "DP" %}
{% set fpFormat = "double" %}
{% else %}
{% set fpFormat = "float" %}
{% endif %}
{# /********************
**** Subtemplate ****
*********************/ #}
......@@ -51,30 +55,51 @@
//-------------
#}
{% if overrideUseLibxsmm %}
{% if useLibxsmm %}
{% if useTrueB %}{# will set B[it] to be trueAlpha*trueB[it] #}
double {{B}}[{{conf.LDB*conf.K}}] __attribute__((aligned(ALIGNMENT)));
{{fpFormat}} {{B}}[{{conf.LDB*conf.K}}] __attribute__((aligned(ALIGNMENT)));
#pragma omp simd aligned({{Bp}},{{trueBp}}:ALIGNMENT)
for (int it = 0; it < {{conf.LDB*conf.K}}; it++) {
{{B}}[it] = {{trueAlpha}} * {{trueB}}[it];
}
#if defined(USE_IPO) && !defined(UNSAFE_IPO)
volatile double doNotOptimizeAway_{{B}} = {{B}}[0]; //used to prevent the compiler from optimizing temp array away. Needs to be volatile
volatile {{fpFormat}} doNotOptimizeAway_{{B}} = {{B}}[0]; //used to prevent the compiler from optimizing temp array away. Needs to be volatile
#endif
{% endif %}{# useTrueB #}
#ifdef USE_IPO
#pragma forceinline
#endif
{{conf.baseroutinename}}({{A}}{% if A_shift != '0' %}+{{A_shift}}{% endif %}, {{B}}{% if B_shift != '0' %}+{{B_shift}}{% endif %}, {{C}}{% if C_shift != '0' %}+{{C_shift}}{% endif %});
{% else %}{# overrideUseLibxsmm #}
{#
// No LIBXSMM case
//----------------
// Eigen case
//-----------
#}
{% elif useEigen %}
{# old direct mapper #}
{#{
Eigen::Map<Eigen::Matrix<{{fpFormat}},{{conf.M}},{{conf.K}}>, Eigen::{{"Aligned"if conf.alignment_A == 1 else "Unaligned"}}, Eigen::OuterStride<{{conf.LDA}}> > {{Ap}}_m({{A}}{% if A_shift != '0' %}+{{A_shift}}{% endif %}); //A = {{A}}
Eigen::Map<Eigen::Matrix<{{fpFormat}},{{conf.K}},{{conf.N}}>, Eigen::Aligned, Eigen::OuterStride<{{conf.LDB}}> > {{Bp}}_m({{trueB}}{% if B_shift != '0' %}+{{B_shift}}{% endif %}); //B = {{B}}, assume it is aligned
Eigen::Map<Eigen::Matrix<{{fpFormat}},{{conf.M}},{{conf.N}}>, Eigen::{{"Aligned"if conf.alignment_C == 1 else "Unaligned"}}, Eigen::OuterStride<{{conf.LDC}}> > {{Cp}}_m({{C}}{% if C_shift != '0' %}+{{C_shift}}{% endif %}); //C = {{C}}
{{Cp}}_m.noalias() {{ '+' if conf.beta == 1 }}= {{ '-' if conf.alpha == -1 }}{{Ap}}_m * {{Bp}}_m {% if (useTrueB and not forceCoeffMatrix) %}* {{trueAlpha}}{% endif %};
}#}
#pragma forceinline recursive
{
new (&{{conf.baseroutinename}}_A_map) Eigen::Map<Eigen::Matrix<{{fpFormat}},{{conf.M}},{{conf.K}}>, Eigen::{{"Aligned"if conf.alignment_A == 1 else "Unaligned"}}, Eigen::OuterStride<{{conf.LDA}}> >({{A}}{% if A_shift != '0' %}+{{A_shift}}{% endif %}); //{{conf.baseroutinename}}_A_map = {{A}}
new (&{{conf.baseroutinename}}_B_map) Eigen::Map<Eigen::Matrix<{{fpFormat}},{{conf.K}},{{conf.N}}>, Eigen::Aligned, Eigen::OuterStride<{{conf.LDB}}> >({{trueB}}{% if B_shift != '0' %}+{{B_shift}}{% endif %}); //{{conf.baseroutinename}}_B_map = {{trueB}}, assume it is aligned
new (&{{conf.baseroutinename}}_C_map) Eigen::Map<Eigen::Matrix<{{fpFormat}},{{conf.M}},{{conf.N}}>, Eigen::{{"Aligned"if conf.alignment_C == 1 else "Unaligned"}}, Eigen::OuterStride<{{conf.LDC}}> >({{C}}{% if C_shift != '0' %}+{{C_shift}}{% endif %}); //{{conf.baseroutinename}}_C_map = {{C}}
{{conf.baseroutinename}}_C_map.noalias() {{ '+' if conf.beta == 1 }}= {{ '-1. * ' if conf.alpha == -1 }}{{conf.baseroutinename}}_A_map * {{conf.baseroutinename}}_B_map{% if (useTrueB and not forceCoeffMatrix) %} * {{trueAlpha}}{% endif %};
}
{#
// No BLAS case
//-------------
#}
{% else %}{# no BLAS library #}
{% if forceCoeffMatrix %}
double {{B}}[{{conf.LDB*conf.K}}] __attribute__((aligned(ALIGNMENT)));
{{fpFormat}} {{B}}[{{conf.LDB*conf.K}}] __attribute__((aligned(ALIGNMENT)));
#pragma omp simd aligned({{Bp}},{{trueBp}}:ALIGNMENT)
for (int it = 0; it < {{conf.LDB*conf.K}}; it++) {
{{B}}[it] = {{trueAlpha}} * {{trueB}}[it];
......@@ -98,5 +123,5 @@ for (int it_1 = 0; it_1 < {{conf.N}}; it_1++) {
}
}
}
{% endif %}
{% endif %}{# end choice of BLAS lib #}
{% endwith %}
\ No newline at end of file
Subproject commit dcf7655b3d469a399c1182f350c9009e13ad8654
Subproject commit 724205cb0e2005399159ae23d880819f2fc2380f
Subproject commit 9b7bbb95c86939d679494db5560f4b35b7aaa1c8
......@@ -53,6 +53,27 @@ update_Peano() {
}
update_others() {
#eigen
if [ ! -d eigen ]; then
mkdir eigen
fi
if [ ! -f eigen/.git ]; then
echo "Initialize eigen submodule"
cd "$pathToTopLevel" # move to the top level (required for git version below 1.8.4)
git submodule update --init Submodules/eigen
cd "$scriptDir" #move back
else
echo "Update eigen submodule"
if [ "$toNewest" = true ]; then
cd eigen
git pull origin master
cd ..
else
cd "$pathToTopLevel" # move to the top level (required for git version below 1.8.4)
git submodule update Submodules/eigen
cd "$scriptDir" #move back
fi
fi
#Jinja2
if [ ! -d jinja ]; then
mkdir jinja
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment