In January 2021 we will introduce a 10 GB quota for project repositories. Higher limits for individual projects will be available on request. Please see https://doku.lrz.de/display/PUBLIC/GitLab for more information.

Commit 2ff25f0b authored by Jean-Matthieu Gallard's avatar Jean-Matthieu Gallard

Merge branch 'jm/mixedPicard' into 'master'

Jm/mixed picard

See merge request exahype/ExaHyPE-Engine!38
parents 74557523 154792b2
......@@ -97,7 +97,9 @@ class Controller:
"useCERKGuess" : args["useCERKGuess"],
"useSplitCK" : args["useSplitCK"],
"useVectPDE" : args["useVectPDE"],
"predictorRecompute" : args["predictorRecompute"]
"predictorRecompute" : args["predictorRecompute"],
"initialGuess" : "mixedPicard" #TODO JMG put as proper toolkit arg
#"initialGuess" : "default" #TODO JMG put as proper toolkit arg
})
self.config["useSourceOrNCP"] = self.config["useSource"] or self.config["useNCP"]
elif self.config["kernelType"] == "limiter":
......@@ -265,7 +267,7 @@ class Controller:
"""Generate the gemms with the given config list using LIBXSMM"""
for matmul in matmulConfigList:
# add the gemm name to the list of generated gemm
self.gemmList.append(matmul.baseroutinename)
self.gemmList.append((matmul.baseroutinename, matmul.precision))
# for plain assembly code (rather than inline assembly) choose dense_asm
commandLineArguments = " " + "dense" + \
" " + os.path.join(self.config["pathToOutputDirectory"], outputFileName) + \
......@@ -282,6 +284,6 @@ class Controller:
" " + str(matmul.alignment_C) + \
" " + self.config["architecture"] + \
" " + matmul.prefetchStrategy + \
" " + "DP" #always use double precision, "SP" for single
" " + matmul.precision
bashCommand = self.config["pathToLibxsmmGemmGenerator"] + commandLineArguments
subprocess.call(bashCommand.split())
......@@ -93,16 +93,16 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral(
const double inverseDt = 1.0 / dt;
const double dtBydx = inverseDx * dt; //Assume dx[0] == dx[1] == dx[2]
{% if useNCP or (useFlux and useCERKGuess) or useViscousFlux %}
double dudxT_by_dx[{{nDof*nDofPad}}] __attribute__((aligned(ALIGNMENT)));
double dudx_T_by_dx[{{nDof*nDofPad}}] __attribute__((aligned(ALIGNMENT)));
// 0. precompute 1/dx * dudx_T. Assume dx[0] == dx[1] == dx[2]
#pragma omp simd aligned(dudxT_by_dx,dudx_T:ALIGNMENT)
#pragma omp simd aligned(dudx_T_by_dx,dudx_T:ALIGNMENT)
for(int it=0;it<{{nDof*nDofPad}};it++) {
dudxT_by_dx[it] = inverseDx * dudx_T[it];
dudx_T_by_dx[it] = inverseDx * dudx_T[it];
}
{% if useLibxsmm %}
#if defined(USE_IPO) && ! defined(UNSAFE_IPO)
volatile double doNotOptimizeAway_dudx_by_dt = dudxT_by_dx[0]; //used to prevent the compiler from optimizing temp array away. Needs to be volatile
volatile double doNotOptimizeAway_dudx_by_dt = dudx_T_by_dx[0]; //used to prevent the compiler from optimizing temp array away. Needs to be volatile
#endif
{% endif %}
{% endif %}
......@@ -227,20 +227,20 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral(
// Compute the "derivatives" (contributions of the stiffness matrix)
// x direction (independent from the y and z derivatives)
for (int zy = 0; zy < {{nDof3D*nDof}}; zy++) {
{{ m.matmul('gradQ_x', 'lQi', 'dudxT_by_dx', 'gradQ', idxLQi(t,0,zy,0,0), '0', idxGradQ(0,0,zy,0,0)) | indent(8) }}{##}
{{ m.matmul('gradQ_x', 'lQi', 'dudx_T_by_dx', 'gradQ', idxLQi(t,0,zy,0,0), '0', idxGradQ(0,0,zy,0,0)) | indent(8) }}{##}
}
// y direction (independent from the x and z derivatives)
for (int z = 0; z < {{nDof3D}}; z++) {
for (int x = 0; x < {{nDof}}; x++) {
{{ m.matmul('gradQ_y', 'lQi', 'dudxT_by_dx', 'gradQ', idxLQi(t,z,0,x,0), '0', idxGradQ(1,z,0,x,0)) | indent(10) }}{##}
{{ m.matmul('gradQ_y', 'lQi', 'dudx_T_by_dx', 'gradQ', idxLQi(t,z,0,x,0), '0', idxGradQ(1,z,0,x,0)) | indent(10) }}{##}
}
}
{% if nDim==3 %}
// z direction (independent from the x and y derivatives)
for (int yx = 0; yx < {{nDof*nDof}}; yx++) {
{{ m.matmul('gradQ_z', 'lQi', 'dudxT_by_dx', 'gradQ', idxLQi(t,0,0,yx,0), '0', idxGradQ(2,0,0,yx,0)) | indent(8) }}{##}
{{ m.matmul('gradQ_z', 'lQi', 'dudx_T_by_dx', 'gradQ', idxLQi(t,0,0,yx,0), '0', idxGradQ(2,0,0,yx,0)) | indent(8) }}{##}
}
{% endif %}
{% endif %}{# useNCP or useViscousFlux #}
......@@ -406,20 +406,20 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral(
// Compute the "derivatives" (contributions of the stiffness matrix)
// x direction (independent from the y and z derivatives)
for (int zy = 0; zy < {{nDof3D*nDof}}; zy++) {
{{ m.matmul('gradQ_x', 'lQi', 'dudxT_by_dx', 'gradQ', idxLQi(t,0,zy,0,0), '0', idxGradQ(0,0,zy,0,0)) | indent(8) }}{##}
{{ m.matmul('gradQ_x', 'lQi', 'dudx_T_by_dx', 'gradQ', idxLQi(t,0,zy,0,0), '0', idxGradQ(0,0,zy,0,0)) | indent(8) }}{##}
}
// y direction (independent from the x and z derivatives)
for (int z = 0; z < {{nDof3D}}; z++) {
for (int x = 0; x < {{nDof}}; x++) {
{{ m.matmul('gradQ_y', 'lQi', 'dudxT_by_dx', 'gradQ', idxLQi(t,z,0,x,0), '0', idxGradQ(1,z,0,x,0)) | indent(10) }}{##}
{{ m.matmul('gradQ_y', 'lQi', 'dudx_T_by_dx', 'gradQ', idxLQi(t,z,0,x,0), '0', idxGradQ(1,z,0,x,0)) | indent(10) }}{##}
}
}
{% if nDim==3 %}
// z direction (independent from the x and y derivatives)
for (int yx = 0; yx < {{nDof*nDof}}; yx++) {
{{ m.matmul('gradQ_z', 'lQi', 'dudxT_by_dx', 'gradQ', idxLQi(t,0,0,yx,0), '0', idxGradQ(2,0,0,yx,0)) | indent(8) }}{##}
{{ m.matmul('gradQ_z', 'lQi', 'dudx_T_by_dx', 'gradQ', idxLQi(t,0,0,yx,0), '0', idxGradQ(2,0,0,yx,0)) | indent(8) }}{##}
}
{% endif %}
{% endif %}
......@@ -623,20 +623,20 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral(
// Compute the "derivatives" (contributions of the stiffness matrix)
// x direction (independent from the y and z derivatives)
for (int zy = 0; zy < {{nDof3D*nDof}}; zy++) {
{{ m.matmul('gradQ_x', 'lQhi', 'dudxT_by_dx', 'gradQ', idxLQhi(0,zy,0,0), '0', idxGradQ(0,0,zy,0,0)) | indent(4) }}{##}
{{ m.matmul('gradQ_x', 'lQhi', 'dudx_T_by_dx', 'gradQ', idxLQhi(0,zy,0,0), '0', idxGradQ(0,0,zy,0,0)) | indent(4) }}{##}
}
// y direction (independent from the x and z derivatives)
for (int z = 0; z < {{nDof3D}}; z++) {
for (int x = 0; x < {{nDof}}; x++) {
{{ m.matmul('gradQ_y', 'lQhi', 'dudxT_by_dx', 'gradQ', idxLQhi(z,0,x,0), '0', idxGradQ(1,z,0,x,0)) | indent(6) }}{##}
{{ m.matmul('gradQ_y', 'lQhi', 'dudx_T_by_dx', 'gradQ', idxLQhi(z,0,x,0), '0', idxGradQ(1,z,0,x,0)) | indent(6) }}{##}
}
}
{% if nDim==3 %}
// z direction (independent from the x and y derivatives)
for (int yx = 0; yx < {{nDof*nDof}}; yx++) {
{{ m.matmul('gradQ_z', 'lQhi', 'dudxT_by_dx', 'gradQ', idxLQhi(0,0,yx,0), '0', idxGradQ(2,0,0,yx,0)) | indent(4) }}{##}
{{ m.matmul('gradQ_z', 'lQhi', 'dudx_T_by_dx', 'gradQ', idxLQhi(0,0,yx,0), '0', idxGradQ(2,0,0,yx,0)) | indent(4) }}{##}
}
{% endif %}
......
......@@ -75,6 +75,8 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral(
__assume_aligned(lFhi, ALIGNMENT);
{% endif %}
__assume_aligned(FLCoeff, ALIGNMENT); // == F0
__assume_aligned(dudx, ALIGNMENT);
__assume_aligned(dudx_T, ALIGNMENT);
__assume_aligned(Kxi, ALIGNMENT);
__assume_aligned(Kxi_T, ALIGNMENT);
__assume_aligned(iK1_T, ALIGNMENT);
......@@ -91,22 +93,23 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral(
#endif
// 0. Allocate local variable
constexpr int MaxIterations = {{2*nDof+1}};
double new_lQi_slice[{{nDof*nVar*nDofPad}}] __attribute__((aligned(ALIGNMENT))) = {0.}; //for step 4 (computing new lQi value), doesn't update parameters
const double inverseDt = 1.0 / dt;
const double dtBydx = inverseDx * dt; //Assume dx[0] == dx[1] == dx[2]
{% if useNCP or (useFlux and useCERKGuess) or useViscousFlux %}
double dudxT_by_dx[{{nDof*nDofPad}}] __attribute__((aligned(ALIGNMENT)));
double dudx_T_by_dx[{{nDof*nDofPad}}] __attribute__((aligned(ALIGNMENT)));
double dudx_by_dx[{{nDof*nDofPad}}] __attribute__((aligned(ALIGNMENT)));
// 0. precompute 1/dx * dudx_T. Assume dx[0] == dx[1] == dx[2]
#pragma omp simd aligned(dudxT_by_dx,dudx_T:ALIGNMENT)
#pragma omp simd aligned(dudx_T_by_dx,dudx_T,dudx:ALIGNMENT)
for(int it=0;it<{{nDof*nDofPad}};it++) {
dudxT_by_dx[it] = inverseDx * dudx_T[it];
dudx_T_by_dx[it] = inverseDx * dudx_T[it];
dudx_by_dx[it] = inverseDx * dudx[it];
}
{% if useLibxsmm %}
#if defined(USE_IPO) && ! defined(UNSAFE_IPO)
volatile double doNotOptimizeAway_dudxT_by_dt = dudxT_by_dx[0]; //used to prevent the compiler from optimizing temp array away. Needs to be volatile
volatile double doNotOptimizeAway_dudx_T_by_dt = dudx_T_by_dx[0]; //used to prevent the compiler from optimizing temp array away. Needs to be volatile
volatile double doNotOptimizeAway_dudx_by_dt = dudx_by_dx[0]; //used to prevent the compiler from optimizing temp array away. Needs to be volatile
#endif
{% endif %}
......@@ -122,7 +125,7 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral(
}
double rhsCoeff_T[{{nDof*nDofPad}}] __attribute__((aligned(ALIGNMENT)));
for (int i = 0; i < {{nDof}}; i++) {
#pragma omp simd aligned(rhsCoeff,Kxi,iweights1:ALIGNMENT)
#pragma omp simd aligned(rhsCoeff_T,Kxi_T,iweights1:ALIGNMENT)
for (int j = 0; j < {{nDofPad}}; j++) {
rhsCoeff_T[i*{{nDofPad}}+j] = -inverseDx * Kxi_T[i*{{nDofPad}}+j] * iweights1[j];
}
......@@ -233,7 +236,6 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral(
// 2. Discrete Picard iterations
constexpr int MaxIterations = {% if useCERKGuess %}{% if nDof-3 <= 1 %}1; //cannot be lower than 1{% else %}{{nDof-3}}; //nDof-3{% endif %}{% else %}{{2*nDof+1}};{% endif %}
int iter = 0;
for (; iter < MaxIterations; iter++) {
......@@ -252,12 +254,12 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral(
// y direction (independent from the x and z derivatives), fuse nx
for (int z = 0; z < {{nDof3D}}; z++) {
{{ m.matmul('gradQ_y', 'lQi', 'dudxT_by_dx', 'gradQ', idxLQi(t,z,0,0,0), '0', idxGradQ(1,z,0,0,0)) | indent(10) }}{##}
{{ m.matmul('gradQ_y', 'lQi', 'dudx_T_by_dx', 'gradQ', idxLQi(t,z,0,0,0), '0', idxGradQ(1,z,0,0,0)) | indent(10) }}{##}
}
{% if nDim==3 %}
// z direction (independent from the x and y derivatives), fuse ynx
{{ m.matmul('gradQ_z', 'lQi', 'dudxT_by_dx', 'gradQ', idxLQi(t,0,0,0,0), '0', idxGradQ(2,0,0,0,0)) | indent(8) }}{##}
{{ m.matmul('gradQ_z', 'lQi', 'dudx_T_by_dx', 'gradQ', idxLQi(t,0,0,0,0), '0', idxGradQ(2,0,0,0,0)) | indent(8) }}{##}
{% endif %}
{% endif %}{# useNCP or useViscousFlux #}
......@@ -425,12 +427,12 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral(
// y direction (independent from the x and z derivatives), fuse nx
for (int z = 0; z < {{nDof3D}}; z++) {
{{ m.matmul('gradQ_y', 'lQi', 'dudxT_by_dx', 'gradQ', idxLQi(t,z,0,0,0), '0', idxGradQ(1,z,0,0,0)) | indent(6) }}{##}
{{ m.matmul('gradQ_y', 'lQi', 'dudx_T_by_dx', 'gradQ', idxLQi(t,z,0,0,0), '0', idxGradQ(1,z,0,0,0)) | indent(6) }}{##}
}
{% if nDim==3 %}
// z direction (independent from the x and y derivatives), fuse ynx
{{ m.matmul('gradQ_z', 'lQi', 'dudxT_by_dx', 'gradQ', idxLQi(t,0,0,0,0), '0', idxGradQ(2,0,0,0,0)) | indent(4) }}{##}
{{ m.matmul('gradQ_z', 'lQi', 'dudx_T_by_dx', 'gradQ', idxLQi(t,0,0,0,0), '0', idxGradQ(2,0,0,0,0)) | indent(4) }}{##}
{% endif %}
{% endif %}
......@@ -639,12 +641,12 @@ int {{codeNamespace}}::fusedSpaceTimePredictorVolumeIntegral(
// y direction (independent from the x and z derivatives), fuse nx
for (int z = 0; z < {{nDof3D}}; z++) {
{{ m.matmul('gradQ_y', 'lQhi', 'dudxT_by_dx', 'gradQ', idxLQhi(z,0,0,0), '0', idxGradQ(1,z,0,0,0)) | indent(4) }}{##}
{{ m.matmul('gradQ_y', 'lQhi', 'dudx_T_by_dx', 'gradQ', idxLQhi(z,0,0,0), '0', idxGradQ(1,z,0,0,0)) | indent(4) }}{##}
}
{% if nDim==3 %}
// z direction (independent from the x and y derivatives), fuse ynx
{{ m.matmul('gradQ_z', 'lQhi', 'dudxT_by_dx', 'gradQ', idxLQhi(0,0,0,0), '0', idxGradQ(2,0,0,0,0)) | indent(2) }}{##}
{{ m.matmul('gradQ_z', 'lQhi', 'dudx_T_by_dx', 'gradQ', idxLQhi(0,0,0,0), '0', idxGradQ(2,0,0,0,0)) | indent(2) }}{##}
{% endif %}
std::memset(lGradQhbnd, 0, {{2*nDim*nVarPad*nDof*nDof3D*nDim}} * sizeof(double));
......
......@@ -20,8 +20,12 @@
namespace {{namespaceName}} {
{% endfor %}
{% for gemm in gemmList %}
void {{gemm}}(const double* A, const double* B, double* C);
{% for gemm,precision in gemmList %}
{% if precision == "DP" %}
void {{gemm}}(const double* A, const double* B, double* C);
{% else %}
void {{gemm}}(const float* A, const float* B, float* C);
{% endif %}
{% endfor %}
{% for namespaceName in codeNamespaceList %}
......
......@@ -20,8 +20,12 @@
namespace {{namespaceName}} {
{% endfor %}
{% for gemm in gemmList %}
void {{gemm}}(const double* A, const double* B, double* C);
{% for gemm,precision in gemmList %}
{% if precision == "DP" %}
void {{gemm}}(const double* A, const double* B, double* C);
{% else %}
void {{gemm}}(const float* A, const float* B, float* C);
{% endif %}
{% endfor %}
{% for namespaceName in codeNamespaceList %}
......
......@@ -69,8 +69,14 @@ class MatmulConfig:
# prefetching
prefetchStrategy = ""
# precision
precision="DP" # "DP" = double, "SP" = float
# Constructor
def __init__(self, M, N, K, LDA, LDB, LDC, alpha, beta, alignment_A, alignment_C, name, prefetchStrategy, operationType="gemm"):
def __init__(self, M, N, K, LDA, LDB, LDC, alpha, beta, alignment_A, alignment_C, name, prefetchStrategy, operationType="gemm", precision="DP"):
if precision not in ["DP", "SP"]:
print("MatmulConfig: Unknown precision")
exit()
if((M > LDC) or (K > LDB) or (M > LDA)):
print("MatmulConfig: Incompatible matrix sizes and leading dimensions")
exit()
......@@ -95,6 +101,7 @@ class MatmulConfig:
self.name = name
self.prefetchStrategy = prefetchStrategy
self.baseroutinename = operationType+"_"+str(M)+"_"+str(N)+"_"+str(K)+"_"+name
self.precision = precision
def __repr__(self):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment