In January 2021 we will introduce a 10 GB quota for project repositories. Higher limits for individual projects will be available on request. Please see https://doku.lrz.de/display/PUBLIC/GitLab for more information.

Commit 40431896 authored by Carla Guillen Carias

Adding new derived metrics

parent 5295027d
......@@ -161,3 +161,46 @@ bool calculateFrequency(reading_t & unhaltedRef, reading_t & unhaltedClocks,
}
return false; //Division by zero
}
/**
 * @brief Derives a per-interval rate from the 128-bit packed floating point readings.
 *
 * Computes
 *   result.value = (packedDP128 * 2 + packedSP128 * 4) / (scaling_factor * measuring_interval_s)
 * The weights 2 and 4 equal the number of 64-bit and 32-bit lanes in a 128-bit vector.
 *
 * @param packedDP128           Reading whose value is weighted by 2.
 * @param packedSP128           Reading whose value is weighted by 4.
 * @param result                Output reading; its value and timestamp are written.
 * @param scaling_factor        Factor of the divisor; must be non-zero.
 * @param measuring_interval_s  Factor of the divisor (presumably the interval length in
 *                              seconds, per the name); must be non-zero.
 * @return false, leaving result untouched, if either divisor factor is zero; otherwise
 *         whatever getTimestampFromReadings() returns for the two input readings.
 */
bool calculatePacked128PerSec(reading_t & packedDP128, reading_t & packedSP128,
		reading_t & result, double scaling_factor, double measuring_interval_s){
	// The divisor is the product of both factors, so a zero in either one is a
	// division by zero. Reject it the same way calculateFrequency() does.
	if(!measuring_interval_s || !scaling_factor) return false;
	result.value = (packedDP128.value * 2 + packedSP128.value * 4)/(scaling_factor * measuring_interval_s);
	return getTimestampFromReadings(result.timestamp, packedDP128, packedSP128);
}
/**
 * @brief Derives a per-interval rate from the 256-bit packed floating point readings.
 *
 * Computes
 *   result.value = (packedDP256 * 4 + packedSP256 * 8) / (scaling_factor * measuring_interval_s)
 * The weights 4 and 8 equal the number of 64-bit and 32-bit lanes in a 256-bit vector.
 *
 * @param packedDP256           Reading whose value is weighted by 4.
 * @param packedSP256           Reading whose value is weighted by 8.
 * @param result                Output reading; its value and timestamp are written.
 * @param scaling_factor        Factor of the divisor; must be non-zero.
 * @param measuring_interval_s  Factor of the divisor (presumably the interval length in
 *                              seconds, per the name); must be non-zero.
 * @return false, leaving result untouched, if either divisor factor is zero; otherwise
 *         whatever getTimestampFromReadings() returns for the two input readings.
 */
bool calculatePacked256PerSec(reading_t & packedDP256, reading_t & packedSP256,
		reading_t & result, double scaling_factor, double measuring_interval_s){
	// The divisor is the product of both factors, so a zero in either one is a
	// division by zero. Reject it the same way calculateFrequency() does.
	if(!measuring_interval_s || !scaling_factor) return false;
	result.value = (packedDP256.value * 4 + packedSP256.value * 8)/(scaling_factor * measuring_interval_s);
	return getTimestampFromReadings(result.timestamp, packedDP256, packedSP256);
}
/**
 * @brief Derives a per-interval rate from the 512-bit packed floating point readings.
 *
 * Computes
 *   result.value = (packedDP512 * 8 + packedSP512 * 16) / (scaling_factor * measuring_interval_s)
 * The weights 8 and 16 equal the number of 64-bit and 32-bit lanes in a 512-bit vector.
 *
 * @param packedDP512           Reading whose value is weighted by 8.
 * @param packedSP512           Reading whose value is weighted by 16.
 * @param result                Output reading; its value and timestamp are written.
 * @param scaling_factor        Factor of the divisor; must be non-zero.
 * @param measuring_interval_s  Factor of the divisor (presumably the interval length in
 *                              seconds, per the name); must be non-zero.
 * @return false, leaving result untouched, if either divisor factor is zero; otherwise
 *         whatever getTimestampFromReadings() returns for the two input readings.
 */
bool calculatePacked512PerSec(reading_t & packedDP512, reading_t & packedSP512,
		reading_t & result, double scaling_factor, double measuring_interval_s){
	// The divisor is the product of both factors, so a zero in either one is a
	// division by zero. Reject it the same way calculateFrequency() does.
	if(!measuring_interval_s || !scaling_factor) return false;
	result.value = (packedDP512.value * 8 + packedSP512.value * 16)/(scaling_factor * measuring_interval_s);
	return getTimestampFromReadings(result.timestamp, packedDP512, packedSP512);
}
/**
 * @brief Derives a per-interval rate from the scalar and packed single precision readings.
 *
 * Computes
 *   result.value = (packedSP128 * 4 + packedSP256 * 8 + packedSP512 * 16 + scalarSP)
 *                  / (scaling_factor * measuring_interval_s)
 * The packed weights equal the number of 32-bit lanes in a 128/256/512-bit vector;
 * the scalar reading is counted once.
 *
 * @param scalarSP              Reading added with weight 1.
 * @param packedSP128           Reading whose value is weighted by 4.
 * @param packedSP256           Reading whose value is weighted by 8.
 * @param packedSP512           Reading whose value is weighted by 16.
 * @param result                Output reading; its value and timestamp are written.
 * @param scaling_factor        Factor of the divisor; must be non-zero.
 * @param measuring_interval_s  Factor of the divisor (presumably the interval length in
 *                              seconds, per the name); must be non-zero.
 * @return false, leaving result untouched, if either divisor factor is zero; otherwise
 *         whatever getTimestampFromReadings() returns for the four input readings.
 */
bool calculateSinglePrecisionPerSec(reading_t & scalarSP, reading_t & packedSP128,
		reading_t & packedSP256, reading_t & packedSP512,
		reading_t & result, double scaling_factor, double measuring_interval_s) {
	// The divisor is the product of both factors, so a zero in either one is a
	// division by zero. Reject it the same way calculateFrequency() does.
	if(!measuring_interval_s || !scaling_factor) return false;
	result.value = (packedSP128.value * 4 + packedSP256.value * 8 + packedSP512.value * 16 + scalarSP.value)/(scaling_factor * measuring_interval_s);
	return getTimestampFromReadings(result.timestamp, scalarSP, packedSP128, packedSP256, packedSP512);
}
/**
 * @brief Derives a per-interval rate from the scalar and packed double precision readings.
 *
 * Computes
 *   result.value = (packedDP128 * 2 + packedDP256 * 4 + packedDP512 * 8 + scalarDB)
 *                  / (scaling_factor * measuring_interval_s)
 * The packed weights equal the number of 64-bit lanes in a 128/256/512-bit vector;
 * the scalar reading is counted once.
 *
 * @param scalarDB              Reading added with weight 1.
 * @param packedDP128           Reading whose value is weighted by 2.
 * @param packedDP256           Reading whose value is weighted by 4.
 * @param packedDP512           Reading whose value is weighted by 8.
 * @param result                Output reading; its value and timestamp are written.
 * @param scaling_factor        Factor of the divisor; must be non-zero.
 * @param measuring_interval_s  Factor of the divisor (presumably the interval length in
 *                              seconds, per the name); must be non-zero.
 * @return false, leaving result untouched, if either divisor factor is zero; otherwise
 *         whatever getTimestampFromReadings() returns for the four input readings.
 */
bool calculateDoublePerSec(reading_t &scalarDB, reading_t & packedDP128,
		reading_t & packedDP256, reading_t & packedDP512,
		reading_t & result, double scaling_factor, double measuring_interval_s) {
	// The divisor is the product of both factors, so a zero in either one is a
	// division by zero. Reject it the same way calculateFrequency() does.
	if(!measuring_interval_s || !scaling_factor) return false;
	result.value = (packedDP128.value * 2 + packedDP256.value * 4
			+ packedDP512.value * 8 + scalarDB.value)/(scaling_factor * measuring_interval_s);
	return getTimestampFromReadings(result.timestamp, scalarDB, packedDP128, packedDP256, packedDP512);
}
......@@ -70,6 +70,23 @@ bool calculateSP_TO_TOTAL_RATIO(reading_t &scalarDB, reading_t & scalarSP,
reading_t & packedDP512, reading_t & packedSP512,
reading_t & result, double scaling_factor);
/**
 * Rate from 128-bit packed readings:
 * result.value = (packedDP128 * 2 + packedSP128 * 4) / (scaling_factor * measuring_interval_s).
 * Returns false when measuring_interval_s is zero; otherwise the outcome of deriving
 * result.timestamp from the input readings.
 */
bool calculatePacked128PerSec(reading_t & packedDP128, reading_t & packedSP128,
reading_t & result, double scaling_factor, double measuring_interval_s);
/**
 * Rate from 256-bit packed readings:
 * result.value = (packedDP256 * 4 + packedSP256 * 8) / (scaling_factor * measuring_interval_s).
 * Returns false when measuring_interval_s is zero; otherwise the outcome of deriving
 * result.timestamp from the input readings.
 */
bool calculatePacked256PerSec(reading_t & packedDP256, reading_t & packedSP256,
reading_t & result, double scaling_factor, double measuring_interval_s);
/**
 * Rate from 512-bit packed readings:
 * result.value = (packedDP512 * 8 + packedSP512 * 16) / (scaling_factor * measuring_interval_s).
 * Returns false when measuring_interval_s is zero; otherwise the outcome of deriving
 * result.timestamp from the input readings.
 */
bool calculatePacked512PerSec(reading_t & packedDP512, reading_t & packedSP512,
reading_t & result, double scaling_factor, double measuring_interval_s);
/**
 * Rate from all single precision readings:
 * result.value = (packedSP128 * 4 + packedSP256 * 8 + packedSP512 * 16 + scalarSP)
 *                / (scaling_factor * measuring_interval_s).
 * Returns false when measuring_interval_s is zero; otherwise the outcome of deriving
 * result.timestamp from the input readings.
 */
bool calculateSinglePrecisionPerSec(reading_t & scalarSP, reading_t & packedSP128,
reading_t & packedSP256, reading_t & packedSP512,
reading_t & result, double scaling_factor, double measuring_interval_s);
/**
 * Rate from all double precision readings:
 * result.value = (packedDP128 * 2 + packedDP256 * 4 + packedDP512 * 8 + scalarDB)
 *                / (scaling_factor * measuring_interval_s).
 * Returns false when measuring_interval_s is zero; otherwise the outcome of deriving
 * result.timestamp from the input readings.
 */
bool calculateDoublePerSec(reading_t &scalarDB, reading_t & packedDP128,
reading_t & packedDP256, reading_t & packedDP512,
reading_t & result, double scaling_factor, double measuring_interval_s);
// NOTE(review): the definition is not visible in this view; judging by the name this
// presumably writes an L3 hit-to-miss ratio into result -- confirm against the implementation.
bool calculateL3HitToL3MissRatio(reading_t & l3_misses, reading_t& l3_load_hits,
reading_t & l3_load_misses, reading_t & result, double scaling_factor);
......
......@@ -84,7 +84,7 @@ SMUCNGPerfConfigurator::SMUCNGPerfConfigurator() : OperatorConfiguratorTemplate(
_metricMap["FLOPS"]=SMUCSensorBase::FLOPS;
_metricMap["PACKED_FLOPS"]=SMUCSensorBase::PACKED_FLOPS;
_metricMap["AVX512_TOVECTORIZED_RATIO"]=SMUCSensorBase::AVX512_TOVECTORIZED_RATIO;
_metricMap["VECTORIZED_RATIO"]=SMUCSensorBase::VECTORIZED_RATIO;
_metricMap["VECTORIZATION_RATIO"]=SMUCSensorBase::VECTORIZATION_RATIO;
_metricMap["SINGLE_PRECISION_TO_TOTAL_RATIO"]=SMUCSensorBase::SINGLE_PRECISION_TO_TOTAL_RATIO;
_metricMap["EXPENSIVE_INSTRUCTIONS_PER_SECOND"]=SMUCSensorBase::EXPENSIVE_INSTRUCTIONS_PER_SECOND;
_metricMap["INTRA_NODE_LOADIMBALANCE"]=SMUCSensorBase::INTRA_NODE_LOADIMBALANCE;
......@@ -126,6 +126,11 @@ SMUCNGPerfConfigurator::SMUCNGPerfConfigurator() : OperatorConfiguratorTemplate(
_metricMap["IOWRITES_PER_SECOND_PROF"]=SMUCSensorBase::IOWRITES_PER_SECOND_PROF;
_metricMap["IO_BYTES_READ_PER_OP_PROF"]=SMUCSensorBase::IO_BYTES_READ_PER_OP_PROF;
_metricMap["IO_BYTES_WRITE_PER_OP_PROF"]=SMUCSensorBase::IO_BYTES_WRITE_PER_OP_PROF;
_metricMap["PACKED128_FLOPS"] = SMUCSensorBase::PACKED128_FLOPS;
_metricMap["PACKED256_FLOPS"] = SMUCSensorBase::PACKED256_FLOPS;
_metricMap["PACKED512_FLOPS"] = SMUCSensorBase::PACKED512_FLOPS;
_metricMap["SINGLE_PRECISION_FLOPS"] = SMUCSensorBase::SINGLE_PRECISION_FLOPS;
_metricMap["DOUBLE_PRECISION_FLOPS"] = SMUCSensorBase::DOUBLE_PRECISION_FLOPS;
}
SMUCNGPerfConfigurator::~SMUCNGPerfConfigurator() {
......
......@@ -91,6 +91,9 @@ SMUCNGPerfOperator::SMUCNGPerfOperator(const std::string& name): OperatorTemplat
_profileMetricToMetricIds[SMUCSensorBase::IOWRITES_PER_SECOND_PROF] = {SMUCSensorBase::IOWRITES};
_profileMetricToMetricIds[SMUCSensorBase::IO_BYTES_READ_PER_OP_PROF] = {SMUCSensorBase::IOBYTESREAD, SMUCSensorBase::IOREADS};
_profileMetricToMetricIds[SMUCSensorBase::IO_BYTES_WRITE_PER_OP_PROF] = {SMUCSensorBase::IOBYTESWRITE, SMUCSensorBase::IOWRITES};
_flop_metric = {SMUCSensorBase::FLOPS, SMUCSensorBase::PACKED_FLOPS, SMUCSensorBase::AVX512_TOVECTORIZED_RATIO,
SMUCSensorBase::VECTORIZATION_RATIO, SMUCSensorBase::SINGLE_PRECISION_TO_TOTAL_RATIO};
}
SMUCNGPerfOperator::~SMUCNGPerfOperator() {
......@@ -135,9 +138,7 @@ void SMUCNGPerfOperator::compute(U_Ptr unit) {
}
if (outSensor->getMetric() == SMUCSensorBase::FREQUENCY) {
computeFREQUENCY(inputs, outSensor, timestamp);
} else if (outSensor->getMetric() == SMUCSensorBase::FLOPS || outSensor->getMetric() == SMUCSensorBase::PACKED_FLOPS ||
outSensor->getMetric() == SMUCSensorBase::AVX512_TOVECTORIZED_RATIO || outSensor->getMetric() == SMUCSensorBase::VECTORIZED_RATIO ||
outSensor->getMetric() == SMUCSensorBase::SINGLE_PRECISION_TO_TOTAL_RATIO) {
} else if (_flop_metric.find(outSensor->getMetric()) != _flop_metric.end()) {
computeFLOPS(inputs, outSensor, timestamp);
} else if (outSensor->getMetric() == SMUCSensorBase::L3HIT_TO_L3MISS_RATIO ){
computeL3HIT_TO_L3MISS_RATIO(inputs, outSensor, timestamp);
......@@ -303,36 +304,77 @@ void SMUCNGPerfOperator::computeFLOPS(std::vector<SMUCNGPtr>& inputs, SMUCNGPtr&
reading_t & packed512_single = fp_arith_512b_packed_single.size() > 0 ? fp_arith_512b_packed_single[0] : empty;
reading_t result;
if(flop_metric == SMUCSensorBase::FLOPS) {
if (calculateFlopsPerSec(scalar_double, scalar_single, packed128_double,
packed128_single, packed256_double, packed256_single,
packed512_double, packed512_single, result, *outSensor->getMetadata()->getScale(), _measuring_interval_s) ) {
outSensor->storeReading(result);
}
} else if(flop_metric == SMUCSensorBase::PACKED_FLOPS){
if (calculatePackedFlopsPerSec(packed128_double, packed128_single,
packed256_double, packed256_single, packed512_double,
packed512_single, result, *outSensor->getMetadata()->getScale(), _measuring_interval_s)) {
outSensor->storeReading(result);
}
} else if(flop_metric == SMUCSensorBase::VECTORIZED_RATIO) {
if(calculateVectorizationRatio(scalar_double, scalar_single, packed128_double,
packed128_single, packed256_double, packed256_single,
packed512_double, packed512_single, result, *outSensor->getMetadata()->getScale())) {
outSensor->storeReading(result);
}
} else if (flop_metric == SMUCSensorBase::AVX512_TOVECTORIZED_RATIO) {
if (calculateAVX512FlopsToVectorizedRatio(packed128_double,
packed128_single, packed256_double, packed256_single,
packed512_double, packed512_single, result, *outSensor->getMetadata()->getScale())) {
outSensor->storeReading(result);
}
} else if (flop_metric == SMUCSensorBase::SINGLE_PRECISION_TO_TOTAL_RATIO) {
if(calculateSP_TO_TOTAL_RATIO(scalar_double, scalar_single, packed128_double,
packed128_single, packed256_double, packed256_single,
packed512_double, packed512_single, result, *outSensor->getMetadata()->getScale())){
outSensor->storeReading(result);
}
switch (flop_metric) {
case SMUCSensorBase::FLOPS:
if (calculateFlopsPerSec(scalar_double, scalar_single, packed128_double,
packed128_single, packed256_double, packed256_single,
packed512_double, packed512_single, result, *outSensor->getMetadata()->getScale(), _measuring_interval_s) ) {
outSensor->storeReading(result);
}
break;
case SMUCSensorBase::PACKED_FLOPS:
if (calculatePackedFlopsPerSec(packed128_double, packed128_single,
packed256_double, packed256_single, packed512_double,
packed512_single, result, *outSensor->getMetadata()->getScale(), _measuring_interval_s)) {
outSensor->storeReading(result);
}
break;
case SMUCSensorBase::VECTORIZATION_RATIO:
if(calculateVectorizationRatio(scalar_double, scalar_single, packed128_double,
packed128_single, packed256_double, packed256_single,
packed512_double, packed512_single, result, *outSensor->getMetadata()->getScale())) {
outSensor->storeReading(result);
}
break;
case SMUCSensorBase::AVX512_TOVECTORIZED_RATIO:
if (calculateAVX512FlopsToVectorizedRatio(packed128_double,
packed128_single, packed256_double, packed256_single,
packed512_double, packed512_single, result, *outSensor->getMetadata()->getScale())) {
outSensor->storeReading(result);
}
break;
case SMUCSensorBase::SINGLE_PRECISION_TO_TOTAL_RATIO:
if(calculateSP_TO_TOTAL_RATIO(scalar_double, scalar_single, packed128_double,
packed128_single, packed256_double, packed256_single,
packed512_double, packed512_single, result, *outSensor->getMetadata()->getScale())){
outSensor->storeReading(result);
}
break;
case SMUCSensorBase::PACKED128_FLOPS:
if(calculatePacked128PerSec(packed128_double, packed128_single, result,
*outSensor->getMetadata()->getScale(), _measuring_interval_s)){
outSensor->storeReading(result);
}
break;
case SMUCSensorBase::PACKED256_FLOPS:
if(calculatePacked256PerSec(packed256_double, packed256_single, result,
*outSensor->getMetadata()->getScale(), _measuring_interval_s)){
outSensor->storeReading(result);
}
break;
case SMUCSensorBase::PACKED512_FLOPS:
if (calculatePacked512PerSec(packed512_double, packed512_single, result,
*outSensor->getMetadata()->getScale(), _measuring_interval_s)) {
outSensor->storeReading(result);
}
break;
case SMUCSensorBase::SINGLE_PRECISION_FLOPS:
if (calculateSinglePrecisionPerSec(scalar_single, packed128_single, packed256_single, packed512_single, result,
*outSensor->getMetadata()->getScale(), _measuring_interval_s)) {
outSensor->storeReading(result);
}
break;
case SMUCSensorBase::DOUBLE_PRECISION_FLOPS:
if (calculateDoublePerSec(scalar_double, packed128_double,
packed256_double, packed512_double, result,
*outSensor->getMetadata()->getScale(), _measuring_interval_s)) {
outSensor->storeReading(result);
}
break;
default:
//no default...
LOG(error) << "Flop metric " << flop_metric << " not implemented.";
break;
}
}
......
......@@ -32,6 +32,7 @@
#include "SMUCSensorBase.h"
#include "SKXPMUMetrics.h"
#include <map>
#include <set>
class SMUCNGPerfOperator: virtual public OperatorTemplate<SMUCSensorBase>{
public:
......@@ -57,6 +58,7 @@ protected:
std::map<SMUCSensorBase::Metric_t, SMUCSensorBase::Metric_t> _metricPerSecToId;
std::map<SMUCSensorBase::Metric_t, std::pair<SMUCSensorBase::Metric_t, SMUCSensorBase::Metric_t>> _metricRatioToPair;
std::map<SMUCSensorBase::Metric_t, std::vector<SMUCSensorBase::Metric_t>> _profileMetricToMetricIds;
std::set<SMUCSensorBase::Metric_t> _flop_metric;
vector<vector<reading_t>> _buffers;
const unsigned int MAX_FREQ_MHZ = 2700;
const unsigned int MIN_FREQ_MHZ = 1200;
......
......@@ -87,7 +87,7 @@ public:
FLOPS=53,
PACKED_FLOPS=54,
AVX512_TOVECTORIZED_RATIO=55, //AVX512/(TOTAL VECTORIZED)
VECTORIZED_RATIO=56, //(TOTAL VECTORIZED)/(ALL FLOPS)
VECTORIZATION_RATIO=56, //(TOTAL VECTORIZED)/(ALL FLOPS)
SINGLE_PRECISION_TO_TOTAL_RATIO=57, //Flops
EXPENSIVE_INSTRUCTIONS_PER_SECOND=58,
INTRA_NODE_LOADIMBALANCE=59,
......@@ -129,6 +129,11 @@ public:
IOWRITES_PER_SECOND_PROF=95,
IO_BYTES_READ_PER_OP_PROF=96,
IO_BYTES_WRITE_PER_OP_PROF=97,
PACKED128_FLOPS=98,
PACKED256_FLOPS=99,
PACKED512_FLOPS=100,
SINGLE_PRECISION_FLOPS=101,
DOUBLE_PRECISION_FLOPS=102,
NONE
};
public:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment