Starting from 2021-07-01, all LRZ GitLab users will be required to explicitly accept the GitLab Terms of Service. Please see the detailed information at https://doku.lrz.de/display/PUBLIC/GitLab and make sure that your projects conform to the requirements.

Commit 40431896 authored by Carla Guillen Carias's avatar Carla Guillen Carias
Browse files

Adding new derived metrics

parent 5295027d
......@@ -161,3 +161,46 @@ bool calculateFrequency(reading_t & unhaltedRef, reading_t & unhaltedClocks,
}
return false; //Division by zero
}
/**
 * @brief Derives a per-second rate from the 128-bit packed floating-point counters.
 *
 * The double-precision count is weighted by 2 and the single-precision count
 * by 4 (presumably the number of elements per 128-bit vector -- confirm against
 * the counter definitions). The weighted sum is divided by
 * scaling_factor * measuring_interval_s.
 *
 * @param packedDP128          Reading of the 128-bit packed double-precision counter.
 * @param packedSP128          Reading of the 128-bit packed single-precision counter.
 * @param result               Output reading; its value receives the rate and its
 *                             timestamp is handed to getTimestampFromReadings().
 * @param scaling_factor       Divisor applied together with the interval.
 * @param measuring_interval_s Measuring interval (seconds, going by the name).
 * @return false if either divisor factor is zero (result is left untouched);
 *         otherwise the return value of getTimestampFromReadings().
 */
bool calculatePacked128PerSec(reading_t & packedDP128, reading_t & packedSP128,
        reading_t & result, double scaling_factor, double measuring_interval_s){
    // A zero scaling factor makes the divisor zero just like a zero interval does,
    // so both are rejected before dividing.
    if(measuring_interval_s == 0.0 || scaling_factor == 0.0) return false;
    result.value = (packedDP128.value * 2 + packedSP128.value * 4)/(scaling_factor * measuring_interval_s);
    return getTimestampFromReadings(result.timestamp, packedDP128, packedSP128);
}
/**
 * @brief Derives a per-second rate from the 256-bit packed floating-point counters.
 *
 * The double-precision count is weighted by 4 and the single-precision count
 * by 8 (presumably the number of elements per 256-bit vector -- confirm against
 * the counter definitions). The weighted sum is divided by
 * scaling_factor * measuring_interval_s.
 *
 * @param packedDP256          Reading of the 256-bit packed double-precision counter.
 * @param packedSP256          Reading of the 256-bit packed single-precision counter.
 * @param result               Output reading; its value receives the rate and its
 *                             timestamp is handed to getTimestampFromReadings().
 * @param scaling_factor       Divisor applied together with the interval.
 * @param measuring_interval_s Measuring interval (seconds, going by the name).
 * @return false if either divisor factor is zero (result is left untouched);
 *         otherwise the return value of getTimestampFromReadings().
 */
bool calculatePacked256PerSec(reading_t & packedDP256, reading_t & packedSP256,
        reading_t & result, double scaling_factor, double measuring_interval_s){
    // A zero scaling factor makes the divisor zero just like a zero interval does,
    // so both are rejected before dividing.
    if(measuring_interval_s == 0.0 || scaling_factor == 0.0) return false;
    result.value = (packedDP256.value * 4 + packedSP256.value * 8)/(scaling_factor * measuring_interval_s);
    return getTimestampFromReadings(result.timestamp, packedDP256, packedSP256);
}
/**
 * @brief Derives a per-second rate from the 512-bit packed floating-point counters.
 *
 * The double-precision count is weighted by 8 and the single-precision count
 * by 16 (presumably the number of elements per 512-bit vector -- confirm against
 * the counter definitions). The weighted sum is divided by
 * scaling_factor * measuring_interval_s.
 *
 * @param packedDP512          Reading of the 512-bit packed double-precision counter.
 * @param packedSP512          Reading of the 512-bit packed single-precision counter.
 * @param result               Output reading; its value receives the rate and its
 *                             timestamp is handed to getTimestampFromReadings().
 * @param scaling_factor       Divisor applied together with the interval.
 * @param measuring_interval_s Measuring interval (seconds, going by the name).
 * @return false if either divisor factor is zero (result is left untouched);
 *         otherwise the return value of getTimestampFromReadings().
 */
bool calculatePacked512PerSec(reading_t & packedDP512, reading_t & packedSP512,
        reading_t & result, double scaling_factor, double measuring_interval_s){
    // A zero scaling factor makes the divisor zero just like a zero interval does,
    // so both are rejected before dividing.
    if(measuring_interval_s == 0.0 || scaling_factor == 0.0) return false;
    result.value = (packedDP512.value * 8 + packedSP512.value * 16)/(scaling_factor * measuring_interval_s);
    return getTimestampFromReadings(result.timestamp, packedDP512, packedSP512);
}
/**
 * @brief Derives a per-second rate from all single-precision floating-point counters.
 *
 * The scalar count is taken as is; the 128-, 256- and 512-bit packed counts are
 * weighted by 4, 8 and 16 respectively (presumably elements per vector -- confirm
 * against the counter definitions). The weighted sum is divided by
 * scaling_factor * measuring_interval_s.
 *
 * @param scalarSP             Reading of the scalar single-precision counter.
 * @param packedSP128          Reading of the 128-bit packed single-precision counter.
 * @param packedSP256          Reading of the 256-bit packed single-precision counter.
 * @param packedSP512          Reading of the 512-bit packed single-precision counter.
 * @param result               Output reading; its value receives the rate and its
 *                             timestamp is handed to getTimestampFromReadings().
 * @param scaling_factor       Divisor applied together with the interval.
 * @param measuring_interval_s Measuring interval (seconds, going by the name).
 * @return false if either divisor factor is zero (result is left untouched);
 *         otherwise the return value of getTimestampFromReadings().
 */
bool calculateSinglePrecisionPerSec(reading_t & scalarSP, reading_t & packedSP128,
        reading_t & packedSP256, reading_t & packedSP512,
        reading_t & result, double scaling_factor, double measuring_interval_s) {
    // A zero scaling factor makes the divisor zero just like a zero interval does,
    // so both are rejected before dividing.
    if(measuring_interval_s == 0.0 || scaling_factor == 0.0) return false;
    result.value = (packedSP128.value * 4 + packedSP256.value * 8 + packedSP512.value * 16 + scalarSP.value)/(scaling_factor * measuring_interval_s);
    return getTimestampFromReadings(result.timestamp, scalarSP, packedSP128, packedSP256, packedSP512);
}
/**
 * @brief Derives a per-second rate from all double-precision floating-point counters.
 *
 * The scalar count is taken as is; the 128-, 256- and 512-bit packed counts are
 * weighted by 2, 4 and 8 respectively (presumably elements per vector -- confirm
 * against the counter definitions). The weighted sum is divided by
 * scaling_factor * measuring_interval_s.
 *
 * @param scalarDB             Reading of the scalar double-precision counter.
 * @param packedDP128          Reading of the 128-bit packed double-precision counter.
 * @param packedDP256          Reading of the 256-bit packed double-precision counter.
 * @param packedDP512          Reading of the 512-bit packed double-precision counter.
 * @param result               Output reading; its value receives the rate and its
 *                             timestamp is handed to getTimestampFromReadings().
 * @param scaling_factor       Divisor applied together with the interval.
 * @param measuring_interval_s Measuring interval (seconds, going by the name).
 * @return false if either divisor factor is zero (result is left untouched);
 *         otherwise the return value of getTimestampFromReadings().
 */
bool calculateDoublePerSec(reading_t &scalarDB, reading_t & packedDP128,
        reading_t & packedDP256, reading_t & packedDP512,
        reading_t & result, double scaling_factor, double measuring_interval_s) {
    // A zero scaling factor makes the divisor zero just like a zero interval does,
    // so both are rejected before dividing.
    if(measuring_interval_s == 0.0 || scaling_factor == 0.0) return false;
    result.value = (packedDP128.value * 2 + packedDP256.value * 4
            + packedDP512.value * 8 + scalarDB.value)/(scaling_factor * measuring_interval_s);
    return getTimestampFromReadings(result.timestamp, scalarDB, packedDP128, packedDP256, packedDP512);
}
......@@ -70,6 +70,23 @@ bool calculateSP_TO_TOTAL_RATIO(reading_t &scalarDB, reading_t & scalarSP,
reading_t & packedDP512, reading_t & packedSP512,
reading_t & result, double scaling_factor);
/**
 * Rate derived from the 128-bit packed counters:
 * (packedDP128 * 2 + packedSP128 * 4) / (scaling_factor * measuring_interval_s).
 * The value is written to result; returns false without computing anything
 * when measuring_interval_s is zero.
 */
bool calculatePacked128PerSec(reading_t & packedDP128, reading_t & packedSP128,
reading_t & result, double scaling_factor, double measuring_interval_s);
/**
 * Rate derived from the 256-bit packed counters:
 * (packedDP256 * 4 + packedSP256 * 8) / (scaling_factor * measuring_interval_s).
 * The value is written to result; returns false without computing anything
 * when measuring_interval_s is zero.
 */
bool calculatePacked256PerSec(reading_t & packedDP256, reading_t & packedSP256,
reading_t & result, double scaling_factor, double measuring_interval_s);
/**
 * Rate derived from the 512-bit packed counters:
 * (packedDP512 * 8 + packedSP512 * 16) / (scaling_factor * measuring_interval_s).
 * The value is written to result; returns false without computing anything
 * when measuring_interval_s is zero.
 */
bool calculatePacked512PerSec(reading_t & packedDP512, reading_t & packedSP512,
reading_t & result, double scaling_factor, double measuring_interval_s);
/**
 * Rate derived from the single-precision counters:
 * (scalarSP + packedSP128 * 4 + packedSP256 * 8 + packedSP512 * 16)
 *   / (scaling_factor * measuring_interval_s).
 * The value is written to result; returns false without computing anything
 * when measuring_interval_s is zero.
 */
bool calculateSinglePrecisionPerSec(reading_t & scalarSP, reading_t & packedSP128,
reading_t & packedSP256, reading_t & packedSP512,
reading_t & result, double scaling_factor, double measuring_interval_s);
/**
 * Rate derived from the double-precision counters:
 * (scalarDB + packedDP128 * 2 + packedDP256 * 4 + packedDP512 * 8)
 *   / (scaling_factor * measuring_interval_s).
 * The value is written to result; returns false without computing anything
 * when measuring_interval_s is zero.
 */
bool calculateDoublePerSec(reading_t &scalarDB, reading_t & packedDP128,
reading_t & packedDP256, reading_t & packedDP512,
reading_t & result, double scaling_factor, double measuring_interval_s);
/**
 * L3 hit to L3 miss ratio (going by the function name), taking three L3
 * readings and a scaling factor and writing into result.
 * NOTE(review): the exact formula and the meaning of the boolean return value
 * should be confirmed against the definition.
 */
bool calculateL3HitToL3MissRatio(reading_t & l3_misses, reading_t& l3_load_hits,
reading_t & l3_load_misses, reading_t & result, double scaling_factor);
......
......@@ -84,7 +84,7 @@ SMUCNGPerfConfigurator::SMUCNGPerfConfigurator() : OperatorConfiguratorTemplate(
_metricMap["FLOPS"]=SMUCSensorBase::FLOPS;
_metricMap["PACKED_FLOPS"]=SMUCSensorBase::PACKED_FLOPS;
_metricMap["AVX512_TOVECTORIZED_RATIO"]=SMUCSensorBase::AVX512_TOVECTORIZED_RATIO;
_metricMap["VECTORIZED_RATIO"]=SMUCSensorBase::VECTORIZED_RATIO;
_metricMap["VECTORIZATION_RATIO"]=SMUCSensorBase::VECTORIZATION_RATIO;
_metricMap["SINGLE_PRECISION_TO_TOTAL_RATIO"]=SMUCSensorBase::SINGLE_PRECISION_TO_TOTAL_RATIO;
_metricMap["EXPENSIVE_INSTRUCTIONS_PER_SECOND"]=SMUCSensorBase::EXPENSIVE_INSTRUCTIONS_PER_SECOND;
_metricMap["INTRA_NODE_LOADIMBALANCE"]=SMUCSensorBase::INTRA_NODE_LOADIMBALANCE;
......@@ -126,6 +126,11 @@ SMUCNGPerfConfigurator::SMUCNGPerfConfigurator() : OperatorConfiguratorTemplate(
_metricMap["IOWRITES_PER_SECOND_PROF"]=SMUCSensorBase::IOWRITES_PER_SECOND_PROF;
_metricMap["IO_BYTES_READ_PER_OP_PROF"]=SMUCSensorBase::IO_BYTES_READ_PER_OP_PROF;
_metricMap["IO_BYTES_WRITE_PER_OP_PROF"]=SMUCSensorBase::IO_BYTES_WRITE_PER_OP_PROF;
_metricMap["PACKED128_FLOPS"] = SMUCSensorBase::PACKED128_FLOPS;
_metricMap["PACKED256_FLOPS"] = SMUCSensorBase::PACKED256_FLOPS;
_metricMap["PACKED512_FLOPS"] = SMUCSensorBase::PACKED512_FLOPS;
_metricMap["SINGLE_PRECISION_FLOPS"] = SMUCSensorBase::SINGLE_PRECISION_FLOPS;
_metricMap["DOUBLE_PRECISION_FLOPS"] = SMUCSensorBase::DOUBLE_PRECISION_FLOPS;
}
SMUCNGPerfConfigurator::~SMUCNGPerfConfigurator() {
......
......@@ -91,6 +91,9 @@ SMUCNGPerfOperator::SMUCNGPerfOperator(const std::string& name): OperatorTemplat
_profileMetricToMetricIds[SMUCSensorBase::IOWRITES_PER_SECOND_PROF] = {SMUCSensorBase::IOWRITES};
_profileMetricToMetricIds[SMUCSensorBase::IO_BYTES_READ_PER_OP_PROF] = {SMUCSensorBase::IOBYTESREAD, SMUCSensorBase::IOREADS};
_profileMetricToMetricIds[SMUCSensorBase::IO_BYTES_WRITE_PER_OP_PROF] = {SMUCSensorBase::IOBYTESWRITE, SMUCSensorBase::IOWRITES};
_flop_metric = {SMUCSensorBase::FLOPS, SMUCSensorBase::PACKED_FLOPS, SMUCSensorBase::AVX512_TOVECTORIZED_RATIO,
SMUCSensorBase::VECTORIZATION_RATIO, SMUCSensorBase::SINGLE_PRECISION_TO_TOTAL_RATIO};
}
SMUCNGPerfOperator::~SMUCNGPerfOperator() {
......@@ -135,9 +138,7 @@ void SMUCNGPerfOperator::compute(U_Ptr unit) {
}
if (outSensor->getMetric() == SMUCSensorBase::FREQUENCY) {
computeFREQUENCY(inputs, outSensor, timestamp);
} else if (outSensor->getMetric() == SMUCSensorBase::FLOPS || outSensor->getMetric() == SMUCSensorBase::PACKED_FLOPS ||
outSensor->getMetric() == SMUCSensorBase::AVX512_TOVECTORIZED_RATIO || outSensor->getMetric() == SMUCSensorBase::VECTORIZED_RATIO ||
outSensor->getMetric() == SMUCSensorBase::SINGLE_PRECISION_TO_TOTAL_RATIO) {
} else if (_flop_metric.find(outSensor->getMetric()) != _flop_metric.end()) {
computeFLOPS(inputs, outSensor, timestamp);
} else if (outSensor->getMetric() == SMUCSensorBase::L3HIT_TO_L3MISS_RATIO ){
computeL3HIT_TO_L3MISS_RATIO(inputs, outSensor, timestamp);
......@@ -303,36 +304,77 @@ void SMUCNGPerfOperator::computeFLOPS(std::vector<SMUCNGPtr>& inputs, SMUCNGPtr&
reading_t & packed512_single = fp_arith_512b_packed_single.size() > 0 ? fp_arith_512b_packed_single[0] : empty;
reading_t result;
if(flop_metric == SMUCSensorBase::FLOPS) {
if (calculateFlopsPerSec(scalar_double, scalar_single, packed128_double,
packed128_single, packed256_double, packed256_single,
packed512_double, packed512_single, result, *outSensor->getMetadata()->getScale(), _measuring_interval_s) ) {
outSensor->storeReading(result);
}
} else if(flop_metric == SMUCSensorBase::PACKED_FLOPS){
if (calculatePackedFlopsPerSec(packed128_double, packed128_single,
packed256_double, packed256_single, packed512_double,
packed512_single, result, *outSensor->getMetadata()->getScale(), _measuring_interval_s)) {
outSensor->storeReading(result);
}
} else if(flop_metric == SMUCSensorBase::VECTORIZED_RATIO) {
if(calculateVectorizationRatio(scalar_double, scalar_single, packed128_double,
packed128_single, packed256_double, packed256_single,
packed512_double, packed512_single, result, *outSensor->getMetadata()->getScale())) {
outSensor->storeReading(result);
}
} else if (flop_metric == SMUCSensorBase::AVX512_TOVECTORIZED_RATIO) {
if (calculateAVX512FlopsToVectorizedRatio(packed128_double,
packed128_single, packed256_double, packed256_single,
packed512_double, packed512_single, result, *outSensor->getMetadata()->getScale())) {
outSensor->storeReading(result);
}
} else if (flop_metric == SMUCSensorBase::SINGLE_PRECISION_TO_TOTAL_RATIO) {
if(calculateSP_TO_TOTAL_RATIO(scalar_double, scalar_single, packed128_double,
packed128_single, packed256_double, packed256_single,
packed512_double, packed512_single, result, *outSensor->getMetadata()->getScale())){
outSensor->storeReading(result);
}
switch (flop_metric) {
case SMUCSensorBase::FLOPS:
if (calculateFlopsPerSec(scalar_double, scalar_single, packed128_double,
packed128_single, packed256_double, packed256_single,
packed512_double, packed512_single, result, *outSensor->getMetadata()->getScale(), _measuring_interval_s) ) {
outSensor->storeReading(result);
}
break;
case SMUCSensorBase::PACKED_FLOPS:
if (calculatePackedFlopsPerSec(packed128_double, packed128_single,
packed256_double, packed256_single, packed512_double,
packed512_single, result, *outSensor->getMetadata()->getScale(), _measuring_interval_s)) {
outSensor->storeReading(result);
}
break;
case SMUCSensorBase::VECTORIZATION_RATIO:
if(calculateVectorizationRatio(scalar_double, scalar_single, packed128_double,
packed128_single, packed256_double, packed256_single,
packed512_double, packed512_single, result, *outSensor->getMetadata()->getScale())) {
outSensor->storeReading(result);
}
break;
case SMUCSensorBase::AVX512_TOVECTORIZED_RATIO:
if (calculateAVX512FlopsToVectorizedRatio(packed128_double,
packed128_single, packed256_double, packed256_single,
packed512_double, packed512_single, result, *outSensor->getMetadata()->getScale())) {
outSensor->storeReading(result);
}
break;
case SMUCSensorBase::SINGLE_PRECISION_TO_TOTAL_RATIO:
if(calculateSP_TO_TOTAL_RATIO(scalar_double, scalar_single, packed128_double,
packed128_single, packed256_double, packed256_single,
packed512_double, packed512_single, result, *outSensor->getMetadata()->getScale())){
outSensor->storeReading(result);
}
break;
case SMUCSensorBase::PACKED128_FLOPS:
if(calculatePacked128PerSec(packed128_double, packed128_single, result,
*outSensor->getMetadata()->getScale(), _measuring_interval_s)){
outSensor->storeReading(result);
}
break;
case SMUCSensorBase::PACKED256_FLOPS:
if(calculatePacked256PerSec(packed256_double, packed256_single, result,
*outSensor->getMetadata()->getScale(), _measuring_interval_s)){
outSensor->storeReading(result);
}
break;
case SMUCSensorBase::PACKED512_FLOPS:
if (calculatePacked512PerSec(packed512_double, packed512_single, result,
*outSensor->getMetadata()->getScale(), _measuring_interval_s)) {
outSensor->storeReading(result);
}
break;
case SMUCSensorBase::SINGLE_PRECISION_FLOPS:
if (calculateSinglePrecisionPerSec(scalar_single, packed128_single, packed256_single, packed512_single, result,
*outSensor->getMetadata()->getScale(), _measuring_interval_s)) {
outSensor->storeReading(result);
}
break;
case SMUCSensorBase::DOUBLE_PRECISION_FLOPS:
if (calculateDoublePerSec(scalar_double, packed128_double,
packed256_double, packed512_double, result,
*outSensor->getMetadata()->getScale(), _measuring_interval_s)) {
outSensor->storeReading(result);
}
break;
default:
//no default...
LOG(error) << "Flop metric " << flop_metric << " not implemented.";
break;
}
}
......
......@@ -32,6 +32,7 @@
#include "SMUCSensorBase.h"
#include "SKXPMUMetrics.h"
#include <map>
#include <set>
class SMUCNGPerfOperator: virtual public OperatorTemplate<SMUCSensorBase>{
public:
......@@ -57,6 +58,7 @@ protected:
std::map<SMUCSensorBase::Metric_t, SMUCSensorBase::Metric_t> _metricPerSecToId;
std::map<SMUCSensorBase::Metric_t, std::pair<SMUCSensorBase::Metric_t, SMUCSensorBase::Metric_t>> _metricRatioToPair;
std::map<SMUCSensorBase::Metric_t, std::vector<SMUCSensorBase::Metric_t>> _profileMetricToMetricIds;
std::set<SMUCSensorBase::Metric_t> _flop_metric;
vector<vector<reading_t>> _buffers;
const unsigned int MAX_FREQ_MHZ = 2700;
const unsigned int MIN_FREQ_MHZ = 1200;
......
......@@ -87,7 +87,7 @@ public:
FLOPS=53,
PACKED_FLOPS=54,
AVX512_TOVECTORIZED_RATIO=55, //AVX512/(TOTAL VECTORIZED)
VECTORIZED_RATIO=56, //(TOTAL VECTORIZED)/(ALL FLOPS)
VECTORIZATION_RATIO=56, //(TOTAL VECTORIZED)/(ALL FLOPS)
SINGLE_PRECISION_TO_TOTAL_RATIO=57, //Flops
EXPENSIVE_INSTRUCTIONS_PER_SECOND=58,
INTRA_NODE_LOADIMBALANCE=59,
......@@ -129,6 +129,11 @@ public:
IOWRITES_PER_SECOND_PROF=95,
IO_BYTES_READ_PER_OP_PROF=96,
IO_BYTES_WRITE_PER_OP_PROF=97,
PACKED128_FLOPS=98,
PACKED256_FLOPS=99,
PACKED512_FLOPS=100,
SINGLE_PRECISION_FLOPS=101,
DOUBLE_PRECISION_FLOPS=102,
NONE
};
public:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment