Currently job artifacts in CI/CD pipelines on LRZ GitLab never expire. Starting from Wed 26.1.2022 the default expiration time will be 30 days (GitLab default). Currently existing artifacts in already completed jobs will not be affected by the change. The latest artifacts for all jobs in the latest successful pipelines will be kept. More information: https://gitlab.lrz.de/help/user/admin_area/settings/continuous_integration.html#default-artifacts-expiration

Commit b9df90b9 authored by Alessio Netti's avatar Alessio Netti
Browse files

Analytics: critical temperature threshold in Cooling Control plugin

parent 0805217f
......@@ -1010,6 +1010,7 @@ Sensors in the Cooling Control plugin support the following parameters:
| Value | Explanation |
|:----- |:----------- |
| hotThreshold | Threshold value for the sensor to be considered _hot_. It must have the same scale as the readings of the sensor itself. Defaults to 70. |
| critThreshold | Additional threshold value, higher than _hotThreshold_ and expressed in the same scale as the readings of the sensor. If a component reaches this threshold it is considered to be in a critical state (e.g., subject to thermal throttling) and hence a decrease in inlet temperature is triggered regardless of the current number of _hot_ nodes. Disabled by default (a value of 0 disables it). |
Finally, the plugin supports the following REST API actions:
......
......@@ -39,6 +39,8 @@ void CoolingControlConfigurator::sensorBase(CoolingControlSensorBase& s, CFG_VAL
{
if (boost::iequals(val.first, "hotThreshold")) {
s.setHotThreshold(std::stoull(val.second.data()));
} else if (boost::iequals(val.first, "critThreshold")) {
s.setCriticalThreshold(std::stoull(val.second.data()));
}
}
}
......
......@@ -152,7 +152,11 @@ uint64_t CoolingControlOperator::getNumHotNodes(std::vector<std::vector<reading_
if (!readings[idx].empty()) {
bool hotNode = true;
for (const auto &r : readings[idx]) {
if (r.value < (int64_t)unit->getInputs()[idx]->getHotThreshold()) {
// If a single reading of a single component reaches or exceeds its critical threshold,
// we immediately trigger a steep cooling temperature decrease by counting all components as hot
if (unit->getInputs()[idx]->getCriticalThreshold() != 0 && r.value >= (int64_t)unit->getInputs()[idx]->getCriticalThreshold()) {
return readings.size();
} else if (r.value < (int64_t)unit->getInputs()[idx]->getHotThreshold()) {
hotNode = false;
break;
}
......
......@@ -49,28 +49,36 @@ public:
// Constructor and destructor
// Builds a sensor with the given name, forwarding it to SNMPSensorBase.
// The hot threshold starts at 70 and the critical threshold at 0.
CoolingControlSensorBase(const std::string& name)
    : SNMPSensorBase(name),
      _hotThreshold(70),
      _critThreshold(0) {}
// Copy constructor
// Copies the SNMPSensorBase part of `other` together with both of its
// threshold values (hot and critical).
CoolingControlSensorBase(CoolingControlSensorBase& other)
    : SNMPSensorBase(other),
      _hotThreshold(other._hotThreshold),
      _critThreshold(other._critThreshold) {}
// Virtual destructor; this class has nothing of its own to release.
virtual ~CoolingControlSensorBase() = default;
// Sets the threshold used to decide whether the sensor is considered hot.
// The value is stored as given; it is expected to use the same scale as
// the sensor's readings.
void setHotThreshold(uint64_t t)      { _hotThreshold = t; }
// Sets the critical threshold. A value of 0 leaves the critical check
// disabled (0 is also the value assigned at construction).
void setCriticalThreshold(uint64_t t) { _critThreshold = t; }
uint64_t getHotThreshold() { return _hotThreshold; }
uint64_t getHotThreshold() { return _hotThreshold; }
uint64_t getCriticalThreshold() { return _critThreshold; }
// Logs this sensor's configuration at level `ll` through logger `lg`.
// The SNMPSensorBase configuration is printed first, followed by the hot
// threshold; the critical threshold is printed only when it is non-zero.
// Each line written here is prefixed with `leadingSpaces` blanks.
void printConfig(LOG_LEVEL ll, LOGGER& lg, unsigned leadingSpaces=16) {
    SNMPSensorBase::printConfig(ll, lg, leadingSpaces);

    const std::string indent(leadingSpaces, ' ');
    LOG_VAR(ll) << indent << " Hot Threshold: " << _hotThreshold;

    // A zero critical threshold is treated as "not configured": nothing to print.
    if (_critThreshold == 0) {
        return;
    }
    LOG_VAR(ll) << indent << " Crit Threshold: " << _critThreshold;
}
protected:
uint64_t _hotThreshold;
uint64_t _critThreshold;
};
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment