Commit c61332cc authored by Michael Ott
Browse files

Implement retries on IPMI errors, merge openSdrCache() and closeSdrCache() into getSdrRecord()

parent 51c75916
......@@ -21,11 +21,12 @@
#include <freeipmi/freeipmi.h>
#include <freeipmi/api/ipmi-api.h>
#define RETRIES 2
namespace DCDB {
IPMIHost::IPMIHost(const std::string& hostName, uint32_t retransmissionTimeout, uint32_t sessionTimeout) {
_ipmiCtx = NULL;
_sdrCtx = NULL;
_sensorReadCtx = NULL;
_hostName = hostName;
_userName = std::string("admin");
......@@ -90,6 +91,7 @@ int IPMIHost::disconnect() {
}
void IPMIHost::checkConnection() {
/*
if (_ipmiCtx) {
uint64_t ts = getTimestamp();
if ((ts - _lastRead) > MS_TO_NS(_sessionTimeout)) {
......@@ -97,7 +99,7 @@ void IPMIHost::checkConnection() {
disconnect();
}
}
*/
if (!_ipmiCtx) {
try {
connect();
......@@ -108,86 +110,75 @@ void IPMIHost::checkConnection() {
}
}
}
bool IPMIHost::openSdrCache() {
if (_sdrCtx) {
return true;
}
if (!(_sdrCtx = ipmi_sdr_ctx_create())) {
return false;
}
bool IPMIHost::getSdrRecord(uint16_t recordId, std::vector<uint8_t>& record) {
bool ret = false;
try {
checkConnection();
} catch (const std::runtime_error& e) {
increaseErrorCount();
throw e;
return false;
}
std::string errorMsg;
if (ipmi_sdr_cache_open(_sdrCtx, _ipmiCtx, _cache.c_str()) < 0) {
if ((ipmi_sdr_ctx_errnum (_sdrCtx) == IPMI_SDR_ERR_CACHE_READ_CACHE_DOES_NOT_EXIST) || (ipmi_sdr_ctx_errnum (_sdrCtx) == IPMI_SDR_ERR_CACHE_INVALID) || (ipmi_sdr_ctx_errnum (_sdrCtx) == IPMI_SDR_ERR_CACHE_OUT_OF_DATE)) {
if ((ipmi_sdr_ctx_errnum (_sdrCtx) == IPMI_SDR_ERR_CACHE_INVALID) || (ipmi_sdr_ctx_errnum (_sdrCtx) == IPMI_SDR_ERR_CACHE_OUT_OF_DATE)) {
LOG(debug) << "Deleting SDR cache " << _cache;
ipmi_sdr_cache_close(_sdrCtx);
ipmi_sdr_cache_delete(_sdrCtx, _cache.c_str());
}
if (ipmi_sdr_cache_create(_sdrCtx, _ipmiCtx, _cache.c_str(), IPMI_SDR_CACHE_CREATE_FLAGS_DEFAULT, NULL, NULL) == 0) {
LOG(debug) << _hostName << ": Created new SDR cache " << _cache;
ipmi_sdr_cache_open(_sdrCtx, _ipmiCtx, _cache.c_str());
}
}
}
if (ipmi_sdr_ctx_errnum (_sdrCtx) != IPMI_SDR_ERR_SUCCESS) {
errorMsg = ipmi_sdr_ctx_errormsg(_sdrCtx);
} else {
return true;
}
closeSdrCache();
throw std::runtime_error("ipmi_sdr_cache_open Error:" + errorMsg);
return false;
}
void IPMIHost::closeSdrCache() {
if (_sdrCtx) {
ipmi_sdr_cache_close(_sdrCtx);
ipmi_sdr_ctx_destroy(_sdrCtx);
_sdrCtx = NULL;
ipmi_sdr_ctx_t sdrCtx = ipmi_sdr_ctx_create();
if (!sdrCtx) {
return ret;
}
return;
}
bool IPMIHost::getSdrRecord(uint16_t recordId, std::vector<uint8_t>& record) {
if (openSdrCache()) {
int recordLength = 0;
uint8_t recordBuf[IPMI_SDR_MAX_RECORD_LENGTH];
if (ipmi_sdr_cache_search_record_id(_sdrCtx, recordId) < 0) {
increaseErrorCount();
throw std::runtime_error("ipmi_sdr_cache_search_record_id() Error: " + std::string(ipmi_sdr_ctx_errormsg(_sdrCtx)));
closeSdrCache();
return false;
}
std::string errorMsg;
int retries = RETRIES;
while (retries--) {
if (ipmi_sdr_cache_open(sdrCtx, _ipmiCtx, _cache.c_str()) < 0) {
if ((ipmi_sdr_ctx_errnum(sdrCtx) == IPMI_SDR_ERR_CACHE_READ_CACHE_DOES_NOT_EXIST) || (ipmi_sdr_ctx_errnum(sdrCtx) == IPMI_SDR_ERR_CACHE_INVALID) || (ipmi_sdr_ctx_errnum(sdrCtx) == IPMI_SDR_ERR_CACHE_OUT_OF_DATE)) {
if ((ipmi_sdr_ctx_errnum(sdrCtx) == IPMI_SDR_ERR_CACHE_INVALID) || (ipmi_sdr_ctx_errnum(sdrCtx) == IPMI_SDR_ERR_CACHE_OUT_OF_DATE)) {
LOG(debug) << _hostName << "Deleting SDR cache " << _cache;
ipmi_sdr_cache_close(sdrCtx);
ipmi_sdr_cache_delete(sdrCtx, _cache.c_str());
}
if (ipmi_sdr_cache_create(sdrCtx, _ipmiCtx, _cache.c_str(), IPMI_SDR_CACHE_CREATE_FLAGS_DEFAULT, NULL, NULL) == 0) {
LOG(debug) << _hostName << ": Created new SDR cache " << _cache;
} else {
LOG(debug) << _hostName << ": Error creating new SDR cache " << _cache;
}
} else {
if (retries == 0) {
LOG(error) << _hostName << ": ipmi_sdr_cache_open " << ipmi_sdr_ctx_errormsg(sdrCtx);
}
increaseErrorCount();
disconnect();
try {
connect();
} catch (const std::runtime_error& e) {
increaseErrorCount();
}
}
} else {
int recordLength = 0;
uint8_t recordBuf[IPMI_SDR_MAX_RECORD_LENGTH];
if ((recordLength = ipmi_sdr_cache_record_read(_sdrCtx, recordBuf, IPMI_SDR_MAX_RECORD_LENGTH)) < 0) {
increaseErrorCount();
throw std::runtime_error("ipmi_sdr_cache_record_read Error: " + std::string(ipmi_sdr_ctx_errormsg(_sdrCtx)));
closeSdrCache();
return false;
if (ipmi_sdr_cache_search_record_id(sdrCtx, recordId) < 0) {
increaseErrorCount();
if (retries == 0) {
LOG(error) << _hostName << ": ipmi_sdr_cache_search_record_id() " << ipmi_sdr_ctx_errormsg(sdrCtx);
}
} else {
if ((recordLength = ipmi_sdr_cache_record_read(sdrCtx, recordBuf, IPMI_SDR_MAX_RECORD_LENGTH)) < 0) {
increaseErrorCount();
if (retries == 0) {
LOG(error) << _hostName << ": ipmi_sdr_cache_record_read " << ipmi_sdr_ctx_errormsg(sdrCtx);
}
} else {
_lastRead = getTimestamp();
record.insert(record.end(), &recordBuf[0], &recordBuf[recordLength]);
ret = true;
}
}
ipmi_sdr_cache_close(sdrCtx);
}
_lastRead = getTimestamp();
record.insert(record.end(), &recordBuf[0], &recordBuf[recordLength]);
closeSdrCache();
return true;
} else {
closeSdrCache();
return false;
}
ipmi_sdr_ctx_destroy(sdrCtx);
return ret;
}
uint64_t IPMIHost::sendRawCmd(const std::vector<uint8_t>& rawCmd,
......@@ -205,15 +196,24 @@ uint64_t IPMIHost::sendRawCmd(const std::vector<uint8_t>& rawCmd,
checkConnection();
} catch (const std::runtime_error& e) {
increaseErrorCount();
throw e;
return 0;
}
if ((len = ipmi_cmd_raw(_ipmiCtx, rawCmd[0], rawCmd[1], &rawCmd[2],
rawCmd.size() - 2, buf, sizeof(buf))) < 0) {
increaseErrorCount();
throw std::runtime_error("ipmi_cmd_raw" + std::string(ipmi_ctx_errormsg(_ipmiCtx)));
return 0;
int retries = RETRIES;
while (retries--) {
if ((len = ipmi_cmd_raw(_ipmiCtx, rawCmd[0], rawCmd[1], &rawCmd[2], rawCmd.size() - 2, buf, sizeof(buf))) < 0) {
if (retries == 0) {
LOG(error) << _hostName << ": ipmi_sensor_read " << ipmi_ctx_errormsg(_ipmiCtx);
increaseErrorCount();
disconnect();
try {
connect();
} catch (const std::runtime_error& e) {
increaseErrorCount();
}
} else {
break;
}
}
}
_errorCount = 0;
......@@ -251,18 +251,28 @@ double IPMIHost::readSensorRecord(std::vector<uint8_t>& record) {
checkConnection();
} catch (const std::runtime_error& e) {
increaseErrorCount();
throw e;
return 0;
}
if (!_sensorReadCtx) {
_sensorReadCtx = ipmi_sensor_read_ctx_create(_ipmiCtx);
}
int retries = RETRIES;
while (retries--) {
if (!_sensorReadCtx) {
_sensorReadCtx = ipmi_sensor_read_ctx_create(_ipmiCtx);
}
if (ipmi_sensor_read(_sensorReadCtx, &record[0], record.size(), 0, &rawReading, &reading, &eventBitmask) < 0) {
increaseErrorCount();
throw std::runtime_error("ipmi_sensor_read Error: " + std::string(ipmi_sensor_read_ctx_errormsg(_sensorReadCtx)));
return 0;
if (ipmi_sensor_read(_sensorReadCtx, &record[0], record.size(), 0, &rawReading, &reading, &eventBitmask) < 0) {
if (retries == 0) {
LOG(error) << _hostName << ": ipmi_sensor_read " << ipmi_sensor_read_ctx_errormsg(_sensorReadCtx);
}
increaseErrorCount();
disconnect();
try {
connect();
} catch (const std::runtime_error& e) {
increaseErrorCount();
}
} else {
break;
}
}
double ret = .0;
......
......@@ -128,13 +128,8 @@ namespace DCDB {
int disconnect();
void checkConnection() ;
/* Open and create/destroy SDR cache (sets/destroys _sdrCtx) */
bool openSdrCache();
void closeSdrCache();
/* Various context structs, required to make use of FreeIPMI */
ipmi_ctx_t _ipmiCtx;
ipmi_sdr_ctx_t _sdrCtx;
ipmi_sensor_read_ctx_t _sensorReadCtx;
std::string _hostName;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment