diff --git a/dcdbpusher/sensors/nvml/nvml.conf b/dcdbpusher/sensors/nvml/nvml.conf index 8b5b63cecd7a1629c01663a4c4fc17300ede5f02..a92cf27263a36d2c1c1e42326eea883f06088ff7 100644 --- a/dcdbpusher/sensors/nvml/nvml.conf +++ b/dcdbpusher/sensors/nvml/nvml.conf @@ -101,6 +101,11 @@ group nvml_g2 { feature GPU_ECC_ERR } + sensor gpu_pcie_thru { + mqttsuffix /pcie_thru + feature GPU_PCIE_THRU + } + } diff --git a/dcdbpusher/sensors/nvml/nvmlConfigurator.cpp b/dcdbpusher/sensors/nvml/nvmlConfigurator.cpp index 034aa4263cb7768463df435fc3dbabfa395d8959..9b71512774fa5a779e70cf51590b5c7eeeaff097 100644 --- a/dcdbpusher/sensors/nvml/nvmlConfigurator.cpp +++ b/dcdbpusher/sensors/nvml/nvmlConfigurator.cpp @@ -46,6 +46,7 @@ nvmlConfigurator::nvmlConfigurator() { _gpuFeatureMAP["GPU_UTL_MEM"] = GPU_UTL_MEM; _gpuFeatureMAP["GPU_UTL_GPU"] = GPU_UTL_GPU; _gpuFeatureMAP["GPU_ECC_ERR"] = GPU_ECC_ERR; + _gpuFeatureMAP["GPU_PCIE_THRU"] = GPU_PCIE_THRU; _groupName = "group"; _baseName = "sensor"; diff --git a/dcdbpusher/sensors/nvml/nvmlSensorBase.h b/dcdbpusher/sensors/nvml/nvmlSensorBase.h index 4a82ccf29bdfa1414407fbf8e7858150be8c1ad3..b23aebe9d4adef66f7aaeea75b4c7a344316064d 100644 --- a/dcdbpusher/sensors/nvml/nvmlSensorBase.h +++ b/dcdbpusher/sensors/nvml/nvmlSensorBase.h @@ -51,6 +51,8 @@ enum GPU_FEATURE { GPU_UTL_MEM = 10, GPU_UTL_GPU = 11, GPU_ECC_ERR = 13, + GPU_PCIE_THRU = 14, + }; /** @@ -142,9 +144,12 @@ class nvmlSensorBase : public SensorBase { case GPU_UTL_GPU: feature = "GPU_UTL_GPU"; break; - case GPU_ECC_ERR: - feature = "GPU_ECC_ERR"; - break; + case GPU_ECC_ERR: + feature = "GPU_ECC_ERR"; + break; + case GPU_PCIE_THRU: + feature = "GPU_PCIE_THRU"; + break; } LOG_VAR(ll) << leading << " Feature type: " << feature; } diff --git a/dcdbpusher/sensors/nvml/nvmlSensorGroup.cpp b/dcdbpusher/sensors/nvml/nvmlSensorGroup.cpp index 102ee4d9666686449797f19f5a8254d133035cb5..f70c6fad8f7bd3d74a1e61f15e5fa129a7f05092 100644 --- a/dcdbpusher/sensors/nvml/nvmlSensorGroup.cpp +++ 
b/dcdbpusher/sensors/nvml/nvmlSensorGroup.cpp @@ -41,9 +41,6 @@ struct counters_t { unsigned long long energy_initial; unsigned long long energy_current; unsigned long long energy_previous; - // unsigned int freq1; - // unsigned int freq2; - // unsigned int freq3; unsigned int temperature; unsigned int fanspeed; unsigned int clockspeed_graphics; @@ -53,6 +50,7 @@ struct counters_t { unsigned int power; unsigned long long ecc_counts; nvmlUtilization_t utilization; + unsigned int pcie_throughput; } counters; nvmlSensorGroup::nvmlSensorGroup(const std::string& name) : @@ -190,6 +188,16 @@ void nvmlSensorGroup::read() { err = nvmlDeviceGetTotalEccErrors (env.device, NVML_MEMORY_ERROR_TYPE_CORRECTED,NVML_VOLATILE_ECC,&(counters.ecc_counts)); reading.value = counters.ecc_counts; break; + case(GPU_PCIE_THRU): { + /* NVML_PCIE_UTIL_COUNT is only the element count of nvmlPcieUtilCounter_t, not a queryable counter; report TX + RX throughput (KB/s per the NVML docs) instead. */ + unsigned int pcie_tx = 0; + unsigned int pcie_rx = 0; + err = nvmlDeviceGetPcieThroughput (env.device, NVML_PCIE_UTIL_TX_BYTES,&pcie_tx); + if (err == NVML_SUCCESS) err = nvmlDeviceGetPcieThroughput (env.device, NVML_PCIE_UTIL_RX_BYTES,&pcie_rx); + counters.pcie_throughput = pcie_tx + pcie_rx; + reading.value = counters.pcie_throughput; + break; + } } s->storeReading(reading);