Commit 08880247 authored by Carla Guillen

Merge branch 'development' of gitlab.lrz.de:dcdb/dcdb into development

parents ae2a4448 2914c79f
......@@ -855,6 +855,7 @@ The following are the configuration parameters available for the _Regressor_ plu
| window | Length in milliseconds of the time window that is used to retrieve recent readings for the input sensors, starting from the latest one.
| trainingSamples | Number of samples necessary to perform training of the current model.
| targetDistance | Temporal distance (in terms of lags) of the sample that is to be predicted.
| smoothResponses | If false, the regressor will attempt to predict the single sensor reading that lies _targetDistance_ lags ahead. If true, it will instead predict the _average_ of the current and the upcoming _targetDistance_ sensor readings.
| inputPath | Path of a file from which a pre-trained random forest model must be loaded.
| outputPath | Path of a file to which the random forest model trained at runtime must be saved.
| getImportances | If true, the random forest will also compute feature importance values when trained, which are then printed to the log.
......
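As a sketch only: the block syntax below follows the usual Wintermute operator configuration style, but the exact structure, the operator name and the paths are illustrative assumptions rather than values taken from this commit; only the parameter names come from the table above. With the new flag enabled, a Regressor block might look like this:

```
regressor reg1 {
    window          2000
    trainingSamples 256
    targetDistance  10
    smoothResponses true
    outputPath      /path/to/model.bin
    getImportances  true
}
```

With smoothResponses set to true, the model trained from this block predicts the average of the current reading and the next 10 readings instead of the single reading 10 lags ahead.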
......@@ -53,6 +53,8 @@ void RegressorConfigurator::operatorAttributes(RegressorOperator& op, CFG_VAL co
op.setAggregationWindow(stoull(val.second.data()) * 1000000);
else if(boost::iequals(val.first, "targetDistance"))
op.setTargetDistance(stoull(val.second.data()));
else if(boost::iequals(val.first, "smoothResponses"))
op.setSmoothResponses(to_bool(val.second.data()));
else if(boost::iequals(val.first, "inputPath"))
op.setInputPath(val.second.data());
else if(boost::iequals(val.first, "outputPath"))
......
......@@ -32,6 +32,7 @@ RegressorOperator::RegressorOperator(const std::string& name) : OperatorTemplate
_modelOut = "";
_aggregationWindow = 0;
_targetDistance = 1;
_smoothResponses = false;
_numFeatures = REG_NUMFEATURES;
_trainingSamples = 256;
_trainingPending = true;
......@@ -47,6 +48,7 @@ RegressorOperator::RegressorOperator(const RegressorOperator& other) : OperatorT
_modelOut = "";
_aggregationWindow = other._aggregationWindow;
_targetDistance = other._targetDistance;
_smoothResponses = other._smoothResponses;
_numFeatures = other._numFeatures;
_trainingSamples = other._trainingSamples;
_importances = other._importances;
......@@ -105,6 +107,7 @@ void RegressorOperator::execOnInit() {
void RegressorOperator::printConfig(LOG_LEVEL ll) {
LOG_VAR(ll) << " Window: " << _aggregationWindow;
LOG_VAR(ll) << " Target Distance: " << _targetDistance;
LOG_VAR(ll) << " Smooth Response: " << (_smoothResponses ? "enabled" : "disabled");
LOG_VAR(ll) << " Training Sample: " << _trainingSamples;
LOG_VAR(ll) << " Input Path: " << (_modelIn!="" ? _modelIn : std::string("none"));
LOG_VAR(ll) << " Output Path: " << (_modelOut!="" ? _modelOut : std::string("none"));
......@@ -143,10 +146,15 @@ void RegressorOperator::trainRandomForest(bool categorical) {
if((uint64_t)_responseSet->size().height <= _targetDistance)
throw std::runtime_error("Operator " + _name + ": cannot perform training, insufficient data!");
// Shifting the training and response sets so as to obtain the desired prediction distance
-    *_responseSet = _responseSet->rowRange(_targetDistance, _responseSet->size().height);
+    if(!categorical && _smoothResponses && _targetDistance>0) {
+        smoothResponsesArray();
+        *_responseSet = _responseSet->rowRange(0, _responseSet->size().height-_targetDistance);
+    } else {
+        *_responseSet = _responseSet->rowRange(_targetDistance, _responseSet->size().height);
+    }
*_trainingSet = _trainingSet->rowRange(0, _trainingSet->size().height-_targetDistance);
shuffleTrainingSet();
cv::Mat varType = cv::Mat(_trainingSet->size().width + 1, 1, CV_8U);
varType.setTo(cv::Scalar(cv::ml::VAR_NUMERICAL));
varType.at<unsigned char>(_trainingSet->size().width, 0) = categorical ? cv::ml::VAR_CATEGORICAL : cv::ml::VAR_NUMERICAL;
......@@ -171,6 +179,16 @@ void RegressorOperator::trainRandomForest(bool categorical) {
}
}
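// Builds the smoothed prediction targets: each response becomes the average of the
// current reading and the next _targetDistance readings (used when _smoothResponses is enabled).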
void RegressorOperator::smoothResponsesArray() {
for(size_t midx=0; midx < (_responseSet->size().height - _targetDistance); midx++) {
float meanBuf = 0;
for(size_t nidx=0; nidx <= _targetDistance; nidx++) {
meanBuf += _responseSet->at<float>(midx + nidx);
}
_responseSet->at<float>(midx) = meanBuf / (_targetDistance + 1);
}
}
void RegressorOperator::shuffleTrainingSet() {
if(!_trainingSet || !_responseSet)
return;
......
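To make the alignment easier to check, the standalone sketch below reproduces the two response-preparation paths from trainRandomForest and smoothResponsesArray in plain C++, with std::vector standing in for cv::Mat; the function names and the small driver are illustrative and not part of DCDB. In both modes the training set keeps only its first height - targetDistance rows, so training row i ends up paired either with the reading targetDistance lags ahead (plain mode) or with the average of the current and next targetDistance readings (smoothed mode).

```cpp
// Minimal sketch of the response shifting/smoothing logic; assumes
// targetDistance < readings.size(), as checked in trainRandomForest.
#include <cstddef>
#include <iostream>
#include <vector>

// Smoothed mode: each response becomes the mean of the current reading and the
// next targetDistance readings; the last targetDistance rows are then dropped
// so the responses stay aligned with the (un-shifted) training rows.
std::vector<float> smoothResponses(std::vector<float> r, std::size_t targetDistance) {
    for (std::size_t midx = 0; midx + targetDistance < r.size(); midx++) {
        float meanBuf = 0;
        for (std::size_t nidx = 0; nidx <= targetDistance; nidx++)
            meanBuf += r[midx + nidx];
        r[midx] = meanBuf / (targetDistance + 1);
    }
    r.resize(r.size() - targetDistance);
    return r;
}

// Plain mode: responses are shifted forward by targetDistance, so training row i
// is paired with the reading observed targetDistance lags later.
std::vector<float> shiftResponses(const std::vector<float>& r, std::size_t targetDistance) {
    return std::vector<float>(r.begin() + targetDistance, r.end());
}

int main() {
    std::vector<float> readings = {1, 2, 3, 4, 5, 6};
    for (float v : shiftResponses(readings, 2))  std::cout << v << " ";   // 3 4 5 6
    std::cout << "\n";
    for (float v : smoothResponses(readings, 2)) std::cout << v << " ";   // 2 3 4 5
    std::cout << "\n";
    return 0;
}
```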
......@@ -65,6 +65,7 @@ public:
void setComputeImportances(bool i) { _importances = i; }
void setRawMode(bool r) { _numFeatures = r ? 1 : REG_NUMFEATURES; }
void triggerTraining() { _trainingPending = true;}
void setSmoothResponses(bool s) { _smoothResponses = s; }
std::string getInputPath() { return _modelIn;}
std::string getOutputPath() { return _modelOut; }
......@@ -72,6 +73,7 @@ public:
unsigned long long getTrainingSamples() { return _trainingSamples; }
bool getComputeImportances() { return _importances; }
bool getRawMode() { return _numFeatures != REG_NUMFEATURES; }
bool getSmoothResponses() { return _smoothResponses; }
virtual void printConfig(LOG_LEVEL ll) override;
......@@ -81,6 +83,7 @@ protected:
bool computeFeatureVector(U_Ptr unit);
void trainRandomForest(bool categorical=false);
void shuffleTrainingSet();
void smoothResponsesArray();
std::string getImportances();
std::string _modelOut;
......@@ -89,6 +92,7 @@ protected:
unsigned long long _trainingSamples;
unsigned long long _targetDistance;
unsigned long long _numFeatures;
bool _smoothResponses;
bool _trainingPending;
bool _importances;
bool _includeTarget;
......