Commit 261b6ccf authored by andibraimllari's avatar andibraimllari Committed by AndiBraimllari
Browse files

statistics namespace

parent ffc3d862
Pipeline #872025 failed with stages
in 5 minutes and 25 seconds
......@@ -10,7 +10,6 @@ set(MODULE_HEADERS
Utilities/DataContainerFormatter.hpp
Utilities/FormatConfig.h
Utilities/Math.hpp
Utilities/Statistics.h
Utilities/TypeCasts.hpp
Descriptors/DataDescriptor.h
Descriptors/DescriptorUtils.h
......@@ -43,7 +42,6 @@ set(MODULE_SOURCES
Utilities/Assertions.cpp
Descriptors/DataDescriptor.cpp
Descriptors/VolumeDescriptor.cpp
Utilities/Statistics.cpp
Descriptors/PlanarDetectorDescriptor.cpp
Descriptors/RandomBlocksDescriptor.cpp
Descriptors/DescriptorUtils.cpp
......
#pragma once
#include "elsaDefines.h"
#include "DataContainer.h"
#include <numeric>
#include <cmath>
#include <utility>
namespace elsa
{
......@@ -46,23 +51,210 @@ namespace elsa
}
} // namespace math
/// proposed in Y. Meyer, Oscillating Patterns in Image Processing and Nonlinear Evolution
/// Equations. AMS, 2001
template <typename data_t>
data_t meyerFunction(data_t x)
namespace statistics
{
if (x < 0) {
return 0;
} else if (0 <= x && x <= 1) {
return 35 * std::pow(x, 4) - 84 * std::pow(x, 5) + 70 * std::pow(x, 6)
- 20 * std::pow(x, 7);
} else {
return 1;
/**
* @brief Compute the Mean Squared Error of two given signals.
*
* Calculate it based on the formula of @f$ \frac{1}{n} \sum_{i=1}^n (x_{i} - y_{i})^{2}
* @f$.
*
* @param dc1 DataContainer signal
* @param dc2 DataContainer signal
*
* @author Andi Braimllari - initial code
*
* @tparam data_t data type for the domain and range of the problem, defaulting to real_t
*/
template <typename data_t = real_t>
data_t meanSquaredError(DataContainer<data_t> dc1, DataContainer<data_t> dc2)
{
if (dc1.getDataDescriptor() != dc2.getDataDescriptor()) {
throw LogicError(std::string(
"Statistics::meanSquaredError: shapes of both signals should match"));
}
DataContainer<data_t> diff = dc1 - dc2;
return diff.squaredL2Norm() / dc1.getSize();
}
/// @brief Calculate mean and standard deviation of a container
/// @param v Any container, such as `std::vector`
/// @tparam Container Container type of argument (e.g. `std::vector`)
/// @return a pair of mean and standard deviation (of type `Container::value_type`)
template <typename Container>
constexpr auto calculateMeanStddev(Container v)
-> std::pair<typename Container::value_type, typename Container::value_type>
{
// value type of the vector
using T = typename Container::value_type;
// Sum all elements and divide by the size to get mean
auto sum = std::accumulate(std::begin(v), std::end(v), T());
auto mean = sum / static_cast<T>(std::size(v));
// New vector with x_i - mean entries
std::vector<T> diff(v.size());
std::transform(std::begin(v), std::end(v), std::begin(diff),
[mean](auto x) { return x - mean; });
// Sum of product of each element
auto sqSum =
std::inner_product(std::begin(diff), std::end(diff), std::begin(diff), T());
auto stdev = std::sqrt(sqSum / mean);
return std::make_pair(mean, stdev);
}
/**
* @brief Compute the 95% confidence interval for a given number of samples `n`
* and the mean and standard deviation of the measurements.
*
* Compute it as \f$mean - c(n) * stddev, mean + c(n) * stddev\f$, where
* \f$c(n)\f$ is the n-th entry in the two tails T distribution table. For \f$n > 30\f$,
* it is assumed that \f$n = 1.96\f$.
*
* @param n Number of of samples
* @param mean mean of samples
* @param stddev standard deviation of samples
* @return pair of lower and upper bound of 95% confidence interval
*/
template <typename data_t = real_t>
std::pair<real_t, real_t> confidenceInterval95(std::size_t n, data_t mean, data_t stddev)
{
// Do we run often enough to assume a large sample size?
if (n > 30) {
// 1.96 is a magic number for 95% confidence intervals, equivalent to c in the other
// branch
const auto lower = mean - 1.96 * stddev;
const auto upper = mean + 1.96 * stddev;
return std::make_pair(lower, upper);
} else {
// t Table for 95% with offset to handle 1 iteration
// In that case the mean is the lower and upper bound
constexpr std::array c = {
0.0, 12.70620, 4.302653, 3.182446, 2.776445, 2.570582, 2.446912, 2.364624,
2.306004, 2.262157, 2.228139, 2.200985, 2.178813, 2.160369, 2.144787, 2.131450,
2.119905, 2.109816, 2.100922, 2.093024, 2.085963, 2.079614, 2.073873, 2.068658,
2.063899, 2.059539, 2.055529, 2.051831, 2.048407, 2.045230};
const auto degreeOfFreedome = n - 1;
const auto lower = mean - c[degreeOfFreedome] * stddev;
const auto upper = mean + c[degreeOfFreedome] * stddev;
return std::make_pair(lower, upper);
}
}
/**
* @brief Compute the Relative Error between two given signals.
*
* Calculate it based on the formula of @f$ \| x - y \|_{2} / \| y \|_{2} @f$.
*
* @param dc1 DataContainer signal
* @param dc2 DataContainer signal
*
* @author Andi Braimllari - initial code
*
* @tparam data_t data type for the domain and range of the problem, defaulting to real_t
*/
template <typename data_t = real_t>
data_t relativeError(DataContainer<data_t> dc1, DataContainer<data_t> dc2)
{
if (dc1.getDataDescriptor() != dc2.getDataDescriptor()) {
throw InvalidArgumentError(
std::string("statistics::relativeError: shapes of both signals should match"));
}
DataContainer<data_t> diff = dc1 - dc2;
return diff.l2Norm() / dc2.l2Norm();
}
}
/**
* @brief Compute the Peak Signal-to-Noise Ratio of a given signal S.
*
* Calculate it based on the formula of @f$ 10 * log_{10}(MAX_{I}^{2} / MSE) @f$ in which
* @f$ MAX_{I} @f$ is the maximum possible pixel value of the image and @f$ MSE @f$ is the
* mean squared error.
*
* @param dc1 DataContainer signal
* @param dc2 DataContainer signal
* @param dataRange The data range of the signals (distance between minimum and maximum
* possible values).
*
* @author Andi Braimllari - initial code
*
* @tparam data_t data type for the domain and range of the problem, defaulting to real_t
*/
template <typename data_t = real_t>
data_t peakSignalToNoiseRatio(DataContainer<data_t> dc1, DataContainer<data_t> dc2,
data_t dataRange)
{
if (dc1.getDataDescriptor() != dc2.getDataDescriptor()) {
throw InvalidArgumentError(std::string(
"statistics::peakSignalToNoiseRatio: shapes of both signals should match"));
}
return 10 * std::log10((std::pow(dataRange, 2) / meanSquaredError(dc1, dc2)));
}
/**
* @brief Compute the Peak Signal-to-Noise Ratio.
*
* @param dc1 DataContainer signal
* @param dc2 DataContainer signal
*
* @author Andi Braimllari - initial code
*
* @tparam data_t data type for the domain and range of the problem, defaulting to real_t
*/
template <typename data_t = real_t>
data_t peakSignalToNoiseRatio(DataContainer<data_t> dc1, DataContainer<data_t> dc2)
{
if (dc1.getDataDescriptor() != dc2.getDataDescriptor()) {
throw InvalidArgumentError(std::string(
"statistics::peakSignalToNoiseRatio: shapes of both signals should match"));
}
data_t dataMin = std::numeric_limits<data_t>::min();
data_t dataMax = std::numeric_limits<data_t>::max();
data_t trueMin = std::min(dc1.minElement(), dc2.minElement());
data_t trueMax = std::max(dc1.maxElement(), dc2.maxElement());
if (trueMin < dataMin || trueMax > dataMax) {
throw InvalidArgumentError(
std::string("statistics::peakSignalToNoiseRatio: extreme values of the signals "
"exceed the range expected for its data type"));
}
data_t dataRange;
if (trueMin >= 0) {
dataRange = dataMax;
} else {
dataRange = dataMax - dataMin;
}
return peakSignalToNoiseRatio(dc1, dc2, dataRange);
}
} // namespace statistics
namespace shearlet
{
/// proposed in Y. Meyer, Oscillating Patterns in Image Processing and Nonlinear Evolution
/// Equations. AMS, 2001
template <typename data_t>
data_t meyerFunction(data_t x)
{
if (x < 0) {
return 0;
} else if (0 <= x && x <= 1) {
return 35 * std::pow(x, 4) - 84 * std::pow(x, 5) + 70 * std::pow(x, 6)
- 20 * std::pow(x, 7);
} else {
return 1;
}
}
/// defined in Sören Häuser and Gabriele Steidl, Fast Finite Shearlet Transform: a
/// tutorial, 2014
template <typename data_t>
......
#include "Statistics.h"
namespace elsa
{
template <typename data_t>
data_t Statistics<data_t>::meanSquaredError(DataContainer<data_t> dc1,
DataContainer<data_t> dc2)
{
if (dc1.getDataDescriptor() != dc2.getDataDescriptor()) {
throw LogicError(
std::string("Statistics::meanSquaredError: shapes of both signals should match"));
}
DataContainer<data_t> diff = dc1 - dc2;
return diff.squaredL2Norm() / dc1.getSize();
}
template <typename data_t>
template <typename Container>
constexpr auto Statistics<data_t>::calculateMeanStddev(Container v)
-> std::pair<typename Container::value_type, typename Container::value_type>
{
// value type of the vector
using T = typename Container::value_type;
// Sum all elements and divide by the size to get mean
auto sum = std::accumulate(std::begin(v), std::end(v), T());
auto mean = sum / static_cast<T>(std::size(v));
// New vector with x_i - mean entries
std::vector<T> diff(v.size());
std::transform(std::begin(v), std::end(v), std::begin(diff),
[mean](auto x) { return x - mean; });
// Sum of product of each element
auto sqSum = std::inner_product(std::begin(diff), std::end(diff), std::begin(diff), T());
auto stdev = std::sqrt(sqSum / mean);
return std::make_pair(mean, stdev);
}
template <typename data_t>
std::pair<real_t, real_t> Statistics<data_t>::confidenceInterval95(std::size_t n, data_t mean,
data_t stddev)
{
// Do we run often enough to assume a large sample size?
if (n > 30) {
// 1.96 is a magic number for 95% confidence intervals, equivalent to c in the other
// branch
const auto lower = mean - 1.96 * stddev;
const auto upper = mean + 1.96 * stddev;
return std::make_pair(lower, upper);
} else {
// t Table for 95% with offset to handle 1 iteration
// In that case the mean is the lower and upper bound
constexpr std::array c = {0.0, 12.70620, 4.302653, 3.182446, 2.776445, 2.570582,
2.446912, 2.364624, 2.306004, 2.262157, 2.228139, 2.200985,
2.178813, 2.160369, 2.144787, 2.131450, 2.119905, 2.109816,
2.100922, 2.093024, 2.085963, 2.079614, 2.073873, 2.068658,
2.063899, 2.059539, 2.055529, 2.051831, 2.048407, 2.045230};
const auto degreeOfFreedome = n - 1;
const auto lower = mean - c[degreeOfFreedome] * stddev;
const auto upper = mean + c[degreeOfFreedome] * stddev;
return std::make_pair(lower, upper);
}
}
template <typename data_t>
data_t Statistics<data_t>::relativeError(DataContainer<data_t> dc1, DataContainer<data_t> dc2)
{
if (dc1.getDataDescriptor() != dc2.getDataDescriptor()) {
throw InvalidArgumentError(
std::string("Metrics::relativeError: shapes of both signals should match"));
}
DataContainer<data_t> diff = dc1 - dc2;
return diff.l2Norm() / dc2.l2Norm();
}
template <typename data_t>
data_t Statistics<data_t>::peakSignalToNoiseRatio(DataContainer<data_t> dc1,
DataContainer<data_t> dc2, data_t dataRange)
{
if (dc1.getDataDescriptor() != dc2.getDataDescriptor()) {
throw InvalidArgumentError(std::string(
"Metrics::peakSignalToNoiseRatio: shapes of both signals should match"));
}
return data_t(10) * std::log10((std::pow(dataRange, 2) / meanSquaredError(dc1, dc2)));
}
template <typename data_t>
data_t Statistics<data_t>::peakSignalToNoiseRatio(DataContainer<data_t> dc1,
DataContainer<data_t> dc2)
{
if (dc1.getDataDescriptor() != dc2.getDataDescriptor()) {
throw InvalidArgumentError(std::string(
"Metrics::peakSignalToNoiseRatio: shapes of both signals should match"));
}
data_t dataMin = std::numeric_limits<data_t>::min();
data_t dataMax = std::numeric_limits<data_t>::max();
data_t trueMin = std::min(dc1.minElement(), dc2.minElement());
data_t trueMax = std::max(dc1.maxElement(), dc2.maxElement());
if (trueMin < dataMin || trueMax > dataMax) {
throw InvalidArgumentError(
std::string("Metrics::peakSignalToNoiseRatio: extreme values of the signals "
"exceed the range expected for its data type"));
}
data_t dataRange;
if (trueMin >= 0) {
dataRange = dataMax;
} else {
dataRange = dataMax - dataMin;
}
return peakSignalToNoiseRatio(dc1, dc2, dataRange);
}
// ------------------------------------------
// explicit template instantiation
template class Statistics<float>;
template class Statistics<double>;
} // namespace elsa
#pragma once
#include "elsaDefines.h"
#include "DataContainer.h"
#include <cmath>
#include <numeric>
#include <algorithm>
#include <utility>
namespace elsa
{
/**
* @brief Class representing ...
*
* This class represents an ...
*
* @tparam data_t data type for the domain and range of the problem, defaulting to real_t
*
* References:
*/
template <typename data_t = real_t>
class Statistics
{
/**
* @brief Compute the Mean Squared Error of two given signals.
*
* @param dc1 DataContainer signal
* @param dc2 DataContainer signal
*/
data_t meanSquaredError(DataContainer<data_t> dc1, DataContainer<data_t> dc2);
/// @brief Calculate mean and standard deviation of a container
/// @param v Any container, such as `std::vector`
/// @tparam Container Container type of argument (e.g. `std::vector`)
/// @return a pair of mean and standard deviation (of type `Container::value_type`)
template <typename Container>
constexpr auto calculateMeanStddev(Container v)
-> std::pair<typename Container::value_type, typename Container::value_type>;
/**
* @brief Compute the 95% confidence interval for a given number of samples `n`
* and the mean and standard deviation of the measurements.
*
* Compute it as \f$mean - c(n) * stddev, mean + c(n) * stddev\f$, where
* \f$c(n)\f$ is the n-th entry in the two tails T distribution table. For \f$n > 30\f$,
* it is assumed that \f$n = 1.96\f$.
*
* @param n Number of of samples
* @param mean mean of samples
* @param stddev standard deviation of samples
* @return pair of lower and upper bound of 95% confidence interval
*/
std::pair<real_t, real_t> confidenceInterval95(std::size_t n, data_t mean, data_t stddev);
/**
* @brief Compute the Relative Error between two given signals.
*
* @param dc1 DataContainer signal
* @param dc2 DataContainer signal
*/
data_t relativeError(DataContainer<data_t> dc1, DataContainer<data_t> dc2);
/**
* @brief Compute the Peak Signal-to-Noise Ratio.
*
* @param dc1 DataContainer signal
* @param dc2 DataContainer signal
* @param dataRange The data range of the signals (distance between minimum and maximum
* possible values).
*/
data_t peakSignalToNoiseRatio(DataContainer<data_t> dc1, DataContainer<data_t> dc2,
data_t dataRange);
/**
* @brief Compute the Peak Signal-to-Noise Ratio.
*
* @param dc1 DataContainer signal
* @param dc2 DataContainer signal
*/
data_t peakSignalToNoiseRatio(DataContainer<data_t> dc1, DataContainer<data_t> dc2);
};
} // namespace elsa
......@@ -30,4 +30,3 @@ ELSA_DOCTEST(DataContainerFormatter)
ELSA_DOCTEST(CartesianIndices)
ELSA_DOCTEST(Bessel)
ELSA_DOCTEST(Math)
ELSA_DOCTEST(Statistics)
#include "doctest/doctest.h"
#include "Math.hpp"
#include "VolumeDescriptor.h"
#include <testHelpers.h>
TEST_SUITE_BEGIN("Math");
......@@ -62,4 +65,54 @@ TEST_CASE("Math::heaviside")
}
}
TEST_CASE_TEMPLATE("Math: Testing the statistics", TestType, float, double)
{
GIVEN("a DataContainer")
{
IndexVector_t sizeVector(2);
sizeVector << 2, 4;
VolumeDescriptor volDescr(sizeVector);
Vector_t<TestType> vect1(volDescr.getNumberOfCoefficients());
vect1 << 4, 2, 0.7f, 1, 0, 9, 53, 8;
Vector_t<TestType> vect2(volDescr.getNumberOfCoefficients());
vect2 << 5, 1, 6, 12, 22, 23, 9, 9;
DataContainer<TestType> dataCont1(volDescr, vect1);
DataContainer<TestType> dataCont2(volDescr, vect2);
WHEN("running the Mean Squared Error")
{
auto meanSqErr = statistics::meanSquaredError<TestType>(dataCont1, dataCont2);
THEN("it produces the correct result")
{
REQUIRE_UNARY(checkApproxEq(meanSqErr, 346.01125f));
}
}
WHEN("running the Relative Error")
{
auto relErr = statistics::relativeError<TestType>(dataCont1, dataCont2);
THEN("it produces the correct result")
{
REQUIRE_UNARY(checkApproxEq(relErr, 1.4157718f));
}
}
WHEN("running the Peak Signal-to-Noise Ratio")
{
auto dataRange = static_cast<TestType>(255);
auto psnr =
statistics::peakSignalToNoiseRatio<TestType>(dataCont1, dataCont2, dataRange);
auto expectedpsnr = static_cast<TestType>(22.73990);
THEN("it produces the correct result")
{
REQUIRE_UNARY(checkApproxEq(psnr, expectedpsnr));
}
}
}
}
TEST_SUITE_END();
/**
* @file test_Statistics.cpp
*
* @brief Tests for Statistics header
*
* @author Andi Braimllari
*/
#include "Statistics.h"
#include "DataContainer.h"
#include "VolumeDescriptor.h"
#include "doctest/doctest.h"
#include <testHelpers.h>
using namespace elsa;
using namespace doctest;
TEST_SUITE_BEGIN("core");
// TODO do the Statistics' tests here
TEST_CASE_TEMPLATE("Statistics: Testing the metrics", TestType, float, double)
{
GIVEN("a DataContainer")
{
IndexVector_t sizeVector(2);
sizeVector << 2, 4;
VolumeDescriptor volDescr(sizeVector);
DataContainer<TestType> dataCont1(volDescr);
dataCont1 = 5;
DataContainer<TestType> dataCont2(volDescr);
dataCont2 = 8;
WHEN("running the Relative Error")
{
Statistics statistics;
long double relErr = relativeError<TestType>(dataCont1, dataCont2);
THEN("it produces the correct result")
{
REQUIRE_UNARY(checkApproxEq(relErr, 3.0 / 8));
}
}
WHEN("running the Peak Signal-to-Noise Ratio")
{
long double psnr = peakSignalToNoiseRatio<TestType>(dataCont1, dataCont2, 255);
THEN("it produces the correct result") { REQUIRE_UNARY(checkApproxEq(psnr, 38.58837)); }
}
}
}
TEST_SUITE_END();
#pragma once
#include "Statistics.h"
#include "Math.hpp"
#include "SetupHelpers.h"
#include "NoiseGenerators.h"
#include "LoggingHelpers.h"
......
#include "elsa.h"
#include "LutProjector.h"
#include "Utilities/Statistics.h"
#include "IO.h"
#include "spdlog/fmt/bundled/core.h"
......