Commit 261b6ccf by andibraimllari Committed by AndiBraimllari

statistics namespace

parent ffc3d862
Pipeline #872025 failed with stages
in 5 minutes and 25 seconds
 ... ... @@ -10,7 +10,6 @@ set(MODULE_HEADERS Utilities/DataContainerFormatter.hpp Utilities/FormatConfig.h Utilities/Math.hpp Utilities/Statistics.h Utilities/TypeCasts.hpp Descriptors/DataDescriptor.h Descriptors/DescriptorUtils.h ... ... @@ -43,7 +42,6 @@ set(MODULE_SOURCES Utilities/Assertions.cpp Descriptors/DataDescriptor.cpp Descriptors/VolumeDescriptor.cpp Utilities/Statistics.cpp Descriptors/PlanarDetectorDescriptor.cpp Descriptors/RandomBlocksDescriptor.cpp Descriptors/DescriptorUtils.cpp ... ...
 #pragma once #include "elsaDefines.h" #include "DataContainer.h" #include #include #include namespace elsa { ... ... @@ -46,23 +51,210 @@ namespace elsa } } // namespace math /// proposed in Y. Meyer, Oscillating Patterns in Image Processing and Nonlinear Evolution /// Equations. AMS, 2001 template data_t meyerFunction(data_t x) namespace statistics { if (x < 0) { return 0; } else if (0 <= x && x <= 1) { return 35 * std::pow(x, 4) - 84 * std::pow(x, 5) + 70 * std::pow(x, 6) - 20 * std::pow(x, 7); } else { return 1; /** * @brief Compute the Mean Squared Error of two given signals. * * Calculate it based on the formula of @f$\frac{1}{n} \sum_{i=1}^n (x_{i} - y_{i})^{2} * @f$. * * @param dc1 DataContainer signal * @param dc2 DataContainer signal * * @author Andi Braimllari - initial code * * @tparam data_t data type for the domain and range of the problem, defaulting to real_t */ template data_t meanSquaredError(DataContainer dc1, DataContainer dc2) { if (dc1.getDataDescriptor() != dc2.getDataDescriptor()) { throw LogicError(std::string( "Statistics::meanSquaredError: shapes of both signals should match")); } DataContainer diff = dc1 - dc2; return diff.squaredL2Norm() / dc1.getSize(); } /// @brief Calculate mean and standard deviation of a container /// @param v Any container, such as std::vector /// @tparam Container Container type of argument (e.g. std::vector) /// @return a pair of mean and standard deviation (of type Container::value_type) template constexpr auto calculateMeanStddev(Container v) -> std::pair { // value type of the vector using T = typename Container::value_type; // Sum all elements and divide by the size to get mean auto sum = std::accumulate(std::begin(v), std::end(v), T()); auto mean = sum / static_cast(std::size(v)); // New vector with x_i - mean entries std::vector diff(v.size()); std::transform(std::begin(v), std::end(v), std::begin(diff), [mean](auto x) { return x - mean; }); // Sum of product of each element auto sqSum = std::inner_product(std::begin(diff), std::end(diff), std::begin(diff), T()); auto stdev = std::sqrt(sqSum / mean); return std::make_pair(mean, stdev); } /** * @brief Compute the 95% confidence interval for a given number of samples n * and the mean and standard deviation of the measurements. * * Compute it as \f$mean - c(n) * stddev, mean + c(n) * stddev\f$, where * \f$c(n)\f$ is the n-th entry in the two tails T distribution table. For \f$n > 30\f$, * it is assumed that \f$n = 1.96\f$. * * @param n Number of of samples * @param mean mean of samples * @param stddev standard deviation of samples * @return pair of lower and upper bound of 95% confidence interval */ template std::pair confidenceInterval95(std::size_t n, data_t mean, data_t stddev) { // Do we run often enough to assume a large sample size? if (n > 30) { // 1.96 is a magic number for 95% confidence intervals, equivalent to c in the other // branch const auto lower = mean - 1.96 * stddev; const auto upper = mean + 1.96 * stddev; return std::make_pair(lower, upper); } else { // t Table for 95% with offset to handle 1 iteration // In that case the mean is the lower and upper bound constexpr std::array c = { 0.0, 12.70620, 4.302653, 3.182446, 2.776445, 2.570582, 2.446912, 2.364624, 2.306004, 2.262157, 2.228139, 2.200985, 2.178813, 2.160369, 2.144787, 2.131450, 2.119905, 2.109816, 2.100922, 2.093024, 2.085963, 2.079614, 2.073873, 2.068658, 2.063899, 2.059539, 2.055529, 2.051831, 2.048407, 2.045230}; const auto degreeOfFreedome = n - 1; const auto lower = mean - c[degreeOfFreedome] * stddev; const auto upper = mean + c[degreeOfFreedome] * stddev; return std::make_pair(lower, upper); } } /** * @brief Compute the Relative Error between two given signals. * * Calculate it based on the formula of @f$\| x - y \|_{2} / \| y \|_{2} @f$. * * @param dc1 DataContainer signal * @param dc2 DataContainer signal * * @author Andi Braimllari - initial code * * @tparam data_t data type for the domain and range of the problem, defaulting to real_t */ template data_t relativeError(DataContainer dc1, DataContainer dc2) { if (dc1.getDataDescriptor() != dc2.getDataDescriptor()) { throw InvalidArgumentError( std::string("statistics::relativeError: shapes of both signals should match")); } DataContainer diff = dc1 - dc2; return diff.l2Norm() / dc2.l2Norm(); } } /** * @brief Compute the Peak Signal-to-Noise Ratio of a given signal S. * * Calculate it based on the formula of @f$10 * log_{10}(MAX_{I}^{2} / MSE) @f$ in which * @f$MAX_{I} @f$ is the maximum possible pixel value of the image and @f$MSE @f$ is the * mean squared error. * * @param dc1 DataContainer signal * @param dc2 DataContainer signal * @param dataRange The data range of the signals (distance between minimum and maximum * possible values). * * @author Andi Braimllari - initial code * * @tparam data_t data type for the domain and range of the problem, defaulting to real_t */ template data_t peakSignalToNoiseRatio(DataContainer dc1, DataContainer dc2, data_t dataRange) { if (dc1.getDataDescriptor() != dc2.getDataDescriptor()) { throw InvalidArgumentError(std::string( "statistics::peakSignalToNoiseRatio: shapes of both signals should match")); } return 10 * std::log10((std::pow(dataRange, 2) / meanSquaredError(dc1, dc2))); } /** * @brief Compute the Peak Signal-to-Noise Ratio. * * @param dc1 DataContainer signal * @param dc2 DataContainer signal * * @author Andi Braimllari - initial code * * @tparam data_t data type for the domain and range of the problem, defaulting to real_t */ template data_t peakSignalToNoiseRatio(DataContainer dc1, DataContainer dc2) { if (dc1.getDataDescriptor() != dc2.getDataDescriptor()) { throw InvalidArgumentError(std::string( "statistics::peakSignalToNoiseRatio: shapes of both signals should match")); } data_t dataMin = std::numeric_limits::min(); data_t dataMax = std::numeric_limits::max(); data_t trueMin = std::min(dc1.minElement(), dc2.minElement()); data_t trueMax = std::max(dc1.maxElement(), dc2.maxElement()); if (trueMin < dataMin || trueMax > dataMax) { throw InvalidArgumentError( std::string("statistics::peakSignalToNoiseRatio: extreme values of the signals " "exceed the range expected for its data type")); } data_t dataRange; if (trueMin >= 0) { dataRange = dataMax; } else { dataRange = dataMax - dataMin; } return peakSignalToNoiseRatio(dc1, dc2, dataRange); } } // namespace statistics namespace shearlet { /// proposed in Y. Meyer, Oscillating Patterns in Image Processing and Nonlinear Evolution /// Equations. AMS, 2001 template data_t meyerFunction(data_t x) { if (x < 0) { return 0; } else if (0 <= x && x <= 1) { return 35 * std::pow(x, 4) - 84 * std::pow(x, 5) + 70 * std::pow(x, 6) - 20 * std::pow(x, 7); } else { return 1; } } /// defined in Sören Häuser and Gabriele Steidl, Fast Finite Shearlet Transform: a /// tutorial, 2014 template ... ...
 #include "Statistics.h" namespace elsa { template data_t Statistics::meanSquaredError(DataContainer dc1, DataContainer dc2) { if (dc1.getDataDescriptor() != dc2.getDataDescriptor()) { throw LogicError( std::string("Statistics::meanSquaredError: shapes of both signals should match")); } DataContainer diff = dc1 - dc2; return diff.squaredL2Norm() / dc1.getSize(); } template template constexpr auto Statistics::calculateMeanStddev(Container v) -> std::pair { // value type of the vector using T = typename Container::value_type; // Sum all elements and divide by the size to get mean auto sum = std::accumulate(std::begin(v), std::end(v), T()); auto mean = sum / static_cast(std::size(v)); // New vector with x_i - mean entries std::vector diff(v.size()); std::transform(std::begin(v), std::end(v), std::begin(diff), [mean](auto x) { return x - mean; }); // Sum of product of each element auto sqSum = std::inner_product(std::begin(diff), std::end(diff), std::begin(diff), T()); auto stdev = std::sqrt(sqSum / mean); return std::make_pair(mean, stdev); } template std::pair Statistics::confidenceInterval95(std::size_t n, data_t mean, data_t stddev) { // Do we run often enough to assume a large sample size? if (n > 30) { // 1.96 is a magic number for 95% confidence intervals, equivalent to c in the other // branch const auto lower = mean - 1.96 * stddev; const auto upper = mean + 1.96 * stddev; return std::make_pair(lower, upper); } else { // t Table for 95% with offset to handle 1 iteration // In that case the mean is the lower and upper bound constexpr std::array c = {0.0, 12.70620, 4.302653, 3.182446, 2.776445, 2.570582, 2.446912, 2.364624, 2.306004, 2.262157, 2.228139, 2.200985, 2.178813, 2.160369, 2.144787, 2.131450, 2.119905, 2.109816, 2.100922, 2.093024, 2.085963, 2.079614, 2.073873, 2.068658, 2.063899, 2.059539, 2.055529, 2.051831, 2.048407, 2.045230}; const auto degreeOfFreedome = n - 1; const auto lower = mean - c[degreeOfFreedome] * stddev; const auto upper = mean + c[degreeOfFreedome] * stddev; return std::make_pair(lower, upper); } } template data_t Statistics::relativeError(DataContainer dc1, DataContainer dc2) { if (dc1.getDataDescriptor() != dc2.getDataDescriptor()) { throw InvalidArgumentError( std::string("Metrics::relativeError: shapes of both signals should match")); } DataContainer diff = dc1 - dc2; return diff.l2Norm() / dc2.l2Norm(); } template data_t Statistics::peakSignalToNoiseRatio(DataContainer dc1, DataContainer dc2, data_t dataRange) { if (dc1.getDataDescriptor() != dc2.getDataDescriptor()) { throw InvalidArgumentError(std::string( "Metrics::peakSignalToNoiseRatio: shapes of both signals should match")); } return data_t(10) * std::log10((std::pow(dataRange, 2) / meanSquaredError(dc1, dc2))); } template data_t Statistics::peakSignalToNoiseRatio(DataContainer dc1, DataContainer dc2) { if (dc1.getDataDescriptor() != dc2.getDataDescriptor()) { throw InvalidArgumentError(std::string( "Metrics::peakSignalToNoiseRatio: shapes of both signals should match")); } data_t dataMin = std::numeric_limits::min(); data_t dataMax = std::numeric_limits::max(); data_t trueMin = std::min(dc1.minElement(), dc2.minElement()); data_t trueMax = std::max(dc1.maxElement(), dc2.maxElement()); if (trueMin < dataMin || trueMax > dataMax) { throw InvalidArgumentError( std::string("Metrics::peakSignalToNoiseRatio: extreme values of the signals " "exceed the range expected for its data type")); } data_t dataRange; if (trueMin >= 0) { dataRange = dataMax; } else { dataRange = dataMax - dataMin; } return peakSignalToNoiseRatio(dc1, dc2, dataRange); } // ------------------------------------------ // explicit template instantiation template class Statistics; template class Statistics; } // namespace elsa
 #pragma once #include "elsaDefines.h" #include "DataContainer.h" #include #include #include #include namespace elsa { /** * @brief Class representing ... * * This class represents an ... * * @tparam data_t data type for the domain and range of the problem, defaulting to real_t * * References: */ template class Statistics { /** * @brief Compute the Mean Squared Error of two given signals. * * @param dc1 DataContainer signal * @param dc2 DataContainer signal */ data_t meanSquaredError(DataContainer dc1, DataContainer dc2); /// @brief Calculate mean and standard deviation of a container /// @param v Any container, such as std::vector /// @tparam Container Container type of argument (e.g. std::vector) /// @return a pair of mean and standard deviation (of type Container::value_type) template constexpr auto calculateMeanStddev(Container v) -> std::pair; /** * @brief Compute the 95% confidence interval for a given number of samples n * and the mean and standard deviation of the measurements. * * Compute it as \f$mean - c(n) * stddev, mean + c(n) * stddev\f$, where * \f$c(n)\f$ is the n-th entry in the two tails T distribution table. For \f$n > 30\f$, * it is assumed that \f$n = 1.96\f$. * * @param n Number of of samples * @param mean mean of samples * @param stddev standard deviation of samples * @return pair of lower and upper bound of 95% confidence interval */ std::pair confidenceInterval95(std::size_t n, data_t mean, data_t stddev); /** * @brief Compute the Relative Error between two given signals. * * @param dc1 DataContainer signal * @param dc2 DataContainer signal */ data_t relativeError(DataContainer dc1, DataContainer dc2); /** * @brief Compute the Peak Signal-to-Noise Ratio. * * @param dc1 DataContainer signal * @param dc2 DataContainer signal * @param dataRange The data range of the signals (distance between minimum and maximum * possible values). */ data_t peakSignalToNoiseRatio(DataContainer dc1, DataContainer dc2, data_t dataRange); /** * @brief Compute the Peak Signal-to-Noise Ratio. * * @param dc1 DataContainer signal * @param dc2 DataContainer signal */ data_t peakSignalToNoiseRatio(DataContainer dc1, DataContainer dc2); }; } // namespace elsa
 ... ... @@ -30,4 +30,3 @@ ELSA_DOCTEST(DataContainerFormatter) ELSA_DOCTEST(CartesianIndices) ELSA_DOCTEST(Bessel) ELSA_DOCTEST(Math) ELSA_DOCTEST(Statistics)
 #include "doctest/doctest.h" #include "Math.hpp" #include "VolumeDescriptor.h" #include TEST_SUITE_BEGIN("Math"); ... ... @@ -62,4 +65,54 @@ TEST_CASE("Math::heaviside") } } TEST_CASE_TEMPLATE("Math: Testing the statistics", TestType, float, double) { GIVEN("a DataContainer") { IndexVector_t sizeVector(2); sizeVector << 2, 4; VolumeDescriptor volDescr(sizeVector); Vector_t vect1(volDescr.getNumberOfCoefficients()); vect1 << 4, 2, 0.7f, 1, 0, 9, 53, 8; Vector_t vect2(volDescr.getNumberOfCoefficients()); vect2 << 5, 1, 6, 12, 22, 23, 9, 9; DataContainer dataCont1(volDescr, vect1); DataContainer dataCont2(volDescr, vect2); WHEN("running the Mean Squared Error") { auto meanSqErr = statistics::meanSquaredError(dataCont1, dataCont2); THEN("it produces the correct result") { REQUIRE_UNARY(checkApproxEq(meanSqErr, 346.01125f)); } } WHEN("running the Relative Error") { auto relErr = statistics::relativeError(dataCont1, dataCont2); THEN("it produces the correct result") { REQUIRE_UNARY(checkApproxEq(relErr, 1.4157718f)); } } WHEN("running the Peak Signal-to-Noise Ratio") { auto dataRange = static_cast(255); auto psnr = statistics::peakSignalToNoiseRatio(dataCont1, dataCont2, dataRange); auto expectedpsnr = static_cast(22.73990); THEN("it produces the correct result") { REQUIRE_UNARY(checkApproxEq(psnr, expectedpsnr)); } } } } TEST_SUITE_END();
 /** * @file test_Statistics.cpp * * @brief Tests for Statistics header * * @author Andi Braimllari */ #include "Statistics.h" #include "DataContainer.h" #include "VolumeDescriptor.h" #include "doctest/doctest.h" #include using namespace elsa; using namespace doctest; TEST_SUITE_BEGIN("core"); // TODO do the Statistics' tests here TEST_CASE_TEMPLATE("Statistics: Testing the metrics", TestType, float, double) { GIVEN("a DataContainer") { IndexVector_t sizeVector(2); sizeVector << 2, 4; VolumeDescriptor volDescr(sizeVector); DataContainer dataCont1(volDescr); dataCont1 = 5; DataContainer dataCont2(volDescr); dataCont2 = 8; WHEN("running the Relative Error") { Statistics statistics; long double relErr = relativeError(dataCont1, dataCont2); THEN("it produces the correct result") { REQUIRE_UNARY(checkApproxEq(relErr, 3.0 / 8)); } } WHEN("running the Peak Signal-to-Noise Ratio") { long double psnr = peakSignalToNoiseRatio(dataCont1, dataCont2, 255); THEN("it produces the correct result") { REQUIRE_UNARY(checkApproxEq(psnr, 38.58837)); } } } } TEST_SUITE_END();
 #pragma once #include "Statistics.h" #include "Math.hpp" #include "SetupHelpers.h" #include "NoiseGenerators.h" #include "LoggingHelpers.h" ... ...
 #include "elsa.h" #include "LutProjector.h" #include "Utilities/Statistics.h" #include "IO.h" #include "spdlog/fmt/bundled/core.h" ... ...
Markdown is supported
0% or