Commit 319eaaa3 authored by Edwin Carlinet's avatar Edwin Carlinet
Browse files

Add dilation implementation.

parent d3919038
Pipeline #16149 passed with stages
in 10 minutes and 41 seconds
......@@ -66,6 +66,7 @@ target_sources(Pylene PRIVATE
src/morpho/component_tree.cpp
src/morpho/hvector.cpp
src/morpho/hvector_unbounded.cpp
src/morpho/running_max_vertical2d.cpp
)
# Compiler configurations
......
......@@ -138,6 +138,13 @@ namespace mln
/// \brief
mln::experimental::box2d compute_output_region(mln::experimental::box2d roi) const;
/// \brief
bool is_horizontal() const noexcept;
/// \brief
bool is_vertical() const noexcept;
private:
mln::experimental::point2d m_delta;
int m_k;
......
......@@ -37,10 +37,19 @@ namespace mln::morpho::experimental
namespace details
{
struct sup_t
{
template <class T>
T operator() (T x, T y) const { return mln::sup(x,y); }
template <class T>
std::experimental::simd<T> operator() (std::experimental::simd<T> x, std::experimental::simd<T> y) const { return std::experimental::max(x,y); }
};
template <class V>
struct dilation_value_set_base
{
static inline constexpr auto sup = [](V a, V b) { return mln::sup(a, b); };
static inline constexpr sup_t sup = {};
static inline constexpr auto zero = mln::value_traits<V>::inf();
};
......@@ -52,6 +61,7 @@ namespace mln::morpho::experimental
static inline constexpr auto accu_sup = mln::accu::accumulators::sup<V>{};
};
template <class V>
struct dilation_value_set<V, std::enable_if_t<std::is_integral_v<V> && (value_traits<V>::quant <= 16)>>
: dilation_value_set_base<V>
......
......@@ -37,10 +37,20 @@ namespace mln::morpho::experimental
namespace details
{
struct inf_t
{
template <class T>
T operator() (T x, T y) const { return mln::inf(x,y); }
template <class T>
std::experimental::simd<T> operator() (std::experimental::simd<T> x, std::experimental::simd<T> y) const { return std::experimental::min(x,y); }
};
template <class V>
struct erosion_value_set_base
{
static inline constexpr auto sup = [](V a, V b) { return mln::inf(a, b); };
static inline constexpr inf_t sup = {};
static inline constexpr auto zero = mln::value_traits<V>::sup();
};
......
......@@ -6,6 +6,7 @@
#include <mln/core/se/periodic_line2d.hpp>
#include <mln/core/trace.hpp>
#include <mln/morpho/experimental/private/running_max_1d.hpp>
#include <mln/morpho/experimental/private/dilation_vertical_block2d.hpp>
#include <mln/core/canvas/private/traverse2d.hpp>
......@@ -54,18 +55,21 @@ namespace mln::morpho::details
}
// Generic implementation
template <class I, class J, class BinaryFunction>
void dilation_by_periodic_line(I& in, J& out,
const mln::experimental::se::periodic_line2d& line,
BinaryFunction sup,
mln::experimental::box2d roi)
void dilation_by_periodic_line_generic(I& in, J& out,
const mln::experimental::se::periodic_line2d& line,
BinaryFunction sup,
mln::experimental::box2d roi)
{
using V = image_value_t<I>;
int k = line.repetition();
auto period = line.period();
// Some sanity check
{
assert(period.y() >= 0);
......@@ -95,4 +99,56 @@ namespace mln::morpho::details
mln::canvas::details::traverse_along_direction(roi, period, fun);
}
// Dispatch to generic version
template <class I, class J, class BinaryFunction>
void dilation_by_periodic_line(I& in, J& out,
const mln::experimental::se::periodic_line2d& line,
BinaryFunction sup,
mln::experimental::box2d roi)
{
dilation_by_periodic_line_generic(in, out, line, sup, roi);
}
template <class I, class T, class BinaryFunction>
std::enable_if_t<std::is_arithmetic_v<T> && !std::is_same_v<T, bool>>
dilation_by_periodic_line(I& in, mln::experimental::image2d<T>& out,
const mln::experimental::se::periodic_line2d& line, BinaryFunction sup,
mln::experimental::box2d roi)
{
int k = line.repetition();
[[maybe_unused]] auto period = line.period();
// Some sanity check
{
assert(period.y() >= 0);
assert(out.domain().includes(roi));
assert(in.domain().includes(roi));
}
// Specialization for vertical line
if (line.is_vertical())
{
mln_entering("Running specialization for vertical dilation over 2d buffer with arithmetic types");
mln::morpho::experimental::details::vertical_running_max_algo_t<T, BinaryFunction> alg(sup);
alg.running_max_v2d(in, out, roi, k, true);
return;
}
else if (line.is_horizontal())
{
mln_entering("Running specialization for horizontal dilation over 2d buffer with arithmetic types");
mln::morpho::experimental::details::vertical_running_max_algo_t<T, BinaryFunction> alg(sup);
alg.running_max_h2d(in, out, roi, k, true);
return;
}
dilation_by_periodic_line_generic(in, out, line, sup, roi);
}
} // namespace mln::morpho::internal
#pragma once
#include <experimental/simd>
#include <cassert>
#include <range/v3/functional/concepts.hpp>
#include <mln/core/image/experimental/ndimage_fwd.hpp>
#include <mln/core/box.hpp>
namespace mln::morpho::experimental::details
{
class vertical_running_max_algo_base_t
{
// Accumulate the supremum column-wise (eq to the python A.cumsum(axis=0))
virtual void partial_sum_block2d(const std::byte* __restrict in, std::byte* __restrict out, int width, int height,
std::ptrdiff_t in_byte_stride, std::ptrdiff_t out_byte_stride) = 0;
// Apply PW OUT[x] = SUP(A[x], B[x])
virtual void apply_sup(std::byte* __restrict A, std::byte* __restrict B, std::byte* __restrict OUT, std::size_t n) = 0;
virtual int get_block_width() const = 0;
virtual std::size_t get_sample_size() const = 0;
public:
// Apply the running max algorithm over a block
// Memory has already been allocated
void running_max_block2d(std::byte* f, std::byte* g, std::byte* h, int width, int height, std::ptrdiff_t f_byte_stride,
std::ptrdiff_t g_byte_stride, std::ptrdiff_t h_byte_stride, int k, bool use_extension);
// Running the 2D dilation vertically using tiling
template <class I, class T>
void running_max_v2d(I& in, mln::experimental::image2d<T>& out, mln::experimental::box2d roi, int k, bool use_extension);
// Running the 2D dilation vertically using tiling
template <class I, class T>
void running_max_h2d(I& in, mln::experimental::image2d<T>& out, mln::experimental::box2d roi, int k, bool use_extension);
};
template <class T, class BinaryFunction>
class vertical_running_max_algo_t : public vertical_running_max_algo_base_t
{
using simd_t = std::experimental::simd<T>;
static constexpr int WARP_SIZE = simd_t::size();
static constexpr int BLOCK_WIDTH = WARP_SIZE * 4;
static_assert(::ranges::regular_invocable<BinaryFunction, simd_t, simd_t>);
static_assert(::ranges::regular_invocable<BinaryFunction, T, T>);
void apply_sup(std::byte* __restrict A, std::byte* __restrict B, std::byte* __restrict OUT, std::size_t n) final;
void partial_sum_block2d(const std::byte* __restrict in, std::byte* __restrict out, int width, int height,
std::ptrdiff_t in_byte_stride, std::ptrdiff_t out_byte_stride) final;
int get_block_width() const final { return BLOCK_WIDTH; }
std::size_t get_sample_size() const final { return sizeof(T); }
BinaryFunction m_sup;
public:
vertical_running_max_algo_t(BinaryFunction sup)
: m_sup{std::move(sup)}
{
}
};
template <class T, class BinaryFunction>
void vertical_running_max_algo_t<T, BinaryFunction>::apply_sup(std::byte* __restrict A, std::byte* __restrict B, std::byte* __restrict out, std::size_t n)
{
std::transform((T*)A, (T*)A + n, (T*)B, (T*)out, m_sup);
}
template <class T, class BinaryFunction>
void vertical_running_max_algo_t<T, BinaryFunction>::partial_sum_block2d(const std::byte* __restrict in, std::byte* __restrict out, int width, int height, std::ptrdiff_t in_byte_stride, std::ptrdiff_t out_byte_stride)
{
using simd_t = std::experimental::simd<T>;
constexpr int MAX_WARP_COUNT = BLOCK_WIDTH / WARP_SIZE;
const int K = width / WARP_SIZE;
const int rem = width % WARP_SIZE;
//fmt::print("BLOCK_WIDTH={} width={}\n", BLOCK_WIDTH, width);
//fmt::print("WARP_SIZE={}\n", WARP_SIZE);
assert(width <= BLOCK_WIDTH);
std::memcpy(out, in, sizeof(T) * width);
// By block
if (K > 0)
{
simd_t xsum[MAX_WARP_COUNT];
for (int k = 0; k < K; k++)
xsum[k].copy_from((T*)in + k * WARP_SIZE, std::experimental::element_aligned);
// Next lines
for (int y = 1; y < height; ++y)
{
const T* in_lineptr = (const T*)(in + y * in_byte_stride);
T* out_lineptr = (T*)(out + y * out_byte_stride);
for (int k = 0; k < K; k++)
{
simd_t v;
v.copy_from(in_lineptr + k * WARP_SIZE, std::experimental::element_aligned);
xsum[k] = m_sup(xsum[k], v);
xsum[k].copy_to(out_lineptr + k * WARP_SIZE, std::experimental::element_aligned);
}
}
}
if (rem > 0)
{
in += K * WARP_SIZE * sizeof(T);
out += K * WARP_SIZE * sizeof(T);
T xsum[WARP_SIZE];
std::memcpy(xsum, in, sizeof(T) * rem);
for (int y = 1; y < height; ++y)
{
const T* in_lineptr = (const T*)(in + y * in_byte_stride);
T* out_lineptr = (T*)(out + y * out_byte_stride);
for (int c = 0; c < rem; ++c)
{
xsum[c] = m_sup(xsum[c], in_lineptr[c]);
out_lineptr[c] = xsum[c];
}
}
}
}
template <class I, class T>
[[gnu::noinline]] void copy_block(I& in, mln::experimental::box2d roi, T* __restrict out, std::ptrdiff_t out_stride)
{
const int x0 = roi.tl().x();
const int y0 = roi.tl().y();
for (int y = 0; y < roi.height(); ++y)
{
T* lineptr = out + y * out_stride;
for (int x = 0; x < roi.width(); ++x)
lineptr[x] = in.at({x0 + x, y0 + y});
}
}
template <class I, class T>
[[gnu::noinline]] void copy_block(T* __restrict in, std::ptrdiff_t istride, mln::experimental::box2d roi, I& out)
{
const int x0 = roi.tl().x();
const int y0 = roi.tl().y();
for (int y = 0; y < roi.height(); ++y)
{
const T* lineptr = in + y * istride;
for (int x = 0; x < roi.width(); ++x)
out.at({x0 + x, y0 + y}) = lineptr[x];
}
}
template <class I, class T>
[[gnu::noinline]] void transpose_block2d(I& in, mln::experimental::box2d input_roi, T* __restrict out, std::ptrdiff_t out_stride)
{
const int x0 = input_roi.tl().x();
const int y0 = input_roi.tl().y();
for (int y = 0; y < input_roi.height(); ++y)
for (int x = 0; x < input_roi.width(); ++x)
*(out + x * out_stride + y) = in.at({x0 + x, y0 + y});
}
template <class I, class T>
[[gnu::noinline]] void transpose_block2d(T* __restrict in, std::ptrdiff_t istride, mln::experimental::box2d output_roi, I& out)
{
const int x0 = output_roi.tl().x();
const int y0 = output_roi.tl().y();
for (int y = 0; y < output_roi.height(); ++y)
for (int x = 0; x < output_roi.width(); ++x)
out.at({x0 + x, y0 + y}) = *(in + x * istride + y);
}
template <class I, class T>
void vertical_running_max_algo_base_t::running_max_v2d(I& in, mln::experimental::image2d<T>& out, mln::experimental::box2d roi, int k, bool use_extension)
{
int kBlockWidth = this->get_block_width();
auto sz = this->get_sample_size();
std::ptrdiff_t kBlockByteSize = sz * kBlockWidth;
const int x0 = roi.tl().x();
const int y0 = roi.tl().y();
const int y1 = roi.br().y();
const int width = roi.width();
const int height = roi.height();
std::byte* f = (std::byte*) std::malloc(sz * kBlockWidth * (height + 2 * k));
std::byte* g = (std::byte*) std::malloc(sz * kBlockWidth * (height + 2 * k));
std::byte* h = (std::byte*) std::malloc(sz * kBlockWidth * (height + 2 * k));
for (int x = 0; x < width; x += kBlockWidth)
{
int w = std::min(kBlockWidth, width - x);
// Copy the block
{
mln::experimental::box2d region = {{x + x0, y0 - k}, {x + x0 + w, y1 + k}};
copy_block(in, region, (T*)f, kBlockWidth);
}
this->running_max_block2d(f + k * kBlockByteSize, //
g + k * kBlockByteSize, //
h + k * kBlockByteSize, //
w, height, kBlockByteSize, kBlockByteSize, kBlockByteSize, k, use_extension);
// Copy back
{
mln::experimental::box2d region = {{x + x0, y0}, {x + x0 + w, y1}};
copy_block((T*)f + kBlockWidth * k, kBlockWidth, region, out);
}
}
std::free(f);
std::free(g);
std::free(h);
}
template <class I, class T>
void vertical_running_max_algo_base_t::running_max_h2d(I& in, mln::experimental::image2d<T>& out, mln::experimental::box2d roi, int k, bool use_extension)
{
int kBlockWidth = this->get_block_width();
auto sz = this->get_sample_size();
std::ptrdiff_t kBlockByteSize = sz * kBlockWidth;
const int x0 = roi.tl().x();
const int y0 = roi.tl().y();
const int x1 = roi.br().x();
const int width = roi.width();
const int height = roi.height();
std::byte* f = (std::byte*) std::malloc(sz * kBlockWidth * (width + 2 * k));
std::byte* g = (std::byte*) std::malloc(sz * kBlockWidth * (width + 2 * k));
std::byte* h = (std::byte*) std::malloc(sz * kBlockWidth * (width + 2 * k));
for (int y = 0; y < height; y += kBlockWidth)
{
int H = std::min(kBlockWidth, height - y);
// Copy the block
{
mln::experimental::box2d region = {{x0 - k, y0 + y}, {x1 + k, y0 + y + H}};
transpose_block2d(in, region, (T*)f, kBlockWidth);
}
this->running_max_block2d(f + k * kBlockByteSize, //
g + k * kBlockByteSize, //
h + k * kBlockByteSize, //
H, width, kBlockByteSize, kBlockByteSize, kBlockByteSize, k, use_extension);
// Copy back
{
mln::experimental::box2d region = {{x0, y0 + y}, {x1, y0 + y + H}};
transpose_block2d((T*)f + kBlockWidth * k, kBlockWidth, region, out);
}
}
std::free(f);
std::free(g);
std::free(h);
}
} // namespace mln::morpho::details
......@@ -14,9 +14,9 @@ namespace mln::morpho::experimental::details
/// g[x] = Max f[y] y ∈ [⌊x/α⌋*α:x]
/// h[x] = Max f[y] y ∈ [x:(⌊x/α⌋+1)*α)
///
/// \param[in,out] f Input array of size \p n + 2.k
/// \param[in,out] g Temporary array of size \p n + 2.k
/// \param[in,out] h Temporary array of size \p n + 2.k
/// \param[in,out] f Input array of size \p n + 2.k. Must be valid on the range [-k, n+k]
/// \param[in,out] g Temporary array of size \p n + 2.k. Must be valid on the range [-k, n+k]
/// \param[in,out] h Temporary array of size \p n + 2.k. Must be valid on the range [-k, n+k]
/// \param[in] n Size of the array
/// \param[in] k Radius of the sliding windows
/// \param[in] sup Supremum operator
......
......@@ -47,6 +47,11 @@ namespace mln::experimental::se
return m_k * std::max(std::abs(m_delta.x()), std::abs(m_delta.y()));
}
bool periodic_line2d::is_horizontal() const noexcept { return m_delta.y() == 0; }
bool periodic_line2d::is_vertical() const noexcept { return m_delta.x() == 0; }
mln::experimental::box2d periodic_line2d::compute_input_region(mln::experimental::box2d roi) const
{
int dx = std::abs(m_delta.x()) * m_k;
......
#include <mln/morpho/experimental/private/dilation_vertical_block2d.hpp>
#include <memory>
namespace mln::morpho::experimental::details
{
void vertical_running_max_algo_base_t::running_max_block2d(std::byte* __restrict f, std::byte* __restrict g, std::byte* __restrict h,
int width, int height, std::ptrdiff_t f_byte_stride,
std::ptrdiff_t g_byte_stride, std::ptrdiff_t h_byte_stride,
int k, bool use_extension)
{
assert(width <= this->get_block_width());
const int alpha = 2 * k + 1;
{
int chunk_start = use_extension ? -k : 0;
int rem = use_extension ? (height + 2 * k) : height;
for (; rem > 0; chunk_start += alpha, rem -= alpha)
{
int chunk_size = std::min(rem, alpha);
// Forward pass
// Compute g[x] = Max f(y), y ∈ [α * ⌊x / α⌋ : x]
this->partial_sum_block2d(f + chunk_start * f_byte_stride, //
g + chunk_start * g_byte_stride, //
width, chunk_size, f_byte_stride, g_byte_stride);
// Backward pass
// Compute h[x] = Max f(y) y ∈ [x : α * (⌊x/α⌋+1))
this->partial_sum_block2d(f + (chunk_start + chunk_size - 1) * f_byte_stride, //
h + (chunk_start + chunk_size - 1) * h_byte_stride, //
width, chunk_size, -f_byte_stride, -h_byte_stride);
}
}
// Compute local maximum out[x] = Max f(y) y ∈ [x-k:x+k]
// out[x] = Max (Max f[x-k:b), Max f[b:x+k]) with b = α.⌈(x-k)/α⌉ = α.⌊(x+k)/α⌋
// = Max( h[x-k], g[x+k] )
{
for (int i = 0; i < height; ++i)
{
this->apply_sup(h + (i - k) * h_byte_stride, //
g + (i + k) * g_byte_stride, //
f + i * f_byte_stride, static_cast<std::size_t>(width));
}
}
}
} // namespace mln::morpho::experimental::details
......@@ -3,7 +3,8 @@ set(test_prefix "UTMorpho_")
add_subdirectory(datastruct)
add_subdirectory(alphatree)
add_core_test(${test_prefix}running_max_1d running_max_1d.cpp)
add_core_test(${test_prefix}running_max_1d running_max_1d.cpp)
add_core_test(${test_prefix}running_max_vertical_2d running_max_vertical_2d.cpp)
add_core_test(${test_prefix}saturate saturate.cpp)
add_core_test(${test_prefix}dilate dilate.cpp)
add_core_test(${test_prefix}erode erode.cpp)
......
......@@ -21,6 +21,22 @@
using namespace mln;
struct sup_t
{
template <class T>
T operator()(T x, T y) const
{
return mln::sup(x, y);
}
template <class T>
std::experimental::simd<T> operator()(std::experimental::simd<T> x, std::experimental::simd<T> y) const
{
return std::experimental::max(x, y);
}
};