Commit 6fd93759 authored by Edwin Carlinet's avatar Edwin Carlinet
Browse files

Add padding functions.

(cherry picked from commit 2e1e8420)
parent beaf6c2e
......@@ -9,4 +9,5 @@
compile_commands.json
/venv_*/
/**/.DS_Store
.nfs*
\ No newline at end of file
.nfs*
.history
\ No newline at end of file
......@@ -123,6 +123,15 @@ BENCHMARK_DEFINE_F(BMMorpho, Dilation_EuclideanDisc_incremental)(benchmark::Stat
this->run(st, f);
}
BENCHMARK_DEFINE_F(BMMorpho, Dilation_EuclideanDisc_incremental_parallel)(benchmark::State& st)
{
int radius = st.range(0);
auto se = mln::se::disc(radius, mln::se::disc::EXACT);
auto f = [se](const image_t& input, image_t& output) { mln::morpho::parallel::dilation(input, se, output); };
this->run(st, f);
}
BENCHMARK_DEFINE_F(BMMorpho, Dilation_ApproximatedDisc)(benchmark::State& st)
{
int radius = st.range(0);
......@@ -132,6 +141,15 @@ BENCHMARK_DEFINE_F(BMMorpho, Dilation_ApproximatedDisc)(benchmark::State& st)
this->run(st, f);
}
BENCHMARK_DEFINE_F(BMMorpho, Dilation_ApproximatedDisc_parallel)(benchmark::State& st)
{
int radius = st.range(0);
auto se = mln::se::disc(radius, mln::se::disc::PERIODIC_LINES_8);
auto f = [se](const image_t& input, image_t& output) { mln::morpho::parallel::dilation(input, se, output); };
this->run(st, f);
}
BENCHMARK_DEFINE_F(BMMorpho, Dilation_Square)(benchmark::State& st)
{
......@@ -141,14 +159,24 @@ BENCHMARK_DEFINE_F(BMMorpho, Dilation_Square)(benchmark::State& st)
this->run(st, f);
}
BENCHMARK_DEFINE_F(BMMorpho, Dilation_Square_parallel)(benchmark::State& st)
{
int radius = st.range(0);
auto se = mln::se::rect2d(2 * radius + 1, 2 * radius + 1);
auto f = [se](const image_t& input, image_t& output) { mln::morpho::parallel::dilation(input, se, output); };
this->run(st, f);
}
constexpr int max_range = 128;
BENCHMARK_REGISTER_F(BMMorpho, Dilation_ApproximatedDisc)->RangeMultiplier(2)->Range(2, max_range);
BENCHMARK_REGISTER_F(BMMorpho, Dilation_ApproximatedDisc_parallel)->RangeMultiplier(2)->Range(2, max_range);
BENCHMARK_REGISTER_F(BMMorpho, Dilation_EuclideanDisc_naive)->RangeMultiplier(2)->Range(2, 16);
BENCHMARK_REGISTER_F(BMMorpho, Dilation_EuclideanDisc_incremental)->RangeMultiplier(2)->Range(2, max_range);
BENCHMARK_REGISTER_F(BMMorpho, Dilation_EuclideanDisc_incremental_parallel)->RangeMultiplier(2)->Range(2, max_range);
BENCHMARK_REGISTER_F(BMMorpho, Dilation_Square)->RangeMultiplier(2)->Range(2, max_range);
BENCHMARK_REGISTER_F(BMMorpho, Dilation_Square_parallel)->RangeMultiplier(2)->Range(2, max_range);
BENCHMARK_F(BMMorpho, Opening_Disc)(benchmark::State& st)
......
......@@ -34,7 +34,8 @@ class Pylene(ConanFile):
requires = [
"range-v3/0.10.0",
"fmt/6.0.0",
"tbb/2020.0"
"tbb/2020.0",
"xsimd/7.4.6"
]
......
Core Types
==========
.. toctree::
:hidden:
box
point
.. cpp:namespace:: mln
.. topic:: Fundamental types
.. table::
:class: full
:widths: auto
+----------------------------------------------+-------------------------------------------------------------+
| :cpp:class:`ndpoint` :cpp:class:`ndpointref` | Generic :doc:`point <point>` that hold *n* coordinates |
+----------------------------------------------+-------------------------------------------------------------+
| :cpp:class:`ndbox` :cpp:class:`ndboxref` | Generic :doc:`box <box>` in *n* dimension |
+----------------------------------------------+-------------------------------------------------------------+
Enumerated types
......
......@@ -42,6 +42,16 @@ Include :file:`<mln/morpho/dilation.hpp>`
:exception: N/A
.. cpp:namespace:: mln::morpho::parallel
.. cpp:function:: \
Image{I} image_concrete_t<I> dilation(I image, StructuringElement se)
void dilation(Image image, StructuringElement se, OutputImage out)
Parallel version of the dilation algorithm.
Notes
-----
......
......@@ -36,6 +36,15 @@ Include :file:`<mln/morpho/erosion.hpp>`
:exception: N/A
.. cpp:namespace:: mln::morpho::parallel
.. cpp:function:: \
Image{I} image_concrete_t<I> erosion(I image, StructuringElement se)
void erosion(Image image, StructuringElement se, OutputImage out)
Parallel version of the erosion algorithm.
Notes
-----
......
Tiling
######
A technique in image processing when rather than processing the entire image
at once, you instead split the image up in smaller pieces often called "tiles" which
you then process one by one before putting back into the image.
.. figure:: /figures/core/tiling.png
:align: center
Tiling concept
This process can allow better memory management, for instance by limiting what can
be loaded in order to never exceed memory limits.
In the case of parallel algorithms in Pylene, tiling is used to split the image into
different tiles before feeding those tiles to different threads.
\ No newline at end of file
......@@ -39,5 +39,27 @@ Usually, the first version allocates and forward to the second version. The prev
mln::io::imsave(output, "out.tiff");
Parallel algorithms
-------------------
Pylene supports multithreaded parallelism in some of its algorithms namely for the following pointwise algorithms.
The parallel versions of the algorithms are available in the namespace ``parallel::``.
Using the previous code snippet as an example we would get::
#include <mln/morpho/dilation.hpp>
#include <mln/core/se/disc.hpp>
#include <mln/io/imread.hpp>
#include <mln/io/imsave.hpp>
mln::image2d<uint8_t> input;
mln::io::imread("input.tiff", input);
auto output = mln::morpho::parallel::dilation(input, mln::se::disc(5));
mln::io::imsave(output, "out.tiff");
You can also get better performances by specifying appropriate tile sizes depending on the size of the input image (see :ref:doc:`Tiling and parallelism </tiling>`)
by adding a ``height`` and ``width`` parameter to the function call (N.B. those parameters do not need to be equal).
::
auto output = mln::morpho::parallel::dilation(input, mln::se::disc(5), 128, 128);
......@@ -4,14 +4,14 @@ Writing algorithms
Non-generic implementation
**************************
Even if Pylene have algorithms written in a generic way as much as possible, you do **not** have to write generic
algorithms yourself if do not need so (nevertheless, we will see that it is not that difficult to do so).
Even if Pylene has algorithms written in a generic way as much as possible, you do **not** have to write generic
algorithms yourself if it is unnecessary (nevertheless, we will see that it is not that difficult to do so).
Working on 2D/3D images
-----------------------
You can always use the two/three nested loop to iterate over all positions in the domain. You can access the value
of an :cpp:any:`mln::ndimage` at a given position `p=(x,y)` ou `p=(x,y,z)` with ``ima({x,y})`` or ``ima({x,y,z})``.
You can always use two/three nested loops to iterate over all positions in the domain. You can access the value
of an :cpp:any:`mln::ndimage` at a given position `p=(x,y)` or `p=(x,y,z)` with ``ima({x,y})`` or ``ima({x,y,z})``.
::
......@@ -63,7 +63,7 @@ pixels. For example, in 2D, you may want to use 4-connectivity or 8-connectivity
:ref:doc:`Neighborhoods and structuring elements </core/neighborhood>` are range generators. The expression `nbh(x)`
generates a set of points from an anchor (a point) `x`. The following snippet visit all :ref:doc:`4-connected
generates a set of points from an anchor (a point) `x`. The following snippet visits all :ref:doc:`4-connected
</core/neighborhood/c4>` neighbors of all points of an image::
for (int y = 0; y < roi.height(); ++y)
......@@ -173,10 +173,218 @@ A simple but not optimal generic implementation of a dilation would look like::
}
Writing parallel algorithms
***************************
Writing parallel pointwise algorithms
-------------------------------------
TODO
Writing parallel local algorithms
---------------------------------
Parallel algorithms should be implemented in the ``parallel`` namespace.
Parallel algorithms in Pylene work with tiles (see :ref:doc:`Tiling </tiling>`), and as such have a code structure
fit for tile operations. They are split up in three classes:
* ``TileLoader``, tasked with loading a tile into the class
* ``TileExecutor`` which executes a chosen algorithm on the loaded tile
* ``TileWriter`` which writes the tile back into the image, along with optional tile postprocessing.
In the following paragraphs, you will see code blocks detailing how to implement a parallel algorithm
followed by explanations of the code blocks.
::
template <class InputImage>
class TileLoader_MyAlgo : public TileLoaderBase
{
public:
/// \param width the extended width of the tile
/// \param height the extended height of the tile
TileLoader_MyAlgo(InputImage input, int tile_width, int tile_height)
: _in{input}
, m_tile{tile_width, tile_height}
{
}
TileLoader_MyAlgo(const TileLoader_MyAlgo& other)
: _in{other._in}
, m_tile{other.m_tile.width(), other.m_tile.height()}
{
}
TileLoader_MyAlgo& operator=(const TileLoader_MyAlgo&) = delete;
/// \param roi The tile region
/// \param input_roi The extended roi required to compute the tile region
void load_tile(mln::box2d input_roi) const final
{
assert(m_tile.width() >= input_roi.width() && "Tile width mismatches");
assert(m_tile.height() >= input_roi.height() && "Tile height mismatches");
m_tile.set_domain_topleft(input_roi.tl());
assert(m_tile.domain().includes(input_roi));
assert(m_tile.domain().tl() == input_roi.tl());
image_value_t<InputImage> padding_value = 0;
auto padding_method = mln::PAD_ZERO;
auto dst = m_tile.clip(input_roi);
copy_pad(_in, dst, padding_method, padding_value);
}
mln::ndbuffer_image get_tile() const final
{
return m_tile;
}
private:
InputImage _in;
mutable mln::image2d<image_value_t<InputImage>> m_tile;
};
Regarding ``TileLoader``
* The arguments of the constructor are the entire image we're processing as well as tile size parameters
* The inheritance from ``TileLoaderBase``: using class inheritance allows us to keep
the design generic. As such, this is mandatory.
* The ``load_tile`` function creates tiles based on a region of interest ``roi`` being the
part of the image we're currently processing. We use ``clip`` and ``copy_pad`` (see
:ref:doc:`Padding </core/pad>`) to first clip the ``roi`` in the input image and then
pad the sides of the clipped image as needed (either by taking neighboring values in the
input image when available, or by padding with zeroes if necessary).
::
template <class Image, class SE>
class TileExecutor_MyAlgo : public TileExecutorBase
{
public:
TileExecutor_MyAlgo(const SE& se)
: _se{se}
{
}
void execute(mln::ndbuffer_image in, mln::ndbuffer_image out) const final
{
assert(in.domain().includes(out.domain()));
auto in_image2d = *(in.cast_to<Image, 2>());
auto out_image2d = *(out.cast_to<Image, 2>());
MyAlgo(in_image2d, _se, out_image2d);
}
private:
const SE& _se;
};
Regarding the ``TileExecutor``:
* The only argument for this class is the structuring element (see :ref:doc:`Structuring elements </core/se>`) used for pixel neighborhood.
Other arguments, if any, can be created by the arguments given to ``execute``
* The inheritance from the base class ``TileExecutorBase`` is once again mandatory.
* The ``execute`` function executes a chosen function on an input given by the ``TileLoader`` and also needs an output tile given by the ``TileWriter``
::
template <class OutputImage>
class TileWriter_MyAlgo : public TileWriterBase
{
public:
TileWriter_MyAlgo(OutputImage out)
: _out{out}
{
}
void write_tile(mln::box2d roi) const final { (void)roi; }
mln::ndbuffer_image get_tile(mln::box2d roi) const final { return _out.clip(roi); }
private:
OutputImage _out;
};
Most of the time, the ``TileWriter`` isn't going to do much (or anything at all), so having a quasi-empty class
like this is going to be enough. When you need to do some postprocessing, the ``write_tile`` function should be adapted
accordingly.
As can be expected the only parameter than needs to be passed as argument here is the output image.
::
template <class InputImage, class SE, class OutputImage>
struct MyAlgoParallel : ParallelLocalCanvas2D
{
static_assert(std::is_same_v<image_value_t<InputImage>, image_value_t<OutputImage>>);
private:
MyAlgoParallel(InputImage& in, OutputImage& out, SE& se, mln::box2d roi, mln::box2d tile_dims)
: m_se{se}
, m_output_roi{roi}
, m_tile_l{in, tile_dims.width(), tile_dims.height()}
, m_tile_w{out}
, m_tile_e{se}
{}
public:
MyAlgoParallel(InputImage& in, OutputImage& out, SE& se, mln::box2d roi)
: MyAlgoParallel(in, out, se, roi, se.compute_input_region({TILE_WIDTH, TILE_HEIGHT}))
{
}
MyAlgoParallel(InputImage& in, OutputImage& out, SE& se, mln::box2d roi, int tile_width, int tile_height)
: MyAlgoParallel(in, out, se, roi, se.compute_input_region({tile_width, tile_height}))
{
ParallelLocalCanvas2D::TILE_WIDTH = tile_width;
ParallelLocalCanvas2D::TILE_HEIGHT = tile_height;
}
std::unique_ptr<ParallelLocalCanvas2D> clone() const final { return std::make_unique<MyAlgoParallel>(*this); }
mln::box2d GetOutputRegion() const noexcept final { return m_output_roi; }
mln::box2d ComputeInputRegion(mln::box2d roi) const noexcept final { return m_se.compute_input_region(roi); }
const TileLoaderBase* GetTileLoader() const noexcept final { return &m_tile_l; };
const TileWriterBase* GetTileWriter() const noexcept final { return &m_tile_w; };
const TileExecutorBase* GetTileExecutor() const noexcept final { return &m_tile_e; };
private:
using tile_loader_t = TileLoader_MyAlgo<InputImage>;
using tile_executor_t = TileExecutor_MyAlgo<image_value_t<InputImage>, SE>;
using tile_writer_t = TileWriter_MyAlgo<OutputImage>;
SE m_se;
mln::box2d m_output_roi;
tile_loader_t m_tile_l;
tile_writer_t m_tile_w;
tile_executor_t m_tile_e;
};
This is simply a wrapper around the writer, loader and executor classes for a given algorithm.
The resulting class is used as argument in the function ``sequential_execute_local2D`` (see below)
::
MyAlgo_returntype MyAlgo(args)
{
MyAlgoParallel caller(image, out, se, output_roi, tile_width, tile_height);
sequential_execute_local2D(caller);
}
* The final function definition is identical to non-parallel function signature, allowing for ease of use
when going from non-parallel to parallel with the only difference being the prefix of the function when
using it in parallel: ``parallel::``
* ``sequential_execute_local2D`` is a wrapper around ``tbb::parallel_for`` using the different classes defined previously.
The code executed in parallel through ``tbb::parallel_for`` is similar to the following
::
void ParallelLocalCanvas2D::ExecuteTile(mln::box2d roi) const
{
auto m_tile_l = this->GetTileLoader();
auto m_tile_w = this->GetTileWriter();
auto m_tile_e = this->GetTileExecutor();
mln::box2d input_roi = this->ComputeInputRegion(roi);
m_tile_l->load_tile(input_roi);
m_tile_e->execute(m_tile_l->get_tile(), m_tile_w->get_tile(roi));
m_tile_w->write_tile(roi);
}
\ No newline at end of file
......@@ -85,9 +85,11 @@ namespace fixtures::ImageCompare
comparison_flags, linecmp_fn, print_fn);
}
template <class ImageA, class ImageB>
::testing::AssertionResult compare(const mln::details::Image<ImageA>& f_, const mln::details::Image<ImageB>& g_,
int comparison_flags)
::testing::AssertionResult compare(const mln::details::Image<ImageA>& f_,
const mln::details::Image<ImageB>& g_, int comparison_flags)
{
ImageA f = static_cast<const ImageA&>(f_);
ImageB g = static_cast<const ImageB&>(g_);
......@@ -110,16 +112,32 @@ namespace fixtures::ImageCompare
return testing::AssertionFailure() << "The domains of A and B differ.";
}
bool pass = true;
{
auto zipped_vals = mln::ranges::view::zip(f.values(), g.values());
for (auto&& r : mln::ranges::rows(zipped_vals))
for (auto&& [f_v, g_v] : r)
if (f_v != g_v)
return testing::AssertionFailure() << "The values of image A and B differ";
pass = false;
}
if constexpr (std::is_same_v<f_domain_t, mln::box2d> && //
std::is_same_v<g_domain_t, mln::box2d> && //
fmt::internal::has_formatter<mln::image_value_t<ImageA>, fmt::format_context>() &&
fmt::internal::has_formatter<mln::image_value_t<ImageB>, fmt::format_context>())
{
if (!pass)
{
fmt::print("A = \n");
mln::io::imprint(f);
fmt::print("vs B = \n");
mln::io::imprint(g);
}
}
return ::testing::AssertionSuccess();
if (!pass)
return ::testing::AssertionFailure() << "The values of image A and B differ";
else
return ::testing::AssertionSuccess();
}
/// \}
......
......@@ -6,6 +6,7 @@ find_package(FreeImage REQUIRED)
find_package(TBB REQUIRED tbb)
find_package(range-v3 0.10.0 REQUIRED)
find_package(fmt 6.0 REQUIRED)
find_package(xsimd REQUIRED)
set(PYLENE_USE_TBB YES CACHE BOOL "Set to NO to disable use of TBB and parallelization")
......@@ -39,7 +40,7 @@ target_include_directories(Pylene PUBLIC
$<INSTALL_INTERFACE:include>)
target_include_directories(Pylene SYSTEM PUBLIC $<BUILD_INTERFACE:${Boost_INCLUDE_DIRS}>)
target_link_libraries(Pylene PUBLIC range-v3::range-v3)
target_link_libraries(Pylene PUBLIC range-v3::range-v3 xsimd::xsimd)
target_link_libraries(Pylene PUBLIC fmt::fmt)
target_link_libraries(Pylene PRIVATE FreeImage::FreeImage)
......@@ -55,6 +56,7 @@ target_sources(Pylene PRIVATE
src/core/ndbuffer_image.cpp
src/core/ndbuffer_image_data.cpp
src/core/padding.cpp
src/core/parallel_local.cpp
src/core/parallel_pointwise.cpp
src/core/se/disc.cpp
src/core/se/mask2d.cpp
......@@ -66,12 +68,14 @@ target_sources(Pylene PRIVATE
src/io/imprint.cpp
src/io/imread.cpp
src/io/io.cpp
src/morpho/block_running_max.cpp
src/morpho/component_tree.cpp
src/morpho/hvector.cpp
src/morpho/hvector_unbounded.cpp
src/morpho/immersion.cpp
src/morpho/maxtree.cpp
src/morpho/unionfind.cpp
src/morpho/filters2d.cpp
)
# Compiler configurations
......
......@@ -81,22 +81,13 @@ namespace mln
constexpr bool empty() const noexcept;
/// \brief Returns the width of the box
constexpr int width() const noexcept requires(Impl::ndim == -1 || Impl::ndim >= 1)
{
return size(0);
}
constexpr int width() const noexcept requires(Impl::ndim == -1 || Impl::ndim >= 1) { return size(0); }
/// \brief Returns the height of the box
constexpr int height() const noexcept requires(Impl::ndim == -1 || Impl::ndim >= 2)
{
return size(1);
}
constexpr int height() const noexcept requires(Impl::ndim == -1 || Impl::ndim >= 2) { return size(1); }
/// \brief Returns the depth of the box
constexpr int depth() const noexcept requires (Impl::ndim == -1 || Impl::ndim >= 3)
{
return size(2);
}
constexpr int depth() const noexcept requires(Impl::ndim == -1 || Impl::ndim >= 3) { return size(2); }
using typename Impl::cursor;
using typename Impl::backward_cursor;
......@@ -126,16 +117,13 @@ namespace mln
constexpr int x() const noexcept { return this->__begin(0); }
/// \brief Returns the y coordinate of the top-left corner point
template <int d = ndim, class = std::enable_if_t<d == -1 || d >= 2>>
constexpr int y() const noexcept { return this->__begin(1); }
constexpr int y() const noexcept requires(Impl::ndim == -1 || Impl::ndim >= 2) { return this->__begin(1); }
/// \brief Returns the z coordinate of the top-left corner point
template <int d = ndim, class = std::enable_if_t<d == -1 || d >= 3>>
constexpr int z() const noexcept { return this->__begin(2); }
constexpr int z() const noexcept requires(Impl::ndim == -1 || Impl::ndim >= 3) { return this->__begin(2); }
/// \brief Returns the w coordinate of the top-left corner point
template <int d = ndim, class = std::enable_if_t<d == -1 || d >= 4>>
constexpr int w() const noexcept { return this->__begin(3); }
constexpr int w() const noexcept requires(Impl::ndim == -1 || Impl::ndim >= 4) { return this->__begin(3); }
/// \brief Returns the k-th coordinate of the top-left corner point
......@@ -144,6 +132,34 @@ namespace mln
/// \brief Returns the k-th coordinate of the bottom-right corner point (past-the-end)
constexpr int br(int k) const noexcept { return this->__end(k); }
constexpr void shift(point_type diff) noexcept
{
for (int dim = 0; dim < ndim; ++dim)
{
this->__begin(dim) += diff[dim];
this->__end(dim) += diff[dim];
}
}
constexpr auto shifted(point_type diff) const noexcept
{
auto tmp = *this;
tmp.shift(diff);
return tmp;
}
/// \brief Transpose the box (inplace)
constexpr void transpose() noexcept requires(Impl::ndim == 2)
{
std::swap(this->__begin(0), this->__begin(1));
std::swap(this->__end(0), this->__end(1));
}
/// \brief Return the box transposed.
constexpr auto transposed() const noexcept requires(Impl::ndim == 2) { return mln::box2d{y(), x(), width(), height()}; }
/// \brief Returns the bottom-right (past-the-end) corner point
using Impl::br;
......@@ -703,6 +719,14 @@ namespace mln
this->_zeros();
}
// template <class Impl>
// constexpr void _box<Impl>::transpose() noexcept requires (Impl::dim == 2)
// {
// std::swap(this->__begin(0), this->__begin(1));
// std::swap(this->__end(0), this->__end(1));
// }