Commit cd6accc2 authored by Celian GOSSEC's avatar Celian GOSSEC

Add content for March 25 (LT #2)

parent 9e00287b
include share/make/
include share/make/
TEXI2PDFFLAGS += --shell-escape
all: slides.pdf
${RM} slides.pdf* tmp*
@InProceedings{ levillain.14.ciarp,
author = {Roland Levillain and Thierry G\'eraud and Laurent Najman
and Edwin Carlinet},
title = {Practical Genericity: Writing Image Processing Algorithms
Both Reusable and Efficient},
booktitle = {Progress in Pattern Recognition, Image Analysis, Computer
Vision, and Applications -- Proceedings of the 19th
Iberoamerican Congress on Pattern Recognition (CIARP)},
address = {Puerto Vallarta, Mexico},
month = nov,
year = {2014},
pages = {70--79},
editor = {Eduardo Bayro and Edwin Hancock},
publisher = {Springer-Verlag},
series = {Lecture Notes in Computer Science},
volume = {8827},
lrdeprojects = {Olena},
abstract = {An important topic for the image processing and pattern
recognition community is the construction of open source
and efficient libraries. An increasing number of software
frameworks are said to be generic: they allow users to
write reusable algorithms compatible with many input image
types. However, this design choice is often made at the
expense of performance. We present an approach to preserve
efficiency in a generic image processing framework, by
leveraging data types features. Variants of generic
algorithms taking advantage of image types properties can
be defined, offering an adjustable trade-off between
genericity and efficiency. Our experiments show that these
generic optimizations can match dedicated code in terms of
execution times, and even sometimes perform better than
routines optimized by hand. Digital Topology software
should reflect the generality of the underlying
mathematics: mapping the latter to the former requires
genericity. By designing generic solutions, one can
effectively reuse digital topology data structures and
algorithms. We propose an image processing framework
focused on the Generic Programming paradigm in which an
algorithm on the paper can be turned into a single code,
written once and usable with various input types. This
approach enables users to design and implement new methods
at a lower cost, try cross-domain experiments and help
generalize results.},
keywords = {Generic Programming, Image Processing, Performance,
lrdepaper = {},
lrdeslides = {},
lrdenewsdate = {2014-09-10}
@InProceedings{ roynard.18.rrpr,
title = {An Image Processing Library in Modern {C++}: Getting
Simplicity and Efficiency with Generic Programming},
author = {Micha\"el Roynard and Edwin Carlinet and Thierry G\'eraud},
booktitle = {Proceedings of the 2nd Workshop on Reproducible Research
in Pattern Recognition (RRPR)},
year = {2018},
abstract = {As there are as many clients as many usages of an Image
Processing library, each one may expect different services
from it. Some clients may look for efficient and
production-quality algorithms, some may look for a large
tool set, while others may look for extensibility and
genericity to inter-operate with their own code base... but
in most cases, they want a simple-to-use and stable
product. For a C++ Image Processing library designer, it is
difficult to conciliate genericity, efficiency and
simplicity at the same time. Modern C++ (post 2011) brings
new features for library developers that will help
designing a software solution combining those three points.
In this paper, we develop a method using these facilities
to abstract the library components and augment the
genericity of the algorithms. Furthermore, this method is
not specific to image processing; it can be applied to any
C++ scientific library.}
author = {Malossi, A. Cristiano I. and Ineichen, Yves and Bekas, Costas and Curioni, Alessandro},
year = {2015},
month = {01},
pages = {},
title = {Fast Exponential Computation on SIMD Architectures},
doi = {10.13140/2.1.4362.3207}
author = {Wittenbrink, Craig and Somani, Arun},
year = {1993},
month = {01},
pages = {12-22},
title = {Cache tiling for high performance morphological image processing},
volume = {7},
journal = {Machine Vision and Applications},
doi = {10.1007/BF01212412}
\ No newline at end of file
\setbeamertemplate{navigation symbols}{}
\setbeamertemplate{footline} [frame number]
\date[25-03-2020]{Lightning Talk \#2: March 25, 2020}
\author{Celian \textsc{Gossec}}
\title[Parallelism in Pylene]{Introducing parallelism in a generic image
processing framework for fun \& performances}
\institute[LRDE]{LRDE\\\textit{Laboratoire de Recherche et Développement de l'EPITA}}
\frametitle{The context}
\structure{The library} \\
\textit{Pylene} is a \emph{generic} image processing library written in C++
\footnote{\tiny Practical Genericity: Writing Image Processing Algorithms Both Reusable
and Efficient. R.~Levillain et al., \textit{CIARP'14}.}
\footnote{\tiny An Image Processing Library in Modern C++: Getting Simplicity and Efficiency
with Generic Programming. M. Roynard, E. Carlinet, T. Géraud, \textit{RRPR'18}.} \\
It contains a wide variety of algorithms, grouped in three main categories:
\item Point-wise (PW) algorithms
\item Local algorithms
\item Global algorithms
\frametitle{The context - bis}
\structure{Performances} \\
Pylene as a library is already relatively fast, but we know that it can be faster than even
OpenCV if we add parallelism and/or other speedup mechanisms.
\footnote{\tiny Paper being worked on by M.~Roynard \& E.~Carlinet.}
(Michaël and Edwin worked on benchmarks that showed a speedup coefficient of up to 4x using tiling
\footnote{\tiny Cache tiling for high performance morphological image processing. C.~Wittenbrink, A.~Somani, \textit{Machine Vision and Applications, 1993}.}
and SIMD)
\frametitle{Regarding performances - SIMD}
{\emph{SIMD}, or \emph{vectorization}, is the process of treating variables as part of a vector of data.
Instead of applying an instruction to each value one by one, we apply it to several values at the same time (how many is limited by the architecture).}
\frametitle{Regarding performances - Tiling}
{\emph{Tiling} is a technique in image processing where you process an image by pieces, rather than trying to fit it all in memory.}
\frametitle{Objective and problems}
\structure{Objective: improving our performances} \\
Improve the performances of the library through the aforementioned means.
The current short- and long-term points of interest:
\item Short term: work on PW algorithms
\item Longer term: work on local algorithms
\structure{Our main problem}
\item \textbf{Scalability and genericity have to be maintained}
\frametitle{The work done}
\item Designing a durable code pattern that would work with every pointwise algorithm.
\item Partial implementation of the chosen design pattern as a proof of concept (POC)
\item Adding benchmarks and tests for what has been implemented
\frametitle{The design pattern}
\tikzset{land/.style={draw}, obj/.style={draw,fill=red!20}};
\tikzstyle{inheritance}=[->, >=open triangle 90, thick]
\tikzstyle{line}=[-, thick]
\tikzstyle{class}=[draw, fill=red!30, rectangle split, rectangle split parts=2]
\draw node[] (header) {\tiny{Header}} -- ++(6,0) node[] (src) {\tiny{Source code}};
\coordinate (midBar) at ($(header)!0.4!(src)$); % Mid way between both
\draw (midBar) -- ++(0,-5);
\node[obj] [below = 0.4cm of header] (includehpp) {\tiny \#include <mln/algorithms.hpp> };
\node[obj] [below = 0.1cm of src] (includecpp1) {\tiny \#include <mln/parallel\_pointwise.hpp>};
\node[obj] [below = 0.7cm of src] (includecpp2) {\tiny \#include <tbb.h>};
\node (canvas) [class, align=left] [below = 1.3cm of src]
\footnotesize ParallelPointwiseBase
\nodepart{second} \tiny operator()(mln::box2d tile) \{ this->execTile(this->gDomain); \} \\
\tiny virtual void execTile(t) const = 0; \\
\tiny virtual mln::box2d gDomain() const = 0;
\node (foreach) [class, align=left, anchor=north] [below = 1.3cm of header]
\footnotesize ForEachParallel<Img, Func>
\nodepart{second} \tiny box2d gDomain() \{ return Img.dom; \} \\
\tiny void execTile(t) \{ for\_each(t.elm, fun); \}
\node (transform) [class, align=left, anchor=north] [below = 3.4cm of header]
\footnotesize TransformParallel<Img, Img, Func>
\nodepart{second} \tiny box2d gDomain() \{ return Img.dom; \} \\
\tiny void execTile(t) \{ transform(t.elm, out.elm, fun); \}
\node[draw, fill=red!50] (exec) [below = 4cm of src, align=left]
\tiny parallel\_call(Base* canvas) \{ \\
\tiny parallel\_for(canvas->size, *canvas) \\
\tiny \}
\draw[inheritance] (foreach) -- (canvas);
\draw[inheritance] (transform) -- (canvas);
\item Soft dependency on TBB
\item parallel\_for calls operator(), allows for algorithm-specific optimizations
\frametitle{Performances obtained}
For each pixel, increment value by 1 \\
Speedup negligible / no speedup (4.4G/s vs 4G/s) \footnotemark[5]
For each pixel, apply a gamma correction \\
(pixel = pixel**(1/2.2)) \\
Speedup 5x (175M/s vs 36M/s)
\footnote{\tiny All benchmarks were run on my lousy laptop}
\frametitle{Next steps}
\item Finishing implementation of PW algorithms
\item Thinking about the design pattern for the next step: local algorithms
\nocite{levillain.14.ciarp, roynard.18.rrpr, expsimd, tiling_performances}
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment