Commit b703a084 authored by Guillaume Lazzara's avatar Guillaume Lazzara
Browse files

Add support for loading multiple pages PDF files using libpoppler.

	* mln/io/pdf/get_header.hh,
	* mln/io/pdf/load.hh,
	* mln/tests/io/pdf/Makefile.am,
	* mln/tests/io/pdf/load.cc: New.

	* mln/tests/io/Makefile.am: Add pdf/ subdir.
parent b460ca00
2013-02-28 Guillaume Lazzara <z@lrde.epita.fr>
Add support for loading multiple pages PDF files using libpoppler.
* mln/io/pdf/get_header.hh,
* mln/io/pdf/load.hh,
* mln/tests/io/pdf/Makefile.am,
* mln/tests/io/pdf/load.cc: New.
* mln/tests/io/Makefile.am: Add pdf/ subdir.
2013-02-26 Guillaume Lazzara <z@lrde.epita.fr>
 
* tests/unit_test/unit-tests.mk: Update.
// Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE)
//
// This file is part of Olena.
//
// Olena is free software: you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free
// Software Foundation, version 2 of the License.
//
// Olena is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Olena. If not, see <http://www.gnu.org/licenses/>.
//
// As a special exception, you may use this file as part of a free
// software project without restriction. Specifically, if other files
// instantiate templates or use macros or inline functions from this
// file, or you compile this file and link it with other files to produce
// an executable, this file does not by itself cause the resulting
// executable to be covered by the GNU General Public License. This
// exception does not however invalidate any other reasons why the
// executable file might be covered by the GNU General Public License.
#ifndef MLN_IO_PDF_GET_HEADER_HH
# define MLN_IO_PDF_GET_HEADER_HH
/// \file
///
/// Load PDF header.
# include <iostream>
# include <fstream>
# include <mln/core/concept/image.hh>
# include <mln/core/routine/initialize.hh>
# include <mln/core/box_runstart_piter.hh>
# include <mln/core/pixel.hh>
# include <mln/data/memcpy_.hh>
# include <mln/util/array.hh>
namespace mln
{
namespace io
{
namespace pdf
{
/// Store pdf file header.
///
/// All the fields are filled by the constructor from the poppler
/// document it is given.
struct pdf_header
{
/// Read the header information from the poppler document \p doc.
pdf_header(const poppler::document *doc);
/// Number of pages, as returned by poppler::document::pages().
int page_count;
/// Major number of the PDF version of the document.
int pdf_version_major;
/// Minor number of the PDF version of the document.
int pdf_version_minor;
/// Value returned by poppler::document::is_encrypted().
bool is_encrypted;
/// Value returned by poppler::document::is_linearized().
bool is_linearized;
/// Value returned by poppler::document::is_locked().
bool is_locked;
/// Document metadata, converted to a Latin-1 string.
std::string metadata;
};
/// Retrieve header in a pdf file.
///
/// \param[in] filename The name of the PDF file to inspect.
///
/// \return The header information read from the document.
///
/// The program is aborted if the document cannot be loaded.
pdf_header get_header(const std::string& filename);
# ifndef MLN_INCLUDE_ONLY
// Begin of pdf_header implementation.
/// Fill the header from an already loaded poppler document.
///
/// \param[in] doc The poppler document to read; it is dereferenced
///                without any check, so it must not be null.
///
/// This constructor is declared inline because it is defined in a
/// header: without it, including this file from several translation
/// units would lead to multiple definitions at link time.
inline
pdf_header::pdf_header(const poppler::document *doc)
{
  page_count = doc->pages();

  // A single call retrieves both parts of the version number.
  doc->get_pdf_version(&pdf_version_major, &pdf_version_minor);

  is_encrypted = doc->is_encrypted();
  is_linearized = doc->is_linearized();
  is_locked = doc->is_locked();

  metadata = doc->metadata().to_latin1();
}
// End of pdf_header implementation.
/// Open the PDF file \p filename, read its header information and
/// close the document again.
///
/// The program is aborted when the file cannot be loaded.
inline
pdf_header get_header(const std::string& filename)
{
  trace::entering("mln::io::pdf::get_header");

  poppler::document *doc = poppler::document::load_from_file(filename);
  if (!doc)
  {
    std::cerr << "Error: Cannot load PDF " << filename << std::endl;
    abort();
  }

  // The header copies everything it needs, so the document can be
  // released as soon as the header has been built.
  pdf_header result(doc);
  delete doc;

  trace::exiting("mln::io::pdf::get_header");
  return result;
}
# endif // ! MLN_INCLUDE_ONLY
} // end of namespace mln::io::pdf
} // end of namespace mln::io
} // end of namespace mln
#endif // ! MLN_IO_PDF_GET_HEADER_HH
// Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE)
//
// This file is part of Olena.
//
// Olena is free software: you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free
// Software Foundation, version 2 of the License.
//
// Olena is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Olena. If not, see <http://www.gnu.org/licenses/>.
//
// As a special exception, you may use this file as part of a free
// software project without restriction. Specifically, if other files
// instantiate templates or use macros or inline functions from this
// file, or you compile this file and link it with other files to produce
// an executable, this file does not by itself cause the resulting
// executable to be covered by the GNU General Public License. This
// exception does not however invalidate any other reasons why the
// executable file might be covered by the GNU General Public License.
#ifndef MLN_IO_PDF_LOAD_HH
# define MLN_IO_PDF_LOAD_HH
/// \file
///
/// \brief Load a multi-page PDF document using libpoppler.
#include <iostream>
#include <mln/core/image/image2d.hh>
#include <poppler/cpp/poppler-document.h>
#include <poppler/cpp/poppler-page-renderer.h>
namespace mln
{
namespace io
{
namespace pdf
{
/*! \brief Load all the pages of a PDF document using libpoppler.

\param[out] arr An array of images. One image per page is
appended to it, in page order.
\param[in] filename The name of the input file.
\param[in] dpi Document resolution, used for both the
horizontal and the vertical axes.

The program is aborted if the document cannot be loaded.
*/
template <typename I>
void load(util::array<I>& arr, const std::string& filename,
float dpi);
/*! \brief Load a range of pages from a PDF document using libpoppler.

\param[out] arr An array of images. One image per page of the
range is appended to it, in page order.
\param[in] filename The name of the input file.
\param[in] first_page First page number (included).
\param[in] last_page Last page number (included).
\param[in] dpi Document resolution.

Page numbering starts from 0.

The program is aborted if the document cannot be loaded or if one
of the bounds is not a page number of the document.
*/
template <typename I>
void load(util::array<I>& arr, const std::string& filename,
int first_page, int last_page, float dpi);
/*! \brief Load specific pages from a PDF document using libpoppler.

\param[out] arr An array of images. One image per requested page
is appended to it, in the order given by \p pages.
\param[in] filename The name of the input file.
\param[in] pages An array of page numbers.
\param[in] dpi Document resolution.

Page numbering starts from 0.

The program is aborted if the document cannot be loaded or if a
requested page does not exist.
*/
template <typename I>
void load(util::array<I>& arr, const std::string& filename,
util::array<int> pages, float dpi);
/*! \brief Load a specific page from a PDF document using libpoppler.

\param[out] ima An image; it is assigned the rendered page.
\param[in] filename The name of the input file.
\param[in] page The page number.
\param[in] dpi Document resolution.

Page numbering starts from 0.
*/
template <typename I>
void load(Image<I>& ima, const std::string& filename,
int page, float dpi);
/*! \overload

Load all the pages of a PDF document using libpoppler.

\param[out] arr An array of images.
\param[in] filename The name of the input file.

DPI resolution is set to 300.
*/
template <typename I>
void load(util::array<I>& arr, const std::string& filename);
/*! \overload

Load specific pages from a PDF document using libpoppler.

\param[out] arr An array of images.
\param[in] filename The name of the input file.
\param[in] pages An array of page numbers.

DPI resolution is set to 300.
Page numbering starts from 0.
*/
template <typename I>
void load(util::array<I>& arr, const std::string& filename,
util::array<int> pages);
# ifndef MLN_INCLUDE_ONLY
namespace internal
{
/// Copy the pixels of a poppler image in ARGB32 format into \p ima.
///
/// \param[in,out] ima  The destination image. Its values are written
///                     through a pointer that starts at the first
///                     site of the domain and is incremented once per
///                     pixel, so its sites are assumed to be stored
///                     contiguously, row after row (extract_page
///                     creates it with a border of 0).
/// \param[in]     pima The rendered page.
///
/// Each source pixel takes 4 bytes, read as blue, green, red; the
/// fourth byte is ignored. Source rows are walked using
/// bytes_per_row() so that a possible padding at the end of the rows
/// is skipped instead of being read as pixels.
template <typename I>
void copy_data_argb32(I& ima, const poppler::image& pima)
{
  const int bytes_per_pixel = 4;
  const int nrows = pima.height();
  const int ncols = pima.width();
  const int row_size = pima.bytes_per_row();

  mln_value(I) *vima = &ima(ima.domain().pmin());
  const unsigned char *row
    = reinterpret_cast<const unsigned char *>(pima.const_data());

  for (int r = 0; r < nrows; ++r, row += row_size)
  {
    const unsigned char *v = row;
    for (int c = 0; c < ncols; ++c, v += bytes_per_pixel, ++vima)
    {
      vima->red() = v[2];
      vima->green() = v[1];
      vima->blue() = v[0];
    }
  }
}
/// Copy the pixels of a poppler image in RGB24 format into \p ima.
///
/// \param[in,out] ima  The destination image. Its values are written
///                     through a pointer that starts at the first
///                     site of the domain and is incremented once per
///                     pixel, so its sites are assumed to be stored
///                     contiguously, row after row (extract_page
///                     creates it with a border of 0).
/// \param[in]     pima The rendered page.
///
/// Each source pixel takes 3 bytes, read as red, green, blue. Source
/// rows are walked using bytes_per_row() so that a possible padding at
/// the end of the rows is skipped instead of being read as pixels.
template <typename I>
void copy_data_rgb24(I& ima, const poppler::image& pima)
{
  const int bytes_per_pixel = 3;
  const int nrows = pima.height();
  const int ncols = pima.width();
  const int row_size = pima.bytes_per_row();

  mln_value(I) *vima = &ima(ima.domain().pmin());
  const unsigned char *row
    = reinterpret_cast<const unsigned char *>(pima.const_data());

  for (int r = 0; r < nrows; ++r, row += row_size)
  {
    const unsigned char *v = row;
    for (int c = 0; c < ncols; ++c, v += bytes_per_pixel, ++vima)
    {
      vima->red() = v[0];
      vima->green() = v[1];
      vima->blue() = v[2];
    }
  }
}
/// Copy the pixels of a rendered page into \p ima, according to the
/// format of the poppler image.
///
/// \param[in,out] ima  A valid destination image.
/// \param[in]     pima The rendered page.
///
/// Only the RGB24 and ARGB32 formats are handled. For any other
/// format an error is reported and the program is aborted. Errors are
/// written to std::cerr and flushed, since abort() does not flush the
/// standard streams.
template <typename I>
void copy_data(I& ima, const poppler::image& pima)
{
  mln_precondition(ima.is_valid());

  switch (pima.format())
  {
    case poppler::image::format_rgb24:
      copy_data_rgb24(ima, pima);
      break;

    case poppler::image::format_argb32:
      copy_data_argb32(ima, pima);
      break;

    case poppler::image::format_mono:
      std::cerr << "Loading black and white pdf is not implemented!"
                << std::endl;
      abort();
      break;

    case poppler::image::format_invalid:
    default:
      // Also catches formats that are not listed above, so that the
      // image is never left unfilled silently.
      std::cerr << "Error while loading PDF: invalid or unsupported "
                << "page image format!" << std::endl;
      abort();
      break;
  }
}
/// Render page \p i of \p pdf and append the resulting image to \p arr.
///
/// \param[in]     pdf A loaded poppler document.
/// \param[in]     i   The number of the page to render.
/// \param[in,out] arr The array the rendered image is appended to.
/// \param[in]     dpi Resolution used for both axes.
///
/// The program is aborted if the page cannot be created or rendered.
template <typename I>
void extract_page(const poppler::document* pdf, int i,
                  util::array<I>& arr, float dpi)
{
  // Read page
  poppler::page* p = pdf->create_page(i);
  if (p == 0)
  {
    std::cerr << "Error while loading PDF: cannot read page "
              << i << "!" << std::endl;
    abort();
  }

  // Render Page
  poppler::page_renderer pr;
  poppler::image pima = pr.render_page(p, dpi, dpi);

  // The page returned by create_page() belongs to us; it is not
  // needed anymore once rendered, so release it to avoid leaking one
  // page per call.
  delete p;

  if (!pima.is_valid())
  {
    std::cerr << "Error while loading PDF: cannot render page "
              << i << "!" << std::endl;
    abort();
  }

  // Prepare MLN image. No border: the copy routines expect the
  // values to be contiguous in memory.
  I ima(make::box2d(pima.height(), pima.width()), 0);
  mln_assertion(ima.is_valid());

  // Copy data
  internal::copy_data(ima, pima);

  // Append result
  arr.append(ima);
}
} // end of namespace mln::io::pdf::internal
/// Render every page of the document \p filename at resolution \p dpi
/// and append the images to \p arr, in page order.
///
/// The program is aborted when the document cannot be loaded.
template <typename I>
void load(util::array<I>& arr, const std::string& filename, float dpi)
{
  trace::entering("mln::io::pdf::load");

  poppler::document *doc = poppler::document::load_from_file(filename);
  if (!doc)
  {
    std::cerr << "Error: Cannot load pdf " << filename << std::endl;
    abort();
  }

  // Pages are numbered from 0.
  int page = 0;
  while (page < doc->pages())
  {
    internal::extract_page(doc, page, arr, dpi);
    ++page;
  }

  delete doc;

  trace::exiting("mln::io::pdf::load");
}
/// Render the pages \p first_page to \p last_page (both included) of
/// the document \p filename at resolution \p dpi and append the images
/// to \p arr, in page order.
///
/// Page numbering starts from 0. The program is aborted when the
/// document cannot be loaded, when a bound is not a page number of
/// the document or when \p first_page is greater than \p last_page.
/// Errors are reported on std::cerr.
template <typename I>
void load(util::array<I>& arr, const std::string& filename,
          int first_page, int last_page, float dpi)
{
  trace::entering("mln::io::pdf::load");

  // Load document
  poppler::document *pdf = poppler::document::load_from_file(filename);
  if (pdf == 0)
  {
    std::cerr << "Error: Cannot load PDF " << filename << std::endl;
    abort();
  }

  // With first_page <= last_page, checking the lower bound of the
  // first page and the upper bound of the last one is enough to
  // guarantee that both are valid page numbers.
  const int npages = pdf->pages();
  if (first_page < 0 || last_page > npages - 1
      || first_page > last_page)
  {
    std::cerr << "Error while loading PDF: page range is not correct!"
              << std::endl;
    abort();
  }

  for (int i = first_page; i <= last_page; ++i)
    internal::extract_page(pdf, i, arr, dpi);

  delete pdf;

  trace::exiting("mln::io::pdf::load");
}
/// Render the pages listed in \p pages from the document \p filename
/// at resolution \p dpi and append the images to \p arr, in the order
/// given by \p pages.
///
/// Page numbering starts from 0. The program is aborted when the
/// document cannot be loaded or when a requested page does not exist.
/// Errors are reported on std::cerr.
template <typename I>
void load(util::array<I>& arr, const std::string& filename,
          util::array<int> pages, float dpi)
{
  trace::entering("mln::io::pdf::load");

  // Load document
  poppler::document *pdf = poppler::document::load_from_file(filename);
  if (pdf == 0)
  {
    std::cerr << "Error: Cannot load PDF " << filename << std::endl;
    abort();
  }

  // Check the whole selection first: no page is rendered when one of
  // the requested page numbers is wrong.
  const int npages = pdf->pages();
  for (unsigned i = 0; i < pages.size(); ++i)
    if (pages[i] < 0 || pages[i] > npages - 1)
    {
      std::cerr << "Error while loading PDF: selected page "
                << pages[i] << " does not exist!" << std::endl;
      abort();
    }

  for (unsigned i = 0; i < pages.size(); ++i)
    internal::extract_page(pdf, pages[i], arr, dpi);

  delete pdf;

  trace::exiting("mln::io::pdf::load");
}
/// Render page \p page of the document \p filename at resolution
/// \p dpi and store the result in \p ima.
template <typename I>
void load(Image<I>& ima, const std::string& filename,
          int page, float dpi)
{
  // Rely on the page range overload, with a range made of one page.
  util::array<I> single;
  load(single, filename, page, page, dpi);
  mln_assertion(single.size() == 1);

  I& output = exact(ima);
  output = single[0];
}
/// Load all the pages of the document \p filename into \p arr, using
/// a resolution of 300 dpi.
template <typename I>
void load(util::array<I>& arr, const std::string& filename)
{
  const float default_dpi = 300;
  load(arr, filename, default_dpi);
}
/// Load the pages listed in \p pages from the document \p filename
/// into \p arr, using a resolution of 300 dpi.
template <typename I>
void load(util::array<I>& arr, const std::string& filename,
          util::array<int> pages)
{
  const float default_dpi = 300;
  load(arr, filename, pages, default_dpi);
}
# endif // ! MLN_INCLUDE_ONLY
} // end of namespace mln::io::pdf
} // end of namespace mln::io
} // end of namespace mln
#endif // ! MLN_IO_PDF_LOAD_HH
# Copyright (C) 2007, 2008, 2009 EPITA Research and Development
# Copyright (C) 2007, 2008, 2009, 2013 EPITA Research and Development
# Laboratory (LRDE).
#
# This file is part of Olena.
......@@ -53,3 +53,7 @@ endif HAVE_MAGICKXX
if HAVE_TIFF
SUBDIRS += tiff
endif HAVE_TIFF
# The pdf/ subdirectory is added only when HAVE_POPPLER is set.
if HAVE_POPPLER
SUBDIRS += pdf
endif HAVE_POPPLER
\ No newline at end of file
# Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE).
#
# This file is part of Olena.
#
# Olena is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation, version 2 of the License.
#
# Olena is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Olena. If not, see <http://www.gnu.org/licenses/>.
# Pull in milena/tests/tests.mk.
include $(top_srcdir)/milena/tests/tests.mk
# Preprocessor and linker flags for Poppler.
AM_CPPFLAGS += $(POPPLER_CPPFLAGS)
AM_LDFLAGS = $(POPPLER_LDFLAGS)
# Programs built for the checks; every one of them is run as a test.
check_PROGRAMS = \
load
TESTS = $(check_PROGRAMS)
// Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE)
//
// This file is part of Olena.
//
// Olena is free software: you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free
// Software Foundation, version 2 of the License.
//
// Olena is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Olena. If not, see <http://www.gnu.org/licenses/>.
//
// As a special exception, you may use this file as part of a free
// software project without restriction. Specifically, if other files
// instantiate templates or use macros or inline functions from this
// file, or you compile this file and link it with other files to produce
// an executable, this file does not by itself cause the resulting
// executable to be covered by the GNU General Public License. This
// exception does not however invalidate any other reasons why the
// executable file might be covered by the GNU General Public License.
#include <mln/core/image/image2d.hh>
#include <mln/data/compare.hh>
#include <mln/value/rgb8.hh>
#include <mln/io/ppm/load.hh>
#include <mln/io/pdf/load.hh>
#include "tests/data.hh"
int main()
{
using namespace mln;
image2d<value::rgb8> page0, page1, page2, page3;
io::ppm::load(page0, MLN_TESTS_IMG_DIR "/example-0.ppm");
io::ppm::load(page1, MLN_TESTS_IMG_DIR "/example-1.ppm");
io::ppm::load(page2, MLN_TESTS_IMG_DIR "/example-2.ppm");
io::ppm::load(page3, MLN_TESTS_IMG_DIR "/example-3.ppm");
// Loading full PDF.
{
util::array<image2d<value::rgb8> > arr;
io::pdf::load(arr, MLN_TESTS_IMG_DIR "/example.pdf", 75);
mln_assertion(arr.size() == 4);
mln_assertion(arr[0] == page0);
mln_assertion(arr[1] == page1);
mln_assertion(arr[2] == page2);
mln_assertion(arr[3] == page3);
}
// Loading a page range
{
util::array<image2d<value::rgb8> > arr;
io::pdf::load(arr, MLN_TESTS_IMG_DIR "/example.pdf", 1, 2, 75);
mln_assertion(arr.size() == 2);
mln_assertion(arr[0] == page1);
mln_assertion(arr[1] == page2);
}
// Loading specific pages.
{
util::array<int> pages;
pages.append(1);
pages.append(3);
util::array<image2d<value::rgb8> > arr;
io::pdf::load(arr, MLN_TESTS_IMG_DIR "/example.pdf", pages, 75);
mln_assertion(arr.size() == 2);
mln_assertion(arr[0] == page1);
mln_assertion(arr[1] == page3);
}
// Loading a specific page.
{
image2d<value::rgb8> ima;
io::pdf::load(ima, MLN_TESTS_IMG_DIR "/example.pdf", 3, 75);
mln_assertion(ima.is_valid());
mln_assertion(ima == page3);
}
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment