Commit 667167f7 authored by Guillaume Lazzara
Browse files

src/content_in_doc.cc: New example extracting document content.

parent f1489987
2010-11-15 Guillaume Lazzara <z@lrde.epita.fr>
* src/content_in_doc.cc: New example extracting document content.
2010-11-15 Guillaume Lazzara <z@lrde.epita.fr>
* scribo/toolchain/text_in_doc.hh: Make use of non visible
......
...@@ -85,6 +85,21 @@ if HAVE_TESSERACT ...@@ -85,6 +85,21 @@ if HAVE_TESSERACT
$(TIFF_LDFLAGS) \ $(TIFF_LDFLAGS) \
$(MAGICKXX_LDFLAGS) $(MAGICKXX_LDFLAGS)
# content_in_doc: example program built from content_in_doc.cc.
utilexec_PROGRAMS += content_in_doc
content_in_doc_SOURCES = content_in_doc.cc
# NOTE(review): the two -I entries below are absolute paths into a
# developer's home directory, so this target presumably only builds on
# that machine; they should probably be expressed relative to
# $(srcdir) or $(top_srcdir) -- confirm the tree layout before changing.
content_in_doc_CPPFLAGS = $(AM_CPPFLAGS) \
-I/home/lazzara/git/oln/scribo/sandbox/green/ \
-I/home/lazzara/git/oln/scribo/sandbox/z/ \
$(TESSERACT_CPPFLAGS) \
$(TIFF_CPPFLAGS) \
$(MAGICKXX_CPPFLAGS)
# NOTE(review): -lpthread is a library, passed here through _LDFLAGS;
# Automake's usual place for libraries is content_in_doc_LDADD -- verify
# link order before moving it.
content_in_doc_LDFLAGS = $(AM_LDFLAGS) \
$(TESSERACT_LDFLAGS) \
$(TIFF_LDFLAGS) \
$(MAGICKXX_LDFLAGS) \
-lpthread
endif HAVE_TESSERACT endif HAVE_TESSERACT
endif HAVE_MAGICKXX endif HAVE_MAGICKXX
......
// Copyright (C) 2010 EPITA Research and Development Laboratory (LRDE)
//
// This file is part of Olena.
//
// Olena is free software: you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free
// Software Foundation, version 2 of the License.
//
// Olena is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Olena. If not, see <http://www.gnu.org/licenses/>.
//
// As a special exception, you may use this file as part of a free
// software project without restriction. Specifically, if other files
// instantiate templates or use macros or inline functions from this
// file, or you compile this file and link it with other files to produce
// an executable, this file does not by itself cause the resulting
// executable to be covered by the GNU General Public License. This
// exception does not however invalidate any other reasons why the
// executable file might be covered by the GNU General Public License.
#include <libgen.h>
#include <fstream>
#include <iostream>
#include <mln/core/image/image2d.hh>
#include <mln/core/alias/neighb2d.hh>
#include <mln/io/pbm/save.hh>
#include <mln/io/magick/load.hh>
#include <mln/value/label_8.hh>
#include <mln/core/var.hh>
#include <mln/accu/count_value.hh>
#include <mln/draw/box_plain.hh>
#include <scribo/toolchain/text_in_doc.hh>
#include <scribo/toolchain/text_in_doc_preprocess.hh>
#include <scribo/core/document.hh>
#include <scribo/core/line_set.hh>
#include <scribo/debug/usage.hh>
#include <scribo/make/debug_filename.hh>
#include <scribo/primitive/extract/elements.hh>
#include <scribo/preprocessing/crop_without_localization.hh>
#include <scribo/preprocessing/crop.hh>
#include <scribo/io/xml/save.hh>
#include <scribo/io/text_boxes/save.hh>
// Command-line help table passed to scribo::debug::usage() by main().
// Each row is { argument name, description }; the last row is { 0, 0 }
// (presumably the end-of-table marker expected by usage()).
const char *args_desc[][2] =
{
  { "input.*",
    "An image." },

  { "out.xml",
    "Result of the document analysis." },

  { "denoise_enabled",
    "1 enables denoising, 0 disables it. (enabled by default)" },

  { "pmin_row",
    "Row index of the top left corner of the Region of interest." },

  { "pmin_col",
    "Col index of the top left corner of the Region of interest." },

  { "pmax_row",
    "Row index of the bottom right corner of the Region of interest." },

  { "pmax_col",
    "Col index of the bottom right corner of the Region of interest." },

  { "debug_dir",
    "Output directory for debug image" },

  { 0, 0 }
};
/// Entry point: extract text lines and elements from a document image
/// and save the analysis as XML.
///
/// Accepted command lines (any other argument count prints the usage):
///   input.* out.xml
///   input.* out.xml <denoise_enabled>
///   input.* out.xml <denoise_enabled> <debug_dir>
///   input.* out.xml <denoise_enabled> <pmin_row> <pmin_col> <pmax_row> <pmax_col>
///   input.* out.xml <denoise_enabled> <pmin_row> <pmin_col> <pmax_row> <pmax_col> <debug_dir>
///
/// \return The value of scribo::debug::usage() on a bad argument count,
///         0 otherwise.
int main(int argc, char* argv[])
{
  using namespace scribo;
  using namespace mln;

  if (argc != 3 && argc != 4 && argc != 5 && argc != 8 && argc != 9)
    return scribo::debug::usage(argv,
				"Find text lines and elements in a document",
				"input.* out.xml <denoise_enabled> [<pmin_row> <pmin_col> <pmax_row> <pmax_col>] <debug_dir>",
				args_desc);

  bool debug = false;

  // Enable debug output: the debug directory is always the last
  // argument, and is only present when argc is 5 or 9.
  if (argc == 5 || argc == 9)
  {
    scribo::make::internal::debug_filename_prefix = argv[argc - 1];
    debug = true;
  }

  trace::entering("main");

  typedef image2d<scribo::def::lbl_type> L;
  scribo::document<L> doc(argv[1]);

  // Preprocess document
  image2d<bool>
    input = toolchain::text_in_doc_preprocess(doc.image(), false);

  // Optional Cropping: only when the four region-of-interest
  // coordinates were given (argc is 8 or 9).
  point2d crop_shift = literal::origin;
  if (argc >= 8)
  {
    mln::def::coord
      minr = atoi(argv[4]),
      minc = atoi(argv[5]),
      maxr = atoi(argv[6]),
      maxc = atoi(argv[7]);

    box2d roi = mln::make::box2d(minr, minc, maxr, maxc);
    input = preprocessing::crop_without_localization(input, roi);
    crop_shift = point2d(minr, minc);

    if (debug)
      mln::io::pbm::save(input,
			 scribo::make::debug_filename("input_cropped.pbm"));
  }

  // Denoising is documented as "enabled by default" in args_desc, so
  // it must stay on when <denoise_enabled> is omitted (argc == 3); it
  // is only turned off by an explicit value that atoi() reads as 0.
  bool denoise = (argc <= 3 || atoi(argv[3]) != 0);

  // Run document toolchain.

  // Text
  std::cout << "Extracting text" << std::endl;
  line_set<L>
    lines = scribo::toolchain::text_in_doc(input, denoise, debug);
  doc.set_text(lines);

  // Elements
  std::cout << "Extracting Elements" << std::endl;
  component_set<L> elements = scribo::primitive::extract::elements(doc, input);
  doc.set_elements(elements);

  // Saving results
  scribo::io::xml::save(doc, argv[2], true);

  trace::exiting("main");
  return 0;
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment