Commit cbbdf4e0 authored by Guillaume Lazzara's avatar Guillaume Lazzara
Browse files

Cleanup sample tools.

	* src/preprocessing/Makefile.am,
	* src/text/Makefile.am,
	* src/Makefile.am: Update.

	* src/multi_scale/Makefile.am,
	* src/multi_scale/find_lines.cc,
	* src/text/pbm_recognition.cc,
	* src/text/recognition.cc,
	* src/text_in_article_preprocess.cc,
	* src/text_in_doc.cc,
	* src/text_in_doc_ppm.cc,
	* src/text_in_photo.cc,
	* src/text_in_photo_invert.cc,
	* src/text_in_photo_pbm_fast.cc,
	* src/text_in_photo_ppm.cc,
	* test.cc: Remove. Deprecated.

	* src/text_in_article_pbm.cc: Rename as...
	* src/pbm_text_in_doc.cc: ... this. Make use of the new toolchain
	routines.

	* src/text/pbm_lines_recognition.cc,
	* src/text_in_doc_preprocess.cc: New.

	* src/text_in_photo_fast.cc: Rename as...
	* src/text_in_picture.cc: ... this.

	* src/preprocessing/subsample.cc: Make use of io::magick.
parent ff15d075
2010-05-25 Guillaume Lazzara <z@lrde.epita.fr>
Cleanup sample tools.
* src/preprocessing/Makefile.am,
* src/text/Makefile.am,
* src/Makefile.am: Update.
* src/multi_scale/Makefile.am,
* src/multi_scale/find_lines.cc,
* src/text/pbm_recognition.cc,
* src/text/recognition.cc,
* src/text_in_article_preprocess.cc,
* src/text_in_doc.cc,
* src/text_in_doc_ppm.cc,
* src/text_in_photo.cc,
* src/text_in_photo_invert.cc,
* src/text_in_photo_pbm_fast.cc,
* src/text_in_photo_ppm.cc,
* test.cc: Remove. Deprecated.
* src/text_in_article_pbm.cc: Rename as...
* src/pbm_text_in_doc.cc: ... this. Make use of the new toolchain
routines.
* src/text/pbm_lines_recognition.cc,
* src/text_in_doc_preprocess.cc: New.
* src/text_in_photo_fast.cc: Rename as...
* src/text_in_picture.cc: ... this.
* src/preprocessing/subsample.cc: Make use of io::magick.
2010-05-25 Guillaume Lazzara <z@lrde.epita.fr>
* toolchain/text_in_doc.hh: Introduce a new routine for extracting
......
......@@ -25,81 +25,51 @@ SUBDIRS = \
primitive \
filter \
misc \
multi_scale \
nuxeo_xwiki \
preprocessing \
table \
text
bin_PROGRAMS = \
pbm_lines_in_doc \
text_in_photo \
text_in_photo_ppm \
text_in_photo_pbm_fast \
text_in_photo_invert
pbm_lines_in_doc
pbm_lines_in_doc_SOURCES = pbm_lines_in_doc.cc
text_in_photo_SOURCES = text_in_photo.cc
text_in_photo_ppm_SOURCES = text_in_photo_ppm.cc
text_in_photo_pbm_fast_SOURCES = text_in_photo_pbm_fast.cc
text_in_photo_invert_SOURCES = text_in_photo_invert.cc
if HAVE_TESSERACT
if HAVE_TIFF
bin_PROGRAMS += text_in_doc
text_in_doc_SOURCES = text_in_doc.cc
text_in_doc_CPPFLAGS = $(AM_CPPFLAGS) \
$(TESSERACT_CPPFLAGS) \
$(TIFF_CPPFLAGS)
text_in_doc_LDFLAGS = $(AM_LDFLAGS) \
$(TESSERACT_LDFLAGS) \
$(TIFF_LDFLAGS) \
-lpthread -lhpdf
bin_PROGRAMS += text_in_doc_ppm
text_in_doc_ppm_SOURCES = text_in_doc_ppm.cc
text_in_doc_ppm_CPPFLAGS = $(AM_CPPFLAGS) \
$(TESSERACT_CPPFLAGS) \
$(TIFF_CPPFLAGS)
text_in_doc_ppm_LDFLAGS = $(AM_LDFLAGS) \
$(TESSERACT_LDFLAGS) \
$(TIFF_LDFLAGS) \
-lpthread -lhpdf
bin_PROGRAMS += text_in_article_pbm
text_in_article_pbm_CPPFLAGS = $(AM_CPPFLAGS) \
bin_PROGRAMS += pbm_text_in_doc
pbm_text_in_doc_CPPFLAGS = $(AM_CPPFLAGS) -g -ggdb \
$(TESSERACT_CPPFLAGS) \
$(TIFF_CPPFLAGS)
text_in_article_pbm_LDFLAGS = $(AM_LDFLAGS) \
pbm_text_in_doc_LDFLAGS = $(AM_LDFLAGS) -g -ggdb\
$(TESSERACT_LDFLAGS) \
$(TIFF_LDFLAGS) \
-lpthread
# -lhpdf
text_in_article_pbm_SOURCES = text_in_article_pbm.cc
pbm_text_in_doc_SOURCES = pbm_text_in_doc.cc
endif HAVE_TESSERACT
bin_PROGRAMS += text_in_article_preprocess
text_in_article_preprocess_CPPFLAGS = $(AM_CPPFLAGS) \
`Magick++-config --cppflags`
text_in_article_preprocess_LDFLAGS = $(AM_LDFLAGS) \
-lpthread `Magick++-config --libs`
text_in_article_preprocess_SOURCES = text_in_article_preprocess.cc
if HAVE_MAGICKXX
bin_PROGRAMS += text_in_doc_preprocess
text_in_doc_preprocess_CPPFLAGS = $(AM_CPPFLAGS) \
`Magick++-config --cppflags`
text_in_doc_preprocess_LDFLAGS = $(AM_LDFLAGS) \
-lpthread `Magick++-config --libs`
text_in_doc_preprocess_SOURCES = text_in_doc_preprocess.cc
bin_PROGRAMS += text_in_photo_fast
text_in_photo_fast_SOURCES = text_in_photo_fast.cc
text_in_photo_fast_CPPFLAGS = $(AM_CPPFLAGS) \
$(TESSERACT_CPPFLAGS) \
`Magick++-config --cppflags`
text_in_photo_fast_LDFLAGS = $(AM_LDFLAGS) \
$(TESSERACT_LDFLAGS) \
-lpthread `Magick++-config --libs`
bin_PROGRAMS += text_in_picture
text_in_picture_SOURCES = text_in_picture.cc
text_in_picture_CPPFLAGS = $(AM_CPPFLAGS) \
`Magick++-config --cppflags`
endif HAVE_TIFF
endif HAVE_TESSERACT
text_in_picture_LDFLAGS = $(AM_LDFLAGS) \
-lpthread `Magick++-config --libs`
endif HAVE_MAGICKXX
# Copyright (C) 2009 EPITA Research and Development Laboratory (LRDE).
#
# This file is part of Olena.
#
# Olena is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation, version 2 of the License.
#
# Olena is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Olena. If not, see <http://www.gnu.org/licenses/>.
#
## Process this file through Automake to create Makefile.in.
include $(top_srcdir)/scribo/scribo.mk
bin_PROGRAMS = \
find_lines
find_lines_SOURCES = find_lines.cc
// Copyright (C) 2009 EPITA Research and Development Laboratory (LRDE)
//
// This file is part of Olena.
//
// Olena is free software: you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free
// Software Foundation, version 2 of the License.
//
// Olena is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Olena. If not, see <http://www.gnu.org/licenses/>.
//
// As a special exception, you may use this file as part of a free
// software project without restriction. Specifically, if other files
// instantiate templates or use macros or inline functions from this
// file, or you compile this file and link it with other files to produce
// an executable, this file does not by itself cause the resulting
// executable to be covered by the GNU General Public License. This
// exception does not however invalidate any other reasons why the
// executable file might be covered by the GNU General Public License.
#include <mln/core/alias/neighb2d.hh>
#include <mln/core/image/image2d.hh>
#include <mln/data/convert.hh>
#include <mln/debug/superpose.hh>
#include <mln/io/pbm/all.hh>
#include <mln/io/ppm/save.hh>
#include <mln/pw/all.hh>
#include <mln/subsampling/subsampling.hh>
#include <mln/value/label_16.hh>
#include <mln/value/rgb8.hh>
#include <mln/world/binary_2d/enlarge.hh>
#include <scribo/debug/usage.hh>
#include <scribo/core/object_image.hh>
#include <scribo/primitive/extract/lines_h_pattern.hh>
#include <scribo/primitive/extract/lines_v_pattern.hh>
#include <scribo/filter/objects_h_thin.hh>
#include <scribo/filter/objects_v_thin.hh>
#include <sandbox/theo/Rd/sequential.hh>
#include <mln/morpho/erosion.hh>
// Descriptions of the command-line arguments, handed to
// scribo::debug::usage() by main().  Each row is a
// { argument name, description } pair; the table is closed by a
// { 0, 0 } sentinel row.
const char *args_desc[][2] =
{
  { "input.pbm",
    "A binary image." },
  { "length",
    " Minimum line length." },
  { 0, 0 }
};
namespace mln
{

  /// Extract horizontal line patterns from \p input, then filter the
  /// result with scribo::filter::objects_v_thin().
  ///
  /// \param input     Image to analyse.
  /// \param filename  Not used; kept so that callers do not have to change.
  /// \param length    Forwarded to lines_h_pattern() as its length argument.
  /// \param delta     Forwarded to lines_h_pattern() as its delta argument.
  /// \param ratio     Multiplied with \p delta to build the last argument
  ///                  of objects_v_thin().
  ///
  /// \return The filtered horizontal-line image.
  ///
  /// The vertical counterpart (lines_v_pattern() + objects_h_thin()) and
  /// the superposed PPM debug output are not performed here.
  template <typename I>
  mln_concrete(I)
  process(const I& input, const std::string& filename,
	  unsigned length, unsigned delta, unsigned ratio)
  {
    (void) filename;

    I horizontal
      = scribo::primitive::extract::lines_h_pattern(input, length, delta);

    // Receives the label count produced by the filter; not read afterwards.
    value::label_16 n_horizontal;
    const unsigned scaled_delta = delta * ratio;

    horizontal = scribo::filter::objects_v_thin(horizontal, c8(),
						n_horizontal, scaled_delta);
    return horizontal;
  }


  /// Placeholder for merging the results computed at several scales.
  ///
  /// For now it only declares an image, calls initialize() on it with
  /// \p input and returns it: \p out, \p out_sub2x and \p out_sub4x are
  /// not read and no pixel value is written.
  template <typename I>
  mln_concrete(I)
  merge_results(const I& input,
		const I& out, const I& out_sub2x, const I& out_sub4x)
  {
    (void) out;
    (void) out_sub2x;
    (void) out_sub4x;

    mln_concrete(I) merged;
    initialize(merged, input);
    return merged;
  }

} // end of namespace mln
/// Multi-scale extraction of horizontal lines in a binary image.
///
/// Command line: input.pbm length output.ppm
///
/// Horizontal line patterns are extracted at full resolution and on a
/// 2x subsampled copy of the input.  Both results go through
/// morpho::Rd::sequential() with the input image, and are superposed in
/// red on the input.
///
/// Files written:
///   - out_1_1.ppm : full-resolution lines superposed on the input;
///   - rd.pbm      : Rd::sequential() result for the full-resolution lines;
///   - rd_sub2.pbm : Rd::sequential() result for the subsampled lines;
///   - argv[3]     : final superposition.
///
/// The final image is written to the output path given on the command
/// line.  A hard-coded "out.ppm" would silently ignore the mandatory
/// third argument.
///
/// \return The value of scribo::debug::usage() when the argument count
///         is wrong.
int main(int argc, char *argv[])
{
  using namespace mln;

  if (argc != 4)
    return scribo::debug::usage(argv,
				"Extract discontinued horizontal and vertical lines (multi-scale version)",
				"input.pbm length output.ppm",
				args_desc,
				"A color image. Horizontal lines are in red and vertical lines in green.");

  trace::entering("main");

  typedef image2d<bool> I;
  dpoint2d none(0, 0);

  // Parse the length once; it is needed at every scale.
  // NOTE(review): atoi() performs no validation, a non-numeric argument
  // yields 0.
  const int length = atoi(argv[2]);
  const char* const output_path = argv[3];

  I input;
  io::pbm::load(input, argv[1]);

  // Scale 1/1.
  std::cout << "1/1" << std::endl;
  I hlines = scribo::primitive::extract::lines_h_pattern(input, length, 3);

  image2d<value::rgb8> out = debug::superpose(input, hlines, literal::red);
  io::ppm::save(out, "out_1_1.ppm");

  // Scale 1/2.
  std::cout << "1/2" << std::endl;
  I input_sub2x = mln::subsampling::subsampling(input, none, 2);
  I out_sub2 = process(input_sub2x, "out_1_2.ppm", length, 3, 2);

  // Presumably brings the subsampled result back to the input
  // resolution so it can be combined with `input` below -- TODO confirm.
  out_sub2 = world::binary_2d::enlarge(out_sub2, 1);

  I tmp = morpho::Rd::sequential(hlines, input, c8());
  io::pbm::save(tmp, "rd.pbm");

  I tmp_sub2 = morpho::Rd::sequential(out_sub2, input, c8());
  io::pbm::save(tmp_sub2, "rd_sub2.pbm");

  // Both scales are drawn in red on top of the input.
  out = debug::superpose(input, tmp_sub2, literal::red);
  out = debug::superpose(out, tmp, literal::red);
  io::ppm::save(out, output_path);

  trace::exiting("main");
}
// Copyright (C) 2009 EPITA Research and Development Laboratory (LRDE)
// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory
// (LRDE)
//
// This file is part of Olena.
//
......@@ -24,21 +24,109 @@
// exception does not however invalidate any other reasons why the
// executable file might be covered by the GNU General Public License.
#include <mln/essential/2d.hh>
#include <mln/util/ord.hh>
int main()
#include <libgen.h>
#include <fstream>
#include <iostream>
#include <mln/core/image/image2d.hh>
#include <mln/core/alias/neighb2d.hh>
#include <mln/io/pbm/all.hh>
#include <scribo/toolchain/text_in_doc.hh>
#include <scribo/core/line_set.hh>
#include <scribo/debug/usage.hh>
#include <scribo/make/debug_filename.hh>
#include <scribo/preprocessing/crop_without_localization.hh>
#include <scribo/io/xml/save_text_lines.hh>
#include <scribo/io/text_boxes/save.hh>
// Descriptions of the command-line arguments, passed to
// scribo::debug::usage() by main().  Each row is a
// { argument name, description } pair; the table is closed by a
// { 0, 0 } sentinel row.
const char *args_desc[][2] =
{
  // The description is split with adjacent string literals, which the
  // compiler concatenates.  A backslash-newline inside the literal would
  // glue "'True'" to "for" (or embed the source indentation in the help
  // text).
  { "input.pbm", "A binary image. 'False' for object, 'True' "
		 "for the background." },
  { "out.txt", "Text output" },
  { "denoise_enabled", "1 enables denoising, 0 disables it. (enabled by default)" },
  { "pmin_row", "Row index of the top left corner of the Region of interest." },
  { "pmin_col", "Col index of the top left corner of the Region of interest." },
  { "pmax_row", "Row index of the bottom right corner of the Region of interest." },
  { "pmax_col", "Col index of the bottom right corner of the Region of interest." },
  { "debug_dir", "Output directory for debug image" },
  { 0, 0 }
};
// Entry point: loads a binary image, optionally crops it to a region of
// interest, runs scribo::toolchain::text_in_doc() on it and saves the
// resulting text lines both as XML ("out.xml") and as text boxes (argv[2]).
//
// Accepted argument counts (argc, program name included):
//   3: input.pbm out.txt
//   4: input.pbm out.txt denoise_enabled
//   5: input.pbm out.txt denoise_enabled debug_dir
//   8: input.pbm out.txt denoise_enabled pmin_row pmin_col pmax_row pmax_col
//   9: same as 8, followed by debug_dir
// Any other count prints the usage and returns the value of
// scribo::debug::usage().
int main(int argc, char* argv[])
{
using namespace scribo;
using namespace mln;
if (argc != 3 && argc != 4 && argc != 5 && argc != 8 && argc != 9)
return scribo::debug::usage(argv,
"Find text lines using left/right validation and display x-height in a binarized article.",
"input.pbm out.txt <denoise_enabled> [<pmin_row> <pmin_col> <pmax_row> <pmax_col>] <debug_dir>",
args_desc);
bool debug = false;
// Enable debug output.  With 5 or 9 arguments the last one is the debug
// directory; it is stored as the prefix used by scribo::make::debug_filename().
if (argc == 5 || argc == 9)
{
scribo::make::internal::debug_filename_prefix = argv[argc - 1];
debug = true;
}
trace::entering("main");
image2d<bool> input;
mln::io::pbm::load(input, argv[1]);
// Optional cropping.  crop_shift stays at the origin when no region of
// interest is given.
point2d crop_shift = literal::origin;
if (argc >= 8)
{
// Region-of-interest corners, read from argv[4..7].
// NOTE(review): atoi() does no validation; non-numeric input yields 0.
mln::def::coord
minr = atoi(argv[4]),
minc = atoi(argv[5]),
maxr = atoi(argv[6]),
maxc = atoi(argv[7]);
box2d roi = mln::make::box2d(minr, minc, maxr, maxc);
input = preprocessing::crop_without_localization(input, roi);
// Remember the top-left corner of the crop; it is passed to
// text_boxes::save() below.
crop_shift = point2d(minr, minc);
if (debug)
mln::io::pbm::save(input,
scribo::make::debug_filename("input_cropped.pbm"));
}
// Denoising is on only when a third argument is given and is non-zero.
// NOTE(review): with two arguments (argc == 3) this evaluates to false,
// whereas the args_desc help text says "enabled by default" -- confirm
// which one is intended.
bool denoise = (argc > 3 && atoi(argv[3]) != 0);
// Run document toolchain.
typedef image2d<scribo::def::lbl_type> L;
line_set<L>
lines = scribo::toolchain::text_in_doc(input, denoise, debug);
// NOTE(review): p1/p2 and the util::ord_strict / util::ord_weak prints
// below compare two identical points and do not use the extracted lines;
// they look like leftovers from another version of this file -- confirm
// before relying on this output.
point2d p1(0, 2);
point2d p2(0, 2);
// Saving results: XML goes to the fixed file name "out.xml".
scribo::io::xml::save_text_lines(argv[1], lines, "out.xml");
std::cout << util::ord_strict(p1, p2) << std::endl;
std::cout << util::ord_weak(p1, p2) << std::endl;
// Specify shift due to previous crop.
scribo::io::text_boxes::save(lines, argv[2], crop_shift);
std::cout << util::ord_strict(p2, p1) << std::endl;
std::cout << util::ord_weak(p2, p1) << std::endl;
trace::exiting("main");
}
......@@ -25,12 +25,34 @@ bin_PROGRAMS = \
homogeneous_contrast \
preprocess \
split_bg_fg \
subsample \
unskew
denoise_SOURCES = denoise.cc
homogeneous_contrast_SOURCES = homogeneous_contrast.cc
preprocess_SOURCES = preprocess.cc
split_bg_fg_SOURCES = split_bg_fg.cc
subsample_SOURCES = subsample.cc
unskew_SOURCES = unskew.cc
if HAVE_MAGICKXX
bin_PROGRAMS += to_pgm
to_pgm_SOURCES = to_pgm.cc
to_pgm_CPPFLAGS = $(AM_CPPFLAGS) \
$(TESSERACT_CPPFLAGS) \
`Magick++-config --cppflags`
to_pgm_LDFLAGS = $(AM_LDFLAGS) \
$(TESSERACT_LDFLAGS) \
-lpthread `Magick++-config --libs`
bin_PROGRAMS += subsample
subsample_SOURCES = subsample.cc
subsample_CPPFLAGS = $(AM_CPPFLAGS) \
$(TESSERACT_CPPFLAGS) \
`Magick++-config --cppflags`
subsample_LDFLAGS = $(AM_LDFLAGS) \
$(TESSERACT_LDFLAGS) \
-lpthread `Magick++-config --libs`
endif HAVE_MAGICKXX
\ No newline at end of file
......@@ -25,19 +25,21 @@
#include <mln/core/image/image2d.hh>
#include <mln/core/alias/neighb2d.hh>
#include <mln/io/pgm/all.hh>
#include <mln/io/magick/load.hh>
#include <mln/io/ppm/save.hh>
#include <mln/subsampling/antialiased.hh>
#include <mln/value/int_u8.hh>
#include <mln/value/rgb8.hh>
#include <scribo/debug/usage.hh>
// Descriptions of the command-line arguments, passed to
// scribo::debug::usage().  Each row is a { argument name, description }
// pair; the table is closed by a { 0, 0 } sentinel row.
const char *args_desc[][2] =
{
// NOTE(review): the next two rows both describe the input argument
// ("input.pgm" and "input.*"); presumably only one of them is meant to
// remain -- confirm.
{ "input.pgm", "A gray-scale image." },
{ "input.*", "An image." },
{ "ratio", "Scale ratio." },
{ "output.ppm", "A color image." },
{0, 0}
};
......@@ -49,17 +51,17 @@ int main(int argc, char *argv[])
if (argc != 4)
return scribo::debug::usage(argv,
"Subsample.",
"input.pgm ratio output.pgm",
"input.* ratio output.ppm",
args_desc);
trace::entering("main");
typedef image2d<value::int_u8> I;
typedef image2d<value::rgb8> I;
I input;
io::pgm::load(input, argv[1]);
io::magick::load(input, argv[1]);
unsigned ratio = atoi(argv[2]);
io::pgm::save(mln::subsampling::antialiased(input, ratio), argv[3]);
io::ppm::save(mln::subsampling::antialiased(input, ratio), argv[3]);
trace::exiting("main");
}
......@@ -29,15 +29,15 @@ cleantxt_SOURCES = cleantxt.cc
if HAVE_TESSERACT
if HAVE_TIFF
bin_PROGRAMS += recognition
recognition_SOURCES = recognition.cc
recognition_CPPFLAGS = $(AM_CPPFLAGS) \
$(TESSERACT_CPPFLAGS) \
$(TIFF_CPPFLAGS)
recognition_LDFLAGS = $(AM_LDFLAGS) \
$(TESSERACT_LDFLAGS) \
$(TIFF_LDFLAGS) \
-lpthread
bin_PROGRAMS += pbm_lines_recognition
pbm_lines_recognition_SOURCES = pbm_lines_recognition.cc
pbm_lines_recognition_CPPFLAGS = $(AM_CPPFLAGS) \
$(TESSERACT_CPPFLAGS) \
$(TIFF_CPPFLAGS)
pbm_lines_recognition_LDFLAGS = $(AM_LDFLAGS) \
$(TESSERACT_LDFLAGS) \
$(TIFF_LDFLAGS) \
-lpthread
bin_PROGRAMS += pbm_recognition
pbm_recognition_SOURCES = pbm_recognition.cc
......
// Copyright (C) 2009 EPITA Research and Development Laboratory (LRDE)
// Copyright (C) 2010 EPITA Research and Development Laboratory (LRDE)
//
// This file is part of Olena.
//
......@@ -26,33 +26,30 @@
#include <iostream>
#include <mln/core/image/image2d.hh>
#include <mln/util/array.hh>
#include <mln/io/txt/save.hh>
#include <mln/io/pbm/load.hh>
#include <mln/value/int_u16.hh>
#include <mln/value/label_16.hh>
#include <scribo/text/recognition.hh>
#include <mln/core/alias/neighb2d.hh>
#include <scribo/debug/usage.hh>
#include <scribo/primitive/extract/objects.hh>
#include <scribo/core/component_set.hh>
#include <scribo/core/object_links.hh>
#include <scribo/core/object_groups.hh>
#include <scribo/primitive/group/apply.hh>
#include <scribo/primitive/link/with_several_left_links.hh>
#include <scribo/primitive/link/with_several_right_links.hh>
#include <scribo/primitive/group/from_double_link.hh>
#include <scribo/filter/objects_small.hh>
#include <scribo/filter/objects_thin.hh>
#include <scribo/text/recognition.hh>