Commit c7cb9f3c authored by Guillaume Lazzara's avatar Guillaume Lazzara
Browse files

Improve OCR recognition.

	* src/text_in_article_pbm.cc: Update call to text::recognition.

	* text/clean.hh: Resize text lines according to their x_height.

	* text/extract_lines.hh: Update call to extract::components.

	* text/merging.hh: Remove Debug.

	* text/recognition.hh: Update call to text::clean and do
	recognition only on lines considered as text.

	* subsampling/bilinear.hh,
	* upsampling/bs2x.hh: New.
parent 28453cfc
2010-04-30 Guillaume Lazzara <z@lrde.epita.fr>
Improve OCR recognition.
* src/text_in_article_pbm.cc: Update call to text::recognition.
* text/clean.hh: Resize text lines according to their x_height.
* text/extract_lines.hh: Update call to extract::components.
* text/merging.hh: Remove Debug.
* text/recognition.hh: Update call to text::clean and do
recognition only on lines considered as text.
* subsampling/bilinear.hh,
* upsampling/bs2x.hh: New.
2010-04-30 Guillaume Lazzara <z@lrde.epita.fr>
* postprocessing/fill_object_holes.hh: New routine.
......
......@@ -48,12 +48,14 @@
#include <scribo/primitive/remove/separators.hh>
#include <scribo/primitive/link/merge_double_link.hh>
#include <scribo/primitive/link/internal/dmax_width_and_height.hh>
#include <scribo/primitive/link/with_single_left_link_dmax_ratio.hh>
#include <scribo/primitive/link/with_single_right_link_dmax_ratio.hh>
#include <scribo/primitive/group/from_single_link.hh>
#include <scribo/filter/object_links_bbox_h_ratio.hh>
#include <scribo/filter/objects_small.hh>
#include <scribo/debug/usage.hh>
......@@ -71,6 +73,7 @@
#include <scribo/preprocessing/denoise_fg.hh>
#include <scribo/io/xml/save_text_lines.hh>
// #include <mln/morpho/closing/structural.hh>
// #include <mln/win/rectangle2d.hh>
......@@ -80,6 +83,7 @@ const char *args_desc[][2] =
{
{ "input.pbm", "A binary image. 'False' for object, 'True'\
for the background." },
{ "out.txt", "Text output" },
{ "denoise", "1 enables denoising, 0 disables it. (enabled by default)" },
{ "debug_dir", "Output directory for debug image" },
{0, 0}
......@@ -95,8 +99,7 @@ int main(int argc, char* argv[])
return scribo::debug::usage(argv,
"Find text lines using left/right validation and display x-height in a binarized article.",
"input.pbm out.txt <denoise: 0|1> <debug_dir>",
args_desc,
"Text output.");
args_desc);
if (argc == 5)
scribo::make::internal::debug_filename_prefix = argv[4];
......@@ -151,13 +154,19 @@ int main(int argc, char* argv[])
components.add_separators(separators);
// components.add_separators(whitespaces);
components = scribo::filter::components_small(components, 3);
/// Linking potential objects
std::cout << "Linking objects..." << std::endl;
object_links<L> left_link
= primitive::link::with_single_left_link_dmax_ratio(components, 2);
= primitive::link::with_single_left_link_dmax_ratio(components,
primitive::link::internal::dmax_width_and_height(1),
anchor::MassCenter);
object_links<L> right_link
= primitive::link::with_single_right_link_dmax_ratio(components, 2);
= primitive::link::with_single_right_link_dmax_ratio(components,
primitive::link::internal::dmax_width_and_height(1),
anchor::MassCenter);
// Validating left and right links.
object_links<L>
......@@ -218,38 +227,38 @@ int main(int argc, char* argv[])
scribo::make::debug_filename("step1_looks_like_a_text_line.ppm"));
// Bboxes + line infos
{
std::ofstream file(scribo::make::debug_filename("step1_bboxes_100p.txt").c_str());
std::ofstream file_50p(scribo::make::debug_filename("step1_bboxes_50p.txt").c_str());
for_all_lines(l, lines)
if (lines(l).tag() != line::Merged
&& lines(l).tag() != line::Ignored
&& lines(l).tag() != line::Pathological)
{
file << lines(l).bbox().pmin().row() << " "
<< lines(l).bbox().pmin().col() << " "
<< lines(l).bbox().pmax().row() << " "
<< lines(l).bbox().pmax().col() << " "
<< lines(l).card() << " "
<< lines(l).baseline() << " "
<< lines(l).x_height() << " "
<< lines(l).meanline() << " "
<< lines(l).d_height() << " "
<< lines(l).a_height() << " "
<< lines(l).char_space() << " "
<< lines(l).char_width() << std::endl;
// // Bboxes + line infos
// {
// std::ofstream file(scribo::make::debug_filename("step1_bboxes_100p.txt").c_str());
// // std::ofstream file_50p(scribo::make::debug_filename("step1_bboxes_50p.txt").c_str());
file_50p << lines(l).bbox().pmin().row() / 2 << " "
<< lines(l).bbox().pmin().col() / 2 << " "
<< lines(l).bbox().pmax().row() / 2 << " "
<< lines(l).bbox().pmax().col() / 2 << std::endl;
}
// for_all_lines(l, lines)
// if (lines(l).tag() != line::Merged
// && lines(l).tag() != line::Ignored
// && lines(l).tag() != line::Pathological)
// {
// file << lines(l).bbox().pmin().row() << " "
// << lines(l).bbox().pmin().col() << " "
// << lines(l).bbox().pmax().row() << " "
// << lines(l).bbox().pmax().col() << " "
// << lines(l).card() << " "
// << lines(l).baseline() << " "
// << lines(l).x_height() << " "
// << lines(l).meanline() << " "
// << lines(l).d_height() << " "
// << lines(l).a_height() << " "
// << lines(l).char_space() << " "
// << lines(l).char_width() << std::endl;
// // file_50p << lines(l).bbox().pmin().row() / 2 << " "
// // << lines(l).bbox().pmin().col() / 2 << " "
// // << lines(l).bbox().pmax().row() / 2 << " "
// // << lines(l).bbox().pmax().col() / 2 << std::endl;
// }
file.close();
file_50p.close();
}
// file.close();
// // file_50p.close();
// }
// mean and base lines.
......@@ -280,57 +289,49 @@ int main(int argc, char* argv[])
scribo::debug::save_bboxes_image(input, lines,
scribo::make::debug_filename("step2_bboxes.ppm"));
//===== END OF DEBUG =====
{
std::ofstream file(scribo::make::debug_filename("step2_bboxes_100p.txt").c_str());
// std::ofstream file_50p(scribo::make::debug_filename("step2_bboxes_50p.txt").c_str());
scribo::text::recognition(lines, "fra", argv[2]);
for_all_lines(l, lines)
if (lines(l).tag() != line::Merged
&& lines(l).tag() != line::Ignored
&& lines(l).tag() != line::Pathological)
{
file << lines(l).bbox().pmin().row() << " "
<< lines(l).bbox().pmin().col() << " "
<< lines(l).bbox().pmax().row() << " "
<< lines(l).bbox().pmax().col() << " "
<< lines(l).card() << " "
<< lines(l).baseline() << " "
<< lines(l).x_height() << " "
<< lines(l).meanline() << " "
<< lines(l).d_height() << " "
<< lines(l).a_height() << " "
<< lines(l).char_space() << " "
<< lines(l).char_width() << std::endl;
// file_50p << lines(l).bbox().pmin().row() / 2 << " "
// << lines(l).bbox().pmin().col() / 2 << " "
// << lines(l).bbox().pmax().row() / 2 << " "
// << lines(l).bbox().pmax().col() / 2 << std::endl;
}
// // Display median character space.
// {
// image2d<value::rgb8> output = data::convert(value::rgb8(), input);
// typedef mln::value::int_u<8> median_t;
// typedef mln::accu::stat::median_h<median_t> accu_t;
// util::array<accu_t>
// lspace_med(static_cast<unsigned>(grouped_objects.nlabels()) + 1);
// for_all_components(i, filtered_objects.bboxes())
// if (groups_packed(i) != 0)
// {
// if (hratio_filtered_links(i) != i)
// {
// unsigned
// space = filtered_objects.bbox(i).pmin().col() - filtered_objects.bbox(hratio_filtered_links(i)).pmax().col();
file.close();
// file_50p.close();
}
// lspace_med(groups_packed(i)).take(space);
// }
// }
//===== END OF DEBUG =====
// std::cout << "Drawing median character space" << std::endl;
// for_all_components(i, filtered_objects.bboxes())
// if (groups_packed(i) != 0 && lspace_med(groups_packed(i)).card() > 1)
// {
// unsigned med = lspace_med(groups_packed(i)).to_result();
// mln::draw::box(output, grouped_objects.bbox(groups_packed(i)),
// literal::purple);
scribo::io::xml::save_text_lines(argv[1], lines, "out.xml");
// point2d
// beg = filtered_objects.bbox(i).pmax(),
// end = beg;
// beg.row() = filtered_objects.bbox(i).pmin().row();
// mln::draw::line(output, beg, end, literal::cyan);
// beg.col() += med;
// end.col() += med;
// mln::draw::line(output, beg, end, literal::cyan);
// }
// io::ppm::save(output, "median_wspace.ppm");
scribo::text::recognition(lines, "fra", argv[2]);
// }
trace::exiting("main");
}
// Copyright (C) 2010 EPITA Research and Development Laboratory (LRDE)
//
// This file is part of Olena.
//
// Olena is free software: you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free
// Software Foundation, version 2 of the License.
//
// Olena is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Olena. If not, see <http://www.gnu.org/licenses/>.
//
// As a special exception, you may use this file as part of a free
// software project without restriction. Specifically, if other files
// instantiate templates or use macros or inline functions from this
// file, or you compile this file and link it with other files to produce
// an executable, this file does not by itself cause the resulting
// executable to be covered by the GNU General Public License. This
// exception does not however invalidate any other reasons why the
// executable file might be covered by the GNU General Public License.
#ifndef SCRIBO_SUBSAMPLING_BILINEAR_HH
# define SCRIBO_SUBSAMPLING_BILINEAR_HH
/// \file
///
/// Bilinear subsampling.
# include <mln/core/concept/image.hh>
# include <mln/opt/at.hh>
namespace scribo
{
namespace subsampling
{
/// \brief Subsample \p input_ by the integer ratio \p sub_ratio.
///
/// \param[in] input_    The image to subsample.
/// \param[in] sub_ratio Divisor applied to the number of rows and
///                      columns of the input domain to size the output.
///
/// \return An image of type mln_concrete(I) holding the subsampled data.
template <typename I>
mln_concrete(I)
bilinear(const Image<I>& input_, int sub_ratio);
# ifndef MLN_INCLUDE_ONLY
/// \brief Subsample a 2D image by an integer ratio.
///
/// The output has input.domain().nrows() / sub_ratio rows and
/// input.domain().ncols() / sub_ratio columns.  Each output site is
/// computed from a 2x2 block of input values, weighted by the offsets
/// (dx, dy) between the requested position and the sampled one.
///
/// \param[in] input_    The image to subsample.  Its domain type must
///                      be box2d (checked at compile time).
/// \param[in] sub_ratio The integer subsampling ratio.  Must be > 0.
///
/// \return The subsampled image.
template <typename I>
mln_concrete(I)
bilinear(const Image<I>& input_, int sub_ratio)
{
  trace::entering("scribo::subsampling::bilinear");

  const I& input = exact(input_);
  mln_precondition(input.is_valid());
  // The output size is obtained by dividing by sub_ratio: reject 0
  // (division by zero) and negative ratios.
  mln_precondition(sub_ratio > 0);
  mlc_is(mln_domain(I), box2d)::check();

  mln_concrete(I) output(input.domain().nrows() / sub_ratio,
                         input.domain().ncols() / sub_ratio);
  std::cout << "output domain = " << output.domain() << std::endl;

  mln_piter(I) p(output.domain());
  mln_value(I) pixels[4];

  // x and y below are relative to the input domain origin; these
  // offsets translate them back to absolute input coordinates.
  def::coord
    x_offset = input.domain().pmin().col(),
    y_offset = input.domain().pmin().row();

  // Largest valid *relative* column/row index.  The clamping must not
  // use geom::max_col()/geom::max_row(), which are absolute coordinates
  // and only coincide with these values when pmin is the origin.
  const int last_col = static_cast<int>(input.ncols()) - 1;
  const int last_row = static_cast<int>(input.nrows()) - 1;

  for_all(p)
  {
    int x = round(-0.5 + p.col() * sub_ratio);
    int y = round(-0.5 + p.row() * sub_ratio);

    if (x < 0)
      x = 0;
    else if (x > last_col)
      x = last_col;

    if (y < 0)
      y = 0;
    else if (y > last_row)
      y = last_row;

    // Right/bottom neighbors, kept inside the domain so that the last
    // column/row never reads outside of the image.
    const int x1 = (x < last_col) ? x + 1 : x;
    const int y1 = (y < last_row) ? y + 1 : y;

    double dx = (p.col() * sub_ratio) - x;
    double dy = (p.row() * sub_ratio) - y;

    pixels[0] = opt::at(input, y + y_offset, x + x_offset);
    pixels[1] = opt::at(input, y + y_offset, x1 + x_offset);
    pixels[2] = opt::at(input, y1 + y_offset, x + x_offset);
    pixels[3] = opt::at(input, y1 + y_offset, x1 + x_offset);

    output(p) = pixels[0] * (1 - dx) * (1 - dy) + pixels[1] * dx * (1 - dy) +
      pixels[2] * (1 - dx) * dy + pixels[3] * dx * dy;
  }

  trace::exiting("scribo::subsampling::bilinear");
  return output;
}
# endif // ! MLN_INCLUDE_ONLY
} // end of namespace subsampling
} // end of namespace scribo
#endif // SCRIBO_SUBSAMPLING_BILINEAR_HH
......@@ -64,8 +64,9 @@
#include <mln/value/rgb8.hh>
#include <mln/io/pgm/all.hh>
#include <sandbox/inim/2009/ocr/resize.hh>
#include <sandbox/fabien/mln/upsampling/hq2x.hh>
#include <scribo/upsampling/bs2x.hh>
#include <scribo/subsampling/bilinear.hh>
namespace scribo
{
......@@ -80,69 +81,60 @@ namespace scribo
///
/// \param[in] input_ A binary image. Objects are set to 'false'
/// and background to 'true'.
/// \param[in] dmap_win_ A weighted window.
///
/// \return An image. The text has better quality.
//
template <typename I, typename W>
template <typename L, typename I>
mln_concrete(I)
clean(const Image<I>& input_, const Weighted_Window<W>& dmap_win_);
clean(const line_info<L>& line, const Image<I>& input_);
# ifndef MLN_INCLUDE_ONLY
template <typename I, typename W>
template <typename L, typename I>
mln_concrete(I)
clean(const Image<I>& input_, const Weighted_Window<W>& dmap_win_)
clean(const line_info<L>& line, const Image<I>& input_)
{
trace::entering("scribo::text::clean");
const I& input = exact(input_);
const W& dmap_win = exact(dmap_win_);
mlc_bool(mln_site_(I)::dim == 2)::check();
mlc_equal(mln_value(I),bool)::check();
mln_precondition(input.is_valid());
mln_precondition(dmap_win.is_valid());
(void) dmap_win;
// Resize
typedef image2d<value::rgb8> J;
J tmp = data::convert(value::rgb8(), input);
J clarge = tmp;//mln::upsampling::hq2x(tmp); (FIXME: re-enable)
//FIXME: not generic!
// if (input.domain().pmax()[0] - input.domain().pmin()[0] <= 10)
// clarge = mln::upsampling::hq4x(clarge);
I input_large = data::convert(bool(), clarge);
// Blur
image2d<value::int_u8>
blur = linear::gaussian(data::convert(value::int_u8(), input_large), 2);
// Skeleton constraint
I K = topo::skeleton::crest(input_large, blur, c8());
// Skeleton
I skel_on_gaussian =
morpho::skeleton_constrained(input_large, c8(),
topo::skeleton::is_simple_point<I,neighb2d>,
extend(K, false), arith::revert(blur));
// Dilation
win::octagon2d oct(7);
I dilate_on_gaussian = morpho::dilation(skel_on_gaussian, oct);
// io::pgm::save(arith::revert(blur), "blur_revert.pgm");
// io::pgm::save(blur, "gaussian.pgm");
// io::pbm::save(input_large, mln::debug::filename("input_large_4x.pbm"));
// io::pbm::save(K, mln::debug::filename("K.pbm"));
// io::pbm::save(skel_on_gaussian, mln::debug::filename("skeleton_on_gaussian.pbm"));
// io::pbm::save(dilate_on_gaussian, mln::debug::filename("dilation_on_gaussian.pbm"));
mln_precondition(line.is_valid());
mln_concrete(I) output = duplicate(input);
if (line.x_height() < 5) // Non significative text/remaining lines...
return output;
float fact = line.x_height() / 40.0f;
std::cout << fact << " - " << output.domain() << std::endl;
if (fact < 1)
{
std::cout << "Upsampling..." << " - "
<< std::ceil(fact) << std::endl;
while (fact < 1)
{
output = scribo::upsampling::bs2x(output); // 2x upsampling
fact *= 2.0f;
// std::cout << "fact = " << fact
// << " - output.domain = " << output.domain()
// << std::endl;
}
}
else if (fact > 2.5f)
{
std::cout << "subsampling::bilinear" << " - "
<< std::ceil(fact) << std::endl;
output = subsampling::bilinear(output, std::ceil(fact - 0.5)); // math::floor instead?
}
else
std::cout << "not cleaning text. Seems ok." << std::endl;
trace::exiting("scribo::text::clean");
return dilate_on_gaussian;
return output;
}
# endif // ! MLN_INCLUDE_ONLY
......
......@@ -47,7 +47,7 @@
# include <mln/util/graph.hh>
# include <mln/value/label_16.hh>
# include <scribo/primitive/extract/objects.hh>
# include <scribo/primitive/extract/components.hh>
# include <scribo/primitive/group/apply.hh>
# include <scribo/primitive/link/with_several_left_links.hh>
# include <scribo/primitive/link/with_several_right_links.hh>
......@@ -75,7 +75,7 @@ namespace scribo
** \param[in,out] nbboxes Will hold the number of bounding boxes
** at the end of the routine.
**
** \return An object image with grouped potential text objects.
** \return An object image with grouped potential text components.
*/
template <typename I, typename N, typename V>
object_image(mln_ch_value(I,V))
......@@ -103,7 +103,7 @@ namespace scribo
typedef mln_ch_value(I,V) L;
typedef object_image(L) text_t;
text_t text = scribo::primitive::extract::objects(input, nbh, nbboxes);
text_t text = scribo::primitive::extract::components(input, nbh, nbboxes);
# ifndef SCRIBO_NDEBUG
debug::save_bboxes_image(input, text.bboxes(), literal::red,
......
......@@ -800,21 +800,21 @@ namespace scribo
(void) ith_pass;
if (ith_pass == 1)
{
mln::io::pgm::save(log, "log_1.pgm");
mln::io::pgm::save(data::wrap(int_u8(), billboard), "log_1e.pgm");
}
else if (ith_pass == 2)
{
mln::io::pgm::save(log, "log_2.pgm");
mln::io::pgm::save(data::wrap(int_u8(), billboard), "log_2e.pgm");
}
else if (ith_pass == 3)
{
mln::io::pgm::save(log, "log_3.pgm");
mln::io::pgm::save(data::wrap(int_u8(), billboard), "log_3e.pgm");
}
// if (ith_pass == 1)
// {
// mln::io::pgm::save(log, "log_1.pgm");
// mln::io::pgm::save(data::wrap(int_u8(), billboard), "log_1e.pgm");
// }
// else if (ith_pass == 2)
// {
// mln::io::pgm::save(log, "log_2.pgm");
// mln::io::pgm::save(data::wrap(int_u8(), billboard), "log_2e.pgm");
// }
// else if (ith_pass == 3)
// {
// mln::io::pgm::save(log, "log_3.pgm");
// mln::io::pgm::save(data::wrap(int_u8(), billboard), "log_3e.pgm");
// }
}
......
......@@ -128,11 +128,12 @@ namespace scribo
/// Use text bboxes with Tesseract
for_all_lines(i, lines)
{
if (! lines(i).is_valid())
if (! lines(i).is_valid() || lines(i).tag() != line::None || lines(i).type() != line::Text)
continue;
std::cout << "Text recognition... ("
<< i << "/" << lines.nelements() << ")" << std::endl;
std::cout << "x_height = " << lines(i).x_height() << std::endl;
mln_domain(I) box = lines(i).bbox();
// Make sure characters are isolated from the borders.
......@@ -157,7 +158,7 @@ namespace scribo
/// Improve text quality.
/// text_ima_cleaned domain is larger than text_ima's.
I text_ima_cleaned = text::clean(text_ima, dmap_win);
I text_ima_cleaned = text::clean(lines(i), text_ima);
mln::io::pbm::save(text_ima_cleaned, mln::debug::filename("line.pbm", debug_id++));
// Setting objects to 'True'
......
// Copyright (C) 2010 EPITA Research and Development Laboratory (LRDE)
//
// This file is part of Olena.
//
// Olena is free software: you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free
// Software Foundation, version 2 of the License.
//
// Olena is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Olena. If not, see <http://www.gnu.org/licenses/>.
//
// As a special exception, you may use this file as part of a free
// software project without restriction. Specifically, if other files
// instantiate templates or use macros or inline functions from this
// file, or you compile this file and link it with other files to produce
// an executable, this file does not by itself cause the resulting
// executable to be covered by the GNU General Public License. This
// exception does not however invalidate any other reasons why the
// executable file might be covered by the GNU General Public License.
#ifndef SCRIBO_UPSAMPLING_BS2X_HH
# define SCRIBO_UPSAMPLING_BS2X_HH
/// \file
///
/// Scale 2x algorithm for binary images.
# include <mln/core/concept/image.hh>
# include <mln/core/alias/box2d.hh>
# include <mln/opt/at.hh>
# include <mln/geom/all.hh>
namespace scribo
{
namespace upsampling
{
using namespace mln;
/// \brief Scale 2x algorithm for binary images.
///
/// \param[in] input The image to upscale.
///   NOTE(review): presumably expected to be a binary image, as the
///   brief states -- confirm against the implementation.
///
/// \return An image of type mln_concrete(I).
//
template <typename I>
mln_concrete(I)
bs2x(const Image<I>& input);