Commit f5024327 authored by Guillaume Lazzara
Browse files

scribo/io/xml/save.hh: Make use of document structure.

parent 222955ac
2010-11-15 Guillaume Lazzara <z@lrde.epita.fr>
* scribo/io/xml/save.hh: Make use of document structure.
2010-11-15 Guillaume Lazzara <z@lrde.epita.fr>
* scribo/primitive/extract/elements.hh: New routine.
......
......@@ -28,7 +28,7 @@
/// \file
///
/// \brief Save text line information as XML.
/// \brief Save document information as XML.
# include <fstream>
# include <sstream>
......@@ -46,7 +46,7 @@ namespace scribo
namespace xml
{
/*! \brief Save text line information as XML.
/*! \brief Save document information as XML.
We use an XML Schema that is part of the PAGE (Page Analysis and
Ground truth Elements) image representation framework.
......@@ -60,8 +60,7 @@ namespace scribo
*/
template <typename L>
void
save(const std::string& input_name,
const line_set<L>& lines,
save(const document<L>& doc,
const std::string& output_name,
bool extended_format);
......@@ -86,12 +85,34 @@ namespace scribo
return input;
}
/// \brief Write the four corners of a box as a <coords> XML element.
///
/// The element contains four <point x="col" y="row"/> children, emitted
/// in this order: (pmin.col, pmin.row), (pmax.col, pmin.row),
/// (pmax.col, pmax.row), (pmin.col, pmax.row).
///
/// \param[in,out] ostr  Stream the XML is appended to.
/// \param[in]     b     Box whose pmin()/pmax() corners are written.
/// \param[in]     space Indentation prefix put before the <coords> and
///                      </coords> tags; each <point> line gets this
///                      prefix plus one extra space.  A null pointer is
///                      treated as an empty prefix.
///
/// Declared inline: this is a non-template function defined in a
/// header, so without it every translation unit including the header
/// would emit its own external definition and linking would fail.
inline
void print_box_coords(std::ofstream& ostr, const box2d& b,
                      const char *space)
{
  // Building a std::string from a null pointer is undefined behaviour.
  const std::string sc = space ? space : "";
  const std::string sp = sc + " ";

  ostr << sc << "<coords>" << std::endl
       << sp << "<point x=\"" << b.pmin().col()
       << "\" y=\"" << b.pmin().row() << "\"/>"
       << std::endl
       << sp << "<point x=\"" << b.pmax().col()
       << "\" y=\"" << b.pmin().row() << "\"/>"
       << std::endl
       << sp << "<point x=\"" << b.pmax().col()
       << "\" y=\"" << b.pmax().row() << "\"/>"
       << std::endl
       << sp << "<point x=\"" << b.pmin().col()
       << "\" y=\"" << b.pmax().row() << "\"/>"
       << std::endl
       << sc << "</coords>" << std::endl;
}
} // end of namespace scribo::io::xml::internal
template <typename L>
void
save(const std::string& input_name,
const line_set<L>& lines,
save(const document<L>& doc,
const std::string& output_name,
bool extended_format)
{
......@@ -100,9 +121,12 @@ namespace scribo
std::ofstream file(output_name.c_str());
if (! file)
{
std::cerr << "error: cannot open file '" << input_name << "'!";
std::cerr << "error: cannot open file '" << doc.filename() << "'!";
abort();
}
const line_set<L>& lines = doc.text();
std::map<char, std::string> html_map;
html_map['\"'] = "&quot;";
html_map['<'] = "&lt;";
......@@ -111,13 +135,13 @@ namespace scribo
file << "<?xml version=\"1.0\"?>" << std::endl;
if (extended_format)
{
file << "<pcGts>" << std::endl;
}
{
file << "<pcGts>" << std::endl;
}
else
{
file << "<pcGts xmlns=\"http://schema.primaresearch.org/PAGE/gts/pagecontent/2009-03-16\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://schema.primaresearch.org/PAGE/gts/pagecontent/2009-03-16 http://schema.primaresearch.org/PAGE/gts/pagecontent/2009-03-16/pagecontent.xsd\" pcGtsId=\"" << input_name << "\">" << std::endl;
}
{
file << "<pcGts xmlns=\"http://schema.primaresearch.org/PAGE/gts/pagecontent/2009-03-16\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://schema.primaresearch.org/PAGE/gts/pagecontent/2009-03-16 http://schema.primaresearch.org/PAGE/gts/pagecontent/2009-03-16/pagecontent.xsd\" pcGtsId=\"" << doc.filename() << "\">" << std::endl;
}
file << " <PcMetadata>" << std::endl;
file << " <PcCreator>LRDE</PcCreator>" << std::endl;
......@@ -126,110 +150,86 @@ namespace scribo
file << " <PcComments>Generated by Scribo from Olena.</PcComments>" << std::endl;
file << " </PcMetadata>" << std::endl;
file << " <page image_filename=\"" << input_name
file << " <page image_filename=\"" << doc.filename()
<< "\" image_width=\"" << lines.components().labeled_image().ncols()
<< "\" image_height=\"" << lines.components().labeled_image().nrows()
<< "\">" << std::endl;
for_all_lines(l, lines)
{
if (! lines(l).is_valid()
|| lines(l).tag() != line::None
|| lines(l).type() != line::Text) // Is NOT a text line.
continue;
{
if (! lines(l).is_valid()
|| lines(l).tag() != line::None
|| lines(l).type() != line::Text) // Is NOT a text line.
continue;
file << " <text_region id=\"" << lines(l).id()
<< "\" txt_orientation=\"" << lines(l).orientation()
<< "\" txt_reading_orientation=\"" << lines(l).reading_orientation()
<< "\" txt_reading_direction=\"" << lines(l).reading_direction()
<< "\" txt_text_type=\"" << lines(l).type()
<< "\" txt_reverse_video=\"" << (lines(l).reverse_video() ? "true" : "false")
<< "\" txt_indented=\"" << (lines(l).indented() ? "true" : "false")
<< "\" kerning=\"" << lines(l).char_space();
// EXTENSIONS - Not officially supported
if (extended_format)
{
file << " <text_region id=\"" << lines(l).id()
<< "\" txt_orientation=\"" << lines(l).orientation()
<< "\" txt_reading_orientation=\"" << lines(l).reading_orientation()
<< "\" txt_reading_direction=\"" << lines(l).reading_direction()
<< "\" txt_text_type=\"" << lines(l).type()
<< "\" txt_reverse_video=\"" << (lines(l).reverse_video() ? "true" : "false")
<< "\" txt_indented=\"" << (lines(l).indented() ? "true" : "false")
<< "\" kerning=\"" << lines(l).char_space();
// EXTENSIONS - Not officially supported
if (extended_format)
{
file << "\" baseline=\"" << lines(l).baseline()
<< "\" meanline=\"" << lines(l).meanline()
<< "\" x_height=\"" << lines(l).x_height()
<< "\" d_height=\"" << lines(l).d_height()
<< "\" a_height=\"" << lines(l).a_height()
<< "\" char_width=\"" << lines(l).char_width();
}
// End of EXTENSIONS
file << "\">"
<< std::endl;
file << "\" baseline=\"" << lines(l).baseline()
<< "\" meanline=\"" << lines(l).meanline()
<< "\" x_height=\"" << lines(l).x_height()
<< "\" d_height=\"" << lines(l).d_height()
<< "\" a_height=\"" << lines(l).a_height()
<< "\" char_width=\"" << lines(l).char_width();
}
// End of EXTENSIONS
file << "\">"
<< std::endl;
if (extended_format)
{
file << " <coords>" << std::endl
<< " <point x=\"" << lines(l).bbox().pmin().col()
<< "\" y=\"" << lines(l).bbox().pmin().row() << "\"/>"
<< std::endl
<< " <point x=\"" << lines(l).bbox().pmax().col()
<< "\" y=\"" << lines(l).bbox().pmin().row() << "\"/>"
<< std::endl
<< " <point x=\"" << lines(l).bbox().pmax().col()
<< "\" y=\"" << lines(l).bbox().pmax().row() << "\"/>"
<< std::endl
<< " <point x=\"" << lines(l).bbox().pmin().col()
<< "\" y=\"" << lines(l).bbox().pmax().row() << "\"/>"
<< std::endl
<< " </coords>" << std::endl;
file << " <paragraph>" << std::endl;
file << " <coords>" << std::endl
<< " <point x=\"" << lines(l).bbox().pmin().col()
<< "\" y=\"" << lines(l).bbox().pmin().row() << "\"/>"
<< std::endl
<< " <point x=\"" << lines(l).bbox().pmax().col()
<< "\" y=\"" << lines(l).bbox().pmin().row() << "\"/>"
<< std::endl
<< " <point x=\"" << lines(l).bbox().pmax().col()
<< "\" y=\"" << lines(l).bbox().pmax().row() << "\"/>"
<< std::endl
<< " <point x=\"" << lines(l).bbox().pmin().col()
<< "\" y=\"" << lines(l).bbox().pmax().row() << "\"/>"
<< std::endl
<< " </coords>" << std::endl;
if (lines(l).has_text())
{
std::string tmp = lines(l).text();
tmp = internal::html_markups_replace(tmp, html_map);
file << " <line text=\""
<< tmp
<< "\">" << std::endl;
}
else
file << " <line>" << std::endl;
file << " <coords>" << std::endl
<< " <point x=\"" << lines(l).bbox().pmin().col()
<< "\" y=\"" << lines(l).bbox().pmin().row() << "\"/>"
<< std::endl
<< " <point x=\"" << lines(l).bbox().pmax().col()
<< "\" y=\"" << lines(l).bbox().pmin().row() << "\"/>"
<< std::endl
<< " <point x=\"" << lines(l).bbox().pmax().col()
<< "\" y=\"" << lines(l).bbox().pmax().row() << "\"/>"
<< std::endl
<< " <point x=\"" << lines(l).bbox().pmin().col()
<< "\" y=\"" << lines(l).bbox().pmax().row() << "\"/>"
<< std::endl
<< " </coords>" << std::endl;
file << " </line>" << std::endl;
file << " </paragraph>" << std::endl;
}
internal::print_box_coords(file, lines(l).bbox(), " ");
if (extended_format)
{
file << " <paragraph>" << std::endl;
internal::print_box_coords(file, lines(l).bbox(), " ");
if (lines(l).has_text())
{
std::string tmp = lines(l).text();
tmp = internal::html_markups_replace(tmp, html_map);
file << " <line text=\""
<< tmp
<< "\">" << std::endl;
}
else
file << " <line>" << std::endl;
internal::print_box_coords(file, lines(l).bbox(), " ");
file << " </text_region>" << std::endl;
file << " </line>" << std::endl;
file << " </paragraph>" << std::endl;
}
file << " </text_region>" << std::endl;
}
}
const component_set<L>& elts = doc.elements();
for_all_comps(e, elts)
if (elts(e).is_valid())
{
file << " <image_region id=\"ir" << elts(e).id()
<< "\" img_colour_type=\"24_Bit_Colour\""
<< " img_orientation=\"0.000000\" "
<< " img_emb_text=\"No\" "
<< " img_bgcolour=\"White\">" << std::endl;
internal::print_box_coords(file, elts(e).bbox(), " ");
file << " </image_region>" << std::endl;
}
file << " </page>" << std::endl;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment