save_text_lines.hh 4.62 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
// Copyright (C) 2010 EPITA Research and Development Laboratory (LRDE)
//
// This file is part of Olena.
//
// Olena is free software: you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free
// Software Foundation, version 2 of the License.
//
// Olena is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Olena.  If not, see <http://www.gnu.org/licenses/>.
//
// As a special exception, you may use this file as part of a free
// software project without restriction.  Specifically, if other files
// instantiate templates or use macros or inline functions from this
// file, or you compile this file and link it with other files to produce
// an executable, this file does not by itself cause the resulting
// executable to be covered by the GNU General Public License.  This
// exception does not however invalidate any other reasons why the
// executable file might be covered by the GNU General Public License.

#ifndef SCRIBO_IO_XML_SAVE_TEXT_LINES_HH
# define SCRIBO_IO_XML_SAVE_TEXT_LINES_HH

/// \file
///
/// \brief Save text line information as XML.

#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>


namespace scribo
{

  namespace io
  {

    namespace xml
    {

      /*! \brief Save text line information as XML.

	We use an XML Schema that is part of the PAGE (Page Analysis and
	Ground truth Elements) image representation framework.

	This schema was used in the Page Segmentation COMPetition
	(PSCOMP) for ICDAR 2009.

	Its XSD file is located here:
	http://schema.primaresearch.org/PAGE/gts/pagecontent/2009-03-16/pagecontent.xsd

	\param[in] input_name  Name of the input document image.  It is
			       written verbatim as the \c pcGtsId and
			       \c image_filename attributes.
	\param[in] lines       The text lines to export.  Each line is
			       written as a \c text_region element
			       carrying its id and the corners of its
			       bounding box.
	\param[in] output_name Path of the XML file to create.

	The program is aborted if the output file cannot be opened.

       */
      template <typename L>
      void
      save_text_lines(const std::string& input_name,
		      const line_set<L>& lines,
		      const std::string& output_name);


# ifndef MLN_INCLUDE_ONLY


      /*! \brief Write the text lines of a document as a PAGE XML file.

	The output holds a \c pcGts root, a fixed \c pcMetadata header
	and one \c page element containing a \c text_region per line of
	\p lines.  Each region carries the line id and the four corners
	of the line bounding box.

	\param[in] input_name  Name of the input document image.  It is
			       written verbatim (no XML escaping) as the
			       \c pcGtsId and \c image_filename
			       attributes.
	\param[in] lines       The text lines to export.
	\param[in] output_name Path of the XML file to create.

	The program is aborted if \p output_name cannot be opened for
	writing.
       */
      template <typename L>
      void
      save_text_lines(const std::string& input_name,
		      const line_set<L>& lines,
		      const std::string& output_name)
      {
	trace::entering("scribo::io::xml::save_text_lines");

	std::ofstream file(output_name.c_str());
	if (! file)
	{
	  // The file that failed to open is the output one.
	  std::cerr << "error: cannot open file '" << output_name << "'!"
		    << std::endl;
	  std::abort();
	}

	// '\n' is used instead of std::endl: the stream is flushed once
	// when 'file' is destroyed, not after every line.

	// NOTE(review): input_name is inserted as-is in attribute
	// values; a name containing '"', '&' or '<' would produce
	// invalid XML.
	file << "<?xml version=\"1.0\"?>\n";
	file << "<pcGts xmlns=\"http://schema.primaresearch.org/PAGE/gts/pagecontent/2009-03-16\""
	     << " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\""
	     << " xsi:schemaLocation=\"http://schema.primaresearch.org/PAGE/gts/pagecontent/2009-03-16"
	     << " http://schema.primaresearch.org/PAGE/gts/pagecontent/2009-03-16/pagecontent.xsd\""
	     << " pcGtsId=\"" << input_name << "\">\n";

	file << "  <pcMetadata>\n"
	     << "    <pcCreator>LRDE</pcCreator>\n"
	     << "    <pcCreated/>\n"
	     << "    <pcLastChange/>\n"
	     << "    <pcComments/>\n"
	     << "  </pcMetadata>\n";

	file << "  <page image_filename=\"" << input_name
	     << "\" image_width=\"" << lines.component_set_().labeled_image().ncols()
	     << "\" image_height=\"" << lines.component_set_().labeled_image().nrows()
	     << "\">\n";

	for_all_lines(l, lines)
	{
	  file << "    <text_region id=\"" << lines(l).id()
	       << "\" txt_orientation=\"0.000\" "
	       << "txt_reading_orientation=\"0.000\" "
	       << "txt_reading_direction=\"Left_To_Right\" "
	       << "txt_reverse_video=\"No\" "
	       << "txt_indented=\"No\">\n";

	  // Columns run along the image width (image_width is ncols()
	  // above), so x takes col() and y takes row().
	  // NOTE(review): the corners are emitted in the original
	  // order (top-left, top-right, bottom-left, bottom-right),
	  // which does not follow the box perimeter; confirm the order
	  // expected by the consumers of <coords>.
	  file << "      <coords>\n"
	       << "        <point x=\"" << lines(l).bbox().pmin().col()
	       << "\" y=\"" << lines(l).bbox().pmin().row() << "\"/>\n"
	       << "        <point x=\"" << lines(l).bbox().pmax().col()
	       << "\" y=\"" << lines(l).bbox().pmin().row() << "\"/>\n"
	       << "        <point x=\"" << lines(l).bbox().pmin().col()
	       << "\" y=\"" << lines(l).bbox().pmax().row() << "\"/>\n"
	       << "        <point x=\"" << lines(l).bbox().pmax().col()
	       << "\" y=\"" << lines(l).bbox().pmax().row() << "\"/>\n"
	       << "      </coords>\n";

	  // Close the region opened for this line, so that every
	  // <text_region> is balanced whatever the number of lines.
	  file << "    </text_region>\n";
	}

	file << "  </page>\n";
	file << "</pcGts>\n";

	trace::exiting("scribo::io::xml::save_text_lines");
      }


# endif // ! MLN_INCLUDE_ONLY

    } // end of namespace scribo::io::xml

  } // end of namespace scribo::io

} // end of namespace scribo


#endif // ! SCRIBO_IO_XML_SAVE_TEXT_LINES_HH