Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Olena
olena
Commits
f5024327
Commit
f5024327
authored
Nov 15, 2010
by
Guillaume Lazzara
Browse files
scribo/io/xml/save.hh: Make use of document structure.
parent
222955ac
Changes
2
Hide whitespace changes
Inline
Side-by-side
scribo/ChangeLog
View file @
f5024327
2010-11-15 Guillaume Lazzara <z@lrde.epita.fr>
* scribo/io/xml/save.hh: Make use of document structure.
2010-11-15 Guillaume Lazzara <z@lrde.epita.fr>
2010-11-15 Guillaume Lazzara <z@lrde.epita.fr>
* scribo/primitive/extract/elements.hh: New routine.
* scribo/primitive/extract/elements.hh: New routine.
...
...
scribo/scribo/io/xml/save.hh
View file @
f5024327
...
@@ -28,7 +28,7 @@
...
@@ -28,7 +28,7 @@
/// \file
/// \file
///
///
/// \brief Save
text line
information as XML.
/// \brief Save
document
information as XML.
# include <fstream>
# include <fstream>
# include <sstream>
# include <sstream>
...
@@ -46,7 +46,7 @@ namespace scribo
...
@@ -46,7 +46,7 @@ namespace scribo
namespace
xml
namespace
xml
{
{
/*! \brief Save
text line
information as XML.
/*! \brief Save
document
information as XML.
We use an XML Schema that is part of the PAGE (Page Analysis and Ground
We use an XML Schema that is part of the PAGE (Page Analysis and Ground
truth Elements) image representation framework.
truth Elements) image representation framework.
...
@@ -60,8 +60,7 @@ namespace scribo
...
@@ -60,8 +60,7 @@ namespace scribo
*/
*/
template
<
typename
L
>
template
<
typename
L
>
void
void
save
(
const
std
::
string
&
input_name
,
save
(
const
document
<
L
>&
doc
,
const
line_set
<
L
>&
lines
,
const
std
::
string
&
output_name
,
const
std
::
string
&
output_name
,
bool
extended_format
);
bool
extended_format
);
...
@@ -86,12 +85,34 @@ namespace scribo
...
@@ -86,12 +85,34 @@ namespace scribo
return
input
;
return
input
;
}
}
/// \brief Write a box as a <coords> XML element holding four <point>
/// elements.
///
/// \param ostr  Output file stream receiving the XML text.
/// \param b     Box whose pmin() and pmax() points are combined to
///              produce the four emitted points.
/// \param space Prefix written before the opening and closing <coords>
///              tags; each <point> line is prefixed with \p space
///              followed by the extra padding appended below.
///
/// Column values are written as the x attribute and row values as the
/// y attribute. Every emitted line is terminated with std::endl.
void
print_box_coords
(
std
::
ofstream
&
ostr
,
const
box2d
&
b
,
const
char
*
space
)
{
// Prefix used for the <coords> and </coords> lines.
std
::
string
sc
=
space
;
// Prefix used for the nested <point> lines (sc plus extra padding).
std
::
string
sp
=
sc
+
" "
;
ostr
<<
sc
<<
"<coords>"
<<
std
::
endl
// Point 1: x = pmin column, y = pmin row.
<<
sp
<<
"<point x=
\"
"
<<
b
.
pmin
().
col
()
<<
"
\"
y=
\"
"
<<
b
.
pmin
().
row
()
<<
"
\"
/>"
<<
std
::
endl
// Point 2: x = pmax column, y = pmin row.
<<
sp
<<
"<point x=
\"
"
<<
b
.
pmax
().
col
()
<<
"
\"
y=
\"
"
<<
b
.
pmin
().
row
()
<<
"
\"
/>"
<<
std
::
endl
// Point 3: x = pmax column, y = pmax row.
<<
sp
<<
"<point x=
\"
"
<<
b
.
pmax
().
col
()
<<
"
\"
y=
\"
"
<<
b
.
pmax
().
row
()
<<
"
\"
/>"
<<
std
::
endl
// Point 4: x = pmin column, y = pmax row.
<<
sp
<<
"<point x=
\"
"
<<
b
.
pmin
().
col
()
<<
"
\"
y=
\"
"
<<
b
.
pmax
().
row
()
<<
"
\"
/>"
<<
std
::
endl
<<
sc
<<
"</coords>"
<<
std
::
endl
;
}
}
// end of namespace scribo::io::xml::internal
}
// end of namespace scribo::io::xml::internal
template
<
typename
L
>
template
<
typename
L
>
void
void
save
(
const
std
::
string
&
input_name
,
save
(
const
document
<
L
>&
doc
,
const
line_set
<
L
>&
lines
,
const
std
::
string
&
output_name
,
const
std
::
string
&
output_name
,
bool
extended_format
)
bool
extended_format
)
{
{
...
@@ -100,9 +121,12 @@ namespace scribo
...
@@ -100,9 +121,12 @@ namespace scribo
std
::
ofstream
file
(
output_name
.
c_str
());
std
::
ofstream
file
(
output_name
.
c_str
());
if
(
!
file
)
if
(
!
file
)
{
{
std
::
cerr
<<
"error: cannot open file '"
<<
input_
name
<<
"'!"
;
std
::
cerr
<<
"error: cannot open file '"
<<
doc
.
file
name
()
<<
"'!"
;
abort
();
abort
();
}
}
const
line_set
<
L
>&
lines
=
doc
.
text
();
std
::
map
<
char
,
std
::
string
>
html_map
;
std
::
map
<
char
,
std
::
string
>
html_map
;
html_map
[
'\"'
]
=
"""
;
html_map
[
'\"'
]
=
"""
;
html_map
[
'<'
]
=
"<"
;
html_map
[
'<'
]
=
"<"
;
...
@@ -111,13 +135,13 @@ namespace scribo
...
@@ -111,13 +135,13 @@ namespace scribo
file
<<
"<?xml version=
\"
1.0
\"
?>"
<<
std
::
endl
;
file
<<
"<?xml version=
\"
1.0
\"
?>"
<<
std
::
endl
;
if
(
extended_format
)
if
(
extended_format
)
{
{
file
<<
"<pcGts>"
<<
std
::
endl
;
file
<<
"<pcGts>"
<<
std
::
endl
;
}
}
else
else
{
{
file
<<
"<pcGts xmlns=
\"
http://schema.primaresearch.org/PAGE/gts/pagecontent/2009-03-16
\"
xmlns:xsi=
\"
http://www.w3.org/2001/XMLSchema-instance
\"
xsi:schemaLocation=
\"
http://schema.primaresearch.org/PAGE/gts/pagecontent/2009-03-16 http://schema.primaresearch.org/PAGE/gts/pagecontent/2009-03-16/pagecontent.xsd
\"
pcGtsId=
\"
"
<<
input_
name
<<
"
\"
>"
<<
std
::
endl
;
file
<<
"<pcGts xmlns=
\"
http://schema.primaresearch.org/PAGE/gts/pagecontent/2009-03-16
\"
xmlns:xsi=
\"
http://www.w3.org/2001/XMLSchema-instance
\"
xsi:schemaLocation=
\"
http://schema.primaresearch.org/PAGE/gts/pagecontent/2009-03-16 http://schema.primaresearch.org/PAGE/gts/pagecontent/2009-03-16/pagecontent.xsd
\"
pcGtsId=
\"
"
<<
doc
.
file
name
()
<<
"
\"
>"
<<
std
::
endl
;
}
}
file
<<
" <PcMetadata>"
<<
std
::
endl
;
file
<<
" <PcMetadata>"
<<
std
::
endl
;
file
<<
" <PcCreator>LRDE</PcCreator>"
<<
std
::
endl
;
file
<<
" <PcCreator>LRDE</PcCreator>"
<<
std
::
endl
;
...
@@ -126,110 +150,86 @@ namespace scribo
...
@@ -126,110 +150,86 @@ namespace scribo
file
<<
" <PcComments>Generated by Scribo from Olena.</PcComments>"
<<
std
::
endl
;
file
<<
" <PcComments>Generated by Scribo from Olena.</PcComments>"
<<
std
::
endl
;
file
<<
" </PcMetadata>"
<<
std
::
endl
;
file
<<
" </PcMetadata>"
<<
std
::
endl
;
file
<<
" <page image_filename=
\"
"
<<
input_
name
file
<<
" <page image_filename=
\"
"
<<
doc
.
file
name
()
<<
"
\"
image_width=
\"
"
<<
lines
.
components
().
labeled_image
().
ncols
()
<<
"
\"
image_width=
\"
"
<<
lines
.
components
().
labeled_image
().
ncols
()
<<
"
\"
image_height=
\"
"
<<
lines
.
components
().
labeled_image
().
nrows
()
<<
"
\"
image_height=
\"
"
<<
lines
.
components
().
labeled_image
().
nrows
()
<<
"
\"
>"
<<
std
::
endl
;
<<
"
\"
>"
<<
std
::
endl
;
for_all_lines
(
l
,
lines
)
for_all_lines
(
l
,
lines
)
{
if
(
!
lines
(
l
).
is_valid
()
||
lines
(
l
).
tag
()
!=
line
::
None
||
lines
(
l
).
type
()
!=
line
::
Text
)
// Is NOT a text line.
continue
;
{
{
if
(
!
lines
(
l
).
is_valid
()
file
<<
" <text_region id=
\"
"
<<
lines
(
l
).
id
()
||
lines
(
l
).
tag
()
!=
line
::
None
<<
"
\"
txt_orientation=
\"
"
<<
lines
(
l
).
orientation
()
||
lines
(
l
).
type
()
!=
line
::
Text
)
// Is NOT a text line.
<<
"
\"
txt_reading_orientation=
\"
"
<<
lines
(
l
).
reading_orientation
()
continue
;
<<
"
\"
txt_reading_direction=
\"
"
<<
lines
(
l
).
reading_direction
()
<<
"
\"
txt_text_type=
\"
"
<<
lines
(
l
).
type
()
<<
"
\"
txt_reverse_video=
\"
"
<<
(
lines
(
l
).
reverse_video
()
?
"true"
:
"false"
)
<<
"
\"
txt_indented=
\"
"
<<
(
lines
(
l
).
indented
()
?
"true"
:
"false"
)
<<
"
\"
kerning=
\"
"
<<
lines
(
l
).
char_space
();
// EXTENSIONS - Not officially supported
if
(
extended_format
)
{
{
file
<<
" <text_region id=
\"
"
<<
lines
(
l
).
id
()
file
<<
"
\"
baseline=
\"
"
<<
lines
(
l
).
baseline
()
<<
"
\"
txt_orientation=
\"
"
<<
lines
(
l
).
orientation
()
<<
"
\"
meanline=
\"
"
<<
lines
(
l
).
meanline
()
<<
"
\"
txt_reading_orientation=
\"
"
<<
lines
(
l
).
reading_orientation
()
<<
"
\"
x_height=
\"
"
<<
lines
(
l
).
x_height
()
<<
"
\"
txt_reading_direction=
\"
"
<<
lines
(
l
).
reading_direction
()
<<
"
\"
d_height=
\"
"
<<
lines
(
l
).
d_height
()
<<
"
\"
txt_text_type=
\"
"
<<
lines
(
l
).
type
()
<<
"
\"
a_height=
\"
"
<<
lines
(
l
).
a_height
()
<<
"
\"
txt_reverse_video=
\"
"
<<
(
lines
(
l
).
reverse_video
()
?
"true"
:
"false"
)
<<
"
\"
char_width=
\"
"
<<
lines
(
l
).
char_width
();
<<
"
\"
txt_indented=
\"
"
<<
(
lines
(
l
).
indented
()
?
"true"
:
"false"
)
}
<<
"
\"
kerning=
\"
"
<<
lines
(
l
).
char_space
();
// End of EXTENSIONS
file
<<
"
\"
>"
// EXTENSIONS - Not officially supported
<<
std
::
endl
;
if
(
extended_format
)
{
file
<<
"
\"
baseline=
\"
"
<<
lines
(
l
).
baseline
()
<<
"
\"
meanline=
\"
"
<<
lines
(
l
).
meanline
()
<<
"
\"
x_height=
\"
"
<<
lines
(
l
).
x_height
()
<<
"
\"
d_height=
\"
"
<<
lines
(
l
).
d_height
()
<<
"
\"
a_height=
\"
"
<<
lines
(
l
).
a_height
()
<<
"
\"
char_width=
\"
"
<<
lines
(
l
).
char_width
();
}
// End of EXTENSIONS
file
<<
"
\"
>"
<<
std
::
endl
;
if
(
extended_format
)
internal
::
print_box_coords
(
file
,
lines
(
l
).
bbox
(),
" "
);
{
file
<<
" <coords>"
<<
std
::
endl
if
(
extended_format
)
<<
" <point x=
\"
"
<<
lines
(
l
).
bbox
().
pmin
().
col
()
{
<<
"
\"
y=
\"
"
<<
lines
(
l
).
bbox
().
pmin
().
row
()
<<
"
\"
/>"
file
<<
" <paragraph>"
<<
std
::
endl
;
<<
std
::
endl
<<
" <point x=
\"
"
<<
lines
(
l
).
bbox
().
pmax
().
col
()
internal
::
print_box_coords
(
file
,
lines
(
l
).
bbox
(),
" "
);
<<
"
\"
y=
\"
"
<<
lines
(
l
).
bbox
().
pmin
().
row
()
<<
"
\"
/>"
<<
std
::
endl
if
(
lines
(
l
).
has_text
())
<<
" <point x=
\"
"
<<
lines
(
l
).
bbox
().
pmax
().
col
()
{
<<
"
\"
y=
\"
"
<<
lines
(
l
).
bbox
().
pmax
().
row
()
<<
"
\"
/>"
std
::
string
tmp
=
lines
(
l
).
text
();
<<
std
::
endl
tmp
=
internal
::
html_markups_replace
(
tmp
,
html_map
);
<<
" <point x=
\"
"
<<
lines
(
l
).
bbox
().
pmin
().
col
()
<<
"
\"
y=
\"
"
<<
lines
(
l
).
bbox
().
pmax
().
row
()
<<
"
\"
/>"
file
<<
" <line text=
\"
"
<<
std
::
endl
<<
tmp
<<
" </coords>"
<<
std
::
endl
;
<<
"
\"
>"
<<
std
::
endl
;
}
else
file
<<
" <paragraph>"
<<
std
::
endl
;
file
<<
" <line>"
<<
std
::
endl
;
file
<<
" <coords>"
<<
std
::
endl
internal
::
print_box_coords
(
file
,
lines
(
l
).
bbox
(),
" "
);
<<
" <point x=
\"
"
<<
lines
(
l
).
bbox
().
pmin
().
col
()
<<
"
\"
y=
\"
"
<<
lines
(
l
).
bbox
().
pmin
().
row
()
<<
"
\"
/>"
<<
std
::
endl
<<
" <point x=
\"
"
<<
lines
(
l
).
bbox
().
pmax
().
col
()
<<
"
\"
y=
\"
"
<<
lines
(
l
).
bbox
().
pmin
().
row
()
<<
"
\"
/>"
<<
std
::
endl
<<
" <point x=
\"
"
<<
lines
(
l
).
bbox
().
pmax
().
col
()
<<
"
\"
y=
\"
"
<<
lines
(
l
).
bbox
().
pmax
().
row
()
<<
"
\"
/>"
<<
std
::
endl
<<
" <point x=
\"
"
<<
lines
(
l
).
bbox
().
pmin
().
col
()
<<
"
\"
y=
\"
"
<<
lines
(
l
).
bbox
().
pmax
().
row
()
<<
"
\"
/>"
<<
std
::
endl
<<
" </coords>"
<<
std
::
endl
;
if
(
lines
(
l
).
has_text
())
{
std
::
string
tmp
=
lines
(
l
).
text
();
tmp
=
internal
::
html_markups_replace
(
tmp
,
html_map
);
file
<<
" <line text=
\"
"
<<
tmp
<<
"
\"
>"
<<
std
::
endl
;
}
else
file
<<
" <line>"
<<
std
::
endl
;
file
<<
" <coords>"
<<
std
::
endl
<<
" <point x=
\"
"
<<
lines
(
l
).
bbox
().
pmin
().
col
()
<<
"
\"
y=
\"
"
<<
lines
(
l
).
bbox
().
pmin
().
row
()
<<
"
\"
/>"
<<
std
::
endl
<<
" <point x=
\"
"
<<
lines
(
l
).
bbox
().
pmax
().
col
()
<<
"
\"
y=
\"
"
<<
lines
(
l
).
bbox
().
pmin
().
row
()
<<
"
\"
/>"
<<
std
::
endl
<<
" <point x=
\"
"
<<
lines
(
l
).
bbox
().
pmax
().
col
()
<<
"
\"
y=
\"
"
<<
lines
(
l
).
bbox
().
pmax
().
row
()
<<
"
\"
/>"
<<
std
::
endl
<<
" <point x=
\"
"
<<
lines
(
l
).
bbox
().
pmin
().
col
()
<<
"
\"
y=
\"
"
<<
lines
(
l
).
bbox
().
pmax
().
row
()
<<
"
\"
/>"
<<
std
::
endl
<<
" </coords>"
<<
std
::
endl
;
file
<<
" </line>"
<<
std
::
endl
;
file
<<
" </paragraph>"
<<
std
::
endl
;
}
file
<<
" </text_region>"
<<
std
::
endl
;
file
<<
" </line>"
<<
std
::
endl
;
file
<<
" </paragraph>"
<<
std
::
endl
;
}
}
file
<<
" </text_region>"
<<
std
::
endl
;
}
}
const
component_set
<
L
>&
elts
=
doc
.
elements
();
for_all_comps
(
e
,
elts
)
if
(
elts
(
e
).
is_valid
())
{
file
<<
" <image_region id=
\"
ir"
<<
elts
(
e
).
id
()
<<
"
\"
img_colour_type=
\"
24_Bit_Colour
\"
"
<<
" img_orientation=
\"
0.000000
\"
"
<<
" img_emb_text=
\"
No
\"
"
<<
" img_bgcolour=
\"
White
\"
>"
<<
std
::
endl
;
internal
::
print_box_coords
(
file
,
elts
(
e
).
bbox
(),
" "
);
file
<<
" </image_region>"
<<
std
::
endl
;
}
}
file
<<
" </page>"
<<
std
::
endl
;
file
<<
" </page>"
<<
std
::
endl
;
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment