Commit 4a399a3c authored by Arthur Crepin-Leblond's avatar Arthur Crepin-Leblond Committed by Guillaume Lazzara
Browse files

ICDAR XML to HTML.

	* arthur/xml_to_html/domitem.cc,
        * arthur/xml_to_html/domitem.hh,
        * arthur/xml_to_html/dommodel.cc,
        * arthur/xml_to_html/dommodel.hh: XML parsing.

        * arthur/xml_to_html/patterns/css.css,
        * arthur/xml_to_html/patterns/xsl.xsl: HTML patterns.

        * arthur/xml_to_html/xml_to_html.cc,
        * arthur/xml_to_html/xml_to_html.hh: Image cropping and XML merging.

        * arthur/xml_to_html/README
        * arthur/xml_to_html/xml_to_html.pro
        * arthur/xml_to_html/main.cc
parent 11d3d490
2010-07-05 Arthur Crepin-Leblond <crepin@stockholm.lrde.epita.fr>
ICDAR XML to HTML.
* arthur/xml_to_html/domitem.cc,
* arthur/xml_to_html/domitem.hh,
* arthur/xml_to_html/dommodel.cc,
* arthur/xml_to_html/dommodel.hh: XML parsing.
* arthur/xml_to_html/patterns/css.css,
* arthur/xml_to_html/patterns/xsl.xsl: HTML patterns.
* arthur/xml_to_html/xml_to_html.cc,
* arthur/xml_to_html/xml_to_html.hh: Image cropping and XML merging.
* arthur/xml_to_html/README
* arthur/xml_to_html/xml_to_html.pro
* arthur/xml_to_html/main.cc
2010-06-25 Guillaume Lazzara <z@lrde.epita.fr>
Add algorithms to retrieve white spaces.
......
xml_to_html
Produce HTML view from ICDAR (or LRDE extended) XML.
*Compilation:
Just change the paths to olena and milena in xml_to_html.pro
*Usage:
-m, --merge: Merge an XML file with another XML file.
args: <xml_file_1> <xml_file_2> [xml_output]
Useful to merge LRDE XML extended mode files with ICDAR XML files to combine both images and recognized text.
-c, --create-html: Crop the images (if any) out of the PPM file using the XML file and produce an HTML output.
args: <xml_file> [ppm_file] [output dir]
If no PPM file is given, the program will only produce an HTML output without images.
If a PPM file is given, the program will crop image_region, separator_region, graphic_regions, chart_region and table_region to output_dir/img/id.png
*Result:
To view the result, open output_dir/output.xml in your web browser. (This does not work in Chrome when the file is opened locally; serve it from a web server such as LAMPP instead.)
\ No newline at end of file
/****************************************************************************
**
** Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies).
** Contact: Qt Software Information (qt-info@nokia.com)
**
** This file is part of the example classes of the Qt Toolkit.
**
** Commercial Usage
** Licensees holding valid Qt Commercial licenses may use this file in
** accordance with the Qt Commercial License Agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and Nokia.
**
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License versions 2.0 or 3.0 as published by the Free
** Software Foundation and appearing in the file LICENSE.GPL included in
** the packaging of this file. Please review the following information
** to ensure GNU General Public Licensing requirements will be met:
** http://www.fsf.org/licensing/licenses/info/GPLv2.html and
** http://www.gnu.org/copyleft/gpl.html. In addition, as a special
** exception, Nokia gives you certain additional rights. These rights
** are described in the Nokia Qt GPL Exception version 1.3, included in
** the file GPL_EXCEPTION.txt in this package.
**
** Qt for Windows(R) Licensees
** As a special exception, Nokia, as the sole copyright holder for Qt
** Designer, grants users of the Qt/Eclipse Integration plug-in the
** right for the Qt/Eclipse Integration to link to functionality
** provided by Qt Designer and its related libraries.
**
** If you are unsure which license is appropriate for your use, please
** contact the sales department at qt-sales@nokia.com.
**
****************************************************************************/
//
// Document layout viewer.
//
// Copyright (C) 2009 Florent D'Halluin.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// The complete GNU General Public Licence Notice can be found as the
// `COPYING' file in the root directory.
//
#include <QtXml>
#include "domitem.hh"
// Builds an item around a DOM node.
//   node:   DOM node to wrap; it is copied into the item.
//   row:    position of this item within its parent.
//   parent: item this one hangs under (0 when there is none).
DomItem::DomItem(QDomNode &node, int row, DomItem *parent)
  : domNode(node),
    parentItem(parent),
    rowNumber(row)
{
}
// Deletes every child item stored in the cache; each child in turn deletes
// its own cached children through this same destructor.
DomItem::~DomItem()
{
  typedef QHash<int, DomItem*>::const_iterator ChildIterator;

  const ChildIterator last = childItems.constEnd();
  for (ChildIterator cur = childItems.constBegin(); cur != last; ++cur)
    delete cur.value();
}
// Returns (by value) the DOM node wrapped by this item.
QDomNode DomItem::node() const
{
  return this->domNode;
}
// Returns the parent item given at construction (may be 0).
DomItem *DomItem::parent()
{
  return this->parentItem;
}
// Returns the item wrapping the i-th child of the DOM node. The item is
// created on first request and cached for later calls. Returns 0 when `i`
// is not a valid child index.
DomItem *DomItem::child(int i)
{
  // Already built for this index: hand back the cached item.
  const QHash<int, DomItem*>::const_iterator cached = childItems.constFind(i);
  if (cached != childItems.constEnd())
    return cached.value();

  QDomNodeList children = domNode.childNodes();
  if (i < 0 || i >= children.count())
    return 0;

  // First access: wrap the DOM child and remember it under its index.
  QDomNode childNode = children.item(i);
  DomItem *created = new DomItem(childNode, i, this);
  childItems.insert(i, created);
  return created;
}
int DomItem::row()
{
return rowNumber;
}
/****************************************************************************
**
** Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies).
** Contact: Qt Software Information (qt-info@nokia.com)
**
** This file is part of the example classes of the Qt Toolkit.
**
** Commercial Usage
** Licensees holding valid Qt Commercial licenses may use this file in
** accordance with the Qt Commercial License Agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and Nokia.
**
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License versions 2.0 or 3.0 as published by the Free
** Software Foundation and appearing in the file LICENSE.GPL included in
** the packaging of this file. Please review the following information
** to ensure GNU General Public Licensing requirements will be met:
** http://www.fsf.org/licensing/licenses/info/GPLv2.html and
** http://www.gnu.org/copyleft/gpl.html. In addition, as a special
** exception, Nokia gives you certain additional rights. These rights
** are described in the Nokia Qt GPL Exception version 1.3, included in
** the file GPL_EXCEPTION.txt in this package.
**
** Qt for Windows(R) Licensees
** As a special exception, Nokia, as the sole copyright holder for Qt
** Designer, grants users of the Qt/Eclipse Integration plug-in the
** right for the Qt/Eclipse Integration to link to functionality
** provided by Qt Designer and its related libraries.
**
** If you are unsure which license is appropriate for your use, please
** contact the sales department at qt-sales@nokia.com.
**
****************************************************************************/
//
// Document layout viewer.
//
// Copyright (C) 2009 Florent D'Halluin.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// The complete GNU General Public Licence Notice can be found as the
// `COPYING' file in the root directory.
//
#ifndef DOMITEM_H
#define DOMITEM_H
#include <QDomNode>
#include <QHash>
// Tree item wrapping a QDomNode.
//
// Child items are created on demand by child() and cached in `childItems`;
// the destructor deletes every cached child, so an item owns the sub-tree of
// items that has been built below it.
class DomItem
{
public:
  // Wraps `node`, recording its `row` within `parent` (0 when no parent).
  DomItem(QDomNode &node, int row, DomItem *parent = 0);
  // Deletes all cached child items.
  ~DomItem();

  // Item for the i-th child node, created on first access; 0 if out of range.
  DomItem *child(int i);
  // Parent item given at construction (may be 0).
  DomItem *parent();
  // Copy of the wrapped DOM node.
  QDomNode node() const;
  // Row number given at construction.
  int row();

private:
  // Not copyable: a copy would share the raw child pointers held in
  // `childItems` and both destructors would delete them. Declared but
  // intentionally left undefined.
  DomItem(const DomItem &);
  DomItem &operator=(const DomItem &);

  QDomNode domNode;                // Wrapped DOM node.
  QHash<int,DomItem*> childItems;  // Child index -> owned child item.
  DomItem *parentItem;             // Not owned.
  int rowNumber;                   // Position within the parent.
};
#endif
/****************************************************************************
**
** Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies).
** Contact: Qt Software Information (qt-info@nokia.com)
**
** This file is part of the example classes of the Qt Toolkit.
**
** Commercial Usage
** Licensees holding valid Qt Commercial licenses may use this file in
** accordance with the Qt Commercial License Agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and Nokia.
**
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License versions 2.0 or 3.0 as published by the Free
** Software Foundation and appearing in the file LICENSE.GPL included in
** the packaging of this file. Please review the following information
** to ensure GNU General Public Licensing requirements will be met:
** http://www.fsf.org/licensing/licenses/info/GPLv2.html and
** http://www.gnu.org/copyleft/gpl.html. In addition, as a special
** exception, Nokia gives you certain additional rights. These rights
** are described in the Nokia Qt GPL Exception version 1.3, included in
** the file GPL_EXCEPTION.txt in this package.
**
** Qt for Windows(R) Licensees
** As a special exception, Nokia, as the sole copyright holder for Qt
** Designer, grants users of the Qt/Eclipse Integration plug-in the
** right for the Qt/Eclipse Integration to link to functionality
** provided by Qt Designer and its related libraries.
**
** If you are unsure which license is appropriate for your use, please
** contact the sales department at qt-sales@nokia.com.
**
****************************************************************************/
//
// Document layout viewer.
//
// Copyright (C) 2009 Florent D'Halluin.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// The complete GNU General Public Licence Notice can be found as the
// `COPYING' file in the root directory.
//
#include <QtGui>
#include <QtXml>
#include "domitem.hh"
#include "dommodel.hh"
// Creates a model over `document`. The model keeps its own copy of the
// document handle and allocates the root item wrapping it (row 0, no parent).
DomModel::DomModel(QDomDocument document, QObject *parent)
  : QAbstractItemModel(parent),
    domDocument(document),
    rootItem(new DomItem(domDocument, 0))
{
}
// Frees the root item allocated by the constructor.
DomModel::~DomModel()
{
  delete this->rootItem;
}
// The model always exposes three columns, whatever the parent index is.
int DomModel::columnCount(const QModelIndex &) const
{
  enum { ColumnTotal = 3 };
  return ColumnTotal;
}
// Returns the data stored under `role` for the node referred to by `index`.
//
// Qt::DisplayRole:
//   column 0 -> node name
//   column 1 -> attributes rendered as name="value", separated by spaces
//   column 2 -> node value with newlines turned into spaces
// Qt::UserRole:
//   column 1 -> attributes as a QMap<QString, QVariant> (name -> value)
// Anything else (invalid index, other role or column) yields an invalid
// QVariant.
QVariant DomModel::data(const QModelIndex &index, int role) const
{
  if (!index.isValid())
    return QVariant();

  DomItem *item = static_cast<DomItem*>(index.internalPointer());
  QDomNode node = item->node();
  QDomNamedNodeMap attributeMap = node.attributes();
  const int column = index.column();

  if (role == Qt::DisplayRole)
  {
    if (column == 0)
      return node.nodeName();

    if (column == 1)
    {
      QStringList rendered;
      for (int pos = 0; pos < attributeMap.count(); ++pos)
      {
        QDomNode attribute = attributeMap.item(pos);
        rendered << attribute.nodeName() + "=\""
          + attribute.nodeValue() + "\"";
      }
      return rendered.join(" ");
    }

    if (column == 2)
      return node.nodeValue().split("\n").join(" ");

    return QVariant();
  }

  if (role == Qt::UserRole)
  {
    if (column != 1)
      return QVariant();

    QMap<QString, QVariant> byName;
    for (int pos = 0; pos < attributeMap.count(); ++pos)
    {
      QDomNode attribute = attributeMap.item(pos);
      byName[attribute.nodeName()] = attribute.nodeValue();
    }
    return byName;
  }

  return QVariant();
}
// Valid indexes are enabled and selectable; an invalid index has no flags.
Qt::ItemFlags DomModel::flags(const QModelIndex &index) const
{
  if (index.isValid())
    return Qt::ItemIsEnabled | Qt::ItemIsSelectable;
  return 0;
}
// Returns the translated title of a horizontal header section for
// Qt::DisplayRole; any other orientation, role or section yields an invalid
// QVariant.
QVariant DomModel::headerData(int section, Qt::Orientation orientation,
                              int role) const
{
  if (orientation != Qt::Horizontal || role != Qt::DisplayRole)
    return QVariant();

  if (section == 0)
    return tr("Name");
  if (section == 1)
    return tr("Attributes");
  if (section == 2)
    return tr("Value");

  return QVariant();
}
// Returns the model index for (row, column) under `parent`. The index's
// internal pointer is the DomItem of the child. Returns an invalid index
// when the position does not exist.
QModelIndex DomModel::index(int row, int column, const QModelIndex &parent)
  const
{
  if (!hasIndex(row, column, parent))
    return QModelIndex();

  // An invalid parent index designates the root item.
  DomItem *owner = parent.isValid()
    ? static_cast<DomItem*>(parent.internalPointer())
    : rootItem;

  DomItem *target = owner->child(row);
  return target ? createIndex(row, column, target) : QModelIndex();
}
// Returns the index of the parent of `child` (always in column 0). Returns
// an invalid index when `child` is invalid, or when its item has no parent
// or hangs directly under the root item.
QModelIndex DomModel::parent(const QModelIndex &child) const
{
  if (!child.isValid())
    return QModelIndex();

  DomItem *owner = static_cast<DomItem*>(child.internalPointer())->parent();
  const bool topLevel = (owner == 0) || (owner == rootItem);
  if (topLevel)
    return QModelIndex();

  return createIndex(owner->row(), 0, owner);
}
int DomModel::rowCount(const QModelIndex &parent) const
{
if (parent.column() > 0)
return 0;
DomItem *parentItem;
if (!parent.isValid())
parentItem = rootItem;
else
parentItem = static_cast<DomItem*>(parent.internalPointer());
return parentItem->node().childNodes().count();
}
/****************************************************************************
**
** Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies).
** Contact: Qt Software Information (qt-info@nokia.com)
**
** This file is part of the example classes of the Qt Toolkit.
**
** Commercial Usage
** Licensees holding valid Qt Commercial licenses may use this file in
** accordance with the Qt Commercial License Agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and Nokia.
**
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License versions 2.0 or 3.0 as published by the Free
** Software Foundation and appearing in the file LICENSE.GPL included in
** the packaging of this file. Please review the following information
** to ensure GNU General Public Licensing requirements will be met:
** http://www.fsf.org/licensing/licenses/info/GPLv2.html and
** http://www.gnu.org/copyleft/gpl.html. In addition, as a special
** exception, Nokia gives you certain additional rights. These rights
** are described in the Nokia Qt GPL Exception version 1.3, included in
** the file GPL_EXCEPTION.txt in this package.
**
** Qt for Windows(R) Licensees
** As a special exception, Nokia, as the sole copyright holder for Qt
** Designer, grants users of the Qt/Eclipse Integration plug-in the
** right for the Qt/Eclipse Integration to link to functionality
** provided by Qt Designer and its related libraries.
**
** If you are unsure which license is appropriate for your use, please
** contact the sales department at qt-sales@nokia.com.
**
****************************************************************************/
//
// Document layout viewer.
//
// Copyright (C) 2009 Florent D'Halluin.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// The complete GNU General Public Licence Notice can be found as the
// `COPYING' file in the root directory.
//
#ifndef DOMMODEL_H
#define DOMMODEL_H
#include <QAbstractItemModel>
#include <QDomDocument>
#include <QModelIndex>
#include <QVariant>
class DomItem;
// Read-only item model exposing a QDomDocument as a three-column tree
// (name, attributes, value). Items are DomItem objects hanging under
// `rootItem`.
class DomModel : public QAbstractItemModel
{
  Q_OBJECT

public:
  DomModel(QDomDocument document, QObject *parent = 0);
  ~DomModel();

  // Tree structure.
  QModelIndex index(int row, int column,
                    const QModelIndex &parent = QModelIndex()) const;
  QModelIndex parent(const QModelIndex &child) const;
  int rowCount(const QModelIndex &parent = QModelIndex()) const;
  int columnCount(const QModelIndex &parent = QModelIndex()) const;

  // Item and header contents.
  QVariant data(const QModelIndex &index, int role) const;
  QVariant headerData(int section, Qt::Orientation orientation,
                      int role = Qt::DisplayRole) const;
  Qt::ItemFlags flags(const QModelIndex &index) const;

private:
  QDomDocument domDocument;  // Document being exposed.
  DomItem *rootItem;         // Root of the item tree, deleted by the destructor.
};
#endif
#include <iostream>
#include "xml_to_html.hh"
int main(int argc, char **argv)
{
std::string man;
man = "xml_to_html: ...usage\n -m, --merge: Merge an XML with an other XML\n \targs: <xml_file_1> <xml_file_2> [xml_output]\n\n -c, --create-html: Crop images (if exist) from the ppm file using the XML file and produce an html output.\n \targs: <xml_file> [ppm_file] [output dir]\n";
if (argc > 2)
{
new QApplication::QApplication(argc, argv, false);
ImageCrop crop;
std::string merge = "--merge";
std::string m = "-m";
std::string scrop = "--create-html";
std::string c = "-c";
if (argc == 3 && (scrop.compare(argv[1]) == 0 || c.compare(argv[1]) == 0))
{
bool b = false;
b = crop.load_xml(argv[2], QString::Null());
if (b)
crop.find_image_regions();
}
else if (argc > 3)
{
if (merge.compare(argv[1]) == 0 || m.compare(argv[1]) == 0)
{
if (argc > 4)
crop.merge(argv[2], argv[3], argv[4]);
else
crop.merge(argv[2], argv[3]);
}
else if (scrop.compare(argv[1]) == 0 || c.compare(argv[1]) == 0)
{
bool b = false;
if (argc > 4)
b = crop.load_xml(argv[2], argv[3], argv[4]);
else
b = crop.load_xml(argv[2], argv[3]);
if (b)
crop.find_image_regions();
}
else
std::cout << man;
}
else
std::cout << man;
}
else
std::cout << man;
return 0;
}
/* .line: absolutely positioned inline box on a white background, stacked
   above .para (z-index 6) and .region (z-index 5). Whitespace is preserved
   and letters are tightened by 2px. */
.line {
  position: absolute;
  background-color: white;
  z-index: 7;
  display: inline;
  white-space: pre;
  letter-spacing: -2px;
  padding: 0px;
  margin: 0px;
  filter: alpha(opacity=100);
  font-family: "Times New Roman", Times, serif;
}
/* .para: absolutely positioned, stacked between .region (5) and .line (7). */
.para {
  position: absolute;
  z-index: 6;
}
/* .region: absolutely positioned, stacked below .para (6) and .line (7). */
.region {
  position: absolute;
  z-index: 5;
}
/* .image: absolutely positioned and borderless; no explicit z-index. */
.image {
  position: absolute;
  border: 0;
}
\ No newline at end of file
<?xml version="1.0"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:template match="/"> <!-- "/" means the whole document (root) -->
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<title>Fooooooo !</title>
<link rel="stylesheet" type="text/css" href="css.css" />
</head>
<body>
<div class="image" style="top:0px;left:0px;">