Commit 7e4787da authored by Alexandre Duret-Lutz's avatar Alexandre Duret-Lutz
Browse files

Fix error reporting in utf8-encoded LTL formulae.

* src/ltlparse/public.hh (fix_utf8_locations): New function.
* src/ltlparse/fmterror.cc (fix_utf8_locations): Implement it.
(format_parse_errors): Rename as ...
(format_parse_errors_aux): ... this.
(format_parse_errors): New implementation that calls fix_utf8_locations()
before format_parse_errors_aux() on valid utf8 strings.
* src/ltlparse/Makefile.am: Include $(top_srcdir).
* src/ltltest/utf8.test: New file.
* src/ltltest/Makefile.am: Add it.
* src/ltltest/parse.test: Fix header.
parent 403170f5
## Copyright (C) 2008, 2009, 2010, 2011 Laboratoire de Recherche et
## Développement de l'Epita (LRDE).
## Copyright (C) 2008, 2009, 2010, 2011, 2012 Laboratoire de Recherche
## et Développement de l'Epita (LRDE).
## Copyright (C) 2003, 2004 Laboratoire d'Informatique de Paris
## 6 (LIP6), département Systèmes Répartis Coopératifs (SRC),
## Université Pierre et Marie Curie.
......@@ -21,7 +21,7 @@
## Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
## 02111-1307, USA.
AM_CPPFLAGS = -I$(srcdir)/.. -I.. -DYY_NO_INPUT
AM_CPPFLAGS = -I$(top_srcdir) -I$(srcdir)/.. -I.. -DYY_NO_INPUT
# Disable -Werror because too many versions of flex yield warnings.
AM_CXXFLAGS = $(WARNING_CXXFLAGS:-Werror=)
......
// Copyright (C) 2010 Laboratoire de Recherche et Développement de
// l'Epita (LRDE).
// Copyright (C) 2010, 2012 Laboratoire de Recherche et Développement
// de l'Epita (LRDE).
// Copyright (C) 2003, 2004, 2005 Laboratoire d'Informatique de Paris
// 6 (LIP6), département Systèmes Répartis Coopératifs (SRC),
// Université Pierre et Marie Curie.
......@@ -23,36 +23,95 @@
#include "public.hh"
#include <ostream>
#include <iterator>
#include <vector>
#include "utf8/utf8.h"
namespace spot
{
namespace ltl
{
bool
format_parse_errors(std::ostream& os,
const std::string& ltl_string,
parse_error_list& error_list)
// Rewrite the begin and end columns of every location in error_list
// so that they count utf8 characters of ltl_string instead of bytes.
// The string is walked with utf8::next(); the caller is expected to
// pass a valid utf8 string (format_parse_errors() checks this with
// utf8::is_valid() before calling).
void
fix_utf8_locations(const std::string& ltl_string,
                   parse_error_list& error_list)
{
  // LUT to convert byte positions to utf8 positions.
  // (The +2 is to account for position 0, not used,
  // and position ltl_string.size()+1 denoting EOS.)
  std::vector<unsigned> b2u(ltl_string.size() + 2);

  // i will iterate over all utf8 characters between b and e
  std::string::const_iterator b = ltl_string.begin();
  std::string::const_iterator i = b;
  std::string::const_iterator e = ltl_string.end();

  unsigned n = 0;             // number of current utf8 character
  unsigned prev = 0;          // last byte of previous utf8 character
  while (i != e)
    {
      // Step over one whole utf8 character; i now points just past it.
      utf8::next(i, e);
      ++n;
      // Every byte position covered by that character maps to the
      // same character number n.
      unsigned d = std::distance(b, i);
      while (prev < d)
        b2u[++prev] = n;
    }
  // One extra entry, one past the last byte, for locations at EOS.
  b2u[++prev] = ++n;

  // use b2u to update error_list
  parse_error_list::iterator it;
  for (it = error_list.begin(); it != error_list.end(); ++it)
    {
      ltlyy::location& l = it->first;
      l.begin.column = b2u[l.begin.column];
      l.end.column = b2u[l.end.column];
    }
}
unsigned n = 1;
for (; n < 4 + l.begin.column; ++n)
os << ' ';
// Write at least one '^', even if begin==end.
os << '^';
++n;
for (; n < 4 + l.end.column; ++n)
namespace
{
  // Print each diagnostic of error_list on os as three parts: the
  // input string prefixed by ">>> ", a line of '^' underlining the
  // columns l.begin.column .. l.end.column of the error location, and
  // the error message followed by an empty line.
  //
  // Returns true iff at least one diagnostic was printed.
  bool
  format_parse_errors_aux(std::ostream& os,
                          const std::string& ltl_string,
                          const parse_error_list& error_list)
  {
    bool printed = false;
    parse_error_list::const_iterator it;
    for (it = error_list.begin(); it != error_list.end(); ++it)
      {
        os << ">>> " << ltl_string << std::endl;
        const ltlyy::location& l = it->first;

        // The bound is offset by 4 to skip the ">>> " prefix written
        // above, so the carets line up under the echoed string.
        unsigned n = 1;
        for (; n < 4 + l.begin.column; ++n)
          os << ' ';
        // Write at least one '^', even if begin==end.
        os << '^';
        ++n;
        for (; n < 4 + l.end.column; ++n)
          os << '^';
        // The message is written once, after the underline is complete.
        os << std::endl << it->second << std::endl << std::endl;
        printed = true;
      }
    return printed;
  }
}
// Print the diagnostics of error_list for ltl_string on os.
//
// When ltl_string is valid utf8, the locations are first converted
// from byte columns to utf8 character columns.  The conversion is done
// on a copy, so the caller's error_list is left untouched.  Otherwise
// the byte columns are used as they are.
//
// Returns true iff any diagnostic was output.
bool
format_parse_errors(std::ostream& os,
                    const std::string& ltl_string,
                    const parse_error_list& error_list)
{
  if (utf8::is_valid(ltl_string.begin(), ltl_string.end()))
    {
      // fix_utf8_locations() modifies its argument: work on a copy.
      parse_error_list fixed = error_list;
      fix_utf8_locations(ltl_string, fixed);
      return format_parse_errors_aux(os, ltl_string, fixed);
    }
  else
    {
      return format_parse_errors_aux(os, ltl_string, error_list);
    }
}
}
......
// Copyright (C) 2010, 2011 Laboratoire de Recherche et Développement de
// l'Epita (LRDE).
// Copyright (C) 2010, 2011, 2012 Laboratoire de Recherche et
// Développement de l'Epita (LRDE).
// Copyright (C) 2003, 2004, 2005, 2006 Laboratoire d'Informatique de
// Paris 6 (LIP6), département Systèmes Répartis Coopératifs (SRC),
// Université Pierre et Marie Curie.
......@@ -94,6 +94,12 @@ namespace spot
/// \brief Format diagnostics produced by spot::ltl::parse
/// or spot::ltl::ratexp
///
/// If the string is utf8 encoded, spot::ltl::fix_utf8_locations()
/// will be used to report correct utf8 locations (assuming the
/// output is utf8 aware). Nonetheless, the supplied \a
/// error_list will not be modified.
///
/// \param os Where diagnostics should be output.
/// \param input_string The string that was parsed.
/// \param error_list The error list filled by spot::ltl::parse
......@@ -101,7 +107,34 @@ namespace spot
/// \return \c true iff any diagnostic was output.
bool format_parse_errors(std::ostream& os,
const std::string& input_string,
parse_error_list& error_list);
const parse_error_list& error_list);
/// \brief Fix location of diagnostics assuming the input is utf8.
///
/// The spot::ltl::parse() and spot::ltl::parse_sere() functions
/// return a parse_error_list that contains locations specified at
/// the byte level. Although these parsers recognize some
/// utf8 characters, they only work byte by byte and will report
/// positions by counting bytes.
///
/// This function fixes the positions returned by the parser to
/// look correct when the string is interpreted as a utf8-encoded
/// string. The locations stored in \a error_list are modified
/// in place.
///
/// It is invalid to call this function on a string that is not
/// valid utf8.
///
/// You should NOT call this function before calling
/// spot::ltl::format_parse_errors() because it is already called
/// inside if needed. You may need this function only if you want
/// to write your own error reporting code.
///
/// \param ltl_string The string that was parsed.
/// \param error_list The error list filled by spot::ltl::parse
/// or spot::ltl::parse_sere while parsing \a ltl_string.
void
fix_utf8_locations(const std::string& ltl_string,
parse_error_list& error_list);
/// @}
}
......
......@@ -83,6 +83,7 @@ EXTRA_DIST = $(TESTS)
TESTS = \
parse.test \
parseerr.test \
utf8.test \
length.test \
equals.test \
tostring.test \
......
......@@ -2,7 +2,7 @@
# Copyright (C) 2009, 2010, 2011 Laboratoire de Recherche et Developpement
# de l'Epita (LRDE).
# Copyright (C) 2003, 2004 Laboratoire d'Informatique de Paris 6 (LIP6),
# dpartement Systmes Rpartis Coopratifs (SRC), Universit Pierre
# département Systèmes Répartis Coopératifs (SRC), Université Pierre
# et Marie Curie.
#
# This file is part of Spot, a model checking library.
......
#! /bin/sh
# Copyright (C) 2012 Laboratoire de Recherche et Developpement
# de l'Epita (LRDE).
#
# This file is part of Spot, a model checking library.
#
# Spot is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Spot is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
# License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Spot; see the file COPYING. If not, write to the Free
# Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
# 02111-1307, USA.
# Make sure utf8-encoded formulae are parsed correctly, and that parse
# errors are reported at utf8 character positions rather than byte
# positions.

. ./defs || exit 1

# ----
# utf8 operators: '□' and '◯' must be read as G and X.
run 0 ../ltl2text '□◯a' >out
echo 'unop(G, unop(X, AP(a)))' > exp
cmp out exp

# ----
# utf8 characters inside a quoted atomic proposition are preserved.
run 0 ../ltl2text '□◯"αβγ"' >out
echo 'unop(G, unop(X, AP(αβγ)))' > exp
cmp out exp

# ----
# Errors located after a multi-byte character: each '^' must sit under
# the offending character of the echoed formula.
# (Tracing is switched off around the call, presumably so that the
# shell's own trace output does not end up in err.)
set +x
run 1 ../ltl2text '□)◯a' 2>err
set -x
cat >exp <<EOF
>>> □)◯a
     ^
syntax error, unexpected closing parenthesis

>>> □)◯a
    ^
missing right operand for "always operator"

>>> □)◯a
     ^^^
ignoring trailing garbage

EOF
cmp exp err

# ----
# Same check with multi-byte characters inside a quoted proposition.
set +x
run 1 ../ltl2text '"αβγ"X' 2>err
set -x
cat >exp <<EOF
>>> "αβγ"X
         ^
syntax error, unexpected next operator

>>> "αβγ"X
         ^
ignoring trailing garbage

EOF
cmp exp err
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment