Commit 86dac4aa authored by Alexandre Duret-Lutz's avatar Alexandre Duret-Lutz
Browse files

ltlparse: add a lenient parsing mode

Spin 6 supports formulas such as []<>(a < b) so that atomic properties
need not be specified using #define.  Of course we don't want to
implement all the syntax of Spin in our LTL parser because other tools
may have different syntaxes for their atomic propositions.  The
lenient mode tells the scanner to return any (...), {...}, or {...}!
block as a single token.  The parser will try to recursively parse
this block as a LTL/SERE formula, and if this fails, it will consider
the block to be an atomic proposition.  The drawback is that most
syntax errors will no be considered to be atomic propositions.  For
instance (a U b U) is a single atomic proposition in lenient mode, and
a syntax error in default mode.

* src/ltlparse/ltlparse.yy, src/ltlparse/ltlscan.ll,
src/ltlparse/parsedecl.hh, src/ltlparse/public.hh: Add a
lenient parsing mode.  Simplify the lexer using yy_scan_string.
* src/bin/common_finput.cc: Add a --lenient option.
* src/ltltest/lenient.test: New file.
* src/ltltest/Makefile.am: Add it.
* src/neverparse/neverclaimparse.yy: Parse the guards in lenient mode.
* src/tgbatest/neverclaimread.test: Adjust.
* src/ltlvisit/tostring.cc: When outputing a formula in Spin's syntax,
output (a < b) instead of "a < b".
* src/misc/escape.cc, src/misc/escape.hh (trim): New helper function.
parent d9ceb4ad
......@@ -24,9 +24,11 @@
#include <fstream>
#define OPT_LBT 1
#define OPT_LENIENT 2
jobs_t jobs;
bool lbt_input = false;
static bool lenient = false;
static const argp_option options[] =
{
......@@ -36,6 +38,9 @@ static const argp_option options[] =
"process each line of FILENAME as a formula", 0 },
{ "lbt-input", OPT_LBT, 0, 0,
"read all formulas using LBT's prefix syntax", 0 },
{ "lenient", OPT_LENIENT, 0, 0,
"parenthesized blocks that cannot be parsed as subformulas "
"are considered as atomic properties", 0 },
{ 0, 0, 0, 0, 0, 0 }
};
......@@ -56,6 +61,9 @@ parse_opt_finput(int key, char* arg, struct argp_state*)
case OPT_LBT:
lbt_input = true;
break;
case OPT_LENIENT:
lenient = true;
break;
default:
return ARGP_ERR_UNKNOWN;
}
......@@ -68,7 +76,9 @@ parse_formula(const std::string& s, spot::ltl::parse_error_list& pel)
if (lbt_input)
return spot::ltl::parse_lbt(s, pel);
else
return spot::ltl::parse(s, pel);
return spot::ltl::parse(s, pel,
spot::ltl::default_environment::instance(),
false, lenient);
}
......
......@@ -86,6 +86,53 @@ using namespace spot::ltl;
} \
while (0);
const formula*
try_recursive_parse(const std::string& str,
const ltlyy::location& location,
spot::ltl::environment& env,
bool debug,
bool sere,
spot::ltl::parse_error_list& error_list)
{
// We want to parse a U (b U c) as two until operators applied
// to the atomic propositions a, b, and c. We also want to
// parse a U (b == c) as one until operator applied to the
// atomic propositions "a" and "b == c". The only problem is
// that we do not know anything about "==" or in general about
// the syntax of atomic proposition of our users.
//
// To support that, the lexer will return "b U c" and "b == c"
// as PAR_BLOCK tokens. We then try to parse such tokens
// recursively. If, as in the case of "b U c", the block is
// successfully parsed as a formula, we return this formula.
// Otherwise, we convert the string into an atomic proposition
// (it's up to the environment to check the syntax of this
// proposition, and maybe reject it).
spot::ltl::parse_error_list suberror;
const spot::ltl::formula* f;
if (sere)
f = spot::ltl::parse_sere(str, suberror, env, debug, true);
else
f = spot::ltl::parse(str, suberror, env, debug, true);
if (suberror.empty())
return f;
if (f)
f->destroy();
f = env.require(str);
if (!f)
{
std::string s = "atomic proposition `";
s += str;
s += "' rejected by environment `";
s += env.name();
s += "'";
error_list.push_back(parse_error(location, s));
}
return f;
}
}
......@@ -95,6 +142,9 @@ using namespace spot::ltl;
%token START_LBT "LBT start marker"
%token START_SERE "SERE start marker"
%token PAR_OPEN "opening parenthesis" PAR_CLOSE "closing parenthesis"
%token <str> PAR_BLOCK "(...) block"
%token <str> BRA_BLOCK "{...} block"
%token <str> BRA_BANG_BLOCK "{...}! block"
%token BRACE_OPEN "opening brace" BRACE_CLOSE "closing brace"
%token BRACE_BANG_CLOSE "closing brace-bang"
%token OP_OR "or operator" OP_XOR "xor operator"
......@@ -378,6 +428,14 @@ sere: booleanatom
}
}
| bracedsere
| PAR_BLOCK
{
$$ = try_recursive_parse(*$1, @1, parse_environment,
debug_level(), true, error_list);
delete $1;
if (!$$)
YYERROR;
}
| PAR_OPEN sere PAR_CLOSE
{ $$ = $2; }
| PAR_OPEN error PAR_CLOSE
......@@ -590,8 +648,24 @@ bracedsere: BRACE_OPEN sere BRACE_CLOSE
"treating this brace block as false"));
$$ = constant::false_instance();
}
| BRA_BLOCK
{
$$ = try_recursive_parse(*$1, @1, parse_environment,
debug_level(), true, error_list);
delete $1;
if (!$$)
YYERROR;
}
parenthesedsubformula: PAR_OPEN subformula PAR_CLOSE
parenthesedsubformula: PAR_BLOCK
{
$$ = try_recursive_parse(*$1, @1, parse_environment,
debug_level(), false, error_list);
delete $1;
if (!$$)
YYERROR;
}
| PAR_OPEN subformula PAR_CLOSE
{ $$ = $2; }
| PAR_OPEN subformula error PAR_CLOSE
{ error_list.push_back(parse_error(@3, "ignoring this"));
......@@ -721,16 +795,25 @@ subformula: booleanatom
/* {SERE}! = {SERE} <>-> 1 */
{ $$ = binop::instance(binop::EConcat, $2,
constant::true_instance()); }
;
| BRA_BANG_BLOCK
{
$$ = try_recursive_parse(*$1, @1, parse_environment,
debug_level(), true, error_list);
delete $1;
if (!$$)
YYERROR;
$$ = binop::instance(binop::EConcat, $$,
constant::true_instance());
}
lbtformula: ATOMIC_PROP
{
$$ = parse_environment.require(*$1);
if (! $$)
{
std::string s = "unknown atomic proposition `";
std::string s = "atomic proposition `";
s += *$1;
s += "' in environment `";
s += "' rejected by environment `";
s += parse_environment.name();
s += "'";
error_list.push_back(parse_error(@1, s));
......@@ -790,14 +873,16 @@ namespace spot
parse(const std::string& ltl_string,
parse_error_list& error_list,
environment& env,
bool debug)
bool debug, bool lenient)
{
const formula* result = 0;
flex_set_buffer(ltl_string.c_str(),
ltlyy::parser::token::START_LTL);
ltlyy::parser::token::START_LTL,
lenient);
ltlyy::parser parser(error_list, env, result);
parser.set_debug_level(debug);
parser.parse();
flex_unset_buffer();
return result;
}
......@@ -809,25 +894,30 @@ namespace spot
{
const formula* result = 0;
flex_set_buffer(ltl_string.c_str(),
ltlyy::parser::token::START_LBT);
ltlyy::parser::token::START_LBT,
false);
ltlyy::parser parser(error_list, env, result);
parser.set_debug_level(debug);
parser.parse();
flex_unset_buffer();
return result;
}
const formula*
parse_sere(const std::string& sere_string,
parse_error_list& error_list,
environment& env,
bool debug)
parse_error_list& error_list,
environment& env,
bool debug,
bool lenient)
{
const formula* result = 0;
flex_set_buffer(sere_string.c_str(),
ltlyy::parser::token::START_SERE);
ltlyy::parser::token::START_SERE,
lenient);
ltlyy::parser parser(error_list, env, result);
parser.set_debug_level(debug);
parser.parse();
flex_unset_buffer();
return result;
}
......
......@@ -22,41 +22,29 @@
%option noyywrap warn 8bit batch
%option prefix="ltlyy"
%option outfile="lex.yy.c"
%option stack
%{
#include <string>
#include <boost/lexical_cast.hpp>
#include "ltlparse/parsedecl.hh"
/* Hack Flex so we read from a string instead of reading from a file. */
#define YY_INPUT(buf, result, max_size) \
do { \
result = (max_size < to_parse_size) ? max_size : to_parse_size; \
memcpy(buf, to_parse, result); \
to_parse_size -= result; \
to_parse += result; \
} while (0);
#include "misc/escape.hh"
#define YY_USER_ACTION \
yylloc->columns(yyleng);
static const char* to_parse = 0;
static size_t to_parse_size = 0;
static int start_token = 0;
static int parent_level = 0;
static bool missing_parent = false;
static bool lenient_mode = false;
typedef ltlyy::parser::token token;
void
flex_set_buffer(const char* buf, int start_tok)
{
to_parse = buf;
to_parse_size = strlen(to_parse);
start_token = start_tok;
}
%}
%s not_prop
%x in_par
%x in_bra
%x sqbracket
%x lbt
......@@ -78,18 +66,103 @@ BOXDARROW {BOX}{DARROWL}|"|"{DARROWL}|"⤇"
{
int t = start_token;
start_token = 0;
if (t == token::START_LBT)
BEGIN(lbt);
else
BEGIN(0);
return t;
}
yylloc->step();
%}
"(" BEGIN(0); return token::PAR_OPEN;
"(" {
if (!lenient_mode)
{
BEGIN(0);
return token::PAR_OPEN;
}
/* Parse any (...) block as a single block,
taking care of nested parentheses. The
parser will then try to parse this block
recursively. */
BEGIN(in_par);
parent_level = 1;
yylval->str = new std::string();
}
<in_par>{
"(" ++parent_level; yylval->str->append(yytext, yyleng);
")" {
if (--parent_level)
{
yylval->str->append(yytext, yyleng);
}
else
{
BEGIN(not_prop);
spot::trim(*yylval->str);
return token::PAR_BLOCK;
}
}
[^()]+ yylval->str->append(yytext, yyleng);
<<EOF>> {
unput(')');
if (!missing_parent)
error_list.push_back(
spot::ltl::parse_error(*yylloc,
"missing closing parenthese"));
missing_parent = true;
}
}
"{" {
if (!lenient_mode)
{
BEGIN(0);
return token::BRACE_OPEN;
}
/* Parse any {...} block as a single block,
taking care of nested parentheses. The
parser will then try to parse this block
recursively. */
BEGIN(in_bra);
parent_level = 1;
yylval->str = new std::string();
}
<in_bra>{
"{" ++parent_level; yylval->str->append(yytext, yyleng);
"}"[ \t\n]*"!" {
if (--parent_level)
{
yylval->str->append(yytext, yyleng);
}
else
{
BEGIN(not_prop);
spot::trim(*yylval->str);
return token::BRA_BANG_BLOCK;
}
}
"}" {
if (--parent_level)
{
yylval->str->append(yytext, yyleng);
}
else
{
BEGIN(not_prop);
spot::trim(*yylval->str);
return token::BRA_BLOCK;
}
}
[^{}]+ yylval->str->append(yytext, yyleng);
<<EOF>> {
unput(')');
if (!missing_parent)
error_list.push_back(
spot::ltl::parse_error(*yylloc,
"missing closing brace"));
missing_parent = true;
}
}
")" BEGIN(not_prop); return token::PAR_CLOSE;
"{" BEGIN(0); return token::BRACE_OPEN;
"}"[ \t\n]*"!" BEGIN(not_prop); return token::BRACE_BANG_CLOSE;
"}" BEGIN(not_prop); return token::BRACE_CLOSE;
......@@ -225,7 +298,26 @@ BOXDARROW {BOX}{DARROWL}|"|"{DARROWL}|"⤇"
<<EOF>> return token::END_OF_INPUT;
%{
/* Dummy use of yyunput to shut up a gcc warning. */
(void) &yyunput;
%}
%%
void
flex_set_buffer(const char* buf, int start_tok, bool lenient)
{
yypush_buffer_state(YY_CURRENT_BUFFER);
yy_scan_string(buf);
start_token = start_tok;
if (start_tok == token::START_LBT)
yy_push_state(lbt);
else
yy_push_state(0);
lenient_mode = lenient;
}
void
flex_unset_buffer()
{
(void)&yy_top_state; // shut up a g++ warning.
yy_pop_state();
yypop_buffer_state();
missing_parent = false;
}
// Copyright (C) 2010 Laboratoire de Recherche et Dveloppement de
// Copyright (C) 2010, 2012 Laboratoire de Recherche et Dveloppement de
// l'Epita (LRDE)
// Copyright (C) 2003, 2005 Laboratoire d'Informatique de Paris 6 (LIP6),
// dpartement Systmes Rpartis Coopratifs (SRC), Universit Pierre
......@@ -31,6 +31,7 @@
spot::ltl::parse_error_list& error_list)
YY_DECL;
void flex_set_buffer(const char *buf, int start_tok);
void flex_set_buffer(const char *buf, int start_tok, bool lenient);
void flex_unset_buffer();
#endif // SPOT_LTLPARSE_PARSEDECL_HH
......@@ -56,6 +56,9 @@ namespace spot
/// parse errors that occured during parsing.
/// \param env The environment into which parsing should take place.
/// \param debug When true, causes the parser to trace its execution.
/// \param lenient When true, parenthesized blocks that cannot be
/// parsed as subformulas will be considered as
/// atomic propositions.
/// \return A pointer to the formula built from \a ltl_string, or
/// 0 if the input was unparsable.
///
......@@ -68,7 +71,8 @@ namespace spot
const formula* parse(const std::string& ltl_string,
parse_error_list& error_list,
environment& env = default_environment::instance(),
bool debug = false);
bool debug = false,
bool lenient = false);
/// \brief Build a formula from an LTL string in LBT's format.
/// \param ltl_string The string to parse.
......@@ -100,6 +104,9 @@ namespace spot
/// parse errors that occured during parsing.
/// \param env The environment into which parsing should take place.
/// \param debug When true, causes the parser to trace its execution.
/// \param lenient When true, parenthesized blocks that cannot be
/// parsed as subformulas will be considered as
/// atomic propositions.
/// \return A pointer to the formula built from \a sere_string, or
/// 0 if the input was unparsable.
///
......@@ -113,7 +120,8 @@ namespace spot
parse_error_list& error_list,
environment& env =
default_environment::instance(),
bool debug = false);
bool debug = false,
bool lenient = false);
/// \brief Format diagnostics produced by spot::ltl::parse
/// or spot::ltl::ratexp
......
......@@ -85,6 +85,7 @@ TESTS = \
tunenoform.test \
consterm.test \
kind.test \
lenient.test \
syntimpl.test \
reduc.test \
reducpsl.test \
......
#!/bin/sh
# -*- coding: utf-8 -*-
# Copyright (C) 2012 Laboratoire de Recherche et Développement de
# l'Epita (LRDE).
#
# This file is part of Spot, a model checking library.
#
# Spot is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# Spot is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
# License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
. ./defs || exit 1
set -e
ltlfilt=../../bin/ltlfilt
cat >input <<EOF
(a < b) U (b == c - 1)
((a < b) U {b == c - 1})
(((a < b) U (b == c - 1)))
(((((a < b))) U (b == c - 1)))
(((a < b) U (((b == c - 1)))))
(((a < b ) U ({( b == c - 1 )})))
((((( a < b))) U ( b == c - 1)))
((((( a < b))) U { b == c - 1}!))
EOF
cat >expected <<EOF
"a < b" U "b == c - 1"
"a < b" U "b == c - 1"
"a < b" U "b == c - 1"
"a < b" U "b == c - 1"
"a < b" U "b == c - 1"
"a < b" U "b == c - 1"
"a < b" U "b == c - 1"
"a < b" U "b == c - 1"
EOF
run 0 $ltlfilt --lenient input > output
cmp output expected
......@@ -249,7 +249,14 @@ namespace spot
os_ << "(";
if (!is_bare_word(str.c_str()))
{
os_ << '"' << str << '"';
// Spin 6 supports atomic propositions such as (a == 0)
// as long as they are enclosed in parentheses.
if (kw_ != spin_kw)
os_ << '"' << str << '"';
else if (!full_parent_)
os_ << '(' << str << ')';
else
os_ << str;
}
else
{
......
// -*- coding: utf-8 -*-
// Copyright (C) 2012 Laboratoire de Recherche et Developpement de
// l'Epita (LRDE)
// Copyright (C) 2004 Laboratoire d'Informatique de Paris 6 (LIP6),
// dpartement Systmes Rpartis Coopratifs (SRC), Universit Pierre
// département Systèmes Répartis Coopératifs (SRC), Université Pierre
// et Marie Curie.
//
// This file is part of Spot, a model checking library.
......@@ -19,6 +22,10 @@
#include <sstream>
#include <ostream>
#include <algorithm>
#include <functional>
#include <cctype>
#include <locale>
#include "escape.hh"
namespace spot
......@@ -52,4 +59,16 @@ namespace spot
escape_str(os, str);
return os.str();
}
void
trim(std::string& str)
{
str.erase(std::find_if(str.rbegin(), str.rend(),
std::not1(std::ptr_fun<int, int>
(std::isspace))).base(),
str.end());
str.erase(str.begin(),
std::find_if(str.begin(), str.end(),
std::not1(std::ptr_fun<int, int>(std::isspace))));
}
}
// Copyright (C) 2011 Laboratoire de Recherche et Developpement de
// Copyright (C) 2011, 2012 Laboratoire de Recherche et Developpement de
// l'Epita (LRDE).
// Copyright (C) 2004 Laboratoire d'Informatique de Paris 6 (LIP6),
// département Systèmes Répartis Coopératifs (SRC), Université Pierre
......@@ -38,6 +38,8 @@ namespace spot
/// <code>\\n</code> in \a str.
std::string escape_str(const std::string& str);
/// \brief Remove spaces at the front and back of \a str.
void trim(std::string& str);
/// @}
}
......
......@@ -201,7 +201,9 @@ transition:
else
{
spot::ltl::parse_error_list pel;
const spot::ltl::formula* f = spot::ltl::parse(*$3, pel);
const spot::ltl::formula* f = spot::ltl::parse(*$3, pel,
parse_environment,
false, true);
delete $3;
for(spot::ltl::parse_error_list::const_iterator i = pel.begin();
i != pel.end(); ++i)
......
......@@ -86,10 +86,14 @@ skip
}
EOF
# We used to catch the following error:
# input:5.11: syntax error, unexpected closing parenthesis
# input:5.8-9: missing right operand for "and operator"
# but since the guard parser is more lenient, we just assume
# that "p1 && " is an atomic property.
run 2 ../ltl2tgba -XN input > stdout 2>stderr
cat >expected <<EOF
input:5.11: syntax error, unexpected closing parenthesis
input:5.8-9: missing right operand for "and operator"
input:9.16: syntax error, unexpected closing parenthesis
input:9.16: ignoring trailing garbage
EOF
......@@ -98,24 +102,33 @@ diff stderrfilt expected
cat >formulae<<EOF
a
<>[] a
FG a