Commit 0faea814 authored by Alexandre Duret-Lutz's avatar Alexandre Duret-Lutz
Browse files

bin: add support for reading formulas from CSV files.

* NEWS: Mention it.
* src/bin/common_finput.cc, src/bin/common_finput.hh: Implement it.
* src/bin/common_output.cc, src/bin/common_output.hh: Add the %< and
%> escapes.
* src/bin/ltlfilt.cc: Connect %< and %> to the
prefix andsuffix of the input, and document them.
* src/tgbatest/det.test, src/tgbatest/nondet.test: Simplify these
tests that read CSV files.
parent 8c587531
......@@ -22,6 +22,15 @@ New in spot 1.2a (not released)
output in CSV or JSON will have some extra columns to report
the size of these input automata before ltlcross converts them
into TGBA to perform its regular checks.
- ltlfilt, ltl2tgba, ltl2tgta, and ltlcross can now read formulas
from CSV files. Use option -F FILE/COL to read formulas from
column COL of FILE. Use -F FILE/-COL if the first line of
FILE be ignored.
- when ltlfilt processes formulas from a CSV file, it will output
each CSV line whose formula matches the given constraints, with
the rewriten formula. The new escape sequence %< (text in
columns before the formula) and %> (text after) can be used
with the --format option to alter this output.
* Bug fixes:
- ltlcross' CSV output now stricly follows RFC 4180.
- ltlcross failed to report missing input or output escape sequences
......
......@@ -22,6 +22,7 @@
#include "ltlparse/public.hh"
#include <fstream>
#include <cstring>
#define OPT_LBT 1
#define OPT_LENIENT 2
......@@ -34,8 +35,10 @@ static const argp_option options[] =
{
{ 0, 0, 0, 0, "Input options:", 1 },
{ "formula", 'f', "STRING", 0, "process the formula STRING", 0 },
{ "file", 'F', "FILENAME", 0,
"process each line of FILENAME as a formula", 0 },
{ "file", 'F', "FILENAME[/COL]", 0,
"process each line of FILENAME as a formula; if COL is a "
"positive integer, assume a CSV file and read column COL; use "
"a negative COL to drop the first line of the CSV file", 0 },
{ "lbt-input", OPT_LBT, 0, 0,
"read all formulas using LBT's prefix syntax", 0 },
{ "lenient", OPT_LENIENT, 0, 0,
......@@ -82,10 +85,22 @@ parse_formula(const std::string& s, spot::ltl::parse_error_list& pel)
}
job_processor::job_processor()
: abort_run(false)
: abort_run(false), real_filename(0),
col_to_read(0), prefix(0), suffix(0)
{
}
job_processor::~job_processor()
{
if (real_filename)
free(real_filename);
if (prefix)
free(prefix);
if (suffix)
free(suffix);
}
int
job_processor::process_string(const std::string& input,
const char* filename,
......@@ -113,9 +128,139 @@ job_processor::process_stream(std::istream& is,
int error = 0;
int linenum = 0;
std::string line;
// Discard the first line of a CSV file if requested.
if (col_to_read < 0)
{
std::getline(is, line);
col_to_read = -col_to_read;
}
// Each line of the file and send them to process_string,
// optionally extracting a column of a CSV file.
while (!abort_run && std::getline(is, line))
if (!line.empty())
error |= process_string(line, filename, ++linenum);
{
if (col_to_read == 0)
{
error |= process_string(line, filename, ++linenum);
}
else // We are reading column COL_TO_READ in a CSV file.
{
// FIXME: This code assumes an entire CSV row was been
// fetched by getline(). This is incorrect for processing
// CSV files with fields that contain newlines inside
// double-quoted strings. Patching this code to deal with
// such files is left as an exercise for the first user
// who encounters the issue.
const char* str = line.c_str();
const char* col1_start = str;
// Delimiters for the extracted column.
const char* coln_start = str;
const char* coln_end = 0;
// The current column. (1-based)
int colnum = 1;
// Whether we are parsing a double-quoted string.
bool instring = false;
// Note that RFC 4180 has strict rules about
// double-quotes: ① if a field is double-quoted, the first
// and last characters of the field should be
// double-quotes; ② if a field contains a double-quote
// then it should be double quoted, and the occurrences
// of double-quotes should be doubled. Therefore a CSV file
// may no contain a line such as:
// foo,bar"ba""z",12
// Tools have different interpretation of such a line.
// For instance Python's pandas.read_csv() function will
// load the second field verbatim as the string 'bar"ba""z"',
// while R's read.csv() function will load it as the
// string 'barba"z'. We use this second interpretation, because
// it also makes it possible to parse CSVs fields formatted
// with leading spaces (often for cosmetic purpose). When
// extracting the second field of
// foo, "ba""z", 12
// we will return ' baz' and the leading space will be ignored
// by our LTL formula parser.
while (*str)
{
switch (*str)
{
case '"':
// Doubled double-quotes are used to escape
// double-quotes.
if (instring && str[1] == '"')
++str;
else
instring = !instring;
break;
case ',':
if (!instring)
{
if (col_to_read == colnum)
coln_end = str;
++colnum;
if (col_to_read == colnum)
coln_start = str + 1;
}
break;
}
// Once we have the end delimiter for our target
// column, we have all we need.
if (coln_end)
break;
++str;
}
if (!*str)
{
if (colnum != col_to_read)
// Skip this line as it has no enough columns.
continue;
else
// The target columns ends at the end of the line.
coln_end = str;
}
// Skip the line if it has an empty field.
if (coln_start == coln_end)
continue;
// save the contents before and after that columns for the
// %< and %> escapes (ignoring the trailing and leading
// commas).
prefix = (col_to_read != 1) ?
strndup(col1_start, coln_start - col1_start - 1) : 0;
suffix = (*coln_end != 0) ? strdup(coln_end + 1) : 0;
std::string field(coln_start, coln_end);
// Remove double-quotes if any.
if (field.find('"') != std::string::npos)
{
unsigned dst = 0;
bool instring = false;
for (; coln_start != coln_end; ++coln_start)
if (*coln_start == '"')
// A doubled double-quote instead a double-quoted
// string is an escaped double-quote.
if (instring && coln_start[1] == '"')
field[dst++] = *++coln_start;
else
instring = !instring;
else
field[dst++] = *coln_start;
field.resize(dst);
}
error |= process_string(field, filename, ++linenum);
if (prefix)
{
free(prefix);
prefix = 0;
}
if (suffix)
{
free(suffix);
suffix = 0;
}
}
}
return error;
}
......@@ -128,9 +273,46 @@ job_processor::process_file(const char* filename)
errno = 0;
std::ifstream input(filename);
if (!input)
error(2, errno, "cannot open '%s'", filename);
return process_stream(input, filename);
if (input)
return process_stream(input, filename);
int saved_errno = errno;
// If we have a filename like "foo/NN" such
// that:
// ① foo/NN is not a file (already the case),
// ② NN is a number > 0,
// ③ foo is a file,
// then it means we want to open foo as
// a CSV file and process column NN.
if (const char* slash = strrchr(filename, '/'))
{
char* end;
errno = 0;
long int col = strtol(slash + 1, &end, 10);
// strtol ate all remaining characters and NN is positive
if (errno == 0 && !*end && col != 0)
{
col_to_read = col;
if (real_filename)
free(real_filename);
real_filename = strndup(filename, slash - filename);
// Special case for stdin.
if (real_filename[0] == '-' && real_filename[1] == 0)
return process_stream(std::cin, real_filename);
std::ifstream input(real_filename);
if (input)
return process_stream(input, real_filename);
error(2, errno, "cannot open '%s' nor '%s'",
filename, real_filename);
}
}
error(2, saved_errno, "cannot open '%s'", filename);
return -1;
}
int
......
// -*- coding: utf-8 -*-
// Copyright (C) 2012 Laboratoire de Recherche et Développement de
// l'Epita (LRDE).
// Copyright (C) 2012, 2013 Laboratoire de Recherche et Développement
// de l'Epita (LRDE).
//
// This file is part of Spot, a model checking library.
//
......@@ -56,9 +56,7 @@ protected:
public:
job_processor();
virtual ~job_processor()
{
}
virtual ~job_processor();
virtual int
process_formula(const spot::ltl::formula* f,
......@@ -75,6 +73,11 @@ public:
virtual int
run();
char* real_filename;
long int col_to_read;
char* prefix;
char* suffix;
};
......
......@@ -109,6 +109,8 @@ namespace
const spot::ltl::formula* f;
const char* filename;
int line;
const char* prefix;
const char* suffix;
};
class printable_formula:
......@@ -141,6 +143,8 @@ namespace
declare('f', &fl_);
declare('F', &filename_);
declare('L', &line_);
declare('<', &prefix_);
declare('>', &suffix_);
set_output(os);
}
......@@ -150,6 +154,8 @@ namespace
fl_ = &fl;
filename_ = fl.filename ? fl.filename : "";
line_ = fl.line;
prefix_ = fl.prefix ? fl.prefix : "";
suffix_ = fl.suffix ? fl.suffix : "";
return format(format_);
}
......@@ -158,6 +164,8 @@ namespace
printable_formula fl_;
spot::printable_value<const char*> filename_;
spot::printable_value<int> line_;
spot::printable_value<const char*> prefix_;
spot::printable_value<const char*> suffix_;
};
}
......@@ -202,15 +210,20 @@ parse_opt_output(int key, char* arg, struct argp_state*)
void
output_formula(const spot::ltl::formula* f, const char* filename, int linenum)
output_formula(const spot::ltl::formula* f, const char* filename, int linenum,
const char* prefix, const char* suffix)
{
if (!format)
{
if (prefix)
std::cout << prefix << ",";
stream_formula(std::cout, f, filename, linenum);
if (suffix)
std::cout << "," << suffix;
}
else
{
formula_with_location fl = { f, filename, linenum };
formula_with_location fl = { f, filename, linenum, prefix, suffix };
format->print(fl);
}
......
......@@ -35,6 +35,7 @@ extern const struct argp output_argp;
int parse_opt_output(int key, char* arg, struct argp_state* state);
void output_formula(const spot::ltl::formula* f,
const char* filename = 0, int linenum = 0);
const char* filename = 0, int linenum = 0,
const char* prefix = 0, const char* suffix = 0);
#endif // SPOT_BIN_COMMON_OUTPUT_HH
......@@ -165,6 +165,12 @@ static const argp_option options[] =
"the name of the input file", 0 },
{ "%L", 0, 0, OPTION_DOC | OPTION_NO_USAGE,
"the original line number in the input file", 0 },
{ "%<", 0, 0, OPTION_DOC | OPTION_NO_USAGE,
"the part of the line before the formula if it "
"comes from a column extracted from a CSV file", 0 },
{ "%>", 0, 0, OPTION_DOC | OPTION_NO_USAGE,
"the part of the line after the formula if it "
"comes from a column extracted from a CSV file", 0 },
{ "%%", 0, 0, OPTION_DOC | OPTION_NO_USAGE,
"a single %", 0 },
{ 0, 0, 0, 0, "Miscellaneous options:", -1 },
......@@ -571,7 +577,7 @@ namespace
if (matched)
{
one_match = true;
output_formula(f, filename, linenum);
output_formula(f, filename, linenum, prefix, suffix);
}
f->destroy();
return 0;
......@@ -584,7 +590,7 @@ main(int argc, char** argv)
{
setup(argv);
const argp ap = { options, parse_opt, "[FILENAME...]",
const argp ap = { options, parse_opt, "[FILENAME[/COL]...]",
argp_program_doc, children, 0, 0 };
if (int err = argp_parse(&ap, argc, argv, ARGP_NO_HELP, 0, 0))
......
......@@ -56,7 +56,7 @@ cat >formulas <<'EOF'
1,4,G(G!a | F!c | G!b)
EOF
cut -d, -f3 <formulas | $ltl2tgba -x tba-det --det --stats '%d,%s,%f' -F - > out
$ltl2tgba -x tba-det --det --stats '%d,%s,%f' -F formulas/3 > out
diff formulas out
cat >in.tgba <<'EOF'
......
......@@ -28,8 +28,8 @@ a U b, 1 0
G(!r | Fa) | Fx, 0 1
EOF
cut -d, -f1 expected.1 |
../../bin/ltl2tgba -F- --stats='%f, %d %p' >out.1
# also test the filename/COL syntax
../../bin/ltl2tgba -F expected.1/1 --stats='%f, %d %p' >out.1
diff out.1 expected.1
cat >expected.2<<EOF
......@@ -39,8 +39,8 @@ a U b, 1 1
G(!r | Fa) | Fx, 0 1
EOF
cut -d, -f1 expected.2 |
../../bin/ltl2tgba -C -F- --stats='%f, %d %p' >out.2
# filename/COL should also work when filename=-
../../bin/ltl2tgba -C -F-/1 --stats='%f, %d %p' <expected.2 >out.2
diff out.2 expected.2
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment