Commit c6c0218c authored by Akim Demaille
Browse files

Sms2fr: use xz

Now that we support compressed efsm, let's compress the efsm
files of sms2fr.  Not only will this save space, but it should also
help with sites that claim that Vcsn is 98.3% shell-script...

The results with Xz are impressive: graphemic.efsm is reduced to
about 6% of its original size, and syntactic.efsm to about 13%.

    -rw-r--r--   1 akim  staff  93099091 21 mar 23:46 graphemic.efsm
    -rw-r--r--   1 akim  staff  11713613 21 mar 19:05 graphemic.efsm.bz2
    -rw-r--r--   1 akim  staff   5502716 21 mar 23:22 graphemic.efsm.xz
    -rw-r--r--   1 akim  staff  62289670 21 mar 23:46 syntactic.efsm
    -rw-r--r--   1 akim  staff  13055960 13 mar 19:44 syntactic.efsm.bz2
    -rw-r--r--   1 akim  staff   7916576 21 mar 23:24 syntactic.efsm.xz

And Vcsn is now 68.8% C++, 20.6% Python and 3.5% shell-script!

While at it:
- use int not char for options
- don't use references to keep the automata
- don't force the format to efsm: let vcsn guess the format
parent a1eb6462
Pipeline #1118 passed with stage
in 128 minutes and 34 seconds
This diff is collapsed.
......@@ -81,5 +81,5 @@ dist_lal_char_zmin_DATA = \
sms2frdir = $(pkgdatadir)/sms2fr
dist_sms2fr_DATA = \
%D%/sms2fr/graphemic.efsm \
%D%/sms2fr/syntactic.efsm
%D%/sms2fr/graphemic.efsm.xz \
%D%/sms2fr/syntactic.efsm.xz
This diff is collapsed.
This diff is collapsed.
......@@ -160,6 +160,10 @@ def canonical_dict(dict, ignores):
exit(0)
if 'text' in dict:
m = re.search('SKIP: (.*)$', dict['text'])
if m:
SKIP(m.group(1))
exit(0)
if re.search('fstcompile: (command )?not found', dict['text']):
SKIP('OpenFST not installed')
exit(0)
......
......@@ -19,7 +19,7 @@ namespace vcsn
static automaton read_automaton(const std::string& f)
{
auto is = open_input_file(f);
return read_automaton(*is, "efsm");
return read_automaton(*is);
}
}
}
......@@ -69,8 +69,8 @@ struct sms2fr_impl
return replace_all_copy(str.substr(begin + 1, end - begin - 1), "#", " ");
}
const vcsn::dyn::automaton &grap;
const vcsn::dyn::automaton &synt;
const vcsn::dyn::automaton grap;
const vcsn::dyn::automaton synt;
const vcsn::dyn::context ctx = vcsn::dyn::make_context("lan_char, rmin");
};
......@@ -89,10 +89,10 @@ struct options
};
int opti;
char opt;
int opt;
while ((opt = getopt_long(argc, argv, "g:s:", longopts, &opti)) != EOF)
switch(opt)
switch (opt)
{
case 'g': // --graphemic
graphemic_file = optarg;
......@@ -115,7 +115,7 @@ struct options
std::string datafile(const std::string& f)
{
auto datadir = vcsn::dyn::configuration("configuration.datadir");
return datadir + "/sms2fr/" + f + ".efsm";
return datadir + "/sms2fr/" + f + ".efsm.xz";
}
std::string graphemic_file = datafile("graphemic");
......
......@@ -150,10 +150,10 @@ struct options
};
int opti;
char opt;
int opt;
while ((opt = getopt_long(argc, argv, "g:s:", longopts, &opti)) != EOF)
switch(opt)
switch (opt)
{
case 'g': // --graphemic
graphemic_file = optarg;
......@@ -176,7 +176,7 @@ struct options
std::string datafile(const std::string& f)
{
auto datadir = vcsn::dyn::configuration("configuration.datadir");
return datadir + "/sms2fr/" + f + ".efsm";
return datadir + "/sms2fr/" + f + ".efsm.xz";
}
std::string graphemic_file = datafile("graphemic");
......
......@@ -5,7 +5,7 @@ from sms2fr import sms_to_fr
import sys
def datafile(f):
return '{datadir}/sms2fr/{file}.efsm'.format(
return '{datadir}/sms2fr/{file}.efsm.xz'.format(
datadir=vcsn.config('configuration.datadir'),
file=f)
......
#! /bin/sh
vcsn_env
demodir=tests/demo
......@@ -19,8 +20,14 @@ comment c\' est trop bien
difficile
je peux pas
EOF
vcsn_env
run 0 out.exp -vcsn python3 $srcdir/$demodir/sms2fr-tests.py < sms.in
run 0 out.exp $demodir/sms2fr-dyn --no-prompt < sms.in
run 0 out.exp $demodir/sms2fr-static --no-prompt < sms.in
set -x
case $(vcsn configuration configuration.lzma) in
(true)
run 0 out.exp -vcsn python3 $srcdir/$demodir/sms2fr-tests.py < sms.in
run 0 out.exp $demodir/sms2fr-dyn --no-prompt < sms.in
run 0 out.exp $demodir/sms2fr-static --no-prompt < sms.in
;;
(*)
tap_skip "Boost does not support lzma"
;;
esac
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment