Commit f4cbaa83 authored by Akim Demaille
Browse files

fst: support fstinfo

I wanted to know where OpenFST stands on is-deterministic when there
are spontaneous transitions.  The answer is: OpenFST does not care
about them with respect to determinism: the `one` label is treated
like any other label.

This is something I was considering for Vcsn too...  But I'm uneasy
with this, so I think I will rather introduce a new property P, and
define deterministic as having P and no spontaneous transitions.

See also #196.

* python/vcsn/automaton.py (fstinfo): New.
* tests/python/efsm.py: Check.
parent 1ba16b54
......@@ -14,9 +14,28 @@ from vcsn.dot import (_dot_pretty, _dot_to_boxart, _dot_to_svg,
_dot_to_svg_dot2tex)
def _automaton_fst_output(cmd, aut):
    '''Run the command `cmd` on the automaton `aut` coded in OpenFST
    format via pipes and return the output as bytes.

    `aut` is first compiled by `efstcompile` from its `efsm`
    rendering, and the compiled FST is then fed to `cmd` on its
    standard input.  `cmd` is whatever `Popen` accepts: a program name
    or an argument list.

    Raise RuntimeError, carrying the captured standard error, if
    either program exits with a non-zero status.
    '''
    def run(argv, data):
        # `communicate` writes the input and drains both stdout and
        # stderr at the same time, so a chatty child cannot block on a
        # full pipe.  The `with` closes every pipe and reaps the child.
        with Popen(argv, stdin=PIPE, stdout=PIPE, stderr=PIPE) as proc:
            out, err = proc.communicate(data)
        if proc.returncode:
            # `argv` may be a plain string or a list of arguments.
            name = argv if isinstance(argv, str) else ' '.join(argv)
            raise RuntimeError(name + " failed: " + err.decode('utf-8'))
        return out

    fst = run(['efstcompile'], aut.format('efsm').encode('utf-8'))
    return run(cmd, fst)
def _automaton_fst(cmd, aut):
'''Run the command `cmd` on the automaton `aut` coded in OpenFST
format via pipes.
format via pipes and return the result as an automaton.
'''
p1 = Popen(['efstcompile'], stdin=PIPE, stdout=PIPE, stderr=PIPE)
p2 = Popen(cmd, stdin=p1.stdout, stdout=PIPE, stderr=PIPE)
......@@ -55,6 +74,49 @@ def _automaton_fst_files(cmd, *aut):
return automaton(res.decode('utf-8'), 'efsm')
_fstinfo_bool = {
'acceptor': 'is acceptor',
'accessible': 'is accessible',
'coaccessible': 'is coaccessible',
'cyclic at initial state': 'is cyclic at initial state',
'cyclic': 'is cyclic',
'error': 'is error',
'expanded': 'is expanded',
'input deterministic': 'is input deterministic',
'input epsilons': 'is input epsilons',
'input label sorted': 'is input label sorted',
'input lookahead': 'is input lookahead',
'input matcher': 'is input matcher',
'input/output epsilons': 'is input/output epsilons',
'mutable': 'is mutable',
'output deterministic': 'is output deterministic',
'output epsilons': 'is output epsilons',
'output label sorted': 'is output label sorted',
'output lookahead': 'is output lookahead',
'output matcher': 'is output matcher',
'string': 'is string',
'top sorted': 'is top sorted',
'weighted': 'is weighted',
}
_fstinfo_int = {
'number of arcs': 'number of transitions',
}
def _fstinfo_normalize(k, v):
    '''Normalize one `fstinfo` entry: return the pair (key, value).

    Keys starting with `#` are counters: the `#` is spelled out as
    `number`, the key is renamed if listed in `_fstinfo_int`, and the
    value is converted to int.  Keys listed in `_fstinfo_bool` are
    renamed accordingly and their `y`/`n` value is converted to bool.
    Any other entry is returned unchanged.

    Raise ValueError if a Boolean entry is neither `y` nor `n`, or if
    a counter is not an integer.
    '''
    if k.startswith('#'):
        k = k.replace('#', 'number')
        k = _fstinfo_int.get(k, k)
        v = int(v)
    elif k in _fstinfo_bool:
        # Validate explicitly: an `assert` disappears under `python -O`,
        # and any unexpected value would then silently read as False.
        if v not in ('y', 'n'):
            raise ValueError(
                "fstinfo: unexpected value for '{}': '{}'".format(k, v))
        k = _fstinfo_bool[k]
        v = v == 'y'
    return k, v
@variadicProxy('__and__')
class Conjunction:
'''A proxy class to delay calls to the & operator in order to turn
......@@ -270,6 +332,18 @@ class automaton:
# Thin wrappers that pass `self` and an OpenFST command name to
# `_automaton_fst`.
fstsynchronize = lambda self: _automaton_fst('fstsynchronize', self)
# Note that the command used for transposition is named `fstreverse`.
fsttranspose = lambda self: _automaton_fst('fstreverse', self)
def fstinfo(self):
    '''Run `fstinfo` on `self` and return its report as a dictionary.

    Each non-blank line of the report is split into a key and a value,
    which `_fstinfo_normalize` then renames and converts.
    '''
    info = _automaton_fst_output('fstinfo', self).decode('utf-8')
    res = {}
    for l in info.splitlines():
        if not l.strip():
            continue
        # Keys contain single spaces ("# of states"), so the key ends
        # at the first run of two spaces, not at the first space.
        k, _, v = l.partition('  ')
        k, v = _fstinfo_normalize(k.strip(), v.strip())
        res[k] = v
    return res
def HTML(self):
'''Display `self` with SVG and MathJax together.'''
svg = _dot_to_svg(_dot_pretty(self.format('dot,mathjax'), 'pretty'))
......
......@@ -22,10 +22,11 @@ def _info_to_dict(info):
'''Convert a "key: value" list of lines into a dictionary.
Convert Booleans into bool, and likewise for integers.
'''
res = dict()
res = {}
for l in info.splitlines():
(k, v) = l.split(':', 1)
k, v = l.split(':', 1)
v = v.strip()
# Beware that we may display "N/A" for Boolean (e.g., "is
# ambiguous" for non LAL), and for integers (e.g., "number of
# deterministic states" for non LAL).
......
......@@ -130,6 +130,22 @@ if have_ofst:
CHECK_EQ(a2_vcsn, a2_ofst)
# Check that our infos are compatible.
print('Info')
zmin = vcsn.context('lal_char(ab), zmin')
a = zmin.expression('[ab]*a(<2>[ab])').automaton()
vcsninfo = a.info()
print("vcsninfo:\n{}\n".format(format(vcsninfo)))
ofstinfo = a.fstinfo()
print("ofstinfo:\n{}\n".format(format(ofstinfo)))
for k in ['number of states',
'number of accessible states',
'number of coaccessible states',
'number of final states',
'number of transitions']:
print('Info:', k)
CHECK_EQ(vcsninfo[k], ofstinfo[k])
# Make sure determinizations agree. This automaton, determinized,
# has weights on the final states only, which exercises a bug we
# once had.
......@@ -142,6 +158,7 @@ if have_ofst:
# Make sure compositions agree, even when there are empty words on
# some tapes.
print("Compose")
c_vcsn, c_ofst = compose('<2>a|m', '<3>m|x')
CHECK_EQ(c_vcsn, c_ofst)
......@@ -155,6 +172,7 @@ if have_ofst:
CHECK_EQUIV(c_vcsn, c_ofst)
# Check our proper against OpenFST
print("Proper")
import glob
files = [f for f in glob.glob(os.path.join(medir, '*.efsm'))
if not os.path.basename(f).startswith('bad_')]
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment