Vcsn  2.5.dev
Be Rational
efsm.cc
Go to the documentation of this file.
1 #include <fstream>
2 #include <string>
3 
4 #include <boost/algorithm/string/erase.hpp>
5 #include <boost/algorithm/string/predicate.hpp> // starts_with
6 #include <boost/algorithm/string/replace.hpp> // replace_all_copy
7 
8 #include <lib/vcsn/algos/fwd.hh>
10 #include <vcsn/dyn/algos.hh>
11 #include <vcsn/dyn/automaton.hh>
12 #include <vcsn/misc/getargs.hh>
13 #include <vcsn/misc/symbol.hh>
14 #include <vcsn/misc/regex.hh>
15 
16 namespace vcsn
17 {
18  namespace dyn
19  {
20  namespace
21  {
24  std::string
25  read_here_doc(std::istream& is)
26  {
27  static const auto re
28  = std::regex("cat >\\$medir/([a-z]+)\\.[a-z]* <<\\\\EOFSM",
29  std::regex::extended);
30  std::string line;
31  std::smatch res;
32  while (is.good())
33  {
34  std::getline(is, line, '\n');
35  if (std::regex_match(line, res, re))
36  return res[1];
37  }
38  raise("invalid efsm file: missing \"cat\" symbol");
39  }
40 
44  std::string
45  read_symbol_table(std::istream& is)
46  {
47  std::string res;
48  std::string line;
49  std::string val;
50  while (is.good())
51  {
52  std::getline(is, line, '\n');
53  std::istringstream ss{line};
54  ss >> res;
55  if (ss.fail())
56  continue;
57  ss >> val;
58  if (ss.fail())
59  raise("invalid efsm file");
60  if (val == "0" || res == "EOFSM")
61  break;
62  }
63 
64  while (line != "EOFSM" && is.good())
65  std::getline(is, line, '\n');
66 
67  require(line == "EOFSM",
68  "invalid efsm file: missing closing EOFSM");
69  return res;
70  }
71 
74  read_weightset_type(std::istream& is)
75  {
76  using weightset_type = lazy_automaton_editor::weightset_type;
77  std::string line;
78  while (is.good())
79  {
80  std::getline(is, line, '\n');
81  if (boost::starts_with(line, "arc_type="))
82  {
83  boost::algorithm::erase_first(line, "arc_type=");
84  static auto map = getarg<weightset_type>
85  {
86  "arc type",
87  {
88  {"log", weightset_type::logarithmic},
89  {"log64", weightset_type::logarithmic},
90  {"standard", weightset_type::tropical},
91  }
92  };
93  return map[line];
94  }
95  }
96  raise("invalid efsm file: missing \"arc_type=\"");
97  }
98  }
99 
100  automaton
101  read_efsm(std::istream& is, const location&)
102  {
103  std::string file = "file.efsm";
104  using string_t = symbol;
105 
106  // Whether has both isysmbols and osymbols.
107  bool is_transducer = false;
108 
109  // Look for the arc type, which describes the weightset.
110  auto weightset = read_weightset_type(is);
111 
112  // Look for the symbol table.
113  auto isyms = read_here_doc(is);
114  // The single piece of information we need from the symbol
115  // table: the representation of the empty word.
116  std::string ione = read_symbol_table(is);
117 
118  // If we had "isymbols", we now expect "osymbols".
119  std::string oone = ione;
120  if (isyms == "isymbols")
121  {
122  is_transducer = true;
123  auto osyms = read_here_doc(is);
124  require(osyms == "osymbols",
125  "invalid efsm file: expected osymbols: ", osyms);
126  oone = read_symbol_table(is);
127  }
128 
129  auto edit = vcsn::lazy_automaton_editor{};
130  edit.open(true);
131  edit.weightset(weightset);
132 
133  // The first transition also provides the initial state.
134  bool first = true;
135  auto trans = read_here_doc(is);
136  require(trans == "transitions",
137  "invalid efsm file: expected transitions: ", trans);
138  // Line: Source Dest ILabel [OLabel] [Weight].
139  // Line: FinalState [Weight].
140  std::string line;
141  while (is.good())
142  {
143  std::getline(is, line, '\n');
144  if (line == "EOFSM")
145  break;
146  std::istringstream ss{line};
147  string_t s, d, l1, l2, w;
148  ss >> s >> d >> l1 >> l2 >> w;
149  if (first)
150  edit.add_initial(s);
151  if (l1.get().empty())
152  // FinalState [Weight]
153  edit.add_final(s, d);
154  else
155  {
156  if (l1 == ione)
157  l1 = "\\e";
158  if (is_transducer)
159  {
160  if (l2 == oone)
161  l2 = "\\e";
162  edit.add_transition(s, d, l1, l2, w);
163  }
164  else
165  {
166  // l2 is actually the weight.
167  edit.add_transition(s, d, l1, l2);
168  }
169  }
170  first = false;
171  }
172 
173  require(line == "EOFSM",
174  file, ": bad input format, missing EOFSM");
175  // Flush till EOF.
176  while (is.get() != EOF)
177  continue;
178 
179  // We don't want to read it as a `law<char>` automaton, as for
180  // OpenFST, these "words" are insecable. The proper
181  // interpretation is lal<string> (or lan<string>).
182  using boost::algorithm::replace_all_copy;
183  auto ctx = replace_all_copy(edit.result_context(),
184  "law<char>", "lan<string>");
185  return edit.result(ctx);
186  }
187  }
188 }
Abstract a location.
Definition: location.hh:47
weightset_type
Weightset types.
symbol string_t
Definition: parse.hh:66
return res
Definition: multiply.hh:399
void require(Bool b, Args &&... args)
If b is not verified, raise an error with args as message.
Definition: raise.hh:91
Definition: a-star.hh:8
boost::flyweight< std::string, boost::flyweights::no_tracking, boost::flyweights::intermodule_holder > symbol
An internalized string.
Definition: symbol.hh:21
auto map(const std::tuple< Ts... > &ts, Fun f) -> decltype(map_tuple_(f, ts, make_index_sequence< sizeof...(Ts)>()))
Map a function on a tuple, return tuple of the results.
Definition: tuple.hh:223
automaton read_efsm(std::istream &is, const location &)
Definition: efsm.cc:101
bool open(bool o)
Whether unknown letters should be added, or rejected.
Build an automaton with unknown context.