Vcsn  2.5
Be Rational
efsm.cc
Go to the documentation of this file.
1 #include <fstream>
2 #include <set>
3 #include <string>
4 
5 #include <boost/algorithm/string/erase.hpp>
6 #include <boost/algorithm/string/predicate.hpp> // starts_with
7 #include <boost/algorithm/string/replace.hpp> // replace_all_copy
8 
9 #include <lib/vcsn/algos/fwd.hh>
12 #include <vcsn/dyn/registries.hh>
13 #include <vcsn/dyn/algos.hh>
14 #include <vcsn/dyn/automaton.hh>
15 #include <vcsn/misc/getargs.hh>
16 #include <vcsn/misc/symbol.hh>
17 #include <vcsn/misc/regex.hh>
18 
19 namespace vcsn
20 {
21  namespace dyn
22  {
23  namespace
24  {
27  std::string
28  read_here_doc(std::istream& is)
29  {
30  static const auto re
31  = std::regex("cat >\\$medir/([a-z]+)\\.[a-z]* <<\\\\EOFSM",
32  std::regex::extended);
33  std::string line;
34  std::smatch res;
35  while (is.good())
36  {
37  std::getline(is, line, '\n');
38  if (std::regex_match(line, res, re))
39  return res[1];
40  }
41  raise("invalid efsm file: missing \"cat\" symbol");
42  }
43 
47  std::string
48  read_symbol_table(std::istream& is)
49  {
50  std::string res;
51  std::string line;
52  std::string val;
53  while (is.good())
54  {
55  std::getline(is, line, '\n');
56  std::istringstream ss{line};
57  ss >> res;
58  if (ss.fail())
59  continue;
60  ss >> val;
61  if (ss.fail())
62  raise("invalid efsm file");
63  if (val == "0" || res == "EOFSM")
64  break;
65  }
66 
67  while (line != "EOFSM" && is.good())
68  std::getline(is, line, '\n');
69 
70  require(line == "EOFSM",
71  "invalid efsm file: missing closing EOFSM");
72  return res;
73  }
74 
77  read_weightset_type(std::istream& is)
78  {
79  using weightset_type = lazy_automaton_editor::weightset_type;
80  std::string line;
81  while (is.good())
82  {
83  std::getline(is, line, '\n');
84  if (boost::starts_with(line, "arc_type="))
85  {
86  boost::algorithm::erase_first(line, "arc_type=");
87  static auto map = getarg<weightset_type>
88  {
89  "arc type",
90  {
91  {"log", weightset_type::logarithmic},
92  {"log64", weightset_type::logarithmic},
93  {"standard", weightset_type::tropical},
94  }
95  };
96  return map[line];
97  }
98  }
99  raise("invalid efsm file: missing \"arc_type=\"");
100  }
101  }
102 
103  automaton
104  read_efsm(std::istream& is, const location&)
105  {
106  std::string file = "file.efsm";
107  using string_t = symbol;
108 
109  // Whether has both isysmbols and osymbols.
110  bool is_transducer = false;
111 
112  // Look for the arc type, which describes the weightset.
113  auto weightset = read_weightset_type(is);
114 
115  // Look for the symbol table.
116  auto isyms = read_here_doc(is);
117  // The single piece of information we need from the symbol
118  // table: the representation of the empty word.
119  std::string ione = read_symbol_table(is);
120 
121  // If we had "isymbols", we now expect "osymbols".
122  std::string oone = ione;
123  if (isyms == "isymbols")
124  {
125  is_transducer = true;
126  auto osyms = read_here_doc(is);
127  require(osyms == "osymbols",
128  "invalid efsm file: expected osymbols: ", osyms);
129  oone = read_symbol_table(is);
130  }
131 
132  auto edit = vcsn::lazy_automaton_editor{};
133  edit.open(true);
134  edit.weightset(weightset);
135 
136  // The first transition also provides the initial state.
137  bool first = true;
138  auto trans = read_here_doc(is);
139  require(trans == "transitions",
140  "invalid efsm file: expected transitions: ", trans);
141  // Line: Source Dest ILabel [OLabel] [Weight].
142  // Line: FinalState [Weight].
143  std::string line;
144  while (is.good())
145  {
146  std::getline(is, line, '\n');
147  if (line == "EOFSM")
148  break;
149  std::istringstream ss{line};
150  string_t s, d, l1, l2, w;
151  ss >> s >> d >> l1 >> l2 >> w;
152  if (first)
153  edit.add_initial(s);
154  if (l1.get().empty())
155  // FinalState [Weight]
156  edit.add_final(s, d);
157  else
158  {
159  if (l1 == ione)
160  l1 = "\\e";
161  if (is_transducer)
162  {
163  if (l2 == oone)
164  l2 = "\\e";
165  edit.add_transition(s, d, l1, l2, w);
166  }
167  else
168  {
169  // l2 is actually the weight.
170  edit.add_transition(s, d, l1, l2);
171  }
172  }
173  first = false;
174  }
175 
176  require(line == "EOFSM",
177  file, ": bad input format, missing EOFSM");
178  // Flush till EOF.
179  while (is.get() != EOF)
180  continue;
181 
182  // We don't want to read it as a `law<char>` automaton, as for
183  // OpenFST, these "words" are insecable. The proper
184  // interpretation is lal<string> (or lan<string>).
185  using boost::algorithm::replace_all_copy;
186  auto ctx = replace_all_copy(edit.result_context(),
187  "law<char>", "lan<string>");
188  return edit.result(ctx);
189  }
190  }
191 }
Build an automaton with unknown context.
Definition: a-star.hh:8
return res
Definition: multiply.hh:398
auto map(const std::tuple< Ts... > &ts, Fun f) -> decltype(map_tuple_(f, ts, make_index_sequence< sizeof...(Ts)>()))
Map a function on a tuple, return tuple of the results.
Definition: tuple.hh:177
automaton read_efsm(std::istream &is, const location &)
Definition: efsm.cc:104
symbol string_t
Definition: parse.hh:66
bool open(bool o)
Whether unknown letters should be added, or rejected.
boost::flyweight< std::string, boost::flyweights::no_tracking, boost::flyweights::intermodule_holder > symbol
An internalized string.
Definition: symbol.hh:21
weightset_type
Weightset types.
void require(Bool b, Args &&... args)
If b is not verified, raise an error with args as message.
Definition: raise.hh:91
Abstract a location.
Definition: location.hh:47