Vcsn  2.1
Be Rational
efsm.cc
Go to the documentation of this file.
1 #include <fstream>
2 #include <set>
3 #include <string>
4 
5 #include <boost/algorithm/string/erase.hpp>
6 #include <boost/algorithm/string/predicate.hpp> // starts_with
7 #include <boost/algorithm/string/replace.hpp> // replace_all_copy
8 
9 #include <lib/vcsn/algos/fwd.hh>
12 #include <vcsn/dyn/registries.hh>
13 #include <vcsn/dyn/algos.hh>
14 #include <vcsn/dyn/automaton.hh>
15 #include <vcsn/misc/getargs.hh>
16 #include <vcsn/misc/symbol.hh>
17 #include <vcsn/misc/regex.hh>
18 
19 namespace vcsn
20 {
21  namespace dyn
22  {
23  namespace
24  {
27  std::string
28  next_here_doc(std::istream& is)
29  {
30  static std::regex re("cat >\\$medir/([a-z]+)\\.[a-z]* <<\\\\EOFSM",
31  std::regex::extended);
32  std::string line;
33  std::smatch res;
34  while (is.good())
35  {
36  std::getline(is, line, '\n');
37  if (std::regex_match(line, res, re))
38  return res[1];
39  }
40  raise("invalid efsm file: missing \"cat\" symbol");
41  }
42 
46  std::string
47  swallow_symbol_table(std::istream& is)
48  {
49  std::string res;
50  std::string line;
51  std::string val;
52  while (is.good())
53  {
54  std::getline(is, line, '\n');
55  std::istringstream ss{line};
56  ss >> res;
57  if (ss.fail())
58  continue;
59  ss >> val;
60  if (ss.fail())
61  raise("invalid efsm file");
62  if (val == "0" || res == "EOFSM")
63  break;
64  }
65 
66  while (line != "EOFSM" && is.good())
67  std::getline(is, line, '\n');
68 
69  require(line == "EOFSM",
70  "invalid efsm file: missing closing EOFSM");
71  return res;
72  }
73  }
74 
75 
76  automaton
77  read_efsm(std::istream& is)
78  {
79  std::string file = "file.efsm";
80  using string_t = symbol;
81 
82  // Whether has both isysmbols and osymbols.
83  bool is_transducer = false;
84 
85  // Look for the arc type, which describes the weightset.
86  using weightset_type = lazy_automaton_editor::weightset_type;
87  weightset_type weightset = [&is]
88  {
89  std::string line;
90  while (is.good())
91  {
92  std::getline(is, line, '\n');
93  if (boost::starts_with(line, "arc_type="))
94  {
95  boost::algorithm::erase_first(line, "arc_type=");
96  static auto map = std::map<std::string, weightset_type>
97  {
98  {"log", weightset_type::logarithmic},
99  {"log64", weightset_type::logarithmic},
100  {"standard", weightset_type::tropical},
101  };
102  return getargs("arc type", map, line);
103  }
104  }
105  raise("invalid efsm file: missing \"arc_type=\"");
106  }();
107 
108  // Look for the symbol table.
109  auto isyms = next_here_doc(is);
110  // The single piece of information we need from the symbol
111  // table: the representation of the empty word.
112  std::string ione = swallow_symbol_table(is);
113 
114  // If we had "isymbols", we now expect "osymbols".
115  std::string oone = ione;
116  if (isyms == "isymbols")
117  {
118  is_transducer = true;
119  auto osyms = next_here_doc(is);
120  require(osyms == "osymbols",
121  "invalid efsm file: expected osymbols: ", osyms);
122  oone = swallow_symbol_table(is);
123  }
124 
126  edit.open(true);
127  edit.weightset(weightset);
128 
129  // The first transition also provides the initial state.
130  bool first = true;
131  auto trans = next_here_doc(is);
132  require(trans == "transitions",
133  "invalid efsm file: expected transitions: ", trans);
134  // Line: Source Dest ILabel [OLabel] [Weight].
135  // Line: FinalState [Weight].
136  std::string line;
137  while (is.good())
138  {
139  std::getline(is, line, '\n');
140  if (line == "EOFSM")
141  break;
142  std::istringstream ss{line};
143  string_t s, d, l1, l2, w;
144  ss >> s >> d >> l1 >> l2 >> w;
145  if (first)
146  edit.add_initial(s);
147  if (l1.get().empty())
148  // FinalState [Weight]
149  edit.add_final(s, d);
150  else
151  {
152  if (l1 == ione)
153  l1 = "\\e";
154  if (is_transducer)
155  {
156  if (l2 == oone)
157  l2 = "\\e";
158  edit.add_transition(s, d, l1, l2, w);
159  }
160  else
161  {
162  // l2 is actually the weight.
163  edit.add_transition(s, d, l1, l2);
164  }
165  }
166  first = false;
167  }
168 
169  require(line == "EOFSM",
170  file, ": bad input format, missing EOFSM");
171  // Flush till EOF.
172  while (is.get() != EOF)
173  continue;
174 
175  // We don't want to read it as a `law<char>` automaton, as for
176  // OpenFST, these "words" are insecable. The proper
177  // interpretation is lal<string> (or lan<string>).
178  using boost::algorithm::replace_all_copy;
179  auto ctx = replace_all_copy(edit.result_context(),
180  "law<char>", "lan<string>");
181  return edit.result(ctx);
182  }
183  }
184 }
dyn::automaton result(const std::string &ctx={})
Return the built automaton.
void weightset(weightset_type t)
Specify the weightset type.
C::mapped_type getargs(const std::string &kind, const C &map, const std::string &key)
Find a correspondence in a map.
Definition: getargs.hh:21
std::istringstream is
The input stream: the specification to translate.
Definition: translate.cc:372
void add_initial(string_t s, string_t w=string_t{})
Add s as an initial state.
void add_final(string_t s, string_t w=string_t{})
Add s as a final state.
Build an automaton with unknown context.
static dyn::context ctx(const driver &d)
Get the context of the driver.
Definition: parse.cc:80
std::string result_context() const
Return the context that was inferred.
std::shared_ptr< detail::automaton_base > automaton
Definition: automaton.hh:69
void add_transition(string_t src, string_t dst, string_t lbl, string_t w=string_t{})
Add an acceptor transition from src to dst, labeled by lbl.
void require(bool b, Args &&...args)
If b is not verified, raise an error with args as message.
Definition: raise.hh:75
bool open(bool o)
Whether unknown letters should be added, or rejected.
symbol string_t
Definition: parse.hh:66
boost::flyweight< std::string, boost::flyweights::no_tracking, boost::flyweights::intermodule_holder > symbol
An internalized string.
Definition: symbol.hh:23
automaton read_efsm(std::istream &is)
Definition: efsm.cc:77
weightset_type
Weightset types.