Vcsn  2.1
Be Rational
determinize.hh
Go to the documentation of this file.
1 #pragma once
2 
3 #include <set>
4 #include <stack>
5 #include <string>
6 #include <type_traits>
7 #include <queue>
8 
12 #include <vcsn/ctx/traits.hh>
13 #include <vcsn/dyn/automaton.hh> // dyn::make_automaton
14 #include <vcsn/dyn/fwd.hh>
16 #include <vcsn/misc/map.hh> // vcsn::has
17 #include <vcsn/misc/raise.hh> // b
18 #include <vcsn/misc/unordered_map.hh> // vcsn::has
19 #include <vcsn/weightset/fwd.hh> // b
21 
22 namespace vcsn
23 {
24 
25  /*----------------------.
26  | subset construction. |
27  `----------------------*/
28  namespace detail
29  {
36  template <typename Aut>
38  : public automaton_decorator<fresh_automaton_t_of<Aut>>
39  {
40  static_assert(labelset_t_of<Aut>::is_free(),
41  "determinize: boolean: requires free labelset");
42  static_assert(std::is_same<weightset_t_of<Aut>, b>::value,
43  "determinize: boolean: requires Boolean weights");
44 
45  public:
46  using automaton_t = Aut;
48  template <typename Ctx = context_t>
53 
56 
59 
63  : super_t(a->context())
64  , input_(a)
66  {
67  // Pre.
68  state_name_t n;
69  n.resize(state_size_);
70  n.set(input_->pre());
71  map_[n] = super_t::pre();
72  todo_.push(n);
73 
74  // Final states.
75  for (auto t : input_->final_transitions())
76  finals_.set(input_->src_of(t));
77  }
78 
79  static symbol sname()
80  {
81  static symbol res("determinized_automaton<"
83  return res;
84  }
85 
86  std::ostream& print_set(std::ostream& o, format fmt = {}) const
87  {
88  o << "determinized_automaton<";
89  input_->print_set(o, fmt);
90  return o << '>';
91  }
92 
95  state_t state(const state_name_t& ss)
96  {
97  // Benches show that the map_.emplace technique is slower, and
98  // then that operator[] is faster than emplace.
99  state_t res;
100  auto i = map_.find(ss);
101  if (i == std::end(map_))
102  {
103  res = this->new_state();
104  map_[ss] = res;
105 
106  if (ss.intersects(finals_))
107  this->set_final(res);
108 
109  todo_.push(ss);
110  }
111  else
112  res = i->second;
113  return res;
114  }
115 
117  void operator()()
118  {
119  using dests_t
120  = std::map<label_t, state_name_t, vcsn::less<labelset_t>>;
121  auto dests = dests_t{};
122  while (!todo_.empty())
123  {
124  auto ss = std::move(todo_.top());
125  state_t src = state(ss);
126  todo_.pop();
127 
128  dests.clear();
129  for (auto s = ss.find_first(); s != ss.npos;
130  s = ss.find_next(s))
131  {
132  // Cache the output transitions of state s.
133  auto i = successors_.find(s);
134  if (i == successors_.end())
135  {
136  i = successors_.emplace(s, label_map_t{}).first;
137  auto& j = i->second;
138  for (auto t : input_->out(s))
139  {
140  auto l = input_->label_of(t);
141  if (j.find(l) == j.end())
142  j[l].resize(state_size_);
143  j[l].set(input_->dst_of(t));
144  }
145  }
146 
147  // Store in dests the possible destinations per label.
148  for (const auto& p : i->second)
149  {
150  auto j = dests.find(p.first);
151  if (j == dests.end())
152  dests[p.first] = p.second;
153  else
154  j->second |= p.second;
155  }
156  }
157 
158  // Outgoing transitions from the current (result) state.
159  for (const auto& d : dests)
160  this->new_transition(src, state(d.second), d.first);
161  }
162  }
163 
164  bool state_has_name(state_t s) const
165  {
166  return has(origins(), s);
167  }
168 
169  std::ostream&
170  print_state_name(state_t s, std::ostream& o,
171  format fmt = {},
172  bool delimit = false) const
173  {
174  auto i = origins().find(s);
175  if (i == std::end(origins()))
176  this->print_state(s, o);
177  else
178  {
179  if (delimit)
180  o << '{';
181  const char* sep = "";
182  for (auto s: i->second)
183  {
184  o << sep;
185  input_->print_state_name(s, o, fmt, true);
186  sep = ", ";
187  }
188  if (delimit)
189  o << '}';
190  }
191  return o;
192  }
193 
195  using origins_t = std::map<state_t, std::set<state_t>>;
196  mutable origins_t origins_;
197 
198  const origins_t&
199  origins() const
200  {
201  if (origins_.empty())
202  for (const auto& p: map_)
203  {
204  std::set<state_t> from;
205  const auto& ss = p.first;
206  for (auto s = ss.find_first(); s != ss.npos;
207  s = ss.find_next(s))
208  from.emplace(s);
209  origins_.emplace(p.second, std::move(from));
210  }
211  return origins_;
212  }
213 
214  private:
216  using map_t = std::unordered_map<state_name_t, state_t>;
217  map_t map_;
218 
220  automaton_t input_;
221 
225  size_t state_size_ = input_->all_states().back() + 1;
226 
228  using stack = std::stack<state_name_t>;
229  stack todo_;
230 
232  state_name_t finals_;
233 
235  using label_map_t = std::unordered_map<label_t, state_name_t,
236  vcsn::hash<labelset_t>,
237  vcsn::equal_to<labelset_t>>;
238  using successors_t = std::map<state_t, label_map_t>;
239  successors_t successors_;
240  };
241  }
242 
244  template <typename Aut>
245  using determinized_automaton
246  = std::shared_ptr<detail::determinized_automaton_impl<Aut>>;
247 
248  template <typename Aut>
249  inline
250  auto
251  determinize(const Aut& a)
252  -> determinized_automaton<Aut>
253  {
254  auto res = make_shared_ptr<determinized_automaton<Aut>>(a);
255  // Determinize.
256  res->operator()();
257  return res;
258  }
259 
260  template <typename Aut>
261  inline
262  auto
263  codeterminize(const Aut& a)
264  -> decltype(transpose(determinize(transpose(a))))
265  {
266  return transpose(determinize(transpose(a)));
267  }
268 
269  /*---------------------------.
270  | weighted determinization. |
271  `---------------------------*/
272  namespace detail
273  {
279  template <typename Aut>
280  class detweighted_automaton_impl
281  : public automaton_decorator<fresh_automaton_t_of<Aut>>
282  {
283  static_assert(labelset_t_of<Aut>::is_free(),
284  "determinize: weighted: requires free labelset");
285 
286  public:
287  using automaton_t = Aut;
288  using context_t = context_t_of<automaton_t>;
289  template <typename Ctx = context_t>
290  using fresh_automaton_t = fresh_automaton_t_of<Aut, Ctx>;
291  using super_t = automaton_decorator<fresh_automaton_t<>>;
292 
293  using label_t = label_t_of<automaton_t>;
294  using labelset_t = labelset_t_of<automaton_t>;
295  using weightset_t = weightset_t_of<automaton_t>;
296 
297  using state_t = state_t_of<automaton_t>;
298  using weight_t = weight_t_of<automaton_t>;
299 
301  struct stateset
302  {
303  stateset(const automaton_t& aut)
304  : aut_(aut)
305  {}
306 
307  using value_t = state_t;
308 
309  // So that we don't try to print ranges of states.
310  static constexpr bool
312  {
313  return false;
314  }
315 
316  using kind_t = void;
317  static bool equal(state_t l, state_t r)
318  {
319  return l == r;
320  }
321 
322  static bool less(state_t l, state_t r)
323  {
324  return l < r;
325  }
326 
327  static size_t hash(state_t s)
328  {
329  return hash_value(s);
330  }
331 
332  std::ostream&
333  print(state_t s, std::ostream& out,
334  format fmt = {}) const
335  {
336  return aut_->print_state_name(s, out, fmt);
337  }
338 
340  };
341 
343  using state_name_t = typename state_nameset_t::value_t;
344 
348  : super_t(a->context())
349  , input_(a)
350  {
351  // Pre.
352  state_name_t n;
353  n.set(input_->pre(), ws_.one());
354  map_[n] = super_t::pre();
355  todo_.push(n);
356 
357  // Post.
358  n.clear();
359  n.set(input_->post(), ws_.one());
360  map_[n] = super_t::post();
361  }
362 
363  static symbol sname()
364  {
365  static symbol res("detweighted_automaton<"
367  return res;
368  }
369 
370  std::ostream& print_set(std::ostream& o, format fmt = {}) const
371  {
372  o << "detweighted_automaton<";
373  input_->print_set(o, fmt);
374  return o << '>';
375  }
376 
378  void operator()()
379  {
380  // label -> <destination, sum of weights>.
381  using dests_t
382  = std::map<label_t, state_name_t, vcsn::less<labelset_t>>;
383  auto dests = dests_t{};
384  while (!todo_.empty())
385  {
386  auto ss = std::move(todo_.front());
387  todo_.pop();
388  auto src = map_[ss];
389 
390  dests.clear();
391  for (const auto& p : ss)
392  {
393  auto s = label_of(p);
394  auto v = weight_of(p);
395  for (auto t : input_->all_out(s))
396  {
397  auto l = input_->label_of(t);
398  auto dst = input_->dst_of(t);
399  auto w = ws_.mul(v, input_->weight_of(t));
400 
401  // For each letter, update destination state, and
402  // sum of weights.
403  if (!has(dests, l))
404  dests.emplace(l, ns_.zero());
405  auto& d = dests[l];
406  ns_.add_here(d, dst, w);
407  }
408  }
409 
410  for (auto& d : dests)
411  if (!ns_.is_zero(d.second))
412  {
413  weight_t w = ns_.normalize_here(d.second);
414  this->new_transition(src, state_(d.second),
415  d.first, w);
416  }
417  }
418  }
419 
420  bool state_has_name(state_t s) const
421  {
422  return has(origins(), s);
423  }
424 
425  std::ostream&
426  print_state_name(state_t ss, std::ostream& o,
427  format fmt = {}, bool delimit = false) const
428  {
429  auto i = origins().find(ss);
430  if (i == origins().end())
431  this->print_state(ss, o);
432  else
433  {
434  if (delimit)
435  o << '{';
436  ns_.print(i->second, o, fmt, ", ");
437  if (delimit)
438  o << '}';
439  }
440  return o;
441  }
442 
444  using origins_t = std::map<state_t, state_name_t>;
445  mutable origins_t origins_;
446  const origins_t&
447  origins() const
448  {
449  if (origins_.empty())
450  for (const auto& p: map_)
451  origins_.emplace(p.second, p.first);
452  return origins_;
453  }
454 
455  private:
458  state_t state_(const state_name_t& name)
459  {
460  // Benches show that the map_.emplace technique is slower, and
461  // then that operator[] is faster than emplace.
462  state_t res;
463  auto i = map_.find(name);
464  if (i == std::end(map_))
465  {
466  res = this->new_state();
467  map_[name] = res;
468  todo_.push(name);
469  }
470  else
471  res = i->second;
472  return res;
473  };
474 
476  using map_t = std::unordered_map<state_name_t, state_t,
477  vcsn::hash<state_nameset_t>,
478  vcsn::equal_to<state_nameset_t>>;
479  map_t map_;
480 
482  automaton_t input_;
483 
485  weightset_t ws_ = *input_->weightset();
486 
488  state_nameset_t ns_ = {{stateset(input_), ws_}};
489 
491  using queue = std::queue<state_name_t>;
492  queue todo_;
493  };
494  }
495 
497  template <typename Aut>
498  using detweighted_automaton
499  = std::shared_ptr<detail::detweighted_automaton_impl<Aut>>;
500 
501  template <typename Aut>
502  inline
503  auto
504  determinize_weighted(const Aut& a)
505  -> detweighted_automaton<Aut>
506  {
507  auto res = make_shared_ptr<detweighted_automaton<Aut>>(a);
508  res->operator()();
509  return res;
510  }
511 
512  template <typename Aut>
513  inline
514  auto
515  codeterminize_weighted(const Aut& aut)
516  -> decltype(transpose(determinize_weighted(transpose(aut))))
517  {
518  return transpose(determinize_weighted(transpose(aut)));
519  }
520 
521  /*-------------------.
522  | dyn::determinize. |
523  `-------------------*/
524 
525  namespace dyn
526  {
527  namespace detail
528  {
529  template <typename Aut, typename Type>
530  using if_boolean_t
531  = vcsn::enable_if_t<std::is_same<weightset_t_of<Aut>, b>::value, Type>;
532 
533  template <typename Aut, typename Type>
534  using if_not_boolean_t
535  = vcsn::enable_if_t<!std::is_same<weightset_t_of<Aut>, b>::value, Type>;
536 
537 
539  template <typename Aut, typename String>
540  inline
541  if_boolean_t<Aut, automaton>
542  determinize_(const automaton& aut, const std::string& algo)
543  {
544  const auto& a = aut->as<Aut>();
545  if (algo == "auto" || algo == "boolean")
546  return make_automaton(::vcsn::determinize(a));
547  else if (algo == "weighted")
548  return make_automaton(::vcsn::determinize_weighted(a));
549  else
550  raise("determinize: invalid algorithm: ", str_escape(algo));
551  }
552 
554  template <typename Aut, typename String>
555  inline
556  if_not_boolean_t<Aut, automaton>
557  determinize_(const automaton& aut, const std::string& algo)
558  {
559  const auto& a = aut->as<Aut>();
560  if (algo == "boolean")
561  raise("determinize: cannot apply Boolean determinization");
562  else if (algo == "auto" || algo == "weighted")
563  return make_automaton(::vcsn::determinize_weighted(a));
564  else
565  raise("determinize: invalid algorithm: ", str_escape(algo));
566  }
567 
569  template <typename Aut, typename String>
570  inline
571  automaton
572  determinize(const automaton& aut, const std::string& algo)
573  {
574  return determinize_<Aut, String>(aut, algo);
575  }
576  }
577  }
578 
579 
580  /*---------------------.
581  | dyn::codeterminize. |
582  `---------------------*/
583 
584  // FIXME: duplicate code with determinize.
585  namespace dyn
586  {
587  namespace detail
588  {
590  template <typename Aut, typename String>
591  inline
592  if_boolean_t<Aut, automaton>
593  codeterminize_(const automaton& aut, const std::string& algo)
594  {
595  const auto& a = aut->as<Aut>();
596  if (algo == "auto" || algo == "boolean")
597  return make_automaton(::vcsn::codeterminize(a));
598  else if (algo == "weighted")
599  return make_automaton(::vcsn::codeterminize_weighted(a));
600  else
601  raise("codeterminize: invalid algorithm: ", str_escape(algo));
602  }
603 
605  template <typename Aut, typename String>
606  inline
607  if_not_boolean_t<Aut, automaton>
608  codeterminize_(const automaton& aut, const std::string& algo)
609  {
610  const auto& a = aut->as<Aut>();
611  if (algo == "boolean")
612  raise("codeterminize: cannot apply Boolean determinization");
613  else if (algo == "auto" || algo == "weighted")
614  return make_automaton(::vcsn::codeterminize_weighted(a));
615  else
616  raise("codeterminize: invalid algorithm: ", str_escape(algo));
617  }
618 
620  template <typename Aut, typename String>
621  inline
622  automaton
623  codeterminize(const automaton& aut, const std::string& algo)
624  {
625  return codeterminize_<Aut, String>(aut, algo);
626  }
627  }
628  }
629 } // namespace vcsn
boost::dynamic_bitset<> dynamic_bitset
typename detail::label_t_of_impl< base_t< ValueSet >>::type label_t_of
Definition: traits.hh:46
auto out(Args &&...args) const -> decltype(aut_-> out(std::forward< Args >(args)...))
std::ostream & print(state_t s, std::ostream &out, format fmt={}) const
Definition: determinize.hh:333
fresh_automaton_t_of< Aut, Ctx > fresh_automaton_t
Definition: determinize.hh:49
std::ostream & print(const std::tuple< Args...> &args, std::ostream &o)
Definition: tuple.hh:339
weightset_mixin< detail::r_impl > r
Definition: fwd.hh:54
typename state_nameset_t::value_t state_name_t
Definition: determinize.hh:343
static constexpr auto pre(Args &&...args) -> decltype(element_type::pre(std::forward< Args >(args)...))
static bool equal(state_t l, state_t r)
Definition: determinize.hh:317
static constexpr auto post(Args &&...args) -> decltype(element_type::post(std::forward< Args >(args)...))
auto states(Args &&...args) const -> decltype(aut_-> states(std::forward< Args >(args)...))
size_t state_size_
We use state numbers as indexes, so we need to know the last state number.
Definition: determinize.hh:225
state_t_of< automaton_t > state_t
Definition: determinize.hh:297
typename detail::state_t_of_impl< base_t< ValueSet >>::type state_t_of
Definition: traits.hh:48
typename detail::context_t_of_impl< base_t< ValueSet >>::type context_t_of
Definition: traits.hh:45
polynomialset< context< stateset, weightset_t >> state_nameset_t
Definition: determinize.hh:342
typename detail::weightset_t_of_impl< base_t< ValueSet >>::type weightset_t_of
Definition: traits.hh:51
state_name_t finals_
Set of final states in the input automaton.
Definition: determinize.hh:232
Aggregate an automaton, and forward calls to it.
detweighted_automaton_impl(const automaton_t &a)
Build the weighted determinizer.
Definition: determinize.hh:347
The subset construction automaton from another.
Definition: determinize.hh:37
std::ostream & print_set(std::ostream &o, format fmt={}) const
Definition: determinize.hh:370
auto hash_value(const T &v) -> decltype(std::hash< T >
Following the naming convention of Boost.
Definition: functional.hh:63
determinized_automaton_impl(const automaton_t &a)
Build the determinizer.
Definition: determinize.hh:62
typename Aut::element_type::template fresh_automaton_t< Context > fresh_automaton_t_of
Definition: traits.hh:57
context_t_of< automaton_t > context_t
Definition: determinize.hh:47
An input/output format.
Definition: format.hh:11
Provide a variadic mul on top of a binary mul(), and one().
Definition: fwd.hh:46
labelset_t_of< automaton_t > labelset_t
Definition: determinize.hh:51
typename detail::labelset_t_of_impl< base_t< ValueSet >>::type labelset_t_of
Definition: traits.hh:47
std::ostream & print_set(std::ostream &o, format fmt={}) const
Definition: determinize.hh:86
state_t_of< automaton_t > state_t
Result automaton state type.
Definition: determinize.hh:58
label_t_of< automaton_t > label_t
Definition: determinize.hh:50
symbol sname()
Definition: name.hh:67
boost::flyweight< std::string, boost::flyweights::no_tracking, boost::flyweights::intermodule_holder > symbol
An internalized string.
Definition: symbol.hh:23
dynamic_bitset state_name_t
The name: set of (input) states.
Definition: determinize.hh:55
automaton_t input_
Input automaton.
Definition: determinize.hh:220
automaton_t aut_
The wrapped automaton, possibly const.
static bool less(state_t l, state_t r)
Definition: determinize.hh:322