Vaucanson 1.4
krat_exp_parser.hxx
00001 // krat_exp_parser.hxx: this file is part of the Vaucanson project.
00002 //
00003 // Vaucanson, a generic library for finite state machines.
00004 //
00005 // Copyright (C) 2008, 2009, 2011 The Vaucanson Group.
00006 //
00007 // This program is free software; you can redistribute it and/or
00008 // modify it under the terms of the GNU General Public License
00009 // as published by the Free Software Foundation; either version 2
00010 // of the License, or (at your option) any later version.
00011 //
00012 // The complete GNU General Public Licence Notice can be found as the
00013 // `COPYING' file in the root directory.
00014 //
00015 // The Vaucanson Group consists of people listed in the `AUTHORS' file.
00016 //
00017 
00018 #ifndef VCSN_ALGEBRA_IMPLEMENTATION_SERIES_KRAT_EXP_PARSER_HXX
00019 # define VCSN_ALGEBRA_IMPLEMENTATION_SERIES_KRAT_EXP_PARSER_HXX
00020 # include <map>
00021 # include <queue>
00022 # include <set>
00023 # include <vaucanson/algebra/implementation/series/krat_exp_parser.hh>
00024 # include <vaucanson/algebra/concept/monoid_base.hh>
00025 # include <vaucanson/algebra/implementation/series/krat_exp_parser_private.hh>
00026 
00027 namespace vcsn
00028 {
00029   namespace algebra
00030   {
00031 
00032     template <class S, class T>
00033     struct Lexer
00034     {
00035       // Type helpers.
00036       typedef typename Element<S, T>::monoid_elt_t monoid_elt_t;
00037       typedef typename Element<S, T>::semiring_elt_t semiring_elt_t;
00038       typedef typename Element<S, T>::set_t::shared_series_rep_t
00039         shared_series_rep_t;
00040 
00041       Lexer(const std::string& from,
00042             Element<S, T>& e,
00043             vcsnyy::krat_exp_parser& parser,
00044             bool lex_trace,
00045             std::string& error) :
00046         from_(from),
00047         e_(e),
00048         parser_(parser),
00049         lex_trace_(lex_trace),
00050         close_weight_("}"),
00051         // Reserve seven elements for the "visible" tokens.
00052         token_tab_(7),
00053         error_(error)
00054       {
00055         // Get the series representation.
00056         const shared_series_rep_t& rep = e.structure().representation();
00057 
00058         token_tab_[0] = rep->open_par;
00059         token_tab_[1] = rep->close_par;
00060         token_tab_[2] = rep->plus;
00061         token_tab_[3] = rep->times;
00062         token_tab_[4] = rep->star;
00063         token_tab_[5] = rep->zero;
00064         token_tab_[6] = rep->open_weight;
00065         close_weight_ = rep->close_weight;
00066 
00067         // Concat the spaces representation vector.
00068         token_tab_.insert(token_tab_.end(), rep->spaces.begin(), rep->spaces.end());
00069 
00070         std::string::const_iterator sit;
00071         semiring_elt_t ww(e_.structure().semiring());
00072         sit = close_weight_.begin();
00073         if (parse_weight(ww, close_weight_, sit))
00074           error_ += "Warning : the token '" + close_weight_ +
00075                     + "' is already defined as a weight.\n";
00076         sit = token_tab_[7].begin();
00077         if (parse_weight(ww, token_tab_[7], sit))
00078           error_ += "Warning : the token '" + token_tab_[7]
00079                     + "' is already defined as a weight.\n";
00080         for (unsigned i = 0; i < token_tab_.size(); i++)
00081         {
00082           monoid_elt_t w(e_.structure().monoid());
00083           if (parse_word(w, token_tab_[i]).first)
00084             error_ +=  "Warning : the token '" + token_tab_[i]
00085                        + "' is already defined as a word.\n";
00086         }
00087 
00088       }
00089 
00090       bool
00091       lex()
00092       {
00093         size_t size = from_.size();
00094         size_t it = 0;
00095         while (it < size)
00096         {
00097           // Try to recognize a word.
00098           monoid_elt_t w(e_.structure().monoid());
00099           std::pair<bool, int> res = parse_word(w, from_.substr(it));
00100           if (res.second > 0)
00101             {
00102               insert_word(w);
00103               // If the entire input was a word there is nothing else to lex.
00104               if (res.first)
00105                 {
00106                   assertion(it + res.second == size);
00107                   break;
00108                 }
00109               it += res.second;
00110             }
00111 
00112           // Try to recognize a regex token.
00113           size_t i;
00114           for (i = 0; i < token_tab_.size(); i++)
00115           {
00116             if (!from_.compare(it, token_tab_[i].size(), token_tab_[i]))
00117             {
00118               if (i == 6)
00119               {
00120                 if (!insert_weight(it))
00121                   return false;
00122               }
00123               else
00124               {
00125                 if (i < 6)
00126                   insert_token(i);
00127                 it += token_tab_[i].size();
00128               }
00129               break;
00130             }
00131           }
00132           if (i >= token_tab_.size())
00133             {
00134               error_ += "Lexer error, unrecognized characters: "
00135                 + from_.substr(it) + "\n";
00136               return false;
00137             }
00138         }
00139         return true;
00140       }
00141 
00142       private:
00143       void
00144       insert_word(monoid_elt_t& w)
00145       {
00146         Element<S, T> ww = (w.value().empty() ?
00147                             identity_as<T>::of(e_.structure()) :
00148                             Element<S, T>(e_.structure(), w.value()));
00149 
00150         krat_exp_proxy<S, T>* rexp = new krat_exp_proxy<S, T>(ww);
00151         parser_.insert_word(rexp);
00152       }
00153 
00154       bool
00155       insert_weight(size_t& it)
00156       {
00157         it += token_tab_[7].size();
00158         size_t bg = it;
00159         size_t size = from_.size();
00160         unsigned cpt = 1;
00161         for (; it < size; it++)
00162         {
00163           if (!from_.compare(it, token_tab_[7].size(), token_tab_[7]))
00164             cpt++;
00165           else
00166             if (!from_.compare(it, close_weight_.size(), close_weight_))
00167             {
00168               if (cpt == 1)
00169               {
00170                 semiring_elt_t w(e_.structure().semiring());
00171                 std::string s = from_.substr(bg, it - bg);
00172                 std::string::const_iterator sit = s.begin();
00173                 if (parse_weight(w, s, sit))
00174                 {
00175                   semiring_proxy<S, T>* sem = new semiring_proxy<S, T>(w);
00176                   parser_.insert_weight(sem);
00177                 }
00178                 else
00179                 {
00180                   error_ += "Lexer error : " + s + " is not a weight\n";
00181                   return false;
00182                 }
00183                 it += close_weight_.size();
00184                 return true;
00185               }
00186               else
00187                 cpt--;
00188             }
00189         }
00190         error_ += "Lexer error : Expected " + close_weight_
00191                   + "instead of END\n";
00192         return false;
00193       }
00194 
00195       void
00196       insert_token(int i)
00197       {
00198         if (i == 5)
00199         {
00200           Element<S, T> w = zero_as<T>::of(e_.structure());
00201           krat_exp_proxy<S, T>* rexp = new krat_exp_proxy<S, T>(w);
00202           parser_.insert_zero(rexp);
00203         }
00204         else
00205         {
00206           std::string* str = new std::string(token_tab_[i]);
00207           parser_.insert_token(i, str);
00208         }
00209       }
00210 
00211       const std::string& from_;
00212       Element<S, T>& e_;
00213       vcsnyy::krat_exp_parser& parser_;
00214       bool lex_trace_;
00215       std::string close_weight_;
00216       std::vector<std::string> token_tab_;
00217       std::string& error_;
00218     }; // Lexer
00219 
00220     template <class S, class T>
00221     std::pair<bool, std::string>
00222     parse(const std::string& from, Element<S, T>& exp,
00223           bool lex_trace, bool parse_trace)
00224     {
00225       parse_trace = parse_trace;
00226       std::string error;
00227       vcsnyy::krat_exp_parser parser;
00228       Lexer<S, T> lex(from, exp, parser, lex_trace, error);
00229       if (!lex.lex())
00230         return std::make_pair(true, error);
00231       krat_exp_proxy<S, T> rexp(exp);
00232       if (parser.parse(rexp, error))
00233         return std::make_pair(true, error);
00234       exp = rexp.self;
00235       return std::make_pair(false, error);
00236     }
00237 
00238   } // algebra
00239 
00240 } // vcsn
00241 
00242 #endif // ! VCSN_ALGEBRA_IMPLEMENTATION_SERIES_KRAT_EXP_PARSER_HXX