krat_exp_parser.hxx

00001 // krat_exp_parser.hxx: this file is part of the Vaucanson project.
00002 //
00003 // Vaucanson, a generic library for finite state machines.
00004 //
00005 // Copyright (C) 2001, 2002, 2003, 2004 The Vaucanson Group.
00006 //
00007 // This program is free software; you can redistribute it and/or
00008 // modify it under the terms of the GNU General Public License
00009 // as published by the Free Software Foundation; either version 2
00010 // of the License, or (at your option) any later version.
00011 //
00012 // The complete GNU General Public Licence Notice can be found as the
00013 // `COPYING' file in the root directory.
00014 //
00015 // The Vaucanson Group consists of people listed in the `AUTHORS' file.
00016 //
00017 #ifndef VCSN_ALGEBRA_IMPLEMENTATION_SERIES_KRAT_EXP_PARSER_HXX
00018 # define VCSN_ALGEBRA_IMPLEMENTATION_SERIES_KRAT_EXP_PARSER_HXX
00019 
00020 # include <vaucanson/algebra/implementation/series/krat_exp_parser.hh>
00021 
00022 # include <vaucanson/algebra/concept/monoid_base.hh>
00023 # include <vaucanson/tools/usual_escaped_characters.hh>
00024 
00025 # include <list>
00026 # include <set>
00027 
00028 namespace vcsn {
00029 
00030   namespace algebra {
00031 
00035 
00036     namespace krat_exp_lexing {
00037 
00046       enum token_e
00047         {
00048           lparen,
00049           rparen,
00050           space,
00051           plus,
00052           e_star,
00053           dot,
00054           zero,
00055           one,
00056           a_word,
00057           a_weight,
00058           eof
00059         };
00060 
00061     } // krat_exp_lexing
00062 
00063     using namespace krat_exp_lexing;
00064 
00075     template <class MonoidValue, class SemiringEltValue>
00076     class KRatExpToken
00077     {
00078     public:
00089       struct token
00090       {
00091         token(const token_e& tok_type) :
00092           type(tok_type)
00093         {}
00094 
00095         token(const token_e& tok_type, const MonoidValue& word)
00096         {
00097           m = word;
00098           type = tok_type;
00099         }
00100 
00101         token(const token_e& tok_type, const SemiringEltValue& weight)
00102         {
00103           w = weight;
00104           type = tok_type;
00105         }
00106 
00107         std::string to_string() const
00108         {
00109           switch (type)
00110             {
00111             case a_word                 : return "word";
00112             case a_weight               : return "weight";
00113             case lparen                 : return "(";
00114             case rparen                 : return ")";
00115             case one                    : return "one";
00116             case zero                   : return "zero";
00117             case krat_exp_lexing::dot   : return ".";
00118             case e_star                 : return "*";
00119             case krat_exp_lexing::plus  : return "+";
00120             case space                  : return " ";
00121             case eof                    : return "EOF";
00122             }
00123           return "";
00124         }
00125 
00126         token_e type;
00127         MonoidValue m;
00128         SemiringEltValue w;
00129       };
00130 
00131       KRatExpToken(const token_e& tok)
00132       {
00133         tok_.push_back(tok);
00134       }
00135 
00136       KRatExpToken()
00137       {
00138       }
00139 
00140       std::string to_string() const
00141       {
00142         if (tok_.size() == 0)
00143           return "no token";
00144         else if (tok_.size() == 1)
00145           return tok_.front().to_string();
00146         else
00147           {
00148             std::string s("schrod(");
00149             typename std::list<token>::const_iterator i = tok_.begin();
00150             while (i != tok_.end())
00151               s += i->to_string() + (++i != tok_.end() ? " ": "");
00152             s += ")";
00153             return s;
00154           }
00155       }
00156 
00157       bool is_just_a(const token_e tok)
00158       {
00159         return ((tok_.size() == 1) && (tok_.front().type == tok));
00160       }
00161 
00162       bool is_defined()
00163       {
00164         return (tok_.size() > 0);
00165       }
00166 
00167       bool is_schrod()
00168       {
00169         return (tok_.size() > 1);
00170       }
00171 
00172       bool is_a(const token_e tok)
00173       {
00174         for (typename std::list<token>::const_iterator i = tok_.begin();
00175              i != tok_.end();
00176              ++i)
00177           if (i->type == tok)
00178             return true;
00179         return false;
00180       }
00181 
00182       MonoidValue as_word()
00183       {
00184         for (typename std::list<token>::const_iterator i = tok_.begin();
00185              i != tok_.end();
00186              ++i)
00187           if (i->type == a_word)
00188             return i->m;
00189         unreachable("internal error in krat_exp parser");
00190         return MonoidValue();
00191       }
00192 
00193       SemiringEltValue as_weight()
00194       {
00195         for (typename std::list<token>::const_iterator i = tok_.begin();
00196              i != tok_.end();
00197              ++i)
00198           if (i->type == a_weight)
00199             return i->w;
00200         unreachable("internal error in krat_exp parser.");
00201         return SemiringEltValue();
00202       }
00203 
00204       KRatExpToken& operator=(const token_e tok)
00205       {
00206         tok_.push_back(token(tok));
00207         return *this;
00208       }
00209 
00210       KRatExpToken& operator=(const MonoidValue& word_value)
00211       {
00212         tok_.push_back(token(a_word, word_value));
00213         return *this;
00214       }
00215 
00216       KRatExpToken& operator=(const SemiringEltValue& weight_value)
00217       {
00218         tok_.push_back(token(a_weight, weight_value));
00219         return *this;
00220       }
00221 
00222       void reset()
00223       {
00224         tok_.clear();
00225       }
00226 
00227     private:
00228       std::list<token> tok_;
00229     };
00230 
00236     template <class S, class T>
00237     struct Lexer
00238     {
00239       typedef KRatExpToken
00240       <
00241         typename Element<S, T>::monoid_elt_value_t,
00242         typename Element<S, T>::semiring_elt_value_t
00243       >                                               krat_exp_token_t;
00244       typedef std::list<krat_exp_token_t>             token_stream_t;
00245 
00246       Lexer(bool trace = false) :
00247         tokens_ (),
00248         trace_ (trace),
00249         error_msg_ ("No error."),
00250         error_ (false)
00251       {
00252       }
00253 
00255       void
00256       lex_error(const std::string& msg = "Lex error.")
00257       {
00258         error_ = true;
00259         error_msg_ = msg;
00260       }
00261 
00269       void
00270       lex(const std::string& in, const Element<S, T>& e)
00271       {
00272         typedef typename Element<S, T>::monoid_elt_t       monoid_elt_t;
00273         typedef typename Element<S, T>::monoid_elt_value_t monoid_elt_value_t;
00274         typedef typename Element<S, T>::semiring_elt_t     semiring_elt_t;
00275         typedef std::string::const_iterator                iterator_t;
00276 
00277         iterator_t i = in.begin();
00278         std::set<char> escaped = tools::usual_escaped_characters();
00279         int len = 0;
00280 
00281         while (!error_ & (i != in.end()))
00282           {
00283             len = 0;
00284             krat_exp_token_t tok;
00285             switch (*i)
00286               {
00287                 // operator case.
00288               case '(' : tok = lparen;                  len = 1; break;
00289               case ')' : tok = rparen;                  len = 1; break;
00290               case '+' : tok = krat_exp_lexing::plus;   len = 1; break;
00291               case '*' : tok = e_star;                  len = 1; break;
00292               case '.' : tok = krat_exp_lexing::dot;    len = 1; break;
00293               case ' ' : tok = space;                   len = 1; break;
00294               case '0' : tok = zero;                    len = 1; break;
00295               case '1' : tok = one;                     len = 1; break;
00296               }
00297             // try word parser.
00298             iterator_t mli = i;
00299             monoid_elt_t w (e.structure().monoid());
00300             if (parse_word(w, in, mli, escaped))
00301               {
00302                 if (mli - i > 1)
00303                   tok.reset();
00304                 tok = w.value();
00305                 len = mli - i;
00306               }
00307             // try weight lexer.
00308             iterator_t wli = i;
00309             semiring_elt_t ww (e.structure().semiring());
00310             if (parse_weight(ww, in, wli))
00311               {
00312                 if (wli - i > len)
00313                   tok.reset();
00314                 if (wli - i >= len)
00315                   {
00316                     tok = ww.value();
00317                     len = wli - i;
00318                   }
00319               }
00320             if (len == 0)
00321               lex_error(std::string("Invalid token '") + *i + "'.");
00322             else
00323               tokens_.push_back(tok);
00324             i += len;
00325           }
00326       }
00327 
00329       bool
00330       error() const
00331       {
00332         return error_;
00333       }
00334 
00336       const std::string&
00337       error_msg() const
00338       {
00339         return error_msg_;
00340       }
00341 
00343       krat_exp_token_t first() const
00344       {
00345         return (tokens_.size() > 0) ? tokens_.front() :
00346           krat_exp_token_t (eof);
00347       }
00348 
00350       krat_exp_token_t second() const
00351       {
00352         return (tokens_.size() >= 2) ?
00353           *++tokens_.begin() : krat_exp_token_t (eof);
00354       }
00355 
00362       void eat()
00363       {
00364         if (tokens_.size() > 0)
00365           {
00366             if (trace_)
00367               std::cerr << "LEXER: eating '" << tokens_.front().to_string()
00368                         << "'." << std::endl;
00369             tokens_.pop_front();
00370           }
00371       }
00372 
00373       void set_trace(bool trace)
00374       {
00375         trace_ = trace;
00376       }
00377 
00378     protected:
00379       token_stream_t tokens_;
00380       bool trace_;
00381       std::string error_msg_;
00382       bool error_;
00383     };
00384 
00419     template <class S, class T>
00420     struct Parser
00421     {
00422       typedef KRatExpToken
00423       <
00424         typename Element<S, T>::monoid_elt_value_t,
00425         typename Element<S, T>::semiring_elt_value_t
00426       >                                               krat_exp_token_t;
00427       typedef typename Element<S, T>::monoid_elt_t    monoid_elt_t;
00428       typedef typename Element<S, T>::monoid_elt_value_t  monoid_elt_value_t;
00429       typedef typename monoid_elt_t::set_t            monoid_t;
00430       typedef typename Element<S, T>::semiring_elt_t          semiring_elt_t;
00431       typedef typename semiring_elt_t::set_t                  semiring_t;
00432       typedef typename Element<S, T>::semiring_elt_value_t  semiring_elt_value_t;
00433       typedef std::list<krat_exp_token_t>             token_stream_t;
00434 
00435       Parser(Lexer<S, T>& lexer, bool trace = false) :
00436         lexer_          (lexer),
00437         trace_          (trace),
00438         error_          (lexer.error()),
00439         error_msg_      (lexer.error_msg())
00440       {
00441       }
00442 
00444       void parse(Element<S, T>& exp)
00445       {
00446         if (!error_)
00447           {
00448             trace("*** START ***");
00449             try
00450               {
00451                 parse_exp(exp);
00452                 accept(eof);
00453               }
00454             catch (const std::string& s)
00455               {
00456                 error_ = true;
00457                 error_msg_ = s;
00458               }
00459             trace("*** END ***");
00460           }
00461       }
00462 
00464       bool error() const
00465       {
00466         return error_;
00467       }
00468 
00470       const std::string& error_msg() const
00471       {
00472         return error_msg_;
00473       }
00474 
00475       void set_trace(bool trace)
00476       {
00477         trace_ = trace;
00478       }
00479 
00480     protected:
00481 
00483 
00484       void
00485       trace(const std::string& msg)
00486       {
00487         if (trace_)
00488           std::cerr << "PARSER: " << msg << '.' << std::endl;
00489       }
00490 
00491       template <class Misc>
00492       void
00493       trace(const std::string& msg, const Misc& v)
00494       {
00495         if (trace_)
00496           std::cerr << "PARSER: " << msg << " (" << v << ")." << std::endl;
00497       }
00499 
00501       void parse_error(const std::string& msg = "parse_error.")
00502         throw (const std::string&)
00503       {
00504         trace("Stop on error.", msg);
00505         throw msg;
00506       }
00507 
00517       void accept(token_e tok)
00518       {
00519         krat_exp_token_t                        get_token = lexer_.first();
00520         typename krat_exp_token_t::token        expected (tok);
00521 
00522         trace("accept", expected.to_string());
00523         if (!get_token.is_a(tok))
00524           parse_error("waiting for a '" + expected.to_string() +
00525                       "' get a '" + get_token.to_string() + "'.");
00526         else
00527           lexer_.eat();
00528       }
00529 
00531       void parse_exp(Element<S, T>& exp)
00532       {
00533         trace("parse_exp: Start");
00534         parse_term (exp);
00535         while (lexer_.first().is_a(krat_exp_lexing::plus))
00536           {
00537             accept(krat_exp_lexing::plus);
00538             Element<S, T> rhs (exp.structure());
00539             parse_term(rhs);
00540             exp = exp + rhs;
00541           }
00542         trace("parse_exp: End",  exp);
00543       }
00544 
00546       void parse_term(Element<S, T>& exp)
00547       {
00548         trace("parse_term: Start");
00549         parse_right_weighted (exp);
00550         while (lexer_.first().is_a(krat_exp_lexing::dot)        ||
00551                lexer_.first().is_a(a_weight)                    ||
00552                lexer_.first().is_a(lparen)                      ||
00553                lexer_.first().is_a(one)                         ||
00554                lexer_.first().is_a(zero)                        ||
00555                lexer_.first().is_a(a_word))
00556           {
00557             if (lexer_.first().is_a(krat_exp_lexing::dot))
00558               accept(krat_exp_lexing::dot);
00559             Element<S, T> rhs (exp.structure());
00560             parse_right_weighted(rhs);
00561             exp = exp * rhs;
00562           }
00563         trace("parse_term: End",  exp);
00564       }
00565 
00567       void parse_right_weighted(Element<S, T>& exp)
00568       {
00569         trace("parse_right_weighted: Start");
00570         parse_left_weighted(exp);
00571         while (lexer_.first().is_a(space))
00572           {
00573             accept(space);
00574             krat_exp_token_t tok = lexer_.first();
00575             accept(a_weight);
00576             exp = exp * semiring_elt_t (exp.structure().semiring(), tok.as_weight());
00577           }
00578         trace("parse_right_weighted: End", exp);
00579       }
00580 
00582       void parse_left_weighted(Element<S, T>& exp)
00583       {
00584         trace("parse_left_weighted: Start");
00585         // a 2-lookahead will be sufficient to disambiguate.
00586         if (lexer_.first().is_a(a_weight) &&
00587             (!lexer_.first().is_schrod() || lexer_.second().is_a(space)))
00588           {
00589             // token must be a weight, but may be a monoid '1' or '0'.
00590             // i.e. in "1 2" or "0 2".
00591             // token may even be a word!
00592             // i.e. in '2 3' when 2 is in the alphabet
00593             semiring_elt_t w (exp.structure().semiring(), lexer_.first().as_weight());
00594             bool just_a_weight = true;
00595             Element<S, T> m (exp.structure());
00596             if (lexer_.first().is_a(zero))
00597               {
00598                 just_a_weight = false;
00599                 m = zero_as<T>::of(exp.structure());
00600               }
00601             else if (lexer_.first().is_a(one))
00602               {
00603                 just_a_weight = false;
00604                 m = identity_as<T>::of(exp.structure());
00605               }
00606             else if (lexer_.first().is_a(a_word))
00607               {
00608                 just_a_weight = false;
00609                 m = Element<S, T> (exp.structure(), lexer_.first().as_word());
00610               }
00611             accept(a_weight);
00612             accept(space);
00613             if (just_a_weight ||
00614                 !lexer_.first().is_a(a_weight) ||
00615                 lexer_.second().is_a(space))
00616               {
00617                 // Then it could be any weighted.
00618                 Element<S, T> rhs (exp.structure());
00619                 parse_left_weighted(rhs);
00620                 exp = w * rhs;
00621               }
00622             else
00623               {
00624                 // Arg ! First was not a weight, but a monoid value !
00625                 w = lexer_.first().as_weight();
00626                 accept(a_weight);
00627                 exp = m * w;
00628               }
00629           }
00630         else
00631           /* !lexer_.first().is_a(a_weight)  ||
00632              (lexer_.first.is_schrod() && !lexer_.second().is_a(space)) */
00633           parse_stared(exp);
00634         trace("parse_left_weighted: End",  exp);
00635       }
00636 
00638       void parse_stared(Element<S, T>& exp)
00639       {
00640         trace("parse_stared: Start");
00641         parse_factor(exp);
00642         while (lexer_.first().is_a(e_star))
00643           {
00644             accept(e_star);
00645             exp.star();
00646           }
00647         trace("parse_stared: End",  exp);
00648       }
00649 
00651       void parse_factor(Element<S, T>& exp)
00652       {
00653         trace("parse_factor: Start");
00654         if (lexer_.first().is_a(one))
00655           {
00656             accept(one);
00657             exp = identity_as<T>::of(exp.structure());
00658           }
00659         else if (lexer_.first().is_a(zero))
00660           {
00661             accept(zero);
00662             exp = zero_as<T>::of(exp.structure());
00663           }
00664         else if (lexer_.first().is_a(a_word))
00665           {
00666             exp = monoid_elt_t (exp.structure().monoid(), lexer_.first().as_word());
00667             accept(a_word);
00668           }
00669         else if (lexer_.first().is_a(lparen))
00670           {
00671             accept(lparen);
00672             parse_exp(exp);
00673             accept(rparen);
00674           }
00675         else
00676           parse_error("waiting for a factor get a " +
00677                       lexer_.first().to_string());
00678         trace("parse_factor: End",  exp);
00679       }
00680 
00681       Lexer<S, T>&      lexer_;
00682       bool              trace_;
00683       bool              error_;
00684       std::string       error_msg_;
00685     };
00686 
00687     template <class S, class T>
00688     std::pair<bool, std::string>
00689     parse(const std::string& from,
00690           Element<S, T>& exp,
00691           bool lex_trace,
00692           bool parse_trace)
00693     {
00694       Lexer<S, T> lexer(lex_trace);
00695       lexer.lex(from, exp);
00696       Parser<S, T> parser(lexer, parse_trace);
00697       parser.parse(exp);
00698       return std::make_pair(parser.error(), parser.error_msg());
00699     }
00700 
00704   } // algebra
00705 
00706 } // vcsn
00707 
00708 #endif // ! VCSN_ALGEBRA_IMPLEMENTATION_SERIES_KRAT_EXP_PARSER_HXX

Generated on Fri Jul 28 12:18:42 2006 for Vaucanson by  doxygen 1.4.6