krat_exp_parser.hxx

00001 // krat_exp_parser.hxx: this file is part of the Vaucanson project.
00002 //
00003 // Vaucanson, a generic library for finite state machines.
00004 //
00005 // Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006 The Vaucanson Group.
00006 //
00007 // This program is free software; you can redistribute it and/or
00008 // modify it under the terms of the GNU General Public License
00009 // as published by the Free Software Foundation; either version 2
00010 // of the License, or (at your option) any later version.
00011 //
00012 // The complete GNU General Public Licence Notice can be found as the
00013 // `COPYING' file in the root directory.
00014 //
00015 // The Vaucanson Group consists of people listed in the `AUTHORS' file.
00016 //
00017 #ifndef VCSN_ALGEBRA_IMPLEMENTATION_SERIES_KRAT_EXP_PARSER_HXX
00018 # define VCSN_ALGEBRA_IMPLEMENTATION_SERIES_KRAT_EXP_PARSER_HXX
00019 
00020 # include <vaucanson/algebra/implementation/series/krat_exp_parser.hh>
00021 
00022 # include <vaucanson/algebra/concept/monoid_base.hh>
00023 # include <vaucanson/misc/usual_escaped_characters.hh>
00024 
00025 # include <list>
00026 # include <set>
00027 # include <iostream>
00028 
00029 namespace vcsn {
00030 
00031   namespace algebra {
00032 
00036 
00037     namespace krat_exp_lexing {
00038 
00047       enum token_e
00048         {
00049           lparen,
00050           rparen,
00051           space,
00052           plus,
00053           e_star,
00054           dot,
00055           zero,
00056           one,
00057           a_word,
00058           a_weight,
00059           eof
00060         };
00061 
00062     } // krat_exp_lexing
00063 
00064     using namespace krat_exp_lexing;
00065 
00076     template <class MonoidValue, class SemiringEltValue>
00077     class KRatExpToken
00078     {
00079     public:
00090       struct token
00091       {
00092         token(const token_e& tok_type) :
00093           type(tok_type)
00094         {}
00095 
00096         token(const token_e& tok_type, const MonoidValue& word)
00097         {
00098           m = word;
00099           type = tok_type;
00100         }
00101 
00102         token(const token_e& tok_type, const SemiringEltValue& weight)
00103         {
00104           w = weight;
00105           type = tok_type;
00106         }
00107 
00108         std::string to_string() const
00109         {
00110           switch (type)
00111             {
00112             case a_word                 : return "word";
00113             case a_weight               : return "weight";
00114             case lparen                 : return "(";
00115             case rparen                 : return ")";
00116             case one                    : return "one";
00117             case zero                   : return "zero";
00118             case krat_exp_lexing::dot   : return ".";
00119             case e_star                 : return "*";
00120             case krat_exp_lexing::plus  : return "+";
00121             case space                  : return " ";
00122             case eof                    : return "EOF";
00123             }
00124           return "";
00125         }
00126 
00127         token_e type;
00128         MonoidValue m;
00129         SemiringEltValue w;
00130       };
00131 
00132       KRatExpToken(const token_e& tok)
00133       {
00134         tok_.push_back(tok);
00135       }
00136 
00137       KRatExpToken()
00138       {
00139       }
00140 
00141       std::string to_string() const
00142       {
00143         if (tok_.size() == 0)
00144           return "no token";
00145         else if (tok_.size() == 1)
00146           return tok_.front().to_string();
00147         else
00148           {
00149             std::string s("schrod(");
00150             typename std::list<token>::const_iterator i = tok_.begin();
00151             while (i != tok_.end())
00152               s += i->to_string() + (++i != tok_.end() ? " ": "");
00153             s += ")";
00154             return s;
00155           }
00156       }
00157 
00158       bool is_just_a(const token_e tok)
00159       {
00160         return ((tok_.size() == 1) && (tok_.front().type == tok));
00161       }
00162 
00163       bool is_defined()
00164       {
00165         return (tok_.size() > 0);
00166       }
00167 
00168       bool is_schrod()
00169       {
00170         return (tok_.size() > 1);
00171       }
00172 
00173       bool is_a(const token_e tok)
00174       {
00175         for (typename std::list<token>::const_iterator i = tok_.begin();
00176              i != tok_.end();
00177              ++i)
00178           if (i->type == tok)
00179             return true;
00180         return false;
00181       }
00182 
00183       MonoidValue as_word()
00184       {
00185         for (typename std::list<token>::const_iterator i = tok_.begin();
00186              i != tok_.end();
00187              ++i)
00188           if (i->type == a_word)
00189             return i->m;
00190         unreachable("internal error in krat_exp parser");
00191         return MonoidValue();
00192       }
00193 
00194       SemiringEltValue as_weight()
00195       {
00196         for (typename std::list<token>::const_iterator i = tok_.begin();
00197              i != tok_.end();
00198              ++i)
00199           if (i->type == a_weight)
00200             return i->w;
00201         unreachable("internal error in krat_exp parser.");
00202         return SemiringEltValue();
00203       }
00204 
00205       KRatExpToken& operator=(const token_e tok)
00206       {
00207         tok_.push_back(token(tok));
00208         return *this;
00209       }
00210 
00211       KRatExpToken& operator=(const MonoidValue& word_value)
00212       {
00213         tok_.push_back(token(a_word, word_value));
00214         return *this;
00215       }
00216 
00217       KRatExpToken& operator=(const SemiringEltValue& weight_value)
00218       {
00219         tok_.push_back(token(a_weight, weight_value));
00220         return *this;
00221       }
00222 
00223       void reset()
00224       {
00225         tok_.clear();
00226       }
00227 
00228     private:
00229       std::list<token> tok_;
00230     };
00231 
00237     template <class S, class T>
00238     struct Lexer
00239     {
00240       typedef KRatExpToken
00241       <
00242         typename Element<S, T>::monoid_elt_value_t,
00243         typename Element<S, T>::semiring_elt_value_t
00244       >                                               krat_exp_token_t;
00245       typedef std::list<krat_exp_token_t>             token_stream_t;
00246 
00247       Lexer(bool trace = false) :
00248         tokens_ (),
00249         trace_ (trace),
00250         error_msg_ ("No error."),
00251         error_ (false)
00252       {
00253       }
00254 
00256       void
00257       lex_error(const std::string& msg = "Lex error.")
00258       {
00259         error_ = true;
00260         error_msg_ = msg;
00261       }
00262 
00270       void
00271       lex(const std::string& in, const Element<S, T>& e)
00272       {
00273         typedef typename Element<S, T>::monoid_elt_t       monoid_elt_t;
00274         typedef typename Element<S, T>::monoid_elt_value_t monoid_elt_value_t;
00275         typedef typename Element<S, T>::semiring_elt_t     semiring_elt_t;
00276         typedef std::string::const_iterator                iterator_t;
00277 
00278         iterator_t i = in.begin();
00279         std::set<char> escaped = misc::usual_escaped_characters();
00280         int len = 0;
00281 
00282         while (!error_ & (i != in.end()))
00283           {
00284             len = 0;
00285             krat_exp_token_t tok;
00286             switch (*i)
00287               {
00288                 // operator case.
00289               case '(' : tok = lparen;                  len = 1; break;
00290               case ')' : tok = rparen;                  len = 1; break;
00291               case '+' : tok = krat_exp_lexing::plus;   len = 1; break;
00292               case '*' : tok = e_star;                  len = 1; break;
00293               case '.' : tok = krat_exp_lexing::dot;    len = 1; break;
00294               case ' ' : tok = space;                   len = 1; break;
00295               case '0' : tok = zero;                    len = 1; break;
00296               case '1' : tok = one;                     len = 1; break;
00297               }
00298             // try word parser.
00299             iterator_t mli = i;
00300             monoid_elt_t w (e.structure().monoid());
00301             if (parse_word(w, in, mli, escaped))
00302               {
00303                 if (mli - i > 1)
00304                   tok.reset();
00305                 tok = w.value();
00306                 len = mli - i;
00307               }
00308             // try weight lexer.
00309             iterator_t wli = i;
00310             semiring_elt_t ww (e.structure().semiring());
00311             if (parse_weight(ww, in, wli))
00312               {
00313                 if (wli - i > len)
00314                   tok.reset();
00315                 if (wli - i >= len)
00316                   {
00317                     tok = ww.value();
00318                     len = wli - i;
00319                   }
00320               }
00321             if (len == 0)
00322               lex_error(std::string("Invalid token '") + *i + "'.");
00323             else
00324               tokens_.push_back(tok);
00325             i += len;
00326           }
00327       }
00328 
00330       bool
00331       error() const
00332       {
00333         return error_;
00334       }
00335 
00337       const std::string&
00338       error_msg() const
00339       {
00340         return error_msg_;
00341       }
00342 
00344       krat_exp_token_t first() const
00345       {
00346         return (tokens_.size() > 0) ? tokens_.front() :
00347           krat_exp_token_t (eof);
00348       }
00349 
00351       krat_exp_token_t second() const
00352       {
00353         return (tokens_.size() >= 2) ?
00354           *++tokens_.begin() : krat_exp_token_t (eof);
00355       }
00356 
00363       void eat()
00364       {
00365         if (tokens_.size() > 0)
00366           {
00367             if (trace_)
00368               std::cerr << "LEXER: eating '" << tokens_.front().to_string()
00369                         << "'." << std::endl;
00370             tokens_.pop_front();
00371           }
00372       }
00373 
00374       void set_trace(bool trace)
00375       {
00376         trace_ = trace;
00377       }
00378 
00379     protected:
00380       token_stream_t tokens_;
00381       bool trace_;
00382       std::string error_msg_;
00383       bool error_;
00384     };
00385 
00420     template <class S, class T>
00421     struct Parser
00422     {
00423       typedef KRatExpToken
00424       <
00425         typename Element<S, T>::monoid_elt_value_t,
00426         typename Element<S, T>::semiring_elt_value_t
00427       >                                               krat_exp_token_t;
00428       typedef typename Element<S, T>::monoid_elt_t    monoid_elt_t;
00429       typedef typename Element<S, T>::monoid_elt_value_t  monoid_elt_value_t;
00430       typedef typename monoid_elt_t::set_t            monoid_t;
00431       typedef typename Element<S, T>::semiring_elt_t          semiring_elt_t;
00432       typedef typename semiring_elt_t::set_t                  semiring_t;
00433       typedef typename Element<S, T>::semiring_elt_value_t  semiring_elt_value_t;
00434       typedef std::list<krat_exp_token_t>             token_stream_t;
00435 
00436       Parser(Lexer<S, T>& lexer, bool trace = false) :
00437         lexer_          (lexer),
00438         trace_          (trace),
00439         error_          (lexer.error()),
00440         error_msg_      (lexer.error_msg())
00441       {
00442       }
00443 
00445       void parse(Element<S, T>& exp)
00446       {
00447         if (!error_)
00448           {
00449             trace("*** START ***");
00450             try
00451               {
00452                 parse_exp(exp);
00453                 accept(eof);
00454               }
00455             catch (const std::string& s)
00456               {
00457                 error_ = true;
00458                 error_msg_ = s;
00459               }
00460             trace("*** END ***");
00461           }
00462       }
00463 
00465       bool error() const
00466       {
00467         return error_;
00468       }
00469 
00471       const std::string& error_msg() const
00472       {
00473         return error_msg_;
00474       }
00475 
00476       void set_trace(bool trace)
00477       {
00478         trace_ = trace;
00479       }
00480 
00481     protected:
00482 
00484 
00485       void
00486       trace(const std::string& msg)
00487       {
00488         if (trace_)
00489           std::cerr << "PARSER: " << msg << '.' << std::endl;
00490       }
00491 
00492       template <class Misc>
00493       void
00494       trace(const std::string& msg, const Misc& v)
00495       {
00496         if (trace_)
00497           std::cerr << "PARSER: " << msg << " (" << v << ")." << std::endl;
00498       }
00500 
00502       void parse_error(const std::string& msg = "parse_error.")
00503         throw (const std::string&)
00504       {
00505         trace("Stop on error.", msg);
00506         throw msg;
00507       }
00508 
00518       void accept(token_e tok)
00519       {
00520         krat_exp_token_t                        get_token = lexer_.first();
00521         typename krat_exp_token_t::token        expected (tok);
00522 
00523         trace("accept", expected.to_string());
00524         if (!get_token.is_a(tok))
00525           parse_error("waiting for a '" + expected.to_string() +
00526                       "' get a '" + get_token.to_string() + "'.");
00527         else
00528           lexer_.eat();
00529       }
00530 
00532       void parse_exp(Element<S, T>& exp)
00533       {
00534         trace("parse_exp: Start");
00535         parse_term (exp);
00536         while (lexer_.first().is_a(krat_exp_lexing::plus))
00537           {
00538             accept(krat_exp_lexing::plus);
00539             Element<S, T> rhs (exp.structure());
00540             parse_term(rhs);
00541             exp = exp + rhs;
00542           }
00543         trace("parse_exp: End",  exp);
00544       }
00545 
00547       void parse_term(Element<S, T>& exp)
00548       {
00549         trace("parse_term: Start");
00550         parse_right_weighted (exp);
00551         while (lexer_.first().is_a(krat_exp_lexing::dot)        ||
00552                lexer_.first().is_a(a_weight)                    ||
00553                lexer_.first().is_a(lparen)                      ||
00554                lexer_.first().is_a(one)                         ||
00555                lexer_.first().is_a(zero)                        ||
00556                lexer_.first().is_a(a_word))
00557           {
00558             if (lexer_.first().is_a(krat_exp_lexing::dot))
00559               accept(krat_exp_lexing::dot);
00560             Element<S, T> rhs (exp.structure());
00561             parse_right_weighted(rhs);
00562             exp = exp * rhs;
00563           }
00564         trace("parse_term: End",  exp);
00565       }
00566 
00568       void parse_right_weighted(Element<S, T>& exp)
00569       {
00570         trace("parse_right_weighted: Start");
00571         parse_left_weighted(exp);
00572         while (lexer_.first().is_a(space))
00573           {
00574             accept(space);
00575             krat_exp_token_t tok = lexer_.first();
00576             accept(a_weight);
00577             exp = exp * semiring_elt_t (exp.structure().semiring(), tok.as_weight());
00578           }
00579         trace("parse_right_weighted: End", exp);
00580       }
00581 
00583       void parse_left_weighted(Element<S, T>& exp)
00584       {
00585         trace("parse_left_weighted: Start");
00586         // a 2-lookahead will be sufficient to disambiguate.
00587         if (lexer_.first().is_a(a_weight) &&
00588             (!lexer_.first().is_schrod() || lexer_.second().is_a(space)))
00589           {
00590             // token must be a weight, but may be a monoid '1' or '0'.
00591             // i.e. in "1 2" or "0 2".
00592             // token may even be a word!
00593             // i.e. in '2 3' when 2 is in the alphabet
00594             semiring_elt_t w (exp.structure().semiring(), lexer_.first().as_weight());
00595             bool just_a_weight = true;
00596             Element<S, T> m (exp.structure());
00597             if (lexer_.first().is_a(zero))
00598               {
00599                 just_a_weight = false;
00600                 m = zero_as<T>::of(exp.structure());
00601               }
00602             else if (lexer_.first().is_a(one))
00603               {
00604                 just_a_weight = false;
00605                 m = identity_as<T>::of(exp.structure());
00606               }
00607             else if (lexer_.first().is_a(a_word))
00608               {
00609                 just_a_weight = false;
00610                 m = Element<S, T> (exp.structure(), lexer_.first().as_word());
00611               }
00612             accept(a_weight);
00613             accept(space);
00614             if (just_a_weight ||
00615                 !lexer_.first().is_a(a_weight) ||
00616                 lexer_.second().is_a(space))
00617               {
00618                 // Then it could be any weighted.
00619                 Element<S, T> rhs (exp.structure());
00620                 parse_left_weighted(rhs);
00621                 exp = w * rhs;
00622               }
00623             else
00624               {
00625                 // Arg ! First was not a weight, but a monoid value !
00626                 w = lexer_.first().as_weight();
00627                 accept(a_weight);
00628                 exp = m * w;
00629               }
00630           }
00631         else
00632           /* !lexer_.first().is_a(a_weight)  ||
00633              (lexer_.first.is_schrod() && !lexer_.second().is_a(space)) */
00634           parse_stared(exp);
00635         trace("parse_left_weighted: End",  exp);
00636       }
00637 
00639       void parse_stared(Element<S, T>& exp)
00640       {
00641         trace("parse_stared: Start");
00642         parse_factor(exp);
00643         while (lexer_.first().is_a(e_star))
00644           {
00645             accept(e_star);
00646             exp.star();
00647           }
00648         trace("parse_stared: End",  exp);
00649       }
00650 
00652       void parse_factor(Element<S, T>& exp)
00653       {
00654         trace("parse_factor: Start");
00655         if (lexer_.first().is_a(one))
00656           {
00657             accept(one);
00658             exp = identity_as<T>::of(exp.structure());
00659           }
00660         else if (lexer_.first().is_a(zero))
00661           {
00662             accept(zero);
00663             exp = zero_as<T>::of(exp.structure());
00664           }
00665         else if (lexer_.first().is_a(a_word))
00666           {
00667             exp = monoid_elt_t (exp.structure().monoid(), lexer_.first().as_word());
00668             accept(a_word);
00669           }
00670         else if (lexer_.first().is_a(lparen))
00671           {
00672             accept(lparen);
00673             parse_exp(exp);
00674             accept(rparen);
00675           }
00676         else
00677           parse_error("waiting for a factor get a " +
00678                       lexer_.first().to_string());
00679         trace("parse_factor: End",  exp);
00680       }
00681 
00682       Lexer<S, T>&      lexer_;
00683       bool              trace_;
00684       bool              error_;
00685       std::string       error_msg_;
00686     };
00687 
00688     template <class S, class T>
00689     std::pair<bool, std::string>
00690     parse(const std::string& from,
00691           Element<S, T>& exp,
00692           bool lex_trace,
00693           bool parse_trace)
00694     {
00695       Lexer<S, T> lexer(lex_trace);
00696       lexer.lex(from, exp);
00697       Parser<S, T> parser(lexer, parse_trace);
00698       parser.parse(exp);
00699       return std::make_pair(parser.error(), parser.error_msg());
00700     }
00701 
00705   } // algebra
00706 
00707 } // vcsn
00708 
00709 #endif // ! VCSN_ALGEBRA_IMPLEMENTATION_SERIES_KRAT_EXP_PARSER_HXX

Generated on Sun Jul 29 19:35:24 2007 for Vaucanson by  doxygen 1.5.2