BESUtil.cc

Go to the documentation of this file.
00001 // BESUtil.cc
00002 
00003 // This file is part of bes, A C++ back-end server implementation framework
00004 // for the OPeNDAP Data Access Protocol.
00005 
00006 // Copyright (c) 2004-2009 University Corporation for Atmospheric Research
00007 // Author: Patrick West <pwest@ucar.edu> and Jose Garcia <jgarcia@ucar.edu>
00008 //
00009 // This library is free software; you can redistribute it and/or
00010 // modify it under the terms of the GNU Lesser General Public
00011 // License as published by the Free Software Foundation; either
00012 // version 2.1 of the License, or (at your option) any later version.
00013 //
00014 // This library is distributed in the hope that it will be useful,
00015 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017 // Lesser General Public License for more details.
00018 //
00019 // You should have received a copy of the GNU Lesser General Public
00020 // License along with this library; if not, write to the Free Software
00021 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00022 //
00023 // You can contact University Corporation for Atmospheric Research at
00024 // 3080 Center Green Drive, Boulder, CO 80301
00025 
00026 // (c) COPYRIGHT University Corporation for Atmospheric Research 2004-2005
00027 // Please read the full copyright statement in the file COPYRIGHT_UCAR.
00028 //
00029 // Authors:
00030 //      pwest       Patrick West <pwest@ucar.edu>
00031 //      jgarcia     Jose Garcia <jgarcia@ucar.edu>
00032 
00033 #include "config.h"
00034 
00035 #include <sys/types.h>
00036 #include <sys/stat.h>
00037 
00038 #if HAVE_UNISTD_H
00039 #include <unistd.h>
00040 #endif
00041 
00042 #include <cerrno>
00043 #include <cstring>
00044 #include <cstdlib>
00045 #include <cstdio>
00046 #include <sstream>
00047 #include <iostream>
00048 
00049 using std::istringstream ;
00050 using std::cout ;
00051 using std::endl ;
00052 using std::snprintf ;
00053 
00054 #include "BESUtil.h"
00055 #include "BESForbiddenError.h"
00056 #include "BESNotFoundError.h"
00057 #include "BESInternalError.h"
00058 
00059 #define CRLF "\r\n"
00060 
00065 void
00066 BESUtil::set_mime_text( ostream &strm )
00067 {
00068     strm << "HTTP/1.0 200 OK" << CRLF ;
00069     strm << "XBES-Server: " << PACKAGE_STRING << CRLF ;
00070 
00071     const time_t t = time(0);
00072     strm << "Date: " << rfc822_date(t).c_str() << CRLF ;
00073     strm << "Last-Modified: " << rfc822_date(t).c_str() << CRLF ;
00074 
00075     strm << "Content-Type: text/plain" << CRLF ;
00076     // Note that Content-Description is from RFC 2045 (MIME, pt 1), not 2616.
00077     strm << "Content-Description: unknown" << CRLF ;
00078     strm << CRLF ;
00079 }
00080 
00085 void
00086 BESUtil::set_mime_html( ostream &strm )
00087 {
00088     strm << "HTTP/1.0 200 OK" << CRLF ;
00089     strm << "XBES-Server: " << PACKAGE_STRING << CRLF ;
00090 
00091     const time_t t = time(0);
00092     strm << "Date: " << rfc822_date(t).c_str() << CRLF ;
00093     strm << "Last-Modified: " << rfc822_date(t).c_str() << CRLF ;
00094 
00095     strm << "Content-type: text/html" << CRLF ;
00096     // Note that Content-Description is from RFC 2045 (MIME, pt 1), not 2616.
00097     strm << "Content-Description: unknown" << CRLF ;
00098     strm << CRLF ;
00099 }
00100 
00101 // Return a MIME rfc-822 date. The grammar for this is:
00102 //       date-time   =  [ day "," ] date time        ; dd mm yy
00103 //                                                   ;  hh:mm:ss zzz
00104 //
00105 //       day         =  "Mon"  / "Tue" /  "Wed"  / "Thu"
00106 //                   /  "Fri"  / "Sat" /  "Sun"
00107 //
00108 //       date        =  1*2DIGIT month 2DIGIT        ; day month year
00109 //                                                   ;  e.g. 20 Jun 82
00110 //                   NB: year is 4 digit; see RFC 1123. 11/30/99 jhrg
00111 //
00112 //       month       =  "Jan"  /  "Feb" /  "Mar"  /  "Apr"
00113 //                   /  "May"  /  "Jun" /  "Jul"  /  "Aug"
00114 //                   /  "Sep"  /  "Oct" /  "Nov"  /  "Dec"
00115 //
00116 //       time        =  hour zone                    ; ANSI and Military
00117 //
00118 //       hour        =  2DIGIT ":" 2DIGIT [":" 2DIGIT]
00119 //                                                   ; 00:00:00 - 23:59:59
00120 //
00121 //       zone        =  "UT"  / "GMT"                ; Universal Time
00122 //                                                   ; North American : UT
00123 //                   /  "EST" / "EDT"                ;  Eastern:  - 5/ - 4
00124 //                   /  "CST" / "CDT"                ;  Central:  - 6/ - 5
00125 //                   /  "MST" / "MDT"                ;  Mountain: - 7/ - 6
00126 //                   /  "PST" / "PDT"                ;  Pacific:  - 8/ - 7
00127 //                   /  1ALPHA                       ; Military: Z = UT;
00128 //                                                   ;  A:-1; (J not used)
00129 //                                                   ;  M:-12; N:+1; Y:+12
00130 //                   / ( ("+" / "-") 4DIGIT )        ; Local differential
00131 //                                                   ;  hours+min. (HHMM)
00132 
// Abbreviated day names, indexed by tm_wday (0 == Sunday).
static const char *days[] = {
    "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
};

// Abbreviated month names, indexed by tm_mon (0 == January).
static const char *months[] = {
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};
00136 
00146 string
00147 BESUtil::rfc822_date(const time_t t)
00148 {
00149     struct tm *stm = gmtime(&t);
00150     char d[256];
00151 
00152     snprintf(d, 255, "%s, %02d %s %4d %02d:%02d:%02d GMT", days[stm->tm_wday],
00153             stm->tm_mday, months[stm->tm_mon],
00154             1900 + stm->tm_year,
00155             stm->tm_hour, stm->tm_min, stm->tm_sec);
00156     d[255] = '\0';
00157     return string(d);
00158 }
00159 
00160 string
00161 BESUtil::unhexstring( string s )
00162 {
00163     int val;
00164     istringstream ss( s ) ;
00165     ss >> std::hex >> val;
00166     char tmp_str[2];
00167     tmp_str[0] = static_cast<char>(val);
00168     tmp_str[1] = '\0';
00169     return string(tmp_str);
00170 }
00171 
00172 // I modified this to mirror the version in libdap. The change allows several
00173 // escape sequences to by listed in 'except'. jhrg 2/18/09
00174 string
00175 BESUtil::www2id(const string &in, const string &escape, const string &except)
00176 {
00177     string::size_type i = 0;
00178     string res = in;
00179     while ((i = res.find_first_of(escape, i)) != string::npos) {
00180         if (except.find(res.substr(i, 3)) != string::npos) {
00181             i += 3;
00182             continue;
00183         }
00184         res.replace(i, 3, unhexstring(res.substr(i + 1, 2)));
00185     }
00186 
00187     return res;
00188 }
00189 
00190 string
00191 BESUtil::lowercase( const string &s )
00192 {
00193     string return_string = s ;
00194     for( int j = 0; j < return_string.length(); j++ )
00195     {
00196         return_string[j] = (char)tolower( return_string[j] ) ;
00197     }
00198 
00199     return return_string ;
00200 }
00201 
00202 string
00203 BESUtil::unescape( const string &s )
00204 {
00205     bool done = false ;
00206     string::size_type index = 0 ;
00207     string::size_type new_index = 0 ;
00208     string new_str ;
00209     while( !done )
00210     {
00211         string::size_type bs = s.find( '\\', index ) ;
00212         if( bs == string::npos )
00213         {
00214             new_str += s.substr( index, s.length() - index ) ;
00215             done = true ;
00216         }
00217         else
00218         {
00219             new_str += s.substr( index, bs - index ) ;
00220             new_str += s[bs+1] ;
00221             index = bs+2 ;
00222         }
00223     }
00224 
00225     return new_str ;
00226 }
00227 
00249 void
00250 BESUtil::check_path( const string &path,
00251                      const string &root,
00252                      bool follow_sym_links )
00253 {
00254     // if nothing is passed in path, then the path checks out since root is
00255     // assumed to be valid.
00256     if( path == "" )
00257         return ;
00258 
00259     // make sure there are no ../ in the directory, backing up in any way is
00260     // not allowed.
00261     string::size_type dotdot = path.find( ".." ) ;
00262     if( dotdot != string::npos )
00263     {
00264         string s = (string)"You are not allowed to access the node " + path;
00265         throw BESForbiddenError( s, __FILE__, __LINE__ ) ;
00266     }
00267 
00268     // What I want to do is to take each part of path and check to see if it
00269     // is a symbolic link and it is accessible. If everything is ok, add the
00270     // next part of the path.
00271     bool done = false ;
00272 
00273     // what is remaining to check
00274     string rem = path ;
00275     if( rem[0] == '/' )
00276         rem = rem.substr( 1, rem.length() - 1 ) ;
00277     if( rem[rem.length()-1] == '/' )
00278         rem = rem.substr( 0, rem.length() - 1 ) ;
00279 
00280     // full path of the thing to check
00281     string fullpath = root ;
00282     if( fullpath[fullpath.length()-1] == '/' )
00283     {
00284         fullpath = fullpath.substr( 0, fullpath.length() - 1 ) ;
00285     }
00286 
00287     // path checked so far
00288     string checked ;
00289 
00290     while( !done )
00291     {
00292         size_t slash = rem.find( '/' ) ;
00293         if( slash == string::npos )
00294         {
00295             fullpath = fullpath + "/" + rem ;
00296             checked = checked + "/" + rem ;
00297             done = true ;
00298         }
00299         else
00300         {
00301             fullpath = fullpath + "/" + rem.substr( 0, slash ) ;
00302             checked = checked + "/" + rem.substr( 0, slash ) ;
00303             rem = rem.substr( slash + 1, rem.length() - slash ) ;
00304         }
00305 
00306         if( !follow_sym_links )
00307         {
00308             struct stat buf;
00309             int statret = lstat( fullpath.c_str(), &buf ) ;
00310             if( statret == -1 )
00311             {
00312                 int errsv = errno ;
00313                 // stat failed, so not accessible. Get the error string,
00314                 // store in error, and throw exception
00315                 char *s_err = strerror( errsv ) ;
00316                 string error = "Unable to access node " + checked + ": " ;
00317                 if( s_err )
00318                 {
00319                     error = error + s_err ;
00320                 }
00321                 else
00322                 {
00323                     error = error + "unknow access error" ;
00324                 }
00325                 // ENOENT means that the node wasn't found. Otherise, access
00326                 // is denied for some reason
00327                 if( errsv == ENOENT )
00328                 {
00329                     throw BESNotFoundError( error, __FILE__, __LINE__ ) ;
00330                 }
00331                 else
00332                 {
00333                     throw BESForbiddenError( error, __FILE__, __LINE__ ) ;
00334                 }
00335             }
00336             else
00337             {
00338                 // lstat was successful, now check if sym link
00339                 if( S_ISLNK( buf.st_mode ) )
00340                 {
00341                     string error = "You do not have permission to access "
00342                                    + checked ;
00343                     throw BESForbiddenError( error, __FILE__, __LINE__ ) ;
00344                 }
00345             }
00346         }
00347         else
00348         {
00349             // just do a stat and see if we can access the thing. If we
00350             // can't, get the error information and throw an exception
00351             struct stat buf ;
00352             int statret = stat( fullpath.c_str(), &buf ) ;
00353             if( statret == -1 )
00354             {
00355                 int errsv = errno ;
00356                 // stat failed, so not accessible. Get the error string,
00357                 // store in error, and throw exception
00358                 char *s_err = strerror( errsv ) ;
00359                 string error = "Unable to access node " + checked + ": " ;
00360                 if( s_err )
00361                 {
00362                     error = error + s_err ;
00363                 }
00364                 else
00365                 {
00366                     error = error + "unknow access error" ;
00367                 }
00368                 // ENOENT means that the node wasn't found. Otherise, access
00369                 // is denied for some reason
00370                 if( errsv == ENOENT )
00371                 {
00372                     throw BESNotFoundError( error, __FILE__, __LINE__ ) ;
00373                 }
00374                 else
00375                 {
00376                     throw BESForbiddenError( error, __FILE__, __LINE__ ) ;
00377                 }
00378             }
00379         }
00380     }
00381 }
00382 
00383 char *
00384 BESUtil::fastpidconverter( char *buf, int base )
00385 {
00386     return fastpidconverter( getpid(), buf, base ) ;
00387 }
00388 
00389 char *
00390 BESUtil::fastpidconverter(
00391       long val,                                 /* value to be converted */
00392       char *buf,                                /* output string         */
00393       int base)                                 /* conversion base       */
00394 {
00395       ldiv_t r;                                 /* result of val / base  */
00396 
00397       if (base > 36 || base < 2)          /* no conversion if wrong base */
00398       {
00399             *buf = '\0';
00400             return buf;
00401       }
00402       if (val < 0)
00403             *buf++ = '-';
00404       r = ldiv (labs(val), base);
00405 
00406       /* output digits of val/base first */
00407 
00408       if (r.quot > 0)
00409             buf = fastpidconverter ( r.quot, buf, base);
00410       /* output last digit */
00411 
00412       *buf++ = "0123456789abcdefghijklmnopqrstuvwxyz"[(int)r.rem];
00413       *buf   = '\0';
00414       return buf;
00415 }
00416 
00417 void
00418 BESUtil::removeLeadingAndTrailingBlanks( string &key )
00419 {
00420     if( !key.empty() )
00421     {
00422         string::size_type first = key.find_first_not_of( " \t\n\r" ) ;
00423         string::size_type last = key.find_last_not_of( " \t\n\r" ) ;
00424         if( first == string::npos ) key = "" ;
00425         else
00426         {
00427             string::size_type num = last - first + 1 ;
00428             string new_key = key.substr( first, num ) ;
00429             key = new_key ;
00430         }
00431     }
00432 }
00433 
00434 string
00435 BESUtil::entity( char c )
00436 {
00437     switch( c )
00438     {
00439         case '>': return "&gt;";
00440         case '<': return "&lt;";
00441         case '&': return "&amp;";
00442         case '\'': return "&apos;";
00443         case '\"': return "&quot;";
00444     }
00445 }
00446 
00453 string
00454 BESUtil::id2xml( string in, const string &not_allowed )
00455 {
00456     string::size_type i = 0 ;
00457 
00458     while( ( i = in.find_first_of( not_allowed, i ) ) != string::npos )
00459     {
00460         in.replace( i, 1, entity( in[i] ) ) ;
00461         i++ ;
00462     }
00463 
00464     return in ;
00465 }
00466 
00472 string
00473 BESUtil::xml2id(string in)
00474 {
00475     string::size_type i = 0;
00476 
00477     while ((i = in.find("&gt;", i)) != string::npos)
00478         in.replace(i, 4, ">");
00479 
00480     i = 0;
00481     while ((i = in.find("&lt;", i)) != string::npos)
00482         in.replace(i, 4, "<");
00483 
00484     i = 0;
00485     while ((i = in.find("&amp;", i)) != string::npos)
00486         in.replace(i, 5, "&");
00487 
00488     i = 0;
00489     while ((i = in.find("&apos;", i)) != string::npos)
00490         in.replace(i, 6, "'");
00491 
00492     i = 0;
00493     while ((i = in.find("&quot;", i)) != string::npos)
00494         in.replace(i, 6, "\"");
00495 
00496     return in;
00497 }
00498 
00512 void
00513 BESUtil::explode( char delim, const string &str, list<string> &values )
00514 {
00515     std::string::size_type start = 0 ;
00516     std::string::size_type qstart = 0 ;
00517     std::string::size_type adelim = 0 ;
00518     std::string::size_type aquote = 0 ;
00519     bool done = false ;
00520     while( !done )
00521     {
00522         string aval ;
00523         if( str[start] == '"' )
00524         {
00525             bool endquote = false ;
00526             qstart = start+1 ;
00527             while( !endquote )
00528             {
00529                 aquote = str.find( '"', qstart ) ;
00530                 if( aquote == string::npos )
00531                 {
00532                     string currval = str.substr( start, str.length() - start ) ;
00533                     string err = "BESUtil::explode - No end quote after value "
00534                                  + currval ;
00535                     throw BESInternalError( err, __FILE__, __LINE__ ) ;
00536                 }
00537                 // could be an escaped escape character and an escaped
00538                 // quote, or an escaped escape character and a quote
00539                 if( str[aquote-1] == '\\' )
00540                 {
00541                     if( str[aquote-2] == '\\' )
00542                     {
00543                         endquote = true ;
00544                         qstart = aquote + 1 ;
00545                     }
00546                     else
00547                     {
00548                         qstart = aquote+1 ;
00549                     }
00550                 }
00551                 else
00552                 {
00553                     endquote = true ;
00554                     qstart = aquote + 1 ;
00555                 }
00556             }
00557             if( str[qstart] != delim && qstart != str.length() )
00558             {
00559                 string currval = str.substr( start, qstart - start ) ;
00560                 string err = "BESUtil::explode - No delim after end quote "
00561                              + currval ;
00562                 throw BESInternalError( err, __FILE__, __LINE__ ) ;
00563             }
00564             if( qstart == str.length() )
00565             {
00566                 adelim = string::npos ;
00567             }
00568             else
00569             {
00570                 adelim = qstart ;
00571             }
00572         }
00573         else
00574         {
00575             adelim = str.find( delim, start ) ;
00576         }
00577         if( adelim == string::npos )
00578         {
00579             aval = str.substr( start, str.length() - start ) ;
00580             done = true ;
00581         }
00582         else
00583         {
00584             aval = str.substr( start, adelim - start ) ;
00585         }
00586         values.push_back( aval ) ;
00587         start = adelim + 1 ;
00588         if( start == str.length() )
00589         {
00590             done = true ;
00591         }
00592     }
00593 }
00594 
00605 string
00606 BESUtil::implode( const list<string> &values, char delim )
00607 {
00608     string result ;
00609     list<string>::const_iterator i = values.begin() ;
00610     list<string>::const_iterator e = values.end() ;
00611     bool first = true ;
00612     string::size_type d; // = string::npos ;
00613     for( ; i != e; i++ )
00614     {
00615         if( !first ) result += delim ;
00616         d = (*i).find( delim ) ;
00617         if( d != string::npos && (*i)[0] != '"' )
00618         {
00619             string err = (string)"BESUtil::implode - delimiter exists in value "
00620                          + (*i) ;
00621             throw BESInternalError( err, __FILE__, __LINE__ ) ;
00622         }
00623         //d = string::npos ;
00624         result += (*i) ;
00625         first = false ;
00626     }
00627     return result ;
00628 }
00629 

Generated on 18 Feb 2010 for OPeNDAP Hyrax Back End Server (BES) by  doxygen 1.6.1