BESCache.cc

Go to the documentation of this file.
00001 // BESCache.cc
00002 
00003 // This file is part of bes, A C++ back-end server implementation framework
00004 // for the OPeNDAP Data Access Protocol.
00005 
00006 // Copyright (c) 2007 University Corporation for Atmospheric Research
00007 // Author: Patrick West <pwest@ucar.edu> and Jose Garcia <jgarcia@ucar.edu>
00008 //
00009 // This library is free software; you can redistribute it and/or
00010 // modify it under the terms of the GNU Lesser General Public
00011 // License as published by the Free Software Foundation; either
00012 // version 2.1 of the License, or (at your option) any later version.
00013 // 
00014 // This library is distributed in the hope that it will be useful,
00015 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017 // Lesser General Public License for more details.
00018 // 
00019 // You should have received a copy of the GNU Lesser General Public
00020 // License along with this library; if not, write to the Free Software
00021 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00022 //
00023 // You can contact University Corporation for Atmospheric Research at
00024 // 3080 Center Green Drive, Boulder, CO 80301
00025  
00026 // (c) COPYRIGHT University Corporation for Atmospheric Research 2004-2005
00027 // Please read the full copyright statement in the file COPYRIGHT_UCAR.
00028 //
00029 // Authors:
00030 //      pwest       Patrick West <pwest@ucar.edu>
00031 //      jgarcia     Jose Garcia <jgarcia@ucar.edu>
00032 
00033 #include <unistd.h>
00034 #include <sys/types.h>
00035 #include <sys/stat.h>
00036 #include <dirent.h>
00037 #include <stdio.h>
00038 #include <fcntl.h>
00039 #include <errno.h>
00040 
00041 #include <map>
00042 #include <iostream>
00043 #include <sstream>
00044 
00045 using std::multimap ;
00046 using std::pair ;
00047 using std::greater ;
00048 using std::endl ;
00049 
00050 #include "BESCache.h"
00051 #include "TheBESKeys.h"
00052 #include "BESContainerStorageException.h"
00053 #include "BESDebug.h"
00054 
00055 #define BES_CACHE_CHAR '#'
00056 
/** @brief One cached file found while scanning the cache directory.
 *
 * purge() fills one of these per matching file and keys it by the file's
 * age, so that the oldest files can be removed first.
 */
typedef struct _cache_entry
{
    string name ;   // full path of the cached file
    off_t size ;    // size in bytes as reported by stat(); off_t (the type
                    // of st_size) so large files are not truncated the way
                    // a plain int would truncate them
} cache_entry ;
00062 
00063 void 
00064 BESCache::check_ctor_params()
00065 {
00066     if( _cache_dir.empty() )
00067     {
00068         string err = "The cache dir was not specified, must be non-empty" ;
00069         throw BESContainerStorageException( err, __FILE__, __LINE__ ) ;
00070     }
00071 
00072     struct stat buf;
00073     int statret = stat( _cache_dir.c_str(), &buf ) ;
00074     if( statret != 0 || ! S_ISDIR(buf.st_mode) )
00075     {
00076         string err = "The cache dir " + _cache_dir + " does not exist" ;
00077         throw BESContainerStorageException( err, __FILE__, __LINE__ ) ;
00078     }
00079 
00080     if( _prefix.empty() )
00081     {
00082         string err = "The prefix was not specified, must be non-empty" ;
00083         throw BESContainerStorageException( err, __FILE__, __LINE__ ) ;
00084     }
00085 
00086     if( _cache_size == 0 )
00087     {
00088         string err = "The cache size was not specified, must be non-zero" ;
00089         throw BESContainerStorageException( err, __FILE__, __LINE__ ) ;
00090     }
00091 }
00092 
00102 BESCache::BESCache( const string &cache_dir,
00103                     const string &prefix,
00104                     unsigned int size )
00105     : _cache_dir( cache_dir ),
00106       _prefix( prefix ),
00107       _cache_size( size ),
00108       _lock_fd( -1 )
00109 {
00110     check_ctor_params(); // Throws BESContainerStorageException on error.
00111 }
00112 
00127 BESCache::BESCache( BESKeys &keys,
00128                     const string &cache_dir_key,
00129                     const string &prefix_key,
00130                     const string &size_key )
00131     : _cache_size( 0 ),
00132       _lock_fd( -1 )
00133 {
00134     bool found = false ;
00135     _cache_dir = keys.get_key( cache_dir_key, found ) ;
00136     if( !found )
00137     {
00138         string err = "The cache dir key " + cache_dir_key
00139                      + " was not found" ;
00140         throw BESContainerStorageException( err, __FILE__, __LINE__ ) ;
00141     }
00142 
00143     found = false ;
00144     _prefix = keys.get_key( prefix_key, found ) ;
00145     if( !found )
00146     {
00147         string err = "The prefix key " + prefix_key
00148                      + " was not found" ;
00149         throw BESContainerStorageException( err, __FILE__, __LINE__ ) ;
00150     }
00151 
00152     found = false ;
00153     string _cache_size_str = keys.get_key( size_key, found ) ;
00154     if( !found )
00155     {
00156         string err = "The size key " + size_key
00157                      + " was not found" ;
00158         throw BESContainerStorageException( err, __FILE__, __LINE__ ) ;
00159     }
00160 
00161 
00162     std::istringstream is( _cache_size_str ) ;
00163     is >> _cache_size ;
00164 
00165     check_ctor_params(); // Throws BESContainerStorageException on error.
00166 }
00167 
00174 bool
00175 BESCache::lock( unsigned int retry, unsigned int num_tries )
00176 {
00177     // make sure we aren't retrying too many times
00178     if( num_tries > MAX_LOCK_TRIES )
00179         num_tries = MAX_LOCK_TRIES ;
00180     if( retry > MAX_LOCK_RETRY_MS )
00181         retry = MAX_LOCK_RETRY_MS ;
00182 
00183     bool got_lock = true ;
00184     if( _lock_fd == -1 )
00185     {
00186         string lock_file = _cache_dir + "/lock" ;
00187         unsigned int tries = 0 ;
00188         _lock_fd = open( lock_file.c_str(),
00189                          O_CREAT | O_EXCL,
00190                          S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH ) ;
00191         while( _lock_fd < 0 && got_lock )
00192         {
00193             tries ++ ;
00194             if( tries > num_tries )
00195             {
00196                 _lock_fd = -1 ;
00197                 got_lock = false ;
00198                 /*
00199                 string err = "Unable to lock the cache directory "
00200                              + _cache_dir + ", timed out" ;
00201                 throw BESContainerStorageException( err, __FILE__, __LINE__ ) ;
00202                 */
00203             }
00204             else
00205             {
00206                 usleep( retry ) ;
00207                 _lock_fd = open( lock_file.c_str(),
00208                                  O_CREAT | O_EXCL,
00209                                  S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH ) ;
00210             }
00211         }
00212     }
00213     else
00214     {
00215         // This would be a programming error, or we've gotten into a
00216         // situation where the lock is lost. Lock has been called on the
00217         // same cache object twice in a row without an unlock being called.
00218         string err = "The cache dir " + _cache_dir + " is already locked" ;
00219         throw BESContainerStorageException( err, __FILE__, __LINE__ ) ;
00220     }
00221 
00222     return got_lock ;
00223 }
00224 
00231 bool
00232 BESCache::unlock()
00233 {
00234     // if we call unlock twice in a row, does it matter? I say no, just say
00235     // that it is unlocked.
00236     bool unlocked = true ;
00237     if( _lock_fd != -1 )
00238     {
00239         string lock_file = _cache_dir + "/lock" ;
00240         close( _lock_fd ) ;
00241         unlink( lock_file.c_str() ) ;
00242     }
00243 
00244     _lock_fd = -1 ;
00245 
00246     return unlocked ;
00247 }
00248 
00262 bool
00263 BESCache::is_cached( const string &src, string &target )
00264 {
00265     bool is_it = true ;
00266     string tmp_target = src ;
00267 
00268     // Create the file that would be created in the cache directory
00269     //echo ${infile} | sed 's/^\///' | sed 's/\//#/g' | sed 's/\(.*\)\..*$/\1/g'
00270     if( tmp_target.at(0) == '/' )
00271     {
00272         tmp_target = src.substr( 1, tmp_target.length() - 1 ) ;
00273     }
00274     string::size_type slash = 0 ;
00275     while( ( slash = tmp_target.find( '/' ) ) != string::npos )
00276     {
00277         tmp_target.replace( slash, 1, 1, BES_CACHE_CHAR ) ;
00278     }
00279     string::size_type last_dot = tmp_target.rfind( '.' ) ;
00280     if( last_dot != string::npos )
00281     {
00282         tmp_target = tmp_target.substr( 0, last_dot ) ;
00283     }
00284 
00285     target = _cache_dir + "/" + _prefix + BES_CACHE_CHAR + tmp_target ;
00286 
00287     // Determine if the target file is already in the cache or not
00288     struct stat buf;
00289     int statret = stat( target.c_str(), &buf ) ;
00290     if( statret != 0 )
00291     {
00292         is_it = false ;
00293     }
00294 
00295     return is_it ;
00296 }
00297 
00306 void
00307 BESCache::purge( )
00308 {
00309     int max_size = _cache_size * 1048576 ; // Bytes/Meg
00310     struct stat buf;
00311     int size = 0 ; // total size of all cached files
00312     time_t curr_time = time( NULL ) ; // grab the current time so we can
00313                                       // determine the oldest file
00314     // map of time,entry values
00315     multimap<double,cache_entry,greater<double> > contents ;
00316 
00317     // the prefix is actually the specified prefix plus the cache char '#'
00318     string match_prefix = _prefix + BES_CACHE_CHAR ;
00319 
00320     // go through the cache directory and collect all of the files that
00321     // start with the matching prefix
00322     DIR *dip = opendir( _cache_dir.c_str() ) ;
00323     if( dip != NULL )
00324     {
00325         struct dirent *dit;
00326         while( ( dit = readdir( dip ) ) != NULL )
00327         {
00328             string dirEntry = dit->d_name ;
00329             if( dirEntry.compare( 0, match_prefix.length(), match_prefix ) == 0)
00330             {
00331                 // Now that we have found a match we want to get the size of
00332                 // the file and the last access time from the file.
00333                 string fullPath = _cache_dir + "/" + dirEntry ;
00334                 int statret = stat( fullPath.c_str(), &buf ) ;
00335                 if( statret == 0 )
00336                 {
00337                     size += buf.st_size ;
00338 
00339                     // Find out how old the file is
00340                     time_t file_time = buf.st_atime ;
00341                     // I think we can use the access time without the diff,
00342                     // since it's the relative ages that determine when to
00343                     // delete a file. Good idea to use the access time so
00344                     // recently used (read) files will linger. jhrg 5/9/07
00345                     double time_diff = difftime( curr_time, file_time ) ;
00346                     cache_entry entry ;
00347                     entry.name = fullPath ;
00348                     entry.size = buf.st_size ;
00349                     contents.insert( pair<double,cache_entry>( time_diff, entry ) );
00350                 }
00351             }
00352         }
00353 
00354         // We're done looking in the directory, close it
00355         closedir( dip ) ;
00356 
00357         if( BESISDEBUG( "bes" ) )
00358         {
00359             BESDEBUG( "bes", endl << "BEFORE" << endl )
00360             multimap<double,cache_entry,greater<double> >::iterator ti = contents.begin() ;
00361             multimap<double,cache_entry,greater<double> >::iterator te = contents.end() ;
00362             for( ; ti != te; ti++ )
00363             {
00364                 BESDEBUG( "bes", (*ti).first << ": " << (*ti).second.name << ": size " << (*ti).second.size << endl )
00365             }
00366             BESDEBUG( "bes", endl )
00367         }
00368 
00369         // if the size of files is greater than max allowed then we need to
00370         // purge the cache directory. Keep going until the size is less than
00371         // the max.
00372         multimap<double,cache_entry,greater<double> >::iterator i ;
00373         if( size > max_size )
00374         {
00375             // Maybe change this to size + (fraction of max_size) > max_size?
00376             // jhrg 5/9/07
00377             while( size > max_size )
00378             {
00379                 i = contents.begin() ;
00380                 BESDEBUG( "bes", "BESCache::purge - removing " << (*i).second.name << endl )
00381                 if( remove( (*i).second.name.c_str() ) != 0 )
00382                 {
00383                     char *s_err = strerror( errno ) ;
00384                     string err = "Unable to remove the file "
00385                                  + (*i).second.name + " from the cache: " ;
00386                     if( s_err )
00387                     {
00388                         err.append( s_err ) ;
00389                     }
00390                     else
00391                     {
00392                         err.append( "Unknown error" ) ;
00393                     }
00394                     throw BESContainerStorageException( err, __FILE__, __LINE__ ) ;
00395                 }
00396                 size -= (*i).second.size ;
00397                 contents.erase( i ) ;
00398             }
00399         }
00400 
00401         if( BESISDEBUG( "bes" ) )
00402         {
00403             BESDEBUG( "bes", endl << "AFTER" << endl )
00404             multimap<double,cache_entry,greater<double> >::iterator ti = contents.begin() ;
00405             multimap<double,cache_entry,greater<double> >::iterator te = contents.end() ;
00406             for( ; ti != te; ti++ )
00407             {
00408                 BESDEBUG( "bes", (*ti).first << ": " << (*ti).second.name << ": size " << (*ti).second.size << endl )
00409             }
00410         }
00411     }
00412     else
00413     {
00414         string err = "Unable to open cache directory " + _cache_dir ;
00415         throw BESContainerStorageException( err, __FILE__, __LINE__ ) ;
00416     }
00417 }
00418 
00426 void
00427 BESCache::dump( ostream &strm ) const
00428 {
00429     strm << BESIndent::LMarg << "BESCache::dump - ("
00430                              << (void *)this << ")" << endl ;
00431     BESIndent::Indent() ;
00432     strm << BESIndent::LMarg << "cache dir: " << _cache_dir << endl ;
00433     strm << BESIndent::LMarg << "prefix: " << _prefix << endl ;
00434     strm << BESIndent::LMarg << "size: " << _cache_size << endl ;
00435     BESIndent::UnIndent() ;
00436 }
00437 

Generated on Wed Jan 2 06:00:39 2008 for OPeNDAP Back End Server (BES) by  doxygen 1.5.4