libdap++ Updated for version 3.8.2
|
00001 00002 // -*- mode: c++; c-basic-offset:4 -*- 00003 00004 // This file is part of libdap, A C++ implementation of the OPeNDAP Data 00005 // Access Protocol. 00006 00007 // Copyright (c) 2002,2003 OPeNDAP, Inc. 00008 // Author: James Gallagher <jgallagher@opendap.org> 00009 // 00010 // This library is free software; you can redistribute it and/or 00011 // modify it under the terms of the GNU Lesser General Public 00012 // License as published by the Free Software Foundation; either 00013 // version 2.1 of the License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, 00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00023 // 00024 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112. 00025 00026 #include "config.h" 00027 00028 //#define DODS_DEBUG 00029 //#define DODS_DEBUG2 00030 00031 #include <pthread.h> 00032 #include <limits.h> 00033 #include <unistd.h> // for stat 00034 #include <sys/types.h> // for stat and mkdir 00035 #include <sys/stat.h> 00036 00037 #include <cstring> 00038 #include <iostream> 00039 #include <sstream> 00040 #include <algorithm> 00041 #include <iterator> 00042 #include <set> 00043 00044 #include "Error.h" 00045 #include "InternalErr.h" 00046 #include "ResponseTooBigErr.h" 00047 #ifndef WIN32 00048 #include "SignalHandler.h" 00049 #endif 00050 #include "HTTPCacheInterruptHandler.h" 00051 #include "HTTPCacheTable.h" 00052 #include "HTTPCache.h" 00053 00054 #include "util_mit.h" 00055 #include "debug.h" 00056 00057 using namespace std; 00058 00059 namespace libdap { 00060 00061 HTTPCache *HTTPCache::_instance = 0; 00062 00063 // instance_mutex is used to ensure that only one instance is created. 00064 // That is, it protects the body of the HTTPCache::instance() method. This 00065 // mutex is initialized from within the static function once_init_routine() 00066 // and the call to that takes place using pthread_once_init() where the mutex 00067 // once_block is used to protect that call. All of this ensures that no matter 00068 // how many threads call the instance() method, only one instance is ever 00069 // made. 00070 static pthread_mutex_t instance_mutex; 00071 static pthread_once_t once_block = PTHREAD_ONCE_INIT; 00072 00073 #ifdef WIN32 00074 #include <direct.h> 00075 #include <time.h> 00076 #include <fcntl.h> 00077 #define MKDIR(a,b) _mkdir((a)) 00078 #define UMASK(a) _umask((a)) 00079 #define REMOVE(a) remove((a)) 00080 #define MKSTEMP(a) _open(_mktemp((a)),_O_CREAT,_S_IREAD|_S_IWRITE) 00081 #define DIR_SEPARATOR_CHAR '\\' 00082 #define DIR_SEPARATOR_STR "\\" 00083 #else 00084 #define MKDIR(a,b) mkdir((a), (b)) 00085 #define UMASK(a) umask((a)) 00086 #define REMOVE(a) remove((a)) 00087 #define MKSTEMP(a) mkstemp((a)) 00088 #define DIR_SEPARATOR_CHAR '/' 00089 #define DIR_SEPARATOR_STR "/" 00090 #endif 00091 00092 #ifdef WIN32 00093 #define CACHE_LOC "\\tmp\\" 00094 #define CACHE_ROOT "dods-cache\\" 00095 #else 00096 #define CACHE_LOC "/tmp/" 00097 #define CACHE_ROOT "dods-cache/" 00098 #endif 00099 #define CACHE_INDEX ".index" 00100 #define CACHE_LOCK ".lock" 00101 #define CACHE_META ".meta" 00102 //#define CACHE_EMPTY_ETAG "@cache@" 00103 00104 #define NO_LM_EXPIRATION 24*3600 // 24 hours 00105 00106 #define DUMP_FREQUENCY 10 // Dump index every x loads 00107 00108 #define MEGA 0x100000L 00109 #define CACHE_TOTAL_SIZE 20 // Default cache size is 20M 00110 #define CACHE_FOLDER_PCT 10 // 10% of cache size for metainfo etc. 00111 #define CACHE_GC_PCT 10 // 10% of cache size free after GC 00112 #define MIN_CACHE_TOTAL_SIZE 5 // 5M Min cache size 00113 #define MAX_CACHE_ENTRY_SIZE 3 // 3M Max size of single cached entry 00114 00115 static void 00116 once_init_routine() 00117 { 00118 int status; 00119 status = INIT(&instance_mutex); 00120 00121 if (status != 0) 00122 throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting."); 00123 } 00124 00153 HTTPCache * 00154 HTTPCache::instance(const string &cache_root, bool force) 00155 { 00156 LOCK(&instance_mutex); 00157 DBG(cerr << "Entering instance(); (" << hex << _instance << dec << ")" 00158 << "... "); 00159 00160 try { 00161 if (!_instance) { 00162 _instance = new HTTPCache(cache_root, force); 00163 00164 DBG(cerr << "New instance: " << _instance << ", cache root: " 00165 << _instance->d_cache_root << endl); 00166 00167 atexit(delete_instance); 00168 00169 #ifndef WIN32 00170 // Register the interrupt handler. If we've already registered 00171 // one, barf. If this becomes a problem, hack SignalHandler so 00172 // that we can chain these handlers... 02/10/04 jhrg 00173 // 00174 // Technically we're leaking memory here. However, since this 00175 // class is a singleton, we know that only three objects will 00176 // ever be created and they will all exist until the process 00177 // exits. We can let this slide... 02/12/04 jhrg 00178 EventHandler *old_eh = SignalHandler::instance()->register_handler 00179 (SIGINT, new HTTPCacheInterruptHandler); 00180 if (old_eh) { 00181 SignalHandler::instance()->register_handler(SIGINT, old_eh); 00182 throw SignalHandlerRegisteredErr( 00183 "Could not register event handler for SIGINT without superseding an existing one."); 00184 } 00185 00186 old_eh = SignalHandler::instance()->register_handler 00187 (SIGPIPE, new HTTPCacheInterruptHandler); 00188 if (old_eh) { 00189 SignalHandler::instance()->register_handler(SIGPIPE, old_eh); 00190 throw SignalHandlerRegisteredErr( 00191 "Could not register event handler for SIGPIPE without superseding an existing one."); 00192 } 00193 00194 old_eh = SignalHandler::instance()->register_handler 00195 (SIGTERM, new HTTPCacheInterruptHandler); 00196 if (old_eh) { 00197 SignalHandler::instance()->register_handler(SIGTERM, old_eh); 00198 throw SignalHandlerRegisteredErr( 00199 "Could not register event handler for SIGTERM without superseding an existing one."); 00200 } 00201 #endif 00202 } 00203 } 00204 catch (...) { 00205 DBG2(cerr << "The constructor threw an Error!" << endl); 00206 UNLOCK(&instance_mutex); 00207 throw; 00208 } 00209 00210 UNLOCK(&instance_mutex); 00211 DBGN(cerr << "returning " << hex << _instance << dec << endl); 00212 00213 return _instance; 00214 } 00215 00219 void 00220 HTTPCache::delete_instance() 00221 { 00222 DBG(cerr << "Entering delete_instance()..." << endl); 00223 if (HTTPCache::_instance) { 00224 DBG(cerr << "Deleting the cache: " << HTTPCache::_instance << endl); 00225 delete HTTPCache::_instance; 00226 HTTPCache::_instance = 0; 00227 } 00228 00229 DBG(cerr << "Exiting delete_instance()" << endl); 00230 } 00231 00246 HTTPCache::HTTPCache(string cache_root, bool force) : 00247 d_locked_open_file(0), 00248 d_cache_enabled(false), 00249 d_cache_protected(false), 00250 d_expire_ignored(false), 00251 d_always_validate(false), 00252 d_total_size(CACHE_TOTAL_SIZE * MEGA), 00253 d_folder_size(CACHE_TOTAL_SIZE / CACHE_FOLDER_PCT), 00254 d_gc_buffer(CACHE_TOTAL_SIZE / CACHE_GC_PCT), 00255 d_max_entry_size(MAX_CACHE_ENTRY_SIZE * MEGA), 00256 d_default_expiration(NO_LM_EXPIRATION), 00257 d_max_age(-1), 00258 d_max_stale(-1), 00259 d_min_fresh(-1), 00260 d_http_cache_table(0) 00261 { 00262 DBG(cerr << "Entering the constructor for " << this << "... "); 00263 00264 int status = pthread_once(&once_block, once_init_routine); 00265 if (status != 0) 00266 throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting."); 00267 00268 INIT(&d_cache_mutex); 00269 00270 // This used to throw an Error object if we could not get the 00271 // single user lock. However, that results in an invalid object. It's 00272 // better to have an instance that has default values. If we cannot get 00273 // the lock, make sure to set the cache as *disabled*. 03/12/03 jhrg 00274 // 00275 // I fixed this block so that the cache root is set before we try to get 00276 // the single user lock. That was the fix for bug #661. To make that 00277 // work, I had to move the call to create_cache_root out of 00278 // set_cache_root(). 09/08/03 jhrg 00279 00280 set_cache_root(cache_root); 00281 int block_size; 00282 00283 if (!get_single_user_lock(force)) 00284 throw Error("Could not get single user lock for the cache"); 00285 00286 #ifdef WIN32 00287 // Windows is unable to provide us this information. 4096 appears 00288 // a best guess. It is likely to be in the range [2048, 8192] on 00289 // windows, but will the level of truth of that statement vary over 00290 // time ? 00291 block_size = 4096; 00292 #else 00293 struct stat s; 00294 if (stat(cache_root.c_str(), &s) == 0) 00295 block_size = s.st_blksize; 00296 else 00297 throw Error("Could not set file system block size."); 00298 #endif 00299 d_http_cache_table = new HTTPCacheTable(d_cache_root, block_size); 00300 d_cache_enabled = true; 00301 00302 DBGN(cerr << "exiting" << endl); 00303 } 00304 00317 HTTPCache::~HTTPCache() 00318 { 00319 DBG(cerr << "Entering the destructor for " << this << "... "); 00320 00321 try { 00322 if (startGC()) 00323 perform_garbage_collection(); 00324 00325 d_http_cache_table->cache_index_write(); 00326 } 00327 catch (Error &e) { 00328 // If the cache index cannot be written, we've got problems. However, 00329 // unless we're debugging, still free up the cache table in memory. 00330 // How should we let users know they cache index is not being 00331 // written?? 10/03/02 jhrg 00332 DBG(cerr << e.get_error_message() << endl); 00333 } 00334 00335 delete d_http_cache_table; 00336 00337 release_single_user_lock(); 00338 00339 DBGN(cerr << "exiting destructor." << endl); 00340 DESTROY(&d_cache_mutex); 00341 } 00342 00343 00347 00351 bool 00352 HTTPCache::stopGC() const 00353 { 00354 return (d_http_cache_table->get_current_size() + d_folder_size < d_total_size - d_gc_buffer); 00355 } 00356 00363 bool 00364 HTTPCache::startGC() const 00365 { 00366 DBG(cerr << "startGC, current_size: " << d_http_cache_table->get_current_size() << endl); 00367 return (d_http_cache_table->get_current_size() + d_folder_size > d_total_size); 00368 } 00369 00384 void 00385 HTTPCache::perform_garbage_collection() 00386 { 00387 DBG(cerr << "Performing garbage collection" << endl); 00388 00389 // Remove all the expired responses. 00390 expired_gc(); 00391 00392 // Remove entries larger than max_entry_size. 00393 too_big_gc(); 00394 00395 // Remove entries starting with zero hits, 1, ..., until stopGC() 00396 // returns true. 00397 hits_gc(); 00398 } 00399 00405 void 00406 HTTPCache::expired_gc() 00407 { 00408 if (!d_expire_ignored) { 00409 d_http_cache_table->delete_expired_entries(); 00410 } 00411 } 00412 00429 void 00430 HTTPCache::hits_gc() 00431 { 00432 int hits = 0; 00433 00434 if (startGC()) { 00435 while (!stopGC()) { 00436 d_http_cache_table->delete_by_hits(hits); 00437 hits++; 00438 } 00439 } 00440 } 00441 00446 void HTTPCache::too_big_gc() { 00447 if (startGC()) 00448 d_http_cache_table->delete_by_size(d_max_entry_size); 00449 } 00450 00452 00463 bool HTTPCache::get_single_user_lock(bool force) { 00464 if (!d_locked_open_file) { 00465 FILE * fp = NULL; 00466 00467 try { 00468 // It's OK to call create_cache_root if the directory already 00469 // exists. 00470 create_cache_root(d_cache_root); 00471 } 00472 catch (Error &e) { 00473 // We need to catch and return false because this method is 00474 // called from a ctor and throwing at this point will result in a 00475 // partially constructed object. 01/22/04 jhrg 00476 return false; 00477 } 00478 00479 // Try to read the lock file. If we can open for reading, it exists. 00480 string lock = d_cache_root + CACHE_LOCK; 00481 if ((fp = fopen(lock.c_str(), "r")) != NULL) { 00482 int res = fclose(fp); 00483 if (res) { 00484 DBG(cerr << "Failed to close " << (void *)fp << endl); 00485 } 00486 if (force) 00487 REMOVE(lock.c_str()); 00488 else 00489 return false; 00490 } 00491 00492 if ((fp = fopen(lock.c_str(), "w")) == NULL) 00493 return false; 00494 00495 d_locked_open_file = fp; 00496 return true; 00497 } 00498 00499 return false; 00500 } 00501 00504 void 00505 HTTPCache::release_single_user_lock() 00506 { 00507 if (d_locked_open_file) { 00508 int res = fclose(d_locked_open_file); 00509 if (res) { 00510 DBG(cerr << "Failed to close " << (void *)d_locked_open_file << endl) ; 00511 } 00512 d_locked_open_file = 0; 00513 } 00514 00515 string lock = d_cache_root + CACHE_LOCK; 00516 REMOVE(lock.c_str()); 00517 } 00518 00521 00525 string 00526 HTTPCache::get_cache_root() const 00527 { 00528 return d_cache_root; 00529 } 00530 00531 00540 void 00541 HTTPCache::create_cache_root(const string &cache_root) 00542 { 00543 struct stat stat_info; 00544 string::size_type cur = 0; 00545 00546 #ifdef WIN32 00547 cur = cache_root[1] == ':' ? 3 : 1; 00548 typedef int mode_t; 00549 #else 00550 cur = 1; 00551 #endif 00552 while ((cur = cache_root.find(DIR_SEPARATOR_CHAR, cur)) != string::npos) { 00553 string dir = cache_root.substr(0, cur); 00554 if (stat(dir.c_str(), &stat_info) == -1) { 00555 DBG2(cerr << "Cache....... Creating " << dir << endl); 00556 mode_t mask = UMASK(0); 00557 if (MKDIR(dir.c_str(), 0777) < 0) { 00558 DBG2(cerr << "Error: can't create." << endl); 00559 UMASK(mask); 00560 throw Error(string("Could not create the directory for the cache. Failed when building path at ") + dir + string(".")); 00561 } 00562 UMASK(mask); 00563 } 00564 else { 00565 DBG2(cerr << "Cache....... Found " << dir << endl); 00566 } 00567 cur++; 00568 } 00569 } 00570 00585 void 00586 HTTPCache::set_cache_root(const string &root) 00587 { 00588 if (root != "") { 00589 d_cache_root = root; 00590 // cache root should end in /. 00591 if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR) 00592 d_cache_root += DIR_SEPARATOR_CHAR; 00593 } 00594 else { 00595 // If no cache root has been indicated then look for a suitable 00596 // location. 00597 char * cr = (char *) getenv("DODS_CACHE"); 00598 if (!cr) cr = (char *) getenv("TMP"); 00599 if (!cr) cr = (char *) getenv("TEMP"); 00600 if (!cr) cr = CACHE_LOC; 00601 00602 d_cache_root = cr; 00603 if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR) 00604 d_cache_root += DIR_SEPARATOR_CHAR; 00605 00606 d_cache_root += CACHE_ROOT; 00607 } 00608 00609 // Test d_hhtp_cache_table because this method can be called before that 00610 // instance is created and also can be called later to cahnge the cache 00611 // root. jhrg 05.14.08 00612 if (d_http_cache_table) 00613 d_http_cache_table->set_cache_root(d_cache_root); 00614 } 00615 00627 void 00628 HTTPCache::set_cache_enabled(bool mode) 00629 { 00630 lock_cache_interface(); 00631 00632 d_cache_enabled = mode; 00633 00634 unlock_cache_interface(); 00635 } 00636 00639 bool 00640 HTTPCache::is_cache_enabled() const 00641 { 00642 DBG2(cerr << "In HTTPCache::is_cache_enabled: (" << d_cache_enabled << ")" 00643 << endl); 00644 return d_cache_enabled; 00645 } 00646 00657 void 00658 HTTPCache::set_cache_disconnected(CacheDisconnectedMode mode) 00659 { 00660 lock_cache_interface(); 00661 00662 d_cache_disconnected = mode; 00663 00664 unlock_cache_interface(); 00665 } 00666 00669 CacheDisconnectedMode 00670 HTTPCache::get_cache_disconnected() const 00671 { 00672 return d_cache_disconnected; 00673 } 00674 00683 void 00684 HTTPCache::set_expire_ignored(bool mode) 00685 { 00686 lock_cache_interface(); 00687 00688 d_expire_ignored = mode; 00689 00690 unlock_cache_interface(); 00691 } 00692 00693 /* Is the cache ignoring Expires headers returned with responses that have 00694 been cached? */ 00695 00696 bool 00697 HTTPCache::is_expire_ignored() const 00698 { 00699 return d_expire_ignored; 00700 } 00701 00717 void 00718 HTTPCache::set_max_size(unsigned long size) 00719 { 00720 lock_cache_interface(); 00721 00722 try { 00723 unsigned long new_size = size < MIN_CACHE_TOTAL_SIZE ? 00724 MIN_CACHE_TOTAL_SIZE * MEGA : 00725 (size > ULONG_MAX ? ULONG_MAX : size * MEGA); 00726 unsigned long old_size = d_total_size; 00727 d_total_size = new_size; 00728 d_folder_size = d_total_size / CACHE_FOLDER_PCT; 00729 d_gc_buffer = d_total_size / CACHE_GC_PCT; 00730 00731 if (new_size < old_size && startGC()) { 00732 perform_garbage_collection(); 00733 d_http_cache_table->cache_index_write(); 00734 } 00735 } 00736 catch (...) { 00737 unlock_cache_interface(); 00738 DBGN(cerr << "Unlocking interface." << endl); 00739 throw; 00740 } 00741 00742 DBG2(cerr << "Cache....... Total cache size: " << d_total_size 00743 << " with " << d_folder_size 00744 << " bytes for meta information and folders and at least " 00745 << d_gc_buffer << " bytes free after every gc" << endl); 00746 00747 unlock_cache_interface(); 00748 } 00749 00752 unsigned long 00753 HTTPCache::get_max_size() const 00754 { 00755 return d_total_size / MEGA; 00756 } 00757 00766 void 00767 HTTPCache::set_max_entry_size(unsigned long size) 00768 { 00769 lock_cache_interface(); 00770 00771 try { 00772 unsigned long new_size = size * MEGA; 00773 if (new_size > 0 && new_size < d_total_size - d_folder_size) { 00774 unsigned long old_size = d_max_entry_size; 00775 d_max_entry_size = new_size; 00776 if (new_size < old_size && startGC()) { 00777 perform_garbage_collection(); 00778 d_http_cache_table->cache_index_write(); 00779 } 00780 } 00781 } 00782 catch (...) { 00783 unlock_cache_interface(); 00784 throw; 00785 } 00786 00787 DBG2(cerr << "Cache...... Max entry cache size is " 00788 << d_max_entry_size << endl); 00789 00790 unlock_cache_interface(); 00791 } 00792 00797 unsigned long 00798 HTTPCache::get_max_entry_size() const 00799 { 00800 return d_max_entry_size / MEGA; 00801 } 00802 00813 void 00814 HTTPCache::set_default_expiration(const int exp_time) 00815 { 00816 lock_cache_interface(); 00817 00818 d_default_expiration = exp_time; 00819 00820 unlock_cache_interface(); 00821 } 00822 00825 int 00826 HTTPCache::get_default_expiration() const 00827 { 00828 return d_default_expiration; 00829 } 00830 00835 void 00836 HTTPCache::set_always_validate(bool validate) 00837 { 00838 d_always_validate = validate; 00839 } 00840 00844 bool 00845 HTTPCache::get_always_validate() const 00846 { 00847 return d_always_validate; 00848 } 00849 00866 void 00867 HTTPCache::set_cache_control(const vector<string> &cc) 00868 { 00869 lock_cache_interface(); 00870 00871 try { 00872 d_cache_control = cc; 00873 00874 vector<string>::const_iterator i; 00875 for (i = cc.begin(); i != cc.end(); ++i) { 00876 string header = (*i).substr(0, (*i).find(':')); 00877 string value = (*i).substr((*i).find(": ") + 2); 00878 if (header != "Cache-Control") { 00879 throw InternalErr(__FILE__, __LINE__, "Expected cache control header not found."); 00880 } 00881 else { 00882 if (value == "no-cache" || value == "no-store") 00883 d_cache_enabled = false; 00884 else if (value.find("max-age") != string::npos) { 00885 string max_age = value.substr(value.find("=" + 1)); 00886 d_max_age = parse_time(max_age.c_str()); 00887 } 00888 else if (value == "max-stale") 00889 d_max_stale = 0; // indicates will take anything; 00890 else if (value.find("max-stale") != string::npos) { 00891 string max_stale = value.substr(value.find("=" + 1)); 00892 d_max_stale = parse_time(max_stale.c_str()); 00893 } 00894 else if (value.find("min-fresh") != string::npos) { 00895 string min_fresh = value.substr(value.find("=" + 1)); 00896 d_min_fresh = parse_time(min_fresh.c_str()); 00897 } 00898 } 00899 } 00900 } 00901 catch (...) { 00902 unlock_cache_interface(); 00903 throw; 00904 } 00905 00906 unlock_cache_interface(); 00907 } 00908 00909 00914 vector<string> 00915 HTTPCache::get_cache_control() 00916 { 00917 return d_cache_control; 00918 } 00919 00921 00930 bool 00931 HTTPCache::is_url_in_cache(const string &url) 00932 { 00933 DBG(cerr << "Is this url in the cache? (" << url << ")" << endl); 00934 00935 HTTPCacheTable::CacheEntry *entry = d_http_cache_table->get_locked_entry_from_cache_table(url); 00936 bool status = entry != 0; 00937 if (entry) { 00938 entry->unlock_read_response(); 00939 } 00940 return status; 00941 } 00942 00948 bool 00949 is_hop_by_hop_header(const string &header) 00950 { 00951 return header.find("Connection") != string::npos 00952 || header.find("Keep-Alive") != string::npos 00953 || header.find("Proxy-Authenticate") != string::npos 00954 || header.find("Proxy-Authorization") != string::npos 00955 || header.find("Transfer-Encoding") != string::npos 00956 || header.find("Upgrade") != string::npos; 00957 } 00958 00970 void 00971 HTTPCache::write_metadata(const string &cachename, const vector<string> &headers) 00972 { 00973 string fname = cachename + CACHE_META; 00974 d_open_files.push_back(fname); 00975 00976 FILE *dest = fopen(fname.c_str(), "w"); 00977 if (!dest) { 00978 throw InternalErr(__FILE__, __LINE__, 00979 "Could not open named cache entry file."); 00980 } 00981 00982 vector<string>::const_iterator i; 00983 for (i = headers.begin(); i != headers.end(); ++i) { 00984 if (!is_hop_by_hop_header(*i)) { 00985 fwrite((*i).c_str(), (*i).size(), 1, dest); 00986 fwrite("\n", 1, 1, dest); 00987 } 00988 } 00989 00990 int res = fclose(dest); 00991 if (res) { 00992 DBG(cerr << "HTTPCache::write_metadata - Failed to close " 00993 << dest << endl); 00994 } 00995 00996 d_open_files.pop_back(); 00997 } 00998 01009 void 01010 HTTPCache::read_metadata(const string &cachename, vector<string> &headers) 01011 { 01012 FILE *md = fopen(string(cachename + CACHE_META).c_str(), "r"); 01013 if (!md) { 01014 throw InternalErr(__FILE__, __LINE__, 01015 "Could not open named cache entry meta data file."); 01016 } 01017 01018 char line[1024]; 01019 while (!feof(md) && fgets(line, 1024, md)) { 01020 line[min(1024, static_cast<int>(strlen(line)))-1] = '\0'; // erase newline 01021 headers.push_back(string(line)); 01022 } 01023 01024 int res = fclose(md); 01025 if (res) { 01026 DBG(cerr << "HTTPCache::read_metadata - Failed to close " 01027 << md << endl); 01028 } 01029 } 01030 01052 int 01053 HTTPCache::write_body(const string &cachename, const FILE *src) 01054 { 01055 d_open_files.push_back(cachename); 01056 01057 FILE *dest = fopen(cachename.c_str(), "wb"); 01058 if (!dest) { 01059 throw InternalErr(__FILE__, __LINE__, 01060 "Could not open named cache entry file."); 01061 } 01062 01063 // Read and write in 1k blocks; an attempt at doing this efficiently. 01064 // 09/30/02 jhrg 01065 char line[1024]; 01066 size_t n; 01067 int total = 0; 01068 while ((n = fread(line, 1, 1024, const_cast<FILE *>(src))) > 0) { 01069 total += fwrite(line, 1, n, dest); 01070 DBG2(sleep(3)); 01071 } 01072 01073 if (ferror(const_cast<FILE *>(src)) || ferror(dest)) { 01074 int res = fclose(dest); 01075 res = res & unlink(cachename.c_str()); 01076 if (res) { 01077 DBG(cerr << "HTTPCache::write_body - Failed to close/unlink " 01078 << dest << endl); 01079 } 01080 throw InternalErr(__FILE__, __LINE__, 01081 "I/O error transferring data to the cache."); 01082 } 01083 01084 rewind(const_cast<FILE *>(src)); 01085 01086 int res = fclose(dest); 01087 if (res) { 01088 DBG(cerr << "HTTPCache::write_body - Failed to close " 01089 << dest << endl); 01090 } 01091 01092 d_open_files.pop_back(); 01093 01094 return total; 01095 } 01096 01105 FILE * 01106 HTTPCache::open_body(const string &cachename) 01107 { 01108 FILE *src = fopen(cachename.c_str(), "rb"); // Read only 01109 if (!src) 01110 throw InternalErr(__FILE__, __LINE__, "Could not open cache file."); 01111 01112 return src; 01113 } 01114 01140 bool 01141 HTTPCache::cache_response(const string &url, time_t request_time, 01142 const vector<string> &headers, const FILE *body) 01143 { 01144 lock_cache_interface(); 01145 01146 DBG(cerr << "Caching url: " << url << "." << endl); 01147 01148 try { 01149 // If this is not an http or https URL, don't cache. 01150 if (url.find("http:") == string::npos && 01151 url.find("https:") == string::npos) { 01152 unlock_cache_interface(); 01153 return false; 01154 } 01155 01156 // This does nothing if url is not already in the cache. It's 01157 // more efficient to do this than to first check and see if the entry 01158 // exists. 10/10/02 jhrg 01159 d_http_cache_table->remove_entry_from_cache_table(url); 01160 01161 HTTPCacheTable::CacheEntry *entry = new HTTPCacheTable::CacheEntry(url); 01162 entry->lock_write_response(); 01163 01164 try { 01165 d_http_cache_table->parse_headers(entry, d_max_entry_size, headers); // etag, lm, date, age, expires, max_age. 01166 if (entry->is_no_cache()) { 01167 DBG(cerr << "Not cache-able; deleting HTTPCacheTable::CacheEntry: " << entry 01168 << "(" << url << ")" << endl); 01169 entry->unlock_write_response(); 01170 delete entry; entry = 0; 01171 unlock_cache_interface(); 01172 return false; 01173 } 01174 01175 // corrected_initial_age, freshness_lifetime, response_time. 01176 d_http_cache_table->calculate_time(entry, d_default_expiration, request_time); 01177 01178 d_http_cache_table->create_location(entry); // cachename, cache_body_fd 01179 // move these write function to cache table 01180 entry->set_size(write_body(entry->get_cachename(), body)); 01181 write_metadata(entry->get_cachename(), headers); 01182 d_http_cache_table->add_entry_to_cache_table(entry); 01183 entry->unlock_write_response(); 01184 } 01185 catch (ResponseTooBigErr &e) { 01186 // Oops. Bummer. Clean up and exit. 01187 DBG(cerr << e.get_error_message() << endl); 01188 REMOVE(entry->get_cachename().c_str()); 01189 REMOVE(string(entry->get_cachename() + CACHE_META).c_str()); 01190 DBG(cerr << "Too big; deleting HTTPCacheTable::CacheEntry: " << entry << "(" << url 01191 << ")" << endl); 01192 entry->unlock_write_response(); 01193 delete entry; entry = 0; 01194 unlock_cache_interface(); 01195 return false; 01196 } 01197 01198 if (d_http_cache_table->get_new_entries() > DUMP_FREQUENCY) { 01199 if (startGC()) 01200 perform_garbage_collection(); 01201 01202 d_http_cache_table->cache_index_write(); // resets new_entries 01203 } 01204 } 01205 catch (...) { 01206 unlock_cache_interface(); 01207 throw; 01208 } 01209 01210 unlock_cache_interface(); 01211 01212 return true; 01213 } 01214 01233 vector<string> 01234 HTTPCache::get_conditional_request_headers(const string &url) 01235 { 01236 lock_cache_interface(); 01237 01238 HTTPCacheTable::CacheEntry *entry = 0; 01239 vector<string> headers; 01240 01241 DBG(cerr << "Getting conditional request headers for " << url << endl); 01242 01243 try { 01244 entry = d_http_cache_table->get_locked_entry_from_cache_table(url); 01245 if (!entry) 01246 throw Error("There is no cache entry for the URL: " + url); 01247 01248 if (entry->get_etag() != "") 01249 headers.push_back(string("If-None-Match: ") + entry->get_etag()); 01250 01251 if (entry->get_lm() > 0) { 01252 time_t lm = entry->get_lm(); 01253 headers.push_back(string("If-Modified-Since: ") 01254 + date_time_str(&lm)); 01255 } 01256 else if (entry->get_max_age() > 0) { 01257 time_t max_age = entry->get_max_age(); 01258 headers.push_back(string("If-Modified-Since: ") 01259 + date_time_str(&max_age)); 01260 } 01261 else if (entry->get_expires() > 0) { 01262 time_t expires = entry->get_expires(); 01263 headers.push_back(string("If-Modified-Since: ") 01264 + date_time_str(&expires)); 01265 } 01266 entry->unlock_read_response(); 01267 unlock_cache_interface(); 01268 } 01269 catch (...) { 01270 unlock_cache_interface(); 01271 if (entry) { 01272 entry->unlock_read_response(); 01273 } 01274 throw; 01275 } 01276 01277 return headers; 01278 } 01279 01283 struct HeaderLess: binary_function<const string&, const string&, bool> 01284 { 01285 bool operator()(const string &s1, const string &s2) const { 01286 return s1.substr(0, s1.find(':')) < s2.substr(0, s2.find(':')); 01287 } 01288 }; 01289 01303 void 01304 HTTPCache::update_response(const string &url, time_t request_time, 01305 const vector<string> &headers) 01306 { 01307 lock_cache_interface(); 01308 01309 HTTPCacheTable::CacheEntry *entry = 0; 01310 DBG(cerr << "Updating the response headers for: " << url << endl); 01311 01312 try { 01313 entry = d_http_cache_table->get_write_locked_entry_from_cache_table(url); 01314 if (!entry) 01315 throw Error("There is no cache entry for the URL: " + url); 01316 01317 // Merge the new headers with the exiting HTTPCacheTable::CacheEntry object. 01318 d_http_cache_table->parse_headers(entry, d_max_entry_size, headers); 01319 01320 // Update corrected_initial_age, freshness_lifetime, response_time. 01321 d_http_cache_table->calculate_time(entry, d_default_expiration, request_time); 01322 01323 // Merge the new headers with those in the persistent store. How: 01324 // Load the new headers into a set, then merge the old headers. Since 01325 // set<> ignores duplicates, old headers with the same name as a new 01326 // header will got into the bit bucket. Define a special compare 01327 // functor to make sure that headers are compared using only their 01328 // name and not their value too. 01329 set<string, HeaderLess> merged_headers; 01330 01331 // Load in the new headers 01332 copy(headers.begin(), headers.end(), 01333 inserter(merged_headers, merged_headers.begin())); 01334 01335 // Get the old headers and load them in. 01336 vector<string> old_headers; 01337 read_metadata(entry->get_cachename(), old_headers); 01338 copy(old_headers.begin(), old_headers.end(), 01339 inserter(merged_headers, merged_headers.begin())); 01340 01341 // Read the values back out. Use reverse iterators with back_inserter 01342 // to preserve header order. NB: vector<> does not support push_front 01343 // so we can't use front_inserter(). 01/09/03 jhrg 01344 vector<string> result; 01345 copy(merged_headers.rbegin(), merged_headers.rend(), 01346 back_inserter(result)); 01347 01348 write_metadata(entry->get_cachename(), result); 01349 entry->unlock_write_response(); 01350 unlock_cache_interface(); 01351 } 01352 catch (...) { 01353 if (entry) { 01354 entry->unlock_read_response(); 01355 } 01356 unlock_cache_interface(); 01357 throw; 01358 } 01359 } 01360 01372 bool 01373 HTTPCache::is_url_valid(const string &url) 01374 { 01375 lock_cache_interface(); 01376 01377 bool freshness; 01378 HTTPCacheTable::CacheEntry *entry = 0; 01379 01380 DBG(cerr << "Is this URL valid? (" << url << ")" << endl); 01381 01382 try { 01383 if (d_always_validate) { 01384 unlock_cache_interface(); 01385 return false; // force re-validation. 01386 } 01387 01388 entry = d_http_cache_table->get_locked_entry_from_cache_table(url); 01389 if (!entry) 01390 throw Error("There is no cache entry for the URL: " + url); 01391 01392 // If we supported range requests, we'd need code here to check if 01393 // there was only a partial response in the cache. 10/02/02 jhrg 01394 01395 // In case this entry is of type "must-revalidate" then we consider it 01396 // invalid. 01397 if (entry->get_must_revalidate()) { 01398 entry->unlock_read_response(); 01399 unlock_cache_interface(); 01400 return false; 01401 } 01402 01403 time_t resident_time = time(NULL) - entry->get_response_time(); 01404 time_t current_age = entry->get_corrected_initial_age() + resident_time; 01405 01406 // Check that the max-age, max-stale, and min-fresh directives 01407 // given in the request cache control header is followed. 01408 if (d_max_age >= 0 && current_age > d_max_age) { 01409 DBG(cerr << "Cache....... Max-age validation" << endl); 01410 entry->unlock_read_response(); 01411 unlock_cache_interface(); 01412 return false; 01413 } 01414 if (d_min_fresh >= 0 01415 && entry->get_freshness_lifetime() < current_age + d_min_fresh) { 01416 DBG(cerr << "Cache....... Min-fresh validation" << endl); 01417 entry->unlock_read_response(); 01418 unlock_cache_interface(); 01419 return false; 01420 } 01421 01422 freshness = (entry->get_freshness_lifetime() 01423 + (d_max_stale >= 0 ? d_max_stale : 0) > current_age); 01424 entry->unlock_read_response(); 01425 unlock_cache_interface(); 01426 } 01427 catch (...) { 01428 if (entry) { 01429 entry->unlock_read_response(); 01430 } 01431 unlock_cache_interface(); 01432 throw; 01433 } 01434 01435 return freshness; 01436 } 01437 01465 FILE * HTTPCache::get_cached_response(const string &url, 01466 vector<string> &headers, string &cacheName) { 01467 lock_cache_interface(); 01468 01469 FILE *body; 01470 HTTPCacheTable::CacheEntry *entry = 0; 01471 01472 DBG(cerr << "Getting the cached response for " << url << endl); 01473 01474 try { 01475 entry = d_http_cache_table->get_locked_entry_from_cache_table(url); 01476 if (!entry) { 01477 unlock_cache_interface(); 01478 return 0; 01479 } 01480 01481 cacheName = entry->get_cachename(); 01482 read_metadata(entry->get_cachename(), headers); 01483 01484 DBG(cerr << "Headers just read from cache: " << endl); 01485 DBGN(copy(headers.begin(), headers.end(), ostream_iterator<string>(cerr, "\n"))); 01486 01487 body = open_body(entry->get_cachename()); 01488 01489 DBG(cerr << "Returning: " << url << " from the cache." << endl); 01490 01491 d_http_cache_table->bind_entry_to_data(entry, body); 01492 } 01493 catch (...) { 01494 if (entry) 01495 unlock_cache_interface(); 01496 throw; 01497 } 01498 01499 unlock_cache_interface(); 01500 01501 return body; 01502 } 01514 FILE * 01515 HTTPCache::get_cached_response(const string &url, vector<string> &headers) 01516 { 01517 string discard_name; 01518 return get_cached_response(url, headers, discard_name); 01519 } 01520 01531 FILE * 01532 HTTPCache::get_cached_response(const string &url) 01533 { 01534 string discard_name; 01535 vector<string> discard_headers; 01536 return get_cached_response(url, discard_headers, discard_name); 01537 } 01538 01551 void 01552 HTTPCache::release_cached_response(FILE *body) 01553 { 01554 lock_cache_interface(); 01555 01556 try { 01557 d_http_cache_table->uncouple_entry_from_data(body); 01558 } 01559 catch (...) { 01560 unlock_cache_interface(); 01561 throw; 01562 } 01563 01564 unlock_cache_interface(); 01565 } 01566 01579 void 01580 HTTPCache::purge_cache() 01581 { 01582 lock_cache_interface(); 01583 01584 try { 01585 if (d_http_cache_table->is_locked_read_responses()) 01586 throw Error("Attempt to purge the cache with entries in use."); 01587 01588 d_http_cache_table->delete_all_entries(); 01589 } 01590 catch (...) { 01591 unlock_cache_interface(); 01592 throw; 01593 } 01594 01595 unlock_cache_interface(); 01596 } 01597 01598 } // namespace libdap