libdap++ Updated for version 3.8.2
|
00001 00002 // -*- mode: c++; c-basic-offset:4 -*- 00003 00004 // This file is part of libdap, A C++ implementation of the OPeNDAP Data 00005 // Access Protocol. 00006 00007 // Copyright (c) 2002,2003 OPeNDAP, Inc. 00008 // Author: James Gallagher <jgallagher@opendap.org> 00009 // 00010 // This library is free software; you can redistribute it and/or 00011 // modify it under the terms of the GNU Lesser General Public 00012 // License as published by the Free Software Foundation; either 00013 // version 2.1 of the License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, 00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00023 // 00024 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112. 00025 00026 00027 #include "config.h" 00028 00029 static char rcsid[] not_used = 00030 { "$Id: HTTPConnect.cc 21699 2009-11-05 00:06:01Z jimg $" 00031 }; 00032 00033 #ifdef HAVE_UNISTD_H 00034 #include <unistd.h> 00035 #endif 00036 00037 #ifdef WIN32 00038 #include <io.h> 00039 #endif 00040 00041 #include <string> 00042 #include <vector> 00043 #include <functional> 00044 #include <algorithm> 00045 #include <sstream> 00046 #include <iterator> 00047 #include <cstdlib> 00048 #include <cstring> 00049 00050 //#define DODS_DEBUG 00051 //#define DODS_DEBUG2 00052 //#define HTTP_TRACE 00053 00054 #include "debug.h" 00055 #include "mime_util.h" 00056 #include "GNURegex.h" 00057 #include "HTTPCache.h" 00058 #include "HTTPConnect.h" 00059 #include "RCReader.h" 00060 #include "HTTPResponse.h" 00061 #include "HTTPCacheResponse.h" 00062 00063 using namespace std; 00064 00065 namespace libdap { 00066 00067 // These global variables are not MT-Safe, but I'm leaving them as is because 00068 // they are used only for debugging (set them in a debugger like gdb or ddd). 00069 // They are not static because I *believe* that many debuggers cannot access 00070 // static variables. 08/07/02 jhrg 00071 00072 // Set this to 1 to turn on libcurl's verbose mode (for debugging). 00073 int www_trace = 0; 00074 00075 // Keep the temporary files; useful for debugging. 00076 int dods_keep_temps = 0; 00077 00078 #define CLIENT_ERR_MIN 400 00079 #define CLIENT_ERR_MAX 417 00080 static const char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN +1] = 00081 { 00082 "Bad Request:", 00083 "Unauthorized: Contact the server administrator.", 00084 "Payment Required.", 00085 "Forbidden: Contact the server administrator.", 00086 "Not Found: The data source or server could not be found.\n\ 00087 Often this means that the OPeNDAP server is missing or needs attention;\n\ 00088 Please contact the server administrator.", 00089 "Method Not Allowed.", 00090 "Not Acceptable.", 00091 "Proxy Authentication Required.", 00092 "Request Time-out.", 00093 "Conflict.", 00094 "Gone:.", 00095 "Length Required.", 00096 "Precondition Failed.", 00097 "Request Entity Too Large.", 00098 "Request URI Too Large.", 00099 "Unsupported Media Type.", 00100 "Requested Range Not Satisfiable.", 00101 "Expectation Failed." 00102 }; 00103 00104 #define SERVER_ERR_MIN 500 00105 #define SERVER_ERR_MAX 505 00106 static const char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN +1] = 00107 { 00108 "Internal Server Error.", 00109 "Not Implemented.", 00110 "Bad Gateway.", 00111 "Service Unavailable.", 00112 "Gateway Time-out.", 00113 "HTTP Version Not Supported." 00114 }; 00115 00118 static string 00119 http_status_to_string(int status) 00120 { 00121 if (status >= CLIENT_ERR_MIN && status <= CLIENT_ERR_MAX) 00122 return string(http_client_errors[status - CLIENT_ERR_MIN]); 00123 else if (status >= SERVER_ERR_MIN && status <= SERVER_ERR_MAX) 00124 return string(http_server_errors[status - SERVER_ERR_MIN]); 00125 else 00126 return string("Unknown Error: This indicates a problem with libdap++.\nPlease report this to support@opendap.org."); 00127 } 00128 00133 class ParseHeader : public unary_function<const string &, void> 00134 { 00135 ObjectType type; // What type of object is in the stream? 00136 string server; // Server's version string. 00137 string protocol; // Server's protocol version. 00138 string location; // Url returned by server 00139 00140 public: 00141 ParseHeader() : type(unknown_type), server("dods/0.0"), protocol("2.0") 00142 { } 00143 00144 void operator()(const string &line) 00145 { 00146 string name, value; 00147 parse_mime_header(line, name, value); 00148 if (name == "content-description") { 00149 DBG2(cerr << name << ": " << value << endl); 00150 type = get_description_type(value); 00151 } 00152 // The second test (== "dods/0.0") tests if xopendap-server has already 00153 // been seen. If so, use that header in preference to the old 00154 // XDODS-Server header. jhrg 2/7/06 00155 else if (name == "xdods-server" && server == "dods/0.0") { 00156 DBG2(cerr << name << ": " << value << endl); 00157 server = value; 00158 } 00159 else if (name == "xopendap-server") { 00160 DBG2(cerr << name << ": " << value << endl); 00161 server = value; 00162 } 00163 else if (name == "xdap") { 00164 DBG2(cerr << name << ": " << value << endl); 00165 protocol = value; 00166 } 00167 else if (server == "dods/0.0" && name == "server") { 00168 DBG2(cerr << name << ": " << value << endl); 00169 server = value; 00170 } 00171 else if (name == "location") { 00172 DBG2(cerr << name << ": " << value << endl); 00173 location = value; 00174 } 00175 else if (type == unknown_type && name == "content-type" 00176 && line.find("text/html") != string::npos) { 00177 DBG2(cerr << name << ": text/html..." << endl); 00178 type = web_error; 00179 } 00180 } 00181 00182 ObjectType get_object_type() 00183 { 00184 return type; 00185 } 00186 00187 string get_server() 00188 { 00189 return server; 00190 } 00191 00192 string get_protocol() 00193 { 00194 return protocol; 00195 } 00196 00197 string get_location() { 00198 return location; 00199 } 00200 }; 00201 00218 static size_t 00219 save_raw_http_headers(void *ptr, size_t size, size_t nmemb, void *resp_hdrs) 00220 { 00221 DBG2(cerr << "Inside the header parser." << endl); 00222 vector<string> *hdrs = static_cast<vector<string> * >(resp_hdrs); 00223 00224 // Grab the header, minus the trailing newline. Or \r\n pair. 00225 string complete_line; 00226 if (nmemb > 1 && *(static_cast<char*>(ptr) + size * (nmemb - 2)) == '\r') 00227 complete_line.assign(static_cast<char *>(ptr), size * (nmemb - 2)); 00228 else 00229 complete_line.assign(static_cast<char *>(ptr), size * (nmemb - 1)); 00230 00231 // Store all non-empty headers that are not HTTP status codes 00232 if (complete_line != "" && complete_line.find("HTTP") == string::npos) { 00233 DBG(cerr << "Header line: " << complete_line << endl); 00234 hdrs->push_back(complete_line); 00235 } 00236 00237 return size * nmemb; 00238 } 00239 00241 static int 00242 curl_debug(CURL *, curl_infotype info, char *msg, size_t size, void *) 00243 { 00244 string message(msg, size); 00245 00246 switch (info) { 00247 case CURLINFO_TEXT: 00248 cerr << "Text: " << message; break; 00249 case CURLINFO_HEADER_IN: 00250 cerr << "Header in: " << message; break; 00251 case CURLINFO_HEADER_OUT: 00252 cerr << "Header out: " << message; break; 00253 case CURLINFO_DATA_IN: 00254 cerr << "Data in: " << message; break; 00255 case CURLINFO_DATA_OUT: 00256 cerr << "Data out: " << message; break; 00257 case CURLINFO_END: 00258 cerr << "End: " << message; break; 00259 #ifdef CURLINFO_SSL_DATA_IN 00260 case CURLINFO_SSL_DATA_IN: 00261 cerr << "SSL Data in: " << message; break; 00262 #endif 00263 #ifdef CURLINFO_SSL_DATA_OUT 00264 case CURLINFO_SSL_DATA_OUT: 00265 cerr << "SSL Data out: " << message; break; 00266 #endif 00267 default: 00268 cerr << "Curl info: " << message; break; 00269 } 00270 return 0; 00271 } 00272 00276 void 00277 HTTPConnect::www_lib_init() 00278 { 00279 d_curl = curl_easy_init(); 00280 if (!d_curl) 00281 throw InternalErr(__FILE__, __LINE__, "Could not initialize libcurl."); 00282 00283 // Now set options that will remain constant for the duration of this 00284 // CURL object. 00285 00286 // Set the proxy host. 00287 if (!d_rcr->get_proxy_server_host().empty()) { 00288 DBG(cerr << "Setting up a proxy server." << endl); 00289 DBG(cerr << "Proxy host: " << d_rcr->get_proxy_server_host() 00290 << endl); 00291 DBG(cerr << "Proxy port: " << d_rcr->get_proxy_server_port() 00292 << endl); 00293 DBG(cerr << "Proxy pwd : " << d_rcr->get_proxy_server_userpw() 00294 << endl); 00295 curl_easy_setopt(d_curl, CURLOPT_PROXY, 00296 d_rcr->get_proxy_server_host().c_str()); 00297 curl_easy_setopt(d_curl, CURLOPT_PROXYPORT, 00298 d_rcr->get_proxy_server_port()); 00299 00300 // As of 4/21/08 only NTLM, Digest and Basic work. 00301 #ifdef CURLOPT_PROXYAUTH 00302 curl_easy_setopt(d_curl, CURLOPT_PROXYAUTH, (long)CURLAUTH_ANY); 00303 #endif 00304 00305 // Password might not be required. 06/21/04 jhrg 00306 if (!d_rcr->get_proxy_server_userpw().empty()) 00307 curl_easy_setopt(d_curl, CURLOPT_PROXYUSERPWD, 00308 d_rcr->get_proxy_server_userpw().c_str()); 00309 } 00310 00311 curl_easy_setopt(d_curl, CURLOPT_ERRORBUFFER, d_error_buffer); 00312 // We have to set FailOnError to false for any of the non-Basic 00313 // authentication schemes to work. 07/28/03 jhrg 00314 curl_easy_setopt(d_curl, CURLOPT_FAILONERROR, 0); 00315 00316 // This means libcurl will use Basic, Digest, GSS Negotiate, or NTLM, 00317 // choosing the the 'safest' one supported by the server. 00318 // This requires curl 7.10.6 which is still in pre-release. 07/25/03 jhrg 00319 curl_easy_setopt(d_curl, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY); 00320 00321 curl_easy_setopt(d_curl, CURLOPT_NOPROGRESS, 1); 00322 curl_easy_setopt(d_curl, CURLOPT_NOSIGNAL, 1); 00323 curl_easy_setopt(d_curl, CURLOPT_HEADERFUNCTION, save_raw_http_headers); 00324 // In read_url a call to CURLOPT_WRITEHEADER is used to set the fourth 00325 // param of save_raw_http_headers to a vector<string> object. 00326 00327 // Follow 302 (redirect) responses 00328 curl_easy_setopt(d_curl, CURLOPT_FOLLOWLOCATION, 1); 00329 curl_easy_setopt(d_curl, CURLOPT_MAXREDIRS, 5); 00330 00331 // If the user turns off SSL validation... 00332 if (!d_rcr->get_validate_ssl() == 0) { 00333 curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYPEER, 0); 00334 curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYHOST, 0); 00335 } 00336 00337 // Look to see if cookies are turned on in the .dodsrc file. If so, 00338 // activate here. We honor 'session cookies' (cookies without an 00339 // expiration date) here so that session-base SSO systems will work as 00340 // expected. 00341 if (!d_cookie_jar.empty()) { 00342 DBG(cerr << "Setting the cookie jar to: " << d_cookie_jar << endl); 00343 curl_easy_setopt(d_curl, CURLOPT_COOKIEJAR, d_cookie_jar.c_str()); 00344 curl_easy_setopt(d_curl, CURLOPT_COOKIESESSION, 1); 00345 } 00346 00347 if (www_trace) { 00348 cerr << "Curl version: " << curl_version() << endl; 00349 curl_easy_setopt(d_curl, CURLOPT_VERBOSE, 1); 00350 curl_easy_setopt(d_curl, CURLOPT_DEBUGFUNCTION, curl_debug); 00351 } 00352 } 00353 00357 class BuildHeaders : public unary_function<const string &, void> 00358 { 00359 struct curl_slist *d_cl; 00360 00361 public: 00362 BuildHeaders() : d_cl(0) 00363 {} 00364 00365 void operator()(const string &header) 00366 { 00367 DBG(cerr << "Adding '" << header.c_str() << "' to the header list." 00368 << endl); 00369 d_cl = curl_slist_append(d_cl, header.c_str()); 00370 } 00371 00372 struct curl_slist *get_headers() 00373 { 00374 return d_cl; 00375 } 00376 }; 00377 00392 long 00393 HTTPConnect::read_url(const string &url, FILE *stream, 00394 vector<string> *resp_hdrs, 00395 const vector<string> *headers) 00396 { 00397 curl_easy_setopt(d_curl, CURLOPT_URL, url.c_str()); 00398 00399 #ifdef WIN32 00400 // See the curl documentation for CURLOPT_FILE (aka CURLOPT_WRITEDATA) 00401 // and the CURLOPT_WRITEFUNCTION option. Quote: "If you are using libcurl as 00402 // a win32 DLL, you MUST use the CURLOPT_WRITEFUNCTION option if you set the 00403 // CURLOPT_WRITEDATA option or you will experience crashes". At the root of 00404 // this issue is that one should not pass a FILE * to a windows DLL. Close 00405 // inspection of libcurl yields that their default write function when using 00406 // the CURLOPT_WRITEDATA is just "fwrite". 00407 curl_easy_setopt(d_curl, CURLOPT_FILE, stream); 00408 curl_easy_setopt(d_curl, CURLOPT_WRITEFUNCTION, &fwrite); 00409 #else 00410 curl_easy_setopt(d_curl, CURLOPT_FILE, stream); 00411 #endif 00412 00413 DBG(copy(d_request_headers.begin(), d_request_headers.end(), 00414 ostream_iterator<string>(cerr, "\n"))); 00415 00416 BuildHeaders req_hdrs; 00417 req_hdrs = for_each(d_request_headers.begin(), d_request_headers.end(), 00418 req_hdrs); 00419 if (headers) 00420 req_hdrs = for_each(headers->begin(), headers->end(), req_hdrs); 00421 curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, req_hdrs.get_headers()); 00422 00423 // Turn off the proxy for this URL? 00424 bool temporary_proxy = false; 00425 if ((temporary_proxy = url_uses_no_proxy_for(url))) { 00426 DBG(cerr << "Suppress proxy for url: " << url << endl); 00427 curl_easy_setopt(d_curl, CURLOPT_PROXY, 0); 00428 } 00429 00430 string::size_type at_sign = url.find('@'); 00431 // Assume username:password present *and* assume it's an HTTP URL; it *is* 00432 // HTTPConnect, after all. 7 is position after "http://"; the second arg 00433 // to substr() is the sub string length. 00434 if (at_sign != url.npos) 00435 d_upstring = url.substr(7, at_sign - 7); 00436 00437 if (!d_upstring.empty()) 00438 curl_easy_setopt(d_curl, CURLOPT_USERPWD, d_upstring.c_str()); 00439 00440 // Pass save_raw_http_headers() a pointer to the vector<string> where the 00441 // response headers may be stored. Callers can use the resp_hdrs 00442 // value/result parameter to get the raw response header information . 00443 curl_easy_setopt(d_curl, CURLOPT_WRITEHEADER, resp_hdrs); 00444 00445 CURLcode res = curl_easy_perform(d_curl); 00446 00447 // Free the header list and null the value in d_curl. 00448 curl_slist_free_all(req_hdrs.get_headers()); 00449 curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, 0); 00450 00451 // Reset the proxy? 00452 if (temporary_proxy && !d_rcr->get_proxy_server_host().empty()) 00453 curl_easy_setopt(d_curl, CURLOPT_PROXY, 00454 d_rcr->get_proxy_server_host().c_str()); 00455 00456 if (res != 0) 00457 throw Error(d_error_buffer); 00458 00459 long status; 00460 res = curl_easy_getinfo(d_curl, CURLINFO_HTTP_CODE, &status); 00461 if (res != 0) 00462 throw Error(d_error_buffer); 00463 00464 return status; 00465 } 00466 00470 bool 00471 HTTPConnect::url_uses_proxy_for(const string &url) throw() 00472 { 00473 if (d_rcr->is_proxy_for_used()) { 00474 Regex host_regex(d_rcr->get_proxy_for_regexp().c_str()); 00475 int index = 0, matchlen; 00476 return host_regex.search(url.c_str(), url.size(), matchlen, index) 00477 != -1; 00478 } 00479 00480 return false; 00481 } 00482 00486 bool 00487 HTTPConnect::url_uses_no_proxy_for(const string &url) throw() 00488 { 00489 return d_rcr->is_no_proxy_for_used() 00490 && url.find(d_rcr->get_no_proxy_for_host()) != string::npos; 00491 } 00492 00493 // Public methods. Mostly... 00494 00501 HTTPConnect::HTTPConnect(RCReader *rcr) : d_username(""), d_password(""), 00502 d_cookie_jar(""), 00503 d_dap_client_protocol_major(2), 00504 d_dap_client_protocol_minor(0) 00505 00506 { 00507 d_accept_deflate = rcr->get_deflate(); 00508 d_rcr = rcr; 00509 00510 // Load in the default headers to send with a request. The empty Pragma 00511 // headers overrides libcurl's default Pragma: no-cache header (which 00512 // will disable caching by Squid, et c.). The User-Agent header helps 00513 // make server logs more readable. 05/05/03 jhrg 00514 d_request_headers.push_back(string("Pragma:")); 00515 string user_agent = string("User-Agent: ") + string(CNAME) 00516 + string("/") + string(CVER); 00517 d_request_headers.push_back(user_agent); 00518 if (d_accept_deflate) 00519 d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress")); 00520 00521 // HTTPCache::instance returns a valid ptr or 0. 00522 if (d_rcr->get_use_cache()) 00523 d_http_cache = HTTPCache::instance(d_rcr->get_dods_cache_root(), 00524 true); 00525 else 00526 d_http_cache = 0; 00527 00528 DBG2(cerr << "Cache object created (" << hex << d_http_cache << dec 00529 << ")" << endl); 00530 00531 if (d_http_cache) { 00532 d_http_cache->set_cache_enabled(d_rcr->get_use_cache()); 00533 d_http_cache->set_expire_ignored(d_rcr->get_ignore_expires() != 0); 00534 d_http_cache->set_max_size(d_rcr->get_max_cache_size()); 00535 d_http_cache->set_max_entry_size(d_rcr->get_max_cached_obj()); 00536 d_http_cache->set_default_expiration(d_rcr->get_default_expires()); 00537 d_http_cache->set_always_validate(d_rcr->get_always_validate() != 0); 00538 } 00539 00540 d_cookie_jar = rcr->get_cookie_jar(); 00541 00542 www_lib_init(); // This may throw either Error or InternalErr 00543 } 00544 00545 HTTPConnect::~HTTPConnect() 00546 { 00547 DBG2(cerr << "Entering the HTTPConnect dtor" << endl); 00548 00549 curl_easy_cleanup(d_curl); 00550 00551 DBG2(cerr << "Leaving the HTTPConnect dtor" << endl); 00552 } 00553 00566 HTTPResponse * 00567 HTTPConnect::fetch_url(const string &url) 00568 { 00569 #ifdef HTTP_TRACE 00570 cout << "GET " << url << " HTTP/1.0" << endl; 00571 #endif 00572 00573 HTTPResponse *stream; 00574 00575 if (d_http_cache && d_http_cache->is_cache_enabled()) { 00576 stream = caching_fetch_url(url); 00577 } 00578 else { 00579 stream = plain_fetch_url(url); 00580 } 00581 00582 #ifdef HTTP_TRACE 00583 stringstream ss; 00584 ss << "HTTP/1.0 " << stream->get_status() << " -" << endl; 00585 for (size_t i = 0; i < stream->get_headers()->size(); i++) { 00586 ss << stream->get_headers()->at(i) << endl; 00587 } 00588 cout << ss.str(); 00589 #endif 00590 00591 ParseHeader parser; 00592 00593 parser = for_each(stream->get_headers()->begin(), 00594 stream->get_headers()->end(), ParseHeader()); 00595 00596 #ifdef HTTP_TRACE 00597 cout << endl << endl; 00598 #endif 00599 00600 // handle redirection case (2007-04-27, gaffigan@sfos.uaf.edu) 00601 if (parser.get_location() != "" && 00602 url.substr(0,url.find("?",0)).compare(parser.get_location().substr(0,url.find("?",0))) != 0) { 00603 return fetch_url(parser.get_location()); 00604 } 00605 00606 stream->set_type(parser.get_object_type()); 00607 stream->set_version(parser.get_server()); 00608 stream->set_protocol(parser.get_protocol()); 00609 00610 return stream; 00611 } 00612 00613 // Look around for a reasonable place to put a temporary file. Check first 00614 // the value of the TMPDIR env var. If that does not yeild a path that's 00615 // writable (as defined by access(..., W_OK|R_OK)) then look at P_tmpdir (as 00616 // defined in stdio.h. If both come up empty, then use `./'. 00617 // 00618 // This function allocates storage using new. The caller must delete the char 00619 // array. 00620 00621 // Change this to a version that either returns a string or an open file 00622 // descriptor. Use information from https://buildsecurityin.us-cert.gov/ 00623 // (see open()) to make it more secure. Ideal solution: get deserialize() 00624 // methods to read from a stream returned by libcurl, not from a temporary 00625 // file. 9/21/07 jhrg 00626 static char * 00627 get_tempfile_template(const char *file_template) 00628 { 00629 const char *c; 00630 00631 #ifdef WIN32 00632 // whitelist for a WIN32 directory 00633 Regex directory("[-a-zA-Z0-9_\\]*"); 00634 00635 c = getenv("TEMP"); 00636 if (c && directory.match(c, strlen(c)) && (access(getenv("TEMP"), 6) == 0)) 00637 goto valid_temp_directory; 00638 00639 c= getenv("TMP"); 00640 if (c && directory.match(c, strlen(c)) && (access(getenv("TEMP"), 6) == 0)) 00641 goto valid_temp_directory; 00642 #else 00643 // whitelist for a directory 00644 Regex directory("[-a-zA-Z0-9_/]*"); 00645 00646 c = getenv("TMPDIR"); 00647 if (c && directory.match(c, strlen(c)) && (access(c, W_OK | R_OK) == 0)) 00648 goto valid_temp_directory; 00649 00650 #ifdef P_tmpdir 00651 if (access(P_tmpdir, W_OK | R_OK) == 0) { 00652 c = P_tmpdir; 00653 goto valid_temp_directory; 00654 } 00655 #endif 00656 00657 #endif // WIN32 00658 00659 c = "."; 00660 00661 valid_temp_directory: 00662 // Sanitize allocation 00663 int size = strlen(c) + strlen(file_template) + 2; 00664 if (!size_ok(1, size)) 00665 throw Error("Bad temporary file name."); 00666 00667 char *temp = new char[size]; 00668 strncpy(temp, c, size-2); 00669 strcat(temp, "/"); 00670 00671 strcat(temp, file_template); 00672 00673 return temp; 00674 } 00675 00694 string 00695 get_temp_file(FILE *&stream) throw(InternalErr) 00696 { 00697 // get_tempfile_template() uses new, must call delete 00698 char *dods_temp = get_tempfile_template("dodsXXXXXX"); 00699 00700 // Open truncated for update. NB: mkstemp() returns a file descriptor. 00701 #if defined(WIN32) || defined(TEST_WIN32_TEMPS) 00702 stream = fopen(_mktemp(dods_temp), "w+b"); 00703 #else 00704 stream = fdopen(mkstemp(dods_temp), "w+"); 00705 #endif 00706 00707 if (!stream) 00708 throw InternalErr("I/O Error: Failed to open a temporary file for the data values."); 00709 00710 string dods_temp_s = dods_temp; 00711 delete[] dods_temp; dods_temp = 0; 00712 00713 return dods_temp_s; 00714 } 00715 00717 void 00718 close_temp(FILE *s, const string &name) 00719 { 00720 int res = fclose(s); 00721 if (res) { 00722 DBG(cerr << "Counld not close the temporary file: " << name << endl); 00723 } 00724 00725 unlink(name.c_str()); 00726 } 00727 00749 HTTPResponse * 00750 HTTPConnect::caching_fetch_url(const string &url) 00751 { 00752 DBG(cerr << "Is this URL (" << url << ") in the cache?... "); 00753 00754 vector<string> *headers = new vector<string> ; 00755 FILE *s = d_http_cache->get_cached_response(url, *headers); 00756 if (!s) { 00757 // url not in cache; get it and cache it 00758 DBGN(cerr << "no; getting response and caching." << endl); 00759 time_t now = time(0); 00760 HTTPResponse *rs = plain_fetch_url(url); 00761 d_http_cache->cache_response(url, now, *(rs->get_headers()), 00762 rs->get_stream()); 00763 00764 return rs; 00765 } 00766 else { // url in cache 00767 DBGN(cerr << "yes... "); 00768 00769 if (d_http_cache->is_url_valid(url)) { // url in cache and valid 00770 DBGN(cerr << "and it's valid; using cached response." << endl); 00771 HTTPCacheResponse *crs = 00772 new HTTPCacheResponse(s, 200, headers, d_http_cache); 00773 return crs; 00774 } 00775 else { // url in cache but not valid; validate 00776 DBGN(cerr << "but it's not valid; validating... "); 00777 00778 d_http_cache->release_cached_response(s); 00779 00780 vector<string> *resp_hdrs = new vector<string> ; 00781 vector<string> cond_hdrs = 00782 d_http_cache->get_conditional_request_headers(url); 00783 FILE *body = 0; 00784 string dods_temp = get_temp_file(body); 00785 time_t now = time(0); // When was the request made (now). 00786 long http_status; 00787 00788 try { 00789 http_status = read_url(url, body, resp_hdrs, &cond_hdrs); 00790 rewind(body); 00791 } 00792 catch (Error &e) { 00793 close_temp(body, dods_temp); 00794 throw ; 00795 } 00796 00797 switch (http_status) { 00798 case 200: { // New headers and new body 00799 DBGN(cerr << "read a new response; caching." << endl); 00800 00801 d_http_cache->cache_response(url, now, *resp_hdrs, body); 00802 HTTPResponse *rs = new HTTPResponse(body, http_status, resp_hdrs, 00803 dods_temp); 00804 00805 return rs; 00806 } 00807 00808 case 304: { // Just new headers, use cached body 00809 DBGN(cerr << "cached response valid; updating." << endl); 00810 00811 close_temp(body, dods_temp); 00812 d_http_cache->update_response(url, now, *resp_hdrs); 00813 00814 vector<string> *headers = new vector<string>; 00815 FILE *hs = d_http_cache->get_cached_response(url, *headers); 00816 HTTPCacheResponse *crs = new HTTPCacheResponse(hs, 304, headers, d_http_cache); 00817 return crs; 00818 } 00819 00820 default: { // Oops. 00821 close_temp(body, dods_temp); 00822 if (http_status >= 400) { 00823 string msg = "Error while reading the URL: "; 00824 msg += url; 00825 msg 00826 += ".\nThe OPeNDAP server returned the following message:\n"; 00827 msg += http_status_to_string(http_status); 00828 throw Error(msg); 00829 } 00830 else { 00831 throw InternalErr(__FILE__, __LINE__, 00832 "Bad response from the HTTP server: " + long_to_string(http_status)); 00833 } 00834 } 00835 } 00836 } 00837 } 00838 00839 throw InternalErr(__FILE__, __LINE__, "Should never get here"); 00840 } 00841 00853 HTTPResponse * 00854 HTTPConnect::plain_fetch_url(const string &url) 00855 { 00856 DBG(cerr << "Getting URL: " << url << endl); 00857 FILE *stream = 0; 00858 string dods_temp = get_temp_file(stream); 00859 vector<string> *resp_hdrs = new vector<string>; 00860 00861 int status = -1; 00862 try { 00863 status = read_url(url, stream, resp_hdrs); // Throws Error. 00864 if (status >= 400) { 00865 string msg = "Error while reading the URL: "; 00866 msg += url; 00867 msg += ".\nThe OPeNDAP server returned the following message:\n"; 00868 msg += http_status_to_string(status); 00869 throw Error(msg); 00870 } 00871 } 00872 00873 catch (Error &e) { 00874 close_temp(stream, dods_temp); 00875 throw e; 00876 } 00877 00878 rewind(stream); 00879 00880 return new HTTPResponse(stream, status, resp_hdrs, dods_temp); 00881 } 00882 00894 void 00895 HTTPConnect::set_accept_deflate(bool deflate) 00896 { 00897 d_accept_deflate = deflate; 00898 00899 if (d_accept_deflate) { 00900 if (find(d_request_headers.begin(), d_request_headers.end(), 00901 "Accept-Encoding: deflate, gzip, compress") == d_request_headers.end()) 00902 d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress")); 00903 DBG(copy(d_request_headers.begin(), d_request_headers.end(), 00904 ostream_iterator<string>(cerr, "\n"))); 00905 } 00906 else { 00907 vector<string>::iterator i; 00908 i = remove_if(d_request_headers.begin(), d_request_headers.end(), 00909 bind2nd(equal_to<string>(), 00910 string("Accept-Encoding: deflate, gzip, compress"))); 00911 d_request_headers.erase(i, d_request_headers.end()); 00912 } 00913 } 00914 00916 class HeaderMatch : public unary_function<const string &, bool> { 00917 const string &d_header; 00918 public: 00919 HeaderMatch(const string &header) : d_header(header) {} 00920 bool operator()(const string &arg) { return arg.find(d_header) == 0; } 00921 }; 00922 00931 void 00932 HTTPConnect::set_xdap_protocol(int major, int minor) 00933 { 00934 // Look for, and remove if one exists, an XDAP-Accept header 00935 vector<string>::iterator i; 00936 i = find_if(d_request_headers.begin(), d_request_headers.end(), 00937 HeaderMatch("XDAP-Accept:")); 00938 if (i != d_request_headers.end()) 00939 d_request_headers.erase(i); 00940 00941 // Record and add the new header value 00942 d_dap_client_protocol_major = major; 00943 d_dap_client_protocol_minor = minor; 00944 ostringstream xdap_accept; 00945 xdap_accept << "XDAP-Accept: " << major << "." << minor; 00946 00947 d_request_headers.push_back(xdap_accept.str()); 00948 00949 DBG(copy(d_request_headers.begin(), d_request_headers.end(), 00950 ostream_iterator<string>(cerr, "\n"))); 00951 } 00952 00968 void 00969 HTTPConnect::set_credentials(const string &u, const string &p) 00970 { 00971 if (u.empty()) 00972 return; 00973 00974 // Store the credentials locally. 00975 d_username = u; 00976 d_password = p; 00977 00978 d_upstring = u + ":" + p; 00979 } 00980 00981 } // namespace libdap