libdap++ Updated for version 3.8.2

HTTPConnect.cc

Go to the documentation of this file.
00001 
00002 // -*- mode: c++; c-basic-offset:4 -*-
00003 
00004 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
00005 // Access Protocol.
00006 
00007 // Copyright (c) 2002,2003 OPeNDAP, Inc.
00008 // Author: James Gallagher <jgallagher@opendap.org>
00009 //
00010 // This library is free software; you can redistribute it and/or
00011 // modify it under the terms of the GNU Lesser General Public
00012 // License as published by the Free Software Foundation; either
00013 // version 2.1 of the License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful,
00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00023 //
00024 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
00025 
00026 
00027 #include "config.h"
00028 
00029 static char rcsid[] not_used =
00030     { "$Id: HTTPConnect.cc 21699 2009-11-05 00:06:01Z jimg $"
00031     };
00032 
00033 #ifdef HAVE_UNISTD_H
00034 #include <unistd.h>
00035 #endif
00036 
00037 #ifdef WIN32
00038 #include <io.h>
00039 #endif
00040 
00041 #include <string>
00042 #include <vector>
00043 #include <functional>
00044 #include <algorithm>
00045 #include <sstream>
00046 #include <iterator>
00047 #include <cstdlib>
00048 #include <cstring>
00049 
00050 //#define DODS_DEBUG
00051 //#define DODS_DEBUG2
00052 //#define HTTP_TRACE
00053 
00054 #include "debug.h"
00055 #include "mime_util.h"
00056 #include "GNURegex.h"
00057 #include "HTTPCache.h"
00058 #include "HTTPConnect.h"
00059 #include "RCReader.h"
00060 #include "HTTPResponse.h"
00061 #include "HTTPCacheResponse.h"
00062 
00063 using namespace std;
00064 
00065 namespace libdap {
00066 
00067 // These global variables are not MT-Safe, but I'm leaving them as is because
00068 // they are used only for debugging (set them in a debugger like gdb or ddd).
00069 // They are not static because I *believe* that many debuggers cannot access
00070 // static variables. 08/07/02 jhrg
00071 
00072 // Set this to 1 to turn on libcurl's verbose mode (for debugging).
00073 int www_trace = 0;
00074 
00075 // Keep the temporary files; useful for debugging.
00076 int dods_keep_temps = 0;
00077 
// Human-readable messages for the HTTP 4xx and 5xx status codes handled by
// http_status_to_string(). Each table is indexed by (status - *_ERR_MIN), so
// the number of initializers must equal (*_ERR_MAX - *_ERR_MIN + 1).
#define CLIENT_ERR_MIN 400
#define CLIENT_ERR_MAX 417
static const char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN + 1] =
    {
        "Bad Request:",                                         // 400
        "Unauthorized: Contact the server administrator.",      // 401
        "Payment Required.",                                    // 402
        "Forbidden: Contact the server administrator.",         // 403
        // 404. The indentation at the start of the second and third lines
        // has always been part of the message text; it is spelled out here
        // instead of being hidden behind backslash line continuations.
        "Not Found: The data source or server could not be found.\n"
        "        Often this means that the OPeNDAP server is missing or needs attention;\n"
        "        Please contact the server administrator.",
        "Method Not Allowed.",                                  // 405
        "Not Acceptable.",                                      // 406
        "Proxy Authentication Required.",                       // 407
        "Request Time-out.",                                    // 408
        "Conflict.",                                            // 409
        "Gone.",                                                // 410
        "Length Required.",                                     // 411
        "Precondition Failed.",                                 // 412
        "Request Entity Too Large.",                            // 413
        "Request URI Too Large.",                               // 414
        "Unsupported Media Type.",                              // 415
        "Requested Range Not Satisfiable.",                     // 416
        "Expectation Failed."                                   // 417
    };

#define SERVER_ERR_MIN 500
#define SERVER_ERR_MAX 505
static const char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN + 1] =
    {
        "Internal Server Error.",                               // 500
        "Not Implemented.",                                     // 501
        "Bad Gateway.",                                         // 502
        "Service Unavailable.",                                 // 503
        "Gateway Time-out.",                                    // 504
        "HTTP Version Not Supported."                           // 505
    };

/** Translate an HTTP status code into a message suitable for an Error.

    @param status The HTTP status code.
    @return The table entry for codes in [CLIENT_ERR_MIN, CLIENT_ERR_MAX] or
    [SERVER_ERR_MIN, SERVER_ERR_MAX]; a generic "Unknown Error" message for
    every other value. */
static std::string
http_status_to_string(int status)
{
    if (status >= CLIENT_ERR_MIN && status <= CLIENT_ERR_MAX)
        return std::string(http_client_errors[status - CLIENT_ERR_MIN]);

    if (status >= SERVER_ERR_MIN && status <= SERVER_ERR_MAX)
        return std::string(http_server_errors[status - SERVER_ERR_MIN]);

    return std::string("Unknown Error: This indicates a problem with libdap++.\nPlease report this to support@opendap.org.");
}
00128 
00133 class ParseHeader : public unary_function<const string &, void>
00134 {
00135     ObjectType type;  // What type of object is in the stream?
00136     string server;  // Server's version string.
00137     string protocol;            // Server's protocol version.
00138     string location;            // Url returned by server
00139 
00140 public:
00141     ParseHeader() : type(unknown_type), server("dods/0.0"), protocol("2.0")
00142     { }
00143 
00144     void operator()(const string &line)
00145     {
00146         string name, value;
00147         parse_mime_header(line, name, value);
00148         if (name == "content-description") {
00149             DBG2(cerr << name << ": " << value << endl);
00150             type = get_description_type(value);
00151         }
00152         // The second test (== "dods/0.0") tests if xopendap-server has already
00153         // been seen. If so, use that header in preference to the old
00154         // XDODS-Server header. jhrg 2/7/06
00155         else if (name == "xdods-server" && server == "dods/0.0") {
00156             DBG2(cerr << name << ": " << value << endl);
00157             server = value;
00158         }
00159         else if (name == "xopendap-server") {
00160             DBG2(cerr << name << ": " << value << endl);
00161             server = value;
00162         }
00163         else if (name == "xdap") {
00164             DBG2(cerr << name << ": " << value << endl);
00165             protocol = value;
00166         }
00167         else if (server == "dods/0.0" && name == "server") {
00168             DBG2(cerr << name << ": " << value << endl);
00169             server = value;
00170         }
00171         else if (name == "location") {
00172             DBG2(cerr << name << ": " << value << endl);
00173             location = value;
00174         }
00175         else if (type == unknown_type && name == "content-type"
00176                  && line.find("text/html") != string::npos) {
00177             DBG2(cerr << name << ": text/html..." << endl);
00178             type = web_error;
00179         }
00180     }
00181 
00182     ObjectType get_object_type()
00183     {
00184         return type;
00185     }
00186 
00187     string get_server()
00188     {
00189         return server;
00190     }
00191 
00192     string get_protocol()
00193     {
00194         return protocol;
00195     }
00196 
00197     string get_location() {
00198            return location;
00199     }
00200 };
00201 
/** Header callback registered with libcurl via CURLOPT_HEADERFUNCTION. It is
    handed one header line per call and appends it, without its line
    terminator, to the vector<string> passed through \c resp_hdrs.

    Empty lines and HTTP status lines (those starting with "HTTP/") are not
    stored.

    @param ptr Start of the header bytes; not assumed to be null terminated.
    @param size Size of one element.
    @param nmemb Number of elements; the line is size * nmemb bytes long.
    @param resp_hdrs Pointer to a vector<string> that receives the header.
    @return size * nmemb, i.e. all of the data is always reported as
    consumed. */
static size_t
save_raw_http_headers(void *ptr, size_t size, size_t nmemb, void *resp_hdrs)
{
    DBG2(std::cerr << "Inside the header parser." << std::endl);
    std::vector<std::string> *hdrs
        = static_cast<std::vector<std::string> *>(resp_hdrs);

    const char *data = static_cast<const char *>(ptr);
    const size_t total = size * nmemb;

    // Nothing to record. This also prevents the unsigned length arithmetic
    // below from wrapping around when an empty buffer is passed in.
    if (!hdrs || !data || total == 0)
        return total;

    // Strip a trailing "\n" or "\r\n", but only if it is really there, so a
    // line that arrives without a terminator is not truncated.
    size_t length = total;
    if (length > 0 && data[length - 1] == '\n')
        --length;
    if (length > 0 && data[length - 1] == '\r')
        --length;

    const std::string complete_line(data, length);

    // Store all non-empty headers that are not HTTP status lines. Only a
    // leading "HTTP/" marks a status line; a header that merely contains
    // the text "HTTP" somewhere is kept.
    if (!complete_line.empty() && complete_line.compare(0, 5, "HTTP/") != 0) {
        DBG(std::cerr << "Header line: " << complete_line << std::endl);
        hdrs->push_back(complete_line);
    }

    return total;
}
00239 
00241 static int
00242 curl_debug(CURL *, curl_infotype info, char *msg, size_t size, void  *)
00243 {
00244     string message(msg, size);
00245 
00246     switch (info) {
00247     case CURLINFO_TEXT:
00248         cerr << "Text: " << message; break;
00249     case CURLINFO_HEADER_IN:
00250         cerr << "Header in: " << message; break;
00251     case CURLINFO_HEADER_OUT:
00252         cerr << "Header out: " << message; break;
00253     case CURLINFO_DATA_IN:
00254         cerr << "Data in: " << message; break;
00255     case CURLINFO_DATA_OUT:
00256         cerr << "Data out: " << message; break;
00257     case CURLINFO_END:
00258         cerr << "End: " << message; break;
00259 #ifdef CURLINFO_SSL_DATA_IN
00260     case CURLINFO_SSL_DATA_IN:
00261         cerr << "SSL Data in: " << message; break;
00262 #endif
00263 #ifdef CURLINFO_SSL_DATA_OUT
00264     case CURLINFO_SSL_DATA_OUT:
00265         cerr << "SSL Data out: " << message; break;
00266 #endif
00267     default:
00268         cerr << "Curl info: " << message; break;
00269     }
00270     return 0;
00271 }
00272 
00276 void
00277 HTTPConnect::www_lib_init()
00278 {
00279     d_curl = curl_easy_init();
00280     if (!d_curl)
00281         throw InternalErr(__FILE__, __LINE__, "Could not initialize libcurl.");
00282 
00283     // Now set options that will remain constant for the duration of this
00284     // CURL object.
00285 
00286     // Set the proxy host.
00287     if (!d_rcr->get_proxy_server_host().empty()) {
00288         DBG(cerr << "Setting up a proxy server." << endl);
00289         DBG(cerr << "Proxy host: " << d_rcr->get_proxy_server_host()
00290             << endl);
00291         DBG(cerr << "Proxy port: " << d_rcr->get_proxy_server_port()
00292             << endl);
00293         DBG(cerr << "Proxy pwd : " << d_rcr->get_proxy_server_userpw()
00294             << endl);
00295         curl_easy_setopt(d_curl, CURLOPT_PROXY,
00296                          d_rcr->get_proxy_server_host().c_str());
00297         curl_easy_setopt(d_curl, CURLOPT_PROXYPORT,
00298                          d_rcr->get_proxy_server_port());
00299 
00300         // As of 4/21/08 only NTLM, Digest and Basic work.
00301 #ifdef CURLOPT_PROXYAUTH
00302         curl_easy_setopt(d_curl, CURLOPT_PROXYAUTH, (long)CURLAUTH_ANY);
00303 #endif
00304 
00305         // Password might not be required. 06/21/04 jhrg
00306         if (!d_rcr->get_proxy_server_userpw().empty())
00307             curl_easy_setopt(d_curl, CURLOPT_PROXYUSERPWD,
00308                              d_rcr->get_proxy_server_userpw().c_str());
00309     }
00310 
00311     curl_easy_setopt(d_curl, CURLOPT_ERRORBUFFER, d_error_buffer);
00312     // We have to set FailOnError to false for any of the non-Basic
00313     // authentication schemes to work. 07/28/03 jhrg
00314     curl_easy_setopt(d_curl, CURLOPT_FAILONERROR, 0);
00315 
00316     // This means libcurl will use Basic, Digest, GSS Negotiate, or NTLM,
00317     // choosing the the 'safest' one supported by the server.
00318     // This requires curl 7.10.6 which is still in pre-release. 07/25/03 jhrg
00319     curl_easy_setopt(d_curl, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY);
00320 
00321     curl_easy_setopt(d_curl, CURLOPT_NOPROGRESS, 1);
00322     curl_easy_setopt(d_curl, CURLOPT_NOSIGNAL, 1);
00323     curl_easy_setopt(d_curl, CURLOPT_HEADERFUNCTION, save_raw_http_headers);
00324     // In read_url a call to CURLOPT_WRITEHEADER is used to set the fourth
00325     // param of save_raw_http_headers to a vector<string> object.
00326 
00327     // Follow 302 (redirect) responses
00328     curl_easy_setopt(d_curl, CURLOPT_FOLLOWLOCATION, 1);
00329     curl_easy_setopt(d_curl, CURLOPT_MAXREDIRS, 5);
00330 
00331     // If the user turns off SSL validation...
00332     if (!d_rcr->get_validate_ssl() == 0) {
00333         curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYPEER, 0);
00334         curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYHOST, 0);
00335     }
00336 
00337     // Look to see if cookies are turned on in the .dodsrc file. If so,
00338     // activate here. We honor 'session cookies' (cookies without an
00339     // expiration date) here so that session-base SSO systems will work as
00340     // expected.
00341     if (!d_cookie_jar.empty()) {
00342         DBG(cerr << "Setting the cookie jar to: " << d_cookie_jar << endl);
00343         curl_easy_setopt(d_curl, CURLOPT_COOKIEJAR, d_cookie_jar.c_str());
00344         curl_easy_setopt(d_curl, CURLOPT_COOKIESESSION, 1);
00345     }
00346 
00347     if (www_trace) {
00348         cerr << "Curl version: " << curl_version() << endl;
00349         curl_easy_setopt(d_curl, CURLOPT_VERBOSE, 1);
00350         curl_easy_setopt(d_curl, CURLOPT_DEBUGFUNCTION, curl_debug);
00351     }
00352 }
00353 
00357 class BuildHeaders : public unary_function<const string &, void>
00358 {
00359     struct curl_slist *d_cl;
00360 
00361 public:
00362     BuildHeaders() : d_cl(0)
00363     {}
00364 
00365     void operator()(const string &header)
00366     {
00367         DBG(cerr << "Adding '" << header.c_str() << "' to the header list."
00368             << endl);
00369         d_cl = curl_slist_append(d_cl, header.c_str());
00370     }
00371 
00372     struct curl_slist *get_headers()
00373     {
00374         return d_cl;
00375     }
00376 };
00377 
00392 long
00393 HTTPConnect::read_url(const string &url, FILE *stream,
00394                       vector<string> *resp_hdrs,
00395                       const vector<string> *headers)
00396 {
00397     curl_easy_setopt(d_curl, CURLOPT_URL, url.c_str());
00398 
00399 #ifdef WIN32
00400     //  See the curl documentation for CURLOPT_FILE (aka CURLOPT_WRITEDATA)
00401     //  and the CURLOPT_WRITEFUNCTION option.  Quote: "If you are using libcurl as
00402     //  a win32 DLL, you MUST use the CURLOPT_WRITEFUNCTION option if you set the
00403     //  CURLOPT_WRITEDATA option or you will experience crashes".  At the root of
00404     //  this issue is that one should not pass a FILE * to a windows DLL.  Close
00405     //  inspection of libcurl yields that their default write function when using
00406     //  the CURLOPT_WRITEDATA is just "fwrite".
00407     curl_easy_setopt(d_curl, CURLOPT_FILE, stream);
00408     curl_easy_setopt(d_curl, CURLOPT_WRITEFUNCTION, &fwrite);
00409 #else
00410     curl_easy_setopt(d_curl, CURLOPT_FILE, stream);
00411 #endif
00412 
00413     DBG(copy(d_request_headers.begin(), d_request_headers.end(),
00414              ostream_iterator<string>(cerr, "\n")));
00415 
00416     BuildHeaders req_hdrs;
00417     req_hdrs = for_each(d_request_headers.begin(), d_request_headers.end(),
00418                         req_hdrs);
00419     if (headers)
00420         req_hdrs = for_each(headers->begin(), headers->end(), req_hdrs);
00421     curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, req_hdrs.get_headers());
00422 
00423     // Turn off the proxy for this URL?
00424     bool temporary_proxy = false;
00425     if ((temporary_proxy = url_uses_no_proxy_for(url))) {
00426         DBG(cerr << "Suppress proxy for url: " << url << endl);
00427         curl_easy_setopt(d_curl, CURLOPT_PROXY, 0);
00428     }
00429 
00430     string::size_type at_sign = url.find('@');
00431     // Assume username:password present *and* assume it's an HTTP URL; it *is*
00432     // HTTPConnect, after all. 7 is position after "http://"; the second arg
00433     // to substr() is the sub string length.
00434     if (at_sign != url.npos)
00435         d_upstring = url.substr(7, at_sign - 7);
00436 
00437     if (!d_upstring.empty())
00438         curl_easy_setopt(d_curl, CURLOPT_USERPWD, d_upstring.c_str());
00439 
00440     // Pass save_raw_http_headers() a pointer to the vector<string> where the
00441     // response headers may be stored. Callers can use the resp_hdrs
00442     // value/result parameter to get the raw response header information .
00443     curl_easy_setopt(d_curl, CURLOPT_WRITEHEADER, resp_hdrs);
00444 
00445     CURLcode res = curl_easy_perform(d_curl);
00446 
00447     // Free the header list and null the value in d_curl.
00448     curl_slist_free_all(req_hdrs.get_headers());
00449     curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, 0);
00450 
00451     // Reset the proxy?
00452     if (temporary_proxy && !d_rcr->get_proxy_server_host().empty())
00453         curl_easy_setopt(d_curl, CURLOPT_PROXY,
00454                          d_rcr->get_proxy_server_host().c_str());
00455 
00456     if (res != 0)
00457         throw Error(d_error_buffer);
00458 
00459     long status;
00460     res = curl_easy_getinfo(d_curl, CURLINFO_HTTP_CODE, &status);
00461     if (res != 0)
00462         throw Error(d_error_buffer);
00463 
00464     return status;
00465 }
00466 
00470 bool
00471 HTTPConnect::url_uses_proxy_for(const string &url) throw()
00472 {
00473     if (d_rcr->is_proxy_for_used()) {
00474         Regex host_regex(d_rcr->get_proxy_for_regexp().c_str());
00475         int index = 0, matchlen;
00476         return host_regex.search(url.c_str(), url.size(), matchlen, index)
00477                != -1;
00478     }
00479 
00480     return false;
00481 }
00482 
00486 bool
00487 HTTPConnect::url_uses_no_proxy_for(const string &url) throw()
00488 {
00489     return d_rcr->is_no_proxy_for_used()
00490            && url.find(d_rcr->get_no_proxy_for_host()) != string::npos;
00491 }
00492 
00493 // Public methods. Mostly...
00494 
00501 HTTPConnect::HTTPConnect(RCReader *rcr) : d_username(""), d_password(""),
00502                                           d_cookie_jar(""),
00503                                           d_dap_client_protocol_major(2),
00504                                           d_dap_client_protocol_minor(0)
00505 
00506 {
00507     d_accept_deflate = rcr->get_deflate();
00508     d_rcr = rcr;
00509 
00510     // Load in the default headers to send with a request. The empty Pragma
00511     // headers overrides libcurl's default Pragma: no-cache header (which
00512     // will disable caching by Squid, et c.). The User-Agent header helps
00513     // make server logs more readable. 05/05/03 jhrg
00514     d_request_headers.push_back(string("Pragma:"));
00515     string user_agent = string("User-Agent: ") + string(CNAME)
00516                         + string("/") + string(CVER);
00517     d_request_headers.push_back(user_agent);
00518     if (d_accept_deflate)
00519         d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
00520 
00521     // HTTPCache::instance returns a valid ptr or 0.
00522     if (d_rcr->get_use_cache())
00523         d_http_cache = HTTPCache::instance(d_rcr->get_dods_cache_root(),
00524                                            true);
00525     else
00526         d_http_cache = 0;
00527 
00528     DBG2(cerr << "Cache object created (" << hex << d_http_cache << dec
00529          << ")" << endl);
00530 
00531     if (d_http_cache) {
00532         d_http_cache->set_cache_enabled(d_rcr->get_use_cache());
00533         d_http_cache->set_expire_ignored(d_rcr->get_ignore_expires() != 0);
00534         d_http_cache->set_max_size(d_rcr->get_max_cache_size());
00535         d_http_cache->set_max_entry_size(d_rcr->get_max_cached_obj());
00536         d_http_cache->set_default_expiration(d_rcr->get_default_expires());
00537         d_http_cache->set_always_validate(d_rcr->get_always_validate() != 0);
00538     }
00539 
00540     d_cookie_jar = rcr->get_cookie_jar();
00541 
00542     www_lib_init();  // This may throw either Error or InternalErr
00543 }
00544 
00545 HTTPConnect::~HTTPConnect()
00546 {
00547     DBG2(cerr << "Entering the HTTPConnect dtor" << endl);
00548 
00549     curl_easy_cleanup(d_curl);
00550 
00551     DBG2(cerr << "Leaving the HTTPConnect dtor" << endl);
00552 }
00553 
00566 HTTPResponse *
00567 HTTPConnect::fetch_url(const string &url)
00568 {
00569 #ifdef HTTP_TRACE
00570     cout << "GET " << url << " HTTP/1.0" << endl;
00571 #endif
00572 
00573     HTTPResponse *stream;
00574 
00575     if (d_http_cache && d_http_cache->is_cache_enabled()) {
00576         stream = caching_fetch_url(url);
00577     }
00578     else {
00579         stream = plain_fetch_url(url);
00580     }
00581 
00582 #ifdef HTTP_TRACE
00583     stringstream ss;
00584     ss << "HTTP/1.0 " << stream->get_status() << " -" << endl;
00585     for (size_t i = 0; i < stream->get_headers()->size(); i++) {
00586         ss << stream->get_headers()->at(i) << endl;
00587     }
00588     cout << ss.str();
00589 #endif
00590 
00591     ParseHeader parser;
00592 
00593     parser = for_each(stream->get_headers()->begin(),
00594                       stream->get_headers()->end(), ParseHeader());
00595 
00596 #ifdef HTTP_TRACE
00597     cout << endl << endl;
00598 #endif
00599 
00600     // handle redirection case (2007-04-27, gaffigan@sfos.uaf.edu)
00601     if (parser.get_location() != "" &&
00602         url.substr(0,url.find("?",0)).compare(parser.get_location().substr(0,url.find("?",0))) != 0) {
00603            return fetch_url(parser.get_location());
00604     }
00605 
00606     stream->set_type(parser.get_object_type());
00607     stream->set_version(parser.get_server());
00608     stream->set_protocol(parser.get_protocol());
00609 
00610     return stream;
00611 }
00612 
00613 // Look around for a reasonable place to put a temporary file. Check first
00614 // the value of the TMPDIR env var. If that does not yeild a path that's
00615 // writable (as defined by access(..., W_OK|R_OK)) then look at P_tmpdir (as
00616 // defined in stdio.h. If both come up empty, then use `./'.
00617 //
00618 // This function allocates storage using new. The caller must delete the char
00619 // array.
00620 
00621 // Change this to a version that either returns a string or an open file
00622 // descriptor. Use information from https://buildsecurityin.us-cert.gov/
00623 // (see open()) to make it more secure. Ideal solution: get deserialize()
00624 // methods to read from a stream returned by libcurl, not from a temporary
00625 // file. 9/21/07 jhrg
00626 static char *
00627 get_tempfile_template(const char *file_template)
00628 {
00629     const char *c;
00630 
00631 #ifdef WIN32
00632     // whitelist for a WIN32 directory
00633     Regex directory("[-a-zA-Z0-9_\\]*");
00634 
00635     c = getenv("TEMP");
00636     if (c && directory.match(c, strlen(c)) && (access(getenv("TEMP"), 6) == 0))
00637         goto valid_temp_directory;
00638 
00639     c= getenv("TMP");
00640     if (c && directory.match(c, strlen(c)) && (access(getenv("TEMP"), 6) == 0))
00641         goto valid_temp_directory;
00642 #else
00643         // whitelist for a directory
00644         Regex directory("[-a-zA-Z0-9_/]*");
00645 
00646         c = getenv("TMPDIR");
00647         if (c && directory.match(c, strlen(c)) && (access(c, W_OK | R_OK) == 0))
00648         goto valid_temp_directory;
00649 
00650 #ifdef P_tmpdir
00651         if (access(P_tmpdir, W_OK | R_OK) == 0) {
00652         c = P_tmpdir;
00653         goto valid_temp_directory;
00654         }
00655 #endif
00656 
00657 #endif  // WIN32
00658 
00659     c = ".";
00660 
00661 valid_temp_directory:
00662         // Sanitize allocation
00663         int size = strlen(c) + strlen(file_template) + 2;
00664         if (!size_ok(1, size))
00665                 throw Error("Bad temporary file name.");
00666 
00667     char *temp = new char[size];
00668     strncpy(temp, c, size-2);
00669     strcat(temp, "/");
00670 
00671     strcat(temp, file_template);
00672 
00673     return temp;
00674 }
00675 
00694 string
00695 get_temp_file(FILE *&stream) throw(InternalErr)
00696 {
00697     // get_tempfile_template() uses new, must call delete
00698     char *dods_temp = get_tempfile_template("dodsXXXXXX");
00699 
00700     // Open truncated for update. NB: mkstemp() returns a file descriptor.
00701 #if defined(WIN32) || defined(TEST_WIN32_TEMPS)
00702     stream = fopen(_mktemp(dods_temp), "w+b");
00703 #else
00704     stream = fdopen(mkstemp(dods_temp), "w+");
00705 #endif
00706 
00707     if (!stream)
00708         throw InternalErr("I/O Error: Failed to open a temporary file for the data values.");
00709 
00710     string dods_temp_s = dods_temp;
00711     delete[] dods_temp; dods_temp = 0;
00712 
00713     return dods_temp_s;
00714 }
00715 
00717 void
00718 close_temp(FILE *s, const string &name)
00719 {
00720     int res = fclose(s);
00721     if (res) {
00722         DBG(cerr << "Counld not close the temporary file: " << name << endl);
00723     }
00724 
00725     unlink(name.c_str());
00726 }
00727 
00749 HTTPResponse *
00750 HTTPConnect::caching_fetch_url(const string &url)
00751 {
00752     DBG(cerr << "Is this URL (" << url << ") in the cache?... ");
00753 
00754     vector<string> *headers = new vector<string> ;
00755     FILE *s = d_http_cache->get_cached_response(url, *headers);
00756     if (!s) {
00757         // url not in cache; get it and cache it
00758         DBGN(cerr << "no; getting response and caching." << endl);
00759         time_t now = time(0);
00760         HTTPResponse *rs = plain_fetch_url(url);
00761         d_http_cache->cache_response(url, now, *(rs->get_headers()),
00762                 rs->get_stream());
00763 
00764         return rs;
00765     }
00766     else { // url in cache
00767         DBGN(cerr << "yes... ");
00768 
00769         if (d_http_cache->is_url_valid(url)) { // url in cache and valid
00770             DBGN(cerr << "and it's valid; using cached response." << endl);
00771             HTTPCacheResponse *crs =
00772                     new HTTPCacheResponse(s, 200, headers, d_http_cache);
00773             return crs;
00774         }
00775         else { // url in cache but not valid; validate
00776             DBGN(cerr << "but it's not valid; validating... ");
00777 
00778             d_http_cache->release_cached_response(s);
00779 
00780             vector<string> *resp_hdrs = new vector<string> ;
00781             vector<string> cond_hdrs =
00782                     d_http_cache->get_conditional_request_headers(url);
00783             FILE *body = 0;
00784             string dods_temp = get_temp_file(body);
00785             time_t now = time(0); // When was the request made (now).
00786             long http_status;
00787 
00788             try {
00789                 http_status = read_url(url, body, resp_hdrs, &cond_hdrs);
00790                 rewind(body);
00791             }
00792             catch (Error &e) {
00793                 close_temp(body, dods_temp);
00794                 throw ;
00795             }
00796 
00797             switch (http_status) {
00798                 case 200: { // New headers and new body
00799                     DBGN(cerr << "read a new response; caching." << endl);
00800 
00801                     d_http_cache->cache_response(url, now, *resp_hdrs, body);
00802                     HTTPResponse *rs = new HTTPResponse(body, http_status, resp_hdrs,
00803                             dods_temp);
00804 
00805                     return rs;
00806                 }
00807 
00808                 case 304: { // Just new headers, use cached body
00809                     DBGN(cerr << "cached response valid; updating." << endl);
00810 
00811                     close_temp(body, dods_temp);
00812                     d_http_cache->update_response(url, now, *resp_hdrs);
00813 
00814                     vector<string> *headers = new vector<string>;
00815                     FILE *hs = d_http_cache->get_cached_response(url, *headers);
00816                     HTTPCacheResponse *crs = new HTTPCacheResponse(hs, 304, headers, d_http_cache);
00817                     return crs;
00818                 }
00819 
00820                 default: { // Oops.
00821                     close_temp(body, dods_temp);
00822                     if (http_status >= 400) {
00823                         string msg = "Error while reading the URL: ";
00824                         msg += url;
00825                         msg
00826                         += ".\nThe OPeNDAP server returned the following message:\n";
00827                         msg += http_status_to_string(http_status);
00828                         throw Error(msg);
00829                     }
00830                     else {
00831                         throw InternalErr(__FILE__, __LINE__,
00832                                 "Bad response from the HTTP server: " + long_to_string(http_status));
00833                     }
00834                 }
00835             }
00836         }
00837     }
00838 
00839     throw InternalErr(__FILE__, __LINE__, "Should never get here");
00840 }
00841 
00853 HTTPResponse *
00854 HTTPConnect::plain_fetch_url(const string &url)
00855 {
00856     DBG(cerr << "Getting URL: " << url << endl);
00857     FILE *stream = 0;
00858     string dods_temp = get_temp_file(stream);
00859     vector<string> *resp_hdrs = new vector<string>;
00860 
00861     int status = -1;
00862     try {
00863         status = read_url(url, stream, resp_hdrs); // Throws Error.
00864         if (status >= 400) {
00865             string msg = "Error while reading the URL: ";
00866             msg += url;
00867             msg += ".\nThe OPeNDAP server returned the following message:\n";
00868             msg += http_status_to_string(status);
00869             throw Error(msg);
00870         }
00871     }
00872 
00873     catch (Error &e) {
00874         close_temp(stream, dods_temp);
00875         throw e;
00876     }
00877 
00878     rewind(stream);
00879 
00880     return new HTTPResponse(stream, status, resp_hdrs, dods_temp);
00881 }
00882 
00894 void
00895 HTTPConnect::set_accept_deflate(bool deflate)
00896 {
00897     d_accept_deflate = deflate;
00898 
00899     if (d_accept_deflate) {
00900         if (find(d_request_headers.begin(), d_request_headers.end(),
00901                  "Accept-Encoding: deflate, gzip, compress") == d_request_headers.end())
00902             d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
00903         DBG(copy(d_request_headers.begin(), d_request_headers.end(),
00904                  ostream_iterator<string>(cerr, "\n")));
00905     }
00906     else {
00907         vector<string>::iterator i;
00908         i = remove_if(d_request_headers.begin(), d_request_headers.end(),
00909                       bind2nd(equal_to<string>(),
00910                               string("Accept-Encoding: deflate, gzip, compress")));
00911         d_request_headers.erase(i, d_request_headers.end());
00912     }
00913 }
00914 
/** Predicate that is true for strings that begin with a given header name.

    The header text is stored by value, so the predicate stays valid after
    the string it was built from has gone away (for example when it is
    constructed from a string literal). The nested typedefs are the ones the
    former std::unary_function base class supplied. */
class HeaderMatch {
    const std::string d_header;     // Prefix to look for; an owned copy.
    public:
        typedef const std::string &argument_type;
        typedef bool result_type;

        HeaderMatch(const std::string &header) : d_header(header) {}

        /** @return True if \c arg starts with the header text. */
        bool operator()(const std::string &arg) const
        {
            return arg.compare(0, d_header.size(), d_header) == 0;
        }
};
00922 
00931 void
00932 HTTPConnect::set_xdap_protocol(int major, int minor)
00933 {
00934     // Look for, and remove if one exists, an XDAP-Accept header
00935     vector<string>::iterator i;
00936     i = find_if(d_request_headers.begin(), d_request_headers.end(),
00937                 HeaderMatch("XDAP-Accept:"));
00938     if (i != d_request_headers.end())
00939         d_request_headers.erase(i);
00940 
00941     // Record and add the new header value
00942     d_dap_client_protocol_major = major;
00943     d_dap_client_protocol_minor = minor;
00944     ostringstream xdap_accept;
00945     xdap_accept << "XDAP-Accept: " << major << "." << minor;
00946 
00947     d_request_headers.push_back(xdap_accept.str());
00948 
00949     DBG(copy(d_request_headers.begin(), d_request_headers.end(),
00950              ostream_iterator<string>(cerr, "\n")));
00951 }
00952 
00968 void
00969 HTTPConnect::set_credentials(const string &u, const string &p)
00970 {
00971     if (u.empty())
00972         return;
00973 
00974     // Store the credentials locally.
00975     d_username = u;
00976     d_password = p;
00977 
00978     d_upstring = u + ":" + p;
00979 }
00980 
00981 } // namespace libdap