bes  Updated for version 3.20.8
retriever.cc
1 // -*- mode: c++; c-basic-offset:4 -*-
2 
3 // This file is part of the BES
4 
5 // Copyright (c) 2016 OPeNDAP, Inc.
6 // Author: Nathan Potter <ndp@opendap.org>
7 //
8 // This library is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 2.1 of the License, or (at your option) any later version.
12 //
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
17 //
18 // You should have received a copy of the GNU Lesser General Public
19 // License along with this library; if not, write to the Free Software
20 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 //
22 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
23 
24 
25 #include "config.h"
26 
27 #include <fcntl.h>
28 
29 #include <unistd.h>
30 #include <time.h>
31 
32 #include <cstdlib>
33 #include <cstring>
34 #include <cassert>
35 #include <cerrno>
36 #include <sstream>
37 #include <iostream>
38 #include <fstream>
39 #include <GetOpt.h>
40 
41 #include <curl/curl.h>
42 
43 
44 #include "D4Dimensions.h"
45 #include "D4StreamMarshaller.h"
46 
47 #include "BESInternalError.h"
48 #include "BESUtil.h"
49 #include "CurlUtils.h"
50 #include "TheBESKeys.h"
51 #include "BESLog.h"
52 #include "BESDebug.h"
53 #include "BESStopWatch.h"
54 
55 #include "awsv4.h"
56 #include "HttpNames.h"
57 #include "EffectiveUrl.h"
58 #include "EffectiveUrlCache.h"
59 #include "RemoteResource.h"
60 
61 #include "Chunk.h"
62 #include "CredentialsManager.h"
63 #include "AccessCredentials.h"
64 #include "CredentialsManager.h"
65 #include "CurlHandlePool.h"
66 #include "DmrppCommon.h"
67 #include "DmrppRequestHandler.h"
68 #include "DmrppByte.h"
69 #include "DmrppArray.h"
70 #include "DMRpp.h"
71 #include "DmrppTypeFactory.h"
72 #include "DmrppD4Group.h"
73 #include "DmrppParserSax2.h"
74 
75 //#include <memory>
76 //#include <iterator>
77 //#include <algorithm>
78 
79 
// Verbosity switches, set from the command line in main():
// -D turns on both Debug and debug, -d turns on debug only, -b turns on bes_debug.
80 bool Debug = false;
81 bool debug = false;
82 bool bes_debug = false;
83 
84 using std::cerr;
85 using std::endl;
86 using std::string;
87 
// Prefix for diagnostic messages: "retriever::<name of the enclosing function>() - ".
88 #define prolog std::string("retriever::").append(__func__).append("() - ")
89 
// Value sent in the x-amz-content-sha256 request header; per the comment at its point of use
// this is the pre-computed SHA-256 hash of a null (empty) message body.
90 #define NULL_BODY_HASH "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
91 
92 
/**
 * @brief Describe the current value of errno as text.
 *
 * @return The message strerror() supplies for errno, or "Unknown error."
 * when strerror() yields a null pointer.
 */
string get_errno() {
    const char *message = strerror(errno);
    return message ? string(message) : string("Unknown error.");
}
104 
105 
115 dmrpp::DmrppRequestHandler *bes_setup(
116  const string &bes_config_file,
117  const string &bes_log_file,
118  const string &bes_debug_log_file,
119  const string &bes_debug_keys,
120  const string &http_netrc_file,
121  const string &http_cache_dir
122 ) {
123  if (debug) cerr << prolog << "BEGIN" << endl;
124 
125  TheBESKeys::ConfigFile = bes_config_file; // Set the config file for TheBESKeys
126  TheBESKeys::TheKeys()->set_key("BES.LogName", bes_log_file); // Set the log file so it goes where we say.
127  TheBESKeys::TheKeys()->set_key("AllowedHosts", "^https?:\\/\\/.*$", false); // Set AllowedHosts to allow any URL
128  TheBESKeys::TheKeys()->set_key("AllowedHosts", "^file:\\/\\/\\/.*$", true); // Set AllowedHosts to allow any file
129 
130  if (bes_debug) BESDebug::SetUp(bes_debug_log_file + "," + bes_debug_keys); // Enable BESDebug settings
131 
132 
133  if (!http_netrc_file.empty()) {
134  TheBESKeys::TheKeys()->set_key(HTTP_NETRC_FILE_KEY, http_netrc_file, false); // Set the netrc file
135  }
136 
137  if (!http_cache_dir.empty()) {
138  TheBESKeys::TheKeys()->set_key(HTTP_CACHE_DIR_KEY, http_cache_dir, false); // Set the netrc file
139  }
140 
141  // Initialize the dmr++ goodness.
142  auto foo = new dmrpp::DmrppRequestHandler("Chaos");
143 
144  if (debug) cerr << prolog << "END" << endl;
145  return foo;
146 }
147 
148 curl_slist *aws_sign_request_url(const string &target_url, curl_slist *request_headers) {
149 
150  if (debug) cerr << prolog << "BEGIN" << endl;
151 
152  AccessCredentials *credentials = CredentialsManager::theCM()->get(target_url);
153  if (credentials && credentials->is_s3_cred()) {
154  if (debug)
155  cerr << prolog << "Got AWS S3 AccessCredentials instance: " << endl << credentials->to_json() << endl;
156  // If there are available credentials, and they are S3 credentials then we need to sign
157  // the request
158  const std::time_t request_time = std::time(0);
159 
160  const std::string auth_header =
161  AWSV4::compute_awsv4_signature(
162  target_url,
163  request_time,
164  credentials->get(AccessCredentials::ID_KEY),
165  credentials->get(AccessCredentials::KEY_KEY),
166  credentials->get(AccessCredentials::REGION_KEY),
167  "s3");
168 
169  // passing nullptr for the first call allocates the curl_slist
170  // The following code builds the slist that holds the headers. This slist is freed
171  // once the URL is dereferenced in dmrpp_easy_handle::read_data(). jhrg 11/26/19
172  request_headers = curl::append_http_header(request_headers, "Authorization", auth_header);
173 
174  // We pre-compute the sha256 hash of a null message body
175  request_headers = curl::append_http_header(request_headers, "x-amz-content-sha256", NULL_BODY_HASH);
176  request_headers = curl::append_http_header(request_headers, "x-amz-date", AWSV4::ISO8601_date(request_time));
177  }
178  if (debug) cerr << prolog << "END" << endl;
179  return request_headers;
180 }
181 
187 size_t get_remote_size(string url, bool aws_signing) {
188  if (debug) cerr << prolog << "BEGIN" << endl;
189 
190  char error_buffer[CURL_ERROR_SIZE];
191  std::vector<std::string> resp_hdrs;
192  curl_slist *request_headers = NULL;
193 
194  request_headers = curl::add_auth_headers(request_headers);
195 
196  if (aws_signing)
197  request_headers = aws_sign_request_url(url, request_headers);
198 
199  CURL *ceh = curl::init(url, request_headers, &resp_hdrs);
200  curl::set_error_buffer(ceh, error_buffer);
201 
202  // In cURLville, CURLOPT_NOBODY means a HEAD request i.e. Don't send the response body a.k.a. "NoBody"
203  CURLcode curl_status = curl_easy_setopt(ceh, CURLOPT_NOBODY, 1L);
204  curl::eval_curl_easy_setopt_result(curl_status, prolog, "CURLOPT_NOBODY", error_buffer, __FILE__, __LINE__);
205 
206  if (Debug) cerr << prolog << "cURL HEAD request is configured" << endl;
207 
208  curl::super_easy_perform(ceh);
209 
210  curl::unset_error_buffer(ceh);
211  if (request_headers)
212  curl_slist_free_all(request_headers);
213  if (ceh)
214  curl_easy_cleanup(ceh);
215 
216  bool done = false;
217  size_t how_big_it_is = 0;
218  string content_length_hdr_key("content-length: ");
219  for (size_t i = 0; !done && i < resp_hdrs.size(); i++) {
220  if (Debug) cerr << prolog << "HEADER[" << i << "]: " << resp_hdrs[i] << endl;
221  string lc_header = BESUtil::lowercase(resp_hdrs[i]);
222  size_t index = lc_header.find(content_length_hdr_key);
223  if (index == 0) {
224  string value = lc_header.substr(content_length_hdr_key.size());
225  how_big_it_is = stol(value);
226  done = true;
227  }
228  }
229  if (!done)
230  throw BESInternalError(prolog + "Failed to determine size of target resource: " + url, __FILE__, __LINE__);
231 
232  if (debug) cerr << prolog << "END" << endl;
233 
234  return how_big_it_is;
235 }
236 size_t get_max_retrival_size(const size_t &max_target_size, const string &effectiveUrl) {
237  size_t target_size = max_target_size;
238  if (max_target_size == 0) {
239  target_size = get_remote_size(effectiveUrl, true);
240  if (debug) cerr << prolog << "Remote resource size is " << max_target_size << " bytes. " << endl;
241  }
242  return target_size;
243 }
244 
250 void simple_get(const string target_url, const string output_file_base) {
251 
252  string output_file = output_file_base + "_simple_get.out";
253  vector<string> resp_hdrs;
254  mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
255  int fd;
256  if ((fd = open(output_file.c_str(), O_WRONLY | O_CREAT | O_TRUNC, mode)) < 0) {
257  throw BESInternalError(get_errno(), __FILE__, __LINE__);
258  }
259  {
260  BESStopWatch sw;
261  sw.start(prolog + "url: " + target_url);
262  curl::http_get_and_write_resource(target_url, fd,
263  &resp_hdrs); // Throws BESInternalError if there is a curl error.
264  }
265  close(fd);
266 
267  if (Debug) {
268  for (size_t i = 0; i < resp_hdrs.size(); i++) {
269  cerr << prolog << "ResponseHeader[" << i << "]: " << resp_hdrs[i] << endl;
270  }
271  }
272 }
273 
274 
282 void make_chunks(const string &target_url, const size_t &target_size, const size_t &chunk_count,
283  vector<dmrpp::Chunk *> &chunks) {
284  if (debug) cerr << prolog << "BEGIN" << endl;
285  size_t chunk_size = target_size / chunk_count;
286  size_t chunk_start = 0;
287  size_t chunk_index;
288  for (chunk_index = 0; chunk_index < chunk_count; chunk_index++) {
289  vector<unsigned int> position_in_array;
290  position_in_array.push_back(chunk_index);
291  if (debug)
292  cerr << prolog << "chunks[" << chunk_index << "] chunk_start: " << chunk_start << " chunk_size: "
293  << chunk_size << endl;
294  auto chunk = new dmrpp::Chunk(target_url, "LE", chunk_size, chunk_start, position_in_array);
295  chunk_start += chunk_size;
296  chunks.push_back(chunk);
297  }
298  if (target_size % chunk_size) {
299  // So there's a remainder and we should make a final chunk for it too.
300  size_t last_chunk_size = target_size - chunk_start;
301  if (debug)
302  cerr << prolog << "Remainder chunk. chunk[" << chunks.size() << "] last_chunk_size: " << last_chunk_size
303  << endl;
304  if (debug)
305  cerr << prolog << "Remainder chunk! target_size: " << target_size << " index: " << chunk_index
306  << " last_chunk_start: " << chunk_start << " last_chunk_size: " << last_chunk_size << endl;
307  if (last_chunk_size > 0) {
308  vector<unsigned int> position_in_array;
309  position_in_array.push_back(chunk_index);
310  if (debug)
311  cerr << prolog << "chunks[" << chunk_index << "] chunk_start: " << chunk_start << " chunk_size: "
312  << last_chunk_size << endl;
313  auto last_chunk = new dmrpp::Chunk(target_url, "LE", last_chunk_size, chunk_start, position_in_array);
314  chunks.push_back(last_chunk);
315  }
316  }
317  if (debug) cerr << prolog << "END chunks: " << chunks.size() << endl;
318 }
319 
320 
327 void serial_chunky_get(const string &target_url, const size_t target_size, const unsigned long chunk_count,
328  const string &output_file_base) {
329 
330  string effectiveUrl = http::EffectiveUrlCache::TheCache()->get_effective_url(target_url);
331  if (debug) cerr << prolog << "curl::retrieve_effective_url() returned: " << effectiveUrl << endl;
332  size_t retrieval_size = get_max_retrival_size(target_size, effectiveUrl);
333 
334  string output_file = output_file_base + "_serial_chunky_get.out";
335  vector<dmrpp::Chunk *> chunks;
336  make_chunks(target_url, retrieval_size, chunk_count, chunks);
337 
338  std::ofstream ofs;
339  ofs.open(output_file, std::fstream::in | std::fstream::out | std::ofstream::trunc | std::ofstream::binary);
340  if (ofs.fail())
341  throw BESInternalError(prolog + "Failed to open file: " + output_file, __FILE__, __LINE__);
342 
343  for (size_t i = 0; i < chunks.size(); i++) {
344  stringstream ss;
345  ss << prolog << "chunk={index: " << i << ", offset: " << chunks[i]->get_offset() << ", size: "
346  << chunks[i]->get_size() << "}";
347 
348  {
349  BESStopWatch sw;
350  sw.start(ss.str());
351  chunks[i]->read_chunk();
352  }
353 
354  if (debug) cerr << ss.str() << " retrieval from: " << target_url << " completed, timing finished." << endl;
355  ofs.write(chunks[i]->get_rbuf(), chunks[i]->get_rbuf_size());
356  if (debug) cerr << ss.str() << " has been written to: " << output_file << endl;
357  }
358  auto itr = chunks.begin();
359  while (itr != chunks.end()) {
360  delete *itr;
361  *itr = 0;
362  itr++;
363  }
364 
365 }
366 
367 
368 void parse_dmrpp(const string &dmrpp_filename_url){
369  if(debug) cerr << prolog << "BEGIN" << endl;
370 
371  dmrpp::DmrppParserSax2 parser;
372  string target_file_url = dmrpp_filename_url;
373  string target_file;
374 
375  const string http_protocol("http://");
376  const string https_protocol("https://");
377  const string file_protocol("file://");
378 
379  if(debug) cerr << prolog << "dmrpp_filename_url: " << dmrpp_filename_url << endl;
380 
381  if(target_file_url.empty())
382  throw BESInternalError(prolog + "The dmr++ filename was empty.", __FILE__, __LINE__);
383 
384 
385  if(target_file_url.rfind(http_protocol,0)==0 || target_file_url.rfind(https_protocol,0)==0 ){
386  // Use RemoteResource to get the thing.
387  http::RemoteResource target_resource(target_file_url,prolog+"Timer");
388  target_resource.retrieveResource();
389  target_file = target_resource.getCacheFileName();
390  }
391  else if(target_file_url.rfind(file_protocol,0)==0){
392  target_file = target_file_url.substr(file_protocol.length());
393  }
394  else {
395  target_file_url = file_protocol + target_file_url;
396  }
397 
398  if(debug) cerr << prolog << " target_file: " << target_file << endl;
399 
400  ifstream ifs(target_file);
401  if(ifs.fail())
402  throw BESInternalError(prolog + "Failed open to dmr++ file: " + dmrpp_filename_url, __FILE__, __LINE__);
403 
404  dmrpp::DmrppTypeFactory factory;
405  dmrpp::DMRpp dmr(&factory);
406  dmr.set_href(target_file_url);
407  stringstream msg;
408  msg << prolog << dmrpp_filename_url;
409  {
410  BESStopWatch sw;
411  sw.start(msg.str());
412  parser.intern(ifs, &dmr);
413  }
414 
415  if (Debug) {
416  cerr << prolog << "Built dataset: " << endl;
418  libdap::XMLWriter xmlWriter;
419  dmr.print_dmrpp(xmlWriter, dmr.get_href());
420  cerr << xmlWriter.get_doc() << endl;
421  }
422  if(debug) cerr << prolog << "END" << endl;
423 
424 
425 }
426 
427 
428 
435 void add_chunks(const string &target_url, const size_t &target_size, const size_t &chunk_count,
436  dmrpp::DmrppArray *target_array) {
437 
438  if (debug) cerr << prolog << "BEGIN" << endl;
439 
440  size_t chunk_size = target_size / chunk_count;
441  if (chunk_size == 0)
442  throw BESInternalError(prolog + "Chunk size was zero.", __FILE__, __LINE__);
443  stringstream chunk_dim_size;
444  chunk_dim_size << chunk_size;
445  target_array->parse_chunk_dimension_sizes(chunk_dim_size.str());
446 
447  size_t chunk_start = 0;
448  size_t chunk_index;
449  for (chunk_index = 0; chunk_index < chunk_count; chunk_index++) {
450  vector<unsigned int> position_in_array;
451  position_in_array.push_back(chunk_start);
452  if (debug)
453  cerr << prolog << "chunks[" << chunk_index << "] chunk_start: " << chunk_start << " chunk_size: "
454  << chunk_size << " chunk_poa: " << position_in_array[0] << endl;
455  target_array->add_chunk(target_url, "LE", chunk_size, chunk_start, position_in_array);
456  chunk_start += chunk_size;
457  }
458  if (target_size % chunk_size) {
459  // So there's a remainder and we should make a final chunk for it too.
460  size_t last_chunk_size = target_size - chunk_start;
461  if (debug)
462  cerr << prolog << "Remainder chunk! target_size: " << target_size << " index: " << chunk_index
463  << " last_chunk_start: " << chunk_start << " last_chunk_size: " << last_chunk_size << endl;
464  if (last_chunk_size > 0) {
465  vector<unsigned int> position_in_array;
466  position_in_array.push_back(chunk_start);
467  if (debug)
468  cerr << prolog << "chunks[" << chunk_index << "] chunk_start: " << chunk_start << " chunk_size: "
469  << last_chunk_size << " chunk_poa: " << position_in_array[0] << endl;
470  target_array->add_chunk(target_url, "LE", last_chunk_size, chunk_start, position_in_array);
471  }
472  }
473  if (debug) cerr << prolog << "END" << endl;
474 }
475 
476 
477 
485 size_t array_get(const string &target_url, const size_t &target_size, const size_t &chunk_count,
486  const string &output_file_base) {
487 
488  if (debug) cerr << prolog << "BEGIN" << endl;
489  string output_file = output_file_base + "_array_get.out";
490  std::ofstream ofs;
491  ofs.open(output_file, std::fstream::in | std::fstream::out | std::ofstream::trunc | std::ofstream::binary);
492  if (ofs.fail())
493  throw BESInternalError(prolog + "Failed to open file: " + output_file, __FILE__, __LINE__);
494 
495  auto *tmplt = new dmrpp::DmrppByte("data");
496  auto *target_array = new dmrpp::DmrppArray("data", tmplt);
497  delete tmplt; // Because the Vector() constructor made a copy and it's our problem...
498 
499  target_array->append_dim(target_size);
500  add_chunks(target_url, target_size, chunk_count, target_array);
501  target_array->set_send_p(true); // Mark it to be sent so that it will be read.
502 
503  dmrpp::DmrppTypeFactory factory;
504  dmrpp::DMRpp dmr(&factory);
505  dmr.set_href(target_url);
506  dmrpp::DmrppD4Group *root = dynamic_cast<dmrpp::DmrppD4Group *>(dmr.root());
507  root->add_var_nocopy(target_array);
508  root->set_in_selection(true);
509 
510  if (debug) {
511  cerr << prolog << "Built dataset: " << endl;
513  libdap::XMLWriter xmlWriter;
514  dmr.print_dmrpp(xmlWriter, dmr.get_href());
515  cerr << xmlWriter.get_doc() << endl;
516  }
517 
518  {
519  stringstream timer_msg;
520  timer_msg << prolog << "DmrppD4Group.intern_data() for " << target_size << " bytes in " << chunk_count <<
521  " chunks, parallel transfers ";
522  if (dmrpp::DmrppRequestHandler::d_use_parallel_transfers) {
523  timer_msg << "enabled. (max: " << dmrpp::DmrppRequestHandler::d_max_parallel_transfers << ")";
524  } else {
525  timer_msg << "disabled.";
526  }
527  BESStopWatch sw;
528  sw.start(timer_msg.str());
529  root->intern_data();
530  }
531 
532  size_t started = ofs.tellp();
533  libdap::D4StreamMarshaller streamMarshaller(ofs);
534  root->serialize(streamMarshaller, dmr);
535 
536  size_t stopped = ofs.tellp();
537  size_t numberOfBytesWritten = stopped - started;
538  if (debug) cerr << prolog << "target_size: " << target_size << " numberOfBytesWritten: " << numberOfBytesWritten << endl;
539 
540  // delete target_array; // Don't have to delete this because we added it to the DMR using add_var_nocopy()
541  if (debug) cerr << prolog << "END" << endl;
542  return numberOfBytesWritten;
543 
544 }
545 
546 
547 
// Parked test plan (compiled out). For chunk counts 2, 4, ... 2^power_of_two_chunk_count it
// runs array_get() reps times with parallel transfers disabled, then reps times for each
// thread count 2, 4, ... 2^power_of_two_threads_max with parallel transfers enabled.
// Returns 0 on success, 1 when a BESError was caught and 2 for any other exception.
// The output_prefix parameter is not used by the body.
#if 0
int test_plan_01(const string &target_url,
        const string &output_prefix,
        const unsigned int reps,
        const size_t retrieval_size,
        const unsigned int power_of_two_chunk_count,
        const unsigned int power_of_two_threads_max,
        const string &output_file_base
        ) {
    int result = 0;
    if (debug)
        cerr << prolog << "BEGIN" << endl;

    try {
        string effectiveUrl = http::EffectiveUrlCache::TheCache()->get_effective_url(target_url);
        if (debug)
            cerr << prolog << "curl::retrieve_effective_url() returned: " << effectiveUrl << endl;
        size_t target_size = get_max_retrival_size(retrieval_size, effectiveUrl);

        // Outer loop on chunk size
        size_t chunk_count = 2;
        for (size_t chunk_pwr = 1; chunk_pwr <= power_of_two_chunk_count; chunk_pwr++) {

            // We turn off parallel transfers to get a baseline that is the single threaded, serial retrieval of the chunks.
            dmrpp::DmrppRequestHandler::d_use_parallel_transfers = false;
            for (unsigned int rep = 0; rep < reps; rep++) {
                array_get(effectiveUrl, target_size, chunk_count, output_file_base);
            }

            // Now we enable threads and starting with 2 work up to power_of_two_threads_max
            dmrpp::DmrppRequestHandler::d_use_parallel_transfers = true;
            unsigned int thread_count = 2;
            for (unsigned int tpwr = 1; tpwr <= power_of_two_threads_max; tpwr++) {
                dmrpp::DmrppRequestHandler::d_max_parallel_transfers = thread_count;
                for (unsigned int rep = 0; rep < reps; rep++) {
                    array_get(effectiveUrl, target_size, chunk_count, output_file_base);
                }
                thread_count *= 2;
            }
            chunk_count *= 2;
        }
    }
    catch (BESError &e) {
        // Caught by reference so the concrete exception object is not sliced.
        cerr << prolog << "Caught BESError. Message: " << e.get_message() << " " << e.get_file() << ":" << e.get_line() << endl;
        result = 1;
    }
    catch (...) {
        cerr << prolog << "Caught Unknown Exception." << endl;
        result = 2;
    }
    cerr << prolog << "END" << endl;
    return result;
}
#endif
633 
640 int main(int argc, char *argv[]) {
641 
642  int result = 0;
643  string bes_log_file;
644  string bes_debug_log_file = "cerr";
645  string bes_debug_keys = "bes,http,curl,dmrpp,dmrpp:3,dmrpp:4,rr";
646  string target_url = "https://www.opendap.org/pub/binary/hyrax-1.16/centos-7.x/bes-debuginfo-3.20.7-1.static.el7.x86_64.rpm";
647  string output_file_base("retriever");
648  string http_cache_dir;
649  string prefix;
650  size_t pwr2_number_o_chunks = 18;
651  size_t max_target_size = 0;
652  string http_netrc_file;
653  unsigned int reps=10;
654  unsigned pwr2_parallel_reads = 0;
655  bool aws_sign_request_url = false;
656 
657  char *prefixCstr = getenv("prefix");
658  if (prefixCstr) {
659  prefix = prefixCstr;
660  } else {
661  prefix = "/";
662  }
663  auto bes_config_file = BESUtil::assemblePath(prefix, "/etc/bes/bes.conf", true);
664 
665 
666  GetOpt getopt(argc, argv, "h:r:n:C:c:o:u:l:S:dbDp:A");
667  int option_char;
668  while ((option_char = getopt()) != -1) {
669  switch (option_char) {
670  case 'D':
671  Debug = true;
672  debug = true;
673  break;
674  case 'd':
675  debug = true;
676  break;
677  case 'b':
678  bes_debug = true;
679  break;
680  case 'A':
681  aws_sign_request_url = true;
682  break;
683  case 'c':
684  bes_config_file = getopt.optarg;
685  break;
686  case 'u':
687  target_url = getopt.optarg;
688  break;
689  case 'l':
690  bes_log_file = getopt.optarg;
691  break;
692  case 'n':
693  http_netrc_file = getopt.optarg;
694  break;
695  case 'o':
696  output_file_base = getopt.optarg;
697  break;
698  case 'C':
699  pwr2_number_o_chunks = atol(getopt.optarg);
700  break;
701  case 'S':
702  max_target_size = atol(getopt.optarg);
703  break;
704  case 'p':
705  pwr2_parallel_reads = atol(getopt.optarg);
706  break;
707  case 'r':
708  reps = atol(getopt.optarg);
709  break;
710  case 'h':
711  http_cache_dir = getopt.optarg;
712  break;
713 
714  default:
715  break;
716  }
717  }
718 
719  if (bes_log_file.empty()) {
720  bes_log_file = output_file_base + "_bes.log";
721  }
722 
723  cerr << prolog << "-- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - " << endl;
724  cerr << prolog << "debug: " << (debug ? "true" : "false") << endl;
725  cerr << prolog << "Debug: " << (Debug ? "true" : "false") << endl;
726  cerr << prolog << "bes_debug: " << (bes_debug ? "true" : "false") << endl;
727  cerr << prolog << "output_file_base: '" << output_file_base << "'" << endl;
728  cerr << prolog << "bes_config_file: '" << bes_config_file << "'" << endl;
729  cerr << prolog << "bes_log_file: '" << bes_log_file << "'" << endl;
730  cerr << prolog << "bes_debug_log_file: '" << bes_debug_log_file << "'" << endl;
731  cerr << prolog << "bes_debug_keys: '" << bes_debug_keys << "'" << endl;
732  cerr << prolog << "http_netrc_file: '" << http_netrc_file << "'" << endl;
733  cerr << prolog << "target_url: '" << target_url << "'" << endl;
734  cerr << prolog << "max_target_size: '" << max_target_size << "'" << endl;
735  cerr << prolog << "number_o_chunks: 2^" << pwr2_number_o_chunks << endl;
736  cerr << prolog << "reps: " << reps << endl;
737  if (pwr2_parallel_reads)
738  cerr << prolog << "parallel_reads: ENABLED (max: 2^" << pwr2_parallel_reads << ")" << endl;
739  else
740  cerr << prolog << "parallel_reads: DISABLED" << endl;
741  cerr << prolog << "-- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - " << endl;
742 
743 
744  try {
745  if(pwr2_parallel_reads){
746  unsigned long long int max_threads = 1ULL << pwr2_parallel_reads;
747  dmrpp::DmrppRequestHandler::d_use_parallel_transfers = true;
748  dmrpp::DmrppRequestHandler::d_max_parallel_transfers = max_threads;
749  }
750  else {
751  dmrpp::DmrppRequestHandler::d_use_parallel_transfers = false;
752  dmrpp::DmrppRequestHandler::d_max_parallel_transfers = 1;
753  }
754 
755  dmrpp::DmrppRequestHandler *dmrppRH = bes_setup(bes_config_file, bes_log_file, bes_debug_log_file,
756  bes_debug_keys, http_netrc_file,http_cache_dir);
757 
758  string effectiveUrl = http::EffectiveUrlCache::TheCache()->get_effective_url(target_url);
759  if (debug) cerr << prolog << "curl::retrieve_effective_url() returned: " << effectiveUrl << endl;
760  size_t target_size = get_max_retrival_size(max_target_size, effectiveUrl);
761 
762  unsigned long long int chunks = 1ULL << pwr2_number_o_chunks;
763  if (debug) cerr << prolog << "Dividing target into " << chunks << " chunks." << endl;
764 
765 
766 
767  array_get(effectiveUrl, target_size, chunks, output_file_base);
768 
769 
770 #if 0 // these work but are parked a.t.m.
771  result = test_plan_01(
772  target_url,
773  output_file_base,
774  reps,
775  max_target_size,
776  pwr2_number_o_chunks,
777  pwr2_parallel_reads,
778  output_file_base) ;
779 
780  simple_get(effectiveUrl, output_file_base);
781  serial_chunky_get( effectiveUrl, max_target_size, pwr2_number_o_chunks, output_file_base);
782 
783  parse_dmrpp(target_url);
784 
785 
786  string effectiveUrl = http::EffectiveUrlCache::TheCache()->get_effective_url(target_url);
787  if (debug)
788  cerr << prolog << "curl::retrieve_effective_url() returned: " << effectiveUrl << endl;
789  target_size = get_max_retrival_size(retrieval_size, effectiveUrl);
790  array_get(effectiveUrl, max_target_size, pwr2_number_o_chunks, output_file_base);
791 #endif
792 
793  curl_global_cleanup();
794  delete dmrppRH;
795  }
796  catch (BESError e) {
797  cerr << prolog << "Caught BESError. Message: " << e.get_message() << " " << e.get_file() << ":" << e.get_line()
798  << endl;
799  result = 1;
800  }
801  catch (...) {
802  cerr << prolog << "Caught Unknown Exception." << endl;
803  result = 2;
804  }
805 
806  return result;
807 }
virtual std::string get(const std::string &key)
virtual bool is_s3_cred()
Do the URL, ID, Key and Region items make up an S3 Credential?
static void SetUp(const std::string &values)
Sets up debugging for the bes.
Definition: BESDebug.cc:97
Abstract exception class for the BES with basic string message.
Definition: BESError.h:58
virtual int get_line()
get the line number where the exception was thrown
Definition: BESError.h:115
virtual std::string get_file()
get the file name where the exception was thrown
Definition: BESError.h:107
virtual std::string get_message()
get the error message for this exception
Definition: BESError.h:99
exception thrown if internal error encountered
virtual bool start(std::string name)
Definition: BESStopWatch.cc:67
static std::string lowercase(const std::string &s)
Definition: BESUtil.cc:200
static std::string assemblePath(const std::string &firstPart, const std::string &secondPart, bool leadingSlash=false, bool trailingSlash=false)
Assemble path fragments making sure that they are separated by a single '/' character.
Definition: BESUtil.cc:821
AccessCredentials * get(const std::string &url)
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:71
void set_key(const std::string &key, const std::string &val, bool addto=false)
allows the user to set key/value pairs from within the application.
Definition: TheBESKeys.cc:205
static std::string ConfigFile
Definition: TheBESKeys.h:184
Provide a way to print the DMR++ response.
Definition: DMRpp.h:42
Extend libdap::Array so that a handler can read data using a DMR++ file.
Definition: DmrppArray.h:64
static bool d_print_chunks
if true, print_dap4() prints chunk elements
Definition: DmrppCommon.h:103
virtual unsigned long add_chunk(const std::string &data_url, const std::string &byte_order, unsigned long long size, unsigned long long offset, const std::string &position_in_array="")
Add a new chunk as defined by an h4:byteStream element.
Definition: DmrppCommon.cc:199
virtual void parse_chunk_dimension_sizes(const std::string &chunk_dim_sizes_string)
Set the dimension sizes for a chunk.
Definition: DmrppCommon.cc:105
void intern(std::istream &f, libdap::DMR *dest_dmr)
static EffectiveUrlCache * TheCache()
Get the singleton EffectiveUrlCache instance.
std::string get_effective_url(const std::string &source_url)