bes  Updated for version 3.20.8
build_dmrpp.cc
1 // -*- mode: c++; c-basic-offset:4 -*-
2 
3 // This file is part of the Hyrax data server.
4 
5 // Copyright (c) 2018 OPeNDAP, Inc.
6 // Author: James Gallagher <jgallagher@opendap.org>
7 //
8 // This library is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 2.1 of the License, or (at your option) any later version.
12 //
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
17 //
18 // You should have received a copy of the GNU Lesser General Public
19 // License along with this library; if not, write to the Free Software
20 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 //
22 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
23 
24 #include <iostream>
25 #include <fstream>
26 #include <sstream>
27 #include <memory>
28 #include <iterator>
29 #include <algorithm>
30 
31 #include <cstdlib>
32 
33 #include <Array.h>
34 
35 //#define H5D_FRIEND // Workaround, needed to use H5D_chunk_rec_t
36 //#include <H5Dpkg.h>
37 #define H5S_MAX_RANK 32
38 #define H5O_LAYOUT_NDIMS (H5S_MAX_RANK+1)
39 
40 #include <H5Ppublic.h>
41 #include <H5Dpublic.h>
42 #include <H5Epublic.h>
43 #include <H5Zpublic.h> // Constants for compression filters
44 #include <H5Spublic.h>
45 #include "h5common.h"
46 
47 /*
48  * "Generic" chunk record. Each chunk is keyed by the minimum logical
49  * N-dimensional coordinates and the datatype size of the chunk.
50  * The fastest-varying dimension is assumed to reference individual bytes of
51  * the array, so a 100-element 1-D array of 4-byte integers would really be a
52  * 2-D array with the slow varying dimension of size 100 and the fast varying
53  * dimension of size 4 (the storage dimensionality has very little to do with
54  * the real dimensionality).
55  *
56  * The chunk's file address, filter mask and size on disk are not key values.
57  */
58 typedef struct H5D_chunk_rec_t {
59  hsize_t scaled[H5O_LAYOUT_NDIMS]; /* Logical offset to start */
60  uint32_t nbytes; /* Size of stored data */
61  uint32_t filter_mask; /* Excluded filters */
62  haddr_t chunk_addr; /* Address of chunk in file */
63 } H5D_chunk_rec_t;
64 
65 #include <DMRpp.h>
66 #include <D4Attributes.h>
67 #include <BaseType.h>
68 #include <D4ParserSax2.h>
69 #include <GetOpt.h>
70 
71 #include <TheBESKeys.h>
72 #include <BESUtil.h>
73 #include <BESDebug.h>
74 
75 #include <BESError.h>
76 #include <BESNotFoundError.h>
77 #include <BESInternalError.h>
78 #include <BESDataHandlerInterface.h>
79 
80 #include "DmrppTypeFactory.h"
81 #include "DmrppD4Group.h"
82 #include "DmrppMetadataStore.h"
83 #include "BESDapNames.h"
84 
85 using namespace std;
86 using namespace libdap;
87 using namespace dmrpp;
88 
89 static bool verbose = false;
90 #define VERBOSE(x) do { if (verbose) x; } while(false)
91 
92 #define DEBUG_KEY "metadata_store,dmrpp_store,dmrpp"
93 #define ROOT_DIRECTORY "BES.Catalog.catalog.RootDirectory"
94 
95 
100 // variable length string is handled by function read_vlen_string.
105 /*
106 void get_data(hid_t dset, void *buf)
107 {
108  BESDEBUG("h5", ">get_data()" << endl);
109 
110  hid_t dtype = -1;
111  if ((dtype = H5Dget_type(dset)) < 0) {
112  throw InternalErr(__FILE__, __LINE__, "Failed to get the datatype of the dataset");
113  }
114  hid_t dspace = -1;
115  if ((dspace = H5Dget_space(dset)) < 0) {
116  H5Tclose(dtype);
117  throw InternalErr(__FILE__, __LINE__, "Failed to get the data space of the dataset");
118  }
119  // Use HDF5 H5Tget_native_type API
120  hid_t memtype = H5Tget_native_type(dtype, H5T_DIR_ASCEND);
121  if (memtype < 0) {
122  H5Tclose(dtype);
123  H5Sclose(dspace);
124  throw InternalErr(__FILE__, __LINE__, "failed to get memory type");
125  }
126 
127  if (H5Dread(dset, memtype, dspace, dspace, H5P_DEFAULT, buf)
128  < 0) {
129  H5Tclose(dtype);
130  H5Tclose(memtype);
131  H5Sclose(dspace);
132  throw InternalErr(__FILE__, __LINE__, "failed to read data");
133  }
134 
135  if (H5Tclose(dtype) < 0){
136  H5Tclose(memtype);
137  H5Sclose(dspace);
138  throw InternalErr(__FILE__, __LINE__, "Unable to release the dtype.");
139  }
140 
141  if (H5Tclose(memtype) < 0){
142  H5Sclose(dspace);
143  throw InternalErr(__FILE__, __LINE__, "Unable to release the memtype.");
144  }
145 
146  if(H5Sclose(dspace)<0) {
147  throw InternalErr(__FILE__, __LINE__, "Unable to release the data space.");
148  }
149 #if 0
150  // Supposed to release the resource at the release at the HDF5Array destructor.
151  //if (H5Dclose(dset) < 0){
152  // throw InternalErr(__FILE__, __LINE__, "Unable to close the dataset.");
153  //}
154  }
155 #endif
156 
157  BESDEBUG("h5", "<get_data()" << endl);
158 }
159 
160 bool read_vlen_string(hid_t dsetid, int nelms, hsize_t *hoffset, hsize_t *hstep, hsize_t *hcount,vector<string> &finstrval)
161 {
162 
163  hid_t dspace = -1;
164  hid_t mspace = -1;
165  hid_t dtypeid = -1;
166  hid_t memtype = -1;
167  bool is_scalar = false;
168 
169 
170  if ((dspace = H5Dget_space(dsetid))<0) {
171  throw InternalErr (__FILE__, __LINE__, "Cannot obtain data space.");
172  }
173 
174  if(H5S_SCALAR == H5Sget_simple_extent_type(dspace))
175  is_scalar = true;
176 
177 
178  if (false == is_scalar) {
179  if (H5Sselect_hyperslab(dspace, H5S_SELECT_SET,
180  hoffset, hstep,
181  hcount, NULL) < 0) {
182  H5Sclose(dspace);
183  throw InternalErr (__FILE__, __LINE__, "Cannot generate the hyperslab of the HDF5 dataset.");
184  }
185 
186  int d_num_dim = H5Sget_simple_extent_ndims(dspace);
187  if(d_num_dim < 0) {
188  H5Sclose(dspace);
189  throw InternalErr (__FILE__, __LINE__, "Cannot obtain the number of dimensions of the data space.");
190  }
191 
192  mspace = H5Screate_simple(d_num_dim, hcount,NULL);
193  if (mspace < 0) {
194  H5Sclose(dspace);
195  throw InternalErr (__FILE__, __LINE__, "Cannot create the memory space.");
196  }
197  }
198 
199 
200  if ((dtypeid = H5Dget_type(dsetid)) < 0) {
201 
202  if (false == is_scalar)
203  H5Sclose(mspace);
204  H5Sclose(dspace);
205  throw InternalErr (__FILE__, __LINE__, "Cannot obtain the datatype.");
206 
207  }
208 
209  if ((memtype = H5Tget_native_type(dtypeid, H5T_DIR_ASCEND))<0) {
210 
211  if (false == is_scalar)
212  H5Sclose(mspace);
213  H5Tclose(dtypeid);
214  H5Sclose(dspace);
215  throw InternalErr (__FILE__, __LINE__, "Fail to obtain memory datatype.");
216 
217  }
218 
219  size_t ty_size = H5Tget_size(memtype);
220  if (ty_size == 0) {
221  if (false == is_scalar)
222  H5Sclose(mspace);
223  H5Tclose(memtype);
224  H5Tclose(dtypeid);
225  H5Sclose(dspace);
226  throw InternalErr (__FILE__, __LINE__,"Fail to obtain the size of HDF5 string.");
227  }
228 
229  vector <char> strval;
230  strval.resize(nelms*ty_size);
231  hid_t read_ret = -1;
232  if (true == is_scalar)
233  read_ret = H5Dread(dsetid,memtype,H5S_ALL,H5S_ALL,H5P_DEFAULT,(void*)&strval[0]);
234  else
235  read_ret = H5Dread(dsetid,memtype,mspace,dspace,H5P_DEFAULT,(void*)&strval[0]);
236 
237  if (read_ret < 0) {
238  if (false == is_scalar)
239  H5Sclose(mspace);
240  H5Tclose(memtype);
241  H5Tclose(dtypeid);
242  H5Sclose(dspace);
243  throw InternalErr (__FILE__, __LINE__, "Fail to read the HDF5 variable length string dataset.");
244  }
245 
246  // For scalar, nelms is 1.
247  char*temp_bp = &strval[0];
248  char*onestring = NULL;
249  for (int i =0;i<nelms;i++) {
250  onestring = *(char**)temp_bp;
251  if(onestring!=NULL )
252  finstrval[i] =string(onestring);
253  else // We will add a NULL if onestring is NULL.
254  finstrval[i]="";
255  temp_bp +=ty_size;
256  }
257 
258  if (false == strval.empty()) {
259  herr_t ret_vlen_claim;
260  if (true == is_scalar)
261  ret_vlen_claim = H5Dvlen_reclaim(memtype,dspace,H5P_DEFAULT,(void*)&strval[0]);
262  else
263  ret_vlen_claim = H5Dvlen_reclaim(memtype,mspace,H5P_DEFAULT,(void*)&strval[0]);
264  if (ret_vlen_claim < 0){
265  if (false == is_scalar)
266  H5Sclose(mspace);
267  H5Tclose(memtype);
268  H5Tclose(dtypeid);
269  H5Sclose(dspace);
270  throw InternalErr (__FILE__, __LINE__, "Cannot reclaim the memory buffer of the HDF5 variable length string.");
271 
272  }
273  }
274 
275  if (false == is_scalar)
276  H5Sclose(mspace);
277  H5Tclose(memtype);
278  H5Tclose(dtypeid);
279  H5Sclose(dspace);
280 
281  return true;
282 
283 }
284 */
285 
286 
297 static void print_dataset_type_info(hid_t dataset, uint8_t layout_type) {
298  hid_t dtype_id = H5Dget_type(dataset);
299  if (dtype_id < 0) {
300  throw BESInternalError("Cannot obtain the correct HDF5 datatype.", __FILE__, __LINE__);
301  }
302 
303  if (H5Tget_class(dtype_id) == H5T_INTEGER || H5Tget_class(dtype_id) == H5T_FLOAT) {
304  hid_t dcpl_id = H5Dget_create_plist(dataset);
305  if (dcpl_id < 0) {
306  throw BESInternalError("Cannot obtain the HDF5 dataset creation property list.", __FILE__, __LINE__);
307  }
308 
309  try {
310  // Wrap the resources like dcpl_id in try/catch blocks so that the
311  // calls to H5Pclose(dcpl_id) for each error can be removed. jhrg 5/7/18
312  H5D_fill_value_t fvalue_status;
313  if (H5Pfill_value_defined(dcpl_id, &fvalue_status) < 0) {
314  H5Pclose(dcpl_id);
315  throw BESInternalError("Cannot obtain the fill value status.", __FILE__, __LINE__);
316  }
317  if (fvalue_status == H5D_FILL_VALUE_UNDEFINED) {
318  // Replace with switch(), here and elsewhere. jhrg 5/7/18
319  if (layout_type == 1)
320  cerr << " The storage size is 0 and the storage type is contiguous." << endl;
321  else if (layout_type == 2)
322  cerr << " The storage size is 0 and the storage type is chunking." << endl;
323  else if (layout_type == 3) cerr << " The storage size is 0 and the storage type is compact." << endl;
324 
325  cerr << " The Fillvalue is undefined ." << endl;
326  } else {
327  if (layout_type == 1)
328  cerr << " The storage size is 0 and the storage type is contiguous." << endl;
329  else if (layout_type == 2)
330  cerr << " The storage size is 0 and the storage type is chunking." << endl;
331  else if (layout_type == 3) cerr << " The storage size is 0 and the storage type is compact." << endl;
332 
333  char *fvalue = NULL;
334  size_t fv_size = H5Tget_size(dtype_id);
335  if (fv_size == 1)
336  fvalue = (char *) (malloc(1));
337  else if (fv_size == 2)
338  fvalue = (char *) (malloc(2));
339  else if (fv_size == 4)
340  fvalue = (char *) (malloc(4));
341  else if (fv_size == 8) fvalue = (char *) (malloc(8));
342 
343  if (fv_size <= 8) {
344  if (H5Pget_fill_value(dcpl_id, dtype_id, (void *) (fvalue)) < 0) {
345  H5Pclose(dcpl_id);
346  throw BESInternalError("Cannot obtain the fill value status.", __FILE__, __LINE__);
347  }
348  if (H5Tget_class(dtype_id) == H5T_INTEGER) {
349  H5T_sign_t fv_sign = H5Tget_sign(dtype_id);
350  if (fv_size == 1) {
351  if (fv_sign == H5T_SGN_NONE) {
352  cerr << "This dataset's datatype is unsigned char " << endl;
353  cerr << "and the fillvalue is " << *fvalue << endl;
354  } else {
355  cerr << "This dataset's datatype is char and the fillvalue is " << *fvalue << endl;
356  }
357  } else if (fv_size == 2) {
358  if (fv_sign == H5T_SGN_NONE) {
359  cerr << "This dataset's datatype is unsigned short and the fillvalue is " << *fvalue
360  << endl;
361  } else {
362  cerr << "This dataset's datatype is short and the fillvalue is " << *fvalue << endl;
363  }
364  } else if (fv_size == 4) {
365  if (fv_sign == H5T_SGN_NONE) {
366  cerr << "This dataset's datatype is unsigned int and the fillvalue is " << *fvalue
367  << endl;
368  } else {
369  cerr << "This dataset's datatype is int and the fillvalue is " << *fvalue << endl;
370  }
371  } else if (fv_size == 8) {
372  if (fv_sign == H5T_SGN_NONE) {
373  cerr << "This dataset's datatype is unsigned long long and the fillvalue is " << *fvalue
374  << endl;
375  } else {
376  cerr << "This dataset's datatype is long long and the fillvalue is " << *fvalue << endl;
377  }
378  }
379  }
380  if (H5Tget_class(dtype_id) == H5T_FLOAT) {
381  if (fv_size == 4) {
382  cerr << "This dataset's datatype is float and the fillvalue is " << *fvalue << endl;
383  } else if (fv_size == 8) {
384  cerr << "This dataset's datatype is double and the fillvalue is " << *fvalue << endl;
385  }
386  }
387 
388  if (fvalue != NULL) free(fvalue);
389  } else
390  cerr
391  << "The size of the datatype is greater than 8 bytes, Use HDF5 API H5Pget_fill_value() to retrieve the fill value of this dataset."
392  << endl;
393  }
394  }
395  catch (...) {
396  H5Pclose(dcpl_id);
397  throw;
398  }
399  H5Pclose(dcpl_id);
400  } else {
401  if (layout_type == 1)
402  cerr << " The storage size is 0 and the storage type is contiguous." << endl;
403  else if (layout_type == 2)
404  cerr << " The storage size is 0 and the storage type is chunking." << endl;
405  else if (layout_type == 3) cerr << " The storage size is 0 and the storage type is compact." << endl;
406 
407  cerr
408  << "The datatype is neither float nor integer,use HDF5 API H5Pget_fill_value() to retrieve the fill value of this dataset."
409  << endl;
410  }
411 }
412 
413 // FYI: Filter IDs
414 // H5Z_FILTER_ERROR (-1) no filter
415 // H5Z_FILTER_NONE 0 reserved indefinitely
416 // H5Z_FILTER_DEFLATE 1 deflation like gzip
417 // H5Z_FILTER_SHUFFLE 2 shuffle the data
418 // H5Z_FILTER_FLETCHER32 3 fletcher32 checksum of EDC
419 // H5Z_FILTER_SZIP 4 szip compression
420 // H5Z_FILTER_NBIT 5 nbit compression
421 // H5Z_FILTER_SCALEOFFSET 6 scale+offset compression
422 // H5Z_FILTER_RESERVED 256 filter ids below this value are reserved for library use
423 
430 static void set_filter_information(hid_t dataset_id, DmrppCommon *dc) {
431  hid_t plist_id = H5Dget_create_plist(dataset_id);
432 
433  try {
434  int numfilt = H5Pget_nfilters(plist_id);
435  VERBOSE(cerr << "Number of filters associated with dataset: " << numfilt << endl);
436 
437  for (int filter = 0; filter < numfilt; filter++) {
438  size_t nelmts = 0;
439  unsigned int flags, filter_info;
440  H5Z_filter_t filter_type = H5Pget_filter2(plist_id, filter, &flags, &nelmts, NULL, 0, NULL, &filter_info);
441  VERBOSE(cerr << "Filter Type: ");
442 
443  switch (filter_type) {
444  case H5Z_FILTER_DEFLATE:
445  VERBOSE(cerr << "H5Z_FILTER_DEFLATE" << endl);
446  dc->set_deflate(true);
447  break;
448  case H5Z_FILTER_SHUFFLE:
449  VERBOSE(cerr << "H5Z_FILTER_SHUFFLE" << endl);
450  dc->set_shuffle(true);
451  break;
452  default: {
453  ostringstream oss("Unsupported HDF5 filter: ", std::ios::ate);
454  oss << filter_type;
455  throw BESInternalError(oss.str(), __FILE__, __LINE__);
456  }
457  }
458  }
459  }
460  catch (...) {
461  H5Pclose(plist_id);
462  throw;
463  }
464 
465  H5Pclose(plist_id);
466 }
467 
478 static void get_variable_chunk_info(hid_t dataset, DmrppCommon *dc) {
479  std::string byteOrder = "";
480  H5T_order_t byte_order = H5T_ORDER_ERROR;
481 
482  try {
483  hid_t dcpl = H5Dget_create_plist(dataset);
484  uint8_t layout_type = H5Pget_layout(dcpl);
485 
486  hid_t fspace_id = H5Dget_space(dataset);
487  hid_t ftype_id = H5Dget_type(dataset);
488 
489  byte_order = H5Tget_order(ftype_id);
490  switch (byte_order) {
491  case H5T_ORDER_LE:
492  byteOrder = "LE";
493  break;
494  case H5T_ORDER_BE:
495  byteOrder = "BE";
496  break;
497  case H5T_ORDER_NONE:
498  break;
499  default:
500  ostringstream oss("Unsupported HDF5 dataset byteOrder: ", std::ios::ate);
501  oss << byte_order << ".";
502  BESInternalError(oss.str(), __FILE__, __LINE__);
503  break; // unsupported enumerations: H5T_ORDER_[ERROR,VAX,MIXED,NONE]
504  }
505 
506  unsigned int dataset_rank = H5Sget_simple_extent_ndims(fspace_id);
507 
508  hid_t dtypeid = H5Dget_type(dataset);
509 
510  size_t dsize = H5Tget_size(dtypeid);
511 
512  /* layout_type: 1 contiguous 2 chunk 3 compact */
513  switch (layout_type) {
514 
515  case H5D_CONTIGUOUS: { /* Contiguous storage */
516  haddr_t cont_addr = 0;
517  hsize_t cont_size = 0;
518 
519  VERBOSE(cerr << "Storage: contiguous" << endl);
520 
521  cont_addr = H5Dget_offset(dataset);
522  /* if statement never less than zero due to cont_addr being unsigned int. SBL 1.29.20
523  if (cont_addr < 0) {
524  throw BESInternalError("Cannot obtain the offset.", __FILE__, __LINE__);
525  }*/
526  cont_size = H5Dget_storage_size(dataset);
527  /* if statement never less than zero due to cont_size being unsigned int. SBL 1.29.20
528  if (cont_size < 0) {
529  throw BESInternalError("Cannot obtain the storage size.", __FILE__, __LINE__);
530  }*/
531 
532 
533  VERBOSE(cerr << " Addr: " << cont_addr << endl);
534  VERBOSE(cerr << " Size: " << cont_size << endl);
535  VERBOSE(cerr << "byteOrder: " << byteOrder << endl);
536 
537  if (cont_size > 0) {
538  if (dc) dc->add_chunk("", byteOrder, cont_size, cont_addr, "" /*pos in array*/);
539  }
540  break;
541  }
542  case H5D_CHUNKED: { /*chunking storage */
543  hsize_t num_chunks = 0;
544  herr_t status = H5Dget_num_chunks(dataset, fspace_id, &num_chunks);
545  if (status < 0) {
546  throw BESInternalError("Could not get the number of chunks",
547  __FILE__, __LINE__);
548  }
549 
550  VERBOSE(cerr << "Storage: chunked." << endl);
551  VERBOSE(cerr << "Number of chunks is: " << num_chunks << endl);
552 
553  if (dc)
554  set_filter_information(dataset, dc);
555 
556  // Get chunking information: rank and dimensions
557  vector<size_t> chunk_dims(dataset_rank);
558  unsigned int chunk_rank = H5Pget_chunk(dcpl, dataset_rank, (hsize_t *) &chunk_dims[0]);
559  if (chunk_rank != dataset_rank)
560  throw BESNotFoundError(
561  "Found a chunk with rank different than the dataset's (aka variables's) rank", __FILE__,
562  __LINE__);
563 
564  if (dc) dc->set_chunk_dimension_sizes(chunk_dims);
565 
566  for (unsigned int i = 0; i < num_chunks; ++i) {
567 
568  vector<hsize_t> temp_coords(dataset_rank);
569  vector<unsigned int> chunk_coords(dataset_rank); //FIXME - see below
570 
571  haddr_t addr = 0;
572  hsize_t size = 0;
573 
574  //H5_DLL herr_t H5Dget_chunk_info(hid_t dset_id, hid_t fspace_id, hsize_t chk_idx, hsize_t *coord, unsigned *filter_mask, haddr_t *addr, hsize_t *size);
575  status = H5Dget_chunk_info(dataset, fspace_id, i, &temp_coords[0], NULL, &addr, &size);
576  if (status < 0) {
577  VERBOSE(cerr << "ERROR" << endl);
578  throw BESInternalError("Cannot get HDF5 dataset storage info.", __FILE__, __LINE__);
579  }
580 
581  VERBOSE(cerr << "chk_idk: " << i << ", addr: " << addr << ", size: " << size << endl);
582 
583  //The coords need to be of type 'unsigned int' when passed into add_chunk()
584  // This loop simply copies the values from the temp_coords to chunk_coords - kln 5/1/19
585  for (unsigned int j = 0; j < chunk_coords.size(); ++j) {
586  chunk_coords[j] = temp_coords[j];
587  }
588 
589  // FIXME Modify add_chunk so that it takes a vector<unsigned long long> or <unsined long>
590  // (depending on the machine/OS/compiler). Limiting the offset to 32-bits won't work
591  // for large files. jhrg 5/21/19
592  if (dc) dc->add_chunk("", byteOrder, size, addr, chunk_coords);
593  }
594 
595  break;
596  }
597 
598  case H5D_COMPACT: { /* Compact storage */
599  //else if (layout_type == 3) {
600  VERBOSE(cerr << "Storage: compact" << endl);
601 
602  size_t comp_size = H5Dget_storage_size(dataset);
603  VERBOSE(cerr << " Size: " << comp_size << endl);
604 
605  if (comp_size == 0) {
606  throw BESInternalError("Cannot obtain the compact storage size.",
607  __FILE__, __LINE__);
608  }
609 
610  vector<uint8_t> values;
611 
612  Array *btp = dynamic_cast<Array *>(dc);
613  if (btp != NULL) {
614  dc->set_compact(true);
615  size_t memRequired = btp->length() * dsize;
616 
617  if (comp_size != memRequired) {
618  throw BESInternalError("Compact storage size does not match D4Array.",
619  __FILE__, __LINE__);
620  }
621 
622  switch (btp->var()->type()) {
623  case dods_byte_c:
624  case dods_char_c:
625  case dods_int8_c:
626  case dods_uint8_c:
627  case dods_int16_c:
628  case dods_uint16_c:
629  case dods_int32_c:
630  case dods_uint32_c:
631  case dods_float32_c:
632  case dods_float64_c:
633  case dods_int64_c:
634  case dods_uint64_c: {
635  values.resize(memRequired);
636  get_data(dataset, reinterpret_cast<void *>(&values[0]));
637  btp->set_read_p(true);
638  btp->val2buf(reinterpret_cast<void *>(&values[0]));
639  break;
640 
641  }
642 
643  case dods_str_c: {
644  if (H5Tis_variable_str(dtypeid) > 0) {
645  vector<string> finstrval = {""}; // passed by reference to read_vlen_string
646  read_vlen_string(dataset, 1, NULL, NULL, NULL, finstrval);
647  btp->set_value(finstrval, finstrval.size());
648  btp->set_read_p(true);
649  } else {
650  // For this case, the Array is really a single string - check for that
651  // with the following assert - but is an Array because the string data
652  // is stored as an array of chars (hello, FORTRAN). Read the chars, make
653  // a string and load that into a vector<string> (which will be a vector
654  // of length one). Set that as the value of the Array. Really, this
655  // value could be stored as a scalar, but that's complicated and client
656  // software might be expecting an array, so better to handle it this way.
657  // jhrg 9/17/20
658  assert(btp->length() == 1);
659  values.resize(memRequired);
660  get_data(dataset, reinterpret_cast<void *>(&values[0]));
661  string str(values.begin(), values.end());
662  vector<string> strings = {str};
663  btp->set_value(strings, strings.size());
664  btp->set_read_p(true);
665  }
666  break;
667  }
668 
669  default:
670  throw BESInternalError("Unsupported compact storage variable type.", __FILE__, __LINE__);
671  }
672 
673  } else {
674  throw BESInternalError("Compact storage variable is not a D4Array.",
675  __FILE__, __LINE__);
676  }
677  break;
678  }
679 
680  default: {
681  ostringstream oss("Unsupported HDF5 dataset layout type: ", std::ios::ate);
682  oss << layout_type << ".";
683  BESInternalError(oss.str(), __FILE__, __LINE__);
684  break;
685  }
686  }
687  }
688  catch (...) {
689  H5Dclose(dataset);
690  throw;
691  }
692 
693  H5Dclose(dataset);
694 }
695 
703 static void get_chunks_for_all_variables(hid_t file, D4Group *group) {
704  // variables in the group
705  for (Constructor::Vars_iter v = group->var_begin(), ve = group->var_end(); v != ve; ++v) {
706  // if this variable has a 'fullnamepath' attribute, use that and not the
707  // FQN value.
708  D4Attributes *d4_attrs = (*v)->attributes();
709  if (!d4_attrs)
710  throw BESInternalError("Expected to find an attribute table for " + (*v)->name() + " but did not.",
711  __FILE__, __LINE__);
712 
713  // Look for the full name path for this variable
714  // If one was not given via an attribute, use BaseType::FQN() which
715  // relies on the variable's position in the DAP dataset hierarchy.
716  D4Attribute *attr = d4_attrs->get("fullnamepath");
717  string FQN;
718  // I believe the logic is more clear in this way:
719  // If fullnamepath exists and the H5Dopen2 fails to open, it should throw an error.
720  // If fullnamepath doesn't exist, we should ignore the error as the reason described below:
721  // (However, we should suppress the HDF5 dataset open error message.) KY 2019-12-02
722  // It's not an error if a DAP variable in a DMR from the hdf5 handler
723  // doesn't exist in the file _if_ there's no 'fullnamepath' because
724  // that variable was synthesized (likely for CF compliance)
725  hid_t dataset = -1;
726  if (attr) {
727  if (attr->num_values() == 1)
728  FQN = attr->value(0);
729  else
730  FQN = (*v)->FQN();
731  BESDEBUG("dmrpp", "Working on: " << FQN << endl);
732  dataset = H5Dopen2(file, FQN.c_str(), H5P_DEFAULT);
733  if (dataset < 0)
734  throw BESInternalError("HDF5 dataset '" + FQN + "' cannot be opened.", __FILE__, __LINE__);
735 
736  } else {
737  // The current design seems to still prefer to open the dataset when the fullnamepath doesn't exist
738  // So go ahead to open the dataset. Continue even if the dataset cannot be open. KY 2019-12-02
739  H5Eset_auto2(H5E_DEFAULT, NULL, NULL);
740  FQN = (*v)->FQN();
741  BESDEBUG("dmrpp", "Working on: " << FQN << endl);
742  dataset = H5Dopen2(file, FQN.c_str(), H5P_DEFAULT);
743  if (dataset < 0)
744  continue;
745  }
746 
747 
748 #if 0
749  if (attr && attr->num_values() == 1)
750  FQN = attr->value(0);
751  else
752  FQN = (*v)->FQN();
753 
754  VERBOSE(cerr << "Working on: " << FQN << endl);
755  hid_t dataset = H5Dopen2(file, FQN.c_str(), H5P_DEFAULT);
756  // It's not an error if a DAP variable in a DMR from the hdf5 handler
757  // doesn't exist in the file _if_ there's no 'fullnamepath' because
758  // that variable was synthesized (likely for CF compliance)
759  if (dataset < 0 && attr == 0) {
760  cerr<<"Unable to open dataset name "<<FQN <<endl;
761  continue;
762  }
763  else if (dataset < 0)
764  throw BESInternalError("HDF5 dataset '" + FQN + "' cannot be opened.", __FILE__, __LINE__);
765 #endif
766  get_variable_chunk_info(dataset, dynamic_cast<DmrppCommon *>(*v));
767  }
768 
769  // all groups in the group
770  D4Group::groupsIter g = group->grp_begin();
771  D4Group::groupsIter ge = group->grp_end();
772  while (g != ge)
773  get_chunks_for_all_variables(file, *g++);
774 }
775 
776 
777 int main(int argc, char *argv[]) {
778  string h5_file_name = "";
779  string h5_dset_path = "";
780  string dmr_name = "";
781  string url_name = "";
782  int status = 0;
783 
784  GetOpt getopt(argc, argv, "c:f:r:u:dhv");
785  int option_char;
786  while ((option_char = getopt()) != -1) {
787  switch (option_char) {
788  case 'v':
789  verbose = true; // verbose hdf5 errors
790  break;
791 
792  case 'd':
793  BESDebug::SetUp(string("cerr,").append(DEBUG_KEY));
794  break;
795 
796  case 'f':
797  h5_file_name = getopt.optarg;
798  break;
799  case 'r':
800  dmr_name = getopt.optarg;
801  break;
802  case 'u':
803  url_name = getopt.optarg;
804  break;
805  case 'c':
806  TheBESKeys::ConfigFile = getopt.optarg;
807  break;
808  case 'h':
809  cerr
810  << "build_dmrpp [-v] -c <bes.conf> -f <data file> [-u <href url>] | build_dmrpp -f <data file> -r <dmr file> | build_dmrpp -h"
811  << endl;
812  exit(1);
813  default:
814  break;
815  }
816  }
817 
818  if (h5_file_name.empty()) {
819  cerr << "HDF5 file name must be given (-f <input>)." << endl;
820  return 1;
821  }
822 
823  hid_t file = 0;
824  try {
825  // Turn off automatic hdf5 error printing.
826  // See: https://support.hdfgroup.org/HDF5/doc1.8/RM/RM_H5E.html#Error-SetAuto2
827  //if (!verbose) H5Eset_auto2(H5E_DEFAULT, NULL, NULL);
828 
829  // For a given HDF5, get info for all the HDF5 datasets in a DMR or for a
830  // given HDF5 dataset
831  if (!dmr_name.empty()) {
832  // Get dmr:
833  unique_ptr<DMRpp> dmrpp(new DMRpp);
834  DmrppTypeFactory dtf;
835  dmrpp->set_factory(&dtf);
836 
837  ifstream in(dmr_name.c_str());
838  D4ParserSax2 parser;
839  parser.intern(in, dmrpp.get(), false);
840 
841  // Open the hdf5 file
842  file = H5Fopen(h5_file_name.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
843  if (file < 0) {
844  cerr << "Error: HDF5 file '" + h5_file_name + "' cannot be opened." << endl;
845  return 1;
846  }
847 
848  // iterate over all the variables in the DMR
849  get_chunks_for_all_variables(file, dmrpp->root());
850 
851  XMLWriter writer;
852  dmrpp->print_dmrpp(writer, url_name);
853 
854  cout << writer.get_doc();
855  } else {
856  bool found;
857  string bes_data_root;
858  try {
859  TheBESKeys::TheKeys()->get_value(ROOT_DIRECTORY, bes_data_root, found);
860  if (!found) {
861  cerr << "Error: Could not find the BES root directory key." << endl;
862  return 1;
863  }
864  }
865  catch (BESError &e) {
866  cerr << "BESError: " << e.get_message() << endl;
867  return 1;
868  }
869 
870  // Use the values from the bes.conf file... jhrg 5/21/18
872  if (!mds) {
873  cerr << "The Metadata Store (MDS) must be configured for this command to work." << endl;
874  return 1;
875  }
876 
877  // Use the full path to open the file, but use the 'name' (which is the
878  // path relative to the BES Data Root) with the MDS.
879  // Changed this to utilze assmeblePath() because simply concatenating the strings
880  // is fragile. - ndp 6/6/18
881  string h5_file_path = BESUtil::assemblePath(bes_data_root, h5_file_name);
882 
883  //bes::DmrppMetadataStore::MDSReadLock lock = mds->is_dmr_available(h5_file_name /*h5_file_path*/);
884  bes::DmrppMetadataStore::MDSReadLock lock = mds->is_dmr_available(h5_file_path, h5_file_name, "h5");
885  if (lock()) {
886  // parse the DMR into a DMRpp (that uses the DmrppTypes)
887  unique_ptr<DMRpp> dmrpp(dynamic_cast<DMRpp *>(mds->get_dmr_object(h5_file_name /*h5_file_path*/)));
888  if (!dmrpp.get()) {
889  cerr << "Expected a DMR++ object from the DmrppMetadataStore." << endl;
890  return 1;
891  }
892 
893  // Open the hdf5 file
894  file = H5Fopen(h5_file_path.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
895  if (file < 0) {
896  cerr << "Error: HDF5 file '" + h5_file_path + "' cannot be opened." << endl;
897  return 1;
898  }
899 
900  get_chunks_for_all_variables(file, dmrpp->root());
901 
902  dmrpp->set_href(url_name);
903 
904  mds->add_dmrpp_response(dmrpp.get(), h5_file_name /*h5_file_path*/);
905 
906  XMLWriter writer;
907  dmrpp->set_print_chunks(true);
908  dmrpp->print_dap4(writer);
909 
910  cout << writer.get_doc();
911  } else {
912  cerr << "Error: Could not get a lock on the DMR for '" + h5_file_path + "'." << endl;
913  return 1;
914  }
915  }
916  }
917  catch (BESError &e) {
918  cerr << "BESError: " << e.get_message() << endl;
919  status = 1;
920  }
921  catch (std::exception &e) {
922  cerr << "std::exception: " << e.what() << endl;
923  status = 1;
924  }
925  catch (...) {
926  cerr << "Unknown error." << endl;
927  status = 1;
928  }
929 
930  H5Fclose(file);
931 
932  return status;
933 }
static void SetUp(const std::string &values)
Sets up debugging for the bes.
Definition: BESDebug.cc:97
Abstract exception class for the BES with basic string message.
Definition: BESError.h:58
virtual std::string get_message()
get the error message for this exception
Definition: BESError.h:99
exception thrown if internal error encountered
error thrown if the resource requested cannot be found
static std::string assemblePath(const std::string &firstPart, const std::string &secondPart, bool leadingSlash=false, bool trailingSlash=false)
Assemble path fragments making sure that they are separated by a single '/' character.
Definition: BESUtil.cc:821
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
Definition: TheBESKeys.cc:339
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:71
static std::string ConfigFile
Definition: TheBESKeys.h:184
Store the DAP DMR++ metadata responses.
virtual libdap::DMR * get_dmr_object(const string &name)
Use the DMR response to build a DMR with Dmrpp Types.
static DmrppMetadataStore * get_instance()
virtual MDSReadLock is_dmr_available(const std::string &name)
Is the DMR response for.
Provide a way to print the DMR++ response.
Definition: DMRpp.h:42
Size and offset information of data included in DMR++ files.
Definition: DmrppCommon.h:68
void set_chunk_dimension_sizes(const std::vector< size_t > &chunk_dims)
Set the value of the chunk dimension sizes given a vector of HDF5 hsize_t.
Definition: DmrppCommon.h:183
virtual unsigned long add_chunk(const std::string &data_url, const std::string &byte_order, unsigned long long size, unsigned long long offset, const std::string &position_in_array="")
Add a new chunk as defined by an h4:byteStream element.
Definition: DmrppCommon.cc:199
void set_deflate(bool value)
Set the value of the deflate property.
Definition: DmrppCommon.h:124
void set_compact(bool value)
Set the value of the compact property.
Definition: DmrppCommon.h:144
void set_shuffle(bool value)
Set the value of the shuffle property.
Definition: DmrppCommon.h:134
void get_data(hid_t dset, void *buf)
Definition: h5common.cc:50
Unlock and close the MDS item when the ReadLock goes out of scope.