// Maximum rank constant (32) and a layout dimension count defined as that
// rank plus one. The second macro sizes the scaled[] array of
// H5D_chunk_rec_t further down in this file.
// NOTE(review): both names match macros of the HDF5 library; presumably they
// are repeated here because the defining header is not public - confirm.
37 #define H5S_MAX_RANK 32
38 #define H5O_LAYOUT_NDIMS (H5S_MAX_RANK+1)
40 #include <H5Ppublic.h>
41 #include <H5Dpublic.h>
42 #include <H5Epublic.h>
43 #include <H5Zpublic.h>
44 #include <H5Spublic.h>
58 typedef struct H5D_chunk_rec_t {
59 hsize_t scaled[H5O_LAYOUT_NDIMS];
66 #include <D4Attributes.h>
68 #include <D4ParserSax2.h>
71 #include <TheBESKeys.h>
76 #include <BESNotFoundError.h>
77 #include <BESInternalError.h>
78 #include <BESDataHandlerInterface.h>
80 #include "DmrppTypeFactory.h"
81 #include "DmrppD4Group.h"
82 #include "DmrppMetadataStore.h"
83 #include "BESDapNames.h"
87 using namespace dmrpp;
// File-local switch read by the VERBOSE() macro; it starts out false.
// NOTE(review): presumably set by a command line option in main() - the
// assignment is not visible in this listing.
89 static bool verbose =
false;
// Runs the statement x only when verbose is true. The do/while(false) wrapper
// makes the expansion behave as a single statement (safe after if/else).
90 #define VERBOSE(x) do { if (verbose) x; } while(false)
// Comma separated list of debug context names.
// NOTE(review): the place where this is consumed is not visible here.
92 #define DEBUG_KEY "metadata_store,dmrpp_store,dmrpp"
// Name of the BES configuration key for the catalog root directory.
// NOTE(review): presumably looked up in main() - the lookup is not visible.
93 #define ROOT_DIRECTORY "BES.Catalog.catalog.RootDirectory"
/**
 * @brief Write diagnostic text about the storage type and fill value of a dataset to cerr.
 *
 * For integer and float datatypes the fill value status is read from the
 * dataset creation property list. When the fill value is undefined only the
 * storage type is reported; otherwise the fill value is fetched into a
 * malloc-ed buffer and printed together with a datatype name chosen from the
 * datatype size and sign. Other datatype classes get a message pointing the
 * user at H5Pget_fill_value().
 *
 * NOTE(review): the embedded line numbers are not consecutive, so several
 * statements of this function (guards, else branches, closing braces,
 * declarations) are not shown in this listing.
 *
 * @param dataset HDF5 dataset identifier.
 * @param layout_type Storage code; 1, 2 and 3 are reported as contiguous,
 * chunking and compact respectively.
 * @throws BESInternalError if the datatype, the creation property list, the
 * fill value status or the fill value itself cannot be obtained.
 */
297 static void print_dataset_type_info(hid_t dataset, uint8_t layout_type) {
// Datatype handle of the dataset; the check guarding the throw below is not visible.
298 hid_t dtype_id = H5Dget_type(dataset);
300 throw BESInternalError(
"Cannot obtain the correct HDF5 datatype.", __FILE__, __LINE__);
// Only integer and float classes have their fill value inspected.
303 if (H5Tget_class(dtype_id) == H5T_INTEGER || H5Tget_class(dtype_id) == H5T_FLOAT) {
304 hid_t dcpl_id = H5Dget_create_plist(dataset);
306 throw BESInternalError(
"Cannot obtain the HDF5 dataset creation property list.", __FILE__, __LINE__);
// Ask the property list whether a fill value has been defined at all.
312 H5D_fill_value_t fvalue_status;
313 if (H5Pfill_value_defined(dcpl_id, &fvalue_status) < 0) {
315 throw BESInternalError(
"Cannot obtain the fill value status.", __FILE__, __LINE__);
// Undefined fill value: report the storage type and say so.
317 if (fvalue_status == H5D_FILL_VALUE_UNDEFINED) {
319 if (layout_type == 1)
320 cerr <<
" The storage size is 0 and the storage type is contiguous." << endl;
321 else if (layout_type == 2)
322 cerr <<
" The storage size is 0 and the storage type is chunking." << endl;
323 else if (layout_type == 3) cerr <<
" The storage size is 0 and the storage type is compact." << endl;
325 cerr <<
" The Fillvalue is undefined ." << endl;
// Same storage type report again; presumably the start of the branch taken
// when a fill value is defined (the else itself is not visible).
327 if (layout_type == 1)
328 cerr <<
" The storage size is 0 and the storage type is contiguous." << endl;
329 else if (layout_type == 2)
330 cerr <<
" The storage size is 0 and the storage type is chunking." << endl;
331 else if (layout_type == 3) cerr <<
" The storage size is 0 and the storage type is compact." << endl;
// The fill value buffer is sized from the datatype size; the visible lines
// allocate for sizes 2, 4 and 8 plus a one byte case whose condition is not shown.
334 size_t fv_size = H5Tget_size(dtype_id);
336 fvalue = (
char *) (malloc(1));
337 else if (fv_size == 2)
338 fvalue = (
char *) (malloc(2));
339 else if (fv_size == 4)
340 fvalue = (
char *) (malloc(4));
341 else if (fv_size == 8) fvalue = (
char *) (malloc(8));
// NOTE(review): the message thrown when H5Pget_fill_value() fails repeats the
// fill value status text used for H5Pfill_value_defined() above - confirm
// whether a distinct message was intended.
344 if (H5Pget_fill_value(dcpl_id, dtype_id, (
void *) (fvalue)) < 0) {
346 throw BESInternalError(
"Cannot obtain the fill value status.", __FILE__, __LINE__);
// Integer class: the printed type name depends on sign and size.
// NOTE(review): fvalue is assigned from a (char *) cast, so it appears to be a
// char pointer; if so, every *fvalue below streams one character (the first
// byte of the buffer) rather than the numeric fill value - confirm.
348 if (H5Tget_class(dtype_id) == H5T_INTEGER) {
349 H5T_sign_t fv_sign = H5Tget_sign(dtype_id);
351 if (fv_sign == H5T_SGN_NONE) {
352 cerr <<
"This dataset's datatype is unsigned char " << endl;
353 cerr <<
"and the fillvalue is " << *fvalue << endl;
355 cerr <<
"This dataset's datatype is char and the fillvalue is " << *fvalue << endl;
357 }
else if (fv_size == 2) {
358 if (fv_sign == H5T_SGN_NONE) {
359 cerr <<
"This dataset's datatype is unsigned short and the fillvalue is " << *fvalue
362 cerr <<
"This dataset's datatype is short and the fillvalue is " << *fvalue << endl;
364 }
else if (fv_size == 4) {
365 if (fv_sign == H5T_SGN_NONE) {
366 cerr <<
"This dataset's datatype is unsigned int and the fillvalue is " << *fvalue
369 cerr <<
"This dataset's datatype is int and the fillvalue is " << *fvalue << endl;
371 }
else if (fv_size == 8) {
372 if (fv_sign == H5T_SGN_NONE) {
373 cerr <<
"This dataset's datatype is unsigned long long and the fillvalue is " << *fvalue
376 cerr <<
"This dataset's datatype is long long and the fillvalue is " << *fvalue << endl;
// Float class: reported as float or double (double when the size is 8).
380 if (H5Tget_class(dtype_id) == H5T_FLOAT) {
382 cerr <<
"This dataset's datatype is float and the fillvalue is " << *fvalue << endl;
383 }
else if (fv_size == 8) {
384 cerr <<
"This dataset's datatype is double and the fillvalue is " << *fvalue << endl;
// Release the malloc-ed fill value buffer.
388 if (fvalue != NULL) free(fvalue);
// Message for datatypes wider than 8 bytes (the guarding condition is not visible).
391 <<
"The size of the datatype is greater than 8 bytes, Use HDF5 API H5Pget_fill_value() to retrieve the fill value of this dataset."
// Datatype is neither integer nor float: report the storage type and point
// the user at the HDF5 API.
401 if (layout_type == 1)
402 cerr <<
" The storage size is 0 and the storage type is contiguous." << endl;
403 else if (layout_type == 2)
404 cerr <<
" The storage size is 0 and the storage type is chunking." << endl;
405 else if (layout_type == 3) cerr <<
" The storage size is 0 and the storage type is compact." << endl;
408 <<
"The datatype is neither float nor integer,use HDF5 API H5Pget_fill_value() to retrieve the fill value of this dataset."
/**
 * @brief Inspect the filters attached to a dataset.
 *
 * Reads the dataset creation property list, counts its filters and queries
 * each one with H5Pget_filter2(). The visible switch recognises
 * H5Z_FILTER_DEFLATE and H5Z_FILTER_SHUFFLE and builds an
 * unsupported-filter message for anything else.
 *
 * NOTE(review): the embedded line numbers are not consecutive; the statements
 * that use dc, the break statements and the handling of the unsupported-filter
 * message are not visible in this listing.
 *
 * @param dataset_id HDF5 dataset identifier.
 * @param dc Object describing the variable; presumably receives the filter
 * settings - confirm against the missing lines.
 */
430 static void set_filter_information(hid_t dataset_id,
DmrppCommon *dc) {
431 hid_t plist_id = H5Dget_create_plist(dataset_id);
434 int numfilt = H5Pget_nfilters(plist_id);
435 VERBOSE(cerr <<
"Number of filters associated with dataset: " << numfilt << endl);
// Visit every filter in the pipeline by index.
437 for (
int filter = 0; filter < numfilt; filter++) {
439 unsigned int flags, filter_info;
// Only the filter id is wanted: no client data values and no name buffer are
// requested (NULL, 0, NULL).
440 H5Z_filter_t filter_type = H5Pget_filter2(plist_id, filter, &flags, &nelmts, NULL, 0, NULL, &filter_info);
441 VERBOSE(cerr <<
"Filter Type: ");
443 switch (filter_type) {
444 case H5Z_FILTER_DEFLATE:
445 VERBOSE(cerr <<
"H5Z_FILTER_DEFLATE" << endl);
448 case H5Z_FILTER_SHUFFLE:
449 VERBOSE(cerr <<
"H5Z_FILTER_SHUFFLE" << endl);
// std::ios::ate positions the stream at the end of the initial text so that
// later insertions append to the prefix instead of overwriting it.
453 ostringstream oss(
"Unsupported HDF5 filter: ", std::ios::ate);
/**
 * @brief Collect storage information for one HDF5 dataset and record it on dc.
 *
 * Determines the byte order of the dataset and then branches on its layout:
 *  - contiguous: one chunk is added with the dataset offset and storage size;
 *  - chunked: filter information is gathered, the chunk dimensions are read
 *    and one chunk is added per entry reported by H5Dget_chunk_info();
 *  - compact: the values are read from the file and stored directly in the
 *    variable (dc is cast to Array for this).
 * Any other layout leads to an unsupported-layout message.
 *
 * NOTE(review): the embedded line numbers are not consecutive; error checks,
 * case labels for the chunked and compact layouts, break statements, handle
 * closing and closing braces are not visible in this listing.
 *
 * @param dataset HDF5 dataset identifier.
 * @param dc Variable object that receives the chunk information; the
 * contiguous and chunked branches skip add_chunk() when it is null.
 * @throws BESInternalError for the failures whose throw statements are
 * visible below (chunk info lookup, unsupported compact variable type).
 */
478 static void get_variable_chunk_info(hid_t dataset,
DmrppCommon *dc) {
479 std::string byteOrder =
"";
480 H5T_order_t byte_order = H5T_ORDER_ERROR;
// NOTE(review): the layout is narrowed to uint8_t, so a negative error value
// from H5Pget_layout() would not stay negative - confirm how failure is
// detected.
483 hid_t dcpl = H5Dget_create_plist(dataset);
484 uint8_t layout_type = H5Pget_layout(dcpl);
486 hid_t fspace_id = H5Dget_space(dataset);
487 hid_t ftype_id = H5Dget_type(dataset);
// Translate the HDF5 byte order into the byteOrder string; the case labels
// are not visible, only the message for an unsupported order.
489 byte_order = H5Tget_order(ftype_id);
490 switch (byte_order) {
500 ostringstream oss(
"Unsupported HDF5 dataset byteOrder: ", std::ios::ate);
501 oss << byte_order <<
".";
506 unsigned int dataset_rank = H5Sget_simple_extent_ndims(fspace_id);
// Second datatype handle for the same dataset (ftype_id above is the first);
// dsize is the size in bytes of one element.
508 hid_t dtypeid = H5Dget_type(dataset);
510 size_t dsize = H5Tget_size(dtypeid);
513 switch (layout_type) {
// Contiguous storage: the whole dataset is a single block at one file offset.
515 case H5D_CONTIGUOUS: {
516 haddr_t cont_addr = 0;
517 hsize_t cont_size = 0;
519 VERBOSE(cerr <<
"Storage: contiguous" << endl);
521 cont_addr = H5Dget_offset(dataset);
526 cont_size = H5Dget_storage_size(dataset);
533 VERBOSE(cerr <<
" Addr: " << cont_addr << endl);
534 VERBOSE(cerr <<
" Size: " << cont_size << endl);
535 VERBOSE(cerr <<
"byteOrder: " << byteOrder << endl);
// Record the single block; data URL and position string are left empty.
538 if (dc) dc->
add_chunk(
"", byteOrder, cont_size, cont_addr,
"" );
// Chunked storage (case label not visible): count the chunks first.
543 hsize_t num_chunks = 0;
544 herr_t status = H5Dget_num_chunks(dataset, fspace_id, &num_chunks);
550 VERBOSE(cerr <<
"Storage: chunked." << endl);
551 VERBOSE(cerr <<
"Number of chunks is: " << num_chunks << endl);
554 set_filter_information(dataset, dc);
// NOTE(review): chunk_dims holds size_t but its storage is passed as hsize_t*;
// this is only safe where both types have the same size - confirm for the
// supported platforms.
557 vector<size_t> chunk_dims(dataset_rank);
558 unsigned int chunk_rank = H5Pget_chunk(dcpl, dataset_rank, (hsize_t *) &chunk_dims[0]);
559 if (chunk_rank != dataset_rank)
561 "Found a chunk with rank different than the dataset's (aka variables's) rank", __FILE__,
// NOTE(review): the index is unsigned int while num_chunks is hsize_t - confirm
// that chunk counts beyond the unsigned int range cannot occur.
566 for (
unsigned int i = 0; i < num_chunks; ++i) {
568 vector<hsize_t> temp_coords(dataset_rank);
569 vector<unsigned int> chunk_coords(dataset_rank);
// Look up chunk i: its coordinates go to temp_coords, its file address and
// size to addr and size; the filter mask is not requested (NULL).
575 status = H5Dget_chunk_info(dataset, fspace_id, i, &temp_coords[0], NULL, &addr, &size);
577 VERBOSE(cerr <<
"ERROR" << endl);
578 throw BESInternalError(
"Cannot get HDF5 dataset storage info.", __FILE__, __LINE__);
581 VERBOSE(cerr <<
"chk_idk: " << i <<
", addr: " << addr <<
", size: " << size << endl);
// Copy the coordinates into the unsigned int vector expected by add_chunk().
// NOTE(review): this narrows hsize_t to unsigned int - confirm acceptable.
585 for (
unsigned int j = 0; j < chunk_coords.size(); ++j) {
586 chunk_coords[j] = temp_coords[j];
592 if (dc) dc->
add_chunk(
"", byteOrder, size, addr, chunk_coords);
// Compact storage (case label not visible): the data is read here and stored
// in the variable itself.
600 VERBOSE(cerr <<
"Storage: compact" << endl);
602 size_t comp_size = H5Dget_storage_size(dataset);
603 VERBOSE(cerr <<
" Size: " << comp_size << endl);
// Handling of an empty compact dataset is in lines not shown.
605 if (comp_size == 0) {
610 vector<uint8_t> values;
612 Array *btp =
dynamic_cast<Array *
>(dc);
// Bytes needed to hold every element; compared against the stored size below
// (the mismatch handling is in lines not shown).
615 size_t memRequired = btp->length() * dsize;
617 if (comp_size != memRequired) {
// Dispatch on the element type of the array; only the last label of the
// numeric group is visible.
622 switch (btp->var()->type()) {
// Numeric data: read the raw bytes and hand them to the variable.
634 case dods_uint64_c: {
635 values.resize(memRequired);
636 get_data(dataset,
reinterpret_cast<void *
>(&values[0]));
637 btp->set_read_p(
true);
638 btp->val2buf(
reinterpret_cast<void *
>(&values[0]));
// Variable length string data: read through read_vlen_string().
644 if (H5Tis_variable_str(dtypeid) > 0) {
645 vector<string> finstrval = {
""};
646 read_vlen_string(dataset, 1, NULL, NULL, NULL, finstrval);
647 btp->set_value(finstrval, finstrval.size());
648 btp->set_read_p(
true);
// Presumably the fixed length string case: a single element is asserted and
// the raw bytes are turned into one std::string.
658 assert(btp->length() == 1);
659 values.resize(memRequired);
660 get_data(dataset,
reinterpret_cast<void *
>(&values[0]));
661 string str(values.begin(), values.end());
662 vector<string> strings = {str};
663 btp->set_value(strings, strings.size());
664 btp->set_read_p(
true);
670 throw BESInternalError(
"Unsupported compact storage variable type.", __FILE__, __LINE__);
// NOTE(review): layout_type is a uint8_t; stream insertion of that type
// normally writes a character, not a number, so the message may contain a
// control character instead of the layout code - confirm.
681 ostringstream oss(
"Unsupported HDF5 dataset layout type: ", std::ios::ate);
682 oss << layout_type <<
".";
/**
 * @brief Gather chunk information for every variable in a group and its child groups.
 *
 * For each variable the attribute table is consulted for a fullnamepath
 * attribute; when it holds exactly one value that value is used as the HDF5
 * path (FQN) of the dataset. The dataset is opened with H5Dopen2() and passed
 * to get_variable_chunk_info(). Child groups are then processed by calling
 * this function again.
 *
 * NOTE(review): the embedded line numbers are not consecutive. The default
 * value of FQN, the loop over child groups and any H5Dclose() call are not
 * visible. H5Dopen2() appears three times in the visible lines; some of these
 * may sit in disabled code - confirm against the full file.
 *
 * @param file HDF5 file identifier.
 * @param group Group whose variables and child groups are visited.
 * @throws BESInternalError when a variable has no attribute table or a
 * dataset cannot be opened.
 */
703 static void get_chunks_for_all_variables(hid_t file, D4Group *group) {
705 for (Constructor::Vars_iter v = group->var_begin(), ve = group->var_end(); v != ve; ++v) {
708 D4Attributes *d4_attrs = (*v)->attributes();
710 throw BESInternalError(
"Expected to find an attribute table for " + (*v)->name() +
" but did not.",
// The fullnamepath attribute carries the dataset path inside the HDF5 file.
716 D4Attribute *attr = d4_attrs->get(
"fullnamepath");
727 if (attr->num_values() == 1)
728 FQN = attr->value(0);
731 BESDEBUG(
"dmrpp",
"Working on: " << FQN << endl);
732 dataset = H5Dopen2(file, FQN.c_str(), H5P_DEFAULT);
734 throw BESInternalError(
"HDF5 dataset '" + FQN +
"' cannot be opened.", __FILE__, __LINE__);
// Passing NULL for the handler turns off automatic HDF5 error stack printing.
739 H5Eset_auto2(H5E_DEFAULT, NULL, NULL);
741 BESDEBUG(
"dmrpp",
"Working on: " << FQN << endl);
742 dataset = H5Dopen2(file, FQN.c_str(), H5P_DEFAULT);
// This variant tolerates a missing attribute (attr may be null).
749 if (attr && attr->num_values() == 1)
750 FQN = attr->value(0);
754 VERBOSE(cerr <<
"Working on: " << FQN << endl);
755 hid_t dataset = H5Dopen2(file, FQN.c_str(), H5P_DEFAULT);
// A failed open without a fullnamepath attribute is only reported on cerr;
// a failed open with the attribute present is an error.
759 if (dataset < 0 && attr == 0) {
760 cerr<<
"Unable to open dataset name "<<FQN <<endl;
763 else if (dataset < 0)
764 throw BESInternalError(
"HDF5 dataset '" + FQN +
"' cannot be opened.", __FILE__, __LINE__);
// The cast yields null when the variable is not a DmrppCommon; the callee
// guards its add_chunk() calls against that.
766 get_variable_chunk_info(dataset,
dynamic_cast<DmrppCommon *
>(*v));
// Descend into the child groups; g is advanced by the post-increment in the
// recursive call (the enclosing loop statement is not visible).
770 D4Group::groupsIter g = group->grp_begin();
771 D4Group::groupsIter ge = group->grp_end();
773 get_chunks_for_all_variables(file, *g++);
/**
 * @brief Entry point of the build_dmrpp command line tool.
 *
 * Parses the options described by the usage text below, requires an HDF5
 * file name, and then follows one of two paths:
 *  - a DMR file name was given: the DMR is parsed into a DMRpp object, chunk
 *    information is added from the HDF5 file and the DMR++ document is
 *    written to cout;
 *  - otherwise: the BES root directory key and the Metadata Store are used to
 *    obtain the DMR++ object before chunk information is added and the
 *    result is written to cout.
 *
 * NOTE(review): the embedded line numbers are not consecutive; the case
 * labels of the option switch, several declarations, the return statements
 * and the try block are not visible in this listing.
 *
 * @param argc Number of command line arguments.
 * @param argv Command line arguments.
 */
777 int main(
int argc,
char *argv[]) {
778 string h5_file_name =
"";
779 string h5_dset_path =
"";
780 string dmr_name =
"";
781 string url_name =
"";
// Options c, f, r and u take an argument; d, h and v are flags.
784 GetOpt getopt(argc, argv,
"c:f:r:u:dhv");
786 while ((option_char = getopt()) != -1) {
787 switch (option_char) {
// NOTE(review): the case labels are not visible; going by the usage text,
// these assignments presumably belong to -f, -r and -u in that order.
797 h5_file_name = getopt.optarg;
800 dmr_name = getopt.optarg;
803 url_name = getopt.optarg;
810 <<
"build_dmrpp [-v] -c <bes.conf> -f <data file> [-u <href url>] | build_dmrpp -f <data file> -r <dmr file> | build_dmrpp -h"
// The HDF5 file name is mandatory.
818 if (h5_file_name.empty()) {
819 cerr <<
"HDF5 file name must be given (-f <input>)." << endl;
// Path 1: a DMR file was supplied, so build the DMR++ from it directly.
831 if (!dmr_name.empty()) {
833 unique_ptr<DMRpp> dmrpp(
new DMRpp);
835 dmrpp->set_factory(&dtf);
837 ifstream in(dmr_name.c_str());
839 parser.intern(in, dmrpp.get(),
false);
842 file = H5Fopen(h5_file_name.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
844 cerr <<
"Error: HDF5 file '" + h5_file_name +
"' cannot be opened." << endl;
// Add chunk information to every variable, then emit the document.
849 get_chunks_for_all_variables(file, dmrpp->root());
852 dmrpp->print_dmrpp(writer, url_name);
854 cout << writer.get_doc();
// Path 2 (presumably the else branch): use the BES configuration and the
// Metadata Store.
857 string bes_data_root;
861 cerr <<
"Error: Could not find the BES root directory key." << endl;
873 cerr <<
"The Metadata Store (MDS) must be configured for this command to work." << endl;
889 cerr <<
"Expected a DMR++ object from the DmrppMetadataStore." << endl;
894 file = H5Fopen(h5_file_path.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
896 cerr <<
"Error: HDF5 file '" + h5_file_path +
"' cannot be opened." << endl;
900 get_chunks_for_all_variables(file, dmrpp->root());
902 dmrpp->set_href(url_name);
// Store the completed DMR++ in the Metadata Store under the name given on the
// command line.
904 mds->add_dmrpp_response(dmrpp.get(), h5_file_name );
// Chunk output is switched on before the document is printed.
907 dmrpp->set_print_chunks(
true);
908 dmrpp->print_dap4(writer);
910 cout << writer.get_doc();
912 cerr <<
"Error: Could not get a lock on the DMR for '" + h5_file_path +
"'." << endl;
// Errors are reported on cerr; the final message presumably sits in a
// catch-all handler that is not visible.
921 catch (std::exception &e) {
922 cerr <<
"std::exception: " << e.what() << endl;
926 cerr <<
"Unknown error." << endl;
static void SetUp(const std::string &values)
Sets up debugging for the bes.
Abstract exception class for the BES with basic string message.
virtual std::string get_message()
get the error message for this exception
exception thrown if internal error encountered
error thrown if the resource requested cannot be found
static std::string assemblePath(const std::string &firstPart, const std::string &secondPart, bool leadingSlash=false, bool trailingSlash=false)
Assemble path fragments making sure that they are separated by a single '/' character.
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
static TheBESKeys * TheKeys()
static std::string ConfigFile
Provide a way to print the DMR++ response.
Size and offset information of data included in DMR++ files.
void set_chunk_dimension_sizes(const std::vector< size_t > &chunk_dims)
Set the value of the chunk dimension sizes given a vector of HDF5 hsize_t.
virtual unsigned long add_chunk(const std::string &data_url, const std::string &byte_order, unsigned long long size, unsigned long long offset, const std::string &position_in_array="")
Add a new chunk as defined by an h4:byteStream element.
void set_deflate(bool value)
Set the value of the deflate property.
void set_compact(bool value)
Set the value of the compact property.
void set_shuffle(bool value)
Set the value of the shuffle property.
void get_data(hid_t dset, void *buf)