bes  Updated for version 3.20.8
DmrppParserSax2.cc
1 // -*- mode: c++; c-basic-offset:4 -*-
2 
3 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
4 // Access Protocol.
5 
6 // Copyright (c) 2012 OPeNDAP, Inc.
7 // Author: James Gallagher <jgallagher@opendap.org>
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 //
23 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24 
25 #include "config.h"
26 
27 #include <iostream>
28 #include <sstream>
29 
30 #include <cstring>
31 #include <cstdarg>
32 #include <cassert>
33 
34 #include <libxml/parserInternals.h>
35 
36 #include <DMR.h>
37 
38 #include <BaseType.h>
39 #include <Array.h>
40 #include <D4Group.h>
41 #include <D4Attributes.h>
42 #include <D4Maps.h>
43 #include <D4Enum.h>
44 #include <D4BaseTypeFactory.h>
45 
46 #include <DapXmlNamespaces.h>
47 #include <util.h>
48 
49 #include <BESInternalError.h>
50 #include <BESDebug.h>
51 #include <BESCatalog.h>
52 #include <BESCatalogUtils.h>
53 #include <BESCatalogList.h>
54 #include <BESUtil.h>
55 #include <TheBESKeys.h>
56 #include <BESRegex.h>
57 
58 #include "DmrppParserSax2.h"
59 #include "DmrppCommon.h"
60 #include "DmrppStr.h"
61 #include "DmrppNames.h"
62 #include "DmrppArray.h"
63 
64 #include "CurlUtils.h"
65 
66 #include "Base64.h"
67 
// Size constants (in bytes). The replacement text must NOT end with a
// semicolon: a trailing ';' makes the macro unusable inside any expression
// (array bounds, function arguments, comparisons, ...).
#define FIVE_12K 524288
#define ONE_MB 1048576
#define MAX_INPUT_LINE_LENGTH ONE_MB

// Prefix for log and debug messages: "DmrppParserSax2::<function>() - "
#define prolog std::string("DmrppParserSax2::").append(__func__).append("() - ")

// Namespace URI compared against an element's namespace to recognise DMR++
// elements. Spelled std::string because this definition appears before the
// file's 'using namespace std;' directive.
static const std::string dmrpp_namespace = "http://xml.opendap.org/dap/dmrpp/1.0.0#";
75 
76 
77 using namespace libdap;
78 using namespace std;
80 
81 namespace dmrpp {
82 
// Printable names of the parser states; the debug output indexes this table
// with the value returned by get_state().
// NOTE(review): the order presumably has to mirror the ParseState enum declared
// in the header - confirm before adding, removing or reordering entries.
static const char *states[] = {
    // Document and group level
    "parser_start", "inside_dataset",
    "inside_group",                 // the state just after parsing the start of a Group element

    // Attributes
    "inside_attribute_container", "inside_attribute", "inside_attribute_value",
    "inside_other_xml_attribute",

    // Enumeration and Dimension definitions
    "inside_enum_def", "inside_enum_const", "inside_dim_def",

    // Variables
    "inside_simple_type",           // covers Byte, ..., Url, Opaque
    // ("inside_array" is intentionally not part of the table)
    "inside_dim", "inside_map", "inside_constructor",

    // Elements from other namespaces and DMR++ specific elements
    "not_dap4_element", "inside_dmrpp_object",
    "inside_dmrpp_chunkDimensionSizes_element", "inside_dmrpp_compact_element",

    // Terminal and error states
    "parser_unknown", "parser_error", "parser_fatal_error", "parser_end"
};
111 
/**
 * @brief Test whether two C strings differ.
 * @param name The element name being examined.
 * @param tag The tag name to compare it with.
 * @return true if the two strings are different, false if they are identical.
 */
static bool is_not(const char *name, const char *tag)
{
    const bool identical = (strcmp(name, tag) == 0);
    return !identical;
}
116 
117 
126 D4EnumDef *
127 DmrppParserSax2::enum_def()
128 {
129  if (!d_enum_def) d_enum_def = new D4EnumDef;
130 
131  return d_enum_def;
132 }
133 
140 D4Dimension *
141 DmrppParserSax2::dim_def()
142 {
143  if (!d_dim_def) d_dim_def = new D4Dimension;
144 
145  return d_dim_def;
146 }
147 
148 /* Search through the attribute array for a given attribute name.
149  * If the name is found, return the string value for that attribute
150  * @param name: Search for this name
151  * @param attributes: Array that holds the attribute values to search
152  * @param num_attributes: Number of attributes
153  * @return string value of attribute; the empty string if the name was not found
154  */
155 string DmrppParserSax2::get_attribute_val(const string &name, const xmlChar **attributes, int num_attributes)
156 {
157  unsigned int index = 0;
158  for (int i = 0; i < num_attributes; ++i, index += 5) {
159  if (strncmp(name.c_str(), (const char *)attributes[index], name.length()) == 0) {
160  return string((const char *)attributes[index+3], (const char *)attributes[index+4]);
161  }
162  }
163  return "";
164 }
165 
166 #if 0
172 void DmrppParserSax2::transfer_xml_attrs(const xmlChar **attributes, int nb_attributes)
173 {
174  if (!xml_attrs.empty()) xml_attrs.clear(); // erase old attributes
175 
176  // Make a value using the attribute name and the prefix, namespace URI
177  // and the value. The prefix might be null.
178  unsigned int index = 0;
179  for (int i = 0; i < nb_attributes; ++i, index += 5) {
180  xml_attrs.insert(
181  map<string, XMLAttribute>::value_type(string((const char *) attributes[index]),
182  XMLAttribute(attributes + index + 1)));
183 
184  BESDEBUG(PARSER, prolog <<
185  "XML Attribute '" << (const char *)attributes[index] << "': " << xml_attrs[(const char *)attributes[index]].value << endl);
186  }
187 }
188 #endif
189 
196 void DmrppParserSax2::transfer_xml_ns(const xmlChar **namespaces, int nb_namespaces)
197 {
198  // make a value with the prefix and namespace URI. The prefix might be null.
199  for (int i = 0; i < nb_namespaces; ++i) {
200  namespace_table.insert(
201  map<string, string>::value_type(namespaces[i * 2] != 0 ? (const char *) namespaces[i * 2] : "",
202  (const char *) namespaces[i * 2 + 1]));
203  }
204 }
205 
206 #if 0
213 bool DmrppParserSax2::check_required_attribute(const string & attr)
214 {
215  if (xml_attrs.find(attr) == xml_attrs.end()) {
216  dmr_error(this, "Required attribute '%s' not found.", attr.c_str());
217  return false;
218  }
219  else
220  return true;
221 }
222 #endif
223 
224 /*
225  * An improved version of the previous check_required_attribute.
226  * Searches for an attribute name within the attribute array.
227  * @param name: The attribute name to search for
228  * @param attributes: The attribute array
229  * @param num_attributes: The number of attributes
230  * @return success: true
231  * failure: false
232  */
233 bool DmrppParserSax2::check_required_attribute(const string &name, const xmlChar **attributes, int num_attributes)
234 {
235  unsigned int index = 0;
236  for (int i = 0; i < num_attributes; ++i, index += 5) {
237  if (strncmp(name.c_str(), (const char *)attributes[index], name.length()) == 0) {
238  return true;
239  }
240  }
241 
242  dmr_error(this, "Required attribute '%s' not found.", name.c_str());
243  return false;
244 }
245 
246 #if 0
253 bool DmrppParserSax2::check_attribute(const string & attr)
254 {
255  return (xml_attrs.find(attr) != xml_attrs.end());
256 }
257 #endif
258 
269 bool DmrppParserSax2::check_attribute(const string &name, const xmlChar **attributes, int num_attributes)
270 {
271  unsigned int index = 0;
272  for (int i = 0; i < num_attributes; ++i, index += 5) {
273  if (strncmp(name.c_str(), (const char *)attributes[index], name.length()) == 0) {
274  return true;
275  }
276  }
277  return false;
278 }
279 
280 bool DmrppParserSax2::process_dimension_def(const char *name, const xmlChar **attrs, int nb_attributes)
281 {
282  if (is_not(name, "Dimension")) return false;
283 
284 #if 0
285  transfer_xml_attrs(attrs, nb_attributes);
286 #endif
287 
288 #if 0
289  if (!(check_required_attribute("name", attrs, nb_attributes) && check_required_attribute("size", attrs, nb_attributes))) {
290  dmr_error(this, "The required attribute 'name' or 'size' was missing from a Dimension element.");
291  return false;
292  }
293 #endif
294 
295  if (!check_required_attribute("name", attrs, nb_attributes)) {
296  dmr_error(this, "The required attribute 'name' was missing from a Dimension element.");
297  return false;
298  }
299 
300  if (!check_required_attribute("size", attrs, nb_attributes)) {
301  dmr_error(this, "The required attribute 'size' was missing from a Dimension element.");
302  return false;
303  }
304 
305  // This getter (dim_def) allocates a new object if needed.
306  dim_def()->set_name(get_attribute_val("name", attrs, nb_attributes));
307  try {
308  dim_def()->set_size(get_attribute_val("size", attrs, nb_attributes));
309  }
310  catch (Error &e) {
311  dmr_error(this, e.get_error_message().c_str());
312  return false;
313  }
314 
315  return true;
316 }
317 
335 bool DmrppParserSax2::process_dimension(const char *name, const xmlChar **attrs, int nb_attributes)
336 {
337  if (is_not(name, "Dim")) return false;
338 
339 #if 0
340  transfer_xml_attrs(attrs, nb_attributes);
341 #endif
342 #if 0
343  if (check_attribute("size", attrs, nb_attributes) && check_attribute("name", attrs, nb_attributes)) {
344  dmr_error(this, "Only one of 'size' and 'name' are allowed in a Dim element, but both were used.");
345  return false;
346  }
347  if (!(check_attribute("size", attrs, nb_attributes) || check_attribute("name", attrs, nb_attributes))) {
348  dmr_error(this, "Either 'size' or 'name' must be used in a Dim element.");
349  return false;
350  }
351 #endif
352  bool has_size = check_attribute("size", attrs, nb_attributes);
353  bool has_name = check_attribute("name", attrs, nb_attributes);
354  if (has_size && has_name) {
355  dmr_error(this, "Only one of 'size' and 'name' are allowed in a Dim element, but both were used.");
356  return false;
357  }
358  if (!has_size && !has_name) {
359  dmr_error(this, "Either 'size' or 'name' must be used in a Dim element.");
360  return false;
361  }
362 
363 
364  if (!top_basetype()->is_vector_type()) {
365  // Make the top BaseType* an array
366  BaseType *b = top_basetype();
367  pop_basetype();
368 
369  Array *a = static_cast<Array*>(dmr()->factory()->NewVariable(dods_array_c, b->name()));
370  a->set_is_dap4(true);
371  a->add_var_nocopy(b);
372  a->set_attributes_nocopy(b->attributes());
373  // trick: instead of popping b's attributes, copying them and then pushing
374  // a's copy, just move the pointer (but make sure there's only one object that
375  // references that pointer).
376  b->set_attributes_nocopy(0);
377 
378  push_basetype(a);
379  }
380 
381  assert(top_basetype()->is_vector_type());
382 
383  Array *a = static_cast<Array*>(top_basetype());
384  if (has_size) {
385  size_t dim_size = stoi(get_attribute_val("size", attrs, nb_attributes));
386  BESDEBUG(PARSER, prolog << "Processing nameless Dim of size: " << dim_size << endl);
387  a->append_dim(dim_size); // low budget code for now. jhrg 8/20/13, modified to use new function. kln 9/7/19
388  return true;
389  }
390  else if (has_name) {
391  string name = get_attribute_val("name", attrs, nb_attributes);
392  BESDEBUG(PARSER, prolog << "Processing Dim with named Dimension reference: " << name << endl);
393 
394  D4Dimension *dim = 0;
395  if (name[0] == '/') // lookup the Dimension in the root group
396  dim = dmr()->root()->find_dim(name);
397  else
398  // get enclosing Group and lookup Dimension there
399  dim = top_group()->find_dim(name);
400 
401  if (!dim)
402  throw BESInternalError("The dimension '" + name + "' was not found while parsing the variable '" + a->name() + "'.",__FILE__,__LINE__);
403  a->append_dim(dim);
404  return true;
405  }
406  return false;
407 }
408 
409 
410 bool DmrppParserSax2::process_dmrpp_compact_start(const char *name){
411  if ( strcmp(name, "compact") == 0) {
412  BESDEBUG(PARSER, prolog << "DMR++ compact element. localname: " << name << endl);
413  BaseType *bt = top_basetype();
414  if (!bt) throw BESInternalError("Could not locate parent BaseType during parse operation.", __FILE__, __LINE__);
415  DmrppCommon *dc = dynamic_cast<DmrppCommon*>(bt); // Get the Dmrpp common info
416  if (!dc)
417  throw BESInternalError("Could not cast BaseType to DmrppType in the drmpp handler.", __FILE__, __LINE__);
418  dc->set_compact(true);
419  return true;
420  }
421  else {
422  return false;
423  }
424 }
425 
426 
427 void DmrppParserSax2::process_dmrpp_compact_end(const char *localname)
428 {
429  BESDEBUG(PARSER, prolog << "BEGIN DMR++ compact element. localname: " << localname << endl);
430  if (is_not(localname, "compact"))
431  return;
432 
433  BaseType *target = top_basetype();
434  if (!target)
435  throw BESInternalError("Could not locate parent BaseType during parse operation.", __FILE__, __LINE__);
436  BESDEBUG(PARSER, prolog << "BaseType: " << target->type_name() << " " << target->name() << endl);
437 
438  if (target->type() != dods_array_c)
439  throw BESInternalError("The dmrpp::compact element must be the child of an array variable",__FILE__,__LINE__);
440 
441  DmrppCommon *dc = dynamic_cast<DmrppCommon*>(target); // Get the Dmrpp common info
442  if (!dc)
443  throw BESInternalError("Could not cast BaseType to DmrppType in the drmpp handler.", __FILE__, __LINE__);
444 
445  dc->set_compact(true);
446 
447  // DmrppParserSax2::dmr_error(this, "Expected an end value tag; found '%s' instead.", localname);
448 
449  std::string data(char_data);
450  BESDEBUG(PARSER, prolog << "Read compact element text. size: " << data.size() << " length: " << data.length() << " value: '" << data << "'" << endl);
451 
452  std::vector <u_int8_t> decoded = base64::Base64::decode(data);
453 
454  switch (target->var()->type()) {
455  case dods_array_c:
456  throw BESInternalError("Parser state has been corrupted. An Array may not be the template for an Array.", __FILE__, __LINE__);
457  break;
458 
459  case dods_byte_c:
460  case dods_char_c:
461  case dods_int8_c:
462  case dods_uint8_c:
463  case dods_int16_c:
464  case dods_uint16_c:
465  case dods_int32_c:
466  case dods_uint32_c:
467  case dods_int64_c:
468  case dods_uint64_c:
469 
470  case dods_enum_c:
471 
472  case dods_float32_c:
473  case dods_float64_c:
474  target->val2buf(reinterpret_cast<void *>(&decoded[0]));
475  target->set_read_p(true);
476  break;
477 
478  case dods_str_c:
479  case dods_url_c:
480  {
481  std::string str(decoded.begin(), decoded.end());
482  DmrppArray *st = dynamic_cast<DmrppArray *>(target);
483  if(!st){
484  stringstream msg;
485  msg << prolog << "The target BaseType MUST be an array. and it's a " << target->type_name();
486  BESDEBUG(MODULE, msg.str() << endl);
487  throw BESInternalError(msg.str(),__FILE__,__LINE__);
488  }
489  st->val2buf(&str);
490  st->set_read_p(true);
491  }
492  break;
493 
494  default:
495  throw BESInternalError("Unsupported COMPACT storage variable type in the drmpp handler.", __FILE__, __LINE__);
496  break;
497  }
498  char_data = ""; // Null this after use.
499 
500  BESDEBUG(PARSER, prolog << "END" << endl);
501 }
502 
503 bool DmrppParserSax2::process_map(const char *name, const xmlChar **attrs, int nb_attributes)
504 {
505  if (is_not(name, "Map")) return false;
506 
507 #if 0
508  transfer_xml_attrs(attrs, nb_attributes);
509 #endif
510 
511  if (!check_attribute("name", attrs, nb_attributes)) {
512  dmr_error(this, "The 'name' attribute must be used in a Map element.");
513  return false;
514  }
515 
516  if (!top_basetype()->is_vector_type()) {
517  // Make the top BaseType* an array
518  BaseType *b = top_basetype();
519  pop_basetype();
520 
521  Array *a = static_cast<Array*>(dmr()->factory()->NewVariable(dods_array_c, b->name()));
522  a->set_is_dap4(true);
523  a->add_var_nocopy(b);
524  a->set_attributes_nocopy(b->attributes());
525  // trick: instead of popping b's attributes, copying them and then pushing
526  // a's copy, just move the pointer (but make sure there's only one object that
527  // references that pointer).
528  b->set_attributes_nocopy(0);
529 
530  push_basetype(a);
531  }
532 
533  assert(top_basetype()->is_vector_type());
534 
535  Array *a = static_cast<Array*>(top_basetype());
536 
537  string map_name = get_attribute_val("name", attrs, nb_attributes);
538  if (get_attribute_val("name", attrs, nb_attributes).at(0) != '/') map_name = top_group()->FQN() + map_name;
539 
540  Array *map_source = 0; // The array variable that holds the data for the Map
541 
542  if (map_name[0] == '/') // lookup the Map in the root group
543  map_source = dmr()->root()->find_map_source(map_name);
544  else
545  // get enclosing Group and lookup Map there
546  map_source = top_group()->find_map_source(map_name);
547 
548  // Change: If the parser is in 'strict' mode (the default) and the Array named by
549  // the Map cannot be fond, it is an error. If 'strict' mode is false (permissive
550  // mode), then this is not an error. However, the Array referenced by the Map will
551  // be null. This is a change in the parser's behavior to accommodate requests for
552  // Arrays that include Maps that do not also include the Map(s) in the request.
553  // See https://opendap.atlassian.net/browse/HYRAX-98. jhrg 4/13/16
554  if (!map_source && d_strict)
555  throw BESInternalError("The Map '" + map_name + "' was not found while parsing the variable '" + a->name() + "'.",__FILE__,__LINE__);
556 
557  a->maps()->add_map(new D4Map(map_name, map_source));
558 
559  return true;
560 }
561 
562 bool DmrppParserSax2::process_group(const char *name, const xmlChar **attrs, int nb_attributes)
563 {
564  if (is_not(name, "Group")) return false;
565 
566 #if 0
567  transfer_xml_attrs(attrs, nb_attributes);
568 #endif
569 
570  if (!check_required_attribute("name", attrs, nb_attributes)) {
571  dmr_error(this, "The required attribute 'name' was missing from a Group element.");
572  return false;
573  }
574 
575  BaseType *btp = dmr()->factory()->NewVariable(dods_group_c, get_attribute_val("name", attrs, nb_attributes));
576  if (!btp) {
577  dmr_fatal_error(this, "Could not instantiate the Group '%s'.", get_attribute_val("name", attrs, nb_attributes).c_str());
578  return false;
579  }
580 
581  D4Group *grp = static_cast<D4Group*>(btp);
582 
583  // Need to set this to get the D4Attribute behavior in the type classes
584  // shared between DAP2 and DAP4. jhrg 4/18/13
585  grp->set_is_dap4(true);
586 
587  // link it up and change the current group
588  D4Group *parent = top_group();
589  if (!parent) {
590  dmr_fatal_error(this, "No Group on the Group stack.");
591  return false;
592  }
593 
594  grp->set_parent(parent);
595  parent->add_group_nocopy(grp);
596 
597  push_group(grp);
598  push_attributes(grp->attributes());
599  return true;
600 }
601 
608 inline bool DmrppParserSax2::process_attribute(const char *name, const xmlChar **attrs, int nb_attributes)
609 {
610  if (is_not(name, "Attribute")) return false;
611 
612 #if 0
613  // These methods set the state to parser_error if a problem is found.
614  transfer_xml_attrs(attrs, nb_attributes);
615 #endif
616 
617  // add error
618  if (!(check_required_attribute(string("name"), attrs, nb_attributes) && check_required_attribute(string("type"), attrs, nb_attributes))) {
619  dmr_error(this, "The required attribute 'name' or 'type' was missing from an Attribute element.");
620  return false;
621  }
622 
623  if (get_attribute_val("type", attrs, nb_attributes) == "Container") {
624  push_state(inside_attribute_container);
625 
626  BESDEBUG(PARSER, prolog << "Pushing attribute container " << get_attribute_val("name", attrs, nb_attributes) << endl);
627  D4Attribute *child = new D4Attribute(get_attribute_val("name", attrs, nb_attributes), attr_container_c);
628 
629  D4Attributes *tos = top_attributes();
630  // add return
631  if (!tos) {
632  delete child;
633  dmr_fatal_error(this, "Expected an Attribute container on the top of the attribute stack.");
634  return false;
635  }
636 
637  tos->add_attribute_nocopy(child);
638  push_attributes(child->attributes());
639  }
640  else if (get_attribute_val("type", attrs, nb_attributes) == "OtherXML") {
641  push_state(inside_other_xml_attribute);
642 
643  dods_attr_name = get_attribute_val("name", attrs, nb_attributes);
644  dods_attr_type = get_attribute_val("type", attrs, nb_attributes);
645  }
646  else {
647  push_state(inside_attribute);
648 
649  dods_attr_name = get_attribute_val("name", attrs, nb_attributes);
650  dods_attr_type = get_attribute_val("type", attrs, nb_attributes);
651  }
652 
653  return true;
654 }
655 
661 inline bool DmrppParserSax2::process_enum_def(const char *name, const xmlChar **attrs, int nb_attributes)
662 {
663  if (is_not(name, "Enumeration")) return false;
664 
665 #if 0
666  transfer_xml_attrs(attrs, nb_attributes);
667 #endif
668 
669  if (!(check_required_attribute("name", attrs, nb_attributes) && check_required_attribute("basetype", attrs, nb_attributes))) {
670  dmr_error(this, "The required attribute 'name' or 'basetype' was missing from an Enumeration element.");
671  return false;
672  }
673 
674  Type t = get_type(get_attribute_val("basetype", attrs, nb_attributes).c_str());
675  if (!is_integer_type(t)) {
676  dmr_error(this, "The Enumeration '%s' must have an integer type, instead the type '%s' was used.",
677  get_attribute_val("name", attrs, nb_attributes).c_str(), get_attribute_val("basetype", attrs, nb_attributes).c_str());
678  return false;
679  }
680 
681  // This getter allocates a new object if needed.
682  string enum_def_path = get_attribute_val("name", attrs, nb_attributes);
683 #if 0
684  // Use FQNs when things are referenced, not when they are defined
685  if (xml_attrs["name"].value[0] != '/')
686  enum_def_path = top_group()->FQN() + enum_def_path;
687 #endif
688  enum_def()->set_name(enum_def_path);
689  enum_def()->set_type(t);
690 
691  return true;
692 }
693 
694 inline bool DmrppParserSax2::process_enum_const(const char *name, const xmlChar **attrs, int nb_attributes)
695 {
696  if (is_not(name, "EnumConst")) return false;
697 
698 #if 0
699  // These methods set the state to parser_error if a problem is found.
700  transfer_xml_attrs(attrs, nb_attributes);
701 #endif
702 
703  if (!(check_required_attribute("name", attrs, nb_attributes) && check_required_attribute("value", attrs, nb_attributes))) {
704  dmr_error(this, "The required attribute 'name' or 'value' was missing from an EnumConst element.");
705  return false;
706  }
707 
708  istringstream iss(get_attribute_val("value", attrs, nb_attributes));
709  long long value = 0;
710  iss >> skipws >> value;
711  if (iss.fail() || iss.bad()) {
712  dmr_error(this, "Expected an integer value for an Enumeration constant, got '%s' instead.",
713  get_attribute_val("value", attrs, nb_attributes).c_str());
714  }
715  else if (!enum_def()->is_valid_enum_value(value)) {
716  dmr_error(this, "In an Enumeration constant, the value '%s' cannot fit in a variable of type '%s'.",
717  get_attribute_val("value", attrs, nb_attributes).c_str(), D4type_name(d_enum_def->type()).c_str());
718  }
719  else {
720  // unfortunate choice of names... args are 'label' and 'value'
721  enum_def()->add_value(get_attribute_val("name", attrs, nb_attributes), value);
722  }
723 
724  return true;
725 }
726 
732 inline bool DmrppParserSax2::process_variable(const char *name, const xmlChar **attrs, int nb_attributes)
733 {
734  Type t = get_type(name);
735  if (is_simple_type(t)) {
736  process_variable_helper(t, inside_simple_type, attrs, nb_attributes);
737  return true;
738  }
739  else {
740  switch (t) {
741  case dods_structure_c:
742  process_variable_helper(t, inside_constructor, attrs, nb_attributes);
743  return true;
744 
745  case dods_sequence_c:
746  process_variable_helper(t, inside_constructor, attrs, nb_attributes);
747  return true;
748 
749  default:
750  return false;
751  }
752  }
753 }
754 
762 void DmrppParserSax2::process_variable_helper(Type t, ParseState s, const xmlChar **attrs, int nb_attributes)
763 {
764 #if 0
765  transfer_xml_attrs(attrs, nb_attributes);
766 #endif
767 
768  if (check_required_attribute("name", attrs, nb_attributes)) {
769  BaseType *btp = dmr()->factory()->NewVariable(t, get_attribute_val("name", attrs, nb_attributes));
770  if (!btp) {
771  dmr_fatal_error(this, "Could not instantiate the variable '%s'.", xml_attrs["name"].value.c_str());
772  return;
773  }
774 
775  if ((t == dods_enum_c) && check_required_attribute("enum", attrs, nb_attributes)) {
776  D4EnumDef *enum_def = 0;
777  string enum_path = get_attribute_val("enum", attrs, nb_attributes);
778  if (enum_path[0] == '/')
779  enum_def = dmr()->root()->find_enum_def(enum_path);
780  else
781  enum_def = top_group()->find_enum_def(enum_path);
782 
783  if (!enum_def) dmr_fatal_error(this, "Could not find the Enumeration definition '%s'.", enum_path.c_str());
784 
785  static_cast<D4Enum*>(btp)->set_enumeration(enum_def);
786  }
787 
788  btp->set_is_dap4(true); // see comment above
789  push_basetype(btp);
790 
791  push_attributes(btp->attributes());
792 
793  push_state(s);
794  }
795 }
796 
807 void DmrppParserSax2::dmr_start_document(void * p)
808 {
809  DmrppParserSax2 *parser = static_cast<DmrppParserSax2*>(p);
810  parser->error_msg = "";
811  parser->char_data = "";
812 
813  // Set this in intern_helper so that the loop test for the parser_end
814  // state works for the first iteration. It seems like XMLParseChunk calls this
815  // function on it's first run. jhrg 9/16/13
816  // parser->push_state(parser_start);
817 
818  parser->push_attributes(parser->dmr()->root()->attributes());
819 
820  BESDEBUG(PARSER, prolog << "Parser start state: " << states[parser->get_state()] << endl);
821 }
822 
825 void DmrppParserSax2::dmr_end_document(void * p)
826 {
827  DmrppParserSax2 *parser = static_cast<DmrppParserSax2*>(p);
828 
829  BESDEBUG(PARSER, prolog << "Parser end state: " << states[parser->get_state()] << endl);
830 
831  if (parser->get_state() != parser_end)
832  DmrppParserSax2::dmr_error(parser, "The document contained unbalanced tags.");
833 
834  // If we've found any sort of error, don't make the DMR; intern() will
835  // take care of the error.
836  if (parser->get_state() == parser_error || parser->get_state() == parser_fatal_error) return;
837 
838  if (!parser->empty_basetype() || parser->empty_group())
839  DmrppParserSax2::dmr_error(parser,
840  "The document did not contain a valid root Group or contained unbalanced tags.");
841 
842  if(BESDebug::IsSet(PARSER)){
843  ostream *os = BESDebug::GetStrm();
844  *os << prolog << "parser->top_group() BEGIN " << endl;
845  parser->top_group()->dump(*os);
846  *os << endl << prolog << "parser->top_group() END " << endl;
847  }
848 
849  parser->pop_group(); // leave the stack 'clean'
850  parser->pop_attributes();
851 }
852 
853 void DmrppParserSax2::dmr_start_element(void *p, const xmlChar *l, const xmlChar *prefix, const xmlChar *URI,
854  int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int /*nb_defaulted*/, const xmlChar **attributes)
855 {
856  DmrppParserSax2 *parser = static_cast<DmrppParserSax2*>(p);
857  const char *localname = reinterpret_cast<const char *>(l);
858 
859  string this_element_ns_name(URI ? (char *) URI : "null");
860 
861  if (parser->get_state() != parser_error) {
862  string dap4_ns_name = DapXmlNamspaces::getDapNamespaceString(DAP_4_0);
863  BESDEBUG(PARSER, prolog << "dap4_ns_name: " << dap4_ns_name << endl);
864 
865  if (this_element_ns_name == dmrpp_namespace) {
866  if (strcmp(localname, "chunkDimensionSizes") == 0) {
867  BESDEBUG(PARSER, prolog << "Found dmrpp:chunkDimensionSizes element. Pushing state." << endl);
868  parser->push_state(inside_dmrpp_chunkDimensionSizes_element);
869  }
870  else if (strcmp(localname, "compact") == 0) {
871  BESDEBUG(PARSER, prolog << "Found dmrpp:compact element. Pushing state." << endl);
872  parser->push_state(inside_dmrpp_compact_element);
873  }
874  else {
875  BESDEBUG(PARSER,
876  prolog << "Start of element in dmrpp namespace: " << localname << " detected." << endl);
877  parser->push_state(inside_dmrpp_object);
878  }
879  }
880  else if (this_element_ns_name != dap4_ns_name) {
881  BESDEBUG(PARSER, prolog << "Start of non DAP4 element: " << localname << " detected." << endl);
882  parser->push_state(not_dap4_element);
883  }
884  }
885 
886  BESDEBUG(PARSER, prolog << "Start element " << localname << " prefix: " << (prefix ? (char *) prefix : "null") << " ns: "
887  << this_element_ns_name << " (state: " << states[parser->get_state()] << ")" << endl);
888 
889  switch (parser->get_state()) {
890  case parser_start:
891  if (is_not(localname, "Dataset"))
892  DmrppParserSax2::dmr_error(parser, "Expected DMR to start with a Dataset element; found '%s' instead.",
893  localname);
894 
895  parser->root_ns = URI ? (const char *) URI : "";
896 
897 #if 0
898  parser->transfer_xml_attrs(attributes, nb_attributes);
899 #endif
900 
901  if (parser->check_required_attribute(string("name"), attributes, nb_attributes)) parser->dmr()->set_name(parser->get_attribute_val("name", attributes, nb_attributes));
902 
903  if (parser->check_attribute("dapVersion", attributes, nb_attributes))
904  parser->dmr()->set_dap_version(parser->get_attribute_val("dapVersion", attributes, nb_attributes));
905 
906  if (parser->check_attribute("dmrVersion", attributes, nb_attributes))
907  parser->dmr()->set_dmr_version(parser->get_attribute_val("dmrVersion", attributes, nb_attributes));
908 
909  if (parser->check_attribute("base", attributes, nb_attributes)) {
910  parser->dmr()->set_request_xml_base(parser->get_attribute_val("base", attributes, nb_attributes));
911  }
912  BESDEBUG(PARSER, prolog << "Dataset xml:base is set to '" << parser->dmr()->request_xml_base() << "'" << endl);
913 
914  if (parser->check_attribute("href", attributes, nb_attributes)) {
915  parser->dmrpp_dataset_href = parser->get_attribute_val("href", attributes, nb_attributes);
916  BESDEBUG(PARSER, prolog << "Attempting to locate and cache the effective URL for Dataset URL: " << parser->dmrpp_dataset_href << endl);
917  string effective_url = EffectiveUrlCache::TheCache()->get_effective_url(parser->dmrpp_dataset_href);
918  BESDEBUG(PARSER, prolog << "EffectiveUrlCache::get_effective_url() returned: " << effective_url << endl);
919  }
920  BESDEBUG(PARSER, prolog << "Dataset dmrpp:href is set to '" << parser->dmrpp_dataset_href << "'" << endl);
921 
922  if (!parser->root_ns.empty()) parser->dmr()->set_namespace(parser->root_ns);
923 
924  // Push the root Group on the stack
925  parser->push_group(parser->dmr()->root());
926 
927  parser->push_state(inside_dataset);
928 
929  break;
930 
931  // Both inside dataset and inside group can have the same stuff.
932  // The difference is that the Dataset holds the root group, which
933  // must be present; other groups are optional
934  case inside_dataset:
935  case inside_group:
936  if (parser->process_enum_def(localname, attributes, nb_attributes))
937  parser->push_state(inside_enum_def);
938  else if (parser->process_dimension_def(localname, attributes, nb_attributes))
939  parser->push_state(inside_dim_def);
940  else if (parser->process_group(localname, attributes, nb_attributes))
941  parser->push_state(inside_group);
942  else if (parser->process_variable(localname, attributes, nb_attributes))
943  // This will push either inside_simple_type or inside_structure
944  // onto the parser state stack.
945  break;
946  else if (parser->process_attribute(localname, attributes, nb_attributes))
947  // This will push either inside_attribute, inside_attribute_container
948  // or inside_otherxml_attribute onto the parser state stack
949  break;
950  else
951  DmrppParserSax2::dmr_error(parser,
952  "Expected an Attribute, Enumeration, Dimension, Group or variable element; found '%s' instead.",
953  localname);
954  break;
955 
956  case inside_attribute_container:
957  if (parser->process_attribute(localname, attributes, nb_attributes))
958  break;
959  else
960  DmrppParserSax2::dmr_error(parser, "Expected an Attribute element; found '%s' instead.", localname);
961  break;
962 
963  case inside_attribute:
964  if (parser->process_attribute(localname, attributes, nb_attributes))
965  break;
966  else if (strcmp(localname, "Value") == 0)
967  parser->push_state(inside_attribute_value);
968  else
969  dmr_error(parser, "Expected an 'Attribute' or 'Value' element; found '%s' instead.", localname);
970  break;
971 
972  case inside_attribute_value:
973  // Attribute values are processed by the end element code.
974  break;
975 
976  case inside_other_xml_attribute:
977  parser->other_xml_depth++;
978 
979  // Accumulate the elements here
980  parser->other_xml.append("<");
981  if (prefix) {
982  parser->other_xml.append((const char *) prefix);
983  parser->other_xml.append(":");
984  }
985  parser->other_xml.append(localname);
986 
987  if (nb_namespaces != 0) {
988  parser->transfer_xml_ns(namespaces, nb_namespaces);
989 
990  for (map<string, string>::iterator i = parser->namespace_table.begin(); i != parser->namespace_table.end();
991  ++i) {
992  parser->other_xml.append(" xmlns");
993  if (!i->first.empty()) {
994  parser->other_xml.append(":");
995  parser->other_xml.append(i->first);
996  }
997  parser->other_xml.append("=\"");
998  parser->other_xml.append(i->second);
999  parser->other_xml.append("\"");
1000  }
1001  }
1002 
1003  if (nb_attributes != 0) {
1004 #if 0
1005  parser->transfer_xml_attrs(attributes, nb_attributes);
1006 #endif
1007  for (XMLAttrMap::iterator i = parser->xml_attr_begin(); i != parser->xml_attr_end(); ++i) {
1008  parser->other_xml.append(" ");
1009  if (!i->second.prefix.empty()) {
1010  parser->other_xml.append(i->second.prefix);
1011  parser->other_xml.append(":");
1012  }
1013  parser->other_xml.append(i->first);
1014  parser->other_xml.append("=\"");
1015  parser->other_xml.append(i->second.value);
1016  parser->other_xml.append("\"");
1017  }
1018  }
1019 
1020  parser->other_xml.append(">");
1021  break;
1022 
1023  case inside_enum_def:
1024  // process an EnumConst element
1025  if (parser->process_enum_const(localname, attributes, nb_attributes))
1026  parser->push_state(inside_enum_const);
1027  else
1028  dmr_error(parser, "Expected an 'EnumConst' element; found '%s' instead.", localname);
1029  break;
1030 
1031  case inside_enum_const:
1032  // No content; nothing to do
1033  break;
1034 
1035  case inside_dim_def:
1036  // No content; nothing to do
1037  break;
1038 
1039  case inside_dim:
1040  // No content.
1041  break;
1042 
1043  case inside_map:
1044  // No content.
1045  break;
1046 
1047  case inside_simple_type:
1048  if (parser->process_attribute(localname, attributes, nb_attributes))
1049  break;
1050  else if (parser->process_dimension(localname, attributes, nb_attributes))
1051  parser->push_state(inside_dim);
1052  else if (parser->process_map(localname, attributes, nb_attributes))
1053  parser->push_state(inside_map);
1054  else
1055  dmr_error(parser, "Expected an 'Attribute', 'Dim' or 'Map' element; found '%s' instead.", localname);
1056  break;
1057 
1058  case inside_constructor:
1059  if (parser->process_variable(localname, attributes, nb_attributes))
1060  // This will push either inside_simple_type or inside_structure
1061  // onto the parser state stack.
1062  break;
1063  else if (parser->process_attribute(localname, attributes, nb_attributes))
1064  break;
1065  else if (parser->process_dimension(localname, attributes, nb_attributes))
1066  parser->push_state(inside_dim);
1067  else if (parser->process_map(localname, attributes, nb_attributes))
1068  parser->push_state(inside_map);
1069  else
1070  DmrppParserSax2::dmr_error(parser,
1071  "Expected an Attribute, Dim, Map or variable element; found '%s' instead.", localname);
1072  break;
1073 
1074  case not_dap4_element:
1075  BESDEBUG(PARSER, prolog << "SKIPPING unexpected element. localname: " << localname << "namespace: "
1076  << this_element_ns_name << endl);
1077  break;
1078 
1079  case inside_dmrpp_compact_element:
1080  if (parser->process_dmrpp_compact_start(localname)) {
1081  BESDEBUG(PARSER, prolog << "Call to parser->process_dmrpp_compact_start() completed." << endl);
1082  }
1083  break;
1084 
1085  case inside_dmrpp_object: {
1086  BESDEBUG(PARSER, prolog << "Inside dmrpp namespaced element. localname: " << localname << endl);
1087  assert(this_element_ns_name == dmrpp_namespace);
1088 
1089 #if 0
1090  parser->transfer_xml_attrs(attributes, nb_attributes); // load up xml_attrs
1091 #endif
1092 
1093  BaseType *bt = parser->top_basetype();
1094  if (!bt) throw BESInternalError("Could locate parent BaseType during parse operation.", __FILE__, __LINE__);
1095 
1096  DmrppCommon *dc = dynamic_cast<DmrppCommon*>(bt); // Get the Dmrpp common info
1097  if (!dc)
1098  throw BESInternalError("Could not cast BaseType to DmrppType in the drmpp handler.", __FILE__, __LINE__);
1099 
1100  // Ingest the dmrpp:chunks element and it attributes
1101  if (strcmp(localname, "chunks") == 0) {
1102  BESDEBUG(PARSER, prolog << "DMR++ chunks element. localname: " << localname << endl);
1103 
1104  if (parser->check_attribute("compressionType", attributes, nb_attributes)) {
1105  string compression_type_string(parser->get_attribute_val("compressionType", attributes, nb_attributes));
1106  dc->ingest_compression_type(compression_type_string);
1107 
1108  BESDEBUG(PARSER, prolog << "Processed attribute 'compressionType=\"" <<
1109  compression_type_string << "\"'" << endl);
1110  }
1111  else {
1112  BESDEBUG(PARSER, prolog << "There was no 'compressionType' attribute associated with the variable '"
1113  << bt->type_name() << " " << bt->name() << "'" << endl);
1114  }
1115 
1116  if (parser->check_attribute("byteOrder", attributes, nb_attributes)) {
1117  string byte_order_string(parser->get_attribute_val("byteOrder", attributes, nb_attributes));
1118  dc->ingest_byte_order(byte_order_string);
1119 
1120  BESDEBUG(PARSER, prolog << "Processed attribute 'byteOrder=\"" << byte_order_string << "\"'" << endl);
1121  }
1122  else {
1123  BESDEBUG(PARSER, prolog << "There was no 'byteOrder' attribute associated with the variable '" << bt->type_name()
1124  << " " << bt->name() << "'" << endl);
1125  }
1126  }
1127  // Ingest an dmrpp:chunk element and its attributes
1128  else if (strcmp(localname, "chunk") == 0) {
1129  string data_url = "unknown_data_location";
1130  if (parser->check_attribute("href", attributes, nb_attributes)) {
1131 #if 0
1132  istringstream data_url_ss(parser->xml_attrs["href"].value);
1133  data_url = data_url_ss.str();
1134  BESDEBUG(PARSER, prolog << "Processing 'href' value into data_url. href: " <<
1135  data_url_ss.str() << endl);
1136 #endif
1137 
1138  data_url = parser->get_attribute_val("href", attributes, nb_attributes);
1139  BESDEBUG(PARSER, prolog << "Processing 'href' value into data_url. href: " << data_url << endl);
1140  // We may have to cache the last accessed/redirect URL for data_url here because this URL
1141  // may be unique to this chunk.
1142  BESDEBUG(PARSER, prolog << "Attempting to locate and cache the effective URL for Chunk URL: " << parser->dmrpp_dataset_href << endl);
1143  string effective_url = EffectiveUrlCache::TheCache()->get_effective_url(data_url);
1144  BESDEBUG(PARSER, prolog << "EffectiveUrlCache::get_effective_url() returned: " << effective_url << endl);
1145  }
1146  else {
1147  BESDEBUG(PARSER, prolog << "No attribute 'href' located. Trying Dataset/@dmrpp:href..." << endl);
1148  // This bit of magic sets the URL used to get the data and it's
1149  // magic in part because it may be a file or an http URL
1150  data_url = parser->dmrpp_dataset_href;
1151  // We don't have to conditionally cache parser->dmrpp_dataset_href here because that was
1152  // done in the evaluation of the parser_start case.
1153  BESDEBUG(PARSER, prolog << "Processing dmrpp:href into data_url. dmrpp:href='" << data_url << "'" << endl);
1154  }
1155  // First we see if it's an HTTP URL, and if not we
1156  // make a local file url based on the Catalog Root
1157 #if 0
1158  std::string http("http://");
1159  std::string https("https://");
1160  std::string file("file://");
1161  if (data_url.compare(0, http.size(), http) && data_url.compare(0, https.size(), https)
1162  && data_url.compare(0, file.size(), file))
1163 #endif
1164 
1165  if (data_url.find("http://") != 0 && data_url.find("https://") != 0 && data_url.find("file://") != 0) {
1166  BESDEBUG(PARSER, prolog << "data_url does NOT start with 'http://', 'https://' or 'file://'. "
1167  "Retrieving default catalog root directory" << endl);
1168 
1169  // Now we try to find the default catalog. If we can't find it we punt and leave it be.
1171  if (!defcat) {
1172  BESDEBUG(PARSER, prolog << "Not able to find the default catalog." << endl);
1173  }
1174  else {
1175  // Found the catalog so we get the root dir; make a file URL.
1177 
1178  BESDEBUG(PARSER, prolog << "Found default catalog root_dir: '" << utils->get_root_dir() << "'" << endl);
1179 
1180  data_url = BESUtil::assemblePath(utils->get_root_dir(), data_url, true);
1181  data_url = "file://" + data_url;
1182  }
1183  }
1184 
1185  BESDEBUG(PARSER, prolog << "Processed data_url: '" << data_url << "'" << endl);
1186 
1187  unsigned long long offset = 0;
1188  unsigned long long size = 0;
1189  string chunk_position_in_array("");
1190  std::string byte_order = dc->get_byte_order();
1191 
1192  if (parser->check_required_attribute("offset", attributes, nb_attributes)) {
1193  istringstream offset_ss(parser->get_attribute_val("offset", attributes, nb_attributes));
1194  offset_ss >> offset;
1195  BESDEBUG(PARSER, prolog << "Processed attribute 'offset=\"" << offset << "\"'" << endl);
1196  }
1197  else {
1198  dmr_error(parser, "The hdf:byteStream element is missing the required attribute 'offset'.");
1199  }
1200 
1201  if (parser->check_required_attribute("nBytes", attributes, nb_attributes)) {
1202  istringstream size_ss(parser->get_attribute_val("nBytes", attributes, nb_attributes));
1203  size_ss >> size;
1204  BESDEBUG(PARSER, prolog << "Processed attribute 'nBytes=\"" << size << "\"'" << endl);
1205  }
1206  else {
1207  dmr_error(parser, "The hdf:byteStream element is missing the required attribute 'size'.");
1208  }
1209 
1210  if (parser->check_attribute("chunkPositionInArray", attributes, nb_attributes)) {
1211  istringstream chunk_position_ss(parser->get_attribute_val("chunkPositionInArray", attributes, nb_attributes));
1212  chunk_position_in_array = chunk_position_ss.str();
1213  BESDEBUG(PARSER, prolog << "Found attribute 'chunkPositionInArray' value: " << chunk_position_ss.str() << endl);
1214  }
1215  else {
1216  BESDEBUG(PARSER, prolog << "No attribute 'chunkPositionInArray' located" << endl);
1217  }
1218 
1219  dc->add_chunk(data_url, byte_order, size, offset, chunk_position_in_array);
1220  }
1221  }
1222  break;
1223 
1224  case inside_dmrpp_chunkDimensionSizes_element:
1225  // The dmrpp:chunkDimensionSizes value is processed by the end element code.
1226  break;
1227 
1228  case parser_unknown:
1229  case parser_error:
1230  case parser_fatal_error:
1231  break;
1232 
1233  case parser_end:
1234  // FIXME Error?
1235  break;
1236  }
1237 
1238  BESDEBUG(PARSER, prolog << "Start element exit state: " << states[parser->get_state()] << endl);
1239 }
1240 
1241 
1242 
1243 
1244 void DmrppParserSax2::dmr_end_element(void *p, const xmlChar *l, const xmlChar *prefix, const xmlChar *URI)
1245 {
1246  DmrppParserSax2 *parser = static_cast<DmrppParserSax2*>(p);
1247  const char *localname = (const char *) l;
1248 
1249  BESDEBUG(PARSER, prolog << "End element " << localname << " (state " << states[parser->get_state()] << ")" << endl);
1250 
1251  switch (parser->get_state()) {
1252  case parser_start:
1253  dmr_fatal_error(parser, "Unexpected state, inside start state while processing element '%s'.", localname);
1254  break;
1255 
1256  case inside_dataset:
1257  if (is_not(localname, "Dataset"))
1258  DmrppParserSax2::dmr_error(parser, "Expected an end Dataset tag; found '%s' instead.", localname);
1259 
1260  parser->pop_state();
1261  if (parser->get_state() != parser_start)
1262  dmr_fatal_error(parser, "Unexpected state, expected start state.");
1263  else {
1264  parser->pop_state();
1265  parser->push_state(parser_end);
1266  }
1267  break;
1268 
1269  case inside_group: {
1270  if (is_not(localname, "Group"))
1271  DmrppParserSax2::dmr_error(parser, "Expected an end tag for a Group; found '%s' instead.", localname);
1272 
1273  if (!parser->empty_basetype() || parser->empty_group())
1274  DmrppParserSax2::dmr_error(parser,
1275  "The document did not contain a valid root Group or contained unbalanced tags.");
1276 
1277  parser->pop_group();
1278  parser->pop_state();
1279  break;
1280  }
1281 
1282  case inside_attribute_container:
1283  if (is_not(localname, "Attribute"))
1284  DmrppParserSax2::dmr_error(parser, "Expected an end Attribute tag; found '%s' instead.", localname);
1285 
1286  parser->pop_state();
1287  parser->pop_attributes();
1288  break;
1289 
1290  case inside_attribute:
1291  if (is_not(localname, "Attribute"))
1292  DmrppParserSax2::dmr_error(parser, "Expected an end Attribute tag; found '%s' instead.", localname);
1293 
1294  parser->pop_state();
1295  break;
1296 
1297  case inside_attribute_value: {
1298  if (is_not(localname, "Value"))
1299  DmrppParserSax2::dmr_error(parser, "Expected an end value tag; found '%s' instead.", localname);
1300 
1301  parser->pop_state();
1302 
1303  // The old code added more values using the name and type as
1304  // indexes to find the correct attribute. Use get() for that
1305  // now. Or fix this code to keep a pointer to the to attribute...
1306  D4Attributes *attrs = parser->top_attributes();
1307  D4Attribute *attr = attrs->get(parser->dods_attr_name);
1308  if (!attr) {
1309  attr = new D4Attribute(parser->dods_attr_name, StringToD4AttributeType(parser->dods_attr_type));
1310  attrs->add_attribute_nocopy(attr);
1311  }
1312  attr->add_value(parser->char_data);
1313 
1314  parser->char_data = ""; // Null this after use.
1315  break;
1316  }
1317 
1318  case inside_other_xml_attribute: {
1319  if (strcmp(localname, "Attribute") == 0 && parser->root_ns == (const char *) URI) {
1320  parser->pop_state();
1321 
1322  // The old code added more values using the name and type as
1323  // indexes to find the correct attribute. Use get() for that
1324  // now. Or fix this code to keep a pointer to the to attribute...
1325  D4Attributes *attrs = parser->top_attributes();
1326  D4Attribute *attr = attrs->get(parser->dods_attr_name);
1327  if (!attr) {
1328  attr = new D4Attribute(parser->dods_attr_name, StringToD4AttributeType(parser->dods_attr_type));
1329  attrs->add_attribute_nocopy(attr);
1330  }
1331  attr->add_value(parser->other_xml);
1332 
1333  parser->other_xml = ""; // Null this after use.
1334  }
1335  else {
1336  if (parser->other_xml_depth == 0) {
1337  DmrppParserSax2::dmr_error(parser, "Expected an OtherXML attribute to end! Instead I found '%s'",
1338  localname);
1339  break;
1340  }
1341  parser->other_xml_depth--;
1342 
1343  parser->other_xml.append("</");
1344  if (prefix) {
1345  parser->other_xml.append((const char *) prefix);
1346  parser->other_xml.append(":");
1347  }
1348  parser->other_xml.append(localname);
1349  parser->other_xml.append(">");
1350  }
1351  break;
1352  }
1353 
1354  case inside_enum_def:
1355  if (is_not(localname, "Enumeration"))
1356  DmrppParserSax2::dmr_error(parser, "Expected an end Enumeration tag; found '%s' instead.", localname);
1357  if (!parser->top_group())
1358  DmrppParserSax2::dmr_fatal_error(parser,
1359  "Expected a Group to be the current item, while finishing up an Enumeration.");
1360  else {
1361  // copy the pointer; not a deep copy
1362  parser->top_group()->enum_defs()->add_enum_nocopy(parser->enum_def());
1363  // Set the enum_def to null; next call to enum_def() will
1364  // allocate a new object
1365  parser->clear_enum_def();
1366  parser->pop_state();
1367  }
1368  break;
1369 
1370  case inside_enum_const:
1371  if (is_not(localname, "EnumConst"))
1372  DmrppParserSax2::dmr_error(parser, "Expected an end EnumConst tag; found '%s' instead.", localname);
1373 
1374  parser->pop_state();
1375  break;
1376 
1377  case inside_dim_def: {
1378  if (is_not(localname, "Dimension"))
1379  DmrppParserSax2::dmr_error(parser, "Expected an end Dimension tag; found '%s' instead.", localname);
1380 
1381  if (!parser->top_group())
1382  DmrppParserSax2::dmr_error(parser,
1383  "Expected a Group to be the current item, while finishing up an Dimension.");
1384 
1385  parser->top_group()->dims()->add_dim_nocopy(parser->dim_def());
1386  // Set the dim_def to null; next call to dim_def() will
1387  // allocate a new object. Calling 'clear' is important because
1388  // the cleanup method will free dim_def if it's not null and
1389  // we just copied the pointer in the add_dim_nocopy() call
1390  // above.
1391  parser->clear_dim_def();
1392  parser->pop_state();
1393  break;
1394  }
1395 
1396  case inside_simple_type:
1397  if (is_simple_type(get_type(localname))) {
1398  BaseType *btp = parser->top_basetype();
1399  parser->pop_basetype();
1400  parser->pop_attributes();
1401  BaseType *parent = 0;
1402  if (!parser->empty_basetype())
1403  parent = parser->top_basetype();
1404  else if (!parser->empty_group())
1405  parent = parser->top_group();
1406  else {
1407  dmr_fatal_error(parser, "Both the Variable and Groups stacks are empty while closing a %s element.",
1408  localname);
1409  delete btp;
1410  parser->pop_state();
1411  break;
1412  }
1413  if (parent->type() == dods_array_c)
1414  static_cast<Array*>(parent)->prototype()->add_var_nocopy(btp);
1415  else
1416  parent->add_var_nocopy(btp);
1417  }
1418  else
1419  DmrppParserSax2::dmr_error(parser, "Expected an end tag for a simple type; found '%s' instead.", localname);
1420 
1421  parser->pop_state();
1422  break;
1423 
1424  case inside_dim:
1425  if (is_not(localname, "Dim"))
1426  DmrppParserSax2::dmr_fatal_error(parser, "Expected an end Dim tag; found '%s' instead.", localname);
1427 
1428  parser->pop_state();
1429  break;
1430 
1431  case inside_map:
1432  if (is_not(localname, "Map"))
1433  DmrppParserSax2::dmr_fatal_error(parser, "Expected an end Map tag; found '%s' instead.", localname);
1434 
1435  parser->pop_state();
1436  break;
1437 
1438  case inside_constructor: {
1439  if (strcmp(localname, "Structure") != 0 && strcmp(localname, "Sequence") != 0) {
1440  DmrppParserSax2::dmr_error(parser, "Expected an end tag for a constructor; found '%s' instead.", localname);
1441  return;
1442  }
1443  BaseType *btp = parser->top_basetype();
1444  parser->pop_basetype();
1445  parser->pop_attributes();
1446  BaseType *parent = 0;
1447  if (!parser->empty_basetype())
1448  parent = parser->top_basetype();
1449  else if (!parser->empty_group())
1450  parent = parser->top_group();
1451  else {
1452  dmr_fatal_error(parser, "Both the Variable and Groups stacks are empty while closing a %s element.",
1453  localname);
1454  delete btp;
1455  parser->pop_state();
1456  break;
1457  }
1458  // TODO Why doesn't this code mirror the simple_var case and test
1459  // for the parent being an array? jhrg 10/13/13
1460  parent->add_var_nocopy(btp);
1461  parser->pop_state();
1462  break;
1463  }
1464 
1465  case not_dap4_element:
1466  BESDEBUG(PARSER, prolog << "End of non DAP4 element: " << localname << endl);
1467  parser->pop_state();
1468  break;
1469 
1470 #if 1
1471  case inside_dmrpp_compact_element: {
1472  parser->process_dmrpp_compact_end(localname);
1473  BESDEBUG(PARSER, prolog << "End of dmrpp compact element: " << localname << endl);
1474  parser->pop_state();
1475  break;
1476  }
1477 #endif
1478 
1479  case inside_dmrpp_object: {
1480  BESDEBUG(PARSER, prolog << "End of dmrpp namespace element: " << localname << endl);
1481  parser->pop_state();
1482  break;
1483  }
1484 
1485  case inside_dmrpp_chunkDimensionSizes_element: {
1486  BESDEBUG(PARSER, prolog << "End of chunkDimensionSizes element. localname: " << localname << endl);
1487 
1488  if (is_not(localname, "chunkDimensionSizes"))
1489  DmrppParserSax2::dmr_error(parser, "Expected an end value tag; found '%s' instead.", localname);
1490  DmrppCommon *dc = dynamic_cast<DmrppCommon*>(parser->top_basetype()); // Get the Dmrpp common info
1491  if (!dc)
1492  throw BESInternalError("Could not cast BaseType to DmrppType in the drmpp handler.", __FILE__, __LINE__);
1493  string element_text(parser->char_data);
1494  BESDEBUG(PARSER, prolog << "chunkDimensionSizes element_text: '" << element_text << "'" << endl);
1495  dc->parse_chunk_dimension_sizes(element_text);
1496  parser->char_data = ""; // Null this after use.
1497  parser->pop_state();
1498  break;
1499  }
1500 
1501  case parser_unknown:
1502  parser->pop_state();
1503  break;
1504 
1505  case parser_error:
1506  case parser_fatal_error:
1507  break;
1508 
1509  case parser_end:
1510  // FIXME Error?
1511  break;
1512  }
1513 
1514 
1515  BESDEBUG(PARSER, prolog << "End element exit state: " << states[parser->get_state()] <<
1516  " ("<<parser->get_state()<<")"<< endl);
1517 }
1518 
1522 void DmrppParserSax2::dmr_get_characters(void * p, const xmlChar * ch, int len)
1523 {
1524  DmrppParserSax2 *parser = static_cast<DmrppParserSax2*>(p);
1525 
1526  switch (parser->get_state()) {
1527  case inside_attribute_value:
1528  case inside_dmrpp_chunkDimensionSizes_element:
1529  case inside_dmrpp_compact_element:
1530  parser->char_data.append((const char *) (ch), len);
1531  BESDEBUG(PARSER, prolog << "Characters[" << parser->char_data.size() << "]" << parser->char_data << "'" << endl);
1532  break;
1533 
1534  case inside_other_xml_attribute:
1535  parser->other_xml.append((const char *) (ch), len);
1536  BESDEBUG(PARSER, prolog << "Other XML Characters: '" << parser->other_xml << "'" << endl);
1537  break;
1538 
1539  default:
1540  break;
1541  }
1542 }
1543 
1548 void DmrppParserSax2::dmr_ignoreable_whitespace(void *p, const xmlChar *ch, int len)
1549 {
1550  DmrppParserSax2 *parser = static_cast<DmrppParserSax2*>(p);
1551 
1552  switch (parser->get_state()) {
1553  case inside_other_xml_attribute:
1554  parser->other_xml.append((const char *) (ch), len);
1555  break;
1556 
1557  default:
1558  break;
1559  }
1560 }
1561 
1567 void DmrppParserSax2::dmr_get_cdata(void *p, const xmlChar *value, int len)
1568 {
1569  DmrppParserSax2 *parser = static_cast<DmrppParserSax2*>(p);
1570 
1571  switch (parser->get_state()) {
1572  case inside_other_xml_attribute:
1573  parser->other_xml.append((const char *) (value), len);
1574  break;
1575 
1576  case parser_unknown:
1577  break;
1578 
1579  default:
1580  DmrppParserSax2::dmr_error(parser, "Found a CData block but none are allowed by DAP4.");
1581 
1582  break;
1583  }
1584 }
1585 
1590 xmlEntityPtr DmrppParserSax2::dmr_get_entity(void *, const xmlChar * name)
1591 {
1592  return xmlGetPredefinedEntity(name);
1593 }
1594 
1605 void DmrppParserSax2::dmr_fatal_error(void * p, const char *msg, ...)
1606 {
1607  va_list args;
1608  DmrppParserSax2 *parser = static_cast<DmrppParserSax2*>(p);
1609 
1610  parser->push_state(parser_fatal_error);
1611 
1612  va_start(args, msg);
1613  char str[1024];
1614  vsnprintf(str, 1024, msg, args);
1615  va_end(args);
1616 
1617  int line = xmlSAX2GetLineNumber(parser->context);
1618 
1619  if (!parser->error_msg.empty()) parser->error_msg += "\n";
1620  parser->error_msg += "At line " + long_to_string(line) + ": " + string(str);
1621 }
1622 
1623 void DmrppParserSax2::dmr_error(void *p, const char *msg, ...)
1624 {
1625  va_list args;
1626  DmrppParserSax2 *parser = static_cast<DmrppParserSax2*>(p);
1627 
1628  parser->push_state(parser_error);
1629 
1630  va_start(args, msg);
1631  char str[1024];
1632  vsnprintf(str, 1024, msg, args);
1633  va_end(args);
1634 
1635  int line = xmlSAX2GetLineNumber(parser->context);
1636 
1637  if (!parser->error_msg.empty()) parser->error_msg += "\n";
1638  parser->error_msg += "At line " + long_to_string(line) + ": " + string(str);
1639 }
1641 
1645 void DmrppParserSax2::cleanup_parse()
1646 {
1647  bool wellFormed = context->wellFormed;
1648  bool valid = context->valid;
1649 
1650  // context->sax = NULL;
1651  // Leak. Removed the above. jhrg 6/19/19
1652  xmlFreeParserCtxt(context);
1653 
1654  delete d_enum_def;
1655  d_enum_def = 0;
1656 
1657  delete d_dim_def;
1658  d_dim_def = 0;
1659 
1660  // If there's an error, there may still be items on the stack at the
1661  // end of the parse.
1662  while (!btp_stack.empty()) {
1663  delete top_basetype();
1664  pop_basetype();
1665  }
1666 
1667  if (!wellFormed)
1668  throw BESInternalError("The DMR was not well formed. " + error_msg,__FILE__,__LINE__);
1669  else if (!valid)
1670  throw BESInternalError("The DMR was not valid." + error_msg,__FILE__,__LINE__);
1671  else if (get_state() == parser_error)
1672  throw BESInternalError(error_msg,__FILE__,__LINE__);
1673  else if (get_state() == parser_fatal_error) throw BESInternalError(error_msg,__FILE__,__LINE__);
1674 }
1675 
1689 void DmrppParserSax2::intern(istream &f, DMR *dest_dmr)
1690 {
1691  // Code example from libxml2 docs re: read from a stream.
1692 
1693  if (!f.good()) throw BESInternalError(prolog + "ERROR - Supplied istream instance not open or read error",__FILE__,__LINE__);
1694  if (!dest_dmr) throw BESInternalError(prolog + "THe supplied DMR object pointer is null", __FILE__, __LINE__);
1695 
1696  d_dmr = dest_dmr; // dump values here
1697 
1698  int line_num = 1;
1699  string line;
1700 
1701  // Get the XML prolog line (looks like: <?xml ... ?> )
1702  getline(f, line);
1703  if (line.length() == 0) throw BESInternalError(prolog + "ERROR - No input found when parsing the DMR++",__FILE__,__LINE__);
1704 
1705  BESDEBUG(PARSER, prolog << "line: (" << line_num << "): " << endl << line << endl << endl);
1706 
1707  context = xmlCreatePushParserCtxt(&dmrpp_sax_parser, this, line.c_str(), line.length(), "stream");
1708  context->validate = true;
1709  push_state(parser_start);
1710 
1711  // Get the first chunk of the stuff
1712  long chunk_count = 0;
1713  long chunk_size = 0;
1714 
1715  f.read(d_parse_buffer, D4_PARSE_BUFF_SIZE);
1716  chunk_size=f.gcount();
1717  d_parse_buffer[chunk_size]=0; // null terminate the string. We can do it this way because the buffer is +1 bigger than D4_PARSE_BUFF_SIZE
1718  BESDEBUG(PARSER, prolog << "chunk: (" << chunk_count++ << "): " << endl);
1719  BESDEBUG(PARSER, prolog << "d_parse_buffer: (" << d_parse_buffer << "): " << endl);
1720 
1721  while(!f.eof() && (get_state() != parser_end)){
1722 
1723  xmlParseChunk(context, d_parse_buffer, chunk_size, 0);
1724 
1725  // There is more to read. Get the next chunk
1726  f.read(d_parse_buffer, D4_PARSE_BUFF_SIZE);
1727  chunk_size=f.gcount();
1728  d_parse_buffer[chunk_size]=0; // null terminate the string. We can do it this way because the buffer is +1 bigger than D4_PARSE_BUFF_SIZE
1729  BESDEBUG(PARSER, prolog << "chunk: (" << chunk_count++ << "): " << endl);
1730  BESDEBUG(PARSER, prolog << "d_parse_buffer: (" << d_parse_buffer << "): " << endl);
1731  }
1732 
1733  // This call ends the parse.
1734  xmlParseChunk(context, d_parse_buffer, chunk_size, 1/*terminate*/); // libxml2 call
1735 
1736  // This checks that the state on the parser stack is parser_end and throws
1737  // an exception if it's not (i.e., the loop exited with gcount() == 0).
1738  cleanup_parse();
1739 }
1740 
1741 
1742 
1751 void DmrppParserSax2::intern(const string &document, DMR *dest_dmr)
1752 {
1753  intern(document.c_str(), document.length(), dest_dmr);
1754 }
1755 
1764 void DmrppParserSax2::intern(const char *buffer, int size, DMR *dest_dmr)
1765 {
1766  if (!(size > 0)) return;
1767 
1768  // Code example from libxml2 docs re: read from a stream.
1769 
1770  if (!dest_dmr) throw InternalErr(__FILE__, __LINE__, "DMR object is null");
1771  d_dmr = dest_dmr; // dump values in dest_dmr
1772 
1773  push_state(parser_start);
1774  context = xmlCreatePushParserCtxt(&dmrpp_sax_parser, this, buffer, size, "stream");
1775  context->validate = true;
1776 
1777  // This call ends the parse.
1778  xmlParseChunk(context, buffer, 0, 1/*terminate*/);
1779 
1780  // This checks that the state on the parser stack is parser_end and throws
1781  // an exception if it's not (i.e., the loop exited with gcount() == 0).
1782  cleanup_parse();
1783 }
1784 
1785 } // namespace dmrpp
static BESCatalogList * TheCatalogList()
Get the singleton BESCatalogList instance.
virtual BESCatalog * default_catalog() const
The the default catalog.
const std::string & get_root_dir() const
Get the root directory of the catalog.
Catalogs provide a hierarchical organization for data.
Definition: BESCatalog.h:51
virtual BESCatalogUtils * get_catalog_utils() const
Get a pointer to the utilities, customized for this catalog.
Definition: BESCatalog.h:113
static std::ostream * GetStrm()
return the debug stream
Definition: BESDebug.h:179
static bool IsSet(const std::string &flagName)
see if the debug context flagName is set to true
Definition: BESDebug.h:160
exception thrown if internal error encountered
static std::string assemblePath(const std::string &firstPart, const std::string &secondPart, bool leadingSlash=false, bool trailingSlash=false)
Assemble path fragments making sure that they are separated by a single '/' character.
Definition: BESUtil.cc:821
utility class for the HTTP catalog module
Definition: EffectiveUrl.cc:58
Type
Type of JSON value.
Definition: rapidjson.h:664