libdap++ Updated for version 3.8.2

DDXParserSAX2.h

Go to the documentation of this file.
00001 
00002 // -*- mode: c++; c-basic-offset:4 -*-
00003 
00004 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
00005 // Access Protocol.
00006 
00007 // Copyright (c) 2003 OPeNDAP, Inc.
00008 // Author: James Gallagher <jgallagher@opendap.org>
00009 //
00010 // This library is free software; you can redistribute it and/or
00011 // modify it under the terms of the GNU Lesser General Public
00012 // License as published by the Free Software Foundation; either
00013 // version 2.1 of the License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful,
00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00023 //
00024 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
00025 
00026 #ifndef ddx_parser_h
00027 #define ddx_parser_h
00028 
00029 #include <string>
00030 #include <map>
00031 #include <stack>
00032 
00033 #include <libxml/parserInternals.h>
00034 
00035 #ifndef ddx_exceptions_h
00036 #include "DDXExceptions.h"
00037 #endif
00038 
00039 #ifndef _dds_h
00040 #include "DDS.h"
00041 #endif
00042 
00043 #ifndef _basetype_h
00044 #include "BaseType.h"
00045 #endif
00046 
00047 #ifndef base_type_factory_h
00048 #include "BaseTypeFactory.h"
00049 #endif
00050 
00051 namespace libdap
00052 {
00053 
00079 class DDXParser
00080 {
00081 private:
00084     enum ParseState {
00085         parser_start,
00086 
00087         inside_dataset,
00088 
00089         inside_attribute_container,
00090         inside_attribute,
00091         inside_attribute_value,
00092         inside_other_xml_attribute,
00093 
00094         inside_alias,
00095 
00096         // This covers Byte, ..., Url.
00097         inside_simple_type,
00098 
00099         inside_array,
00100         inside_dimension,
00101 
00102         inside_grid,
00103         inside_map,
00104 
00105         inside_structure,
00106         inside_sequence,
00107 
00108         inside_blob_href,
00109 
00110         parser_unknown,
00111         parser_error
00112     };
00113 
00114     BaseTypeFactory *d_factory;
00115 
00116     // These stacks hold the state of the parse as it progresses.
00117     stack<ParseState> s; // Current parse state
00118     stack<BaseType*> bt_stack; // current variable(s)
00119     stack<AttrTable*> at_stack; // current attribute table
00120 
00121     // Accumulate stuff inside an 'OtherXML' DAP attribute here
00122     string other_xml;
00123 
00124     // When we're parsing unknown XML, how deeply is it nested? This is used
00125     // for the OtherXML DAP attributes.
00126     unsigned int other_xml_depth;
00127     unsigned int unknown_depth;
00128 
00129     // These are used for processing errors.
00130     string error_msg;  // Error message(s), if any.
00131     xmlParserCtxtPtr ctxt; // used for error message line numbers
00132 
00133     // The results of the parse operation are stored in these fields.
00134     DDS *dds;   // dump DDX here
00135     string *blob_href;  // put href to blob here
00136 
00137     // These hold temporary values read during the parse.
00138     string dods_attr_name; // DAP2 attributes, not XML attributes
00139     string dods_attr_type; // ... not XML ...
00140     string char_data;  // char data in value elements; null after use
00141     string root_ns;     // What is the namespace of the root node (Dataset)
00142 
00143     class XMLAttribute {
00144         public:
00145         string prefix;
00146         string nsURI;
00147         string value;
00148 
00149         void clone(const XMLAttribute &src) {
00150             prefix = src.prefix;
00151             nsURI = src.nsURI;
00152             value = src.value;
00153         }
00154 
00155         XMLAttribute() : prefix(""), nsURI(""), value("") {}
00156         XMLAttribute(const string &p, const string &ns, const string &v)
00157             : prefix(p), nsURI(ns), value(v) {}
00158         // 'attributes' as passed from libxml2 is a five element array but this
00159         // ctor gets the back four elements.
00160         XMLAttribute(const xmlChar **attributes/*[4]*/) {
00161             prefix = attributes[0] != 0 ? (const char *)attributes[0]: "";
00162             nsURI = attributes[1] != 0 ? (const char *)attributes[1]: "";
00163             value = string((const char *)attributes[2], (const char *)attributes[3]);
00164         }
00165         XMLAttribute(const XMLAttribute &rhs) {
00166             clone(rhs);
00167         }
00168         XMLAttribute &operator=(const XMLAttribute &rhs) {
00169             if (this == &rhs)
00170                 return *this;
00171             clone(rhs);
00172             return *this;
00173         }
00174     };
00175 
00176     typedef map<string, XMLAttribute> XMLAttrMap;
00177     XMLAttrMap attribute_table; // dump XML attributes here
00178 
00179     XMLAttrMap::iterator attr_table_begin() {
00180         return attribute_table.begin();
00181     }
00182 
00183     XMLAttrMap::iterator attr_table_end() {
00184         return attribute_table.end();
00185     }
00186 
00187     map<string, string> namespace_table;
00188 
00189     // These are kind of silly...
00190     void set_state(DDXParser::ParseState state);
00191     DDXParser::ParseState get_state() const;
00192     void pop_state();
00193 
00194     // Glue for the BaseTypeFactory class.
00195     BaseType *factory(Type t, const string &name);
00196 
00197     // Common cleanup code for intern() and intern_stream()
00198     void cleanup_parse(xmlParserCtxtPtr &context) const;
00199 
00206     void transfer_xml_attrs(const xmlChar **attrs, int nb_attributes);
00207     void transfer_xml_ns(const xmlChar **namespaces, int nb_namespaces);
00208     bool check_required_attribute(const string &attr);
00209     bool check_attribute(const string & attr);
00210 
00211     void process_attribute_element(const xmlChar **attrs, int nb_attrs);
00212     void process_attribute_alias(const xmlChar **attrs, int nb_attrs);
00213 
00214     void process_variable(Type t, ParseState s, const xmlChar **attrs,
00215             int nb_attributes);
00216 
00217     void process_dimension(const xmlChar **attrs, int nb_attrs);
00218     void process_blob(const xmlChar **attrs, int nb_attrs);
00219 
00220     bool is_attribute_or_alias(const char *name, const xmlChar **attrs,
00221             int nb_attributes);
00222     bool is_variable(const char *name, const xmlChar **attrs, int nb_attributes);
00223 
00224     void finish_variable(const char *tag, Type t, const char *expected);
00226 
00228     DDXParser() {}
00229     //{throw InternalErr(__FILE__, __LINE__, "DDXParser internal ctor called!");}
00230 
00231     friend class DDXParserTest;
00232 
00233 public:
00234     DDXParser(BaseTypeFactory *factory)
00235         : d_factory(factory),
00236         other_xml(""), other_xml_depth(0), unknown_depth(0),
00237         error_msg(""), ctxt(0), dds(0), dods_attr_name(""), dods_attr_type(""),
00238         char_data(""), root_ns("")
00239     {}
00240 
00241     void intern(const string &document, DDS *dest_dds, string &cid);
00242     void intern_stream(FILE *in, DDS *dds, string &cid,
00243                 const string &boundary = "");
00244 
00245     static void ddx_start_document(void *parser);
00246     static void ddx_end_document(void *parser);
00247 
00248     static void ddx_sax2_start_element(void *parser,
00249             const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI,
00250             int nb_namespaces, const xmlChar **namespaces, int nb_attributes,
00251             int nb_defaulted, const xmlChar **attributes);
00252     static void ddx_sax2_end_element(void *parser, const xmlChar *localname,
00253             const xmlChar *prefix, const xmlChar *URI);
00254 
00255     static void ddx_get_characters(void *parser, const xmlChar *ch, int len);
00256     static void ddx_ignoreable_whitespace(void *parser,
00257             const xmlChar * ch, int len);
00258     static void ddx_get_cdata(void *parser, const xmlChar *value, int len);
00259 
00260     static xmlEntityPtr ddx_get_entity(void *parser, const xmlChar *name);
00261     static void ddx_fatal_error(void *parser, const char *msg, ...);
00262 };
00263 
00264 } // namespace libdap
00265 
00266 #endif // ddx_parser_h