libdap  Updated for version 3.20.6
libdap4 is an implementation of OPeNDAP's DAP protocol.
chunked_istream.cc
1 // -*- mode: c++; c-basic-offset:4 -*-
2 
3 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
4 // Access Protocol.
5 
6 // Copyright (c) 2009 OPeNDAP, Inc.
7 // Author: James Gallagher <jgallagher@opendap.org>
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 //
23 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24 //
25 // Portions of this code were taken verbatim from Josuttis,
26 // "The C++ Standard Library," p.672
27 
28 #include "config.h"
29 
30 #include <stdint.h>
31 #include <byteswap.h>
32 #include <arpa/inet.h>
33 
34 #include <cstring>
35 #include <vector>
36 
37 #include "chunked_stream.h"
38 #include "chunked_istream.h"
39 
40 #include "Error.h"
41 
42 //#define DODS_DEBUG
43 //#define DODS_DEBUG2
44 #ifdef DODS_DEBUG
45 #include <iostream>
46 #endif
47 
48 #include "util.h"
49 #include "debug.h"
50 
51 namespace libdap {
52 
53 /*
54  This code does not use a 'put back' buffer, but here's a picture of the
55  d_buffer pointer, eback(), gptr() and egptr() that can be used to see how
56  the I/O Stream library's streambuf class works. For the case with no
57  putback, just imagine it as zero and eliminate the leftmost extension. This
58  might also come in useful if the code was extended to support put back. I
59  removed that feature because I don't see it being used with our chunked
60  transmission protocol and it requires an extra call to memcpy() when data
61  are added to the internal buffer.
62 
63  d_buffer d_buffer + putBack
64  | |
65  v v
66  |---------|--------------------------------------------|....
67  | | | .
68  |---------|--------------------------------------------|....
69  ^ ^ ^
70  | | |
71  eback() gptr() egptr()
72 
73  */
74 
84 std::streambuf::int_type
86 {
87  DBG(cerr << "underflow..." << endl);
88  DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
89 
90  // return the next character; uflow() increments the puffer pointer.
91  if (gptr() < egptr())
92  return traits_type::to_int_type(*gptr());
93 
94  // gptr() == egptr() so read more data from the underlying input source.
95 
96  // To read data from the chunked stream, first read the header
97  uint32_t header;
98  d_is.read((char *) &header, 4);
99 #if !BYTE_ORDER_PREFIX && HEADER_IN_NETWORK_BYTE_ORDER
100  // When the endian nature of the server is encoded in the chunk header, the header is
101  // sent using network byte order
102  header = ntohl(header);
103 #endif
104 
105  // There are two 'EOF' cases: One where the END chunk is zero bytes and one where
106  // it holds data. In the latter case, bytes those will be read and moved into the
107  // buffer. Once those data are consumed, we'll be back here again and this read()
108  // will return EOF. See below for the other case...
109  if (d_is.eof()) return traits_type::eof();
110 #if BYTE_ORDER_PREFIX
111  if (d_twiddle_bytes) header = bswap_32(header);
112 #else
113  // (header & CHUNK_LITTLE_ENDIAN) --> is the sender little endian
114  if (!d_set_twiddle) {
115  d_twiddle_bytes = (is_host_big_endian() == (header & CHUNK_LITTLE_ENDIAN));
116  d_set_twiddle = true;
117  }
118 #endif
119  uint32_t chunk_size = header & CHUNK_SIZE_MASK;
120 
121  DBG(cerr << "underflow: chunk size from header: " << chunk_size << endl);
122  DBG(cerr << "underflow: chunk type from header: " << hex << (header & CHUNK_TYPE_MASK) << endl);
123  DBG(cerr << "underflow: chunk byte order from header: " << hex << (header & CHUNK_BIG_ENDIAN) << endl);
124 
125  // Handle the case where the buffer is not big enough to hold the incoming chunk
126  if (chunk_size > d_buf_size) {
127  d_buf_size = chunk_size;
128  m_buffer_alloc();
129  }
130 
131  // If the END chunk has zero bytes, return EOF. See above for more information
132  if (chunk_size == 0 && (header & CHUNK_TYPE_MASK) == CHUNK_END) return traits_type::eof();
133 
134  // Read the chunk's data
135  d_is.read(d_buffer, chunk_size);
136  DBG2(cerr << "underflow: size read: " << d_is.gcount() << ", eof: " << d_is.eof() << ", bad: " << d_is.bad() << endl);
137  if (d_is.bad()) return traits_type::eof();
138 
139  DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
140  setg(d_buffer, // beginning of put back area
141  d_buffer, // read position (gptr() == eback())
142  d_buffer + chunk_size); // end of buffer (egptr()) chunk_size == d_is.gcount() unless there's an error
143 
144  DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
145 
146  switch (header & CHUNK_TYPE_MASK) {
147  case CHUNK_END:
148  DBG2(cerr << "Found end chunk" << endl);
149  return traits_type::to_int_type(*gptr());
150  case CHUNK_DATA:
151  return traits_type::to_int_type(*gptr());
152 
153  case CHUNK_ERR:
154  // this is pretty much the end of the show... Assume the buffer/chunk holds
155  // the error message text.
156  d_error = true;
157  d_error_message = string(d_buffer, chunk_size);
158  return traits_type::eof();
159  default:
160  d_error = true;
161  d_error_message = "Failed to read known chunk header type.";
162  return traits_type::eof();
163  }
164 }
165 
182 std::streamsize
183 chunked_inbuf::xsgetn(char* s, std::streamsize num)
184 {
185  DBG(cerr << "xsgetn... num: " << num << endl);
186 
187  // if num is <= the chars currently in the buffer
188  if (num <= (egptr() - gptr())) {
189  memcpy(s, gptr(), num);
190  gbump(num);
191 
192  return traits_type::not_eof(num);
193  }
194 
195  // else they asked for more
196  uint32_t bytes_left_to_read = num;
197 
198  // are there any bytes in the buffer? if so grab them first
199  if (gptr() < egptr()) {
200  int bytes_to_transfer = egptr() - gptr();
201  memcpy(s, gptr(), bytes_to_transfer);
202  gbump(bytes_to_transfer);
203  s += bytes_to_transfer;
204  bytes_left_to_read -= bytes_to_transfer;
205  }
206 
207  // We need to get more bytes from the underlying stream; at this
208  // point the internal buffer is empty.
209 
210  // read the remaining bytes to transfer, a chunk at a time,
211  // and put any leftover stuff in the buffer.
212 
213  // note that when the code is here, gptr() == egptr(), so the
214  // next call to read() will fall through the previous tests and
215  // read at least one chunk here.
216  bool done = false;
217  while (!done) {
218  // Get a chunk header
219  uint32_t header;
220  d_is.read((char *) &header, 4);
221 
222 #if !BYTE_ORDER_PREFIX && HEADER_IN_NETWORK_BYTE_ORDER
223  header = ntohl(header);
224 #endif
225 
226  // There are two EOF cases: One where the END chunk is zero bytes and one where
227  // it holds data. In the latter case, those will be read and moved into the
228  // buffer. Once those data are consumed, we'll be back here again and this read()
229  // will return EOF. See below for the other case...
230  if (d_is.eof()) return traits_type::eof();
231 #if BYTE_ORDER_PREFIX
232  if (d_twiddle_bytes) header = bswap_32(header);
233 #else
234  // (header & CHUNK_LITTLE_ENDIAN) --> is the sender little endian
235  if (!d_set_twiddle) {
236  d_twiddle_bytes = (is_host_big_endian() == (header & CHUNK_LITTLE_ENDIAN));
237  d_set_twiddle = true;
238  }
239 #endif
240 
241  uint32_t chunk_size = header & CHUNK_SIZE_MASK;
242  DBG(cerr << "xsgetn: chunk size from header: " << chunk_size << endl);
243  DBG(cerr << "xsgetn: chunk type from header: " << hex << (header & CHUNK_TYPE_MASK) << endl);
244  DBG(cerr << "xsgetn: chunk byte order from header: " << hex << (header & CHUNK_BIG_ENDIAN) << endl);
245 
246  // handle error chunks here
247  if ((header & CHUNK_TYPE_MASK) == CHUNK_ERR) {
248  d_error = true;
249  // Note that d_buffer is not used to avoid calling resize if it is too
250  // small to hold the error message. At this point, there's not much reason
251  // to optimize transport efficiency, however.
252  std::vector<char> message(chunk_size);
253  d_is.read(&message[0], chunk_size);
254  d_error_message = string(&message[0], chunk_size);
255  // leave the buffer and gptr(), ..., in a consistent state (empty)
256  setg(d_buffer, d_buffer, d_buffer);
257  }
258  // And zero-length END chunks here.
259  else if (chunk_size == 0 && (header & CHUNK_TYPE_MASK) == CHUNK_END) {
260  return traits_type::not_eof(num-bytes_left_to_read);
261  }
262  // The next case is complicated because we read some data from the current
263  // chunk into 's' an some into the internal buffer.
264  else if (chunk_size > bytes_left_to_read) {
265  d_is.read(s, bytes_left_to_read);
266  if (d_is.bad()) return traits_type::eof();
267 
268  // Now slurp up the remain part of the chunk and store it in the buffer
269  uint32_t bytes_leftover = chunk_size - bytes_left_to_read;
270  // expand the internal buffer if needed
271  if (bytes_leftover > d_buf_size) {
272  d_buf_size = chunk_size;
273  m_buffer_alloc();
274  }
275  // read the remain stuff in to d_buffer
276  d_is.read(d_buffer, bytes_leftover);
277  if (d_is.bad()) return traits_type::eof();
278 
279  setg(d_buffer, // beginning of put back area
280  d_buffer, // read position (gptr() == eback())
281  d_buffer + bytes_leftover /*d_is.gcount()*/); // end of buffer (egptr())
282 
283  bytes_left_to_read = 0 /* -= d_is.gcount()*/;
284  }
285  else {
286  // expand the internal buffer if needed
287  if (chunk_size > d_buf_size) {
288  d_buf_size = chunk_size;
289  m_buffer_alloc();
290  }
291  // If we get a chunk that's zero bytes, Don't call read()
292  // to save the kernel context switch overhead.
293  if (chunk_size > 0) {
294  d_is.read(s, chunk_size);
295  if (d_is.bad()) return traits_type::eof();
296  bytes_left_to_read -= chunk_size /*d_is.gcount()*/;
297  s += chunk_size;
298  }
299  }
300 
301  switch (header & CHUNK_TYPE_MASK) {
302  case CHUNK_END:
303  DBG(cerr << "Found end chunk" << endl);
304  // in this case bytes_left_to_read can be > 0 because we ran out of data
305  // before reading all the requested bytes. The next read() call will return
306  // eof; this call returns the number of bytes read and transferred to 's'.
307  done = true;
308  break;
309 
310  case CHUNK_DATA:
311  done = bytes_left_to_read == 0;
312  break;
313 
314  case CHUNK_ERR:
315  // this is pretty much the end of the show... The error message has
316  // already been read above
317  return traits_type::eof();
318 
319  default:
320  d_error = true;
321  d_error_message = "Failed to read known chunk header type.";
322  return traits_type::eof();
323  }
324  }
325 
326  return traits_type::not_eof(num-bytes_left_to_read);
327 }
328 
341 std::streambuf::int_type
343 {
344  // To read data from the chunked stream, first read the header
345  uint32_t header;
346  d_is.read((char *) &header, 4);
347 
348 #if !BYTE_ORDER_PREFIX && HEADER_IN_NETWORK_BYTE_ORDER
349  header = ntohl(header);
350 #endif
351 
352  // There are two 'EOF' cases: One where the END chunk is zero bytes and one where
353  // it holds data. In the latter case, bytes those will be read and moved into the
354  // buffer. Once those data are consumed, we'll be back here again and this read()
355  // will return EOF. See below for the other case...
356  if (d_is.eof()) return traits_type::eof();
357 #if BYTE_ORDER_PREFIX
358  if (d_twiddle_bytes) header = bswap_32(header);
359 #else
360  // (header & CHUNK_LITTLE_ENDIAN) --> is the sender little endian
361  if (!d_set_twiddle) {
362  d_twiddle_bytes = (is_host_big_endian() == (header & CHUNK_LITTLE_ENDIAN));
363  d_set_twiddle = true;
364  }
365 #endif
366 
367  uint32_t chunk_size = header & CHUNK_SIZE_MASK;
368 
369  DBG(cerr << "read_next_chunk: chunk size from header: " << chunk_size << endl);
370  DBG(cerr << "read_next_chunk: chunk type from header: " << hex << (header & CHUNK_TYPE_MASK) << endl);
371  DBG(cerr << "read_next_chunk: chunk byte order from header: " << hex << (header & CHUNK_BIG_ENDIAN) << endl);
372 
373  // Handle the case where the buffer is not big enough to hold the incoming chunk
374  if (chunk_size > d_buf_size) {
375  d_buf_size = chunk_size;
376  m_buffer_alloc();
377  }
378 
379  // If the END chunk has zero bytes, return EOF. See above for more information
380  if (chunk_size == 0 && (header & CHUNK_TYPE_MASK) == CHUNK_END) return traits_type::eof();
381 
382  // Read the chunk's data
383  d_is.read(d_buffer, chunk_size);
384  DBG2(cerr << "read_next_chunk: size read: " << d_is.gcount() << ", eof: " << d_is.eof() << ", bad: " << d_is.bad() << endl);
385  if (d_is.bad()) return traits_type::eof();
386 
387  DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
388  setg(d_buffer, // beginning of put back area
389  d_buffer, // read position (gptr() == eback())
390  d_buffer + chunk_size); // end of buffer (egptr()) chunk_size == d_is.gcount() unless there's an error
391 
392  DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
393 
394  switch (header & CHUNK_TYPE_MASK) {
395  case CHUNK_END:
396  DBG(cerr << "Found end chunk" << endl);
397  return traits_type::not_eof(chunk_size);
398 
399  case CHUNK_DATA:
400  return traits_type::not_eof(chunk_size);
401 
402  case CHUNK_ERR:
403  // this is pretty much the end of the show... Assume the buffer/chunk holds
404  // the error message text.
405  d_error = true;
406  d_error_message = string(d_buffer, chunk_size);
407  return traits_type::eof();
408 
409  default:
410  d_error = true;
411  d_error_message = "Failed to read known chunk header type.";
412  return traits_type::eof();
413  }
414 }
415 
416 }
virtual int_type underflow()
Insert new characters into the buffer This specialization of underflow is called when the gptr() is a...
int_type read_next_chunk()
Read a chunk Normally the chunked nature of a chunked_istream/chunked_inbuf is hidden from the caller...
virtual std::streamsize xsgetn(char *s, std::streamsize num)
Read a block of data This specialization of xsgetn() reads num bytes and puts them in s first reading...
top level DAP object to house generic methods
Definition: AlarmHandler.h:36
bool is_host_big_endian()
Does this host use big-endian byte order?
Definition: util.cc:94