protozero  1.6.3
Minimalistic protocol buffer decoder and encoder in C++.
pbf_reader.hpp
Go to the documentation of this file.
1 #ifndef PROTOZERO_PBF_READER_HPP
2 #define PROTOZERO_PBF_READER_HPP
3 
4 /*****************************************************************************
5 
6 protozero - Minimalistic protocol buffer decoder and encoder in C++.
7 
8 This file is from https://github.com/mapbox/protozero where you can find more
9 documentation.
10 
11 *****************************************************************************/
12 
19 #include <protozero/config.hpp>
20 #include <protozero/data_view.hpp>
21 #include <protozero/exception.hpp>
22 #include <protozero/iterators.hpp>
23 #include <protozero/types.hpp>
24 #include <protozero/varint.hpp>
25 
26 #if PROTOZERO_BYTE_ORDER != PROTOZERO_LITTLE_ENDIAN
27 # include <protozero/byteswap.hpp>
28 #endif
29 
30 #include <cstddef>
31 #include <cstdint>
32 #include <cstring>
33 #include <string>
34 #include <utility>
35 
36 namespace protozero {
37 
62 class pbf_reader {
63 
64  // A pointer to the next unread data.
65  const char* m_data = nullptr;
66 
67  // A pointer to one past the end of data.
68  const char* m_end = nullptr;
69 
70  // The wire type of the current field.
71  pbf_wire_type m_wire_type = pbf_wire_type::unknown;
72 
73  // The tag of the current field.
74  pbf_tag_type m_tag = 0;
75 
76  template <typename T>
77  T get_fixed() {
78  T result;
79  const char* data = m_data;
80  skip_bytes(sizeof(T));
81  std::memcpy(&result, data, sizeof(T));
82 #if PROTOZERO_BYTE_ORDER != PROTOZERO_LITTLE_ENDIAN
83  byteswap_inplace(&result);
84 #endif
85  return result;
86  }
87 
88  template <typename T>
90  protozero_assert(tag() != 0 && "call next() before accessing field value");
91  const auto len = get_len_and_skip();
92  if (len % sizeof(T) != 0) {
94  }
95  return {const_fixed_iterator<T>(m_data - len),
96  const_fixed_iterator<T>(m_data)};
97  }
98 
99  template <typename T>
100  T get_varint() {
101  const auto val = static_cast<T>(decode_varint(&m_data, m_end));
102  assert(m_data <= m_end);
103  return val;
104  }
105 
106  template <typename T>
107  T get_svarint() {
108  protozero_assert((has_wire_type(pbf_wire_type::varint) || has_wire_type(pbf_wire_type::length_delimited)) && "not a varint");
109  return static_cast<T>(decode_zigzag64(decode_varint(&m_data, m_end)));
110  }
111 
112  pbf_length_type get_length() {
113  return get_varint<pbf_length_type>();
114  }
115 
116  void skip_bytes(pbf_length_type len) {
117  if (m_data + len > m_end) {
118  throw end_of_buffer_exception{};
119  }
120  m_data += len;
121 
122 #ifndef NDEBUG
123  // In debug builds reset the tag to zero so that we can detect (some)
124  // wrong code.
125  m_tag = 0;
126 #endif
127  }
128 
129  pbf_length_type get_len_and_skip() {
130  const auto len = get_length();
131  skip_bytes(len);
132  return len;
133  }
134 
135  template <typename T>
136  iterator_range<T> get_packed() {
137  protozero_assert(tag() != 0 && "call next() before accessing field value");
138  const auto len = get_len_and_skip();
139  return {T{m_data - len, m_data},
140  T{m_data, m_data}};
141  }
142 
143 public:
144 
155  explicit pbf_reader(const data_view& view) noexcept
156  : m_data(view.data()),
157  m_end(view.data() + view.size()) {
158  }
159 
170  pbf_reader(const char* data, std::size_t size) noexcept
171  : m_data(data),
172  m_end(data + size) {
173  }
174 
175 #ifndef PROTOZERO_STRICT_API
176 
187  explicit pbf_reader(const std::pair<const char*, std::size_t>& data) noexcept
188  : m_data(data.first),
189  m_end(data.first + data.second) {
190  }
191 #endif
192 
203  explicit pbf_reader(const std::string& data) noexcept
204  : m_data(data.data()),
205  m_end(data.data() + data.size()) {
206  }
207 
212  pbf_reader() noexcept = default;
213 
215  pbf_reader(const pbf_reader&) noexcept = default;
216 
218  pbf_reader(pbf_reader&&) noexcept = default;
219 
221  pbf_reader& operator=(const pbf_reader& other) noexcept = default;
222 
224  pbf_reader& operator=(pbf_reader&& other) noexcept = default;
225 
226  ~pbf_reader() = default;
227 
233  void swap(pbf_reader& other) noexcept {
234  using std::swap;
235  swap(m_data, other.m_data);
236  swap(m_end, other.m_end);
237  swap(m_wire_type, other.m_wire_type);
238  swap(m_tag, other.m_tag);
239  }
240 
246  operator bool() const noexcept { // NOLINT(google-explicit-constructor, hicpp-explicit-conversions)
247  return m_data < m_end;
248  }
249 
259  std::size_t length() const noexcept {
260  return std::size_t(m_end - m_data);
261  }
262 
278  bool next() {
279  if (m_data == m_end) {
280  return false;
281  }
282 
283  const auto value = get_varint<uint32_t>();
284  m_tag = pbf_tag_type(value >> 3u);
285 
286  // tags 0 and 19000 to 19999 are not allowed as per
287  // https://developers.google.com/protocol-buffers/docs/proto#assigning-tags
288  if (m_tag == 0 || (m_tag >= 19000 && m_tag <= 19999)) {
289  throw invalid_tag_exception{};
290  }
291 
292  m_wire_type = pbf_wire_type(value & 0x07u);
293  switch (m_wire_type) {
294  case pbf_wire_type::varint:
295  case pbf_wire_type::fixed64:
296  case pbf_wire_type::length_delimited:
297  case pbf_wire_type::fixed32:
298  break;
299  default:
301  }
302 
303  return true;
304  }
305 
334  bool next(pbf_tag_type next_tag) {
335  while (next()) {
336  if (m_tag == next_tag) {
337  return true;
338  }
339  skip();
340  }
341  return false;
342  }
343 
372  bool next(pbf_tag_type next_tag, pbf_wire_type type) {
373  while (next()) {
374  if (m_tag == next_tag && m_wire_type == type) {
375  return true;
376  }
377  skip();
378  }
379  return false;
380  }
381 
391  pbf_tag_type tag() const noexcept {
392  return m_tag;
393  }
394 
410  pbf_wire_type wire_type() const noexcept {
411  return m_wire_type;
412  }
413 
436  uint32_t tag_and_type() const noexcept {
438  }
439 
446  bool has_wire_type(pbf_wire_type type) const noexcept {
447  return wire_type() == type;
448  }
449 
456  void skip() {
457  protozero_assert(tag() != 0 && "call next() before calling skip()");
458  switch (wire_type()) {
459  case pbf_wire_type::varint:
460  skip_varint(&m_data, m_end);
461  break;
462  case pbf_wire_type::fixed64:
463  skip_bytes(8);
464  break;
465  case pbf_wire_type::length_delimited:
466  skip_bytes(get_length());
467  break;
468  case pbf_wire_type::fixed32:
469  skip_bytes(4);
470  break;
471  default:
472  break;
473  }
474  assert(m_data <= m_end);
475  }
476 
478 
489  bool get_bool() {
490  protozero_assert(tag() != 0 && "call next() before accessing field value");
491  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
492  const auto data = m_data;
493  skip_varint(&m_data, m_end);
494  return data[0] != 0;
495  }
496 
504  int32_t get_enum() {
505  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
506  return get_varint<int32_t>();
507  }
508 
516  int32_t get_int32() {
517  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
518  return get_varint<int32_t>();
519  }
520 
528  int32_t get_sint32() {
529  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
530  return get_svarint<int32_t>();
531  }
532 
540  uint32_t get_uint32() {
541  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
542  return get_varint<uint32_t>();
543  }
544 
552  int64_t get_int64() {
553  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
554  return get_varint<int64_t>();
555  }
556 
564  int64_t get_sint64() {
565  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
566  return get_svarint<int64_t>();
567  }
568 
576  uint64_t get_uint64() {
577  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
578  return get_varint<uint64_t>();
579  }
580 
588  uint32_t get_fixed32() {
589  protozero_assert(tag() != 0 && "call next() before accessing field value");
590  protozero_assert(has_wire_type(pbf_wire_type::fixed32) && "not a 32-bit fixed");
591  return get_fixed<uint32_t>();
592  }
593 
601  int32_t get_sfixed32() {
602  protozero_assert(tag() != 0 && "call next() before accessing field value");
603  protozero_assert(has_wire_type(pbf_wire_type::fixed32) && "not a 32-bit fixed");
604  return get_fixed<int32_t>();
605  }
606 
614  uint64_t get_fixed64() {
615  protozero_assert(tag() != 0 && "call next() before accessing field value");
616  protozero_assert(has_wire_type(pbf_wire_type::fixed64) && "not a 64-bit fixed");
617  return get_fixed<uint64_t>();
618  }
619 
627  int64_t get_sfixed64() {
628  protozero_assert(tag() != 0 && "call next() before accessing field value");
629  protozero_assert(has_wire_type(pbf_wire_type::fixed64) && "not a 64-bit fixed");
630  return get_fixed<int64_t>();
631  }
632 
640  float get_float() {
641  protozero_assert(tag() != 0 && "call next() before accessing field value");
642  protozero_assert(has_wire_type(pbf_wire_type::fixed32) && "not a 32-bit fixed");
643  return get_fixed<float>();
644  }
645 
653  double get_double() {
654  protozero_assert(tag() != 0 && "call next() before accessing field value");
655  protozero_assert(has_wire_type(pbf_wire_type::fixed64) && "not a 64-bit fixed");
656  return get_fixed<double>();
657  }
658 
669  protozero_assert(tag() != 0 && "call next() before accessing field value");
670  protozero_assert(has_wire_type(pbf_wire_type::length_delimited) && "not of type string, bytes or message");
671  const auto len = get_len_and_skip();
672  return {m_data - len, len};
673  }
674 
675 #ifndef PROTOZERO_STRICT_API
676 
684  std::pair<const char*, pbf_length_type> get_data() {
685  protozero_assert(tag() != 0 && "call next() before accessing field value");
686  protozero_assert(has_wire_type(pbf_wire_type::length_delimited) && "not of type string, bytes or message");
687  const auto len = get_len_and_skip();
688  return {m_data - len, len};
689  }
690 #endif
691 
699  std::string get_bytes() {
700  return std::string(get_view());
701  }
702 
710  std::string get_string() {
711  return std::string(get_view());
712  }
713 
722  return pbf_reader{get_view()};
723  }
724 
726 
729 
732 
735 
738 
741 
744 
747 
750 
753 
756 
759 
762 
765 
768 
770 
784  return get_packed<pbf_reader::const_bool_iterator>();
785  }
786 
797  return get_packed<pbf_reader::const_enum_iterator>();
798  }
799 
810  return get_packed<pbf_reader::const_int32_iterator>();
811  }
812 
823  return get_packed<pbf_reader::const_sint32_iterator>();
824  }
825 
836  return get_packed<pbf_reader::const_uint32_iterator>();
837  }
838 
849  return get_packed<pbf_reader::const_int64_iterator>();
850  }
851 
862  return get_packed<pbf_reader::const_sint64_iterator>();
863  }
864 
875  return get_packed<pbf_reader::const_uint64_iterator>();
876  }
877 
888  return packed_fixed<uint32_t>();
889  }
890 
901  return packed_fixed<int32_t>();
902  }
903 
914  return packed_fixed<uint64_t>();
915  }
916 
927  return packed_fixed<int64_t>();
928  }
929 
940  return packed_fixed<float>();
941  }
942 
953  return packed_fixed<double>();
954  }
955 
957 
958 }; // class pbf_reader
959 
966 inline void swap(pbf_reader& lhs, pbf_reader& rhs) noexcept {
967  lhs.swap(rhs);
968 }
969 
970 } // end namespace protozero
971 
972 #endif // PROTOZERO_PBF_READER_HPP
pbf_reader(const data_view &view) noexcept
Definition: pbf_reader.hpp:155
int64_t get_sfixed64()
Definition: pbf_reader.hpp:627
uint32_t get_uint32()
Definition: pbf_reader.hpp:540
uint64_t get_fixed64()
Definition: pbf_reader.hpp:614
int32_t get_sfixed32()
Definition: pbf_reader.hpp:601
Definition: exception.hpp:52
uint64_t get_uint64()
Definition: pbf_reader.hpp:576
constexpr int64_t decode_zigzag64(uint64_t value) noexcept
Definition: varint.hpp:182
Definition: iterators.hpp:160
int32_t get_int32()
Definition: pbf_reader.hpp:516
Definition: exception.hpp:92
pbf_reader(const std::string &data) noexcept
Definition: pbf_reader.hpp:203
iterator_range< pbf_reader::const_sint64_iterator > get_packed_sint64()
Definition: pbf_reader.hpp:861
constexpr uint32_t tag_and_type(T tag, pbf_wire_type wire_type) noexcept
Definition: types.hpp:55
void swap(pbf_reader &lhs, pbf_reader &rhs) noexcept
Definition: pbf_reader.hpp:966
Contains macro checks for different configurations.
iterator_range< pbf_reader::const_float_iterator > get_packed_float()
Definition: pbf_reader.hpp:939
Contains the declaration of low-level types used in the pbf format.
bool has_wire_type(pbf_wire_type type) const noexcept
Definition: pbf_reader.hpp:446
std::size_t length() const noexcept
Definition: pbf_reader.hpp:259
void skip()
Definition: pbf_reader.hpp:456
iterator_range< pbf_reader::const_int32_iterator > get_packed_int32()
Definition: pbf_reader.hpp:809
pbf_reader get_message()
Definition: pbf_reader.hpp:721
pbf_reader() noexcept=default
iterator_range< pbf_reader::const_int64_iterator > get_packed_int64()
Definition: pbf_reader.hpp:848
void skip_varint(const char **data, const char *end)
Definition: varint.hpp:112
Contains the iterators for access to packed repeated fields.
pbf_wire_type
Definition: types.hpp:40
iterator_range< pbf_reader::const_uint64_iterator > get_packed_uint64()
Definition: pbf_reader.hpp:874
iterator_range< pbf_reader::const_sint32_iterator > get_packed_sint32()
Definition: pbf_reader.hpp:822
iterator_range< pbf_reader::const_sfixed32_iterator > get_packed_sfixed32()
Definition: pbf_reader.hpp:900
pbf_wire_type wire_type() const noexcept
Definition: pbf_reader.hpp:410
Contains the implementation of the data_view class.
Definition: exception.hpp:80
void swap(pbf_reader &other) noexcept
Definition: pbf_reader.hpp:233
int64_t get_sint64()
Definition: pbf_reader.hpp:564
bool next(pbf_tag_type next_tag)
Definition: pbf_reader.hpp:334
uint32_t tag_and_type() const noexcept
Definition: pbf_reader.hpp:436
std::pair< const char *, pbf_length_type > get_data()
Definition: pbf_reader.hpp:684
iterator_range< pbf_reader::const_bool_iterator > get_packed_bool()
Definition: pbf_reader.hpp:783
Contains functions to swap bytes in values (for different endianness).
int32_t get_sint32()
Definition: pbf_reader.hpp:528
std::string get_bytes()
Definition: pbf_reader.hpp:699
iterator_range< pbf_reader::const_double_iterator > get_packed_double()
Definition: pbf_reader.hpp:952
double get_double()
Definition: pbf_reader.hpp:653
bool get_bool()
Definition: pbf_reader.hpp:489
std::string get_string()
Definition: pbf_reader.hpp:710
uint32_t pbf_length_type
Definition: types.hpp:62
Contains the exceptions used in the protozero library.
pbf_reader(const std::pair< const char *, std::size_t > &data) noexcept
Definition: pbf_reader.hpp:187
data_view get_view()
Definition: pbf_reader.hpp:668
iterator_range< pbf_reader::const_fixed32_iterator > get_packed_fixed32()
Definition: pbf_reader.hpp:887
pbf_reader(const char *data, std::size_t size) noexcept
Definition: pbf_reader.hpp:170
uint32_t pbf_tag_type
Definition: types.hpp:33
uint32_t get_fixed32()
Definition: pbf_reader.hpp:588
iterator_range< pbf_reader::const_enum_iterator > get_packed_enum()
Definition: pbf_reader.hpp:796
Definition: data_view.hpp:39
pbf_tag_type tag() const noexcept
Definition: pbf_reader.hpp:391
Definition: iterators.hpp:367
iterator_range< pbf_reader::const_sfixed64_iterator > get_packed_sfixed64()
Definition: pbf_reader.hpp:926
Definition: iterators.hpp:286
int32_t get_enum()
Definition: pbf_reader.hpp:504
Definition: pbf_reader.hpp:62
bool next(pbf_tag_type next_tag, pbf_wire_type type)
Definition: pbf_reader.hpp:372
Definition: iterators.hpp:39
float get_float()
Definition: pbf_reader.hpp:640
Definition: exception.hpp:67
Contains low-level varint and zigzag encoding and decoding functions.
uint64_t decode_varint(const char **data, const char *end)
Definition: varint.hpp:89
bool next()
Definition: pbf_reader.hpp:278
iterator_range< pbf_reader::const_fixed64_iterator > get_packed_fixed64()
Definition: pbf_reader.hpp:913
int64_t get_int64()
Definition: pbf_reader.hpp:552
iterator_range< pbf_reader::const_uint32_iterator > get_packed_uint32()
Definition: pbf_reader.hpp:835
All parts of the protozero header-only library are in this namespace.
Definition: byteswap.hpp:23
void swap(data_view &lhs, data_view &rhs) noexcept
Definition: data_view.hpp:165