Orcus
yaml_parser.hpp
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
6  */
7 
8 #ifndef INCLUDED_ORCUS_YAML_PARSER_HPP
9 #define INCLUDED_ORCUS_YAML_PARSER_HPP
10 
11 #include "orcus/yaml_parser_base.hpp"
12 #include "orcus/parser_global.hpp"
13 
14 namespace orcus {
15 
16 template<typename _Handler>
18 {
19 public:
20  typedef _Handler handler_type;
21 
22  yaml_parser(const char* p, size_t n, handler_type& hdl);
23 
24  void parse();
25 
26 private:
27  size_t end_scope();
28  void check_or_begin_document();
29  void check_or_begin_map();
30  void check_or_begin_sequence();
31  void parse_value(const char* p, size_t len);
32  void push_value(const char* p, size_t len);
33  void parse_line(const char* p, size_t len);
34  void parse_map_key(const char* p, size_t len);
35 
36  void handler_begin_parse();
37  void handler_end_parse();
38  void handler_begin_document();
39  void handler_end_document();
40  void handler_begin_sequence();
41  void handler_end_sequence();
42  void handler_begin_map();
43  void handler_end_map();
44  void handler_begin_map_key();
45  void handler_end_map_key();
46  void handler_string(const char* p, size_t n);
47  void handler_number(double val);
48  void handler_boolean_true();
49  void handler_boolean_false();
50  void handler_null();
51 
52 private:
53  handler_type& m_handler;
54 };
55 
56 template<typename _Handler>
58 {
59  push_parse_token(yaml::detail::parse_token_t::begin_parse);
60  m_handler.begin_parse();
61 }
62 
63 template<typename _Handler>
64 void yaml_parser<_Handler>::handler_end_parse()
65 {
66  push_parse_token(yaml::detail::parse_token_t::end_parse);
67  m_handler.end_parse();
68 }
69 
70 template<typename _Handler>
71 void yaml_parser<_Handler>::handler_begin_document()
72 {
73  push_parse_token(yaml::detail::parse_token_t::begin_document);
74  m_handler.begin_document();
75 }
76 
77 template<typename _Handler>
78 void yaml_parser<_Handler>::handler_end_document()
79 {
80  push_parse_token(yaml::detail::parse_token_t::end_document);
81  m_handler.end_document();
82 }
83 
84 template<typename _Handler>
85 void yaml_parser<_Handler>::handler_begin_sequence()
86 {
87  push_parse_token(yaml::detail::parse_token_t::begin_sequence);
88  m_handler.begin_sequence();
89 }
90 
91 template<typename _Handler>
92 void yaml_parser<_Handler>::handler_end_sequence()
93 {
94  push_parse_token(yaml::detail::parse_token_t::end_sequence);
95  m_handler.end_sequence();
96 }
97 
98 template<typename _Handler>
99 void yaml_parser<_Handler>::handler_begin_map()
100 {
101  push_parse_token(yaml::detail::parse_token_t::begin_map);
102  m_handler.begin_map();
103 }
104 
105 template<typename _Handler>
106 void yaml_parser<_Handler>::handler_end_map()
107 {
108  push_parse_token(yaml::detail::parse_token_t::end_map);
109  m_handler.end_map();
110 }
111 
112 template<typename _Handler>
113 void yaml_parser<_Handler>::handler_begin_map_key()
114 {
115  push_parse_token(yaml::detail::parse_token_t::begin_map_key);
116  m_handler.begin_map_key();
117 }
118 
119 template<typename _Handler>
120 void yaml_parser<_Handler>::handler_end_map_key()
121 {
122  push_parse_token(yaml::detail::parse_token_t::end_map_key);
123  m_handler.end_map_key();
124 }
125 
126 template<typename _Handler>
127 void yaml_parser<_Handler>::handler_string(const char* p, size_t n)
128 {
129  push_parse_token(yaml::detail::parse_token_t::string);
130  m_handler.string(p, n);
131 }
132 
133 template<typename _Handler>
134 void yaml_parser<_Handler>::handler_number(double val)
135 {
136  push_parse_token(yaml::detail::parse_token_t::number);
137  m_handler.number(val);
138 }
139 
140 template<typename _Handler>
141 void yaml_parser<_Handler>::handler_boolean_true()
142 {
143  push_parse_token(yaml::detail::parse_token_t::boolean_true);
144  m_handler.boolean_true();
145 }
146 
147 template<typename _Handler>
148 void yaml_parser<_Handler>::handler_boolean_false()
149 {
150  push_parse_token(yaml::detail::parse_token_t::boolean_false);
151  m_handler.boolean_false();
152 }
153 
154 template<typename _Handler>
155 void yaml_parser<_Handler>::handler_null()
156 {
157  push_parse_token(yaml::detail::parse_token_t::null);
158  m_handler.null();
159 }
160 
161 template<typename _Handler>
162 yaml_parser<_Handler>::yaml_parser(const char* p, size_t n, handler_type& hdl) :
163  yaml::parser_base(p, n), m_handler(hdl) {}
164 
165 template<typename _Handler>
166 void yaml_parser<_Handler>::parse()
167 {
168  handler_begin_parse();
169 
170  while (has_char())
171  {
172  reset_on_new_line();
173 
174  size_t indent = parse_indent();
175  if (indent == parse_indent_end_of_stream)
176  break;
177 
178  if (indent == parse_indent_blank_line)
179  continue;
180 
181  size_t cur_scope = get_scope();
182 
183  if (cur_scope <= indent)
184  {
185  if (in_literal_block())
186  {
187  handle_line_in_literal(indent);
188  continue;
189  }
190 
191  if (has_line_buffer())
192  {
193  // This line is part of multi-line string. Push the line to the
194  // buffer as-is.
195  handle_line_in_multi_line_string();
196  continue;
197  }
198  }
199 
200  if (cur_scope == scope_empty)
201  {
202  if (indent > 0)
203  throw yaml::parse_error(
204  "first node of the document should not be indented.", offset());
205 
206  push_scope(indent);
207  }
208  else if (indent > cur_scope)
209  {
210  push_scope(indent);
211  }
212  else if (indent < cur_scope)
213  {
214  // Current indent is less than the current scope level.
215  do
216  {
217  cur_scope = end_scope();
218  if (cur_scope < indent)
219  throw yaml::parse_error("parse: invalid indent level.", offset());
220  }
221  while (indent < cur_scope);
222  }
223 
224  // Parse the rest of the line.
225  pstring line = parse_to_end_of_line();
226  line = line.trim();
227 
228  assert(!line.empty());
229  parse_line(line.get(), line.size());
230  }
231 
232  // End all remaining scopes.
233  size_t cur_scope = get_scope();
234  while (cur_scope != scope_empty)
235  cur_scope = end_scope();
236 
237  if (get_doc_hash())
238  handler_end_document();
239 
240  handler_end_parse();
241 }
242 
243 template<typename _Handler>
244 size_t yaml_parser<_Handler>::end_scope()
245 {
246  switch (get_scope_type())
247  {
248  case yaml::detail::scope_t::map:
249  {
250  if (get_last_parse_token() == yaml::detail::parse_token_t::end_map_key)
251  handler_null();
252 
253  handler_end_map();
254  break;
255  }
256  case yaml::detail::scope_t::sequence:
257  {
258  if (get_last_parse_token() == yaml::detail::parse_token_t::begin_sequence_element)
259  handler_null();
260 
261  handler_end_sequence();
262  break;
263  }
264  case yaml::detail::scope_t::multi_line_string:
265  {
266  pstring merged = merge_line_buffer();
267  handler_string(merged.get(), merged.size());
268  break;
269  }
270  default:
271  {
272  if (has_line_buffer())
273  {
274  assert(get_line_buffer_count() == 1);
275  pstring line = pop_line_front();
276  parse_value(line.get(), line.size());
277  }
278  }
279  }
280  return pop_scope();
281 }
282 
283 template<typename _Handler>
284 void yaml_parser<_Handler>::check_or_begin_document()
285 {
286  if (!get_doc_hash())
287  {
288  set_doc_hash(mp_char);
289  handler_begin_document();
290  }
291 }
292 
293 template<typename _Handler>
294 void yaml_parser<_Handler>::check_or_begin_map()
295 {
296  switch (get_scope_type())
297  {
298  case yaml::detail::scope_t::unset:
299  {
300  check_or_begin_document();
301  set_scope_type(yaml::detail::scope_t::map);
302  handler_begin_map();
303  break;
304  }
305  case yaml::detail::scope_t::map:
306  {
307  if (get_last_parse_token() == yaml::detail::parse_token_t::end_map_key)
308  handler_null();
309  break;
310  }
311  default:
312  ;
313  }
314 }
315 
316 template<typename _Handler>
317 void yaml_parser<_Handler>::check_or_begin_sequence()
318 {
319  switch (get_scope_type())
320  {
321  case yaml::detail::scope_t::unset:
322  {
323  check_or_begin_document();
324  set_scope_type(yaml::detail::scope_t::sequence);
325  handler_begin_sequence();
326  break;
327  }
328  case yaml::detail::scope_t::sequence:
329  {
330  if (get_last_parse_token() == yaml::detail::parse_token_t::begin_sequence_element)
331  handler_null();
332  break;
333  }
334  default:
335  ;
336  }
337 
338  push_parse_token(yaml::detail::parse_token_t::begin_sequence_element);
339 }
340 
341 template<typename _Handler>
342 void yaml_parser<_Handler>::parse_value(const char* p, size_t len)
343 {
344  check_or_begin_document();
345 
346  const char* p0 = p;
347  const char* p_end = p + len;
348  double val = parse_numeric(p, len);
349  if (p == p_end)
350  {
351  handler_number(val);
352  return;
353  }
354 
355  yaml::detail::keyword_t kw = parse_keyword(p0, len);
356 
357  if (kw != yaml::detail::keyword_t::unknown)
358  {
359  switch (kw)
360  {
361  case yaml::detail::keyword_t::null:
362  handler_null();
363  break;
364  case yaml::detail::keyword_t::boolean_true:
365  handler_boolean_true();
366  break;
367  case yaml::detail::keyword_t::boolean_false:
368  handler_boolean_false();
369  break;
370  default:
371  ;
372  }
373 
374  return;
375  }
376 
377  // Failed to parse it as a number or a keyword. It must be a string.
378  handler_string(p0, len);
379 }
380 
381 template<typename _Handler>
382 void yaml_parser<_Handler>::push_value(const char* p, size_t len)
383 {
384  check_or_begin_document();
385 
386  if (has_line_buffer() && get_scope_type() == yaml::detail::scope_t::unset)
387  set_scope_type(yaml::detail::scope_t::multi_line_string);
388 
389  push_line_back(p, len);
390 }
391 
392 template<typename _Handler>
393 void yaml_parser<_Handler>::parse_line(const char* p, size_t len)
394 {
395  const char* p_end = p + len;
396  const char* p0 = p; // Save the original head position.
397 
398  if (*p == '-')
399  {
400  ++p;
401  if (p == p_end)
402  {
403  // List item start.
404  check_or_begin_sequence();
405  return;
406  }
407 
408  switch (*p)
409  {
410  case '-':
411  {
412  // start of a document
413  ++p;
414  if (p == p_end)
415  throw yaml::parse_error("parse_line: line ended with '--'.", offset_last_char_of_line());
416 
417  if (*p != '-')
418  yaml::parse_error::throw_with(
419  "parse_line: '-' expected but '", *p, "' found.",
420  offset_last_char_of_line() - std::ptrdiff_t(p_end-p));
421 
422  ++p; // Skip the '-'.
423  set_doc_hash(p);
424  handler_begin_document();
425  clear_scopes();
426 
427  if (p != p_end)
428  {
429  skip_blanks(p, p_end-p);
430 
431  // Whatever comes after '---' is equivalent of first node.
432  assert(p != p_end);
433  push_scope(0);
434  parse_line(p, p_end-p);
435  }
436  return;
437  }
438  case ' ':
439  {
440  check_or_begin_sequence();
441 
442  // list item start with inline first item content.
443  ++p;
444  if (p == p_end)
445  throw yaml::parse_error(
446  "parse_line: list item expected, but the line ended prematurely.",
447  offset_last_char_of_line() - std::ptrdiff_t(p_end-p));
448 
449  skip_blanks(p, p_end-p);
450 
451  size_t scope_width = get_scope() + (p-p0);
452  push_scope(scope_width);
453  parse_line(p, p_end-p);
454  return;
455  }
456  default:
457  // It is none of the above.
458  p = p0;
459  }
460 
461  }
462 
463  if (get_scope_type() == yaml::detail::scope_t::sequence)
464  yaml::parse_error::throw_with(
465  "'-' was expected for a sequence element, but '", *p, "' was found.",
466  offset_last_char_of_line()-len+1);
467 
468  // If the line doesn't start with a "- ", it must be a dictionary key.
469  parse_map_key(p, len);
470 }
471 
472 template<typename _Handler>
473 void yaml_parser<_Handler>::parse_map_key(const char* p, size_t len)
474 {
475  const char* p_end = p + len;
476  const char* p0 = p; // Save the original head position.
477 
478  switch (*p)
479  {
480  case '"':
481  {
482  pstring quoted_str = parse_double_quoted_string_value(p, len);
483 
484  if (p == p_end)
485  {
486  handler_string(quoted_str.get(), quoted_str.size());
487  return;
488  }
489 
490  skip_blanks(p, p_end-p);
491 
492  if (*p != ':')
493  throw yaml::parse_error(
494  "parse_map_key: ':' is expected after the quoted string key.",
495  offset() - std::ptrdiff_t(p_end-p+1));
496 
497  check_or_begin_map();
498  handler_begin_map_key();
499  handler_string(quoted_str.get(), quoted_str.size());
500  handler_end_map_key();
501 
502  ++p; // skip the ':'.
503  if (p == p_end)
504  return;
505 
506  // Skip all white spaces.
507  skip_blanks(p, p_end-p);
508  }
509  break;
510  case '\'':
511  {
512  pstring quoted_str = parse_single_quoted_string_value(p, len);
513 
514  if (p == p_end)
515  {
516  handler_string(quoted_str.get(), quoted_str.size());
517  return;
518  }
519 
520  skip_blanks(p, p_end-p);
521 
522  if (*p != ':')
523  throw yaml::parse_error(
524  "parse_map_key: ':' is expected after the quoted string key.",
525  offset() - std::ptrdiff_t(p_end-p+1));
526 
527  check_or_begin_map();
528  handler_begin_map_key();
529  handler_string(quoted_str.get(), quoted_str.size());
530  handler_end_map_key();
531 
532  ++p; // skip the ':'.
533  if (p == p_end)
534  return;
535 
536  skip_blanks(p, p_end-p);
537  }
538  break;
539  default:
540  {
541  key_value kv = parse_key_value(p, p_end-p);
542 
543  if (kv.key.empty())
544  {
545  // No map key found.
546  if (*p == '|')
547  {
548  start_literal_block();
549  return;
550  }
551 
552  push_value(p, len);
553  return;
554  }
555 
556  check_or_begin_map();
557  handler_begin_map_key();
558  parse_value(kv.key.get(), kv.key.size());
559  handler_end_map_key();
560 
561  if (kv.value.empty())
562  return;
563 
564  p = kv.value.get();
565  }
566  }
567 
568  if (*p == '|')
569  {
570  start_literal_block();
571  return;
572  }
573 
574  // inline map item.
575  if (*p == '-')
576  throw yaml::parse_error(
577  "parse_map_key: sequence entry is not allowed as an inline map item.",
578  offset() - std::ptrdiff_t(p_end-p+1));
579 
580  size_t scope_width = get_scope() + (p-p0);
581  push_scope(scope_width);
582  parse_line(p, p_end-p);
583 }
584 
585 }
586 
587 #endif
588 
589 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
orcus::yaml_parser
Definition: yaml_parser.hpp:18
orcus::yaml::parser_base
Definition: yaml_parser_base.hpp:76