Open Chinese Convert  1.0.2
A project for conversion between Traditional and Simplified Chinese
 All Classes Functions Typedefs Modules
Segments.hpp
1 /*
2  * Open Chinese Convert
3  *
4  * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18 
19 #pragma once
20 
21 #include "Common.hpp"
22 
23 namespace opencc {
28 class OPENCC_EXPORT Segments {
29 public:
30  Segments() {
31  }
32 
33  Segments(std::initializer_list<const char*> initList) {
34  for (const string& item : initList) {
35  AddSegment(item);
36  }
37  }
38 
39  Segments(std::initializer_list<string> initList) {
40  for (const string& item : initList) {
41  AddSegment(item);
42  }
43  }
44 
45  void AddSegment(const char* unmanagedString) {
46  indexes.push_back(std::make_pair(unmanaged.size(), false));
47  unmanaged.push_back(unmanagedString);
48  }
49 
50  void AddSegment(const string& str) {
51  indexes.push_back(std::make_pair(managed.size(), true));
52  managed.push_back(str);
53  }
54 
55  class iterator : public std::iterator<std::input_iterator_tag, const char*> {
56  public:
57  iterator(const Segments* const _segments,
58  size_t _cursor)
59  : segments(_segments), cursor(_cursor) {
60  }
61 
62  iterator& operator++() {
63  cursor++;
64  return *this;
65  }
66 
67  bool operator==(const iterator& that) const {
68  return cursor == that.cursor && segments == that.segments;
69  }
70 
71  bool operator!=(const iterator& that) const {
72  return !this->operator==(that);
73  }
74 
75  const char* operator*() const {
76  return segments->At(cursor);
77  }
78 
79  private:
80  const Segments* const segments;
81  size_t cursor;
82  };
83 
84  const char* At(size_t cursor) const {
85  const auto& index = indexes[cursor];
86  if (index.second) {
87  return managed[index.first].c_str();
88  } else {
89  return unmanaged[index.first];
90  }
91  }
92 
93  size_t Length() const {
94  return indexes.size();
95  }
96 
97  iterator begin() const {
98  return iterator(this, 0);
99  }
100 
101  iterator end() const {
102  return iterator(this, indexes.size());
103  }
104 
105  string ToString() const {
106  // TODO implement a nested structure to reduce concatenation,
107  // like a purely functional differential list
108  std::ostringstream buffer;
109  for (const char* segment : *this) {
110  buffer << segment;
111  }
112  return buffer.str();
113  }
114 
115 private:
116  Segments(const Segments&) {
117  }
118 
119  vector<const char*> unmanaged;
120  vector<string> managed;
121  // index, managed
122  vector<std::pair<size_t, bool>> indexes;
123 };
124 }
Segmented text.
Definition: Segments.hpp:28
Definition: BinaryDict.hpp:24
Definition: Segments.hpp:55