Go to the documentation of this file.
21 #ifndef TLX_SORT_STRINGS_STRING_SET_HEADER
22 #define TLX_SORT_STRINGS_STRING_SET_HEADER
40 namespace sort_strings_detail {
47 template <
typename StringSet,
typename Traits>
52 typename Traits::String&
at(
size_t i)
const {
53 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
54 return *(ss.begin() + i);
61 bool is_equal(
const typename Traits::String& a,
62 const typename Traits::CharIterator& ai,
63 const typename Traits::String& b,
64 const typename Traits::CharIterator& bi)
const {
65 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
66 return !ss.is_end(a, ai) && !ss.is_end(b, bi) && (*ai == *bi);
70 bool is_less(
const typename Traits::String& a,
71 const typename Traits::CharIterator& ai,
72 const typename Traits::String& b,
73 const typename Traits::CharIterator& bi)
const {
74 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
75 return ss.is_end(a, ai) ||
76 (!ss.is_end(a, ai) && !ss.is_end(b, bi) && *ai < *bi);
80 bool is_leq(
const typename Traits::String& a,
81 const typename Traits::CharIterator& ai,
82 const typename Traits::String& b,
83 const typename Traits::CharIterator& bi)
const {
84 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
85 return ss.is_end(a, ai) ||
86 (!ss.is_end(a, ai) && !ss.is_end(b, bi) && *ai <= *bi);
95 get_char(
const typename Traits::String& s,
size_t depth)
const {
96 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
97 return *ss.get_chars(s, depth);
103 const typename Traits::String& s,
typename Traits::CharIterator i)
const {
104 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
106 if (ss.is_end(s, i))
return 0;
113 const typename Traits::String& s,
typename Traits::CharIterator i)
const {
114 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
117 if (ss.is_end(s, i))
return v;
118 v = (uint16_t(*i) << 8);
120 if (ss.is_end(s, i))
return v;
121 v |= (uint16_t(*i) << 0);
128 const typename Traits::String& s,
typename Traits::CharIterator i)
const {
129 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
132 if (ss.is_end(s, i))
return v;
133 v = (uint32_t(*i) << 24);
135 if (ss.is_end(s, i))
return v;
136 v |= (uint32_t(*i) << 16);
138 if (ss.is_end(s, i))
return v;
139 v |= (uint32_t(*i) << 8);
141 if (ss.is_end(s, i))
return v;
142 v |= (uint32_t(*i) << 0);
149 const typename Traits::String& s,
typename Traits::CharIterator i)
const {
150 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
153 if (ss.is_end(s, i))
return v;
154 v = (uint64_t(*i) << 56);
156 if (ss.is_end(s, i))
return v;
157 v |= (uint64_t(*i) << 48);
159 if (ss.is_end(s, i))
return v;
160 v |= (uint64_t(*i) << 40);
162 if (ss.is_end(s, i))
return v;
163 v |= (uint64_t(*i) << 32);
165 if (ss.is_end(s, i))
return v;
166 v |= (uint64_t(*i) << 24);
168 if (ss.is_end(s, i))
return v;
169 v |= (uint64_t(*i) << 16);
171 if (ss.is_end(s, i))
return v;
172 v |= (uint64_t(*i) << 8);
174 if (ss.is_end(s, i))
return v;
175 v |= (uint64_t(*i) << 0);
179 uint8_t
get_uint8(
const typename Traits::String& s,
size_t depth)
const {
180 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
181 return get_uint8(s, ss.get_chars(s, depth));
184 uint16_t
get_uint16(
const typename Traits::String& s,
size_t depth)
const {
185 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
189 uint32_t
get_uint32(
const typename Traits::String& s,
size_t depth)
const {
190 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
194 uint64_t
get_uint64(
const typename Traits::String& s,
size_t depth)
const {
195 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
202 StringSet
subi(
size_t begin,
size_t end)
const {
203 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
204 return ss.sub(ss.begin() + begin, ss.begin() + end);
208 const typename Traits::String& s2)
const {
209 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
211 typename StringSet::CharIterator c1 = ss.get_chars(s1, 0);
212 typename StringSet::CharIterator c2 = ss.get_chars(s2, 0);
214 while (ss.is_equal(s1, c1, s2, c2))
217 return ss.is_leq(s1, c1, s2, c2);
221 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
223 for (
typename Traits::Iterator pi = ss.begin();
224 pi + 1 != ss.end(); ++pi)
227 TLX_LOG1 <<
"check_order() failed at position " << pi - ss.begin();
235 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
237 for (
typename Traits::Iterator pi = ss.begin(); pi != ss.end(); ++pi)
239 TLX_LOG1 <<
"[" << i++ <<
"] = " << *pi
240 <<
" = " << ss.get_string(*pi, 0);
245 template <
typename Type,
typename StringSet>
247 typename enable_if<
sizeof(Type) == 4, uint32_t>::type
248 get_key(
const StringSet& strset,
249 const typename StringSet::String& s,
size_t depth) {
250 return strset.get_uint32(s, depth);
253 template <
typename Type,
typename StringSet>
255 typename enable_if<
sizeof(Type) == 8, uint64_t>::type
256 get_key(
const StringSet& strset,
257 const typename StringSet::String& s,
size_t depth) {
258 return strset.get_uint64(s, depth);
261 template <
typename Type,
typename StringSet>
263 Type
get_key_at(
const StringSet& strset,
size_t idx,
size_t depth) {
264 return get_key<Type>(strset, strset.at(idx), depth);
273 template <
typename CharType>
278 typedef CharType
Char;
290 typedef std::pair<Iterator, size_t>
Container;
296 template <
typename CharType>
300 GenericCharStringSetTraits<CharType> >
334 {
return s + depth; }
338 {
return (*i == 0); }
342 {
return std::string(
reinterpret_cast<const char*
>(s) + depth); }
350 {
return std::make_pair(
new String[n], n); }
354 {
delete[] c.first; c.first =
nullptr; }
388 typedef std::pair<Iterator, size_t>
Container;
422 {
return reinterpret_cast<CharIterator>(s.data()) + depth; }
426 {
return (i >=
reinterpret_cast<CharIterator>(s.data()) + s.size()); }
430 {
return s.substr(depth); }
438 {
return std::make_pair(
new String[n], n); }
442 {
delete[] c.first; c.first =
nullptr; }
462 typedef std::unique_ptr<std::string>
String;
471 typedef std::pair<Iterator, size_t>
Container;
506 {
return reinterpret_cast<CharIterator>(s->data()) + depth; }
510 {
return (i >=
reinterpret_cast<CharIterator>(s->data()) + s->size()); }
514 {
return s->substr(depth); }
522 {
return std::make_pair(
new String[n], n); }
526 {
delete[] c.first; c.first =
nullptr; }
532 TLX_LOG1 <<
"[" << i++ <<
"] = " << pi->get()
552 typedef std::string
Text;
555 typedef uint8_t
Char;
558 typedef typename Text::size_type
String;
562 typedef typename std::vector<String>::iterator
Iterator;
568 typedef std::pair<Text, std::vector<String> >
Container;
590 sa.resize(text.size());
591 for (
size_t i = 0; i < text.size(); ++i)
617 {
return text_->substr(s + depth); }
625 {
return std::make_pair(*
text_, std::vector<String>(n)); }
629 { std::vector<String> v; v.swap(c.second); }
653 #endif // !TLX_SORT_STRINGS_STRING_SET_HEADER
std::string get_string(const String &s, size_t depth=0) const
Return complete string (for debugging purposes)
Type get_key_at(const StringSet &strset, size_t idx, size_t depth)
std::pair< Iterator, size_t > Container
exported alias for assumed string container
Class implementing StringSet concept for a std::string objects.
enable_if< sizeof(Type)==4, uint32_t >::type get_key(const StringSet &strset, const typename StringSet::String &s, size_t depth)
UPtrStdStringSet sub(Iterator begin, Iterator end) const
Subset this string set using iterator range.
static void deallocate(Container &c)
Deallocate a temporary string container.
Container allocate(size_t n) const
Allocate a new temporary string container with n empty Strings.
String * Iterator
Iterator over string references: pointer to std::string.
Class implementing StringSet concept for suffix sorting indexes of a std::string text object.
Iterator begin() const
Iterator representing first String position.
const Text * text_
reference to base text
bool is_end(const String &, const CharIterator &i) const
Returns true if CharIterator is at end of the given String.
static void deallocate(Container &c)
Deallocate a temporary string container.
static void deallocate(Container &c)
Deallocate a temporary string container.
Traits::CharIterator CharIterator
bool is_leq(const typename Traits::String &a, const typename Traits::CharIterator &ai, const typename Traits::String &b, const typename Traits::CharIterator &bi) const
check if string a is less or equal to string b at iterators ai and bi.
std::pair< Text, std::vector< String > > Container
exported alias for assumed string container
Iterator begin_
vector of std::string objects
const Char * CharIterator
iterator of characters in a string
std::string String
String reference: std::string, which should be reference counted.
GenericCharStringSet< unsigned char > UCharStringSet
size_t size() const
Return size of string array.
Iterator begin_
array of string pointers
Traits::Container Container
StringSet subi(size_t begin, size_t end) const
Subset this string set using index range.
StdStringSet sub(Iterator begin, Iterator end) const
Subset this string set using iterator range.
Traits class implementing StringSet concept for char* and unsigned char* strings.
std::pair< Iterator, size_t > Container
exported alias for assumed string container
Iterator end() const
Iterator representing beyond last String position.
String * Iterator
Iterator over string references: pointer over pointers.
const Char * CharIterator
iterator of characters in a string
static Container allocate(size_t n)
Allocate a new temporary string container with n empty Strings.
uint8_t Char
exported alias for character type
bool is_end(const String &s, const CharIterator &i) const
Returns true if CharIterator is at end of the given String.
GenericCharStringSet< const unsigned char > CUCharStringSet
Iterator begin() const
Iterator representing first String position.
uint32_t get_uint32(const typename Traits::String &s, typename Traits::CharIterator i) const
Return up to 4 characters of string s at iterator i packed into a uint32_t (only works correctly for ...
Traits::Char get_char(const typename Traits::String &s, size_t depth) const
UPtrStdStringSet(const Iterator &begin, const Iterator &end)
Construct from begin and end string pointers.
Traits::Iterator Iterator
size_t size() const
Return size of string array.
Iterator begin_
iterators inside the output suffix array.
bool is_equal(const typename Traits::String &a, const typename Traits::CharIterator &ai, const typename Traits::String &b, const typename Traits::CharIterator &bi) const
check equality of two strings a and b at char iterators ai and bi.
Traits::String & at(size_t i) const
index-based array access (readable and writable) to String objects.
Class implementing StringSet concept for a std::vector containing std::string objects.
SFINAE enable_if – copy of std::enable_if<> with less extra cruft.
String * Iterator
Iterator over string references: using std::vector's iterator.
Iterator begin() const
Iterator representing first String position.
bool is_end(const String &, const CharIterator &i) const
Returns true if CharIterator is at end of the given String.
std::string get_string(const String &s, size_t depth=0) const
Return complete string (for debugging purposes)
GenericCharStringSet< char > CharStringSet
Class implementing StringSet concept for char* and unsigned char* strings.
String & operator[](Iterator i) const
Iterator-based array access (readable and writable) to String objects.
Class implementing StringSet concept for suffix sorting indexes of a std::string text object.
static Container allocate(size_t n)
Allocate a new temporary string container with n empty Strings.
CharIterator get_chars(const String &s, size_t depth) const
Return CharIterator for referenced string, which belongs to this set.
Char * String
String reference: pointer to first character.
Base class for common string set functions, included via CRTP.
CharType Char
exported alias for character type
String & operator[](const Iterator &i) const
Array access (readable and writable) to String objects.
static StringSuffixSet Initialize(const Text &text, std::vector< String > &sa)
Initializing constructor which fills output vector sa with indices.
size_t size() const
Return size of string array.
GenericCharStringSet(Iterator begin, Iterator end)
Construct from begin and end string pointers.
std::string Text
exported alias for assumed string container
Iterator end() const
Iterator representing beyond last String position.
GenericCharStringSet sub(Iterator begin, Iterator end) const
Subset this string set using iterator range.
uint8_t Char
exported alias for character type
std::vector< String >::iterator Iterator
Iterator over string references: using std::vector's iterator over suffix array vector.
StringSuffixSet sub(Iterator begin, Iterator end) const
Subset this string set using iterator range.
Iterator begin_
pointers to std::string objects
std::unique_ptr< std::string > String
String reference: std::string, which should be reference counted.
Class implementing StringSet concept for a std::unique_ptr<std::string objects, which are non-copyabl...
Text::size_type String
String reference: suffix index of the text.
StringSuffixSet(const Text &text, const Iterator &begin, const Iterator &end)
Construct from begin and end string pointers.
CharIterator get_chars(const String &s, size_t depth) const
Return CharIterator for referenced string, which belongs to this set.
size_t size() const
Return size of string array.
static Container allocate(size_t n)
Allocate a new temporary string container with n empty Strings.
std::pair< Iterator, size_t > Container
exported alias for assumed string container
static void deallocate(Container &c)
Deallocate a temporary string container.
CharIterator get_chars(const String &s, size_t depth) const
Return CharIterator for referenced string, which belong to this set.
const Char * CharIterator
iterator of characters in a string
String & operator[](const Iterator &i) const
Array access (readable and writable) to String objects.
StdStringSet(const Iterator &begin, const Iterator &end)
Construct from begin and end string pointers.
const Char * CharIterator
iterator of characters in a string
GenericCharStringSetTraits< CharType > Traits
Class implementing StringSet concept for arrays of std::string objects.
uint8_t Char
exported alias for character type
Iterator end() const
Iterator representing beyond last String position.
Iterator end() const
Iterator representing beyond last String position.
std::string get_string(const String &s, size_t depth=0) const
Return complete string (for debugging purposes)
bool is_end(const String &s, const CharIterator &i) const
Returns true if CharIterator is at end of the given String.
uint16_t get_uint16(const typename Traits::String &s, typename Traits::CharIterator i) const
Return up to 2 characters of string s at iterator i packed into a uint16_t (only works correctly for ...
bool is_less(const typename Traits::String &a, const typename Traits::CharIterator &ai, const typename Traits::String &b, const typename Traits::CharIterator &bi) const
check if string a is less or equal to string b at iterators ai and bi.
std::string get_string(const String &s, size_t depth=0) const
Return complete string (for debugging purposes)
uint64_t get_uint64(const typename Traits::String &s, typename Traits::CharIterator i) const
Return up to 8 characters of string s at iterator i packed into a uint64_t (only works correctly for ...
String & operator[](const Iterator &i) const
Array access (readable and writable) to String objects.
Iterator begin() const
Iterator representing first String position.
GenericCharStringSet< const char > CCharStringSet
uint8_t get_uint8(const typename Traits::String &s, typename Traits::CharIterator i) const
Return up to 1 characters of string s at iterator i packed into a uint8_t (only works correctly for 8...
CharIterator get_chars(const String &s, size_t depth) const
Return CharIterator for referenced string, which belongs to this set.