MyGUI 3.0.1

MyGUI_UString.h

Go to the documentation of this file.
00001 // Modified from OpenGUI under lenient license
00002 // Original copyright details and licensing below:
00003 // OpenGUI (http://opengui.sourceforge.net)
00004 // This source code is released under the BSD License
00005 
00006 // Permission is given to the Ogre project to use the contents of file within its
00007 // source and binary applications, as well as any derivative works, in accordance
00008 // with the terms of any license under which Ogre is or will be distributed.
00009 //
00010 // Ogre may relicense its copy of this file, as well as any OpenGUI released updates
00011 // to this file, under any terms that it deems fit, and is not required to maintain
00012 // the original BSD licensing terms of this file, however OpenGUI retains the right
00013 // to present its copy of this file under the terms of any license under which
00014 // OpenGUI is distributed.
00015 //
00016 // Ogre is not required to release to OpenGUI any future changes that it makes to
00017 // this file, and understands and agrees that any such changes that are released
00018 // back to OpenGUI will become available under the terms of any license under which
00019 // OpenGUI is distributed.
00020 //
00021 // For brevity, this permission text may be removed from this file if desired.
00022 // The original record kept within the SourceForge (http://sourceforge.net/) tracker
00023 // is sufficient.
00024 //
00025 // - Eric Shorkey (zero/zeroskill) <opengui@rightbracket.com> [January 20th, 2007]
00026 
00027 #ifndef __MYGUI_U_STRING_H__
00028 #define __MYGUI_U_STRING_H__
00029 
00030 #include "MyGUI_Prerequest.h"
00031 #include "MyGUI_Types.h"
00032 #include "MyGUI_Diagnostic.h"
00033 #include "MyGUI_LogManager.h"
00034 
00035 // these are explained later
00036 #include <iterator>
00037 #include <string>
00038 #include <stdexcept>
00039 
00040 // this pragma is used to avoid warnings triggered by some advanced GCC warning flags
00041 #if MYGUI_COMPILER == MYGUI_COMPILER_GNUC
00042 #pragma GCC system_header
00043 #endif
00044 
00045 // Workaround for VC7:
00046 //      when building with /MD or /MDd, VC7 has both std::basic_string<unsigned short> and
00047 // basic_string<__wchar_t> instantiated in msvcprt[d].lib/MSVCP71[D].dll, but the header
00048 // files tell the compiler that only one of them is there (based on the /Zc:wchar_t compile
00049 // option). Since this file uses both of them, the compiler would instantiate the other
00050 // one in user object code, which leads to duplicate symbols with msvcprt.lib/MSVCP71[D].dll.
00051 //
00052 #if MYGUI_COMPILER == MYGUI_COMPILER_MSVC && (1300 <= MYGUI_COMP_VER && MYGUI_COMP_VER <= 1310)
00053 
00054 # if defined(_DLL_CPPLIB)
00055 
00056 namespace std
00057 {
00058     template class _CRTIMP2 basic_string<unsigned short, char_traits<unsigned short>,
00059         allocator<unsigned short> >;
00060 
00061     template class _CRTIMP2 basic_string<__wchar_t, char_traits<__wchar_t>,
00062         allocator<__wchar_t> >;
00063 }
00064 
00065 # endif // defined(_DLL_CPPLIB)
00066 
00067 #endif  // MYGUI_COMPILER == MYGUI_COMPILER_MSVC && (1300 <= MYGUI_COMP_VER && MYGUI_COMP_VER <= 1310)
00068 
00069 
00070 namespace MyGUI
00071 {
00072 
00073     /* READ THIS NOTICE BEFORE USING IN YOUR OWN APPLICATIONS
00074     =NOTICE=
00075     This class is not a complete Unicode solution. It purposefully does not
00076     provide certain functionality, such as proper lexical sorting for
00077     Unicode values. It does provide comparison operators for the sole purpose
00078     of using UString as an index with std::map and other operator< sorted
00079     containers, but it should NOT be relied upon for meaningful lexical
00080     operations, such as alphabetical sorts. If you need this type of
00081     functionality, look into using ICU instead (http://icu.sourceforge.net/).
00082 
00083     =REQUIREMENTS=
00084     There are a few requirements for proper operation. They are fairly small,
00085     and shouldn't restrict usage on any reasonable target.
00086     * Compiler must support unsigned 16-bit integer types
00087     * Compiler must support signed 32-bit integer types
00088     * wchar_t must be either UTF-16 or UTF-32 encoding, and specified as such
00089         using the WCHAR_UTF16 macro as outlined below.
00090     * You must include <iterator>, <string>, and <cwchar>. Probably more, but
00091         these are the most obvious.
00092 
00093     =REQUIRED PREPROCESSOR MACROS=
00094     This class requires two preprocessor macros to be defined in order to
00095     work as advertised.
00096     INT32 - must be mapped to a signed 32 bit integer (ex. #define INT32 int)
00097     UINT16 - must be mapped to an unsigned 16 bit integer (ex. #define UINT16 unsigned short)
00098 
00099     Additionally, a third macro should be defined to control the evaluation of wchar_t:
00100     WCHAR_UTF16 - should be defined when wchar_t represents UTF-16 code points,
00101         such as in Windows. Otherwise it is assumed that wchar_t is a 32-bit
00102         integer representing UTF-32 code points.
00103     */
00104 
00105     // THIS IS A VERY BRIEF AUTO DETECTION. YOU MAY NEED TO TWEAK THIS
00106 #ifdef __STDC_ISO_10646__
00107 // for any compiler that provides this, wchar_t is guaranteed to hold any Unicode value with a single code point (32-bit or larger)
00108 // so we can safely skip the rest of the testing
00109 #else // #ifdef __STDC_ISO_10646__
00110 #if defined( __WIN32__ ) || defined( _WIN32 )
00111 #define WCHAR_UTF16 // All currently known Windows platforms utilize UTF-16 encoding in wchar_t
00112 #else // #if defined( __WIN32__ ) || defined( _WIN32 )
00113 #if WCHAR_MAX <= 0xFFFF // this is a last resort fall back test; WCHAR_MAX is defined in <wchar.h>
00114 #define WCHAR_UTF16 // best we can tell, wchar_t is not larger than 16-bit
00115 #endif // #if WCHAR_MAX <= 0xFFFF
00116 #endif // #if defined( __WIN32__ ) || defined( _WIN32 )
00117 #endif // #ifdef __STDC_ISO_10646__
00118 
00119 
00120 // MYGUI_IS_NATIVE_WCHAR_T means that wchar_t isn't a typedef of
00121 // uint16 or uint32.
00122 #if MYGUI_COMPILER == MYGUI_COMPILER_MSVC
00123 
00124 // Don't define the wchar_t related functions, since they would duplicate
00125 // the UString::code_point related functions when compiling
00126 // without /Zc:wchar_t, because in that case both types are
00127 // typedefs of uint16.
00128 # if defined(_NATIVE_WCHAR_T_DEFINED)
00129 #   define MYGUI_IS_NATIVE_WCHAR_T      1
00130 # else
00131 #   define MYGUI_IS_NATIVE_WCHAR_T      0
00132 # endif
00133 
00134 #else   // MYGUI_COMPILER != MYGUI_COMPILER_MSVC
00135 
00136 // wchar_t is assumed to be a native type on all other compilers
00137 #   define MYGUI_IS_NATIVE_WCHAR_T     1
00138 
00139 #endif  // MYGUI_COMPILER == MYGUI_COMPILER_MSVC
00140 
00142 
00167     class UString
00168     {
00169         // constants used in UTF-8 conversions
00170         static const unsigned char _lead1 = 0xC0;      //110xxxxx
00171         static const unsigned char _lead1_mask = 0x1F; //00011111
00172         static const unsigned char _lead2 = 0xE0;      //1110xxxx
00173         static const unsigned char _lead2_mask = 0x0F; //00001111
00174         static const unsigned char _lead3 = 0xF0;      //11110xxx
00175         static const unsigned char _lead3_mask = 0x07; //00000111
00176         static const unsigned char _lead4 = 0xF8;      //111110xx
00177         static const unsigned char _lead4_mask = 0x03; //00000011
00178         static const unsigned char _lead5 = 0xFC;      //1111110x
00179         static const unsigned char _lead5_mask = 0x01; //00000001
00180         static const unsigned char _cont = 0x80;       //10xxxxxx
00181         static const unsigned char _cont_mask = 0x3F;  //00111111
00182 
00183     public:
00185         typedef size_t size_type;
00187         static const size_type npos = ~(size_t)0;
00188 
00190         typedef uint32 unicode_char;
00191 
00193         typedef uint16 code_point;
00194 
00196         typedef code_point value_type;
00197 
00198         typedef std::basic_string<code_point> dstring; // data string
00199 
00201         typedef std::basic_string<unicode_char> utf32string;
00202 
//! Exception type used to signal invalid data.
/*! Derives from std::runtime_error, so what() reports the message given to the
    constructor. */
class invalid_data : public std::runtime_error
{
public:
    //! Constructs the exception from a description of the failure.
    /*! \param message text later returned by what(). The parameter used to be
        called `_Message`; identifiers starting with an underscore followed by
        an uppercase letter are reserved to the implementation, so it was renamed. */
    explicit invalid_data( const std::string& message )
        : std::runtime_error( message )
    {
    }
};
00213 
00214         //#########################################################################
00216         class _base_iterator: public std::iterator<std::random_access_iterator_tag, value_type>
00217         { /* i don't know why the beautifier is freaking out on this line */
00218             friend class UString;
00219         protected:
00220             _base_iterator()
00221             {
00222                 mString = 0;
00223             }
00224 
00225             void _seekFwd( size_type c )
00226             {
00227                 mIter += c;
00228             }
00229             void _seekRev( size_type c )
00230             {
00231                 mIter -= c;
00232             }
00233             void _become( const _base_iterator& i )
00234             {
00235                 mIter = i.mIter;
00236                 mString = i.mString;
00237             }
00238             bool _test_begin() const
00239             {
00240                 return mIter == mString->mData.begin();
00241             }
00242             bool _test_end() const
00243             {
00244                 return mIter == mString->mData.end();
00245             }
00246             size_type _get_index() const
00247             {
00248                 return mIter - mString->mData.begin();
00249             }
00250             void _jump_to( size_type index )
00251             {
00252                 mIter = mString->mData.begin() + index;
00253             }
00254 
00255             unicode_char _getCharacter() const
00256             {
00257                 size_type current_index = _get_index();
00258                 return mString->getChar( current_index );
00259             }
00260             int _setCharacter( unicode_char uc )
00261             {
00262                 size_type current_index = _get_index();
00263                 int change = mString->setChar( current_index, uc );
00264                 _jump_to( current_index );
00265                 return change;
00266             }
00267 
00268             void _moveNext()
00269             {
00270                 _seekFwd( 1 ); // move 1 code point forward
00271                 if ( _test_end() ) return; // exit if we hit the end
00272                 if ( _utf16_surrogate_follow( mIter[0] ) )
00273                 {
00274                     // landing on a follow code point means we might be part of a bigger character
00275                     // so we test for that
00276                     code_point lead_half = 0;
00277                     //NB: we can't possibly be at the beginning here, so no need to test
00278                     lead_half = mIter[-1]; // check the previous code point to see if we're part of a surrogate pair
00279                     if ( _utf16_surrogate_lead( lead_half ) )
00280                     {
00281                         _seekFwd( 1 ); // if so, then advance 1 more code point
00282                     }
00283                 }
00284             }
00285             void _movePrev()
00286             {
00287                 _seekRev( 1 ); // move 1 code point backwards
00288                 if ( _test_begin() ) return; // exit if we hit the beginning
00289                 if ( _utf16_surrogate_follow( mIter[0] ) )
00290                 {
00291                     // landing on a follow code point means we might be part of a bigger character
00292                     // so we test for that
00293                     code_point lead_half = 0;
00294                     lead_half = mIter[-1]; // check the previous character to see if we're part of a surrogate pair
00295                     if ( _utf16_surrogate_lead( lead_half ) )
00296                     {
00297                         _seekRev( 1 ); // if so, then rewind 1 more code point
00298                     }
00299                 }
00300             }
00301 
00302             dstring::iterator mIter;
00303             UString* mString;
00304         };
00305 
00306         //#########################################################################
00307         // FORWARD ITERATORS
00308         //#########################################################################
00309         class _const_fwd_iterator; // forward declaration
00310 
00312         class _fwd_iterator: public _base_iterator
00313         { /* i don't know why the beautifier is freaking out on this line */
00314             friend class _const_fwd_iterator;
00315         public:
00316             _fwd_iterator() { }
00317             _fwd_iterator( const _fwd_iterator& i )
00318             {
00319                 _become( i );
00320             }
00321 
00323             _fwd_iterator& operator++()
00324             {
00325                 _seekFwd( 1 );
00326                 return *this;
00327             }
00329             _fwd_iterator operator++( int )
00330             {
00331                 _fwd_iterator tmp( *this );
00332                 _seekFwd( 1 );
00333                 return tmp;
00334             }
00335 
00337             _fwd_iterator& operator--()
00338             {
00339                 _seekRev( 1 );
00340                 return *this;
00341             }
00343             _fwd_iterator operator--( int )
00344             {
00345                 _fwd_iterator tmp( *this );
00346                 _seekRev( 1 );
00347                 return tmp;
00348             }
00349 
00351             _fwd_iterator operator+( size_type n )
00352             {
00353                 _fwd_iterator tmp( *this );
00354                 tmp._seekFwd( n );
00355                 return tmp;
00356             }
00358             _fwd_iterator operator+( difference_type n )
00359             {
00360                 _fwd_iterator tmp( *this );
00361                 if ( n < 0 )
00362                     tmp._seekRev( -n );
00363                 else
00364                     tmp._seekFwd( n );
00365                 return tmp;
00366             }
00368             _fwd_iterator operator-( size_type n )
00369             {
00370                 _fwd_iterator tmp( *this );
00371                 tmp._seekRev( n );
00372                 return tmp;
00373             }
00375             _fwd_iterator operator-( difference_type n )
00376             {
00377                 _fwd_iterator tmp( *this );
00378                 if ( n < 0 )
00379                     tmp._seekFwd( -n );
00380                 else
00381                     tmp._seekRev( n );
00382                 return tmp;
00383             }
00384 
00386             _fwd_iterator& operator+=( size_type n )
00387             {
00388                 _seekFwd( n );
00389                 return *this;
00390             }
00392             _fwd_iterator& operator+=( difference_type n )
00393             {
00394                 if ( n < 0 )
00395                     _seekRev( -n );
00396                 else
00397                     _seekFwd( n );
00398                 return *this;
00399             }
00401             _fwd_iterator& operator-=( size_type n )
00402             {
00403                 _seekRev( n );
00404                 return *this;
00405             }
00407             _fwd_iterator& operator-=( difference_type n )
00408             {
00409                 if ( n < 0 )
00410                     _seekFwd( -n );
00411                 else
00412                     _seekRev( n );
00413                 return *this;
00414             }
00415 
00417             value_type& operator*() const
00418             {
00419                 return *mIter;
00420             }
00421 
00423             value_type& operator[]( size_type n ) const
00424             {
00425                 _fwd_iterator tmp( *this );
00426                 tmp += n;
00427                 return *tmp;
00428             }
00430             value_type& operator[]( difference_type n ) const
00431             {
00432                 _fwd_iterator tmp( *this );
00433                 tmp += n;
00434                 return *tmp;
00435             }
00436 
00438             _fwd_iterator& moveNext()
00439             {
00440                 _moveNext();
00441                 return *this;
00442             }
00444             _fwd_iterator& movePrev()
00445             {
00446                 _movePrev();
00447                 return *this;
00448             }
00450             unicode_char getCharacter() const
00451             {
00452                 return _getCharacter();
00453             }
00455             int setCharacter( unicode_char uc )
00456             {
00457                 return _setCharacter( uc );
00458             }
00459         };
00460 
00461 
00462         //#########################################################################
00464         class _const_fwd_iterator: public _base_iterator
00465         { /* i don't know why the beautifier is freaking out on this line */
00466         public:
00467             _const_fwd_iterator() { }
00468             _const_fwd_iterator( const _const_fwd_iterator& i )
00469             {
00470                 _become( i );
00471             }
00472             _const_fwd_iterator( const _fwd_iterator& i )
00473             {
00474                 _become( i );
00475             }
00476 
00478             _const_fwd_iterator& operator++()
00479             {
00480                 _seekFwd( 1 );
00481                 return *this;
00482             }
00484             _const_fwd_iterator operator++( int )
00485             {
00486                 _const_fwd_iterator tmp( *this );
00487                 _seekFwd( 1 );
00488                 return tmp;
00489             }
00490 
00492             _const_fwd_iterator& operator--()
00493             {
00494                 _seekRev( 1 );
00495                 return *this;
00496             }
00498             _const_fwd_iterator operator--( int )
00499             {
00500                 _const_fwd_iterator tmp( *this );
00501                 _seekRev( 1 );
00502                 return tmp;
00503             }
00504 
00506             _const_fwd_iterator operator+( size_type n )
00507             {
00508                 _const_fwd_iterator tmp( *this );
00509                 tmp._seekFwd( n );
00510                 return tmp;
00511             }
00513             _const_fwd_iterator operator+( difference_type n )
00514             {
00515                 _const_fwd_iterator tmp( *this );
00516                 if ( n < 0 )
00517                     tmp._seekRev( -n );
00518                 else
00519                     tmp._seekFwd( n );
00520                 return tmp;
00521             }
00523             _const_fwd_iterator operator-( size_type n )
00524             {
00525                 _const_fwd_iterator tmp( *this );
00526                 tmp._seekRev( n );
00527                 return tmp;
00528             }
00530             _const_fwd_iterator operator-( difference_type n )
00531             {
00532                 _const_fwd_iterator tmp( *this );
00533                 if ( n < 0 )
00534                     tmp._seekFwd( -n );
00535                 else
00536                     tmp._seekRev( n );
00537                 return tmp;
00538             }
00539 
00541             _const_fwd_iterator& operator+=( size_type n )
00542             {
00543                 _seekFwd( n );
00544                 return *this;
00545             }
00547             _const_fwd_iterator& operator+=( difference_type n )
00548             {
00549                 if ( n < 0 )
00550                     _seekRev( -n );
00551                 else
00552                     _seekFwd( n );
00553                 return *this;
00554             }
00556             _const_fwd_iterator& operator-=( size_type n )
00557             {
00558                 _seekRev( n );
00559                 return *this;
00560             }
00562             _const_fwd_iterator& operator-=( difference_type n )
00563             {
00564                 if ( n < 0 )
00565                     _seekFwd( -n );
00566                 else
00567                     _seekRev( n );
00568                 return *this;
00569             }
00570 
00572             const value_type& operator*() const
00573             {
00574                 return *mIter;
00575             }
00576 
00578             const value_type& operator[]( size_type n ) const
00579             {
00580                 _const_fwd_iterator tmp( *this );
00581                 tmp += n;
00582                 return *tmp;
00583             }
00585             const value_type& operator[]( difference_type n ) const
00586             {
00587                 _const_fwd_iterator tmp( *this );
00588                 tmp += n;
00589                 return *tmp;
00590             }
00591 
00593             _const_fwd_iterator& moveNext()
00594             {
00595                 _moveNext();
00596                 return *this;
00597             }
00599             _const_fwd_iterator& movePrev()
00600             {
00601                 _movePrev();
00602                 return *this;
00603             }
00605             unicode_char getCharacter() const
00606             {
00607                 return _getCharacter();
00608             }
00609 
00611             friend size_type operator-( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
00613             friend bool operator==( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
00615             friend bool operator!=( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
00617             friend bool operator<( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
00619             friend bool operator<=( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
00621             friend bool operator>( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
00623             friend bool operator>=( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
00624 
00625         };
00626 
00627         //#########################################################################
00628         // REVERSE ITERATORS
00629         //#########################################################################
00630         class _const_rev_iterator; // forward declaration
00632         class _rev_iterator: public _base_iterator
00633         { /* i don't know why the beautifier is freaking out on this line */
00634             friend class _const_rev_iterator;
00635         public:
00636             _rev_iterator() { }
00637             _rev_iterator( const _rev_iterator& i )
00638             {
00639                 _become( i );
00640             }
00641 
00643             _rev_iterator& operator++()
00644             {
00645                 _seekRev( 1 );
00646                 return *this;
00647             }
00649             _rev_iterator operator++( int )
00650             {
00651                 _rev_iterator tmp( *this );
00652                 _seekRev( 1 );
00653                 return tmp;
00654             }
00655 
00657             _rev_iterator& operator--()
00658             {
00659                 _seekFwd( 1 );
00660                 return *this;
00661             }
00663             _rev_iterator operator--( int )
00664             {
00665                 _rev_iterator tmp( *this );
00666                 _seekFwd( 1 );
00667                 return tmp;
00668             }
00669 
00671             _rev_iterator operator+( size_type n )
00672             {
00673                 _rev_iterator tmp( *this );
00674                 tmp._seekRev( n );
00675                 return tmp;
00676             }
00678             _rev_iterator operator+( difference_type n )
00679             {
00680                 _rev_iterator tmp( *this );
00681                 if ( n < 0 )
00682                     tmp._seekFwd( -n );
00683                 else
00684                     tmp._seekRev( n );
00685                 return tmp;
00686             }
00688             _rev_iterator operator-( size_type n )
00689             {
00690                 _rev_iterator tmp( *this );
00691                 tmp._seekFwd( n );
00692                 return tmp;
00693             }
00695             _rev_iterator operator-( difference_type n )
00696             {
00697                 _rev_iterator tmp( *this );
00698                 if ( n < 0 )
00699                     tmp._seekRev( -n );
00700                 else
00701                     tmp._seekFwd( n );
00702                 return tmp;
00703             }
00704 
00706             _rev_iterator& operator+=( size_type n )
00707             {
00708                 _seekRev( n );
00709                 return *this;
00710             }
00712             _rev_iterator& operator+=( difference_type n )
00713             {
00714                 if ( n < 0 )
00715                     _seekFwd( -n );
00716                 else
00717                     _seekRev( n );
00718                 return *this;
00719             }
00721             _rev_iterator& operator-=( size_type n )
00722             {
00723                 _seekFwd( n );
00724                 return *this;
00725             }
00727             _rev_iterator& operator-=( difference_type n )
00728             {
00729                 if ( n < 0 )
00730                     _seekRev( -n );
00731                 else
00732                     _seekFwd( n );
00733                 return *this;
00734             }
00735 
00737             value_type& operator*() const
00738             {
00739                 return mIter[-1];
00740             }
00741 
00743             value_type& operator[]( size_type n ) const
00744             {
00745                 _rev_iterator tmp( *this );
00746                 tmp -= n;
00747                 return *tmp;
00748             }
00750             value_type& operator[]( difference_type n ) const
00751             {
00752                 _rev_iterator tmp( *this );
00753                 tmp -= n;
00754                 return *tmp;
00755             }
00756         };
00757         //#########################################################################
00759         class _const_rev_iterator: public _base_iterator
00760         { /* i don't know why the beautifier is freaking out on this line */
00761         public:
00762             _const_rev_iterator() { }
00763             _const_rev_iterator( const _const_rev_iterator& i )
00764             {
00765                 _become( i );
00766             }
00767             _const_rev_iterator( const _rev_iterator& i )
00768             {
00769                 _become( i );
00770             }
00772             _const_rev_iterator& operator++()
00773             {
00774                 _seekRev( 1 );
00775                 return *this;
00776             }
00778             _const_rev_iterator operator++( int )
00779             {
00780                 _const_rev_iterator tmp( *this );
00781                 _seekRev( 1 );
00782                 return tmp;
00783             }
00784 
00786             _const_rev_iterator& operator--()
00787             {
00788                 _seekFwd( 1 );
00789                 return *this;
00790             }
00792             _const_rev_iterator operator--( int )
00793             {
00794                 _const_rev_iterator tmp( *this );
00795                 _seekFwd( 1 );
00796                 return tmp;
00797             }
00798 
00800             _const_rev_iterator operator+( size_type n )
00801             {
00802                 _const_rev_iterator tmp( *this );
00803                 tmp._seekRev( n );
00804                 return tmp;
00805             }
00807             _const_rev_iterator operator+( difference_type n )
00808             {
00809                 _const_rev_iterator tmp( *this );
00810                 if ( n < 0 )
00811                     tmp._seekFwd( -n );
00812                 else
00813                     tmp._seekRev( n );
00814                 return tmp;
00815             }
00817             _const_rev_iterator operator-( size_type n )
00818             {
00819                 _const_rev_iterator tmp( *this );
00820                 tmp._seekFwd( n );
00821                 return tmp;
00822             }
00824             _const_rev_iterator operator-( difference_type n )
00825             {
00826                 _const_rev_iterator tmp( *this );
00827                 if ( n < 0 )
00828                     tmp._seekRev( -n );
00829                 else
00830                     tmp._seekFwd( n );
00831                 return tmp;
00832             }
00833 
00835             _const_rev_iterator& operator+=( size_type n )
00836             {
00837                 _seekRev( n );
00838                 return *this;
00839             }
00841             _const_rev_iterator& operator+=( difference_type n )
00842             {
00843                 if ( n < 0 )
00844                     _seekFwd( -n );
00845                 else
00846                     _seekRev( n );
00847                 return *this;
00848             }
00850             _const_rev_iterator& operator-=( size_type n )
00851             {
00852                 _seekFwd( n );
00853                 return *this;
00854             }
00856             _const_rev_iterator& operator-=( difference_type n )
00857             {
00858                 if ( n < 0 )
00859                     _seekRev( -n );
00860                 else
00861                     _seekFwd( n );
00862                 return *this;
00863             }
00864 
00866             const value_type& operator*() const
00867             {
00868                 return mIter[-1];
00869             }
00870 
00872             const value_type& operator[]( size_type n ) const
00873             {
00874                 _const_rev_iterator tmp( *this );
00875                 tmp -= n;
00876                 return *tmp;
00877             }
00879             const value_type& operator[]( difference_type n ) const
00880             {
00881                 _const_rev_iterator tmp( *this );
00882                 tmp -= n;
00883                 return *tmp;
00884             }
00885 
00887             friend size_type operator-( const _const_rev_iterator& left, const _const_rev_iterator& right );
00889             friend bool operator==( const _const_rev_iterator& left, const _const_rev_iterator& right );
00891             friend bool operator!=( const _const_rev_iterator& left, const _const_rev_iterator& right );
00893             friend bool operator<( const _const_rev_iterator& left, const _const_rev_iterator& right );
00895             friend bool operator<=( const _const_rev_iterator& left, const _const_rev_iterator& right );
00897             friend bool operator>( const _const_rev_iterator& left, const _const_rev_iterator& right );
00899             friend bool operator>=( const _const_rev_iterator& left, const _const_rev_iterator& right );
00900         };
00901         //#########################################################################
00902 
00903         typedef _fwd_iterator iterator;                     //!< type returned by begin() / end()
00904         typedef _rev_iterator reverse_iterator;             //!< type returned by rbegin() / rend()
00905         typedef _const_fwd_iterator const_iterator;         //!< type returned by begin() / end() on a const string
00906         typedef _const_rev_iterator const_reverse_iterator; //!< type returned by rbegin() / rend() on a const string
00907 
00908 
00910 
00911 
00912         UString()
00913         {
                  // Default constructor: runs _init() and assigns no content.
00914             _init();
00915         }
00917         UString( const UString& copy )
00918         {
                  // Copy constructor: runs _init(), then copies the source's code point data (mData).
                  // Nothing else of `copy` is read here.
00919             _init();
00920             mData = copy.mData;
00921         }
00923         UString( size_type length, const code_point& ch )
00924         {
                  // Fill constructor: the new string holds `length` copies of the code point `ch`.
00925             _init();
00926             assign( length, ch );
00927         }
00929         UString( const code_point* str )
00930         {
                  // Builds the string from a code point array via assign( str ).
                  // No length is passed, so `str` is expected to be nul-terminated.
00931             _init();
00932             assign( str );
00933         }
00935         UString( const code_point* str, size_type length )
00936         {
                  // Builds the string from `length` code points taken from `str`.
00937             _init();
00938             assign( str, length );
00939         }
00941         UString( const UString& str, size_type index, size_type length )
00942         {
                  // Substring constructor: takes `length` code points of `str` starting at code point
                  // index `index` (forwarded unchanged to assign()).
00943             _init();
00944             assign( str, index, length );
00945         }
00946 #if MYGUI_IS_NATIVE_WCHAR_T
00947 
00948         UString( const wchar_t* w_str )
00949         {
                  // Builds the string from a wide-character array; assign( w_str ) performs the conversion.
                  // No length is passed, so `w_str` is expected to be nul-terminated.
00950             _init();
00951             assign( w_str );
00952         }
00954         UString( const wchar_t* w_str, size_type length )
00955         {
                  // Builds the string from the first `length` wchar_t elements of `w_str`.
00956             _init();
00957             assign( w_str, length );
00958         }
00959 #endif
00960 
00961         UString( const std::wstring& wstr )
00962         {
                  // Builds the string from a std::wstring; assign( wstr ) performs the conversion.
00963             _init();
00964             assign( wstr );
00965         }
00967         UString( const char* c_str )
00968         {
                  // Builds the string from a nul-terminated char array. assign( c_str ) routes it
                  // through the std::string overload, which decodes the bytes as UTF-8.
00969             _init();
00970             assign( c_str );
00971         }
00973         UString( const char* c_str, size_type length )
00974         {
                  // Builds the string from the first `length` bytes of `c_str`, decoded as UTF-8
                  // by the assign() overloads this forwards to.
00975             _init();
00976             assign( c_str, length );
00977         }
00979         UString( const std::string& str )
00980         {
                  // Builds the string from a std::string whose bytes are decoded as UTF-8 by assign( str ).
00981             _init();
00982             assign( str );
00983         }
00985         ~UString()
00986         {
                  // Destructor: the only explicit work is the call to _cleanBuffer().
00987             _cleanBuffer();
00988         }
00990 
00992 
00994 
00995 
00996         size_type size() const
00997         {
                  // Number of code_point elements held in mData. A character stored as two code
                  // points counts twice here; length_Characters() counts whole characters.
00998             return mData.size();
00999         }
01001         size_type length() const
01002         {
                  // Synonym for size(): the count is in code points, not characters.
01003             return size();
01004         }
01006 
01007         size_type length_Characters() const
01008         {
                  // Counts how many moveNext() steps it takes to walk from begin() to end().
01009             size_type count = 0;
01010             for ( const_iterator pos = begin(), stop = end(); pos != stop; pos.moveNext() )
01011             {
01012                 ++count;
01013             }
01014             return count;
01015         }
01019         size_type max_size() const
01020         {
                  // Largest number of code points the underlying mData reports it can hold.
01021             return mData.max_size();
01022         }
01024         void reserve( size_type size )
01025         {
                  // Forwards the capacity request (in code points) to mData.reserve().
01026             mData.reserve( size );
01027         }
01029         void resize( size_type num, const code_point& val = 0 )
01030         {
                  // Forwards to mData.resize( num, val ): `num` is the new length in code points and
                  // `val` (default 0) is the fill value passed along for any added positions.
01031             mData.resize( num, val );
01032         }
01034         void swap( UString& from )
01035         {
                  // Exchanges the code point data of the two strings. Only mData is swapped here.
01036             mData.swap( from.mData );
01037         }
01039         bool empty() const
01040         {
                  // True when mData holds no code points.
01041             return mData.empty();
01042         }
01044         const code_point* c_str() const
01045         {
                  // Read-only pointer to mData's code point array, as returned by mData.c_str().
                  // The pointer refers to mData's own storage, not to a copy.
01046             return mData.c_str();
01047         }
01049         const code_point* data() const
01050         {
                  // Identical to c_str(); provided under the std::string-style name.
01051             return c_str();
01052         }
01054         size_type capacity() const
01055         {
                  // Capacity currently reported by mData, in code points.
01056             return mData.capacity();
01057         }
01059         void clear()
01060         {
                  // Removes every code point from mData, leaving the string empty.
01061             mData.clear();
01062         }
01064 
01065         UString substr( size_type index, size_type num = npos ) const
01066         {
                  // Returns a new string holding up to `num` code points starting at code point
                  // index `index`. The extracted buffer is swapped into an empty result instead of
                  // being assigned; a private constructor could still avoid the temporary.
01067             dstring piece = mData.substr( index, num );
01068             UString result;
01069             result.mData.swap( piece );
01070             return result;
01071         }
01074         void push_back( unicode_char val )
01075         {
                  // Encodes `val` with _utf32_to_utf16() and appends the code points it produced
                  // (at most two), in order.
01076             code_point units[2];
01077             const size_t produced = _utf32_to_utf16( val, units );
01078             for ( size_t k = 0; k < produced && k < 2; ++k )
01079                 push_back( units[k] );
01080         }
01081 #if MYGUI_IS_NATIVE_WCHAR_T
01082 
01083         void push_back( wchar_t val )
01084         {
                  // Appends one wide character.
                  // It goes through the unicode_char overload (the "Unicode method"), which encodes the
                  // value with _utf32_to_utf16() and appends every code point that yields. Calling
                  // mData.push_back() directly, as before, narrowed the value to a single code_point,
                  // corrupting any character that needs two code points.
01085             // we do this because the Unicode method still preserves UTF-16 code points
01086             push_back( static_cast<unicode_char>( val ) );
01087         }
01088 #endif
01089 
01090 
01092         void push_back( code_point val )
01093         {
                  // Appends a single code point exactly as given; no validation or conversion.
01094             mData.push_back( val );
01095         }
01097 
01098         void push_back( char val )
01099         {
                  // Appends one char converted by a plain cast to code_point; no UTF-8 decoding is done.
                  // NOTE(review): where char is signed, a negative value is converted as-is by the
                  // cast — confirm callers only pass 7-bit characters.
01100             mData.push_back( static_cast<code_point>( val ) );
01101         }
01103         bool inString( unicode_char ch ) const
01104         {
                  // Linear scan, one moveNext() step at a time; stops at the first position whose
                  // getCharacter() equals `ch`.
01105             const_iterator pos, stop = end();
01106             pos = begin();
01107             while ( pos != stop )
01108             {
01109                 if ( pos.getCharacter() == ch )
01110                     return true;
01111                 pos.moveNext();
01112             }
01113             return false;
01114         }
01114 
01116 
01118 
01119 
01120         const std::string& asUTF8() const
01121         {
                  // Calls _load_buffer_UTF8(), then returns a reference to the std::string held in
                  // m_buffer.mStrBuffer. The result refers to that buffer, not to a fresh copy.
                  // NOTE(review): presumably invalidated when m_buffer is reloaded by another as*() call — confirm.
01122             _load_buffer_UTF8();
01123             return *m_buffer.mStrBuffer;
01124         }
01126         const char* asUTF8_c_str() const
01127         {
                  // Same as asUTF8(), but returns the buffer's c_str() pointer.
                  // The pointer refers to m_buffer.mStrBuffer's storage.
01128             _load_buffer_UTF8();
01129             return m_buffer.mStrBuffer->c_str();
01130         }
01132         const utf32string& asUTF32() const
01133         {
                  // Calls _load_buffer_UTF32(), then returns a reference to the utf32string held in
                  // m_buffer.mUTF32StrBuffer. The result refers to that buffer, not to a fresh copy.
01134             _load_buffer_UTF32();
01135             return *m_buffer.mUTF32StrBuffer;
01136         }
01138         const unicode_char* asUTF32_c_str() const
01139         {
                  // Same as asUTF32(), but returns the buffer's c_str() pointer.
                  // The pointer refers to m_buffer.mUTF32StrBuffer's storage.
01140             _load_buffer_UTF32();
01141             return m_buffer.mUTF32StrBuffer->c_str();
01142         }
01144         const std::wstring& asWStr() const
01145         {
                  // Calls _load_buffer_WStr(), then returns a reference to the std::wstring held in
                  // m_buffer.mWStrBuffer. The result refers to that buffer, not to a fresh copy.
01146             _load_buffer_WStr();
01147             return *m_buffer.mWStrBuffer;
01148         }
01150         const wchar_t* asWStr_c_str() const
01151         {
                  // Same as asWStr(), but returns the buffer's c_str() pointer.
                  // The pointer refers to m_buffer.mWStrBuffer's storage.
01152             _load_buffer_WStr();
01153             return m_buffer.mWStrBuffer->c_str();
01154         }
01156 
01158 
01160 
01161 
01162         code_point& at( size_type loc )
01163         {
                  // Mutable reference to the code point at index `loc`, obtained from mData.at().
                  // The index addresses code points, so it may land on either half of a two-code-point character.
01164             return mData.at( loc );
01165         }
01167         const code_point& at( size_type loc ) const
01168         {
                  // Read-only reference to the code point at index `loc`, obtained from mData.at().
01169             return mData.at( loc );
01170         }
01172 
01176         unicode_char getChar( size_type loc ) const
01177         {
                  // Decodes and returns the character that starts at code point index `loc`.
                  // `loc` itself is not range-checked: ptr[loc] is read directly.
01178             const code_point* ptr = c_str();
01179             unicode_char uc;
01180             size_t len = _utf16_char_length( ptr[loc] ); // code points this character is expected to span
01181             code_point cp[2] = { /* blame the code beautifier */ 0, 0 };
01182             cp[0] = ptr[loc];
01183 
                  // Load the second code point only if one is expected and it lies inside the string;
                  // otherwise cp[1] stays 0.
01184             if ( len == 2 && ( loc + 1 ) < mData.length() )
01185             {
01186                 cp[1] = ptr[loc+1];
01187             }
01188             _utf16_to_utf32( cp, uc ); // the decoded value is received in uc
01189             return uc;
01190         }
01192 
01200         int setChar( size_type loc, unicode_char ch )
01201         {
                  // Replaces the character that starts at code point index `loc` with `ch`.
                  // Returns how the string length changed, in code points: 1 (grew), -1 (shrank) or 0.
01202             code_point cp[2] = { /* blame the code beautifier */ 0, 0 };
01203             size_t lc = _utf32_to_utf16( ch, cp );
01204             unicode_char existingChar = getChar( loc );
01205             size_t existingSize = _utf16_char_length( existingChar );
01206             size_t newSize = _utf16_char_length( ch );
01207 
                  // New character is longer: overwrite the existing code point and insert the extra one after it.
01208             if ( newSize > existingSize )
01209             {
01210                 at( loc ) = cp[0];
01211                 insert( loc + 1, 1, cp[1] );
01212                 return 1;
01213             }
                  // New character is shorter: drop one code point, then overwrite the one now at `loc`.
01214             if ( newSize < existingSize )
01215             {
01216                 erase( loc, 1 );
01217                 at( loc ) = cp[0];
01218                 return -1;
01219             }
01220 
01221             // newSize == existingSize
01222             at( loc ) = cp[0];
01223             if ( lc == 2 ) at( loc + 1 ) = cp[1];
01224             return 0;
01225         }
01227 
01229 
01231 
01232 
01233         iterator begin()
01234         {
                  // Iterator bound to this string (mString) and positioned at mData.begin().
01235             iterator i;
01236             i.mIter = mData.begin();
01237             i.mString = this;
01238             return i;
01239         }
01241         const_iterator begin() const
01242         {
                  // Const counterpart of begin(). The iterator stores a non-const position and
                  // string pointer, hence the const_cast on `this` for both fields.
01243             const_iterator i;
01244             i.mIter = const_cast<UString*>( this )->mData.begin();
01245             i.mString = const_cast<UString*>( this );
01246             return i;
01247         }
01249         iterator end()
01250         {
                  // Iterator bound to this string (mString) and positioned at mData.end().
01251             iterator i;
01252             i.mIter = mData.end();
01253             i.mString = this;
01254             return i;
01255         }
01257         const_iterator end() const
01258         {
                  // Const counterpart of end(). The iterator stores a non-const position and
                  // string pointer, hence the const_cast on `this` for both fields.
01259             const_iterator i;
01260             i.mIter = const_cast<UString*>( this )->mData.end();
01261             i.mString = const_cast<UString*>( this );
01262             return i;
01263         }
01265         reverse_iterator rbegin()
01266         {
                  // Start of the reverse sequence: the underlying position is mData.end().
01267             reverse_iterator i;
01268             i.mIter = mData.end();
01269             i.mString = this;
01270             return i;
01271         }
01273         const_reverse_iterator rbegin() const
01274         {
                  // Const counterpart of rbegin(): underlying position mData.end(). _const_rev_iterator
                  // dereferences mIter[-1], so the first element visited is the last code point.
01275             const_reverse_iterator i;
01276             i.mIter = const_cast<UString*>( this )->mData.end();
01277             i.mString = const_cast<UString*>( this );
01278             return i;
01279         }
01281         reverse_iterator rend()
01282         {
                  // End of the reverse sequence: the underlying position is mData.begin().
01283             reverse_iterator i;
01284             i.mIter = mData.begin();
01285             i.mString = this;
01286             return i;
01287         }
01289         const_reverse_iterator rend() const
01290         {
                  // Const counterpart of rend(): underlying position mData.begin(), obtained through
                  // a const_cast because the iterator stores non-const members.
01291             const_reverse_iterator i;
01292             i.mIter = const_cast<UString*>( this )->mData.begin();
01293             i.mString = const_cast<UString*>( this );
01294             return i;
01295         }
01297 
01299 
01301 
01302 
01303         UString& assign( iterator start, iterator end )
01304         {
                  // Replaces the contents with the code points in [start, end), using the iterators'
                  // underlying mIter positions. Returns *this.
01305             mData.assign( start.mIter, end.mIter );
01306             return *this;
01307         }
01309         UString& assign( const UString& str )
01310         {
                  // Replaces the contents with a copy of `str`'s code points. Returns *this.
01311             mData.assign( str.mData );
01312             return *this;
01313         }
01315         UString& assign( const code_point* str )
01316         {
                  // Replaces the contents with the code point array `str`. No length is passed,
                  // so the array is expected to be nul-terminated. Returns *this.
01317             mData.assign( str );
01318             return *this;
01319         }
01321         UString& assign( const code_point* str, size_type num )
01322         {
                  // Replaces the contents with the first `num` code points of `str`. Returns *this.
01323             mData.assign( str, num );
01324             return *this;
01325         }
01327         UString& assign( const UString& str, size_type index, size_type len )
01328         {
                  // Replaces the contents with `len` code points of `str` starting at code point
                  // index `index` (both forwarded unchanged to mData.assign). Returns *this.
01329             mData.assign( str.mData, index, len );
01330             return *this;
01331         }
01333         UString& assign( size_type num, const code_point& ch )
01334         {
                  // Replaces the contents with `num` copies of the code point `ch`. Returns *this.
01335             mData.assign( num, ch );
01336             return *this;
01337         }
01339         UString& assign( const std::wstring& wstr )
01340         {
                  // Replaces the contents with the conversion of `wstr`. Returns *this.
                  // Two compile-time paths:
                  //  - WCHAR_UTF16 defined: each wchar_t is cast straight to a code_point;
                  //  - otherwise: each wchar_t is treated as a unicode_char and encoded with
                  //    _utf32_to_utf16(), which may yield one or two code points.
01341             mData.clear();
01342             mData.reserve( wstr.length() ); // best guess bulk allocate
01343 #ifdef WCHAR_UTF16 // if we're already working in UTF-16, this is easy
01344             code_point tmp;
01345             std::wstring::const_iterator i, ie = wstr.end();
01346             for ( i = wstr.begin(); i != ie; ++i )
01347             {
01348                 tmp = static_cast<code_point>( *i );
01349                 mData.push_back( tmp );
01350             }
01351 #else // otherwise we do it the safe way (which is still 100% safe to pass UTF-16 through, just slower)
01352             code_point cp[3] = { 0, 0, 0 };
01353             unicode_char tmp;
01354             std::wstring::const_iterator i, ie = wstr.end();
01355             for ( i = wstr.begin(); i != ie; i++ )
01356             {
01357                 tmp = static_cast<unicode_char>( *i );
01358                 size_t lc = _utf32_to_utf16( tmp, cp ); // lc = number of code points written to cp
01359                 if ( lc > 0 ) mData.push_back( cp[0] );
01360                 if ( lc > 1 ) mData.push_back( cp[1] );
01361             }
01362 #endif
01363             return *this;
01364         }
01365 #if MYGUI_IS_NATIVE_WCHAR_T
01366 
01367         UString& assign( const wchar_t* w_str )
01368         {
                  // Copies the nul-terminated wide string into a temporary std::wstring and
                  // delegates to the std::wstring overload. Returns *this.
01369             std::wstring tmp;
01370             tmp.assign( w_str );
01371             return assign( tmp );
01372         }
01374         UString& assign( const wchar_t* w_str, size_type num )
01375         {
                  // Copies the first `num` wchar_t elements into a temporary std::wstring and
                  // delegates to the std::wstring overload. Returns *this.
01376             std::wstring tmp;
01377             tmp.assign( w_str, num );
01378             return assign( tmp );
01379         }
01380 #endif
01381 
01382         UString& assign( const std::string& str )
01383         {
                  // Replaces the contents with `str`, whose bytes are decoded as UTF-8. Returns *this.
                  // _verifyUTF8() runs before the old contents are cleared; its result is used only
                  // as the reserve() hint.
                  // NOTE(review): presumably _verifyUTF8() rejects malformed input, which the unchecked
                  // look-ahead `*( i + j )` below depends on — confirm.
01384             size_type len = _verifyUTF8( str );
01385             clear(); // empty our contents, if there are any
01386             reserve( len ); // best guess bulk capacity growth
01387 
01388             // This is a 3 step process, converting each byte in the UTF-8 stream to UTF-32,
01389             // then converting it to UTF-16, then finally appending the data buffer
01390 
01391             unicode_char uc;          // temporary Unicode character buffer
01392             unsigned char utf8buf[7]; // temporary UTF-8 buffer
01393             utf8buf[6] = 0;
01394             size_t utf8len;           // UTF-8 length
01395             code_point utf16buff[3];  // temporary UTF-16 buffer
01396             utf16buff[2] = 0;
01397             size_t utf16len;          // UTF-16 length
01398 
01399             std::string::const_iterator i, ie = str.end();
01400             for ( i = str.begin(); i != ie; ++i )
01401             {
01402                 utf8len = _utf8_char_length( static_cast<unsigned char>( *i ) ); // estimate bytes to load
01403                 for ( size_t j = 0; j < utf8len; j++ )
01404                 { // load the needed UTF-8 bytes
01405                     utf8buf[j] = ( static_cast<unsigned char>( *( i + j ) ) ); // we don't increment 'i' here just in case the estimate is wrong (shouldn't happen, but we're being careful)
01406                 }
01407                 utf8buf[utf8len] = 0; // nul terminate so we throw an exception before running off the end of the buffer
01408                 utf8len = _utf8_to_utf32( utf8buf, uc ); // do the UTF-8 -> UTF-32 conversion
01409                 i += utf8len - 1; // we subtract 1 for the increment of the 'for' loop
01410 
01411                 utf16len = _utf32_to_utf16( uc, utf16buff ); // UTF-32 -> UTF-16 conversion
01412                 append( utf16buff, utf16len ); // append the characters to the string
01413             }
01414             return *this;
01415         }
01417         UString& assign( const char* c_str )
01418         {
                  // Copies the nul-terminated char array into a temporary std::string and delegates
                  // to the std::string overload, which decodes it as UTF-8. Returns *this.
01419             std::string tmp( c_str );
01420             return assign( tmp );
01421         }
01423         UString& assign( const char* c_str, size_type num )
01424         {
                  // Copies the first `num` bytes of `c_str` into a temporary std::string and delegates
                  // to the std::string overload, which decodes them as UTF-8. Returns *this.
01425             std::string tmp;
01426             tmp.assign( c_str, num );
01427             return assign( tmp );
01428         }
01430 
01432 
01434 
01435 
01436         UString& append( const UString& str )
01437         {
                  // Appends all code points of `str` to the end of this string. Returns *this.
01438             mData.append( str.mData );
01439             return *this;
01440         }
01442         UString& append( const code_point* str )
01443         {
                  // Appends the code point array `str`. No length is passed, so the array is
                  // expected to be nul-terminated. Returns *this.
01444             mData.append( str );
01445             return *this;
01446         }
01448         UString& append( const UString& str, size_type index, size_type len )
01449         {
                  // Appends `len` code points of `str` starting at code point index `index`
                  // (both forwarded unchanged to mData.append). Returns *this.
01450             mData.append( str.mData, index, len );
01451             return *this;
01452         }
01454         UString& append( const code_point* str, size_type num )
01455         {
                  // Appends the first `num` code points of `str`. Returns *this.
01456             mData.append( str, num );
01457             return *this;
01458         }
01460         UString& append( size_type num, code_point ch )
01461         {
                  // Appends `num` copies of the code point `ch`. Returns *this.
01462             mData.append( num, ch );
01463             return *this;
01464         }
01466         UString& append( iterator start, iterator end )
01467         {
                  // Appends the code points in [start, end), using the iterators' underlying mIter
                  // positions. Returns *this.
01468             mData.append( start.mIter, end.mIter );
01469             return *this;
01470         }
01471 #if MYGUI_IS_NATIVE_WCHAR_T
01472 
01473         UString& append( const wchar_t* w_str, size_type num )
01474         {
                  // Copies the first `num` wchar_t elements into a temporary std::wstring and appends it.
                  // NOTE(review): no append( std::wstring ) overload is visible in this part of the file;
                  // presumably the call converts the temporary to UString first — confirm.
01475             std::wstring tmp( w_str, num );
01476             return append( tmp );
01477         }
01479         UString& append( size_type num, wchar_t ch )
01480         {
                  // Appends `num` copies of the wide character by delegating to the unicode_char
                  // overload, which handles the encoding into code points.
01481             return append( num, static_cast<unicode_char>( ch ) );
01482         }
01483 #endif
01484 
01485         UString& append( const char* c_str, size_type num )
01486         {
                  // Converts the first `num` bytes of `c_str` into a temporary UString (the char
                  // constructor decodes UTF-8) and appends the result. Returns *this.
01487             UString tmp( c_str, num );
01488             append( tmp );
01489             return *this;
01490         }
01492         UString& append( size_type num, char ch )
01493         {
                  // Appends `num` copies of `ch`, converted by a plain cast to code_point
                  // (no UTF-8 decoding). Returns *this.
01494             append( num, static_cast<code_point>( ch ) );
01495             return *this;
01496         }
01498         UString& append( size_type num, unicode_char ch )
01499         {
                  // Appends `num` copies of the character `ch`. The character is encoded once;
                  // each repetition appends one code point, or two when the encoding needed a pair.
01500             code_point units[2] = { 0, 0 };
01501             const bool needsPair = ( _utf32_to_utf16( ch, units ) == 2 );
01502             for ( size_type done = 0; done < num; ++done )
01503             {
01504                 append( 1, units[0] );
01505                 if ( needsPair )
01506                     append( 1, units[1] );
01507             }
01508             return *this;
01509         }
01519 
01521 
01523 
01524 
01525         iterator insert( iterator i, const code_point& ch )
01526         {
                  // Inserts one code point at position `i`. The returned iterator is bound to this
                  // string and positioned at whatever mData.insert() returned.
01527             iterator ret;
01528             ret.mIter = mData.insert( i.mIter, ch );
01529             ret.mString = this;
01530             return ret;
01531         }
01533         UString& insert( size_type index, const UString& str )
01534         {
                  // Inserts all code points of `str` at code point index `index`. Returns *this.
01535             mData.insert( index, str.mData );
01536             return *this;
01537         }
01539         UString& insert( size_type index, const code_point* str )
01540         {
                  // Inserts the code point array `str` at code point index `index`. No length is
                  // passed, so the array is expected to be nul-terminated. Returns *this.
01541             mData.insert( index, str );
01542             return *this;
01543         }
01545         UString& insert( size_type index1, const UString& str, size_type index2, size_type num )
01546         {
                  // Inserts, at code point index `index1`, `num` code points of `str` starting at
                  // its code point index `index2`. Returns *this.
01547             mData.insert( index1, str.mData, index2, num );
01548             return *this;
01549         }
01551         void insert( iterator i, iterator start, iterator end )
01552         {
                  // Inserts the code points in [start, end) before position `i`, using the
                  // iterators' underlying mIter positions.
01553             mData.insert( i.mIter, start.mIter, end.mIter );
01554         }
01556         UString& insert( size_type index, const code_point* str, size_type num )
01557         {
                  // Inserts the first `num` code points of `str` at code point index `index`. Returns *this.
01558             mData.insert( index, str, num );
01559             return *this;
01560         }
01561 #if MYGUI_IS_NATIVE_WCHAR_T
01562 
01563         UString& insert( size_type index, const wchar_t* w_str, size_type num )
01564         {
                  // Converts the first `num` wchar_t elements into a temporary UString and inserts
                  // it at code point index `index`. Returns *this.
01565             UString tmp( w_str, num );
01566             insert( index, tmp );
01567             return *this;
01568         }
01569 #endif
01570 
01571         UString& insert( size_type index, const char* c_str, size_type num )
01572         {
                  // Converts the first `num` bytes of `c_str` into a temporary UString (the char
                  // constructor decodes UTF-8) and inserts it at code point index `index`. Returns *this.
01573             UString tmp( c_str, num );
01574             insert( index, tmp );
01575             return *this;
01576         }
01578         UString& insert( size_type index, size_type num, code_point ch )
01579         {
                  // Inserts `num` copies of the code point `ch` at code point index `index`. Returns *this.
01580             mData.insert( index, num, ch );
01581             return *this;
01582         }
01583 #if MYGUI_IS_NATIVE_WCHAR_T
01584 
01585         UString& insert( size_type index, size_type num, wchar_t ch )
01586         {
                  // Inserts `num` copies of the wide character by delegating to the unicode_char
                  // overload, which handles the encoding into code points. Returns *this.
01587             insert( index, num, static_cast<unicode_char>( ch ) );
01588             return *this;
01589         }
01590 #endif
01591 
01592         UString& insert( size_type index, size_type num, char ch )
01593         {
                  // Inserts `num` copies of `ch`, converted by a plain cast to code_point
                  // (no UTF-8 decoding), at code point index `index`. Returns *this.
01594             insert( index, num, static_cast<code_point>( ch ) );
01595             return *this;
01596         }
01598         UString& insert( size_type index, size_type num, unicode_char ch )
01599         {
                  // Inserts `num` copies of the character `ch` at code point index `index`.
                  // A character that encodes to exactly one code point takes the bulk code_point path.
01600             code_point units[3] = { 0, 0, 0 };
01601             if ( _utf32_to_utf16( ch, units ) == 1 )
01602                 return insert( index, num, units[0] );
01603 
                  // Otherwise place the second unit first and the first unit in front of it, both at
                  // `index`, so every repetition ends up in units[0], units[1] order.
01604             for ( size_type left = num; left != 0; --left )
01605             {
01606                 insert( index, 1, units[1] );
01607                 insert( index, 1, units[0] );
01608             }
01609             return *this;
01610         }
01615         void insert( iterator i, size_type num, const code_point& ch )
01616         {
                  // Inserts `num` copies of the code point `ch` before position `i`.
01617             mData.insert( i.mIter, num, ch );
01618         }
01619 #if MYGUI_IS_NATIVE_WCHAR_T
01620 
01621         void insert( iterator i, size_type num, const wchar_t& ch )
01622         {
                  // Inserts `num` copies of the wide character before position `i` by delegating to
                  // the unicode_char overload.
01623             insert( i, num, static_cast<unicode_char>( ch ) );
01624         }
01625 #endif
01626 
01627         void insert( iterator i, size_type num, const char& ch )
01628         {
                  // Inserts `num` copies of `ch`, converted by a plain cast to code_point
                  // (no UTF-8 decoding), before position `i`.
01629             insert( i, num, static_cast<code_point>( ch ) );
01630         }
01632         void insert( iterator i, size_type num, const unicode_char& ch )
01633         {
                  // Inserts `num` copies of the character `ch` before position `i`.
                  //
                  // The position is converted to a code point index first and the work is handed to
                  // the index-based overload. The earlier code kept calling insert( i, ... ) in a
                  // loop, but each insertion into mData may invalidate `i`, so every call after the
                  // first used a stale iterator whenever the character needed two code points.
                  // An index stays valid across insertions; the resulting content is the same.
01634             const size_type index = static_cast<size_type>( i.mIter - mData.begin() );
01635             insert( index, num, ch );
01636         }
01651 
01653 
01655 
01656 
01657         iterator erase( iterator loc )
01658         {
                  // Removes the single code point at `loc`. The returned iterator is bound to this
                  // string and positioned at whatever mData.erase() returned.
01659             iterator ret;
01660             ret.mIter = mData.erase( loc.mIter );
01661             ret.mString = this;
01662             return ret;
01663         }
01665         iterator erase( iterator start, iterator end )
01666         {
                  // Removes the code points in [start, end). The returned iterator is bound to this
                  // string and positioned at whatever mData.erase() returned.
01667             iterator ret;
01668             ret.mIter = mData.erase( start.mIter, end.mIter );
01669             ret.mString = this;
01670             return ret;
01671         }
01673         UString& erase( size_type index = 0, size_type num = npos )
01674         {
                  // Removes `num` code points starting at code point index `index`. With num == npos
                  // only the index is forwarded, leaving the count to mData.erase()'s default.
                  // With both defaults the call is erase( 0 ). Returns *this.
01675             if ( num == npos )
01676                 mData.erase( index );
01677             else
01678                 mData.erase( index, num );
01679             return *this;
01680         }
01682 
01684 
01686 
01687 
01688         UString& replace( size_type index1, size_type num1, const UString& str )
01689         {
                  // Replaces `num1` code points starting at `index1` with the whole of `str`
                  // (source range 0 .. npos). Returns *this.
01690             mData.replace( index1, num1, str.mData, 0, npos );
01691             return *this;
01692         }
01694         UString& replace( size_type index1, size_type num1, const UString& str, size_type num2 )
01695         {
                  // Replaces `num1` code points starting at `index1` with the first `num2` code
                  // points of `str`. Returns *this.
01696             mData.replace( index1, num1, str.mData, 0, num2 );
01697             return *this;
01698         }
01700         UString& replace( size_type index1, size_type num1, const UString& str, size_type index2, size_type num2 )
01701         {
                  // Replaces `num1` code points starting at `index1` with `num2` code points of
                  // `str` starting at its index `index2`. Returns *this.
01702             mData.replace( index1, num1, str.mData, index2, num2 );
01703             return *this;
01704         }
01706         UString& replace( iterator start, iterator end, const UString& str, size_type num = npos )
01707         {
                  // Replaces the code points in [start, end) with the first `num` code points of
                  // `str` (all of them by default). Returns *this.
                  //
                  // The range is translated to an (index, count) pair for the index-based overload.
                  // The start index is the distance from begin() to `start`, i.e. st - begin().
                  // The operands used to be reversed (begin() - st), which wrapped around to a huge
                  // unsigned value whenever `start` was not begin().
01708             _const_fwd_iterator st(start); //Work around for gcc, allow it to find correct overload
01709 
01710             size_type index1 = st - begin();
01711             size_type num1 = end - st;
01712             return replace( index1, num1, str, 0, num );
01713         }
01715         UString& replace( size_type index, size_type num1, size_type num2, code_point ch )
01716         {
                  // Replaces `num1` code points starting at `index` with `num2` copies of the
                  // code point `ch`. Returns *this.
01717             mData.replace( index, num1, num2, ch );
01718             return *this;
01719         }
01721         UString& replace( iterator start, iterator end, size_type num, code_point ch )
01722         {
                  // Replaces the code points in [start, end) with `num` copies of the code point
                  // `ch`. Returns *this.
                  //
                  // The range is translated to an (index, count) pair for the index-based overload.
                  // The start index is the distance from begin() to `start`, i.e. st - begin().
                  // The operands used to be reversed (begin() - st), which wrapped around to a huge
                  // unsigned value whenever `start` was not begin().
01723             _const_fwd_iterator st(start); //Work around for gcc, allow it to find correct overload
01724 
01725             size_type index1 = st - begin();
01726             size_type num1 = end - st;
01727             return replace( index1, num1, num, ch );
01728         }
01730 
01732 
01734 
01735 
01736         int compare( const UString& str ) const
01737         {
                  // Compares the two code point sequences and returns mData.compare()'s result unchanged.
01738             return mData.compare( str.mData );
01739         }
01741         int compare( const code_point* str ) const
01742         {
                  // Compares this string with the code point array `str` (no length is passed, so it
                  // is expected to be nul-terminated); returns mData.compare()'s result unchanged.
01743             return mData.compare( str );
01744         }
01746         int compare( size_type index, size_type length, const UString& str ) const
01747         {
                  // Compares `length` code points of this string, starting at `index`, with all of `str`.
01748             return mData.compare( index, length, str.mData );
01749         }
01751         int compare( size_type index, size_type length, const UString& str, size_type index2, size_type length2 ) const
01752         {
                  // Compares `length` code points of this string, starting at `index`, with
                  // `length2` code points of `str`, starting at `index2`.
01753             return mData.compare( index, length, str.mData, index2, length2 );
01754         }
01756         int compare( size_type index, size_type length, const code_point* str, size_type length2 ) const
01757         {
                  // Compares `length` code points of this string, starting at `index`, with the
                  // first `length2` code points of `str`.
01758             return mData.compare( index, length, str, length2 );
01759         }
01760 #if MYGUI_IS_NATIVE_WCHAR_T
01761 
01762         int compare( size_type index, size_type length, const wchar_t* w_str, size_type length2 ) const
01763         {
                  // Converts the first `length2` wchar_t elements of `w_str` into a temporary UString,
                  // then compares `length` code points of this string, starting at `index`, with it.
01764             UString tmp( w_str, length2 );
01765             return compare( index, length, tmp );
01766         }
01767 #endif
01768 
01769         int compare( size_type index, size_type length, const char* c_str, size_type length2 ) const
01770         {
                  // Converts the first `length2` bytes of `c_str` into a temporary UString (the char
                  // constructor decodes UTF-8), then compares `length` code points of this string,
                  // starting at `index`, with it.
01771             UString tmp( c_str, length2 );
01772             return compare( index, length, tmp );
01773         }
01775 
01777 
01779 
01780 
01781 
01782         size_type find( const UString& str, size_type index = 0 ) const
01783         {
                  // Searches forward from code point index `index` for `str` and returns
                  // mData.find()'s result. The pattern is passed as str.c_str(), i.e. as a
                  // nul-terminated array, so only the part of `str` before any embedded nul is used.
01784             return mData.find( str.c_str(), index );
01785         }
01787 
01788         size_type find( const code_point* cp_str, size_type index, size_type length ) const
01789         {
                  // Searches forward from code point index `index` for the first `length` code
                  // points of `cp_str`; returns mData.find()'s result.
                  //
                  // The pattern is handed to mData.find() directly. It used to be copied into a
                  // temporary UString first; that copy stopped at the first nul, so a pattern with
                  // an embedded nul (or without a terminator) was truncated and the search then
                  // read `length` code points from the shorter copy. Searching the caller's array
                  // avoids both that and the allocation.
01790             return mData.find( cp_str, index, length );
01791         }
01794 
01795         size_type find( const char* c_str, size_type index, size_type length ) const
01796         {
                  // Converts the nul-terminated `c_str` into a temporary UString (UTF-8 decoded), then
                  // searches forward from `index` for the first `length` code points of that temporary.
                  // `length` therefore counts converted code points, not bytes of `c_str`.
                  // NOTE(review): a `length` larger than the temporary's size would read past its buffer — confirm callers.
01797             UString tmp( c_str );
01798             return mData.find( tmp.c_str(), index, length );
01799         }
01800 #if MYGUI_IS_NATIVE_WCHAR_T
01801 
01802 
01803         size_type find( const wchar_t* w_str, size_type index, size_type length ) const
01804         {
                  // Converts the nul-terminated `w_str` into a temporary UString, then searches forward
                  // from `index` for the first `length` code points of that temporary.
                  // NOTE(review): a `length` larger than the temporary's size would read past its buffer — confirm callers.
01805             UString tmp( w_str );
01806             return mData.find( tmp.c_str(), index, length );
01807         }
01808 #endif
01809 
01810 
01811         size_type find( char ch, size_type index = 0 ) const
01812         {
                  // Casts `ch` to code_point (no UTF-8 decoding) and delegates to the code_point overload.
01813             return find( static_cast<code_point>( ch ), index );
01814         }
01816 
01817         size_type find( code_point ch, size_type index = 0 ) const
01818         {
                  // Searches forward from code point index `index` for the single code point `ch`;
                  // returns mData.find()'s result.
01819             return mData.find( ch, index );
01820         }
01821 #if MYGUI_IS_NATIVE_WCHAR_T
01822 
01823 
01824         size_type find( wchar_t ch, size_type index = 0 ) const
01825         {
                  // Casts `ch` to unicode_char and delegates to the unicode_char overload.
01826             return find( static_cast<unicode_char>( ch ), index );
01827         }
01828 #endif
01829 
01830 
01831         size_type find( unicode_char ch, size_type index = 0 ) const
01832         {
                  // Encodes `ch` with _utf32_to_utf16() (lc = code points produced), wraps the result
                  // in a temporary UString and searches for it with the UString overload.
01833             code_point cp[3] = { 0, 0, 0 };
01834             size_t lc = _utf32_to_utf16( ch, cp );
01835             return find( UString( cp, lc ), index );
01836         }
01837 
01839         size_type rfind( const UString& str, size_type index = 0 ) const
01840         {
                  // Reverse search for `str`; `index` is forwarded to mData.rfind() as its position
                  // argument, and the pattern is passed as a nul-terminated array (str.c_str()).
                  // NOTE(review): `index` defaults to 0. If dstring follows std::basic_string::rfind,
                  // that only allows a match at position 0; npos may have been intended — confirm.
01841             return mData.rfind( str.c_str(), index );
01842         }
01844         size_type rfind( const code_point* cp_str, size_type index, size_type num ) const
01845         {
                  // Reverse search for the first `num` code points of `cp_str`; `index` is forwarded
                  // to mData.rfind() as its position argument and its result is returned.
                  //
                  // The pattern is handed to mData.rfind() directly. It used to be copied into a
                  // temporary UString first; that copy stopped at the first nul, so a pattern with
                  // an embedded nul (or without a terminator) was truncated and the search then
                  // read `num` code points from the shorter copy. Searching the caller's array
                  // avoids both that and the allocation.
01846             return mData.rfind( cp_str, index, num );
01847         }
01850         size_type rfind( const char* c_str, size_type index, size_type num ) const
01851         {
                  // Converts the nul-terminated `c_str` into a temporary UString (UTF-8 decoded), then
                  // reverse-searches for the first `num` code points of that temporary.
                  // `num` therefore counts converted code points, not bytes of `c_str`.
                  // NOTE(review): a `num` larger than the temporary's size would read past its buffer — confirm callers.
01852             UString tmp( c_str );
01853             return mData.rfind( tmp.c_str(), index, num );
01854         }
01855 #if MYGUI_IS_NATIVE_WCHAR_T
01856 
01857         size_type rfind( const wchar_t* w_str, size_type index, size_type num ) const
01858         {
                  // Converts the nul-terminated `w_str` into a temporary UString, then reverse-searches
                  // for the first `num` code points of that temporary.
                  // NOTE(review): a `num` larger than the temporary's size would read past its buffer — confirm callers.
01859             UString tmp( w_str );
01860             return mData.rfind( tmp.c_str(), index, num );
01861         }
01862 #endif
01863 
01864         size_type rfind( char ch, size_type index = 0 ) const
01865         {
                  // Casts `ch` to code_point (no UTF-8 decoding) and delegates to the code_point overload.
                  // NOTE(review): `index` defaults to 0, which for a std::basic_string-style rfind
                  // only allows a match at position 0 — confirm this is intended.
01866             return rfind( static_cast<code_point>( ch ), index );
01867         }
01869         size_type rfind( code_point ch, size_type index ) const
01870         {
                  // Reverse search for the single code point `ch`; `index` is forwarded to
                  // mData.rfind() as its position argument. Unlike its siblings, this overload has
                  // no default for `index`.
01871             return mData.rfind( ch, index );
01872         }
01873 #if MYGUI_IS_NATIVE_WCHAR_T
01874 
01875         size_type rfind( wchar_t ch, size_type index = 0 ) const
01876         {
                  // Casts `ch` to unicode_char and delegates to the unicode_char overload.
                  // NOTE(review): `index` defaults to 0, which for a std::basic_string-style rfind
                  // only allows a match at position 0 — confirm this is intended.
01877             return rfind( static_cast<unicode_char>( ch ), index );
01878         }
01879 #endif
01880 
01881         size_type rfind( unicode_char ch, size_type index = 0 ) const
01882         {
                  // Encodes `ch` with _utf32_to_utf16() (lc = code points produced), wraps the result
                  // in a temporary UString and reverse-searches for it with the UString overload.
                  // NOTE(review): `index` defaults to 0, which for a std::basic_string-style rfind
                  // only allows a match at position 0 — confirm this is intended.
01883             code_point cp[3] = { 0, 0, 0 };
01884             size_t lc = _utf32_to_utf16( ch, cp );
01885             return rfind( UString( cp, lc ), index );
01886         }
01888 
01890 
01892 
01893 
01894         size_type find_first_of( const UString &str, size_type index = 0, size_type num = npos ) const
01895         {
                  // Returns the code point index of the first character, at or after `index`, that
                  // occurs in `str`, or npos if none is found. The scan covers at most `num` code
                  // points and never runs beyond the end of this string.
01896             const size_type total = length();
01897             for ( size_type offset = 0; offset < num && ( index + offset ) < total; )
01898             {
01899                 const size_type pos = index + offset;
01900                 const unicode_char current = getChar( pos );
01901                 if ( str.inString( current ) )
01902                     return pos;
01903                 offset += _utf16_char_length( current ); // step over the whole character, however many code points it spans
01904             }
01905             return npos;
01906         }
01908         size_type find_first_of( code_point ch, size_type index = 0 ) const
01909         {
                  // Wraps `ch` in a one-code-point temporary UString and delegates to the UString overload.
01910             UString tmp;
01911             tmp.assign( 1, ch );
01912             return find_first_of( tmp, index );
01913         }
01915         size_type find_first_of( char ch, size_type index = 0 ) const
01916         {
                  // Casts `ch` to code_point (no UTF-8 decoding) and delegates to the code_point overload.
01917             return find_first_of( static_cast<code_point>( ch ), index );
01918         }
01919 #if MYGUI_IS_NATIVE_WCHAR_T
01920 
01921         size_type find_first_of( wchar_t ch, size_type index = 0 ) const
01922         {
                  // Casts `ch` to unicode_char and delegates to the unicode_char overload.
01923             return find_first_of( static_cast<unicode_char>( ch ), index );
01924         }
01925 #endif
01926 
01927         size_type find_first_of( unicode_char ch, size_type index = 0 ) const
01928         {
                  // Encodes `ch` with _utf32_to_utf16() (lc = code points produced), wraps the result
                  // in a temporary UString and delegates to the UString overload.
01929             code_point cp[3] = { 0, 0, 0 };
01930             size_t lc = _utf32_to_utf16( ch, cp );
01931             return find_first_of( UString( cp, lc ), index );
01932         }
01933 
01935         size_type find_first_not_of( const UString& str, size_type index = 0, size_type num = npos ) const
01936         {
                  // Returns the code point index of the first character, at or after `index`, that
                  // does NOT occur in `str`, or npos if every scanned character does. The scan covers
                  // at most `num` code points and never runs beyond the end of this string.
01937             const size_type total = length();
01938             for ( size_type offset = 0; offset < num && ( index + offset ) < total; )
01939             {
01940                 const size_type pos = index + offset;
01941                 const unicode_char current = getChar( pos );
01942                 if ( !str.inString( current ) )
01943                     return pos;
01944                 offset += _utf16_char_length( current ); // step over the whole character, however many code points it spans
01945             }
01946             return npos;
01947         }
01948         // Single code point overload: wraps `ch` in a one-element UString via assign( 1, ch ) and forwards to the UString overload.
01949         size_type find_first_not_of( code_point ch, size_type index = 0 ) const
01950         {
01951             UString tmp;
01952             tmp.assign( 1, ch );
01953             return find_first_not_of( tmp, index );
01954         }
01955         // char overload: casts `ch` to code_point and forwards to the code_point overload.
01956         size_type find_first_not_of( char ch, size_type index = 0 ) const
01957         {
01958             return find_first_not_of( static_cast<code_point>( ch ), index );
01959         }
01960 #if MYGUI_IS_NATIVE_WCHAR_T
01961         // wchar_t overload (only compiled under the enclosing MYGUI_IS_NATIVE_WCHAR_T check): casts `ch` to unicode_char and forwards.
01962         size_type find_first_not_of( wchar_t ch, size_type index = 0 ) const
01963         {
01964             return find_first_not_of( static_cast<unicode_char>( ch ), index );
01965         }
01966 #endif
01967         // unicode_char overload: encodes `ch` with _utf32_to_utf16 and forwards the resulting code point(s) as a UString.
01968         size_type find_first_not_of( unicode_char ch, size_type index = 0 ) const
01969         {
01970             code_point cp[3] = { 0, 0, 0 };
01971             size_t lc = _utf32_to_utf16( ch, cp ); // lc = number of code points written to cp (1 or 2)
01972             return find_first_not_of( UString( cp, lc ), index );
01973         }
01974 
01975         // Searches backwards from code-point offset `index` and returns the offset of the last character that
              // str.inString() accepts (for a surrogate pair, the offset of its lead word); `num` limits how many
              // code points the search may step back. Returns npos when nothing matches.
01976         size_type find_last_of( const UString& str, size_type index = npos, size_type num = npos ) const
01977         {
01978             size_type i = 0;
01979             const size_type len = length();
              // Clamp every start offset at or beyond the end to the last code point. index == len is one
              // past the last valid offset and must be clamped too, otherwise at()/getChar() below are
              // handed an out-of-range position. For an empty string len - 1 wraps around; the loop is
              // then skipped provided the wrapped value equals npos (TODO confirm npos is the largest size_type).
01980             if ( index >= len ) index = len - 1;
01981
01982             while ( i < num && ( index - i ) != npos )
01983             {
01984                 size_type j = index - i;
01985                 // careful to step full Unicode characters
01986                 if ( j != 0 && _utf16_surrogate_follow( at( j ) ) && _utf16_surrogate_lead( at( j - 1 ) ) )
01987                 {
01988                     j = index - ++i; // landed on the second word of a pair: back up to its lead word
01989                 }
01990                 // and back to the usual dull test
01991                 unicode_char ch = getChar( j );
01992                 if ( str.inString( ch ) )
01993                     return j;
01994                 i++;
01995             }
01996             return npos;
01997         }
01998         // Single code point overload: wraps `ch` in a one-element UString via assign( 1, ch ) and forwards to the UString overload.
01999         size_type find_last_of( code_point ch, size_type index = npos ) const
02000         {
02001             UString tmp;
02002             tmp.assign( 1, ch );
02003             return find_last_of( tmp, index );
02004         }
02005         // char overload: casts `ch` to code_point and forwards to the code_point overload.
02006         size_type find_last_of( char ch, size_type index = npos ) const
02007         {
02008             return find_last_of( static_cast<code_point>( ch ), index );
02009         }
02010 #if MYGUI_IS_NATIVE_WCHAR_T
02011         // wchar_t overload (only compiled under the enclosing MYGUI_IS_NATIVE_WCHAR_T check): casts `ch` to unicode_char and forwards.
02012         size_type find_last_of( wchar_t ch, size_type index = npos ) const
02013         {
02014             return find_last_of( static_cast<unicode_char>( ch ), index );
02015         }
02016 #endif
02017         // unicode_char overload: encodes `ch` with _utf32_to_utf16 and forwards the resulting code point(s) as a UString.
02018         size_type find_last_of( unicode_char ch, size_type index = npos ) const
02019         {
02020             code_point cp[3] = { 0, 0, 0 };
02021             size_t lc = _utf32_to_utf16( ch, cp ); // lc = number of code points written to cp (1 or 2)
02022             return find_last_of( UString( cp, lc ), index );
02023         }
02024 
02025         // Searches backwards from code-point offset `index` and returns the offset of the last character that
              // str.inString() rejects (for a surrogate pair, the offset of its lead word); `num` limits how many
              // code points the search may step back. Returns npos when every examined character is accepted.
02026         size_type find_last_not_of( const UString& str, size_type index = npos, size_type num = npos ) const
02027         {
02028             size_type i = 0;
02029             const size_type len = length();
              // Clamp every start offset at or beyond the end to the last code point. index == len is one
              // past the last valid offset and must be clamped too, otherwise at()/getChar() below are
              // handed an out-of-range position. For an empty string len - 1 wraps around; the loop is
              // then skipped provided the wrapped value equals npos (TODO confirm npos is the largest size_type).
02030             if ( index >= len ) index = len - 1;
02031
02032             while ( i < num && ( index - i ) != npos )
02033             {
02034                 size_type j = index - i;
02035                 // careful to step full Unicode characters
02036                 if ( j != 0 && _utf16_surrogate_follow( at( j ) ) && _utf16_surrogate_lead( at( j - 1 ) ) )
02037                 {
02038                     j = index - ++i; // landed on the second word of a pair: back up to its lead word
02039                 }
02040                 // and back to the usual dull test
02041                 unicode_char ch = getChar( j );
02042                 if ( !str.inString( ch ) )
02043                     return j;
02044                 i++;
02045             }
02046             return npos;
02047         }
02048         // Single code point overload: wraps `ch` in a one-element UString via assign( 1, ch ) and forwards to the UString overload.
02049         size_type find_last_not_of( code_point ch, size_type index = npos ) const
02050         {
02051             UString tmp;
02052             tmp.assign( 1, ch );
02053             return find_last_not_of( tmp, index );
02054         }
02055         // char overload: casts `ch` to code_point and forwards to the code_point overload.
02056         size_type find_last_not_of( char ch, size_type index = npos ) const
02057         {
02058             return find_last_not_of( static_cast<code_point>( ch ), index );
02059         }
02060 #if MYGUI_IS_NATIVE_WCHAR_T
02061         // wchar_t overload (only compiled under the enclosing MYGUI_IS_NATIVE_WCHAR_T check): casts `ch` to unicode_char and forwards.
02062         size_type find_last_not_of( wchar_t ch, size_type index = npos ) const
02063         {
02064             return find_last_not_of( static_cast<unicode_char>( ch ), index );
02065         }
02066 #endif
02067         // unicode_char overload: encodes `ch` with _utf32_to_utf16 and forwards the resulting code point(s) as a UString.
02068         size_type find_last_not_of( unicode_char ch, size_type index = npos ) const
02069         {
02070             code_point cp[3] = { 0, 0, 0 };
02071             size_t lc = _utf32_to_utf16( ch, cp ); // lc = number of code points written to cp (1 or 2)
02072             return find_last_not_of( UString( cp, lc ), index );
02073         }
02075 
02077 
02079 
02080         // Less-than: true when compare( right ) yields a negative value.
02081         bool operator<( const UString& right ) const
02082         {
02083             return compare( right ) < 0;
02084         }
02085         // Less-than-or-equal: true when compare( right ) yields zero or a negative value.
02086         bool operator<=( const UString& right ) const
02087         {
02088             return compare( right ) <= 0;
02089         }
02090         // Greater-than: true when compare( right ) yields a positive value.
02091         bool operator>( const UString& right ) const
02092         {
02093             return compare( right ) > 0;
02094         }
02095         // Greater-than-or-equal: true when compare( right ) yields zero or a positive value.
02096         bool operator>=( const UString& right ) const
02097         {
02098             return compare( right ) >= 0;
02099         }
02100         // Equality: true when compare( right ) yields zero.
02101         bool operator==( const UString& right ) const
02102         {
02103             return compare( right ) == 0;
02104         }
02105         // Inequality: the negation of operator==.
02106         bool operator!=( const UString& right ) const
02107         {
02108             return !operator==( right );
02109         }
02110         // Copy assignment: delegates to assign( s ) and returns the reference assign() yields.
02111         UString& operator=( const UString& s )
02112         {
02113             return assign( s );
02114         }
02115         // Assigns a single code point: empties the string with clear(), then appends one copy of `ch`.
02116         UString& operator=( code_point ch )
02117         {
02118             clear();
02119             return append( 1, ch );
02120         }
02121         // Assigns a single char: empties the string with clear(), then appends one copy of `ch`.
02122         UString& operator=( char ch )
02123         {
02124             clear();
02125             return append( 1, ch );
02126         }
02127 #if MYGUI_IS_NATIVE_WCHAR_T
02128         // Assigns a single wchar_t (only compiled under the enclosing MYGUI_IS_NATIVE_WCHAR_T check): clear(), then append one copy of `ch`.
02129         UString& operator=( wchar_t ch )
02130         {
02131             clear();
02132             return append( 1, ch );
02133         }
02134 #endif
02135         // Assigns a single unicode_char: empties the string with clear(), then appends one copy of `ch`.
02136         UString& operator=( unicode_char ch )
02137         {
02138             clear();
02139             return append( 1, ch );
02140         }
02141         // Mutable element access: forwards to at( index ), so any range checking is whatever at() performs.
02142         code_point& operator[]( size_type index )
02143         {
02144             return at( index );
02145         }
02146         // Read-only element access: forwards to the const at( index ), so any range checking is whatever at() performs.
02147         const code_point& operator[]( size_type index ) const
02148         {
02149             return at( index );
02150         }
02152 
02154 
02156 
02157         // Implicit conversion to std::string: returns a copy of what asUTF8() produces.
02158         operator std::string() const
02159         {
02160             return std::string( asUTF8() );
02161         }
02162         // Implicit conversion to std::wstring: returns a copy of what asWStr() produces.
02163         operator std::wstring() const
02164         {
02165             return std::wstring( asWStr() );
02166         }
02168 
02170 
02172 
02173         // True when cp stands on its own, i.e. lies outside the surrogate range 0xD800..0xDFFF.
02174         static bool _utf16_independent_char( code_point cp )
02175         {
02176             const bool inSurrogateRange = ( cp >= 0xD800 ) && ( cp <= 0xDFFF );
02177             return !inSurrogateRange;
02178         }
02180         // True when cp is the first (lead) word of a surrogate pair, i.e. lies in 0xD800..0xDBFF.
02181         static bool _utf16_surrogate_lead( code_point cp )
02182         {
02183             return ( cp >= 0xD800 ) && ( cp <= 0xDBFF );
02184         }
02187         // True when cp is the second (following) word of a surrogate pair, i.e. lies in 0xDC00..0xDFFF.
02188         static bool _utf16_surrogate_follow( code_point cp )
02189         {
02190             return ( cp >= 0xDC00 ) && ( cp <= 0xDFFF );
02191         }
02194         // Length in UTF-16 words of the character that begins with cp: 2 when cp opens a surrogate pair, otherwise 1.
02195         static size_t _utf16_char_length( code_point cp )
02196         {
02197             const bool startsPair = ( cp >= 0xD800 ) && ( cp <= 0xDBFF );
02198             return startsPair ? 2 : 1;
02199         }
02201         // Length in UTF-16 words needed to store uc: values above 0xFFFF need a surrogate pair (2), everything else fits in 1.
02202         static size_t _utf16_char_length( unicode_char uc )
02203         {
02204             const bool needsPair = uc > 0xFFFF;
02205             return needsPair ? 2 : 1;
02206         }
02209 
02210         // Decodes the UTF-16 character at in_cp into out_uc and returns the number of words consumed (1 or 2).
02211         // in_cp[1] is only read when in_cp[0] is a lead surrogate; anything that is not a complete,
02212         // well-formed pair is copied through as a single word.
02213         static size_t _utf16_to_utf32( const code_point in_cp[2], unicode_char& out_uc )
02214         {
02215             const code_point first = in_cp[0];
02216
02217             // a pair needs a lead word followed by a matching second word
02218             const bool isPair = ( first >= 0xD800 && first <= 0xDBFF )
02219                 && ( in_cp[1] >= 0xDC00 && in_cp[1] <= 0xDFFF );
02220             if ( !isPair )
02221             {
02222                 out_uc = first;
02223                 return 1;
02224             }
02225
02226             // strip the encoding markers from both words
02227             const unsigned short high = static_cast<unsigned short>( first - 0xD800 );
02228             const unsigned short low = static_cast<unsigned short>( in_cp[1] - 0xDC00 );
02229
02230             // upper ten bits come from the lead word, lower ten from the second word
02231             out_uc = ( ( high & 0x03FF ) << 10 ) | ( low & 0x03FF );
02232             out_uc += 0x10000; // add back in the value offset
02233
02234             return 2; // this whole operation takes two words, so that's what we'll return
02235         }
02244 
02247         // Encodes in_uc as UTF-16 into out_cp and returns the number of words written (1 or 2).
02248         // Values up to 0xFFFF are stored unchanged in out_cp[0]; out_cp[1] is only written for surrogate pairs.
02249         static size_t _utf32_to_utf16( const unicode_char& in_uc, code_point out_cp[2] )
02250         {
02251             if ( in_uc > 0xFFFF )
02252             {
02253                 const unicode_char offset = in_uc - 0x10000; // remove the value offset
02254
02255                 // lead word: upper ten bits plus the 0xD800 marker
02256                 out_cp[0] = static_cast<unsigned short>( ( ( offset >> 10 ) & 0x03FF ) + 0xD800 );
02257                 // second word: lower ten bits plus the 0xDC00 marker
02258                 out_cp[1] = static_cast<unsigned short>( ( offset & 0x03FF ) + 0xDC00 );
02259
02260                 return 2; // surrogate pairs use two words
02261             }
02262
02263             // we blindly preserve sentinel values because our decoder understands them
02264             out_cp[0] = in_uc;
02265             return 1;
02266         }
02273 
02275 
02277 
02278         // True when cp does not carry the continuation-byte tag, i.e. clearing the _cont_mask bits does not leave _cont.
02279         static bool _utf8_start_char( unsigned char cp )
02280         {
02281             return ( cp & ~_cont_mask ) != _cont;
02282         }
02283         // Length in bytes (1-6) of the UTF-8 sequence introduced by header byte cp; throws invalid_data when cp matches no header pattern.
02284         static size_t _utf8_char_length( unsigned char cp )
02285         {
02286             size_t total = 0; // stays 0 while no header pattern has matched
02287             if ( ( cp & 0x80 ) == 0 ) total = 1; // high bit clear: single-byte value
02288             else if ( ( cp & ~_lead1_mask ) == _lead1 ) total = 2;
02289             else if ( ( cp & ~_lead2_mask ) == _lead2 ) total = 3;
02290             else if ( ( cp & ~_lead3_mask ) == _lead3 ) total = 4;
02291             else if ( ( cp & ~_lead4_mask ) == _lead4 ) total = 5;
02292             else if ( ( cp & ~_lead5_mask ) == _lead5 ) total = 6;
                  if ( total == 0 )
                      throw invalid_data( "invalid UTF-8 sequence header value" );
                  return total;
02293         }
02294         // Number of bytes (1-6) needed to encode uc as UTF-8; throws invalid_data when uc has bits set above the 31-bit limit.
02295         static size_t _utf8_char_length( unicode_char uc )
02296         {
02297             /*
02298             7 bit:  U-00000000 - U-0000007F: 0xxxxxxx
02299             11 bit: U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
02300             16 bit: U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
02301             21 bit: U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
02302             26 bit: U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
02303             31 bit: U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
02304             */
                  // limits[k] is the largest value a sequence of k + 1 bytes can carry (see the table above)
02305             const unicode_char limits[6] = { 0x0000007F, 0x000007FF, 0x0000FFFF, 0x001FFFFF, 0x03FFFFFF, 0x7FFFFFFF };
02306             for ( size_t bytes = 1; bytes <= 6; ++bytes )
02307             {
02308                 if ( ( uc & ~limits[bytes - 1] ) == 0 ) // no bit above this limit is set
02309                     return bytes;
02310             }
02311             throw invalid_data( "invalid UTF-32 value" );
02312         }
02313 
02314         // Decodes the UTF-8 sequence starting at in_cp[0] into out_uc and returns the number of bytes it occupies.
              // Throws invalid_data for an unrecognised header byte (via _utf8_char_length) or a bad continuation byte.
02315         static size_t _utf8_to_utf32( const unsigned char in_cp[6], unicode_char& out_uc )
02316         {
02317             const unsigned char header = in_cp[0];
02318             const size_t len = _utf8_char_length( header );
02319             if ( len == 1 )
02320             { // a single byte is its own value
02321                 out_uc = header;
02322                 return 1;
02323             }
02324
02325             // keep only the payload bits of the header byte
02326             unicode_char value = 0;
02327             if ( len == 6 ) value = header & _lead5_mask;
02328             else if ( len == 5 ) value = header & _lead4_mask;
02329             else if ( len == 4 ) value = header & _lead3_mask;
02330             else if ( len == 3 ) value = header & _lead2_mask;
02331             else if ( len == 2 ) value = header & _lead1_mask;
02332
02333             // fold in the payload bits of every continuation byte
02334             for ( size_t pos = 1; pos < len; ++pos )
02335             {
02336                 const unsigned char next = in_cp[pos];
02337                 if ( ( next & ~_cont_mask ) != _cont )
02338                     throw invalid_data( "bad UTF-8 continuation byte" );
02339                 value = ( value << 6 ) | ( next & _cont_mask );
02340             }
02341
02342             out_uc = value; // write the final value and return the used byte length
02343             return len;
02344         }
02356         // Encodes in_uc as a UTF-8 sequence in out_cp and returns its length in bytes (1-6); _utf8_char_length may throw invalid_data.
02357         static size_t _utf32_to_utf8( const unicode_char& in_uc, unsigned char out_cp[6] )
02358         {
02359             const size_t len = _utf8_char_length( in_uc ); // predicted byte length of the sequence
02360             unicode_char bits = in_uc; // payload that still has to be written
02361
02362             // fill the continuation bytes back to front, shifting the payload down by six bits each time
02363             for ( size_t pos = len; --pos > 0; )
02364             {
02365                 out_cp[pos] = ( bits & _cont_mask ) | _cont;
02366                 bits >>= 6;
02367             }
02368
02369             // whatever is left goes into the header byte
02370             if ( len == 6 ) out_cp[0] = ( bits & _lead5_mask ) | _lead5;
02371             else if ( len == 5 ) out_cp[0] = ( bits & _lead4_mask ) | _lead4;
02372             else if ( len == 4 ) out_cp[0] = ( bits & _lead3_mask ) | _lead3;
02373             else if ( len == 3 ) out_cp[0] = ( bits & _lead2_mask ) | _lead2;
02374             else if ( len == 2 ) out_cp[0] = ( bits & _lead1_mask ) | _lead1;
02375             else out_cp[0] = bits & 0x7F; // single byte, also the fallback
02376
02377             // return the byte length of the sequence
02378             return len;
02379         }
02396 
02397         // C-string overload: copies the NUL-terminated bytes at c_str into a std::string and forwards to the std::string overload.
02398         static size_type _verifyUTF8( const unsigned char* c_str )
02399         {
02400             std::string tmp( reinterpret_cast<const char*>( c_str ) );
02401             return _verifyUTF8( tmp );
02402         }
02403         // Walks a UTF-8 byte string, validating its multi-byte sequences, and returns the number of characters found.
              // Throws invalid_data for overlong encodings, bad continuation bytes and sequences cut short by the end of the string.
              // NOTE: a byte with the high bit set that matches none of the lead patterns is counted as one character without complaint.
02404         static size_type _verifyUTF8( const std::string& str )
02405         {
02406             std::string::const_iterator i, ie = str.end();
02407             i = str.begin();
02408             size_type length = 0;
02409
02410             while ( i != ie )
02411             {
02412                 // characters pass until we find an extended sequence
02413                 if (( *i ) & 0x80 )
02414                 {
02415                     unsigned char c = ( *i );
02416                     size_t contBytes = 0;
                          // the overlong tests peek at the following byte; only do so when it exists,
                          // because dereferencing the end iterator is undefined behaviour
                          const bool hasNext = ( i + 1 ) != ie;
02417
02418                     // get continuation byte count and test for overlong sequences
02419                     if (( c & ~_lead1_mask ) == _lead1 )
02420                     { // 1 additional byte
02421                         if ( c == _lead1 ) throw invalid_data( "overlong UTF-8 sequence" );
02422                         contBytes = 1;
02423
02424                     }
02425                     else if (( c & ~_lead2_mask ) == _lead2 )
02426                     { // 2 additional bytes
02427                         contBytes = 2;
02428                         if ( c == _lead2 && hasNext )
02429                         { // possible overlong UTF-8 sequence
02430                             c = ( *( i + 1 ) ); // look ahead to next byte in sequence
02431                             if (( c & _lead2 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" );
02432                         }
02433
02434                     }
02435                     else if (( c & ~_lead3_mask ) == _lead3 )
02436                     { // 3 additional bytes
02437                         contBytes = 3;
02438                         if ( c == _lead3 && hasNext )
02439                         { // possible overlong UTF-8 sequence
02440                             c = ( *( i + 1 ) ); // look ahead to next byte in sequence
02441                             if (( c & _lead3 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" );
02442                         }
02443
02444                     }
02445                     else if (( c & ~_lead4_mask ) == _lead4 )
02446                     { // 4 additional bytes
02447                         contBytes = 4;
02448                         if ( c == _lead4 && hasNext )
02449                         { // possible overlong UTF-8 sequence
02450                             c = ( *( i + 1 ) ); // look ahead to next byte in sequence
02451                             if (( c & _lead4 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" );
02452                         }
02453
02454                     }
02455                     else if (( c & ~_lead5_mask ) == _lead5 )
02456                     { // 5 additional bytes
02457                         contBytes = 5;
02458                         if ( c == _lead5 && hasNext )
02459                         { // possible overlong UTF-8 sequence
02460                             c = ( *( i + 1 ) ); // look ahead to next byte in sequence
02461                             if (( c & _lead5 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" );
02462                         }
02463                     }
02464
02465                     // check that every announced continuation byte is present and well formed
02466                     while ( contBytes-- )
02467                     {
                              if ( ++i == ie ) // the string ended in the middle of a sequence
                                  throw invalid_data( "truncated UTF-8 sequence" );
02468                         c = ( *i ); // get next byte in sequence
02469                         if (( c & ~_cont_mask ) != _cont )
02470                             throw invalid_data( "bad UTF-8 continuation byte" );
02471                     }
02472                 }
02473                 length++;
02474                 i++;
02475             }
02476             return length;
02477         }
02479 
02480     private:
02481         //template<class ITER_TYPE> friend class _iterator;
02482         dstring mData; // NOTE(review): presumably the string's own code-point storage; it is not used in this part of the file, confirm against the accessors
02483 
02484         // Tag recording which member of the scratch-buffer union (m_buffer) currently holds an allocated object.
02485         enum BufferType
02486         {
02487             bt_none, // no typed buffer; set by _init()
02488             bt_string, // m_buffer.mStrBuffer points to a std::string
02489             bt_wstring, // m_buffer.mWStrBuffer points to a std::wstring
02490             bt_utf32string // m_buffer.mUTF32StrBuffer points to a utf32string
02491         };
02492 
02493         // Puts the scratch-buffer bookkeeping into its empty state: null pointer, type bt_none, size 0. Nothing is freed here.
02494         void _init()
02495         {
02496             m_buffer.mVoidBuffer = 0;
02497             m_bufferType = bt_none;
02498             m_bufferSize = 0;
02499         }
02500 
02502         // Scratch buffer
02503         // Frees the scratch buffer, if one is allocated, using the delete that matches m_bufferType, then resets
              // the pointer, the size and the type tag so the bookkeeping is back in the state _init() establishes.
02504         void _cleanBuffer() const
02505         {
02506             if ( m_buffer.mVoidBuffer != 0 )
02507             {
02508                 switch ( m_bufferType )
02509                 {
02510                 case bt_string:
02511                     delete m_buffer.mStrBuffer;
02512                     break;
02513                 case bt_wstring:
02514                     delete m_buffer.mWStrBuffer;
02515                     break;
02516                 case bt_utf32string:
02517                     delete m_buffer.mUTF32StrBuffer;
02518                     break;
02519                 case bt_none: // under the worst of circumstances, this is all we can do, and hope it works out
02520                 default:
02521                     //delete m_buffer.mVoidBuffer;
02522                     // delete void* is undefined, don't do that
02523                     MYGUI_ASSERT(false, "This should never happen - mVoidBuffer should never contain something if we "
02524                         "don't know the type");
02525                     break;
02526                 }
02527                 m_buffer.mVoidBuffer = 0;
02528                 m_bufferSize = 0;
                      // Keep the tag in step with the now-null pointer: the _getBuffer*() helpers skip the
                      // allocation when the tag already matches and would otherwise dereference null.
                      m_bufferType = bt_none;
02529             }
02530         }
02531 
02532         // Makes the scratch buffer an empty std::string: reuses the current one when the type already matches, otherwise replaces it.
02533         void _getBufferStr() const
02534         {
02535             const bool reusable = ( m_bufferType == bt_string );
02536             if ( !reusable )
02537             {
02538                 _cleanBuffer(); // drop whatever other buffer type is held
02539                 m_buffer.mStrBuffer = new std::string();
02540                 m_bufferType = bt_string;
02541             }
02542             m_buffer.mStrBuffer->clear();
02543         }
02543         // Makes the scratch buffer an empty std::wstring: reuses the current one when the type already matches, otherwise replaces it.
02544         void _getBufferWStr() const
02545         {
02546             const bool reusable = ( m_bufferType == bt_wstring );
02547             if ( !reusable )
02548             {
02549                 _cleanBuffer(); // drop whatever other buffer type is held
02550                 m_buffer.mWStrBuffer = new std::wstring();
02551                 m_bufferType = bt_wstring;
02552             }
02553             m_buffer.mWStrBuffer->clear();
02554         }
02554         // Makes the scratch buffer an empty utf32string: reuses the current one when the type already matches, otherwise replaces it.
02555         void _getBufferUTF32Str() const
02556         {
02557             const bool reusable = ( m_bufferType == bt_utf32string );
02558             if ( !reusable )
02559             {
02560                 _cleanBuffer(); // drop whatever other buffer type is held
02561                 m_buffer.mUTF32StrBuffer = new utf32string();
02562                 m_bufferType = bt_utf32string;
02563             }
02564             m_buffer.mUTF32StrBuffer->clear();
02565         }
02565         // Fills the std::string scratch buffer with the UTF-8 encoding of every character in this string.
02566         void _load_buffer_UTF8() const
02567         {
02568             _getBufferStr();
02569             std::string& out = ( *m_buffer.mStrBuffer );
02570             out.reserve( length() );
02571
02572             unsigned char encoded[6]; // holds one encoded character at a time
02573
02574             const_iterator it, last = end();
02575             for ( it = begin(); it != last; it.moveNext() )
02576             {
02577                 const unicode_char ch = it.getCharacter();
02578                 const size_t count = _utf32_to_utf8( ch, encoded );
02579                 out.append( reinterpret_cast<const char*>( encoded ), count );
02580             }
02581         }
02587         void _load_buffer_WStr() const
02588         {
02589             // Fills the std::wstring scratch buffer from this string; the copy strategy depends on WCHAR_UTF16.
02590             _getBufferWStr();
02591             std::wstring& out = ( *m_buffer.mWStrBuffer );
02592             out.reserve( length() ); // may over reserve, but should be close enough
02593 #ifdef WCHAR_UTF16 // wchar_t matches UTF-16
02594             // copy the code points across one by one
02595             const_iterator it, last = end();
02596             for ( it = begin(); it != last; ++it )
02597                 out.push_back( static_cast<wchar_t>( *it ) );
02598 #else // wchar_t fits UTF-32
02599             // decode each full character and store it as a single wchar_t
02600             const_iterator it, last = end();
02601             for ( it = begin(); it != last; it.moveNext() )
02602             {
02603                 const unicode_char ch = it.getCharacter();
02604                 out.push_back( static_cast<wchar_t>( ch ) );
02605             }
02606 #endif
02607         }
02608         void _load_buffer_UTF32() const
02609         {
02610             // Fills the utf32string scratch buffer with one decoded character per element.
02611             _getBufferUTF32Str();
02612             utf32string& out = ( *m_buffer.mUTF32StrBuffer );
02613             out.reserve( length() ); // may over reserve, but should be close enough
02614
02615             const_iterator it, last = end();
02616             for ( it = begin(); it != last; it.moveNext() )
02617             {
02618                 const unicode_char ch = it.getCharacter();
02619                 out.push_back( ch );
02620             }
02621         }
02623 
02624         mutable BufferType m_bufferType; // identifies the data type held in m_buffer
02625         mutable size_t m_bufferSize; // size of the CString buffer (in this part of the file it is only ever reset to 0)
02626
02627         // multi-purpose buffer used everywhere we need a throw-away buffer
              // The members are alternative typed views of one pointer; m_bufferType says which view is valid.
              // Everything is mutable so that the const conversion helpers can rebuild the buffer on demand.
02628         union Buffer
02629         {
02630             mutable void* mVoidBuffer; // untyped view, used for the null test and for resetting
02631             mutable std::string* mStrBuffer; // valid while m_bufferType == bt_string
02632             mutable std::wstring* mWStrBuffer; // valid while m_bufferType == bt_wstring
02633             mutable utf32string* mUTF32StrBuffer; // valid while m_bufferType == bt_utf32string
02634         }
02635         m_buffer;
02636     };
02637 
02638     // Concatenation: returns a copy of s1 with s2 appended; neither operand is modified.
02639     inline UString operator+( const UString& s1, const UString& s2 )
02640     {
02641         return UString( s1 ).append( s2 );
02642     }
02643     // Returns a copy of s1 with one code point `c` appended.
02644     inline UString operator+( const UString& s1, UString::code_point c )
02645     {
02646         return UString( s1 ).append( 1, c );
02647     }
02648     // Returns a copy of s1 with one unicode_char `c` appended.
02649     inline UString operator+( const UString& s1, UString::unicode_char c )
02650     {
02651         return UString( s1 ).append( 1, c );
02652     }
02653     // Returns a copy of s1 with one char `c` appended.
02654     inline UString operator+( const UString& s1, char c )
02655     {
02656         return UString( s1 ).append( 1, c );
02657     }
02658 #if MYGUI_IS_NATIVE_WCHAR_T
02659     // Returns a copy of s1 with one wchar_t `c` appended (only compiled under the enclosing MYGUI_IS_NATIVE_WCHAR_T check).
02660     inline UString operator+( const UString& s1, wchar_t c )
02661     {
02662         return UString( s1 ).append( 1, c );
02663     }
02664 #endif
02665     // Returns a new string consisting of code point `c` followed by s2.
02666     inline UString operator+( UString::code_point c, const UString& s2 )
02667     {
02668         return UString().append( 1, c ).append( s2 );
02669     }
02670     // Returns a new string consisting of unicode_char `c` followed by s2.
02671     inline UString operator+( UString::unicode_char c, const UString& s2 )
02672     {
02673         return UString().append( 1, c ).append( s2 );
02674     }
02675     // Returns a new string consisting of char `c` followed by s2.
02676     inline UString operator+( char c, const UString& s2 )
02677     {
02678         return UString().append( 1, c ).append( s2 );
02679     }
02680 #if MYGUI_IS_NATIVE_WCHAR_T
02681     // Returns a new string consisting of wchar_t `c` followed by s2 (only compiled under the enclosing MYGUI_IS_NATIVE_WCHAR_T check).
02682     inline UString operator+( wchar_t c, const UString& s2 )
02683     {
02684         return UString().append( 1, c ).append( s2 );
02685     }
02686 #endif
02687 
02688     // (const) forward iterator common operators
02689     inline UString::size_type operator-( const UString::_const_fwd_iterator& left, const UString::_const_fwd_iterator& right )
02690     {
              // Distance between the wrapped positions, left minus right, returned as size_type.
02691         return ( left.mIter - right.mIter );
02692     }
02693     inline bool operator==( const UString::_const_fwd_iterator& left, const UString::_const_fwd_iterator& right )
02694     {
              // Equal when both iterators wrap the same underlying position.
02695         return left.mIter == right.mIter;
02696     }
02697     inline bool operator!=( const UString::_const_fwd_iterator& left, const UString::_const_fwd_iterator& right )
02698     {
              // Different when the wrapped positions differ.
02699         return left.mIter != right.mIter;
02700     }
02701     inline bool operator<( const UString::_const_fwd_iterator& left, const UString::_const_fwd_iterator& right )
02702     {
              // Ordering follows the wrapped positions directly.
02703         return left.mIter < right.mIter;
02704     }
02705     inline bool operator<=( const UString::_const_fwd_iterator& left, const UString::_const_fwd_iterator& right )
02706     {
              // Ordering follows the wrapped positions directly.
02707         return left.mIter <= right.mIter;
02708     }
02709     inline bool operator>( const UString::_const_fwd_iterator& left, const UString::_const_fwd_iterator& right )
02710     {
              // Ordering follows the wrapped positions directly.
02711         return left.mIter > right.mIter;
02712     }
02713     inline bool operator>=( const UString::_const_fwd_iterator& left, const UString::_const_fwd_iterator& right )
02714     {
              // Ordering follows the wrapped positions directly.
02715         return left.mIter >= right.mIter;
02716     }
02717 
02718     // (const) reverse iterator common operators
02719     // NB: many of these operations are evaluated in reverse because this is a reverse iterator wrapping a forward iterator
02720     inline UString::size_type operator-( const UString::_const_rev_iterator& left, const UString::_const_rev_iterator& right )
02721     {
              // Operands are swapped relative to the forward version because the reverse iterator wraps a forward position.
02722         return ( right.mIter - left.mIter );
02723     }
02724     inline bool operator==( const UString::_const_rev_iterator& left, const UString::_const_rev_iterator& right )
02725     {
              // Equality does not depend on direction: compare the wrapped positions as they are.
02726         return left.mIter == right.mIter;
02727     }
02728     inline bool operator!=( const UString::_const_rev_iterator& left, const UString::_const_rev_iterator& right )
02729     {
              // Inequality does not depend on direction: compare the wrapped positions as they are.
02730         return left.mIter != right.mIter;
02731     }
02732     inline bool operator<( const UString::_const_rev_iterator& left, const UString::_const_rev_iterator& right )
02733     {
              // Operands are swapped: in reverse order, left precedes right when right's wrapped position is the smaller one.
02734         return right.mIter < left.mIter;
02735     }
02736     inline bool operator<=( const UString::_const_rev_iterator& left, const UString::_const_rev_iterator& right )
02737     {
              // Operands are swapped because the comparison runs against the wrapped forward positions.
02738         return right.mIter <= left.mIter;
02739     }
02740     inline bool operator>( const UString::_const_rev_iterator& left, const UString::_const_rev_iterator& right )
02741     {
              // Operands are swapped because the comparison runs against the wrapped forward positions.
02742         return right.mIter > left.mIter;
02743     }
02744     inline bool operator>=( const UString::_const_rev_iterator& left, const UString::_const_rev_iterator& right )
02745     {
              // Operands are swapped because the comparison runs against the wrapped forward positions.
02746         return right.mIter >= left.mIter;
02747     }
02748 
02749     // Stream insertion: writes the string to a narrow stream as whatever asUTF8() returns.
02750     inline std::ostream& operator << ( std::ostream& os, const UString& s )
02751     {
02752         return os << s.asUTF8();
02753     }
02754 
02756     //inline std::wostream& operator << ( std::wostream& os, const UString& s )
02757     //{
02758     //  return os << s.asWStr();
02759     //}
02760 
02761 
02762 
02763 }
02764 
02765 #endif // __MYGUI_U_STRING_H__
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines