00001 // Modified from OpenGUI under lenient license 00002 // Original copyright details and licensing below: 00003 // OpenGUI (http://opengui.sourceforge.net) 00004 // This source code is released under the BSD License 00005 00006 // Permission is given to the Ogre project to use the contents of file within its 00007 // source and binary applications, as well as any derivative works, in accordance 00008 // with the terms of any license under which Ogre is or will be distributed. 00009 // 00010 // Ogre may relicense its copy of this file, as well as any OpenGUI released updates 00011 // to this file, under any terms that it deems fit, and is not required to maintain 00012 // the original BSD licensing terms of this file, however OpenGUI retains the right 00013 // to present its copy of this file under the terms of any license under which 00014 // OpenGUI is distributed. 00015 // 00016 // Ogre is not required to release to OpenGUI any future changes that it makes to 00017 // this file, and understands and agrees that any such changes that are released 00018 // back to OpenGUI will become available under the terms of any license under which 00019 // OpenGUI is distributed. 00020 // 00021 // For brevity, this permission text may be removed from this file if desired. 00022 // The original record kept within the SourceForge (http://sourceforge.net/) tracker 00023 // is sufficient. 
00024 // 00025 // - Eric Shorkey (zero/zeroskill) <opengui@rightbracket.com> [January 20th, 2007] 00026 00027 #ifndef __OGRE_UTFSTRING_H__ 00028 #define __OGRE_UTFSTRING_H__ 00029 00030 00031 #include "OgrePrerequisites.h" 00032 00033 #if OGRE_UNICODE_SUPPORT 00034 00035 // these are explained later 00036 #include <iterator> 00037 #include <string> 00038 #include <stdexcept> 00039 00040 // Workaround for VC7: 00041 // when build with /MD or /MDd, VC7 have both std::basic_string<unsigned short> and 00042 // basic_string<__wchar_t> instantiated in msvcprt[d].lib/MSVCP71[D].dll, but the header 00043 // files tells compiler that only one of them is over there (based on /Zc:wchar_t compile 00044 // option). And since this file used both of them, causing compiler instantiating another 00045 // one in user object code, which lead to duplicate symbols with msvcprt.lib/MSVCP71[D].dll. 00046 // 00047 #if OGRE_COMPILER == OGRE_COMPILER_MSVC && (1300 <= OGRE_COMP_VER && OGRE_COMP_VER <= 1310) 00048 00049 # if defined(_DLL_CPPLIB) 00050 00051 namespace std 00052 { 00053 template class _CRTIMP2 basic_string<unsigned short, char_traits<unsigned short>, 00054 allocator<unsigned short> >; 00055 00056 template class _CRTIMP2 basic_string<__wchar_t, char_traits<__wchar_t>, 00057 allocator<__wchar_t> >; 00058 } 00059 00060 # endif // defined(_DLL_CPPLIB) 00061 00062 #endif // OGRE_COMPILER == OGRE_COMPILER_MSVC && OGRE_COMP_VER == 1300 00063 00064 00065 namespace Ogre { 00066 00067 /* READ THIS NOTICE BEFORE USING IN YOUR OWN APPLICATIONS 00068 =NOTICE= 00069 This class is not a complete Unicode solution. It purposefully does not 00070 provide certain functionality, such as proper lexical sorting for 00071 Unicode values. It does provide comparison operators for the sole purpose 00072 of using UTFString as an index with std::map and other operator< sorted 00073 containers, but it should NOT be relied upon for meaningful lexical 00074 operations, such as alphabetical sorts. 
If you need this type of 00075 functionality, look into using ICU instead (http://icu.sourceforge.net/). 00076 00077 =REQUIREMENTS= 00078 There are a few requirements for proper operation. They are fairly small, 00079 and shouldn't restrict usage on any reasonable target. 00080 * Compiler must support unsigned 16-bit integer types 00081 * Compiler must support signed 32-bit integer types 00082 * wchar_t must be either UTF-16 or UTF-32 encoding, and specified as such 00083 using the WCHAR_UTF16 macro as outlined below. 00084 * You must include <iterator>, <string>, and <wchar.h>. Probably more, but 00085 these are the most obvious. 00086 00087 =REQUIRED PREPROCESSOR MACROS= 00088 This class requires two preprocessor macros to be defined in order to 00089 work as advertised. 00090 INT32 - must be mapped to a signed 32 bit integer (ex. #define INT32 int) 00091 UINT16 - must be mapped to an unsigned 16 bit integer (ex. #define UINT16 unsigned short) 00092 00093 Additionally, a third macro should be defined to control the evaluation of wchar_t: 00094 WCHAR_UTF16 - should be defined when wchar_t represents UTF-16 code points, 00095 such as in Windows. Otherwise it is assumed that wchar_t is a 32-bit 00096 integer representing UTF-32 code points. 00097 */ 00098 00099 // THIS IS A VERY BRIEF AUTO DETECTION. 
YOU MAY NEED TO TWEAK THIS 00100 #ifdef __STDC_ISO_10646__ 00101 // for any compiler that provides this, wchar_t is guaranteed to hold any Unicode value with a single code point (32-bit or larger) 00102 // so we can safely skip the rest of the testing 00103 #else // #ifdef __STDC_ISO_10646__ 00104 #if defined( __WIN32__ ) || defined( _WIN32 ) 00105 #define WCHAR_UTF16 // All currently known Windows platforms utilize UTF-16 encoding in wchar_t 00106 #else // #if defined( __WIN32__ ) || defined( _WIN32 ) 00107 #if WCHAR_MAX <= 0xFFFF // this is a last resort fall back test; WCHAR_MAX is defined in <wchar.h> 00108 #define WCHAR_UTF16 // best we can tell, wchar_t is not larger than 16-bit 00109 #endif // #if WCHAR_MAX <= 0xFFFF 00110 #endif // #if defined( __WIN32__ ) || defined( _WIN32 ) 00111 #endif // #ifdef __STDC_ISO_10646__ 00112 00113 00114 // OGRE_IS_NATIVE_WCHAR_T means that wchar_t isn't a typedef of 00115 // uint16 or uint32. 00116 #if OGRE_COMPILER == OGRE_COMPILER_MSVC 00117 00118 // Don't define wchar_t related functions since it'll duplicate 00119 // with UTFString::code_point related functions when compile 00120 // without /Zc:wchar_t, because in this case both of them are 00121 // a typedef of uint16. 
00122 # if defined(_NATIVE_WCHAR_T_DEFINED) 00123 # define OGRE_IS_NATIVE_WCHAR_T 1 00124 # else 00125 # define OGRE_IS_NATIVE_WCHAR_T 0 00126 # endif 00127 00128 #else // OGRE_COMPILER != OGRE_COMPILER_MSVC 00129 00130 // Assumed wchar_t is natively for other compilers 00131 # define OGRE_IS_NATIVE_WCHAR_T 1 00132 00133 #endif // OGRE_COMPILER == OGRE_COMPILER_MSVC 00134 00136 00161 class UTFString { 00162 // constants used in UTF-8 conversions 00163 static const unsigned char _lead1 = 0xC0; //110xxxxx 00164 static const unsigned char _lead1_mask = 0x1F; //00011111 00165 static const unsigned char _lead2 = 0xE0; //1110xxxx 00166 static const unsigned char _lead2_mask = 0x0F; //00001111 00167 static const unsigned char _lead3 = 0xF0; //11110xxx 00168 static const unsigned char _lead3_mask = 0x07; //00000111 00169 static const unsigned char _lead4 = 0xF8; //111110xx 00170 static const unsigned char _lead4_mask = 0x03; //00000011 00171 static const unsigned char _lead5 = 0xFC; //1111110x 00172 static const unsigned char _lead5_mask = 0x01; //00000001 00173 static const unsigned char _cont = 0x80; //10xxxxxx 00174 static const unsigned char _cont_mask = 0x3F; //00111111 00175 00176 public: 00178 typedef size_t size_type; 00180 static const size_type npos = ~0; 00181 00183 typedef uint32 unicode_char; 00184 00186 typedef uint16 code_point; 00187 00189 typedef code_point value_type; 00190 00191 typedef std::basic_string<code_point> dstring; // data string 00192 00194 typedef std::basic_string<unicode_char> utf32string; 00195 00197 class invalid_data: public std::runtime_error { /* i don't know why the beautifier is freaking out on this line */ 00198 public: 00200 explicit invalid_data( const std::string& _Message ): std::runtime_error( _Message ) { 00201 /* The thing is, Bob, it's not that I'm lazy, it's that I just don't care. 
*/ 00202 } 00203 }; 00204 00205 //######################################################################### 00207 class _base_iterator: public std::iterator<std::random_access_iterator_tag, value_type> { /* i don't know why the beautifier is freaking out on this line */ 00208 friend class UTFString; 00209 protected: 00210 _base_iterator() { 00211 mString = 0; 00212 } 00213 00214 void _seekFwd( size_type c ) { 00215 mIter += c; 00216 } 00217 void _seekRev( size_type c ) { 00218 mIter -= c; 00219 } 00220 void _become( const _base_iterator& i ) { 00221 mIter = i.mIter; 00222 mString = i.mString; 00223 } 00224 bool _test_begin() const { 00225 return mIter == mString->mData.begin(); 00226 } 00227 bool _test_end() const { 00228 return mIter == mString->mData.end(); 00229 } 00230 size_type _get_index() const { 00231 return mIter - mString->mData.begin(); 00232 } 00233 void _jump_to( size_type index ) { 00234 mIter = mString->mData.begin() + index; 00235 } 00236 00237 unicode_char _getCharacter() const { 00238 size_type current_index = _get_index(); 00239 return mString->getChar( current_index ); 00240 } 00241 int _setCharacter( unicode_char uc ) { 00242 size_type current_index = _get_index(); 00243 int change = mString->setChar( current_index, uc ); 00244 _jump_to( current_index ); 00245 return change; 00246 } 00247 00248 void _moveNext() { 00249 _seekFwd( 1 ); // move 1 code point forward 00250 if ( _test_end() ) return; // exit if we hit the end 00251 if ( _utf16_surrogate_follow( mIter[0] ) ) { 00252 // landing on a follow code point means we might be part of a bigger character 00253 // so we test for that 00254 code_point lead_half = 0; 00255 //NB: we can't possibly be at the beginning here, so no need to test 00256 lead_half = mIter[-1]; // check the previous code point to see if we're part of a surrogate pair 00257 if ( _utf16_surrogate_lead( lead_half ) ) { 00258 _seekFwd( 1 ); // if so, then advance 1 more code point 00259 } 00260 } 00261 } 00262 void _movePrev() 
{ 00263 _seekRev( 1 ); // move 1 code point backwards 00264 if ( _test_begin() ) return; // exit if we hit the beginning 00265 if ( _utf16_surrogate_follow( mIter[0] ) ) { 00266 // landing on a follow code point means we might be part of a bigger character 00267 // so we test for that 00268 code_point lead_half = 0; 00269 lead_half = mIter[-1]; // check the previous character to see if we're part of a surrogate pair 00270 if ( _utf16_surrogate_lead( lead_half ) ) { 00271 _seekRev( 1 ); // if so, then rewind 1 more code point 00272 } 00273 } 00274 } 00275 00276 dstring::iterator mIter; 00277 UTFString* mString; 00278 }; 00279 00280 //######################################################################### 00281 // FORWARD ITERATORS 00282 //######################################################################### 00283 class _const_fwd_iterator; // forward declaration 00284 00286 class _fwd_iterator: public _base_iterator { /* i don't know why the beautifier is freaking out on this line */ 00287 friend class _const_fwd_iterator; 00288 public: 00289 _fwd_iterator() {} 00290 _fwd_iterator( const _fwd_iterator& i ) { 00291 _become( i ); 00292 } 00293 00295 _fwd_iterator& operator++() { 00296 _seekFwd( 1 ); 00297 return *this; 00298 } 00300 _fwd_iterator operator++( int ) { 00301 _fwd_iterator tmp( *this ); 00302 _seekFwd( 1 ); 00303 return tmp; 00304 } 00305 00307 _fwd_iterator& operator--() { 00308 _seekRev( 1 ); 00309 return *this; 00310 } 00312 _fwd_iterator operator--( int ) { 00313 _fwd_iterator tmp( *this ); 00314 _seekRev( 1 ); 00315 return tmp; 00316 } 00317 00319 _fwd_iterator operator+( size_type n ) { 00320 _fwd_iterator tmp( *this ); 00321 tmp._seekFwd( n ); 00322 return tmp; 00323 } 00325 _fwd_iterator operator+( difference_type n ) { 00326 _fwd_iterator tmp( *this ); 00327 if ( n < 0 ) 00328 tmp._seekRev( -n ); 00329 else 00330 tmp._seekFwd( n ); 00331 return tmp; 00332 } 00334 _fwd_iterator operator-( size_type n ) { 00335 _fwd_iterator tmp( *this ); 
00336 tmp._seekRev( n ); 00337 return tmp; 00338 } 00340 _fwd_iterator operator-( difference_type n ) { 00341 _fwd_iterator tmp( *this ); 00342 if ( n < 0 ) 00343 tmp._seekFwd( -n ); 00344 else 00345 tmp._seekRev( n ); 00346 return tmp; 00347 } 00348 00350 _fwd_iterator& operator+=( size_type n ) { 00351 _seekFwd( n ); 00352 return *this; 00353 } 00355 _fwd_iterator& operator+=( difference_type n ) { 00356 if ( n < 0 ) 00357 _seekRev( -n ); 00358 else 00359 _seekFwd( n ); 00360 return *this; 00361 } 00363 _fwd_iterator& operator-=( size_type n ) { 00364 _seekRev( n ); 00365 return *this; 00366 } 00368 _fwd_iterator& operator-=( difference_type n ) { 00369 if ( n < 0 ) 00370 _seekFwd( -n ); 00371 else 00372 _seekRev( n ); 00373 return *this; 00374 } 00375 00377 value_type& operator*() const { 00378 return *mIter; 00379 } 00380 00382 value_type& operator[]( size_type n ) const { 00383 _fwd_iterator tmp( *this ); 00384 tmp += n; 00385 return *tmp; 00386 } 00388 value_type& operator[]( difference_type n ) const { 00389 _fwd_iterator tmp( *this ); 00390 tmp += n; 00391 return *tmp; 00392 } 00393 00395 _fwd_iterator& moveNext() { 00396 _moveNext(); 00397 return *this; 00398 } 00400 _fwd_iterator& movePrev() { 00401 _movePrev(); 00402 return *this; 00403 } 00405 unicode_char getCharacter() const { 00406 return _getCharacter(); 00407 } 00409 int setCharacter( unicode_char uc ) { 00410 return _setCharacter( uc ); 00411 } 00412 }; 00413 00414 00415 00416 //######################################################################### 00418 class _const_fwd_iterator: public _base_iterator { /* i don't know why the beautifier is freaking out on this line */ 00419 public: 00420 _const_fwd_iterator() {} 00421 _const_fwd_iterator( const _const_fwd_iterator& i ) { 00422 _become( i ); 00423 } 00424 _const_fwd_iterator( const _fwd_iterator& i ) { 00425 _become( i ); 00426 } 00427 00429 _const_fwd_iterator& operator++() { 00430 _seekFwd( 1 ); 00431 return *this; 00432 } 00434 
_const_fwd_iterator operator++( int ) { 00435 _const_fwd_iterator tmp( *this ); 00436 _seekFwd( 1 ); 00437 return tmp; 00438 } 00439 00441 _const_fwd_iterator& operator--() { 00442 _seekRev( 1 ); 00443 return *this; 00444 } 00446 _const_fwd_iterator operator--( int ) { 00447 _const_fwd_iterator tmp( *this ); 00448 _seekRev( 1 ); 00449 return tmp; 00450 } 00451 00453 _const_fwd_iterator operator+( size_type n ) { 00454 _const_fwd_iterator tmp( *this ); 00455 tmp._seekFwd( n ); 00456 return tmp; 00457 } 00459 _const_fwd_iterator operator+( difference_type n ) { 00460 _const_fwd_iterator tmp( *this ); 00461 if ( n < 0 ) 00462 tmp._seekRev( -n ); 00463 else 00464 tmp._seekFwd( n ); 00465 return tmp; 00466 } 00468 _const_fwd_iterator operator-( size_type n ) { 00469 _const_fwd_iterator tmp( *this ); 00470 tmp._seekRev( n ); 00471 return tmp; 00472 } 00474 _const_fwd_iterator operator-( difference_type n ) { 00475 _const_fwd_iterator tmp( *this ); 00476 if ( n < 0 ) 00477 tmp._seekFwd( -n ); 00478 else 00479 tmp._seekRev( n ); 00480 return tmp; 00481 } 00482 00484 _const_fwd_iterator& operator+=( size_type n ) { 00485 _seekFwd( n ); 00486 return *this; 00487 } 00489 _const_fwd_iterator& operator+=( difference_type n ) { 00490 if ( n < 0 ) 00491 _seekRev( -n ); 00492 else 00493 _seekFwd( n ); 00494 return *this; 00495 } 00497 _const_fwd_iterator& operator-=( size_type n ) { 00498 _seekRev( n ); 00499 return *this; 00500 } 00502 _const_fwd_iterator& operator-=( difference_type n ) { 00503 if ( n < 0 ) 00504 _seekFwd( -n ); 00505 else 00506 _seekRev( n ); 00507 return *this; 00508 } 00509 00511 const value_type& operator*() const { 00512 return *mIter; 00513 } 00514 00516 const value_type& operator[]( size_type n ) const { 00517 _const_fwd_iterator tmp( *this ); 00518 tmp += n; 00519 return *tmp; 00520 } 00522 const value_type& operator[]( difference_type n ) const { 00523 _const_fwd_iterator tmp( *this ); 00524 tmp += n; 00525 return *tmp; 00526 } 00527 00529 
_const_fwd_iterator& moveNext() { 00530 _moveNext(); 00531 return *this; 00532 } 00534 _const_fwd_iterator& movePrev() { 00535 _movePrev(); 00536 return *this; 00537 } 00539 unicode_char getCharacter() const { 00540 return _getCharacter(); 00541 } 00542 00544 friend size_type operator-( const _const_fwd_iterator& left, const _const_fwd_iterator& right ); 00546 friend bool operator==( const _const_fwd_iterator& left, const _const_fwd_iterator& right ); 00548 friend bool operator!=( const _const_fwd_iterator& left, const _const_fwd_iterator& right ); 00550 friend bool operator<( const _const_fwd_iterator& left, const _const_fwd_iterator& right ); 00552 friend bool operator<=( const _const_fwd_iterator& left, const _const_fwd_iterator& right ); 00554 friend bool operator>( const _const_fwd_iterator& left, const _const_fwd_iterator& right ); 00556 friend bool operator>=( const _const_fwd_iterator& left, const _const_fwd_iterator& right ); 00557 00558 }; 00559 00560 //######################################################################### 00561 // REVERSE ITERATORS 00562 //######################################################################### 00563 class _const_rev_iterator; // forward declaration 00565 class _rev_iterator: public _base_iterator { /* i don't know why the beautifier is freaking out on this line */ 00566 friend class _const_rev_iterator; 00567 public: 00568 _rev_iterator() {} 00569 _rev_iterator( const _rev_iterator& i ) { 00570 _become( i ); 00571 } 00572 00574 _rev_iterator& operator++() { 00575 _seekRev( 1 ); 00576 return *this; 00577 } 00579 _rev_iterator operator++( int ) { 00580 _rev_iterator tmp( *this ); 00581 _seekRev( 1 ); 00582 return tmp; 00583 } 00584 00586 _rev_iterator& operator--() { 00587 _seekFwd( 1 ); 00588 return *this; 00589 } 00591 _rev_iterator operator--( int ) { 00592 _rev_iterator tmp( *this ); 00593 _seekFwd( 1 ); 00594 return tmp; 00595 } 00596 00598 _rev_iterator operator+( size_type n ) { 00599 _rev_iterator tmp( *this 
); 00600 tmp._seekRev( n ); 00601 return tmp; 00602 } 00604 _rev_iterator operator+( difference_type n ) { 00605 _rev_iterator tmp( *this ); 00606 if ( n < 0 ) 00607 tmp._seekFwd( -n ); 00608 else 00609 tmp._seekRev( n ); 00610 return tmp; 00611 } 00613 _rev_iterator operator-( size_type n ) { 00614 _rev_iterator tmp( *this ); 00615 tmp._seekFwd( n ); 00616 return tmp; 00617 } 00619 _rev_iterator operator-( difference_type n ) { 00620 _rev_iterator tmp( *this ); 00621 if ( n < 0 ) 00622 tmp._seekRev( -n ); 00623 else 00624 tmp._seekFwd( n ); 00625 return tmp; 00626 } 00627 00629 _rev_iterator& operator+=( size_type n ) { 00630 _seekRev( n ); 00631 return *this; 00632 } 00634 _rev_iterator& operator+=( difference_type n ) { 00635 if ( n < 0 ) 00636 _seekFwd( -n ); 00637 else 00638 _seekRev( n ); 00639 return *this; 00640 } 00642 _rev_iterator& operator-=( size_type n ) { 00643 _seekFwd( n ); 00644 return *this; 00645 } 00647 _rev_iterator& operator-=( difference_type n ) { 00648 if ( n < 0 ) 00649 _seekRev( -n ); 00650 else 00651 _seekFwd( n ); 00652 return *this; 00653 } 00654 00656 value_type& operator*() const { 00657 return mIter[-1]; 00658 } 00659 00661 value_type& operator[]( size_type n ) const { 00662 _rev_iterator tmp( *this ); 00663 tmp -= n; 00664 return *tmp; 00665 } 00667 value_type& operator[]( difference_type n ) const { 00668 _rev_iterator tmp( *this ); 00669 tmp -= n; 00670 return *tmp; 00671 } 00672 }; 00673 //######################################################################### 00675 class _const_rev_iterator: public _base_iterator { /* i don't know why the beautifier is freaking out on this line */ 00676 public: 00677 _const_rev_iterator() {} 00678 _const_rev_iterator( const _const_rev_iterator& i ) { 00679 _become( i ); 00680 } 00681 _const_rev_iterator( const _rev_iterator& i ) { 00682 _become( i ); 00683 } 00685 _const_rev_iterator& operator++() { 00686 _seekRev( 1 ); 00687 return *this; 00688 } 00690 _const_rev_iterator operator++( int ) 
{ 00691 _const_rev_iterator tmp( *this ); 00692 _seekRev( 1 ); 00693 return tmp; 00694 } 00695 00697 _const_rev_iterator& operator--() { 00698 _seekFwd( 1 ); 00699 return *this; 00700 } 00702 _const_rev_iterator operator--( int ) { 00703 _const_rev_iterator tmp( *this ); 00704 _seekFwd( 1 ); 00705 return tmp; 00706 } 00707 00709 _const_rev_iterator operator+( size_type n ) { 00710 _const_rev_iterator tmp( *this ); 00711 tmp._seekRev( n ); 00712 return tmp; 00713 } 00715 _const_rev_iterator operator+( difference_type n ) { 00716 _const_rev_iterator tmp( *this ); 00717 if ( n < 0 ) 00718 tmp._seekFwd( -n ); 00719 else 00720 tmp._seekRev( n ); 00721 return tmp; 00722 } 00724 _const_rev_iterator operator-( size_type n ) { 00725 _const_rev_iterator tmp( *this ); 00726 tmp._seekFwd( n ); 00727 return tmp; 00728 } 00730 _const_rev_iterator operator-( difference_type n ) { 00731 _const_rev_iterator tmp( *this ); 00732 if ( n < 0 ) 00733 tmp._seekRev( -n ); 00734 else 00735 tmp._seekFwd( n ); 00736 return tmp; 00737 } 00738 00740 _const_rev_iterator& operator+=( size_type n ) { 00741 _seekRev( n ); 00742 return *this; 00743 } 00745 _const_rev_iterator& operator+=( difference_type n ) { 00746 if ( n < 0 ) 00747 _seekFwd( -n ); 00748 else 00749 _seekRev( n ); 00750 return *this; 00751 } 00753 _const_rev_iterator& operator-=( size_type n ) { 00754 _seekFwd( n ); 00755 return *this; 00756 } 00758 _const_rev_iterator& operator-=( difference_type n ) { 00759 if ( n < 0 ) 00760 _seekRev( -n ); 00761 else 00762 _seekFwd( n ); 00763 return *this; 00764 } 00765 00767 const value_type& operator*() const { 00768 return mIter[-1]; 00769 } 00770 00772 const value_type& operator[]( size_type n ) const { 00773 _const_rev_iterator tmp( *this ); 00774 tmp -= n; 00775 return *tmp; 00776 } 00778 const value_type& operator[]( difference_type n ) const { 00779 _const_rev_iterator tmp( *this ); 00780 tmp -= n; 00781 return *tmp; 00782 } 00783 00785 friend size_type operator-( const 
_const_rev_iterator& left, const _const_rev_iterator& right ); 00787 friend bool operator==( const _const_rev_iterator& left, const _const_rev_iterator& right ); 00789 friend bool operator!=( const _const_rev_iterator& left, const _const_rev_iterator& right ); 00791 friend bool operator<( const _const_rev_iterator& left, const _const_rev_iterator& right ); 00793 friend bool operator<=( const _const_rev_iterator& left, const _const_rev_iterator& right ); 00795 friend bool operator>( const _const_rev_iterator& left, const _const_rev_iterator& right ); 00797 friend bool operator>=( const _const_rev_iterator& left, const _const_rev_iterator& right ); 00798 }; 00799 //######################################################################### 00800 00801 typedef _fwd_iterator iterator; 00802 typedef _rev_iterator reverse_iterator; 00803 typedef _const_fwd_iterator const_iterator; 00804 typedef _const_rev_iterator const_reverse_iterator; 00805 00806 00808 00809 00810 UTFString() { 00811 _init(); 00812 } 00814 UTFString( const UTFString& copy ) { 00815 _init(); 00816 mData = copy.mData; 00817 } 00819 UTFString( size_type length, const code_point& ch ) { 00820 _init(); 00821 assign( length, ch ); 00822 } 00824 UTFString( const code_point* str ) { 00825 _init(); 00826 assign( str ); 00827 } 00829 UTFString( const code_point* str, size_type length ) { 00830 _init(); 00831 assign( str, length ); 00832 } 00834 UTFString( const UTFString& str, size_type index, size_type length ) { 00835 _init(); 00836 assign( str, index, length ); 00837 } 00838 #if OGRE_IS_NATIVE_WCHAR_T 00839 00840 UTFString( const wchar_t* w_str ) { 00841 _init(); 00842 assign( w_str ); 00843 } 00845 UTFString( const wchar_t* w_str, size_type length ) { 00846 _init(); 00847 assign( w_str, length ); 00848 } 00849 #endif 00850 00851 UTFString( const std::wstring& wstr ) { 00852 _init(); 00853 assign( wstr ); 00854 } 00856 UTFString( const char* c_str ) { 00857 _init(); 00858 assign( c_str ); 00859 } 00861 
UTFString( const char* c_str, size_type length ) { 00862 _init(); 00863 assign( c_str, length ); 00864 } 00866 UTFString( const std::string& str ) { 00867 _init(); 00868 assign( str ); 00869 } 00871 ~UTFString() { 00872 _cleanBuffer(); 00873 } 00875 00877 00879 00880 00881 size_type size() const { 00882 return mData.size(); 00883 } 00885 size_type length() const { 00886 return size(); 00887 } 00889 00890 size_type length_Characters() const { 00891 const_iterator i = begin(), ie = end(); 00892 size_type c = 0; 00893 while ( i != ie ) { 00894 i.moveNext(); 00895 ++c; 00896 } 00897 return c; 00898 } 00900 size_type max_size() const { 00901 return mData.max_size(); 00902 } 00904 void reserve( size_type size ) { 00905 mData.reserve( size ); 00906 } 00908 void resize( size_type num, const code_point& val = 0 ) { 00909 mData.resize( num, val ); 00910 } 00912 void swap( UTFString& from ) { 00913 mData.swap( from.mData ); 00914 } 00916 bool empty() const { 00917 return mData.empty(); 00918 } 00920 const code_point* c_str() const { 00921 return mData.c_str(); 00922 } 00924 const code_point* data() const { 00925 return c_str(); 00926 } 00928 size_type capacity() const { 00929 return mData.capacity(); 00930 } 00932 void clear() { 00933 mData.clear(); 00934 } 00936 00937 UTFString substr( size_type index, size_type num = npos ) const { 00938 // this could avoid the extra copy if we used a private specialty constructor 00939 dstring data = mData.substr( index, num ); 00940 UTFString tmp; 00941 tmp.mData.swap( data ); 00942 return tmp; 00943 } 00945 void push_back( unicode_char val ) { 00946 code_point cp[2]; 00947 size_t c = _utf32_to_utf16( val, cp ); 00948 if ( c > 0 ) push_back( cp[0] ); 00949 if ( c > 1 ) push_back( cp[1] ); 00950 } 00951 #if OGRE_IS_NATIVE_WCHAR_T 00952 00953 void push_back( wchar_t val ) { 00954 // we do this because the Unicode method still preserves UTF-16 code points 00955 mData.push_back( static_cast<unicode_char>( val ) ); 00956 } 00957 #endif 00958 
00959 00961 void push_back( code_point val ) { 00962 mData.push_back( val ); 00963 } 00965 00966 void push_back( char val ) { 00967 mData.push_back( static_cast<code_point>( val ) ); 00968 } 00970 bool inString( unicode_char ch ) const { 00971 const_iterator i, ie = end(); 00972 for ( i = begin(); i != ie; i.moveNext() ) { 00973 if ( i.getCharacter() == ch ) 00974 return true; 00975 } 00976 return false; 00977 } 00979 00981 00983 00984 00985 const std::string& asUTF8() const { 00986 _load_buffer_UTF8(); 00987 return *m_buffer.mStrBuffer; 00988 } 00990 const char* asUTF8_c_str() const { 00991 _load_buffer_UTF8(); 00992 return m_buffer.mStrBuffer->c_str(); 00993 } 00995 const utf32string& asUTF32() const { 00996 _load_buffer_UTF32(); 00997 return *m_buffer.mUTF32StrBuffer; 00998 } 01000 const unicode_char* asUTF32_c_str() const { 01001 _load_buffer_UTF32(); 01002 return m_buffer.mUTF32StrBuffer->c_str(); 01003 } 01005 const std::wstring& asWStr() const { 01006 _load_buffer_WStr(); 01007 return *m_buffer.mWStrBuffer; 01008 } 01010 const wchar_t* asWStr_c_str() const { 01011 _load_buffer_WStr(); 01012 return m_buffer.mWStrBuffer->c_str(); 01013 } 01015 01017 01019 01020 01021 code_point& at( size_type loc ) { 01022 return mData.at( loc ); 01023 } 01025 const code_point& at( size_type loc ) const { 01026 return mData.at( loc ); 01027 } 01029 01033 unicode_char getChar( size_type loc ) const { 01034 const code_point* ptr = c_str(); 01035 unicode_char uc; 01036 size_t l = _utf16_char_length( ptr[loc] ); 01037 code_point cp[2] = { /* blame the code beautifier */ 01038 0, 0 01039 }; 01040 cp[0] = ptr[loc]; 01041 01042 if ( l == 2 && ( loc + 1 ) < mData.length() ) { 01043 cp[1] = ptr[loc+1]; 01044 } 01045 _utf16_to_utf32( cp, uc ); 01046 return uc; 01047 } 01049 01057 int setChar( size_type loc, unicode_char ch ) { 01058 code_point cp[2] = { /* blame the code beautifier */ 01059 0, 0 01060 }; 01061 size_t l = _utf32_to_utf16( ch, cp ); 01062 unicode_char existingChar = 
getChar( loc ); 01063 size_t existingSize = _utf16_char_length( existingChar ); 01064 size_t newSize = _utf16_char_length( ch ); 01065 01066 if ( newSize > existingSize ) { 01067 at( loc ) = cp[0]; 01068 insert( loc + 1, 1, cp[1] ); 01069 return 1; 01070 } 01071 if ( newSize < existingSize ) { 01072 erase( loc, 1 ); 01073 at( loc ) = cp[0]; 01074 return -1; 01075 } 01076 01077 // newSize == existingSize 01078 at( loc ) = cp[0]; 01079 if ( l == 2 ) at( loc + 1 ) = cp[1]; 01080 return 0; 01081 } 01083 01085 01087 01088 01089 iterator begin() { 01090 iterator i; 01091 i.mIter = mData.begin(); 01092 i.mString = this; 01093 return i; 01094 } 01096 const_iterator begin() const { 01097 const_iterator i; 01098 i.mIter = const_cast<UTFString*>( this )->mData.begin(); 01099 i.mString = const_cast<UTFString*>( this ); 01100 return i; 01101 } 01103 iterator end() { 01104 iterator i; 01105 i.mIter = mData.end(); 01106 i.mString = this; 01107 return i; 01108 } 01110 const_iterator end() const { 01111 const_iterator i; 01112 i.mIter = const_cast<UTFString*>( this )->mData.end(); 01113 i.mString = const_cast<UTFString*>( this ); 01114 return i; 01115 } 01117 reverse_iterator rbegin() { 01118 reverse_iterator i; 01119 i.mIter = mData.end(); 01120 i.mString = this; 01121 return i; 01122 } 01124 const_reverse_iterator rbegin() const { 01125 const_reverse_iterator i; 01126 i.mIter = const_cast<UTFString*>( this )->mData.end(); 01127 i.mString = const_cast<UTFString*>( this ); 01128 return i; 01129 } 01131 reverse_iterator rend() { 01132 reverse_iterator i; 01133 i.mIter = mData.begin(); 01134 i.mString = this; 01135 return i; 01136 } 01138 const_reverse_iterator rend() const { 01139 const_reverse_iterator i; 01140 i.mIter = const_cast<UTFString*>( this )->mData.begin(); 01141 i.mString = const_cast<UTFString*>( this ); 01142 return i; 01143 } 01145 01147 01149 01150 01151 UTFString& assign( iterator start, iterator end ) { 01152 mData.assign( start.mIter, end.mIter ); 01153 return 
*this; 01154 } 01156 UTFString& assign( const UTFString& str ) { 01157 mData.assign( str.mData ); 01158 return *this; 01159 } 01161 UTFString& assign( const code_point* str ) { 01162 mData.assign( str ); 01163 return *this; 01164 } 01166 UTFString& assign( const code_point* str, size_type num ) { 01167 mData.assign( str, num ); 01168 return *this; 01169 } 01171 UTFString& assign( const UTFString& str, size_type index, size_type len ) { 01172 mData.assign( str.mData, index, len ); 01173 return *this; 01174 } 01176 UTFString& assign( size_type num, const code_point& ch ) { 01177 mData.assign( num, ch ); 01178 return *this; 01179 } 01181 UTFString& assign( const std::wstring& wstr ) { 01182 mData.clear(); 01183 mData.reserve( wstr.length() ); // best guess bulk allocate 01184 #ifdef WCHAR_UTF16 // if we're already working in UTF-16, this is easy 01185 code_point tmp; 01186 std::wstring::const_iterator i, ie = wstr.end(); 01187 for ( i = wstr.begin(); i != ie; i++ ) { 01188 tmp = static_cast<code_point>( *i ); 01189 mData.push_back( tmp ); 01190 } 01191 #else // otherwise we do it the safe way (which is still 100% safe to pass UTF-16 through, just slower) 01192 code_point cp[3] = {0, 0, 0}; 01193 unicode_char tmp; 01194 std::wstring::const_iterator i, ie = wstr.end(); 01195 for ( i = wstr.begin(); i != ie; i++ ) { 01196 tmp = static_cast<unicode_char>( *i ); 01197 size_t l = _utf32_to_utf16( tmp, cp ); 01198 if ( l > 0 ) mData.push_back( cp[0] ); 01199 if ( l > 1 ) mData.push_back( cp[1] ); 01200 } 01201 #endif 01202 return *this; 01203 } 01204 #if OGRE_IS_NATIVE_WCHAR_T 01205 01206 UTFString& assign( const wchar_t* w_str ) { 01207 std::wstring tmp; 01208 tmp.assign( w_str ); 01209 return assign( tmp ); 01210 } 01212 UTFString& assign( const wchar_t* w_str, size_type num ) { 01213 std::wstring tmp; 01214 tmp.assign( w_str, num ); 01215 return assign( tmp ); 01216 } 01217 #endif 01218 01219 UTFString& assign( const std::string& str ) { 01220 size_type len = _verifyUTF8( 
str ); 01221 clear(); // empty our contents, if there are any 01222 reserve( len ); // best guess bulk capacity growth 01223 01224 // This is a 3 step process, converting each byte in the UTF-8 stream to UTF-32, 01225 // then converting it to UTF-16, then finally appending the data buffer 01226 01227 unicode_char uc; // temporary Unicode character buffer 01228 unsigned char utf8buf[7]; // temporary UTF-8 buffer 01229 utf8buf[6] = 0; 01230 size_t utf8len; // UTF-8 length 01231 code_point utf16buff[3]; // temporary UTF-16 buffer 01232 utf16buff[2] = 0; 01233 size_t utf16len; // UTF-16 length 01234 01235 std::string::const_iterator i, ie = str.end(); 01236 for ( i = str.begin(); i != ie; i++ ) { 01237 utf8len = _utf8_char_length( static_cast<unsigned char>( *i ) ); // estimate bytes to load 01238 for ( size_t j = 0; j < utf8len; j++ ) { // load the needed UTF-8 bytes 01239 utf8buf[j] = ( static_cast<unsigned char>( *( i + j ) ) ); // we don't increment 'i' here just in case the estimate is wrong (shouldn't happen, but we're being careful) 01240 } 01241 utf8buf[utf8len] = 0; // nul terminate so we throw an exception before running off the end of the buffer 01242 utf8len = _utf8_to_utf32( utf8buf, uc ); // do the UTF-8 -> UTF-32 conversion 01243 i += utf8len - 1; // we subtract 1 for the increment of the 'for' loop 01244 01245 utf16len = _utf32_to_utf16( uc, utf16buff ); // UTF-32 -> UTF-16 conversion 01246 append( utf16buff, utf16len ); // append the characters to the string 01247 } 01248 return *this; 01249 } 01251 UTFString& assign( const char* c_str ) { 01252 std::string tmp( c_str ); 01253 return assign( tmp ); 01254 } 01256 UTFString& assign( const char* c_str, size_type num ) { 01257 std::string tmp; 01258 tmp.assign( c_str, num ); 01259 return assign( tmp ); 01260 } 01262 01264 01266 01267 01268 UTFString& append( const UTFString& str ) { 01269 mData.append( str.mData ); 01270 return *this; 01271 } 01273 UTFString& append( const code_point* str ) { 01274 
mData.append( str ); 01275 return *this; 01276 } 01278 UTFString& append( const UTFString& str, size_type index, size_type len ) { 01279 mData.append( str.mData, index, len ); 01280 return *this; 01281 } 01283 UTFString& append( const code_point* str, size_type num ) { 01284 mData.append( str, num ); 01285 return *this; 01286 } 01288 UTFString& append( size_type num, code_point ch ) { 01289 mData.append( num, ch ); 01290 return *this; 01291 } 01293 UTFString& append( iterator start, iterator end ) { 01294 mData.append( start.mIter, end.mIter ); 01295 return *this; 01296 } 01297 #if OGRE_IS_NATIVE_WCHAR_T 01298 01299 UTFString& append( const wchar_t* w_str, size_type num ) { 01300 std::wstring tmp( w_str, num ); 01301 return append( tmp ); 01302 } 01304 UTFString& append( size_type num, wchar_t ch ) { 01305 return append( num, static_cast<unicode_char>( ch ) ); 01306 } 01307 #endif 01308 01309 UTFString& append( const char* c_str, size_type num ) { 01310 UTFString tmp( c_str, num ); 01311 append( tmp ); 01312 return *this; 01313 } 01315 UTFString& append( size_type num, char ch ) { 01316 append( num, static_cast<code_point>( ch ) ); 01317 return *this; 01318 } 01320 UTFString& append( size_type num, unicode_char ch ) { 01321 code_point cp[2] = {0, 0}; 01322 if ( _utf32_to_utf16( ch, cp ) == 2 ) { 01323 for ( size_type i = 0; i < num; i++ ) { 01324 append( 1, cp[0] ); 01325 append( 1, cp[1] ); 01326 } 01327 } else { 01328 for ( size_type i = 0; i < num; i++ ) { 01329 append( 1, cp[0] ); 01330 } 01331 } 01332 return *this; 01333 } 01335 01337 01339 01340 01341 iterator insert( iterator i, const code_point& ch ) { 01342 iterator ret; 01343 ret.mIter = mData.insert( i.mIter, ch ); 01344 ret.mString = this; 01345 return ret; 01346 } 01348 UTFString& insert( size_type index, const UTFString& str ) { 01349 mData.insert( index, str.mData ); 01350 return *this; 01351 } 01353 UTFString& insert( size_type index, const code_point* str ) { 01354 mData.insert( index, str ); 01355 
return *this; 01356 } 01358 UTFString& insert( size_type index1, const UTFString& str, size_type index2, size_type num ) { 01359 mData.insert( index1, str.mData, index2, num ); 01360 return *this; 01361 } 01363 void insert( iterator i, iterator start, iterator end ) { 01364 mData.insert( i.mIter, start.mIter, end.mIter ); 01365 } 01367 UTFString& insert( size_type index, const code_point* str, size_type num ) { 01368 mData.insert( index, str, num ); 01369 return *this; 01370 } 01371 #if OGRE_IS_NATIVE_WCHAR_T 01372 01373 UTFString& insert( size_type index, const wchar_t* w_str, size_type num ) { 01374 UTFString tmp( w_str, num ); 01375 insert( index, tmp ); 01376 return *this; 01377 } 01378 #endif 01379 01380 UTFString& insert( size_type index, const char* c_str, size_type num ) { 01381 UTFString tmp( c_str, num ); 01382 insert( index, tmp ); 01383 return *this; 01384 } 01386 UTFString& insert( size_type index, size_type num, code_point ch ) { 01387 mData.insert( index, num, ch ); 01388 return *this; 01389 } 01390 #if OGRE_IS_NATIVE_WCHAR_T 01391 01392 UTFString& insert( size_type index, size_type num, wchar_t ch ) { 01393 insert( index, num, static_cast<unicode_char>( ch ) ); 01394 return *this; 01395 } 01396 #endif 01397 01398 UTFString& insert( size_type index, size_type num, char ch ) { 01399 insert( index, num, static_cast<code_point>( ch ) ); 01400 return *this; 01401 } 01403 UTFString& insert( size_type index, size_type num, unicode_char ch ) { 01404 code_point cp[3] = {0, 0, 0}; 01405 size_t l = _utf32_to_utf16( ch, cp ); 01406 if ( l == 1 ) { 01407 return insert( index, num, cp[0] ); 01408 } 01409 for ( size_type c = 0; c < num; c++ ) { 01410 // insert in reverse order to preserve ordering after insert 01411 insert( index, 1, cp[1] ); 01412 insert( index, 1, cp[0] ); 01413 } 01414 return *this; 01415 } 01417 void insert( iterator i, size_type num, const code_point& ch ) { 01418 mData.insert( i.mIter, num, ch ); 01419 } 01420 #if OGRE_IS_NATIVE_WCHAR_T 
01421 01422 void insert( iterator i, size_type num, const wchar_t& ch ) { 01423 insert( i, num, static_cast<unicode_char>( ch ) ); 01424 } 01425 #endif 01426 01427 void insert( iterator i, size_type num, const char& ch ) { 01428 insert( i, num, static_cast<code_point>( ch ) ); 01429 } 01431 void insert( iterator i, size_type num, const unicode_char& ch ) { 01432 code_point cp[3] = {0, 0, 0}; 01433 size_t l = _utf32_to_utf16( ch, cp ); 01434 if ( l == 1 ) { 01435 insert( i, num, cp[0] ); 01436 } else { 01437 for ( size_type c = 0; c < num; c++ ) { 01438 // insert in reverse order to preserve ordering after insert 01439 insert( i, 1, cp[1] ); 01440 insert( i, 1, cp[0] ); 01441 } 01442 } 01443 } 01445 01447 01449 01450 01451 iterator erase( iterator loc ) { 01452 iterator ret; 01453 ret.mIter = mData.erase( loc.mIter ); 01454 ret.mString = this; 01455 return ret; 01456 } 01458 iterator erase( iterator start, iterator end ) { 01459 iterator ret; 01460 ret.mIter = mData.erase( start.mIter, end.mIter ); 01461 ret.mString = this; 01462 return ret; 01463 } 01465 UTFString& erase( size_type index = 0, size_type num = npos ) { 01466 if ( num == npos ) 01467 mData.erase( index ); 01468 else 01469 mData.erase( index, num ); 01470 return *this; 01471 } 01473 01475 01477 01478 01479 UTFString& replace( size_type index1, size_type num1, const UTFString& str ) { 01480 mData.replace( index1, num1, str.mData, 0, npos ); 01481 return *this; 01482 } 01484 UTFString& replace( size_type index1, size_type num1, const UTFString& str, size_type num2 ) { 01485 mData.replace( index1, num1, str.mData, 0, num2 ); 01486 return *this; 01487 } 01489 UTFString& replace( size_type index1, size_type num1, const UTFString& str, size_type index2, size_type num2 ) { 01490 mData.replace( index1, num1, str.mData, index2, num2 ); 01491 return *this; 01492 } 01494 UTFString& replace( iterator start, iterator end, const UTFString& str, size_type num = npos ) { 01495 _const_fwd_iterator st(start); //Work 
around for gcc, allow it to find correct overload 01496 01497 size_type index1 = begin() - st; 01498 size_type num1 = end - st; 01499 return replace( index1, num1, str, 0, num ); 01500 } 01502 UTFString& replace( size_type index, size_type num1, size_type num2, code_point ch ) { 01503 mData.replace( index, num1, num2, ch ); 01504 return *this; 01505 } 01507 UTFString& replace( iterator start, iterator end, size_type num, code_point ch ) { 01508 _const_fwd_iterator st(start); //Work around for gcc, allow it to find correct overload 01509 01510 size_type index1 = begin() - st; 01511 size_type num1 = end - st; 01512 return replace( index1, num1, num, ch ); 01513 } 01515 01517 01519 01520 01521 int compare( const UTFString& str ) const { 01522 return mData.compare( str.mData ); 01523 } 01525 int compare( const code_point* str ) const { 01526 return mData.compare( str ); 01527 } 01529 int compare( size_type index, size_type length, const UTFString& str ) const { 01530 return mData.compare( index, length, str.mData ); 01531 } 01533 int compare( size_type index, size_type length, const UTFString& str, size_type index2, size_type length2 ) const { 01534 return mData.compare( index, length, str.mData, index2, length2 ); 01535 } 01537 int compare( size_type index, size_type length, const code_point* str, size_type length2 ) const { 01538 return mData.compare( index, length, str, length2 ); 01539 } 01540 #if OGRE_IS_NATIVE_WCHAR_T 01541 01542 int compare( size_type index, size_type length, const wchar_t* w_str, size_type length2 ) const { 01543 UTFString tmp( w_str, length2 ); 01544 return compare( index, length, tmp ); 01545 } 01546 #endif 01547 01548 int compare( size_type index, size_type length, const char* c_str, size_type length2 ) const { 01549 UTFString tmp( c_str, length2 ); 01550 return compare( index, length, tmp ); 01551 } 01553 01555 01557 01558 01559 01560 size_type find( const UTFString& str, size_type index = 0 ) const { 01561 return mData.find( str.c_str(), 
index ); 01562 } 01564 01565 size_type find( const code_point* cp_str, size_type index, size_type length ) const { 01566 UTFString tmp( cp_str ); 01567 return mData.find( tmp.c_str(), index, length ); 01568 } 01570 01571 size_type find( const char* c_str, size_type index, size_type length ) const { 01572 UTFString tmp( c_str ); 01573 return mData.find( tmp.c_str(), index, length ); 01574 } 01575 #if OGRE_IS_NATIVE_WCHAR_T 01576 01577 01578 size_type find( const wchar_t* w_str, size_type index, size_type length ) const { 01579 UTFString tmp( w_str ); 01580 return mData.find( tmp.c_str(), index, length ); 01581 } 01582 #endif 01583 01584 01585 size_type find( char ch, size_type index = 0 ) const { 01586 return find( static_cast<code_point>( ch ), index ); 01587 } 01589 01590 size_type find( code_point ch, size_type index = 0 ) const { 01591 return mData.find( ch, index ); 01592 } 01593 #if OGRE_IS_NATIVE_WCHAR_T 01594 01595 01596 size_type find( wchar_t ch, size_type index = 0 ) const { 01597 return find( static_cast<unicode_char>( ch ), index ); 01598 } 01599 #endif 01600 01601 01602 size_type find( unicode_char ch, size_type index = 0 ) const { 01603 code_point cp[3] = {0, 0, 0}; 01604 size_t l = _utf32_to_utf16( ch, cp ); 01605 return find( UTFString( cp, l ), index ); 01606 } 01607 01609 size_type rfind( const UTFString& str, size_type index = 0 ) const { 01610 return mData.rfind( str.c_str(), index ); 01611 } 01613 size_type rfind( const code_point* cp_str, size_type index, size_type num ) const { 01614 UTFString tmp( cp_str ); 01615 return mData.rfind( tmp.c_str(), index, num ); 01616 } 01618 size_type rfind( const char* c_str, size_type index, size_type num ) const { 01619 UTFString tmp( c_str ); 01620 return mData.rfind( tmp.c_str(), index, num ); 01621 } 01622 #if OGRE_IS_NATIVE_WCHAR_T 01623 01624 size_type rfind( const wchar_t* w_str, size_type index, size_type num ) const { 01625 UTFString tmp( w_str ); 01626 return mData.rfind( tmp.c_str(), index, num ); 
01627 } 01628 #endif 01629 01630 size_type rfind( char ch, size_type index = 0 ) const { 01631 return rfind( static_cast<code_point>( ch ), index ); 01632 } 01634 size_type rfind( code_point ch, size_type index ) const { 01635 return mData.rfind( ch, index ); 01636 } 01637 #if OGRE_IS_NATIVE_WCHAR_T 01638 01639 size_type rfind( wchar_t ch, size_type index = 0 ) const { 01640 return rfind( static_cast<unicode_char>( ch ), index ); 01641 } 01642 #endif 01643 01644 size_type rfind( unicode_char ch, size_type index = 0 ) const { 01645 code_point cp[3] = {0, 0, 0}; 01646 size_t l = _utf32_to_utf16( ch, cp ); 01647 return rfind( UTFString( cp, l ), index ); 01648 } 01650 01652 01654 01655 01656 size_type find_first_of( const UTFString &str, size_type index = 0, size_type num = npos ) const { 01657 size_type i = 0; 01658 const size_type len = length(); 01659 while ( i < num && ( index + i ) < len ) { 01660 unicode_char ch = getChar( index + i ); 01661 if ( str.inString( ch ) ) 01662 return index + i; 01663 i += _utf16_char_length( ch ); // increment by the Unicode character length 01664 } 01665 return npos; 01666 } 01668 size_type find_first_of( code_point ch, size_type index = 0 ) const { 01669 UTFString tmp; 01670 tmp.assign( 1, ch ); 01671 return find_first_of( tmp, index ); 01672 } 01674 size_type find_first_of( char ch, size_type index = 0 ) const { 01675 return find_first_of( static_cast<code_point>( ch ), index ); 01676 } 01677 #if OGRE_IS_NATIVE_WCHAR_T 01678 01679 size_type find_first_of( wchar_t ch, size_type index = 0 ) const { 01680 return find_first_of( static_cast<unicode_char>( ch ), index ); 01681 } 01682 #endif 01683 01684 size_type find_first_of( unicode_char ch, size_type index = 0 ) const { 01685 code_point cp[3] = {0, 0, 0}; 01686 size_t l = _utf32_to_utf16( ch, cp ); 01687 return find_first_of( UTFString( cp, l ), index ); 01688 } 01689 01691 size_type find_first_not_of( const UTFString& str, size_type index = 0, size_type num = npos ) const { 01692 
size_type i = 0; 01693 const size_type len = length(); 01694 while ( i < num && ( index + i ) < len ) { 01695 unicode_char ch = getChar( index + i ); 01696 if ( !str.inString( ch ) ) 01697 return index + i; 01698 i += _utf16_char_length( ch ); // increment by the Unicode character length 01699 } 01700 return npos; 01701 } 01703 size_type find_first_not_of( code_point ch, size_type index = 0 ) const { 01704 UTFString tmp; 01705 tmp.assign( 1, ch ); 01706 return find_first_not_of( tmp, index ); 01707 } 01709 size_type find_first_not_of( char ch, size_type index = 0 ) const { 01710 return find_first_not_of( static_cast<code_point>( ch ), index ); 01711 } 01712 #if OGRE_IS_NATIVE_WCHAR_T 01713 01714 size_type find_first_not_of( wchar_t ch, size_type index = 0 ) const { 01715 return find_first_not_of( static_cast<unicode_char>( ch ), index ); 01716 } 01717 #endif 01718 01719 size_type find_first_not_of( unicode_char ch, size_type index = 0 ) const { 01720 code_point cp[3] = {0, 0, 0}; 01721 size_t l = _utf32_to_utf16( ch, cp ); 01722 return find_first_not_of( UTFString( cp, l ), index ); 01723 } 01724 01726 size_type find_last_of( const UTFString& str, size_type index = npos, size_type num = npos ) const { 01727 size_type i = 0; 01728 const size_type len = length(); 01729 if ( index > len ) index = len - 1; 01730 01731 while ( i < num && ( index - i ) != npos ) { 01732 size_type j = index - i; 01733 // careful to step full Unicode characters 01734 if ( j != 0 && _utf16_surrogate_follow( at( j ) ) && _utf16_surrogate_lead( at( j - 1 ) ) ) { 01735 j = index - ++i; 01736 } 01737 // and back to the usual dull test 01738 unicode_char ch = getChar( j ); 01739 if ( str.inString( ch ) ) 01740 return j; 01741 i++; 01742 } 01743 return npos; 01744 } 01746 size_type find_last_of( code_point ch, size_type index = npos ) const { 01747 UTFString tmp; 01748 tmp.assign( 1, ch ); 01749 return find_last_of( tmp, index ); 01750 } 01752 size_type find_last_of( char ch, size_type index = 
npos ) const { 01753 return find_last_of( static_cast<code_point>( ch ), index ); 01754 } 01755 #if OGRE_IS_NATIVE_WCHAR_T 01756 01757 size_type find_last_of( wchar_t ch, size_type index = npos ) const { 01758 return find_last_of( static_cast<unicode_char>( ch ), index ); 01759 } 01760 #endif 01761 01762 size_type find_last_of( unicode_char ch, size_type index = npos ) const { 01763 code_point cp[3] = {0, 0, 0}; 01764 size_t l = _utf32_to_utf16( ch, cp ); 01765 return find_last_of( UTFString( cp, l ), index ); 01766 } 01767 01769 size_type find_last_not_of( const UTFString& str, size_type index = npos, size_type num = npos ) const { 01770 size_type i = 0; 01771 const size_type len = length(); 01772 if ( index > len ) index = len - 1; 01773 01774 while ( i < num && ( index - i ) != npos ) { 01775 size_type j = index - i; 01776 // careful to step full Unicode characters 01777 if ( j != 0 && _utf16_surrogate_follow( at( j ) ) && _utf16_surrogate_lead( at( j - 1 ) ) ) { 01778 j = index - ++i; 01779 } 01780 // and back to the usual dull test 01781 unicode_char ch = getChar( j ); 01782 if ( !str.inString( ch ) ) 01783 return j; 01784 i++; 01785 } 01786 return npos; 01787 } 01789 size_type find_last_not_of( code_point ch, size_type index = npos ) const { 01790 UTFString tmp; 01791 tmp.assign( 1, ch ); 01792 return find_last_not_of( tmp, index ); 01793 } 01795 size_type find_last_not_of( char ch, size_type index = npos ) const { 01796 return find_last_not_of( static_cast<code_point>( ch ), index ); 01797 } 01798 #if OGRE_IS_NATIVE_WCHAR_T 01799 01800 size_type find_last_not_of( wchar_t ch, size_type index = npos ) const { 01801 return find_last_not_of( static_cast<unicode_char>( ch ), index ); 01802 } 01803 #endif 01804 01805 size_type find_last_not_of( unicode_char ch, size_type index = npos ) const { 01806 code_point cp[3] = {0, 0, 0}; 01807 size_t l = _utf32_to_utf16( ch, cp ); 01808 return find_last_not_of( UTFString( cp, l ), index ); 01809 } 01811 01813 01815 01816 
01817 bool operator<( const UTFString& right ) const { 01818 return compare( right ) < 0; 01819 } 01821 bool operator<=( const UTFString& right ) const { 01822 return compare( right ) <= 0; 01823 } 01825 bool operator>( const UTFString& right ) const { 01826 return compare( right ) > 0; 01827 } 01829 bool operator>=( const UTFString& right ) const { 01830 return compare( right ) >= 0; 01831 } 01833 bool operator==( const UTFString& right ) const { 01834 return compare( right ) == 0; 01835 } 01837 bool operator!=( const UTFString& right ) const { 01838 return !operator==( right ); 01839 } 01841 UTFString& operator=( const UTFString& s ) { 01842 return assign( s ); 01843 } 01845 UTFString& operator=( code_point ch ) { 01846 clear(); 01847 return append( 1, ch ); 01848 } 01850 UTFString& operator=( char ch ) { 01851 clear(); 01852 return append( 1, ch ); 01853 } 01854 #if OGRE_IS_NATIVE_WCHAR_T 01855 01856 UTFString& operator=( wchar_t ch ) { 01857 clear(); 01858 return append( 1, ch ); 01859 } 01860 #endif 01861 01862 UTFString& operator=( unicode_char ch ) { 01863 clear(); 01864 return append( 1, ch ); 01865 } 01867 code_point& operator[]( size_type index ) { 01868 return at( index ); 01869 } 01871 const code_point& operator[]( size_type index ) const { 01872 return at( index ); 01873 } 01875 01877 01879 01880 01881 operator std::string() const { 01882 return std::string( asUTF8() ); 01883 } 01885 operator std::wstring() const { 01886 return std::wstring( asWStr() ); 01887 } 01889 01891 01893 01894 01895 static bool _utf16_independent_char( code_point cp ) { 01896 if ( 0xD800 <= cp && cp <= 0xDFFF ) // tests if the cp is within the surrogate pair range 01897 return false; // it matches a surrogate pair signature 01898 return true; // everything else is a standalone code point 01899 } 01901 static bool _utf16_surrogate_lead( code_point cp ) { 01902 if ( 0xD800 <= cp && cp <= 0xDBFF ) // tests if the cp is within the 2nd word of a surrogate pair 01903 return true; // 
it is a 1st word 01904 return false; // it isn't 01905 } 01907 static bool _utf16_surrogate_follow( code_point cp ) { 01908 if ( 0xDC00 <= cp && cp <= 0xDFFF ) // tests if the cp is within the 2nd word of a surrogate pair 01909 return true; // it is a 2nd word 01910 return false; // everything else isn't 01911 } 01913 static size_t _utf16_char_length( code_point cp ) { 01914 if ( 0xD800 <= cp && cp <= 0xDBFF ) // test if cp is the beginning of a surrogate pair 01915 return 2; // if it is, then we are 2 words long 01916 return 1; // otherwise we are only 1 word long 01917 } 01919 static size_t _utf16_char_length( unicode_char uc ) { 01920 if ( uc > 0xFFFF ) // test if uc is greater than the single word maximum 01921 return 2; // if so, we need a surrogate pair 01922 return 1; // otherwise we can stuff it into a single word 01923 } 01925 01929 static size_t _utf16_to_utf32( const code_point in_cp[2], unicode_char& out_uc ) { 01930 const code_point& cp1 = in_cp[0]; 01931 const code_point& cp2 = in_cp[1]; 01932 bool wordPair = false; 01933 01934 // does it look like a surrogate pair? 01935 if ( 0xD800 <= cp1 && cp1 <= 0xDBFF ) { 01936 // looks like one, but does the other half match the algorithm as well? 01937 if ( 0xDC00 <= cp2 && cp2 <= 0xDFFF ) 01938 wordPair = true; // yep! 
01939 } 01940 01941 if ( !wordPair ) { // if we aren't a 100% authentic surrogate pair, then just copy the value 01942 out_uc = cp1; 01943 return 1; 01944 } 01945 01946 unsigned short cU = cp1, cL = cp2; // copy upper and lower words of surrogate pair to writable buffers 01947 cU -= 0xD800; // remove the encoding markers 01948 cL -= 0xDC00; 01949 01950 out_uc = ( cU & 0x03FF ) << 10; // grab the 10 upper bits and set them in their proper location 01951 out_uc |= ( cL & 0x03FF ); // combine in the lower 10 bits 01952 out_uc += 0x10000; // add back in the value offset 01953 01954 return 2; // this whole operation takes to words, so that's what we'll return 01955 } 01957 01962 static size_t _utf32_to_utf16( const unicode_char& in_uc, code_point out_cp[2] ) { 01963 if ( in_uc <= 0xFFFF ) { // we blindly preserve sentinel values because our decoder understands them 01964 out_cp[0] = in_uc; 01965 return 1; 01966 } 01967 unicode_char uc = in_uc; // copy to writable buffer 01968 unsigned short tmp; // single code point buffer 01969 uc -= 0x10000; // subtract value offset 01970 01971 //process upper word 01972 tmp = ( uc >> 10 ) & 0x03FF; // grab the upper 10 bits 01973 tmp += 0xD800; // add encoding offset 01974 out_cp[0] = tmp; // write 01975 01976 // process lower word 01977 tmp = uc & 0x03FF; // grab the lower 10 bits 01978 tmp += 0xDC00; // add encoding offset 01979 out_cp[1] = tmp; // write 01980 01981 return 2; // return used word count (2 for surrogate pairs) 01982 } 01984 01986 01988 01989 01990 static bool _utf8_start_char( unsigned char cp ) { 01991 return ( cp & ~_cont_mask ) != _cont; 01992 } 01994 static size_t _utf8_char_length( unsigned char cp ) { 01995 if ( !( cp & 0x80 ) ) return 1; 01996 if (( cp & ~_lead1_mask ) == _lead1 ) return 2; 01997 if (( cp & ~_lead2_mask ) == _lead2 ) return 3; 01998 if (( cp & ~_lead3_mask ) == _lead3 ) return 4; 01999 if (( cp & ~_lead4_mask ) == _lead4 ) return 5; 02000 if (( cp & ~_lead5_mask ) == _lead5 ) return 6; 02001 
throw invalid_data( "invalid UTF-8 sequence header value" ); 02002 } 02004 static size_t _utf8_char_length( unicode_char uc ) { 02005 /* 02006 7 bit: U-00000000 - U-0000007F: 0xxxxxxx 02007 11 bit: U-00000080 - U-000007FF: 110xxxxx 10xxxxxx 02008 16 bit: U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx 02009 21 bit: U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 02010 26 bit: U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 02011 31 bit: U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 02012 */ 02013 if ( !( uc & ~0x0000007F ) ) return 1; 02014 if ( !( uc & ~0x000007FF ) ) return 2; 02015 if ( !( uc & ~0x0000FFFF ) ) return 3; 02016 if ( !( uc & ~0x001FFFFF ) ) return 4; 02017 if ( !( uc & ~0x03FFFFFF ) ) return 5; 02018 if ( !( uc & ~0x7FFFFFFF ) ) return 6; 02019 throw invalid_data( "invalid UTF-32 value" ); 02020 } 02021 02023 static size_t _utf8_to_utf32( const unsigned char in_cp[6], unicode_char& out_uc ) { 02024 size_t len = _utf8_char_length( in_cp[0] ); 02025 if ( len == 1 ) { // if we are only 1 byte long, then just grab it and exit 02026 out_uc = in_cp[0]; 02027 return 1; 02028 } 02029 02030 unicode_char c = 0; // temporary buffer 02031 size_t i = 0; 02032 switch ( len ) { // load header byte 02033 case 6: 02034 c = in_cp[i] & _lead5_mask; 02035 break; 02036 case 5: 02037 c = in_cp[i] & _lead4_mask; 02038 break; 02039 case 4: 02040 c = in_cp[i] & _lead3_mask; 02041 break; 02042 case 3: 02043 c = in_cp[i] & _lead2_mask; 02044 break; 02045 case 2: 02046 c = in_cp[i] & _lead1_mask; 02047 break; 02048 } 02049 02050 for ( ++i; i < len; i++ ) { // load each continuation byte 02051 if (( in_cp[i] & ~_cont_mask ) != _cont ) 02052 throw invalid_data( "bad UTF-8 continuation byte" ); 02053 c <<= 6; 02054 c |= ( in_cp[i] & _cont_mask ); 02055 } 02056 02057 out_uc = c; // write the final value and return the used byte length 02058 return len; 02059 } 02061 static size_t _utf32_to_utf8( const 
unicode_char& in_uc, unsigned char out_cp[6] ) { 02062 size_t len = _utf8_char_length( in_uc ); // predict byte length of sequence 02063 unicode_char c = in_uc; // copy to temp buffer 02064 02065 //stuff all of the lower bits 02066 for ( size_t i = len - 1; i > 0; i-- ) { 02067 out_cp[i] = (( c ) & _cont_mask ) | _cont; 02068 c >>= 6; 02069 } 02070 02071 //now write the header byte 02072 switch ( len ) { 02073 case 6: 02074 out_cp[0] = (( c ) & _lead5_mask ) | _lead5; 02075 break; 02076 case 5: 02077 out_cp[0] = (( c ) & _lead4_mask ) | _lead4; 02078 break; 02079 case 4: 02080 out_cp[0] = (( c ) & _lead3_mask ) | _lead3; 02081 break; 02082 case 3: 02083 out_cp[0] = (( c ) & _lead2_mask ) | _lead2; 02084 break; 02085 case 2: 02086 out_cp[0] = (( c ) & _lead1_mask ) | _lead1; 02087 break; 02088 case 1: 02089 default: 02090 out_cp[0] = ( c ) & 0x7F; 02091 break; 02092 } 02093 02094 // return the byte length of the sequence 02095 return len; 02096 } 02097 02099 static size_type _verifyUTF8( const unsigned char* c_str ) { 02100 std::string tmp( reinterpret_cast<const char*>( c_str ) ); 02101 return _verifyUTF8( tmp ); 02102 } 02104 static size_type _verifyUTF8( const std::string& str ) { 02105 std::string::const_iterator i, ie = str.end(); 02106 i = str.begin(); 02107 size_type length = 0; 02108 02109 while ( i != ie ) { 02110 // characters pass until we find an extended sequence 02111 if (( *i ) & 0x80 ) { 02112 unsigned char c = ( *i ); 02113 size_t contBytes = 0; 02114 02115 // get continuation byte count and test for overlong sequences 02116 if (( c & ~_lead1_mask ) == _lead1 ) { // 1 additional byte 02117 if ( c == _lead1 ) throw invalid_data( "overlong UTF-8 sequence" ); 02118 contBytes = 1; 02119 02120 } else if (( c & ~_lead2_mask ) == _lead2 ) { // 2 additional bytes 02121 contBytes = 2; 02122 if ( c == _lead2 ) { // possible overlong UTF-8 sequence 02123 c = ( *( i + 1 ) ); // look ahead to next byte in sequence 02124 if (( c & _lead2 ) == _cont ) throw 
invalid_data( "overlong UTF-8 sequence" ); 02125 } 02126 02127 } else if (( c & ~_lead3_mask ) == _lead3 ) { // 3 additional bytes 02128 contBytes = 3; 02129 if ( c == _lead3 ) { // possible overlong UTF-8 sequence 02130 c = ( *( i + 1 ) ); // look ahead to next byte in sequence 02131 if (( c & _lead3 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" ); 02132 } 02133 02134 } else if (( c & ~_lead4_mask ) == _lead4 ) { // 4 additional bytes 02135 contBytes = 4; 02136 if ( c == _lead4 ) { // possible overlong UTF-8 sequence 02137 c = ( *( i + 1 ) ); // look ahead to next byte in sequence 02138 if (( c & _lead4 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" ); 02139 } 02140 02141 } else if (( c & ~_lead5_mask ) == _lead5 ) { // 5 additional bytes 02142 contBytes = 5; 02143 if ( c == _lead5 ) { // possible overlong UTF-8 sequence 02144 c = ( *( i + 1 ) ); // look ahead to next byte in sequence 02145 if (( c & _lead5 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" ); 02146 } 02147 } 02148 02149 // check remaining continuation bytes for 02150 while ( contBytes-- ) { 02151 c = ( *( ++i ) ); // get next byte in sequence 02152 if (( c & ~_cont_mask ) != _cont ) 02153 throw invalid_data( "bad UTF-8 continuation byte" ); 02154 } 02155 } 02156 length++; 02157 i++; 02158 } 02159 return length; 02160 } 02162 02163 private: 02164 //template<class ITER_TYPE> friend class _iterator; 02165 dstring mData; 02166 02168 enum BufferType { 02169 bt_none, 02170 bt_string, 02171 bt_wstring, 02172 bt_utf32string 02173 }; 02174 02176 void _init() { 02177 m_buffer.mVoidBuffer = 0; 02178 m_bufferType = bt_none; 02179 m_bufferSize = 0; 02180 } 02181 02183 // Scratch buffer 02185 void _cleanBuffer() const { 02186 if ( m_buffer.mVoidBuffer != 0 ) { 02187 switch ( m_bufferType ) { 02188 case bt_string: 02189 delete m_buffer.mStrBuffer; 02190 break; 02191 case bt_wstring: 02192 delete m_buffer.mWStrBuffer; 02193 break; 02194 case bt_utf32string: 02195 delete 
m_buffer.mUTF32StrBuffer; 02196 break; 02197 case bt_none: // under the worse of circumstances, this is all we can do, and hope it works out 02198 default: 02199 //delete m_buffer.mVoidBuffer; 02200 // delete void* is undefined, don't do that 02201 assert("This should never happen - mVoidBuffer should never contain something if we " 02202 "don't know the type"); 02203 break; 02204 } 02205 m_buffer.mVoidBuffer = 0; 02206 m_bufferSize = 0; 02207 } 02208 } 02209 02211 void _getBufferStr() const { 02212 if ( m_bufferType != bt_string ) { 02213 _cleanBuffer(); 02214 m_buffer.mStrBuffer = new std::string(); 02215 m_bufferType = bt_string; 02216 } 02217 m_buffer.mStrBuffer->clear(); 02218 } 02220 void _getBufferWStr() const { 02221 if ( m_bufferType != bt_wstring ) { 02222 _cleanBuffer(); 02223 m_buffer.mWStrBuffer = new std::wstring(); 02224 m_bufferType = bt_wstring; 02225 } 02226 m_buffer.mWStrBuffer->clear(); 02227 } 02229 void _getBufferUTF32Str() const { 02230 if ( m_bufferType != bt_utf32string ) { 02231 _cleanBuffer(); 02232 m_buffer.mUTF32StrBuffer = new utf32string(); 02233 m_bufferType = bt_utf32string; 02234 } 02235 m_buffer.mUTF32StrBuffer->clear(); 02236 } 02237 02238 void _load_buffer_UTF8() const { 02239 _getBufferStr(); 02240 std::string& buffer = ( *m_buffer.mStrBuffer ); 02241 buffer.reserve( length() ); 02242 02243 unsigned char utf8buf[6]; 02244 char* charbuf = ( char* )utf8buf; 02245 unicode_char c; 02246 size_t len; 02247 02248 const_iterator i, ie = end(); 02249 for ( i = begin(); i != ie; i.moveNext() ) { 02250 c = i.getCharacter(); 02251 len = _utf32_to_utf8( c, utf8buf ); 02252 size_t j = 0; 02253 while ( j < len ) 02254 buffer.push_back( charbuf[j++] ); 02255 } 02256 } 02257 void _load_buffer_WStr() const { 02258 _getBufferWStr(); 02259 std::wstring& buffer = ( *m_buffer.mWStrBuffer ); 02260 buffer.reserve( length() ); // may over reserve, but should be close enough 02261 #ifdef WCHAR_UTF16 // wchar_t matches UTF-16 02262 const_iterator i, ie = 
end(); 02263 for ( i = begin(); i != ie; ++i ) { 02264 buffer.push_back(( wchar_t )( *i ) ); 02265 } 02266 #else // wchar_t fits UTF-32 02267 unicode_char c; 02268 const_iterator i, ie = end(); 02269 for ( i = begin(); i != ie; i.moveNext() ) { 02270 c = i.getCharacter(); 02271 buffer.push_back(( wchar_t )c ); 02272 } 02273 #endif 02274 } 02275 void _load_buffer_UTF32() const { 02276 _getBufferUTF32Str(); 02277 utf32string& buffer = ( *m_buffer.mUTF32StrBuffer ); 02278 buffer.reserve( length() ); // may over reserve, but should be close enough 02279 02280 unicode_char c; 02281 02282 const_iterator i, ie = end(); 02283 for ( i = begin(); i != ie; i.moveNext() ) { 02284 c = i.getCharacter(); 02285 buffer.push_back( c ); 02286 } 02287 } 02288 02289 mutable BufferType m_bufferType; // identifies the data type held in m_buffer 02290 mutable size_t m_bufferSize; // size of the CString buffer 02291 02292 // multi-purpose buffer used everywhere we need a throw-away buffer 02293 union { 02294 mutable void* mVoidBuffer; 02295 mutable std::string* mStrBuffer; 02296 mutable std::wstring* mWStrBuffer; 02297 mutable utf32string* mUTF32StrBuffer; 02298 } 02299 m_buffer; 02300 }; 02301 02303 inline UTFString operator+( const UTFString& s1, const UTFString& s2 ) { 02304 return UTFString( s1 ).append( s2 ); 02305 } 02307 inline UTFString operator+( const UTFString& s1, UTFString::code_point c ) { 02308 return UTFString( s1 ).append( 1, c ); 02309 } 02311 inline UTFString operator+( const UTFString& s1, UTFString::unicode_char c ) { 02312 return UTFString( s1 ).append( 1, c ); 02313 } 02315 inline UTFString operator+( const UTFString& s1, char c ) { 02316 return UTFString( s1 ).append( 1, c ); 02317 } 02318 #if OGRE_IS_NATIVE_WCHAR_T 02319 02320 inline UTFString operator+( const UTFString& s1, wchar_t c ) { 02321 return UTFString( s1 ).append( 1, c ); 02322 } 02323 #endif 02324 02325 inline UTFString operator+( UTFString::code_point c, const UTFString& s2 ) { 02326 return 
UTFString().append( 1, c ).append( s2 ); 02327 } 02329 inline UTFString operator+( UTFString::unicode_char c, const UTFString& s2 ) { 02330 return UTFString().append( 1, c ).append( s2 ); 02331 } 02333 inline UTFString operator+( char c, const UTFString& s2 ) { 02334 return UTFString().append( 1, c ).append( s2 ); 02335 } 02336 #if OGRE_IS_NATIVE_WCHAR_T 02337 02338 inline UTFString operator+( wchar_t c, const UTFString& s2 ) { 02339 return UTFString().append( 1, c ).append( s2 ); 02340 } 02341 #endif 02342 02343 // (const) forward iterator common operators 02344 inline UTFString::size_type operator-( const UTFString::_const_fwd_iterator& left, const UTFString::_const_fwd_iterator& right ) { 02345 return ( left.mIter - right.mIter ); 02346 } 02347 inline bool operator==( const UTFString::_const_fwd_iterator& left, const UTFString::_const_fwd_iterator& right ) { 02348 return left.mIter == right.mIter; 02349 } 02350 inline bool operator!=( const UTFString::_const_fwd_iterator& left, const UTFString::_const_fwd_iterator& right ) { 02351 return left.mIter != right.mIter; 02352 } 02353 inline bool operator<( const UTFString::_const_fwd_iterator& left, const UTFString::_const_fwd_iterator& right ) { 02354 return left.mIter < right.mIter; 02355 } 02356 inline bool operator<=( const UTFString::_const_fwd_iterator& left, const UTFString::_const_fwd_iterator& right ) { 02357 return left.mIter <= right.mIter; 02358 } 02359 inline bool operator>( const UTFString::_const_fwd_iterator& left, const UTFString::_const_fwd_iterator& right ) { 02360 return left.mIter > right.mIter; 02361 } 02362 inline bool operator>=( const UTFString::_const_fwd_iterator& left, const UTFString::_const_fwd_iterator& right ) { 02363 return left.mIter >= right.mIter; 02364 } 02365 02366 // (const) reverse iterator common operators 02367 // NB: many of these operations are evaluated in reverse because this is a reverse iterator wrapping a forward iterator 02368 inline UTFString::size_type operator-( 
const UTFString::_const_rev_iterator& left, const UTFString::_const_rev_iterator& right ) { 02369 return ( right.mIter - left.mIter ); 02370 } 02371 inline bool operator==( const UTFString::_const_rev_iterator& left, const UTFString::_const_rev_iterator& right ) { 02372 return left.mIter == right.mIter; 02373 } 02374 inline bool operator!=( const UTFString::_const_rev_iterator& left, const UTFString::_const_rev_iterator& right ) { 02375 return left.mIter != right.mIter; 02376 } 02377 inline bool operator<( const UTFString::_const_rev_iterator& left, const UTFString::_const_rev_iterator& right ) { 02378 return right.mIter < left.mIter; 02379 } 02380 inline bool operator<=( const UTFString::_const_rev_iterator& left, const UTFString::_const_rev_iterator& right ) { 02381 return right.mIter <= left.mIter; 02382 } 02383 inline bool operator>( const UTFString::_const_rev_iterator& left, const UTFString::_const_rev_iterator& right ) { 02384 return right.mIter > left.mIter; 02385 } 02386 inline bool operator>=( const UTFString::_const_rev_iterator& left, const UTFString::_const_rev_iterator& right ) { 02387 return right.mIter >= left.mIter; 02388 } 02389 02391 inline std::ostream& operator << ( std::ostream& os, const UTFString& s ) { 02392 return os << s.asUTF8(); 02393 } 02394 02396 inline std::wostream& operator << ( std::wostream& os, const UTFString& s ) { 02397 return os << s.asWStr(); 02398 } 02399 02400 02401 02402 } // namespace Ogre{ 02403 02404 #endif // OGRE_UNICODE_SUPPORT 02405 02406 #endif
Copyright © 2000-2005 by The OGRE Team
This work is licensed under a Creative Commons Attribution-ShareAlike 2.5 License.
Last modified Sun Jul 8 15:20:10 2007