ICU 78.2 78.2
Loading...
Searching...
No Matches
unistr.h
Go to the documentation of this file.
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4**********************************************************************
5* Copyright (C) 1998-2016, International Business Machines
6* Corporation and others. All Rights Reserved.
7**********************************************************************
8*
9* File unistr.h
10*
11* Modification History:
12*
13* Date Name Description
14* 09/25/98 stephen Creation.
15* 11/11/98 stephen Changed per 11/9 code review.
16* 04/20/99 stephen Overhauled per 4/16 code review.
17* 11/18/99 aliu Made to inherit from Replaceable. Added method
18* handleReplaceBetween(); other methods unchanged.
19* 06/25/01 grhoten Remove dependency on iostream.
20******************************************************************************
21*/
22
23#ifndef UNISTR_H
24#define UNISTR_H
25
30
31#include "unicode/utypes.h"
32
33#if U_SHOW_CPLUSPLUS_API
34
35#include <cstddef>
36#include <string_view>
37#include "unicode/char16ptr.h"
38#include "unicode/rep.h"
39#include "unicode/std_string.h"
40#include "unicode/stringpiece.h"
41#include "unicode/bytestream.h"
42
43struct UConverter; // unicode/ucnv.h
44
45#ifndef USTRING_H
51U_CAPI int32_t U_EXPORT2 u_strlen(const UChar *s);
52#endif
53
54U_NAMESPACE_BEGIN
55
56#if !UCONFIG_NO_BREAK_ITERATION
57class BreakIterator; // unicode/brkiter.h
58#endif
59class Edits;
60
61U_NAMESPACE_END
62
63// Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper.
70typedef int32_t U_CALLCONV
71UStringCaseMapper(int32_t caseLocale, uint32_t options,
74#endif
75 char16_t *dest, int32_t destCapacity,
76 const char16_t *src, int32_t srcLength,
77 icu::Edits *edits,
78 UErrorCode &errorCode);
79
80U_NAMESPACE_BEGIN
81
82class Locale; // unicode/locid.h
83class StringCharacterIterator;
84class UnicodeStringAppendable; // unicode/appendable.h
85
86/* The <iostream> include has been moved to unicode/ustream.h */
87
98#define US_INV icu::UnicodeString::kInvariant
99
120#if !U_CHAR16_IS_TYPEDEF
121# define UNICODE_STRING(cs, _length) icu::UnicodeString(true, u ## cs, _length)
122#else
123# define UNICODE_STRING(cs, _length) icu::UnicodeString(true, (const char16_t*)u ## cs, _length)
124#endif
125
135#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
136
144#ifndef UNISTR_FROM_CHAR_EXPLICIT
145# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
146 // Auto-"explicit" in ICU library code.
147# define UNISTR_FROM_CHAR_EXPLICIT explicit
148# else
149 // Empty by default for source code compatibility.
150# define UNISTR_FROM_CHAR_EXPLICIT
151# endif
152#endif
153
164#ifndef UNISTR_FROM_STRING_EXPLICIT
165# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
166 // Auto-"explicit" in ICU library code.
167# define UNISTR_FROM_STRING_EXPLICIT explicit
168# else
169 // Empty by default for source code compatibility.
170# define UNISTR_FROM_STRING_EXPLICIT
171# endif
172#endif
173
207#ifndef UNISTR_OBJECT_SIZE
208# define UNISTR_OBJECT_SIZE 64
209#endif
210
303{
304public:
306 using value_type = char16_t;
307
323
324 //========================================
325 // Read-only operations
326 //========================================
327
328 /* Comparison - bitwise only - for international comparison use collation */
329
337 inline bool operator== (const UnicodeString& text) const;
338
354 template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
355 inline bool operator==(const S &text) const {
356 std::u16string_view sv(internal::toU16StringView(text));
357 uint32_t len; // unsigned to avoid a compiler warning
358 return !isBogus() && (len = length()) == sv.length() && doEquals(sv.data(), len);
359 }
360
368 inline bool operator!= (const UnicodeString& text) const;
369
387 template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
388 inline bool operator!=(const S &text) const {
389 return !operator==(text);
390 }
391
399 inline UBool operator> (const UnicodeString& text) const;
400
408 inline UBool operator< (const UnicodeString& text) const;
409
417 inline UBool operator>= (const UnicodeString& text) const;
418
426 inline UBool operator<= (const UnicodeString& text) const;
427
439 inline int8_t compare(const UnicodeString& text) const;
440
456 inline int8_t compare(int32_t start,
457 int32_t length,
458 const UnicodeString& text) const;
459
477 inline int8_t compare(int32_t start,
478 int32_t length,
479 const UnicodeString& srcText,
480 int32_t srcStart,
481 int32_t srcLength) const;
482
495 inline int8_t compare(ConstChar16Ptr srcChars,
496 int32_t srcLength) const;
497
512 inline int8_t compare(int32_t start,
513 int32_t length,
514 const char16_t *srcChars) const;
515
533 inline int8_t compare(int32_t start,
534 int32_t length,
535 const char16_t *srcChars,
536 int32_t srcStart,
537 int32_t srcLength) const;
538
556 inline int8_t compareBetween(int32_t start,
557 int32_t limit,
558 const UnicodeString& srcText,
559 int32_t srcStart,
560 int32_t srcLimit) const;
561
579 inline int8_t compareCodePointOrder(const UnicodeString& text) const;
580
600 inline int8_t compareCodePointOrder(int32_t start,
601 int32_t length,
602 const UnicodeString& srcText) const;
603
625 inline int8_t compareCodePointOrder(int32_t start,
626 int32_t length,
627 const UnicodeString& srcText,
628 int32_t srcStart,
629 int32_t srcLength) const;
630
649 inline int8_t compareCodePointOrder(ConstChar16Ptr srcChars,
650 int32_t srcLength) const;
651
671 inline int8_t compareCodePointOrder(int32_t start,
672 int32_t length,
673 const char16_t *srcChars) const;
674
696 inline int8_t compareCodePointOrder(int32_t start,
697 int32_t length,
698 const char16_t *srcChars,
699 int32_t srcStart,
700 int32_t srcLength) const;
701
723 inline int8_t compareCodePointOrderBetween(int32_t start,
724 int32_t limit,
725 const UnicodeString& srcText,
726 int32_t srcStart,
727 int32_t srcLimit) const;
728
747 inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
748
769 inline int8_t caseCompare(int32_t start,
770 int32_t length,
771 const UnicodeString& srcText,
772 uint32_t options) const;
773
796 inline int8_t caseCompare(int32_t start,
797 int32_t length,
798 const UnicodeString& srcText,
799 int32_t srcStart,
800 int32_t srcLength,
801 uint32_t options) const;
802
822 inline int8_t caseCompare(ConstChar16Ptr srcChars,
823 int32_t srcLength,
824 uint32_t options) const;
825
846 inline int8_t caseCompare(int32_t start,
847 int32_t length,
848 const char16_t *srcChars,
849 uint32_t options) const;
850
873 inline int8_t caseCompare(int32_t start,
874 int32_t length,
875 const char16_t *srcChars,
876 int32_t srcStart,
877 int32_t srcLength,
878 uint32_t options) const;
879
902 inline int8_t caseCompareBetween(int32_t start,
903 int32_t limit,
904 const UnicodeString& srcText,
905 int32_t srcStart,
906 int32_t srcLimit,
907 uint32_t options) const;
908
916 inline UBool startsWith(const UnicodeString& text) const;
917
928 inline UBool startsWith(const UnicodeString& srcText,
929 int32_t srcStart,
930 int32_t srcLength) const;
931
940 inline UBool startsWith(ConstChar16Ptr srcChars,
941 int32_t srcLength) const;
942
952 inline UBool startsWith(const char16_t *srcChars,
953 int32_t srcStart,
954 int32_t srcLength) const;
955
963 inline UBool endsWith(const UnicodeString& text) const;
964
975 inline UBool endsWith(const UnicodeString& srcText,
976 int32_t srcStart,
977 int32_t srcLength) const;
978
987 inline UBool endsWith(ConstChar16Ptr srcChars,
988 int32_t srcLength) const;
989
1000 inline UBool endsWith(const char16_t *srcChars,
1001 int32_t srcStart,
1002 int32_t srcLength) const;
1003
1004
1005 /* Searching - bitwise only */
1006
1015 inline int32_t indexOf(const UnicodeString& text) const;
1016
1026 inline int32_t indexOf(const UnicodeString& text,
1027 int32_t start) const;
1028
1040 inline int32_t indexOf(const UnicodeString& text,
1041 int32_t start,
1042 int32_t length) const;
1043
1060 inline int32_t indexOf(const UnicodeString& srcText,
1061 int32_t srcStart,
1062 int32_t srcLength,
1063 int32_t start,
1064 int32_t length) const;
1065
1077 inline int32_t indexOf(const char16_t *srcChars,
1078 int32_t srcLength,
1079 int32_t start) const;
1080
1093 inline int32_t indexOf(ConstChar16Ptr srcChars,
1094 int32_t srcLength,
1095 int32_t start,
1096 int32_t length) const;
1097
1114 int32_t indexOf(const char16_t *srcChars,
1115 int32_t srcStart,
1116 int32_t srcLength,
1117 int32_t start,
1118 int32_t length) const;
1119
1127 inline int32_t indexOf(char16_t c) const;
1128
1137 inline int32_t indexOf(UChar32 c) const;
1138
1147 inline int32_t indexOf(char16_t c,
1148 int32_t start) const;
1149
1159 inline int32_t indexOf(UChar32 c,
1160 int32_t start) const;
1161
1172 inline int32_t indexOf(char16_t c,
1173 int32_t start,
1174 int32_t length) const;
1175
1187 inline int32_t indexOf(UChar32 c,
1188 int32_t start,
1189 int32_t length) const;
1190
1199 inline int32_t lastIndexOf(const UnicodeString& text) const;
1200
1210 inline int32_t lastIndexOf(const UnicodeString& text,
1211 int32_t start) const;
1212
1224 inline int32_t lastIndexOf(const UnicodeString& text,
1225 int32_t start,
1226 int32_t length) const;
1227
1244 inline int32_t lastIndexOf(const UnicodeString& srcText,
1245 int32_t srcStart,
1246 int32_t srcLength,
1247 int32_t start,
1248 int32_t length) const;
1249
1260 inline int32_t lastIndexOf(const char16_t *srcChars,
1261 int32_t srcLength,
1262 int32_t start) const;
1263
1276 inline int32_t lastIndexOf(ConstChar16Ptr srcChars,
1277 int32_t srcLength,
1278 int32_t start,
1279 int32_t length) const;
1280
1297 int32_t lastIndexOf(const char16_t *srcChars,
1298 int32_t srcStart,
1299 int32_t srcLength,
1300 int32_t start,
1301 int32_t length) const;
1302
1310 inline int32_t lastIndexOf(char16_t c) const;
1311
1320 inline int32_t lastIndexOf(UChar32 c) const;
1321
1330 inline int32_t lastIndexOf(char16_t c,
1331 int32_t start) const;
1332
1342 inline int32_t lastIndexOf(UChar32 c,
1343 int32_t start) const;
1344
1355 inline int32_t lastIndexOf(char16_t c,
1356 int32_t start,
1357 int32_t length) const;
1358
1370 inline int32_t lastIndexOf(UChar32 c,
1371 int32_t start,
1372 int32_t length) const;
1373
1374
1375 /* Character access */
1376
1385 inline char16_t charAt(int32_t offset) const;
1386
1394 inline char16_t operator[] (int32_t offset) const;
1395
1407 UChar32 char32At(int32_t offset) const;
1408
1424 int32_t getChar32Start(int32_t offset) const;
1425
1442 int32_t getChar32Limit(int32_t offset) const;
1443
1494 int32_t moveIndex32(int32_t index, int32_t delta) const;
1495
1496 /* Substring extraction */
1497
1513 inline void extract(int32_t start,
1514 int32_t length,
1515 Char16Ptr dst,
1516 int32_t dstStart = 0) const;
1517
1539 int32_t
1540 extract(Char16Ptr dest, int32_t destCapacity,
1541 UErrorCode &errorCode) const;
1542
1552 inline void extract(int32_t start,
1553 int32_t length,
1554 UnicodeString& target) const;
1555
1567 inline void extractBetween(int32_t start,
1568 int32_t limit,
1569 char16_t *dst,
1570 int32_t dstStart = 0) const;
1571
1580 virtual void extractBetween(int32_t start,
1581 int32_t limit,
1582 UnicodeString& target) const override;
1583
1605 int32_t extract(int32_t start,
1606 int32_t startLength,
1607 char *target,
1608 int32_t targetCapacity,
1609 enum EInvariant inv) const;
1610
1611#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
1612
1632 int32_t extract(int32_t start,
1633 int32_t startLength,
1634 char *target,
1635 uint32_t targetLength) const;
1636
1637#endif
1638
1639#if !UCONFIG_NO_CONVERSION
1640
1666 inline int32_t extract(int32_t start,
1667 int32_t startLength,
1668 char* target,
1669 const char* codepage = nullptr) const;
1670
1700 int32_t extract(int32_t start,
1701 int32_t startLength,
1702 char *target,
1703 uint32_t targetLength,
1704 const char *codepage) const;
1705
1723 int32_t extract(char *dest, int32_t destCapacity,
1724 UConverter *cnv,
1725 UErrorCode &errorCode) const;
1726
1727#endif
1728
1742 UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
1743
1754 inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
1755
1767 void toUTF8(ByteSink &sink) const;
1768
1782 template<typename StringClass>
1783 StringClass &toUTF8String(StringClass &result) const {
1784 StringByteSink<StringClass> sbs(&result, length());
1785 toUTF8(sbs);
1786 return result;
1787 }
1788
1789#ifndef U_HIDE_DRAFT_API
1801 template<typename StringClass>
1802 StringClass toUTF8String() const {
1803 StringClass result;
1804 StringByteSink<StringClass> sbs(&result, length());
1805 toUTF8(sbs);
1806 return result;
1807 }
1808#endif // U_HIDE_DRAFT_API
1809
1825 int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
1826
1827 /* Length operations */
1828
1837 inline int32_t length() const;
1838
1852 int32_t
1853 countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
1854
1878 UBool
1879 hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
1880
1886 inline UBool isEmpty() const;
1887
1897 inline int32_t getCapacity() const;
1898
1899 /* Other operations */
1900
1906 inline int32_t hashCode() const;
1907
1920 inline UBool isBogus() const;
1921
1922#ifndef U_HIDE_DRAFT_API
1923private:
1924 // These type aliases are private; there is no guarantee that they will remain
1925 // aliases to the same types in subsequent versions of ICU.
1926 // Note that whether `std::u16string_view::const_iterator` is a pointer or a
1927 // class that models contiguous_iterator is platform-dependent.
1928 using unspecified_iterator = std::u16string_view::const_iterator;
1929 using unspecified_reverse_iterator = std::u16string_view::const_reverse_iterator;
1930
1931public:
1937 unspecified_iterator begin() const { return std::u16string_view(*this).begin(); }
1943 unspecified_iterator end() const { return std::u16string_view(*this).end(); }
1949 unspecified_reverse_iterator rbegin() const { return std::u16string_view(*this).rbegin(); }
1955 unspecified_reverse_iterator rend() const { return std::u16string_view(*this).rend(); }
1956#endif // U_HIDE_DRAFT_API
1957
1958 //========================================
1959 // Write operations
1960 //========================================
1961
1962 /* Assignment operations */
1963
1983
2010
2021 template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
2022 inline UnicodeString &operator=(const S &src) {
2023 unBogus();
2024 return doReplace(0, length(), internal::toU16StringView(src));
2025 }
2026
2036
2042 void swap(UnicodeString &other) noexcept;
2043
2050 friend inline void U_EXPORT2
2051 swap(UnicodeString &s1, UnicodeString &s2) noexcept {
2052 s1.swap(s2);
2053 }
2054
2062 inline UnicodeString& operator= (char16_t ch);
2063
2071 inline UnicodeString& operator= (UChar32 ch);
2072
2084 inline UnicodeString& setTo(const UnicodeString& srcText,
2085 int32_t srcStart);
2086
2100 inline UnicodeString& setTo(const UnicodeString& srcText,
2101 int32_t srcStart,
2102 int32_t srcLength);
2103
2112 inline UnicodeString& setTo(const UnicodeString& srcText);
2113
2122 inline UnicodeString& setTo(const char16_t *srcChars,
2123 int32_t srcLength);
2124
2133 inline UnicodeString& setTo(char16_t srcChar);
2134
2143 inline UnicodeString& setTo(UChar32 srcChar);
2144
2169 ConstChar16Ptr text,
2170 int32_t textLength);
2171
2191 UnicodeString &setTo(char16_t *buffer,
2192 int32_t buffLength,
2193 int32_t buffCapacity);
2194
2235
2243 UnicodeString& setCharAt(int32_t offset,
2244 char16_t ch);
2245
2246
2247 /* Append operations */
2248
2256 inline UnicodeString& operator+= (char16_t ch);
2257
2265 inline UnicodeString& operator+= (UChar32 ch);
2266
2274 inline UnicodeString& operator+= (const UnicodeString& srcText);
2275
2286 template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
2287 inline UnicodeString& operator+=(const S &src) {
2288 return doAppend(internal::toU16StringView(src));
2289 }
2290
2305 inline UnicodeString& append(const UnicodeString& srcText,
2306 int32_t srcStart,
2307 int32_t srcLength);
2308
2316 inline UnicodeString& append(const UnicodeString& srcText);
2317
2331 inline UnicodeString& append(const char16_t *srcChars,
2332 int32_t srcStart,
2333 int32_t srcLength);
2334
2344 inline UnicodeString& append(ConstChar16Ptr srcChars,
2345 int32_t srcLength);
2346
2357 template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
2358 inline UnicodeString& append(const S &src) {
2359 return doAppend(internal::toU16StringView(src));
2360 }
2361
2368 inline UnicodeString& append(char16_t srcChar);
2369
2377
2378#ifndef U_HIDE_DRAFT_API
2386 inline void push_back(char16_t c) { append(c); }
2387#endif // U_HIDE_DRAFT_API
2388
2389 /* Insert operations */
2390
2404 inline UnicodeString& insert(int32_t start,
2405 const UnicodeString& srcText,
2406 int32_t srcStart,
2407 int32_t srcLength);
2408
2417 inline UnicodeString& insert(int32_t start,
2418 const UnicodeString& srcText);
2419
2433 inline UnicodeString& insert(int32_t start,
2434 const char16_t *srcChars,
2435 int32_t srcStart,
2436 int32_t srcLength);
2437
2447 inline UnicodeString& insert(int32_t start,
2448 ConstChar16Ptr srcChars,
2449 int32_t srcLength);
2450
2459 inline UnicodeString& insert(int32_t start,
2460 char16_t srcChar);
2461
2470 inline UnicodeString& insert(int32_t start,
2471 UChar32 srcChar);
2472
2473
2474 /* Replace operations */
2475
2493 inline UnicodeString& replace(int32_t start,
2494 int32_t length,
2495 const UnicodeString& srcText,
2496 int32_t srcStart,
2497 int32_t srcLength);
2498
2511 inline UnicodeString& replace(int32_t start,
2512 int32_t length,
2513 const UnicodeString& srcText);
2514
2532 inline UnicodeString& replace(int32_t start,
2533 int32_t length,
2534 const char16_t *srcChars,
2535 int32_t srcStart,
2536 int32_t srcLength);
2537
2550 inline UnicodeString& replace(int32_t start,
2551 int32_t length,
2552 ConstChar16Ptr srcChars,
2553 int32_t srcLength);
2554
2566 inline UnicodeString& replace(int32_t start,
2567 int32_t length,
2568 char16_t srcChar);
2569
2581 UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
2582
2592 inline UnicodeString& replaceBetween(int32_t start,
2593 int32_t limit,
2594 const UnicodeString& srcText);
2595
2610 inline UnicodeString& replaceBetween(int32_t start,
2611 int32_t limit,
2612 const UnicodeString& srcText,
2613 int32_t srcStart,
2614 int32_t srcLimit);
2615
2623 virtual void handleReplaceBetween(int32_t start,
2624 int32_t limit,
2625 const UnicodeString& text) override;
2626
2632 virtual UBool hasMetaData() const override;
2633
2647 virtual void copy(int32_t start, int32_t limit, int32_t dest) override;
2648
2649 /* Search and replace operations */
2650
2659 inline UnicodeString& findAndReplace(const UnicodeString& oldText,
2660 const UnicodeString& newText);
2661
2673 inline UnicodeString& findAndReplace(int32_t start,
2674 int32_t length,
2675 const UnicodeString& oldText,
2676 const UnicodeString& newText);
2677
2696 int32_t length,
2697 const UnicodeString& oldText,
2698 int32_t oldStart,
2699 int32_t oldLength,
2700 const UnicodeString& newText,
2701 int32_t newStart,
2702 int32_t newLength);
2703
2704
2705 /* Remove operations */
2706
2715 inline UnicodeString& remove();
2716
2725 inline UnicodeString& remove(int32_t start,
2726 int32_t length = static_cast<int32_t>(INT32_MAX));
2727
2736 inline UnicodeString& removeBetween(int32_t start,
2737 int32_t limit = static_cast<int32_t>(INT32_MAX));
2738
2748 inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
2749
2750 /* Length operations */
2751
2763 UBool padLeading(int32_t targetLength,
2764 char16_t padChar = 0x0020);
2765
2777 UBool padTrailing(int32_t targetLength,
2778 char16_t padChar = 0x0020);
2779
2786 inline UBool truncate(int32_t targetLength);
2787
2794
2795 /* Miscellaneous operations */
2796
2802 inline UnicodeString& reverse();
2803
2812 inline UnicodeString& reverse(int32_t start,
2813 int32_t length);
2814
2822
2831
2839
2848
2849#if !UCONFIG_NO_BREAK_ITERATION
2850
2878
2906 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
2907
2938 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
2939
2940#endif
2941
2955 UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
2956
2957 //========================================
2958 // Access to the internal buffer
2959 //========================================
2960
3004 char16_t *getBuffer(int32_t minCapacity);
3005
3026 void releaseBuffer(int32_t newLength=-1);
3027
3058 inline const char16_t *getBuffer() const;
3059
3093 const char16_t *getTerminatedBuffer();
3094
3101 inline operator std::u16string_view() const {
3102 return {getBuffer(), static_cast<std::u16string_view::size_type>(length())};
3103 }
3104
3105#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
3115 inline operator std::wstring_view() const {
3116 const char16_t *p = getBuffer();
3117#ifdef U_ALIASING_BARRIER
3119#endif
3120 return { reinterpret_cast<const wchar_t *>(p), (std::wstring_view::size_type)length() };
3121 }
3122#endif // U_SIZEOF_WCHAR_T
3123
3124 //========================================
3125 // Constructors
3126 //========================================
3127
3131 inline UnicodeString();
3132
3144 UnicodeString(int32_t capacity, UChar32 c, int32_t count);
3145
3156
3167
3168#ifdef U_HIDE_DRAFT_API
3188 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text) :
3189 UnicodeString(text, -1) {}
3190#endif // U_HIDE_DRAFT_API
3191
3192#if !U_CHAR16_IS_TYPEDEF && \
3193 (defined(U_HIDE_DRAFT_API) || (defined(_LIBCPP_VERSION) && _LIBCPP_VERSION >= 180000))
3213 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) :
3214 UnicodeString(ConstChar16Ptr(text), -1) {}
3215#endif
3216
3217#if defined(U_HIDE_DRAFT_API) && (U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN))
3238 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) :
3239 UnicodeString(ConstChar16Ptr(text), -1) {}
3240#endif
3241
3252 UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text);
3253
3270 UnicodeString(const char16_t *text,
3271 int32_t textLength);
3272
3273#if !U_CHAR16_IS_TYPEDEF
3290 UnicodeString(const uint16_t *text, int32_t textLength) :
3291 UnicodeString(ConstChar16Ptr(text), textLength) {}
3292#endif
3293
3294#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
3312 UnicodeString(const wchar_t *text, int32_t textLength) :
3313 UnicodeString(ConstChar16Ptr(text), textLength) {}
3314#endif
3315
3323 inline UnicodeString(const std::nullptr_t text, int32_t textLength);
3324
3337 template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
3339 fUnion.fFields.fLengthAndFlags = kShortString;
3340 doAppend(internal::toU16StringViewNullable(text));
3341 }
3342
3373 UnicodeString(UBool isTerminated,
3374 ConstChar16Ptr text,
3375 int32_t textLength);
3376
3395 UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity);
3396
3397#if !U_CHAR16_IS_TYPEDEF
3406 UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) :
3407 UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
3408#endif
3409
3410#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
3420 UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) :
3421 UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
3422#endif
3423
3432 inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity);
3433
3434#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
3435
3460
3469 UnicodeString(const char *codepageData, int32_t dataLength);
3470
3471#endif
3472
3473#if !UCONFIG_NO_CONVERSION
3474
3492 UnicodeString(const char *codepageData, const char *codepage);
3493
3511 UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
3512
3535 const char *src, int32_t srcLength,
3536 UConverter *cnv,
3537 UErrorCode &errorCode);
3538
3539#endif
3540
3573 UnicodeString(const char *src, int32_t textLength, enum EInvariant inv);
3574
3575
3593
3601
3608 UnicodeString(const UnicodeString& src, int32_t srcStart);
3609
3617 UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3618
3632 virtual UnicodeString *clone() const override;
3633
3638
3661 template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
3662 static inline UnicodeString readOnlyAlias(const S &text) {
3663 return readOnlyAliasFromU16StringView(internal::toU16StringView(text));
3664 }
3665
3685 static inline UnicodeString readOnlyAlias(const UnicodeString &text) {
3686 return readOnlyAliasFromUnicodeString(text);
3687 }
3688
3703
3715 static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
3716
3717 /* Miscellaneous operations */
3718
3754
3774 UChar32 unescapeAt(int32_t &offset) const;
3775
3781 static UClassID U_EXPORT2 getStaticClassID();
3782
3788 virtual UClassID getDynamicClassID() const override;
3789
3790 //========================================
3791 // Implementation methods
3792 //========================================
3793
3794protected:
3799 virtual int32_t getLength() const override;
3800
3806 virtual char16_t getCharAt(int32_t offset) const override;
3807
3813 virtual UChar32 getChar32At(int32_t offset) const override;
3814
3815private:
3816 static UnicodeString readOnlyAliasFromU16StringView(std::u16string_view text);
3817 static UnicodeString readOnlyAliasFromUnicodeString(const UnicodeString &text);
3818
3819 // For char* constructors. Could be made public.
3820 UnicodeString &setToUTF8(StringPiece utf8);
3821 // For extract(char*).
3822 // We could make a toUTF8(target, capacity, errorCode) public but not
3823 // this version: New API will be cleaner if we make callers create substrings
3824 // rather than having start+length on every method,
3825 // and it should take a UErrorCode&.
3826 int32_t
3827 toUTF8(int32_t start, int32_t len,
3828 char *target, int32_t capacity) const;
3829
3834 inline UBool doEquals(const UnicodeString &text, int32_t len) const {
3835 return doEquals(text.getArrayStart(), len);
3836 }
3837 UBool doEquals(const char16_t *text, int32_t len) const;
3838
3839 inline UBool
3840 doEqualsSubstring(int32_t start,
3841 int32_t length,
3842 const UnicodeString& srcText,
3843 int32_t srcStart,
3844 int32_t srcLength) const;
3845
3846 UBool doEqualsSubstring(int32_t start,
3847 int32_t length,
3848 const char16_t *srcChars,
3849 int32_t srcStart,
3850 int32_t srcLength) const;
3851
3852 inline int8_t
3853 doCompare(int32_t start,
3854 int32_t length,
3855 const UnicodeString& srcText,
3856 int32_t srcStart,
3857 int32_t srcLength) const;
3858
3859 int8_t doCompare(int32_t start,
3860 int32_t length,
3861 const char16_t *srcChars,
3862 int32_t srcStart,
3863 int32_t srcLength) const;
3864
3865 inline int8_t
3866 doCompareCodePointOrder(int32_t start,
3867 int32_t length,
3868 const UnicodeString& srcText,
3869 int32_t srcStart,
3870 int32_t srcLength) const;
3871
3872 int8_t doCompareCodePointOrder(int32_t start,
3873 int32_t length,
3874 const char16_t *srcChars,
3875 int32_t srcStart,
3876 int32_t srcLength) const;
3877
3878 inline int8_t
3879 doCaseCompare(int32_t start,
3880 int32_t length,
3881 const UnicodeString &srcText,
3882 int32_t srcStart,
3883 int32_t srcLength,
3884 uint32_t options) const;
3885
3886 int8_t
3887 doCaseCompare(int32_t start,
3888 int32_t length,
3889 const char16_t *srcChars,
3890 int32_t srcStart,
3891 int32_t srcLength,
3892 uint32_t options) const;
3893
3894 int32_t doIndexOf(char16_t c,
3895 int32_t start,
3896 int32_t length) const;
3897
3898 int32_t doIndexOf(UChar32 c,
3899 int32_t start,
3900 int32_t length) const;
3901
3902 int32_t doLastIndexOf(char16_t c,
3903 int32_t start,
3904 int32_t length) const;
3905
3906 int32_t doLastIndexOf(UChar32 c,
3907 int32_t start,
3908 int32_t length) const;
3909
3910 void doExtract(int32_t start,
3911 int32_t length,
3912 char16_t *dst,
3913 int32_t dstStart) const;
3914
3915 inline void doExtract(int32_t start,
3916 int32_t length,
3917 UnicodeString& target) const;
3918
3919 inline char16_t doCharAt(int32_t offset) const;
3920
3921 UnicodeString& doReplace(int32_t start,
3922 int32_t length,
3923 const UnicodeString& srcText,
3924 int32_t srcStart,
3925 int32_t srcLength);
3926
3927 UnicodeString& doReplace(int32_t start,
3928 int32_t length,
3929 const char16_t *srcChars,
3930 int32_t srcStart,
3931 int32_t srcLength);
3932 UnicodeString& doReplace(int32_t start, int32_t length, std::u16string_view src);
3933
3934 UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3935 UnicodeString& doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength);
3936 UnicodeString& doAppend(std::u16string_view src);
3937
3938 UnicodeString& doReverse(int32_t start,
3939 int32_t length);
3940
3941 // calculate hash code
3942 int32_t doHashCode() const;
3943
3944 // get pointer to start of array
3945 // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
3946 inline char16_t* getArrayStart();
3947 inline const char16_t* getArrayStart() const;
3948
3949 inline UBool hasShortLength() const;
3950 inline int32_t getShortLength() const;
3951
3952 // A UnicodeString object (not necessarily its current buffer)
3953 // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
3954 inline UBool isWritable() const;
3955
3956 // Is the current buffer writable?
3957 inline UBool isBufferWritable() const;
3958
3959 // None of the following does releaseArray().
3960 inline void setZeroLength();
3961 inline void setShortLength(int32_t len);
3962 inline void setLength(int32_t len);
3963 inline void setToEmpty();
3964 inline void setArray(char16_t *array, int32_t len, int32_t capacity); // sets length but not flags
3965
3966 // allocate the array; result may be the stack buffer
3967 // sets refCount to 1 if appropriate
3968 // sets fArray, fCapacity, and flags
3969 // sets length to 0
3970 // returns boolean for success or failure
3971 UBool allocate(int32_t capacity);
3972
3973 // release the array if owned
3974 void releaseArray();
3975
3976 // turn a bogus string into an empty one
3977 void unBogus();
3978
3979 // implements assignment operator, copy constructor, and fastCopyFrom()
3980 UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=false);
3981
3982 // Copies just the fields without memory management.
3983 void copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) noexcept;
3984
3985 // Pin start and limit to acceptable values.
3986 inline void pinIndex(int32_t& start) const;
3987 inline void pinIndices(int32_t& start,
3988 int32_t& length) const;
3989
3990#if !UCONFIG_NO_CONVERSION
3991
3992 /* Internal extract() using UConverter. */
3993 int32_t doExtract(int32_t start, int32_t length,
3994 char *dest, int32_t destCapacity,
3995 UConverter *cnv,
3996 UErrorCode &errorCode) const;
3997
3998 /*
3999 * Real constructor for converting from codepage data.
4000 * It assumes that it is called with !fRefCounted.
4001 *
4002 * If `codepage==0`, then the default converter
4003 * is used for the platform encoding.
4004 * If `codepage` is an empty string (`""`),
4005 * then a simple conversion is performed on the codepage-invariant
4006 * subset ("invariant characters") of the platform encoding. See utypes.h.
4007 */
4008 void doCodepageCreate(const char *codepageData,
4009 int32_t dataLength,
4010 const char *codepage);
4011
4012 /*
4013 * Worker function for creating a UnicodeString from
4014 * a codepage string using a UConverter.
4015 */
4016 void
4017 doCodepageCreate(const char *codepageData,
4018 int32_t dataLength,
4019 UConverter *converter,
4020 UErrorCode &status);
4021
4022#endif
4023
4024 /*
4025 * This function is called when write access to the array
4026 * is necessary.
4027 *
4028 * We need to make a copy of the array if
4029 * the buffer is read-only, or
4030 * the buffer is refCounted (shared), and refCount>1, or
4031 * the buffer is too small.
4032 *
4033 * Return false if memory could not be allocated.
4034 */
4035 UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
4036 int32_t growCapacity = -1,
4037 UBool doCopyArray = true,
4038 int32_t** pBufferToDelete = nullptr,
4039 UBool forceClone = false);
4040
4047 caseMap(int32_t caseLocale, uint32_t options,
4049 BreakIterator *iter,
4050#endif
4051 UStringCaseMapper *stringCaseMapper);
4052
4053 // ref counting
4054 void addRef();
4055 int32_t removeRef();
4056 int32_t refCount() const;
4057
4058 // constants
4059 enum {
4065 US_STACKBUF_SIZE = static_cast<int32_t>(UNISTR_OBJECT_SIZE - sizeof(void*) - 2) / U_SIZEOF_UCHAR,
4066 kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index)
4067 kInvalidHashCode=0, // invalid hash code
4068 kEmptyHashCode=1, // hash code for empty string
4069
4070 // bit flag values for fLengthAndFlags
4071 kIsBogus=1, // this string is bogus, i.e., not valid or nullptr
4072 kUsingStackBuffer=2,// using fUnion.fStackFields instead of fUnion.fFields
4073 kRefCounted=4, // there is a refCount field before the characters in fArray
4074 kBufferIsReadonly=8,// do not write to this buffer
4075 kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"),
4076 // and releaseBuffer(newLength) must be called
4077 kAllStorageFlags=0x1f,
4078
4079 kLengthShift=5, // remaining 11 bits for non-negative short length, or negative if long
4080 kLength1=1<<kLengthShift,
4081 kMaxShortLength=0x3ff, // max non-negative short length (leaves top bit 0)
4082 kLengthIsLarge=0xffe0, // short length < 0, real length is in fUnion.fFields.fLength
4083
4084 // combined values for convenience
4085 kShortString=kUsingStackBuffer,
4086 kLongString=kRefCounted,
4087 kReadonlyAlias=kBufferIsReadonly,
4088 kWritableAlias=0
4089 };
4090
4091 friend class UnicodeStringAppendable;
4092
4093 union StackBufferOrFields; // forward declaration necessary before friend declaration
4094 friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
4095
4096 /*
4097 * The following are all the class fields that are stored
4098 * in each UnicodeString object.
4099 * Note that UnicodeString has virtual functions,
4100 * therefore there is an implicit vtable pointer
4101 * as the first real field.
4102 * The fields should be aligned such that no padding is necessary.
4103 * On 32-bit machines, the size should be 32 bytes,
4104 * on 64-bit machines (8-byte pointers), it should be 40 bytes.
4105 *
4106 * We use a hack to achieve this.
4107 *
4108 * With at least some compilers, each of the following is forced to
4109 * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
4110 * rounded up with additional padding if the fields do not already fit that requirement:
4111 * - sizeof(class UnicodeString)
4112 * - offsetof(UnicodeString, fUnion)
4113 * - sizeof(fUnion)
4114 * - sizeof(fStackFields)
4115 *
4116 * We optimize for the longest possible internal buffer for short strings.
4117 * fUnion.fStackFields begins with 2 bytes for storage flags
4118 * and the length of relatively short strings,
4119 * followed by the buffer for short string contents.
4120 * There is no padding inside fStackFields.
4121 *
4122 * Heap-allocated and aliased strings use fUnion.fFields.
4123 * Both fStackFields and fFields must begin with the same fields for flags and short length,
4124 * that is, those must have the same memory offsets inside the object,
4125 * because the flags must be inspected in order to decide which half of fUnion is being used.
4126 * We assume that the compiler does not reorder the fields.
4127 *
4128 * (Padding at the end of fFields is ok:
4129 * As long as it is no larger than fStackFields, it is not wasted space.)
4130 *
4131 * For some of the history of the UnicodeString class fields layout, see
4132 * - ICU ticket #11551 "longer UnicodeString contents in stack buffer"
4133 * - ICU ticket #11336 "UnicodeString: recombine stack buffer arrays"
4134 * - ICU ticket #8322 "why is sizeof(UnicodeString)==48?"
4135 */
4136 // (implicit) *vtable;
4137 union StackBufferOrFields {
4138 // fStackFields is used iff (fLengthAndFlags&kUsingStackBuffer) else fFields is used.
4139 // Each struct of the union must begin with fLengthAndFlags.
4140 struct {
4141 int16_t fLengthAndFlags; // bit fields: see constants above
4142 char16_t fBuffer[US_STACKBUF_SIZE]; // buffer for short strings
4143 } fStackFields;
4144 struct {
4145 int16_t fLengthAndFlags; // bit fields: see constants above
4146 int32_t fLength; // number of characters in fArray if >kMaxShortLength (fLengthAndFlags<0); else undefined
4147 int32_t fCapacity; // capacity of fArray (in char16_ts)
4148 // array pointer last to minimize padding for machines with P128 data model
4149 // or pointer sizes that are not a power of 2
4150 char16_t *fArray; // the Unicode data
4151 } fFields;
4152 } fUnion;
4153};
4154
4163U_COMMON_API UnicodeString U_EXPORT2
4164operator+ (const UnicodeString &s1, const UnicodeString &s2);
4165
4176template<
4177 typename US, typename S,
4178 typename = std::enable_if_t<ConvertibleToU16StringView<S> && std::is_same_v<US, UnicodeString>>>
4179inline UnicodeString operator+(const US &s1, const S &s2) {
4181}
4182
4183#ifndef U_FORCE_HIDE_INTERNAL_API
4185U_COMMON_API UnicodeString U_EXPORT2
4186unistr_internalConcat(const UnicodeString &s1, std::u16string_view s2);
4187#endif
4188
4189//========================================
4190// Inline members
4191//========================================
4192
4193//========================================
4194// Privates
4195//========================================
4196
4197inline void
4198UnicodeString::pinIndex(int32_t& start) const
4199{
4200 // pin index
4201 if(start < 0) {
4202 start = 0;
4203 } else if(start > length()) {
4204 start = length();
4205 }
4206}
4207
4208inline void
4209UnicodeString::pinIndices(int32_t& start,
4210 int32_t& _length) const
4211{
4212 // pin indices
4213 int32_t len = length();
4214 if(start < 0) {
4215 start = 0;
4216 } else if(start > len) {
4217 start = len;
4218 }
4219 if(_length < 0) {
4220 _length = 0;
4221 } else if(_length > (len - start)) {
4222 _length = (len - start);
4223 }
4224}
4225
4226inline char16_t*
4227UnicodeString::getArrayStart() {
4228 return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
4229 fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
4230}
4231
4232inline const char16_t*
4233UnicodeString::getArrayStart() const {
4234 return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
4235 fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
4236}
4237
4238//========================================
4239// Default constructor
4240//========================================
4241
4242inline
4244 fUnion.fStackFields.fLengthAndFlags=kShortString;
4245}
4246
4247inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/) {
4248 fUnion.fStackFields.fLengthAndFlags=kShortString;
4249}
4250
4251inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/, int32_t /*length*/) {
4252 fUnion.fStackFields.fLengthAndFlags=kShortString;
4253}
4254
4255inline UnicodeString::UnicodeString(std::nullptr_t /*buffer*/, int32_t /*buffLength*/, int32_t /*buffCapacity*/) {
4256 fUnion.fStackFields.fLengthAndFlags=kShortString;
4257}
4258
4259//========================================
4260// Read-only implementation methods
4261//========================================
4262inline UBool
4263UnicodeString::hasShortLength() const {
4264 return fUnion.fFields.fLengthAndFlags>=0;
4265}
4266
4267inline int32_t
4268UnicodeString::getShortLength() const {
4269 // fLengthAndFlags must be non-negative -> short length >= 0
4270 // and arithmetic or logical shift does not matter.
4271 return fUnion.fFields.fLengthAndFlags>>kLengthShift;
4272}
4273
4274inline int32_t
4276 return hasShortLength() ? getShortLength() : fUnion.fFields.fLength;
4277}
4278
4279inline int32_t
4281 return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
4282 US_STACKBUF_SIZE : fUnion.fFields.fCapacity;
4283}
4284
4285inline int32_t
4287{ return doHashCode(); }
4288
4289inline UBool
4291{ return fUnion.fFields.fLengthAndFlags & kIsBogus; }
4292
4293inline UBool
4294UnicodeString::isWritable() const
4295{ return !(fUnion.fFields.fLengthAndFlags & (kOpenGetBuffer | kIsBogus)); }
4296
4297inline UBool
4298UnicodeString::isBufferWritable() const
4299{
4300 return
4301 !(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
4302 (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1);
4303}
4304
4305inline const char16_t *
4307 if(fUnion.fFields.fLengthAndFlags&(kIsBogus|kOpenGetBuffer)) {
4308 return nullptr;
4309 } else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
4310 return fUnion.fStackFields.fBuffer;
4311 } else {
4312 return fUnion.fFields.fArray;
4313 }
4314}
4315
4316//========================================
4317// Read-only alias methods
4318//========================================
4319inline int8_t
4320UnicodeString::doCompare(int32_t start,
4321 int32_t thisLength,
4322 const UnicodeString& srcText,
4323 int32_t srcStart,
4324 int32_t srcLength) const
4325{
4326 if(srcText.isBogus()) {
4327 return static_cast<int8_t>(!isBogus()); // 0 if both are bogus, 1 otherwise
4328 } else {
4329 srcText.pinIndices(srcStart, srcLength);
4330 return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
4331 }
4332}
4333
4334inline UBool
4335UnicodeString::doEqualsSubstring(int32_t start,
4336 int32_t thisLength,
4337 const UnicodeString& srcText,
4338 int32_t srcStart,
4339 int32_t srcLength) const
4340{
4341 if(srcText.isBogus()) {
4342 return isBogus();
4343 } else {
4344 srcText.pinIndices(srcStart, srcLength);
4345 return !isBogus() && doEqualsSubstring(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
4346 }
4347}
4348
4349inline bool
4350UnicodeString::operator== (const UnicodeString& text) const
4351{
4352 if(isBogus()) {
4353 return text.isBogus();
4354 } else {
4355 int32_t len = length(), textLength = text.length();
4356 return !text.isBogus() && len == textLength && doEquals(text, len);
4357 }
4358}
4359
4360inline bool
4361UnicodeString::operator!= (const UnicodeString& text) const
4362{ return (! operator==(text)); }
4363
4364inline UBool
4365UnicodeString::operator> (const UnicodeString& text) const
4366{ return doCompare(0, length(), text, 0, text.length()) == 1; }
4367
4368inline UBool
4369UnicodeString::operator< (const UnicodeString& text) const
4370{ return doCompare(0, length(), text, 0, text.length()) == -1; }
4371
4372inline UBool
4373UnicodeString::operator>= (const UnicodeString& text) const
4374{ return doCompare(0, length(), text, 0, text.length()) != -1; }
4375
4376inline UBool
4377UnicodeString::operator<= (const UnicodeString& text) const
4378{ return doCompare(0, length(), text, 0, text.length()) != 1; }
4379
4380inline int8_t
4382{ return doCompare(0, length(), text, 0, text.length()); }
4383
4384inline int8_t
4386 int32_t _length,
4387 const UnicodeString& srcText) const
4388{ return doCompare(start, _length, srcText, 0, srcText.length()); }
4389
4390inline int8_t
4392 int32_t srcLength) const
4393{ return doCompare(0, length(), srcChars, 0, srcLength); }
4394
4395inline int8_t
4397 int32_t _length,
4398 const UnicodeString& srcText,
4399 int32_t srcStart,
4400 int32_t srcLength) const
4401{ return doCompare(start, _length, srcText, srcStart, srcLength); }
4402
4403inline int8_t
4405 int32_t _length,
4406 const char16_t *srcChars) const
4407{ return doCompare(start, _length, srcChars, 0, _length); }
4408
4409inline int8_t
4411 int32_t _length,
4412 const char16_t *srcChars,
4413 int32_t srcStart,
4414 int32_t srcLength) const
4415{ return doCompare(start, _length, srcChars, srcStart, srcLength); }
4416
4417inline int8_t
4419 int32_t limit,
4420 const UnicodeString& srcText,
4421 int32_t srcStart,
4422 int32_t srcLimit) const
4423{ return doCompare(start, limit - start,
4424 srcText, srcStart, srcLimit - srcStart); }
4425
4426inline int8_t
4427UnicodeString::doCompareCodePointOrder(int32_t start,
4428 int32_t thisLength,
4429 const UnicodeString& srcText,
4430 int32_t srcStart,
4431 int32_t srcLength) const
4432{
4433 if(srcText.isBogus()) {
4434 return static_cast<int8_t>(!isBogus()); // 0 if both are bogus, 1 otherwise
4435 } else {
4436 srcText.pinIndices(srcStart, srcLength);
4437 return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
4438 }
4439}
4440
4441inline int8_t
4443{ return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
4444
4445inline int8_t
4447 int32_t _length,
4448 const UnicodeString& srcText) const
4449{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
4450
4451inline int8_t
4453 int32_t srcLength) const
4454{ return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
4455
4456inline int8_t
4458 int32_t _length,
4459 const UnicodeString& srcText,
4460 int32_t srcStart,
4461 int32_t srcLength) const
4462{ return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
4463
4464inline int8_t
4466 int32_t _length,
4467 const char16_t *srcChars) const
4468{ return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
4469
4470inline int8_t
4472 int32_t _length,
4473 const char16_t *srcChars,
4474 int32_t srcStart,
4475 int32_t srcLength) const
4476{ return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
4477
4478inline int8_t
4480 int32_t limit,
4481 const UnicodeString& srcText,
4482 int32_t srcStart,
4483 int32_t srcLimit) const
4484{ return doCompareCodePointOrder(start, limit - start,
4485 srcText, srcStart, srcLimit - srcStart); }
4486
4487inline int8_t
4488UnicodeString::doCaseCompare(int32_t start,
4489 int32_t thisLength,
4490 const UnicodeString &srcText,
4491 int32_t srcStart,
4492 int32_t srcLength,
4493 uint32_t options) const
4494{
4495 if(srcText.isBogus()) {
4496 return static_cast<int8_t>(!isBogus()); // 0 if both are bogus, 1 otherwise
4497 } else {
4498 srcText.pinIndices(srcStart, srcLength);
4499 return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
4500 }
4501}
4502
4503inline int8_t
4504UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
4505 return doCaseCompare(0, length(), text, 0, text.length(), options);
4506}
4507
4508inline int8_t
4510 int32_t _length,
4511 const UnicodeString &srcText,
4512 uint32_t options) const {
4513 return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
4514}
4515
4516inline int8_t
4518 int32_t srcLength,
4519 uint32_t options) const {
4520 return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
4521}
4522
4523inline int8_t
4525 int32_t _length,
4526 const UnicodeString &srcText,
4527 int32_t srcStart,
4528 int32_t srcLength,
4529 uint32_t options) const {
4530 return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
4531}
4532
4533inline int8_t
4535 int32_t _length,
4536 const char16_t *srcChars,
4537 uint32_t options) const {
4538 return doCaseCompare(start, _length, srcChars, 0, _length, options);
4539}
4540
4541inline int8_t
4543 int32_t _length,
4544 const char16_t *srcChars,
4545 int32_t srcStart,
4546 int32_t srcLength,
4547 uint32_t options) const {
4548 return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
4549}
4550
4551inline int8_t
4553 int32_t limit,
4554 const UnicodeString &srcText,
4555 int32_t srcStart,
4556 int32_t srcLimit,
4557 uint32_t options) const {
4558 return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
4559}
4560
4561inline int32_t
4563 int32_t srcStart,
4564 int32_t srcLength,
4565 int32_t start,
4566 int32_t _length) const
4567{
4568 if(!srcText.isBogus()) {
4569 srcText.pinIndices(srcStart, srcLength);
4570 if(srcLength > 0) {
4571 return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4572 }
4573 }
4574 return -1;
4575}
4576
4577inline int32_t
4579{ return indexOf(text, 0, text.length(), 0, length()); }
4580
4581inline int32_t
4583 int32_t start) const {
4584 pinIndex(start);
4585 return indexOf(text, 0, text.length(), start, length() - start);
4586}
4587
4588inline int32_t
4590 int32_t start,
4591 int32_t _length) const
4592{ return indexOf(text, 0, text.length(), start, _length); }
4593
4594inline int32_t
4595UnicodeString::indexOf(const char16_t *srcChars,
4596 int32_t srcLength,
4597 int32_t start) const {
4598 pinIndex(start);
4599 return indexOf(srcChars, 0, srcLength, start, length() - start);
4600}
4601
4602inline int32_t
4604 int32_t srcLength,
4605 int32_t start,
4606 int32_t _length) const
4607{ return indexOf(srcChars, 0, srcLength, start, _length); }
4608
4609inline int32_t
4611 int32_t start,
4612 int32_t _length) const
4613{ return doIndexOf(c, start, _length); }
4614
4615inline int32_t
4617 int32_t start,
4618 int32_t _length) const
4619{ return doIndexOf(c, start, _length); }
4620
4621inline int32_t
4622UnicodeString::indexOf(char16_t c) const
4623{ return doIndexOf(c, 0, length()); }
4624
4625inline int32_t
4627{ return indexOf(c, 0, length()); }
4628
4629inline int32_t
4631 int32_t start) const {
4632 pinIndex(start);
4633 return doIndexOf(c, start, length() - start);
4634}
4635
4636inline int32_t
4638 int32_t start) const {
4639 pinIndex(start);
4640 return indexOf(c, start, length() - start);
4641}
4642
4643inline int32_t
4645 int32_t srcLength,
4646 int32_t start,
4647 int32_t _length) const
4648{ return lastIndexOf(srcChars, 0, srcLength, start, _length); }
4649
4650inline int32_t
4651UnicodeString::lastIndexOf(const char16_t *srcChars,
4652 int32_t srcLength,
4653 int32_t start) const {
4654 pinIndex(start);
4655 return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
4656}
4657
4658inline int32_t
4660 int32_t srcStart,
4661 int32_t srcLength,
4662 int32_t start,
4663 int32_t _length) const
4664{
4665 if(!srcText.isBogus()) {
4666 srcText.pinIndices(srcStart, srcLength);
4667 if(srcLength > 0) {
4668 return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4669 }
4670 }
4671 return -1;
4672}
4673
4674inline int32_t
4676 int32_t start,
4677 int32_t _length) const
4678{ return lastIndexOf(text, 0, text.length(), start, _length); }
4679
4680inline int32_t
4682 int32_t start) const {
4683 pinIndex(start);
4684 return lastIndexOf(text, 0, text.length(), start, length() - start);
4685}
4686
4687inline int32_t
4689{ return lastIndexOf(text, 0, text.length(), 0, length()); }
4690
4691inline int32_t
4693 int32_t start,
4694 int32_t _length) const
4695{ return doLastIndexOf(c, start, _length); }
4696
4697inline int32_t
4699 int32_t start,
4700 int32_t _length) const {
4701 return doLastIndexOf(c, start, _length);
4702}
4703
4704inline int32_t
4706{ return doLastIndexOf(c, 0, length()); }
4707
4708inline int32_t
4710 return lastIndexOf(c, 0, length());
4711}
4712
4713inline int32_t
4715 int32_t start) const {
4716 pinIndex(start);
4717 return doLastIndexOf(c, start, length() - start);
4718}
4719
4720inline int32_t
4722 int32_t start) const {
4723 pinIndex(start);
4724 return lastIndexOf(c, start, length() - start);
4725}
4726
4727inline UBool
4729{ return doEqualsSubstring(0, text.length(), text, 0, text.length()); }
4730
4731inline UBool
4733 int32_t srcStart,
4734 int32_t srcLength) const
4735{ return doEqualsSubstring(0, srcLength, srcText, srcStart, srcLength); }
4736
4737inline UBool
4738UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const {
4739 if(srcLength < 0) {
4740 srcLength = u_strlen(toUCharPtr(srcChars));
4741 }
4742 return doEqualsSubstring(0, srcLength, srcChars, 0, srcLength);
4743}
4744
4745inline UBool
4746UnicodeString::startsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const {
4747 if(srcLength < 0) {
4748 srcLength = u_strlen(toUCharPtr(srcChars));
4749 }
4750 return doEqualsSubstring(0, srcLength, srcChars, srcStart, srcLength);
4751}
4752
4753inline UBool
4755{ return doEqualsSubstring(length() - text.length(), text.length(),
4756 text, 0, text.length()); }
4757
4758inline UBool
4760 int32_t srcStart,
4761 int32_t srcLength) const {
4762 srcText.pinIndices(srcStart, srcLength);
4763 return doEqualsSubstring(length() - srcLength, srcLength,
4764 srcText, srcStart, srcLength);
4765}
4766
4767inline UBool
4769 int32_t srcLength) const {
4770 if(srcLength < 0) {
4771 srcLength = u_strlen(toUCharPtr(srcChars));
4772 }
4773 return doEqualsSubstring(length() - srcLength, srcLength, srcChars, 0, srcLength);
4774}
4775
4776inline UBool
4777UnicodeString::endsWith(const char16_t *srcChars,
4778 int32_t srcStart,
4779 int32_t srcLength) const {
4780 if(srcLength < 0) {
4781 srcLength = u_strlen(toUCharPtr(srcChars + srcStart));
4782 }
4783 return doEqualsSubstring(length() - srcLength, srcLength,
4784 srcChars, srcStart, srcLength);
4785}
4786
4787//========================================
4788// replace
4789//========================================
4790inline UnicodeString&
4792 int32_t _length,
4793 const UnicodeString& srcText)
4794{ return doReplace(start, _length, srcText, 0, srcText.length()); }
4795
4796inline UnicodeString&
4798 int32_t _length,
4799 const UnicodeString& srcText,
4800 int32_t srcStart,
4801 int32_t srcLength)
4802{ return doReplace(start, _length, srcText, srcStart, srcLength); }
4803
4804inline UnicodeString&
4806 int32_t _length,
4807 ConstChar16Ptr srcChars,
4808 int32_t srcLength)
4809{ return doReplace(start, _length, srcChars, 0, srcLength); }
4810
4811inline UnicodeString&
4813 int32_t _length,
4814 const char16_t *srcChars,
4815 int32_t srcStart,
4816 int32_t srcLength)
4817{ return doReplace(start, _length, srcChars, srcStart, srcLength); }
4818
4819inline UnicodeString&
4821 int32_t _length,
4822 char16_t srcChar)
4823{ return doReplace(start, _length, &srcChar, 0, 1); }
4824
4825inline UnicodeString&
4827 int32_t limit,
4828 const UnicodeString& srcText)
4829{ return doReplace(start, limit - start, srcText, 0, srcText.length()); }
4830
4831inline UnicodeString&
4833 int32_t limit,
4834 const UnicodeString& srcText,
4835 int32_t srcStart,
4836 int32_t srcLimit)
4837{ return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
4838
4839inline UnicodeString&
4841 const UnicodeString& newText)
4842{ return findAndReplace(0, length(), oldText, 0, oldText.length(),
4843 newText, 0, newText.length()); }
4844
4845inline UnicodeString&
4847 int32_t _length,
4848 const UnicodeString& oldText,
4849 const UnicodeString& newText)
4850{ return findAndReplace(start, _length, oldText, 0, oldText.length(),
4851 newText, 0, newText.length()); }
4852
4853// ============================
4854// extract
4855// ============================
4856inline void
4857UnicodeString::doExtract(int32_t start,
4858 int32_t _length,
4859 UnicodeString& target) const
4860{ target.replace(0, target.length(), *this, start, _length); }
4861
4862inline void
4864 int32_t _length,
4865 Char16Ptr target,
4866 int32_t targetStart) const
4867{ doExtract(start, _length, target, targetStart); }
4868
4869inline void
4871 int32_t _length,
4872 UnicodeString& target) const
4873{ doExtract(start, _length, target); }
4874
4875#if !UCONFIG_NO_CONVERSION
4876
4877inline int32_t
4879 int32_t _length,
4880 char *dst,
4881 const char *codepage) const
4882
4883{
4884 // This dstSize value will be checked explicitly
4885 return extract(start, _length, dst, dst != nullptr ? 0xffffffff : 0, codepage);
4886}
4887
4888#endif
4889
4890inline void
4892 int32_t limit,
4893 char16_t *dst,
4894 int32_t dstStart) const {
4895 pinIndex(start);
4896 pinIndex(limit);
4897 doExtract(start, limit - start, dst, dstStart);
4898}
4899
4900inline UnicodeString
4901UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
4902 return tempSubString(start, limit - start);
4903}
4904
4905inline char16_t
4906UnicodeString::doCharAt(int32_t offset) const
4907{
4908 if (static_cast<uint32_t>(offset) < static_cast<uint32_t>(length())) {
4909 return getArrayStart()[offset];
4910 } else {
4911 return kInvalidUChar;
4912 }
4913}
4914
4915inline char16_t
4916UnicodeString::charAt(int32_t offset) const
4917{ return doCharAt(offset); }
4918
4919inline char16_t
4920UnicodeString::operator[] (int32_t offset) const
4921{ return doCharAt(offset); }
4922
4923inline UBool
4925 // Arithmetic or logical right shift does not matter: only testing for 0.
4926 return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == 0;
4927}
4928
4929//========================================
4930// Write implementation methods
4931//========================================
4932inline void
4933UnicodeString::setZeroLength() {
4934 fUnion.fFields.fLengthAndFlags &= kAllStorageFlags;
4935}
4936
4937inline void
4938UnicodeString::setShortLength(int32_t len) {
4939 // requires 0 <= len <= kMaxShortLength
4940 fUnion.fFields.fLengthAndFlags =
4941 static_cast<int16_t>((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) | (len << kLengthShift));
4942}
4943
4944inline void
4945UnicodeString::setLength(int32_t len) {
4946 if(len <= kMaxShortLength) {
4947 setShortLength(len);
4948 } else {
4949 fUnion.fFields.fLengthAndFlags |= kLengthIsLarge;
4950 fUnion.fFields.fLength = len;
4951 }
4952}
4953
4954inline void
4955UnicodeString::setToEmpty() {
4956 fUnion.fFields.fLengthAndFlags = kShortString;
4957}
4958
4959inline void
4960UnicodeString::setArray(char16_t *array, int32_t len, int32_t capacity) {
4961 setLength(len);
4962 fUnion.fFields.fArray = array;
4963 fUnion.fFields.fCapacity = capacity;
4964}
4965
4966inline UnicodeString&
4967UnicodeString::operator= (char16_t ch)
4968{ return doReplace(0, length(), &ch, 0, 1); }
4969
4970inline UnicodeString&
4971UnicodeString::operator= (UChar32 ch)
4972{ return replace(0, length(), ch); }
4973
4974inline UnicodeString&
4976 int32_t srcStart,
4977 int32_t srcLength)
4978{
4979 unBogus();
4980 return doReplace(0, length(), srcText, srcStart, srcLength);
4981}
4982
4983inline UnicodeString&
4985 int32_t srcStart)
4986{
4987 unBogus();
4988 srcText.pinIndex(srcStart);
4989 return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
4990}
4991
4992inline UnicodeString&
4994{
4995 return copyFrom(srcText);
4996}
4997
4998inline UnicodeString&
4999UnicodeString::setTo(const char16_t *srcChars,
5000 int32_t srcLength)
5001{
5002 unBogus();
5003 return doReplace(0, length(), srcChars, 0, srcLength);
5004}
5005
5006inline UnicodeString&
5007UnicodeString::setTo(char16_t srcChar)
5008{
5009 unBogus();
5010 return doReplace(0, length(), &srcChar, 0, 1);
5011}
5012
5013inline UnicodeString&
5015{
5016 unBogus();
5017 return replace(0, length(), srcChar);
5018}
5019
5020inline UnicodeString&
5022 int32_t srcStart,
5023 int32_t srcLength)
5024{ return doAppend(srcText, srcStart, srcLength); }
5025
5026inline UnicodeString&
5028{ return doAppend(srcText, 0, srcText.length()); }
5029
5030inline UnicodeString&
5031UnicodeString::append(const char16_t *srcChars,
5032 int32_t srcStart,
5033 int32_t srcLength)
5034{ return doAppend(srcChars, srcStart, srcLength); }
5035
5036inline UnicodeString&
5038 int32_t srcLength)
5039{ return doAppend(srcChars, 0, srcLength); }
5040
5041inline UnicodeString&
5042UnicodeString::append(char16_t srcChar)
5043{ return doAppend(&srcChar, 0, 1); }
5044
5045inline UnicodeString&
5046UnicodeString::operator+= (char16_t ch)
5047{ return doAppend(&ch, 0, 1); }
5048
5049inline UnicodeString&
5050UnicodeString::operator+= (UChar32 ch) {
5051 return append(ch);
5052}
5053
5054inline UnicodeString&
5055UnicodeString::operator+= (const UnicodeString& srcText)
5056{ return doAppend(srcText, 0, srcText.length()); }
5057
5058inline UnicodeString&
5060 const UnicodeString& srcText,
5061 int32_t srcStart,
5062 int32_t srcLength)
5063{ return doReplace(start, 0, srcText, srcStart, srcLength); }
5064
5065inline UnicodeString&
5067 const UnicodeString& srcText)
5068{ return doReplace(start, 0, srcText, 0, srcText.length()); }
5069
5070inline UnicodeString&
5072 const char16_t *srcChars,
5073 int32_t srcStart,
5074 int32_t srcLength)
5075{ return doReplace(start, 0, srcChars, srcStart, srcLength); }
5076
5077inline UnicodeString&
5079 ConstChar16Ptr srcChars,
5080 int32_t srcLength)
5081{ return doReplace(start, 0, srcChars, 0, srcLength); }
5082
5083inline UnicodeString&
5085 char16_t srcChar)
5086{ return doReplace(start, 0, &srcChar, 0, 1); }
5087
5088inline UnicodeString&
5090 UChar32 srcChar)
5091{ return replace(start, 0, srcChar); }
5092
5093
5094inline UnicodeString&
5096{
5097 // remove() of a bogus string makes the string empty and non-bogus
5098 if(isBogus()) {
5099 setToEmpty();
5100 } else {
5101 setZeroLength();
5102 }
5103 return *this;
5104}
5105
5106inline UnicodeString&
5108 int32_t _length)
5109{
5110 if(start <= 0 && _length == INT32_MAX) {
5111 // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
5112 return remove();
5113 }
5114 return doReplace(start, _length, nullptr, 0, 0);
5115}
5116
5117inline UnicodeString&
5119 int32_t limit)
5120{ return doReplace(start, limit - start, nullptr, 0, 0); }
5121
5122inline UnicodeString &
5123UnicodeString::retainBetween(int32_t start, int32_t limit) {
5124 truncate(limit);
5125 return doReplace(0, start, nullptr, 0, 0);
5126}
5127
5128inline UBool
5129UnicodeString::truncate(int32_t targetLength)
5130{
5131 if(isBogus() && targetLength == 0) {
5132 // truncate(0) of a bogus string makes the string empty and non-bogus
5133 unBogus();
5134 return false;
5135 } else if (static_cast<uint32_t>(targetLength) < static_cast<uint32_t>(length())) {
5136 setLength(targetLength);
5137 return true;
5138 } else {
5139 return false;
5140 }
5141}
5142
5143inline UnicodeString&
5145{ return doReverse(0, length()); }
5146
5147inline UnicodeString&
5149 int32_t _length)
5150{ return doReverse(start, _length); }
5151
5152U_NAMESPACE_END
5153
5154#endif /* U_SHOW_CPLUSPLUS_API */
5155
5156#endif
C++ API: Interface for writing bytes, and implementation classes.
C++ API: char16_t pointer wrappers with implicit conversion from bit-compatible raw pointer types.
std::u16string_view toU16StringView(std::u16string_view sv)
Pass-through overload.
Definition char16ptr.h:400
std::u16string_view toU16StringViewNullable(const T &text)
Pass-through overload.
Definition char16ptr.h:430
#define U_ALIASING_BARRIER(ptr)
Barrier for pointer anti-aliasing optimizations even across function boundaries.
Definition char16ptr.h:37
The BreakIterator class implements methods for finding the location of boundaries in text.
Definition brkiter.h:106
A ByteSink can be filled with bytes.
Definition bytestream.h:55
char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
Definition char16ptr.h:49
const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
Definition char16ptr.h:156
Records lengths of string edits but not replacement text.
Definition edits.h:80
A Locale object represents a specific geographical, political, or cultural region.
Definition locid.h:198
virtual void extractBetween(int32_t start, int32_t limit, UnicodeString &target) const =0
Copies characters in the range [start, limit) into the UnicodeString target.
char16_t charAt(int32_t offset) const
Returns the 16-bit code unit at the given offset into the text.
Definition rep.h:251
int32_t length() const
Returns the number of 16-bit code units in the text.
Definition rep.h:246
Replaceable()
Default constructor.
Definition rep.h:243
Implementation of ByteSink that writes to a "string".
Definition bytestream.h:291
A string-like object that points to a sized piece of memory.
Definition stringpiece.h:61
UnicodeString is a string class that stores Unicode characters directly and provides similar functionality as the Java String and StringBuffer/StringBuilder classes.
Definition unistr.h:303
int32_t indexOf(const char16_t *srcChars, int32_t srcStart, int32_t srcLength, int32_t start, int32_t length) const
Locate in this the first occurrence in the range [start, start + length) of the characters in srcChar...
UnicodeString(const UnicodeString &that)
Copy constructor.
char16_t charAt(int32_t offset) const
Return the code unit at offset offset.
Definition unistr.h:4916
void push_back(char16_t c)
Appends the code unit c to the UnicodeString object.
Definition unistr.h:2386
void swap(UnicodeString &other) noexcept
Swap strings.
virtual char16_t getCharAt(int32_t offset) const override
The change in Replaceable to use virtual getCharAt() allows UnicodeString::charAt() to be inline agai...
bool operator==(const S &text) const
Equality operator.
Definition unistr.h:355
int8_t caseCompareBetween(int32_t start, int32_t limit, const UnicodeString &srcText, int32_t srcStart, int32_t srcLimit, uint32_t options) const
Compare two strings case-insensitively using full case folding.
Definition unistr.h:4552
virtual int32_t getLength() const override
Implement Replaceable::getLength() (see jitterbug 1027).
int32_t hashCode() const
Generate a hash code for this object.
Definition unistr.h:4286
UnicodeString & setTo(char16_t *buffer, int32_t buffLength, int32_t buffCapacity)
Aliasing setTo() function, analogous to the writable-aliasing char16_t* constructor.
UnicodeString & operator=(const S &src)
Assignment operator.
Definition unistr.h:2022
UnicodeString & operator=(UnicodeString &&src) noexcept
Move assignment operator; might leave src in bogus state.
UChar32 unescapeAt(int32_t &offset) const
Unescape a single escape sequence and return the represented character.
UnicodeString(const wchar_t *text, int32_t textLength)
wchar_t * constructor.
Definition unistr.h:3312
virtual void handleReplaceBetween(int32_t start, int32_t limit, const UnicodeString &text) override
Replace a substring of this object with the given text.
UBool hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const
Check if the length char16_t code units of the string contain more Unicode code points than a certain...
UnicodeString(const UnicodeString &src, int32_t srcStart, int32_t srcLength)
'Substring' constructor from subrange of source string.
int32_t lastIndexOf(const UnicodeString &text) const
Locate in this the last occurrence of the characters in text, using bitwise comparison.
Definition unistr.h:4688
UnicodeString & reverse()
Reverse this UnicodeString in place.
Definition unistr.h:5144
virtual ~UnicodeString()
Destructor.
UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage)
char* constructor.
UnicodeString(const char *codepageData, const char *codepage)
char* constructor.
UnicodeString(UBool isTerminated, ConstChar16Ptr text, int32_t textLength)
Readonly-aliasing char16_t* constructor.
static UnicodeString readOnlyAlias(const UnicodeString &text)
Readonly-aliasing factory method.
Definition unistr.h:3685
UnicodeString & toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options)
Titlecase this string, with options.
UnicodeString & append(UChar32 srcChar)
Append the code point srcChar to the UnicodeString object.
EInvariant
Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor which constructs a ...
Definition unistr.h:316
bool operator!=(const S &text) const
Inequality operator.
Definition unistr.h:388
UnicodeString unescape() const
Unescape a string of characters and return a string containing the result.
UnicodeString(const UnicodeString &src, int32_t srcStart)
'Substring' constructor from tail of source string.
int32_t getChar32Limit(int32_t offset) const
Adjust a random-access offset so that it points behind a Unicode character.
UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity)
Writable-aliasing char16_t* constructor.
UnicodeString & findAndReplace(int32_t start, int32_t length, const UnicodeString &oldText, int32_t oldStart, int32_t oldLength, const UnicodeString &newText, int32_t newStart, int32_t newLength)
Replace all occurrences of characters in oldText in the range [oldStart, oldStart + oldLength) with t...
UnicodeString(int32_t capacity, UChar32 c, int32_t count)
Construct a UnicodeString with capacity to hold capacity char16_ts.
UnicodeString & operator+=(const S &src)
Append operator.
Definition unistr.h:2287
int32_t getCapacity() const
Return the capacity of the internal buffer of the UnicodeString object.
Definition unistr.h:4280
int32_t extract(int32_t start, int32_t startLength, char *target, uint32_t targetLength, const char *codepage) const
Copy the characters in the range [start, start + length) into an array of characters in a specified c...
virtual void copy(int32_t start, int32_t limit, int32_t dest) override
Copy a substring of this object, retaining attribute (out-of-band) information.
virtual void extractBetween(int32_t start, int32_t limit, UnicodeString &target) const override
Copy the characters in the range [start, limit) into the UnicodeString target.
int8_t compareCodePointOrder(const UnicodeString &text) const
Compare two Unicode strings in code point order.
Definition unistr.h:4442
UnicodeString & insert(int32_t start, const UnicodeString &srcText, int32_t srcStart, int32_t srcLength)
Insert the characters in srcText in the range [srcStart, srcStart + srcLength) into the UnicodeString...
Definition unistr.h:5059
UnicodeString & replace(int32_t start, int32_t length, const UnicodeString &srcText, int32_t srcStart, int32_t srcLength)
Replace the characters in the range [start, start + length) with the characters in srcText in the ran...
Definition unistr.h:4797
UnicodeString & toTitle(BreakIterator *titleIter)
Titlecase this string, convenience function using the default locale.
int8_t compare(const UnicodeString &text) const
Compare the characters bitwise in this UnicodeString to the characters in text.
Definition unistr.h:4381
UnicodeString & fastCopyFrom(const UnicodeString &src)
Almost the same as the assignment operator.
virtual UnicodeString * clone() const override
Clone this object, an instance of a subclass of Replaceable.
UBool padLeading(int32_t targetLength, char16_t padChar=0x0020)
Pad the start of this UnicodeString with the character padChar.
int32_t getChar32Start(int32_t offset) const
Adjust a random-access offset so that it points to the beginning of a Unicode character.
UnicodeString & append(const S &src)
Appends the characters in src which is, or which is implicitly convertible to, a std::u16string_view ...
Definition unistr.h:2358
UChar32 char32At(int32_t offset) const
Return the code point that contains the code unit at offset offset.
UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const
Create a temporary substring for the specified range.
Definition unistr.h:4901
UnicodeString(const char *src, int32_t textLength, enum EInvariant inv)
Constructs a Unicode string from an invariant-character char * string.
int8_t caseCompare(const UnicodeString &text, uint32_t options) const
Compare two strings case-insensitively using full case folding.
Definition unistr.h:4504
UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const
Create a temporary substring for the specified range.
int32_t extract(Char16Ptr dest, int32_t destCapacity, UErrorCode &errorCode) const
Copy the contents of the string into dest.
int32_t length() const
Return the length of the UnicodeString object.
Definition unistr.h:4275
unspecified_reverse_iterator rend() const
Definition unistr.h:1955
virtual UChar32 getChar32At(int32_t offset) const override
The change in Replaceable to use virtual getChar32At() allows UnicodeString::char32At() to be inline ...
int32_t extract(int32_t start, int32_t startLength, char *target, uint32_t targetLength) const
Copy the characters in the range [start, start + length) into an array of characters in the platform'...
static UnicodeString fromUTF8(StringPiece utf8)
Create a UnicodeString from a UTF-8 string.
UNISTR_FROM_CHAR_EXPLICIT UnicodeString(char16_t ch)
Single char16_t (code unit) constructor.
int32_t lastIndexOf(const char16_t *srcChars, int32_t srcStart, int32_t srcLength, int32_t start, int32_t length) const
Locate in this the last occurrence in the range [start, start + length) of the characters in srcChars...
UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity)
Writable-aliasing wchar_t * constructor.
Definition unistr.h:3420
void setToBogus()
Make this UnicodeString object invalid.
friend void swap(UnicodeString &s1, UnicodeString &s2) noexcept
Non-member UnicodeString swap function.
Definition unistr.h:2051
int32_t moveIndex32(int32_t index, int32_t delta) const
Move the code unit index along the string by delta code points.
static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length)
Create a UnicodeString from a UTF-32 string.
UnicodeString & replace(int32_t start, int32_t length, UChar32 srcChar)
Replace the characters in the range [start, start + length) with the code point srcChar.
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const S &text)
Constructor from text which is, or which is implicitly convertible to, a std::u16string_view or (if U...
Definition unistr.h:3338
UnicodeString & findAndReplace(const UnicodeString &oldText, const UnicodeString &newText)
Replace all occurrences of characters in oldText with the characters in newText.
Definition unistr.h:4840
int32_t countChar32(int32_t start=0, int32_t length=INT32_MAX) const
Count Unicode code points in the length char16_t code units of the string.
StringClass toUTF8String() const
Convert the UnicodeString to a UTF-8 string.
Definition unistr.h:1802
UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch)
Single UChar32 (code point) constructor.
static UnicodeString readOnlyAlias(const S &text)
Readonly-aliasing factory method.
Definition unistr.h:3662
UnicodeString & trim()
Trims leading and trailing whitespace from this UnicodeString.
unspecified_iterator begin() const
Definition unistr.h:1937
UBool startsWith(const UnicodeString &text) const
Determine if this starts with the characters in text.
Definition unistr.h:4728
UnicodeString & operator=(const UnicodeString &srcText)
Assignment operator.
UBool truncate(int32_t targetLength)
Truncate this UnicodeString to the targetLength.
Definition unistr.h:5129
UnicodeString & setTo(UBool isTerminated, ConstChar16Ptr text, int32_t textLength)
Aliasing setTo() function, analogous to the readonly-aliasing char16_t* constructor.
int32_t extract(char *dest, int32_t destCapacity, UConverter *cnv, UErrorCode &errorCode) const
Convert the UnicodeString into a codepage string using an existing UConverter.
StringClass & toUTF8String(StringClass &result) const
Convert the UnicodeString to UTF-8 and append the result to a standard string.
Definition unistr.h:1783
UnicodeString(UnicodeString &&src) noexcept
Move constructor; might leave src in bogus state.
void extract(int32_t start, int32_t length, Char16Ptr dst, int32_t dstStart=0) const
Copy the characters in the range [start, start + length) into the array dst, beginning at dstStart.
Definition unistr.h:4863
UnicodeString & toUpper()
Convert the characters in this to UPPER CASE following the conventions of the default locale.
UnicodeString(const char16_t *text, int32_t textLength)
char16_t* constructor.
UBool endsWith(const UnicodeString &text) const
Determine if this ends with the characters in text.
Definition unistr.h:4754
UnicodeString(const char *src, int32_t srcLength, UConverter *cnv, UErrorCode &errorCode)
char * / UConverter constructor.
UnicodeString(const char *codepageData, int32_t dataLength)
char* constructor.
static UClassID getStaticClassID()
ICU "poor man's RTTI", returns a UClassID for this class.
UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity)
Writable-aliasing uint16_t * constructor.
Definition unistr.h:3406
int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const
Convert the UnicodeString to UTF-32.
UBool isBogus() const
Determine if this object contains a valid string.
Definition unistr.h:4290
UnicodeString & toLower(const Locale &locale)
Convert the characters in this to lower case following the conventions of a specific locale.
UnicodeString & toTitle(BreakIterator *titleIter, const Locale &locale)
Titlecase this string.
UnicodeString & setTo(const UnicodeString &srcText, int32_t srcStart)
Set the text in the UnicodeString object to the characters in srcText in the range [srcStart,...
Definition unistr.h:4984
UnicodeString & foldCase(uint32_t options=0)
Case-folds the characters in this string.
int8_t compareBetween(int32_t start, int32_t limit, const UnicodeString &srcText, int32_t srcStart, int32_t srcLimit) const
Compare the characters bitwise in the range [start, limit) with the characters in srcText in the rang...
Definition unistr.h:4418
UnicodeString()
Construct an empty UnicodeString.
Definition unistr.h:4243
const char16_t * getTerminatedBuffer()
Get a read-only pointer to the internal buffer, making sure that it is NUL-terminated.
UnicodeString(const uint16_t *text, int32_t textLength)
uint16_t * constructor.
Definition unistr.h:3290
unspecified_reverse_iterator rbegin() const
Definition unistr.h:1949
UnicodeString & toUpper(const Locale &locale)
Convert the characters in this to UPPER CASE following the conventions of a specific locale.
UnicodeString & setCharAt(int32_t offset, char16_t ch)
Set the character at the specified offset to the specified character.
UnicodeString & append(const UnicodeString &srcText, int32_t srcStart, int32_t srcLength)
Append the characters in srcText in the range [srcStart, srcStart + srcLength) to the UnicodeString o...
Definition unistr.h:5021
virtual UClassID getDynamicClassID() const override
ICU "poor man's RTTI", returns a UClassID for the actual class.
int32_t extract(int32_t start, int32_t startLength, char *target, int32_t targetCapacity, enum EInvariant inv) const
Copy the characters in the range [start, start + startLength) into an array of characters.
UnicodeString & replaceBetween(int32_t start, int32_t limit, const UnicodeString &srcText)
Replace the characters in the range [start, limit) with the characters in srcText.
Definition unistr.h:4826
UnicodeString & removeBetween(int32_t start, int32_t limit=static_cast< int32_t >(INT32_MAX))
Remove the characters in the range [start, limit) from the UnicodeString object.
Definition unistr.h:5118
char16_t * getBuffer(int32_t minCapacity)
Get a read/write pointer to the internal buffer.
UnicodeString & toLower()
Convert the characters in this to lower case following the conventions of the default locale.
int32_t indexOf(const UnicodeString &text) const
Locate in this the first occurrence of the characters in text, using bitwise comparison.
Definition unistr.h:4578
UBool padTrailing(int32_t targetLength, char16_t padChar=0x0020)
Pad the end of this UnicodeString with the character padChar.
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData)
char* constructor.
void releaseBuffer(int32_t newLength=-1)
Release a read/write buffer on a UnicodeString object with an "open" getBuffer(minCapacity).
void extractBetween(int32_t start, int32_t limit, char16_t *dst, int32_t dstStart=0) const
Copy the characters in the range [start, limit) into the array dst, beginning at dstStart.
Definition unistr.h:4891
UnicodeString & remove()
Removes all characters from the UnicodeString object and clears the bogus flag.
Definition unistr.h:5095
void toUTF8(ByteSink &sink) const
Convert the UnicodeString to UTF-8 and write the result to a ByteSink.
char16_t value_type
C++ boilerplate.
Definition unistr.h:306
unspecified_iterator end() const
Definition unistr.h:1943
UBool isEmpty() const
Determine if this string is empty.
Definition unistr.h:4924
UnicodeString & retainBetween(int32_t start, int32_t limit=INT32_MAX)
Retain only the characters in the range [start, limit) from the UnicodeString object.
Definition unistr.h:5123
virtual UBool hasMetaData() const override
Replaceable API.
int8_t compareCodePointOrderBetween(int32_t start, int32_t limit, const UnicodeString &srcText, int32_t srcStart, int32_t srcLimit) const
Compare two Unicode strings in code point order.
Definition unistr.h:4479
U_CAPI int32_t u_strlen(const UChar *s)
U_COMMON_API UnicodeString unistr_internalConcat(const UnicodeString &s1, std::u16string_view s2)
U_COMMON_API UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
bool operator!=(const StringPiece &x, const StringPiece &y)
Global operator != for StringPiece.
U_COMMON_API UnicodeString operator+(const UnicodeString &s1, const UnicodeString &s2)
Creates a new UnicodeString from the concatenation of two others.
#define U_CALLCONV
Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary in callback function typedefs to make sure that the calling convention is compatible.
Definition platform.h:836
C++ API: Replaceable String.
C++ API: Central ICU header for including the C++ standard <string> header and for related definition...
C++ API: StringPiece: Read-only byte string wrapper class.
struct UConverter UConverter
Definition ucnv_err.h:96
#define UCONFIG_NO_BREAK_ITERATION
This switch turns off break iteration.
Definition uconfig.h:358
char16_t UChar
The base type for UTF-16 code units and pointers.
Definition umachine.h:400
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition umachine.h:449
#define INT32_MAX
The largest value a 32 bit signed integer can hold.
Definition umachine.h:208
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition umachine.h:269
#define U_CAPI
This is used to declare a function as a public ICU C API.
Definition umachine.h:110
#define U_SIZEOF_UCHAR
Number of bytes in a UChar (always 2).
Definition umachine.h:352
#define UNISTR_FROM_CHAR_EXPLICIT
This can be defined to be empty or "explicit".
Definition unistr.h:150
int32_t UStringCaseMapper(int32_t caseLocale, uint32_t options, icu::BreakIterator *iter, char16_t *dest, int32_t destCapacity, const char16_t *src, int32_t srcLength, icu::Edits *edits, UErrorCode &errorCode)
Internal string case mapping function type.
Definition unistr.h:71
#define UNISTR_FROM_STRING_EXPLICIT
This can be defined to be empty or "explicit".
Definition unistr.h:170
#define UNISTR_OBJECT_SIZE
Desired sizeof(UnicodeString) in bytes.
Definition unistr.h:208
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition uobject.h:96
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition utypes.h:509
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition utypes.h:315