ICU 78.2 78.2
Loading...
Searching...
No Matches
brkiter.h
Go to the documentation of this file.
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4********************************************************************************
5* Copyright (C) 1997-2016, International Business Machines
6* Corporation and others. All Rights Reserved.
7********************************************************************************
8*
9* File brkiter.h
10*
11* Modification History:
12*
13* Date Name Description
14* 02/18/97 aliu Added typedef for TextCount. Made DONE const.
15* 05/07/97 aliu Fixed DLL declaration.
16* 07/09/97 jfitz Renamed BreakIterator and interface synced with JDK
17* 08/11/98 helena Sync-up JDK1.2.
18* 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods.
19********************************************************************************
20*/
21
22#ifndef BRKITER_H
23#define BRKITER_H
24
25#include "unicode/utypes.h"
26
31
32#include "unicode/utypes.h"
33
34#if U_SHOW_CPLUSPLUS_API
35
36#if UCONFIG_NO_BREAK_ITERATION
37
38U_NAMESPACE_BEGIN
39
40/*
41 * Allow the declaration of APIs with pointers to BreakIterator
42 * even when break iteration is removed from the build.
43 */
44class BreakIterator;
45
46U_NAMESPACE_END
47
48#else
49
50#include "unicode/uobject.h"
51#include "unicode/unistr.h"
52#include "unicode/chariter.h"
53#include "unicode/locid.h"
54#include "unicode/ubrk.h"
55#include "unicode/strenum.h"
56#include "unicode/utext.h"
57#include "unicode/umisc.h"
58
59U_NAMESPACE_BEGIN
60
107public:
113
127 U_COMMON_API virtual bool operator==(const BreakIterator&) const = 0;
128
135 U_COMMON_API bool operator!=(const BreakIterator& rhs) const { return !operator==(rhs); }
136
142 U_COMMON_API virtual BreakIterator* clone() const = 0;
143
149 U_COMMON_API virtual UClassID getDynamicClassID() const override = 0;
150
156
171 U_COMMON_API virtual UText* getUText(UText* fillIn, UErrorCode& status) const = 0;
172
184 U_COMMON_API virtual void setText(const UnicodeString& text) = 0;
185
204 U_COMMON_API virtual void setText(UText* text, UErrorCode& status) = 0;
205
215
216 enum {
222 DONE = static_cast<int32_t>(-1)
223 };
224
230 U_COMMON_API virtual int32_t first() = 0;
231
237 U_COMMON_API virtual int32_t last() = 0;
238
245 U_COMMON_API virtual int32_t previous() = 0;
246
253 U_COMMON_API virtual int32_t next() = 0;
254
260 U_COMMON_API virtual int32_t current() const = 0;
261
270 U_COMMON_API virtual int32_t following(int32_t offset) = 0;
271
280 U_COMMON_API virtual int32_t preceding(int32_t offset) = 0;
281
290 U_COMMON_API virtual UBool isBoundary(int32_t offset) = 0;
291
301 U_COMMON_API virtual int32_t next(int32_t n) = 0;
302
316 U_COMMON_API virtual int32_t getRuleStatus() const;
317
346 U_COMMON_API virtual int32_t getRuleStatusVec(int32_t* fillInVec,
347 int32_t capacity,
348 UErrorCode& status);
349
369 U_COMMON_API static BreakIterator* U_EXPORT2
370 createWordInstance(const Locale& where, UErrorCode& status);
371
393 U_COMMON_API static BreakIterator* U_EXPORT2
394 createLineInstance(const Locale& where, UErrorCode& status);
395
415 U_COMMON_API static BreakIterator* U_EXPORT2
417
436 U_COMMON_API static BreakIterator* U_EXPORT2
438
439#ifndef U_HIDE_DEPRECATED_API
462 U_COMMON_API static BreakIterator* U_EXPORT2
463 createTitleInstance(const Locale& where, UErrorCode& status);
464#endif /* U_HIDE_DEPRECATED_API */
465
475 U_COMMON_API static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
476
486 U_COMMON_API static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
487 const Locale& displayLocale,
488 UnicodeString& name);
489
498 U_COMMON_API static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
499 UnicodeString& name);
500
501#ifndef U_FORCE_HIDE_DEPRECATED_API
521 U_COMMON_API virtual BreakIterator* createBufferClone(void* stackBuffer,
522 int32_t& BufferSize,
523 UErrorCode& status) = 0;
524#endif // U_FORCE_HIDE_DEPRECATED_API
525
526#ifndef U_HIDE_DEPRECATED_API
527
535
536#endif /* U_HIDE_DEPRECATED_API */
537
538#if !UCONFIG_NO_SERVICE
555 const Locale& locale,
557 UErrorCode& status);
558
571 U_COMMON_API static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);
572
580#endif
581
588
589#ifndef U_HIDE_INTERNAL_API
596 U_COMMON_API const char* getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;
597#endif /* U_HIDE_INTERNAL_API */
598
625
626 private:
627 static BreakIterator* buildInstance(const Locale& loc, const char *type, UErrorCode& status);
628 static BreakIterator* createInstance(const Locale& loc, int32_t kind, UErrorCode& status);
629 static BreakIterator* makeInstance(const Locale& loc, int32_t kind, UErrorCode& status);
630
631 friend class ICUBreakIteratorFactory;
632 friend class ICUBreakIteratorService;
633
634protected:
635 // Do not enclose protected default/copy constructors with #ifndef U_HIDE_INTERNAL_API
636 // or else the compiler will create public ones.
641#ifndef U_HIDE_INTERNAL_API
643 U_COMMON_API BreakIterator(const Locale& valid, const Locale& actual);
646#endif /* U_HIDE_INTERNAL_API */
647
648private:
649
650 Locale actualLocale;
651 Locale validLocale;
652 Locale requestLocale;
653};
654
655#ifndef U_HIDE_DEPRECATED_API
656
658{
659 return false;
660}
661
662#endif /* U_HIDE_DEPRECATED_API */
663
664U_NAMESPACE_END
665
666#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
667
668#endif /* U_SHOW_CPLUSPLUS_API */
669
670#endif // BRKITER_H
671//eof
C++ API: Character Iterator.
virtual U_COMMON_API void adoptText(CharacterIterator *it)=0
Change the text over which this operates.
virtual U_COMMON_API UBool isBoundary(int32_t offset)=0
Return true if the specified position is a boundary position.
static U_COMMON_API BreakIterator * createWordInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for word-breaks using the given locale.
static U_COMMON_API UBool unregister(URegistryKey key, UErrorCode &status)
Unregister a previously-registered BreakIterator using the key returned from the register call.
U_COMMON_API Locale getLocale(ULocDataLocaleType type, UErrorCode &status) const
Returns the locale for this break iterator.
virtual U_COMMON_API int32_t following(int32_t offset)=0
Advance the iterator to the first boundary following the specified offset.
virtual U_COMMON_API int32_t current() const =0
Return character index of the current iterator position within the text.
static U_COMMON_API const Locale * getAvailableLocales(int32_t &count)
Get the set of Locales for which TextBoundaries are installed.
static U_COMMON_API BreakIterator * createLineInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for line-breaks using specified locale.
virtual U_COMMON_API BreakIterator * clone() const =0
Return a polymorphic copy of this object.
virtual U_COMMON_API int32_t getRuleStatus() const
For RuleBasedBreakIterators, return the status tag from the break rule that determined the boundary a...
virtual U_COMMON_API BreakIterator * createBufferClone(void *stackBuffer, int32_t &BufferSize, UErrorCode &status)=0
Deprecated functionality.
U_COMMON_API bool operator!=(const BreakIterator &rhs) const
Returns the complement of the result of operator==.
Definition brkiter.h:135
U_COMMON_API BreakIterator & operator=(const BreakIterator &other)
U_COMMON_API BreakIterator()
U_COMMON_API const char * getLocaleID(ULocDataLocaleType type, UErrorCode &status) const
Get the locale for this break iterator object.
virtual U_COMMON_API void setText(UText *text, UErrorCode &status)=0
Reset the break iterator to operate over the text represented by the UText.
virtual U_COMMON_API bool operator==(const BreakIterator &) const =0
Return true if another object is semantically equal to this one.
virtual U_COMMON_API CharacterIterator & getText() const =0
Return a CharacterIterator over the text being analyzed.
static U_COMMON_API BreakIterator * createCharacterInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for character-breaks using specified locale. Returns an instance of a BreakIterat...
virtual U_COMMON_API int32_t previous()=0
Set the iterator position to the boundary preceding the current boundary.
static U_COMMON_API BreakIterator * createSentenceInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for sentence-breaks using specified locale. Returns an instance of a BreakIterato...
static U_COMMON_API StringEnumeration * getAvailableLocales()
Return a StringEnumeration over the locales available at the time of the call, including registered l...
virtual U_COMMON_API int32_t preceding(int32_t offset)=0
Set the iterator position to the first boundary preceding the specified offset.
virtual U_COMMON_API ~BreakIterator()
destructor
static U_COMMON_API BreakIterator * createTitleInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for title-casing breaks using the specified locale. Returns an instance of a Brea...
virtual U_COMMON_API int32_t first()=0
Sets the current iteration position to the beginning of the text, position zero.
virtual U_COMMON_API int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status)
For RuleBasedBreakIterators, get the status (tag) values from the break rule(s) that determined the b...
virtual U_COMMON_API void setText(const UnicodeString &text)=0
Change the text over which this operates.
static U_COMMON_API URegistryKey registerInstance(BreakIterator *toAdopt, const Locale &locale, UBreakIteratorType kind, UErrorCode &status)
Register a new break iterator of the indicated kind, to use in the given locale.
U_COMMON_API BreakIterator(const Locale &valid, const Locale &actual)
U_COMMON_API UBool isBufferClone()
Determine whether the BreakIterator was created in user memory by createBufferClone(),...
Definition brkiter.h:657
static U_COMMON_API UnicodeString & getDisplayName(const Locale &objectLocale, UnicodeString &name)
Get name of the object for the desired Locale, in the language of the default locale.
virtual U_COMMON_API int32_t last()=0
Set the iterator position to the index immediately BEYOND the last character in the text being scanne...
U_COMMON_API BreakIterator(const BreakIterator &other)
static U_COMMON_API UnicodeString & getDisplayName(const Locale &objectLocale, const Locale &displayLocale, UnicodeString &name)
Get name of the object for the desired Locale, in the desired language.
virtual U_COMMON_API int32_t next()=0
Advance the iterator to the boundary following the current boundary.
virtual U_COMMON_API int32_t next(int32_t n)=0
Set the iterator position to the nth boundary from the current boundary.
virtual U_COMMON_API BreakIterator & refreshInputText(UText *input, UErrorCode &status)=0
Set the subject text string upon which the break iterator is operating without changing any other asp...
virtual U_COMMON_API UText * getUText(UText *fillIn, UErrorCode &status) const =0
Get a UText for the text being analyzed.
virtual U_COMMON_API UClassID getDynamicClassID() const override=0
Return a polymorphic class ID for this object.
@ DONE
DONE is returned by previous() and next() after all valid boundaries have been returned.
Definition brkiter.h:222
Abstract class that defines an API for iteration on text objects.
Definition chariter.h:361
A Locale object represents a specific geographical, political, or cultural region.
Definition locid.h:198
Base class for 'pure' C++ implementations of uenum api.
Definition strenum.h:61
UObject is the common ICU "boilerplate" class.
Definition uobject.h:222
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition unistr.h:303
C++ API: Locale ID object.
U_COMMON_API UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
C++ API: String Enumeration.
UText struct.
Definition utext.h:1328
C API: BreakIterator.
UBreakIteratorType
The possible types of text boundaries.
Definition ubrk.h:102
ULocDataLocaleType
Constants for *_getLocale(). Allow user to select whether she wants information on requested,...
Definition uloc.h:338
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition umachine.h:269
C API: Miscellaneous definitions.
const void * URegistryKey
Opaque type returned by registerInstance, registerFactory and unregister for service registration.
Definition umisc.h:57
C++ API: Unicode String.
C++ API: Common ICU base class UObject.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition uobject.h:96
C API: Abstract Unicode Text API.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition utypes.h:509
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition utypes.h:315
#define U_COMMON_API_CLASS
Set to export library symbols from inside the common library, and to import them from outside,...
Definition utypes.h:456