summaryrefslogtreecommitdiff
path: root/intl/icu/source/i18n/coleitr.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'intl/icu/source/i18n/coleitr.cpp')
-rw-r--r--intl/icu/source/i18n/coleitr.cpp488
1 files changed, 488 insertions, 0 deletions
diff --git a/intl/icu/source/i18n/coleitr.cpp b/intl/icu/source/i18n/coleitr.cpp
new file mode 100644
index 0000000..7db3e5f
--- /dev/null
+++ b/intl/icu/source/i18n/coleitr.cpp
@@ -0,0 +1,488 @@
+/*
+*******************************************************************************
+* Copyright (C) 1996-2011, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*/
+
+/*
+* File coleitr.cpp
+*
+*
+*
+* Created by: Helena Shih
+*
+* Modification History:
+*
+* Date Name Description
+*
+* 6/23/97 helena Adding comments to make code more readable.
+* 08/03/98 erm Synched with 1.2 version of CollationElementIterator.java
+* 12/10/99 aliu Ported Thai collation support from Java.
+* 01/25/01 swquek Modified to a C++ wrapper calling C APIs (ucoliter.h)
+* 02/19/01 swquek Removed CollationElementsIterator() since it is
+* private constructor and no calls are made to it
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_COLLATION
+
+#include "unicode/coleitr.h"
+#include "unicode/ustring.h"
+#include "ucol_imp.h"
+#include "uassert.h"
+#include "cmemory.h"
+
+
+/* Constants --------------------------------------------------------------- */
+
+U_NAMESPACE_BEGIN
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator)
+
+/* CollationElementIterator public constructor/destructor ------------------ */
+
+/**
+* Copy constructor. Opens a new, empty element iterator on the collator used
+* by <code>other</code> and then copies the state of <code>other</code> into
+* it through operator=.
+* @param other the iterator to duplicate
+*/
+CollationElementIterator::CollationElementIterator(
+    const CollationElementIterator& other)
+    : UObject(other), isDataOwned_(TRUE)
+{
+    UErrorCode status = U_ZERO_ERROR;
+    m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0,
+                                &status);
+
+    // operator= dereferences m_data_, so only delegate to it when the
+    // underlying iterator was really created. A copy constructor has no
+    // channel to report the failure to the caller.
+    if (U_FAILURE(status) || m_data_ == NULL) {
+        return;
+    }
+
+    *this = other;
+}
+
+/**
+* Destructor. Closes the underlying element iterator with
+* ucol_closeElements(), but only when this object owns it (isDataOwned_).
+*/
+CollationElementIterator::~CollationElementIterator()
+{
+ if (isDataOwned_) {
+ ucol_closeElements(m_data_);
+ }
+}
+
+/* CollationElementIterator public methods --------------------------------- */
+
+/**
+* Returns the current offset of the iterator, as reported by
+* ucol_getOffset() for the underlying element iterator.
+* @return the value returned by ucol_getOffset()
+*/
+int32_t CollationElementIterator::getOffset() const
+{
+ return ucol_getOffset(m_data_);
+}
+
+/**
+* Get the ordering priority of the next character in the string.
+* Delegates to ucol_next() on the underlying element iterator.
+* @param status the error code status, passed through to ucol_next().
+* @return the next character's ordering. Returns NULLORDER if an error has
+* occurred or if the end of string has been reached
+*/
+int32_t CollationElementIterator::next(UErrorCode& status)
+{
+ return ucol_next(m_data_, &status);
+}
+
+/**
+* Inequality test; the exact negation of operator==.
+* @param other the iterator to compare with
+* @return TRUE when operator== reports the two iterators as different
+*/
+UBool CollationElementIterator::operator!=(
+ const CollationElementIterator& other) const
+{
+ return !(*this == other);
+}
+
+/**
+* Equality test. Two iterators compare equal when they wrap the same
+* underlying iterator, or when they use the same collator, iterate over
+* identical text, report the same offset, agree on the UCOL_ITER_HASLEN flag
+* (and on the position relative to the writable buffer when that flag is
+* clear) and have the same number of pending collation elements.
+* @param that the iterator to compare with
+* @return TRUE if the iterators are equal by the criteria above
+*/
+UBool CollationElementIterator::operator==(
+    const CollationElementIterator& that) const
+{
+    // Same object, or two wrappers sharing one underlying iterator.
+    if (this == &that || m_data_ == that.m_data_) {
+        return TRUE;
+    }
+
+    const collIterate *mine = &m_data_->iteratordata_;
+    const collIterate *theirs = &that.m_data_->iteratordata_;
+
+    // Both iterators must be driven by the same collator.
+    if (mine->coll != theirs->coll) {
+        return FALSE;
+    }
+
+    // The constructor and setText always set a length. Only the source
+    // text is compared here, not the contents of the normalization buffer.
+    const int myLength = (int)(mine->endp - mine->string);
+    const int theirLength = (int)(theirs->endp - theirs->string);
+    if (myLength != theirLength) {
+        return FALSE;
+    }
+    if (uprv_memcmp(mine->string, theirs->string,
+                    myLength * U_SIZEOF_UCHAR) != 0) {
+        return FALSE;
+    }
+
+    if (getOffset() != that.getOffset()) {
+        return FALSE;
+    }
+
+    // Normalization buffer state: the HASLEN flag has to agree on both
+    // sides, and when it is clear on both the positions relative to the
+    // writable buffers have to match as well.
+    const UBool mineHasLen = (mine->flags & UCOL_ITER_HASLEN) != 0;
+    const UBool theirsHasLen = (theirs->flags & UCOL_ITER_HASLEN) != 0;
+    if (mineHasLen != theirsHasLen) {
+        return FALSE;
+    }
+    if (!mineHasLen) {
+        if (mine->pos - mine->writableBuffer.getBuffer()
+                != theirs->pos - theirs->writableBuffer.getBuffer()) {
+            return FALSE;
+        }
+    }
+
+    // Finally compare how far each side is into its CE buffer.
+    return (mine->CEpos - mine->CEs) == (theirs->CEpos - theirs->CEs);
+}
+
+/**
+* Get the ordering priority of the previous collation element in the string.
+* Delegates to ucol_previous() on the underlying element iterator.
+* @param status the error code status.
+* @return the previous element's ordering. Returns NULLORDER if an error has
+* occurred or if the start of string has been reached.
+*/
+int32_t CollationElementIterator::previous(UErrorCode& status)
+{
+ return ucol_previous(m_data_, &status);
+}
+
+/**
+* Resets the cursor to the beginning of the string.
+* Delegates to ucol_reset() on the underlying element iterator.
+*/
+void CollationElementIterator::reset()
+{
+ ucol_reset(m_data_);
+}
+
+/**
+* Sets the offset of the iterator by delegating to ucol_setOffset() on the
+* underlying element iterator.
+* @param newOffset the offset to pass to ucol_setOffset()
+* @param status the error code status, passed through to ucol_setOffset()
+*/
+void CollationElementIterator::setOffset(int32_t newOffset,
+ UErrorCode& status)
+{
+ ucol_setOffset(m_data_, newOffset, &status);
+}
+
+/**
+* Sets the source to the new source string.
+* The iterator works on its own heap copy of the text. The copy is made
+* before the previous text is released, so a failed allocation leaves the
+* iterator exactly as it was.
+* @param source the new text to iterate over
+* @param status the error code status. Nothing is done if it already
+*               indicates a failure; it is set to U_MEMORY_ALLOCATION_ERROR
+*               if the copy of the text cannot be allocated.
+*/
+void CollationElementIterator::setText(const UnicodeString& source,
+                                       UErrorCode& status)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    int32_t length = source.length();
+    UChar *string = NULL;
+
+    if (length > 0) {
+        string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
+        /* test for NULL */
+        if (string == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        u_memcpy(string, source.getBuffer(), length);
+    }
+    else {
+        // An empty source still gets a real one-UChar buffer holding 0.
+        string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
+        /* test for NULL */
+        if (string == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        *string = 0;
+    }
+
+    // Release the old text only now that its replacement exists. Freeing it
+    // before the allocation would, on failure, leave a dangling pointer
+    // that the iterator still considers its own.
+    if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
+        uprv_free((UChar *)m_data_->iteratordata_.string);
+    }
+    m_data_->isWritable = TRUE;
+
+    /* Free offsetBuffer before initializing it. */
+    ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
+    uprv_init_collIterate(m_data_->iteratordata_.coll, string, length,
+                          &m_data_->iteratordata_, &status);
+
+    m_data_->reset_ = TRUE;
+}
+
+/**
+* Sets the source to the text of the new character iterator.
+* The text is copied into a freshly allocated buffer before the previously
+* held text is released.
+* @param source the character iterator supplying the new text
+* @param status the error code status. Nothing is done if it already
+*               indicates a failure; it is set to U_MEMORY_ALLOCATION_ERROR
+*               if the buffer for the text cannot be allocated.
+*/
+void CollationElementIterator::setText(CharacterIterator& source,
+                                       UErrorCode& status)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    const int32_t length = source.getLength();
+    const UBool isEmpty = (length == 0);
+
+    // An empty text still gets a one-UChar buffer.
+    UChar *buffer = (UChar *)uprv_malloc(
+        isEmpty ? U_SIZEOF_UCHAR : U_SIZEOF_UCHAR * length);
+    if (buffer == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    if (isEmpty) {
+        *buffer = 0;
+    }
+    else {
+        // Pull the whole text out of the iterator and copy it into the
+        // buffer that the element iterator is going to keep.
+        UnicodeString text;
+        source.getText(text);
+        u_memcpy(buffer, text.getBuffer(), length);
+    }
+
+    // Drop the previous text if this iterator owns it.
+    if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
+        uprv_free((UChar *)m_data_->iteratordata_.string);
+    }
+    m_data_->isWritable = TRUE;
+
+    /* Free offsetBuffer before initializing it. */
+    ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
+    uprv_init_collIterate(m_data_->iteratordata_.coll, buffer, length,
+                          &m_data_->iteratordata_, &status);
+    m_data_->reset_ = TRUE;
+}
+
+/**
+* Masks a collation order according to the strength of the collator behind
+* this iterator.
+* @param order the collation order to mask
+* @return order masked with PRIMARYDIFFERENCEONLY for primary strength,
+*         with SECONDARYDIFFERENCEONLY for secondary strength, and returned
+*         unchanged for every other strength
+*/
+int32_t CollationElementIterator::strengthOrder(int32_t order) const
+{
+    // Mask off the unwanted differences.
+    switch (ucol_getStrength(m_data_->iteratordata_.coll)) {
+    case UCOL_PRIMARY:
+        return order & RuleBasedCollator::PRIMARYDIFFERENCEONLY;
+    case UCOL_SECONDARY:
+        return order & RuleBasedCollator::SECONDARYDIFFERENCEONLY;
+    default:
+        return order;
+    }
+}
+
+/* CollationElementIterator private constructors/destructors --------------- */
+
+/**
+* This is the "real" constructor for this class; it constructs an iterator
+* over the source text using the specified collator.
+* The iterator works on its own heap copy of the text.
+* @param sourceText the text to iterate over
+* @param order the collator whose ucollator drives the iteration
+* @param status the error code status. Nothing is opened if it already
+*               indicates a failure; it is set to U_MEMORY_ALLOCATION_ERROR
+*               if the copy of the text cannot be allocated.
+*/
+CollationElementIterator::CollationElementIterator(
+    const UnicodeString& sourceText,
+    const RuleBasedCollator* order,
+    UErrorCode& status)
+    : isDataOwned_(TRUE)
+{
+    // Give m_data_ a defined value before any early return: isDataOwned_ is
+    // TRUE, so the destructor hands m_data_ to ucol_closeElements() even
+    // when construction bails out, and it must never see an indeterminate
+    // pointer.
+    m_data_ = NULL;
+
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    int32_t length = sourceText.length();
+    UChar *string = NULL;
+
+    if (length > 0) {
+        string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
+        /* test for NULL */
+        if (string == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        u_memcpy(string, sourceText.getBuffer(), length);
+    }
+    else {
+        // An empty source still gets a real one-UChar buffer holding 0.
+        string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
+        /* test for NULL */
+        if (string == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        *string = 0;
+    }
+    m_data_ = ucol_openElements(order->ucollator, string, length, &status);
+
+    if (m_data_ == NULL) {
+        // No iterator exists that could take ownership of the copy, so it
+        // has to be released here.
+        uprv_free(string);
+        return;
+    }
+    if (U_FAILURE(status)) {
+        return;
+    }
+    // Mark the text as owned by the iterator.
+    m_data_->isWritable = TRUE;
+}
+
+/**
+* This is the "real" constructor for this class; it constructs an iterator over
+* the source text using the specified collator.
+* The text of the character iterator is copied into a heap buffer that the
+* element iterator works on.
+* @param sourceText the character iterator supplying the text
+* @param order the collator whose ucollator drives the iteration
+* @param status the error code status. Nothing is opened if it already
+*               indicates a failure; it is set to U_MEMORY_ALLOCATION_ERROR
+*               if the buffer for the text cannot be allocated.
+*/
+CollationElementIterator::CollationElementIterator(
+    const CharacterIterator& sourceText,
+    const RuleBasedCollator* order,
+    UErrorCode& status)
+    : isDataOwned_(TRUE)
+{
+    // Give m_data_ a defined value before any early return: isDataOwned_ is
+    // TRUE, so the destructor hands m_data_ to ucol_closeElements() even
+    // when construction bails out, and it must never see an indeterminate
+    // pointer.
+    m_data_ = NULL;
+
+    if (U_FAILURE(status))
+        return;
+
+    // **** should I just drop this test? ****
+    /*
+    if ( sourceText.endIndex() != 0 )
+    {
+        // A CollationElementIterator is really a two-layered beast.
+        // Internally it uses a Normalizer to munge the source text into a form
+        // where all "composed" Unicode characters (such as \u00FC) are split into a
+        // normal character and a combining accent character.
+        // Afterward, CollationElementIterator does its own processing to handle
+        // expanding and contracting collation sequences, ignorables, and so on.
+
+        Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL
+            ? Normalizer::NO_OP : order->getDecomposition();
+
+        text = new Normalizer(sourceText, decomp);
+        if (text == NULL)
+            status = U_MEMORY_ALLOCATION_ERROR;
+    }
+    */
+    int32_t length = sourceText.getLength();
+    UChar *buffer;
+    if (length > 0) {
+        buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
+        /* test for NULL */
+        if (buffer == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        /*
+        Using this constructor will prevent buffer from being removed when
+        string gets removed
+        */
+        UnicodeString string(buffer, length, length);
+        ((CharacterIterator &)sourceText).getText(string);
+        const UChar *temp = string.getBuffer();
+        // string was constructed on top of buffer, so getText() may already
+        // have written the text there; copying a region onto itself with
+        // memcpy is undefined, so only copy from a different buffer.
+        if (temp != buffer) {
+            u_memcpy(buffer, temp, length);
+        }
+    }
+    else {
+        // An empty text still gets a real one-UChar buffer holding 0.
+        buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
+        /* test for NULL */
+        if (buffer == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        *buffer = 0;
+    }
+    m_data_ = ucol_openElements(order->ucollator, buffer, length, &status);
+
+    if (m_data_ == NULL) {
+        // No iterator exists that could take ownership of the buffer, so it
+        // has to be released here.
+        uprv_free(buffer);
+        return;
+    }
+    if (U_FAILURE(status)) {
+        return;
+    }
+    // Mark the text as owned by the iterator.
+    m_data_->isWritable = TRUE;
+}
+
+/* CollationElementIterator protected methods ----------------------------- */
+
+/**
+* Assignment operator. Copies the iteration state of <code>other</code> into
+* this iterator: a private duplicate of the source text, the writable
+* (normalization) buffer when other is inside it, the current position, the
+* buffered collation elements, the FCD position, the flags and the collator.
+* Both iterators must already have an underlying element iterator.
+* @param other the iterator to copy from
+* @return this iterator
+*/
+const CollationElementIterator& CollationElementIterator::operator=(
+    const CollationElementIterator& other)
+{
+    if (this != &other)
+    {
+        UCollationElements *ucolelem = this->m_data_;
+        UCollationElements *otherucolelem = other.m_data_;
+        collIterate *coliter = &(ucolelem->iteratordata_);
+        collIterate *othercoliter = &(otherucolelem->iteratordata_);
+
+        // checking only UCOL_ITER_HASLEN is not enough here as we may be in
+        // the normalization buffer
+        int length = (int)(othercoliter->endp - othercoliter->string);
+
+        // Release the text this iterator currently owns before its pointer
+        // is overwritten below; same ownership test as setText() uses.
+        if (ucolelem->isWritable && coliter->string != NULL) {
+            uprv_free((UChar *)coliter->string);
+            coliter->string = NULL;
+        }
+
+        ucolelem->reset_ = otherucolelem->reset_;
+        ucolelem->isWritable = TRUE;
+
+        /* create a duplicate of string */
+        if (length > 0) {
+            coliter->string = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR);
+            if(coliter->string != NULL) {
+                uprv_memcpy((UChar *)coliter->string, othercoliter->string,
+                            length * U_SIZEOF_UCHAR);
+            } else { // Error: couldn't allocate memory. No copying should be done
+                length = 0;
+            }
+        }
+        else {
+            coliter->string = NULL;
+        }
+
+        /* start and end of string */
+        coliter->endp = coliter->string == NULL ? NULL : coliter->string + length;
+
+        /* handle writable buffer here */
+
+        if (othercoliter->flags & UCOL_ITER_INNORMBUF) {
+            coliter->writableBuffer = othercoliter->writableBuffer;
+            coliter->writableBuffer.getTerminatedBuffer();
+        }
+
+        /* current position */
+        if (othercoliter->pos >= othercoliter->string &&
+            othercoliter->pos <= othercoliter->endp)
+        {
+            // other is positioned inside its source text: keep the same
+            // offset inside our duplicate.
+            U_ASSERT(coliter->string != NULL);
+            coliter->pos = coliter->string +
+                (othercoliter->pos - othercoliter->string);
+        }
+        else {
+            // otherwise keep the same offset relative to the writable buffer
+            coliter->pos = coliter->writableBuffer.getTerminatedBuffer() +
+                (othercoliter->pos - othercoliter->writableBuffer.getBuffer());
+        }
+
+        /* CE buffer */
+        int32_t CEsize;
+        if (coliter->extendCEs) {
+            // NOTE(review): this branch looks wrong in several ways and
+            // needs the collIterate definition to repair: it is selected by
+            // this iterator's extendCEs rather than other's, CEsize is the
+            // size of a pointer rather than of the extended buffer, and the
+            // allocation below replaces other's extendCEs (the source of
+            // the copy) instead of providing a destination. The code is
+            // left unchanged here - confirm and fix against ucol_imp.h.
+            uprv_memcpy(coliter->CEs, othercoliter->CEs, sizeof(uint32_t) * UCOL_EXPAND_CE_BUFFER_SIZE);
+            CEsize = sizeof(othercoliter->extendCEs);
+            if (CEsize > 0) {
+                othercoliter->extendCEs = (uint32_t *)uprv_malloc(CEsize);
+                uprv_memcpy(coliter->extendCEs, othercoliter->extendCEs, CEsize);
+            }
+            coliter->toReturn = coliter->extendCEs +
+                (othercoliter->toReturn - othercoliter->extendCEs);
+            coliter->CEpos = coliter->extendCEs + CEsize;
+        } else {
+            // CEsize is a number of collation elements, so it has to be
+            // scaled to bytes for the copy.
+            CEsize = (int32_t)(othercoliter->CEpos - othercoliter->CEs);
+            if (CEsize > 0) {
+                uprv_memcpy(coliter->CEs, othercoliter->CEs,
+                            CEsize * sizeof(uint32_t));
+            }
+            coliter->toReturn = coliter->CEs +
+                (othercoliter->toReturn - othercoliter->CEs);
+            coliter->CEpos = coliter->CEs + CEsize;
+        }
+
+        if (othercoliter->fcdPosition != NULL) {
+            U_ASSERT(coliter->string != NULL);
+            coliter->fcdPosition = coliter->string +
+                (othercoliter->fcdPosition
+                 - othercoliter->string);
+        }
+        else {
+            coliter->fcdPosition = NULL;
+        }
+        coliter->flags = othercoliter->flags/*| UCOL_ITER_HASLEN*/;
+        coliter->origFlags = othercoliter->origFlags;
+        coliter->coll = othercoliter->coll;
+        this->isDataOwned_ = TRUE;
+    }
+
+    return *this;
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_COLLATION */
+
+/* eof */