1 files changed, 488 insertions, 0 deletions
diff --git a/intl/icu/source/i18n/coleitr.cpp b/intl/icu/source/i18n/coleitr.cpp
new file mode 100644
index 0000000..7db3e5f
--- /dev/null
+++ b/intl/icu/source/i18n/coleitr.cpp
@@ -0,0 +1,488 @@
+/*
+*******************************************************************************
+* Copyright (C) 1996-2011, International Business Machines Corporation and    *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*/
+
+/*
+* File coleitr.cpp
+*
+* 
+*
+* Created by: Helena Shih
+*
+* Modification History:
+*
+*  Date      Name        Description
+*
+*  6/23/97   helena      Adding comments to make code more readable.
+* 08/03/98   erm         Synched with 1.2 version of CollationElementIterator.java
+* 12/10/99   aliu        Ported Thai collation support from Java.
+* 01/25/01   swquek      Modified to a C++ wrapper calling C APIs (ucoliter.h)
+* 02/19/01   swquek      Removed CollationElementsIterator() since it is 
+*                        private constructor and no calls are made to it
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_COLLATION
+
+#include "unicode/coleitr.h"
+#include "unicode/ustring.h"
+#include "ucol_imp.h"
+#include "uassert.h"
+#include "cmemory.h"
+
+
+/* Constants --------------------------------------------------------------- */
+
+U_NAMESPACE_BEGIN
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator)
+
+/* CollationElementIterator public constructor/destructor ------------------ */
+
+/**
+* Copy constructor: opens a fresh element iterator on the collator used by
+* other and then duplicates other's iteration state through operator=.
+* This constructor has no UErrorCode parameter, so a failure to open the
+* underlying iterator cannot be reported to the caller. In that case the new
+* object is left without data (m_data_ == NULL) and isDataOwned_ is cleared so
+* that the destructor does not try to close anything.
+* @param other the iterator to copy
+*/
+CollationElementIterator::CollationElementIterator(
+                                         const CollationElementIterator& other)
+                                         : UObject(other), isDataOwned_(TRUE)
+{
+    UErrorCode status = U_ZERO_ERROR;
+    m_data_ = NULL;
+    if (other.m_data_ != NULL) {
+        m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0,
+                                    &status);
+    }
+
+    if (m_data_ == NULL) {
+        // Nothing was opened, so there is nothing to own or to copy into.
+        isDataOwned_ = FALSE;
+        return;
+    }
+    if (U_FAILURE(status)) {
+        // The iterator object exists (and is closed by the destructor) but
+        // could not be initialized; do not copy state into it.
+        return;
+    }
+
+    *this = other;
+}
+
+/**
+* Destructor. Closes the underlying element iterator, but only when this
+* object owns it.
+*/
+CollationElementIterator::~CollationElementIterator()
+{
+    if (!isDataOwned_) {
+        return;
+    }
+    ucol_closeElements(m_data_);
+}
+
+/* CollationElementIterator public methods --------------------------------- */
+
+/**
+* Reports the current offset of the underlying element iterator.
+* @return the value returned by ucol_getOffset() for this iterator's data
+*/
+int32_t CollationElementIterator::getOffset() const
+{
+    const int32_t offset = ucol_getOffset(m_data_);
+    return offset;
+}
+
+/**
+* Get the ordering priority of the next collation element in the string.
+* @param status the error code status, passed through to ucol_next().
+* @return the next element's ordering. Returns NULLORDER if an error has
+*         occurred or if the end of string has been reached.
+*/
+int32_t CollationElementIterator::next(UErrorCode& status)
+{
+    const int32_t order = ucol_next(m_data_, &status);
+    return order;
+}
+
+/**
+* Inequality: the exact negation of operator==.
+* @param other the iterator to compare with
+* @return TRUE if the two iterators are not equal, FALSE otherwise
+*/
+UBool CollationElementIterator::operator!=(
+                                  const CollationElementIterator& other) const
+{
+    if (*this == other) {
+        return FALSE;
+    }
+    return TRUE;
+}
+
+/**
+* Equality. Two iterators are equal when they are the same object or share
+* the same underlying data, or when they use the same collator, iterate over
+* identical text, report the same offset, agree on whether they are iterating
+* with a known length (UCOL_ITER_HASLEN), are at the same position in the
+* normalization buffer when that flag is clear, and have buffered the same
+* number of collation elements.
+* @param that the iterator to compare with
+* @return TRUE if the iterators are equal, FALSE otherwise
+*/
+UBool CollationElementIterator::operator==(
+                                    const CollationElementIterator& that) const
+{
+    // same object, or two wrappers around the same underlying iterator
+    if (this == &that || m_data_ == that.m_data_) {
+        return TRUE;
+    }
+
+    const collIterate &mine   = m_data_->iteratordata_;
+    const collIterate &theirs = that.m_data_->iteratordata_;
+
+    // both must use the same collator
+    if (mine.coll != theirs.coll) {
+        return FALSE;
+    }
+
+    // The constructor and setText always set a length; only the source text
+    // is compared here, not the contents of the normalization buffer.
+    const int myLength    = (int)(mine.endp - mine.string);
+    const int theirLength = (int)(theirs.endp - theirs.string);
+    if (myLength != theirLength) {
+        return FALSE;
+    }
+    if (uprv_memcmp(mine.string, theirs.string,
+                    myLength * U_SIZEOF_UCHAR) != 0) {
+        return FALSE;
+    }
+
+    if (getOffset() != that.getOffset()) {
+        return FALSE;
+    }
+
+    // normalization buffer state
+    const UBool mineHasLength   = (mine.flags & UCOL_ITER_HASLEN) != 0;
+    const UBool theirsHasLength = (theirs.flags & UCOL_ITER_HASLEN) != 0;
+    if (mineHasLength != theirsHasLength) {
+        return FALSE;
+    }
+    if (!mineHasLength) {
+        // both are in the normalization buffer: positions must match
+        if (mine.pos - mine.writableBuffer.getBuffer()
+            != theirs.pos - theirs.writableBuffer.getBuffer()) {
+            return FALSE;
+        }
+    }
+
+    // finally, the position in the CE buffer
+    return (mine.CEpos - mine.CEs) == (theirs.CEpos - theirs.CEs);
+}
+
+/**
+* Get the ordering priority of the previous collation element in the string.
+* @param status the error code status, passed through to ucol_previous().
+* @return the previous element's ordering. Returns NULLORDER if an error has
+*         occurred or if the start of string has been reached.
+*/
+int32_t CollationElementIterator::previous(UErrorCode& status)
+{
+    const int32_t order = ucol_previous(m_data_, &status);
+    return order;
+}
+
+/**
+* Resets the cursor to the beginning of the string by delegating to
+* ucol_reset() on the underlying iterator.
+*/
+void CollationElementIterator::reset()
+{
+    UCollationElements *elements = m_data_;
+    ucol_reset(elements);
+}
+
+/**
+* Moves the iterator to a new offset by delegating to ucol_setOffset().
+* @param newOffset the offset to move to
+* @param status the error code status, passed through to ucol_setOffset()
+*/
+void CollationElementIterator::setOffset(int32_t newOffset,
+                                         UErrorCode& status)
+{
+    UCollationElements *elements = m_data_;
+    ucol_setOffset(elements, newOffset, &status);
+}
+
+/**
+* Sets the source to the new source string.
+* The iterator keeps its own heap copy of the text. The copy is allocated
+* before the previously owned text is released, so that an allocation failure
+* leaves the iterator pointing at its old, still valid text.
+* @param source the new text to iterate over
+* @param status the error code status. Nothing is done if it already indicates
+*               a failure; set to U_MEMORY_ALLOCATION_ERROR if the copy of the
+*               text cannot be allocated.
+*/
+void CollationElementIterator::setText(const UnicodeString& source,
+                                       UErrorCode& status)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    int32_t length = source.length();
+    UChar *string = NULL;
+    if (length > 0) {
+        string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
+        /* test for NULL */
+        if (string == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        u_memcpy(string, source.getBuffer(), length);
+    }
+    else {
+        /* empty text is represented by a single NUL code unit */
+        string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
+        /* test for NULL */
+        if (string == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        *string = 0;
+    }
+
+    /*
+    Release the old text only now that the replacement exists; freeing it
+    earlier would leave a dangling pointer behind on the error returns above.
+    */
+    if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
+        uprv_free((UChar *)m_data_->iteratordata_.string);
+    }
+    m_data_->isWritable = TRUE;
+
+    /* Free offsetBuffer before initializing it. */
+    ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
+    uprv_init_collIterate(m_data_->iteratordata_.coll, string, length,
+        &m_data_->iteratordata_, &status);
+
+    m_data_->reset_   = TRUE;
+}
+
+/**
+* Sets the source to the text of the given character iterator.
+* The text is copied into a heap buffer owned by this iterator; any text the
+* iterator owned before is released once the new copy exists.
+* @param source the character iterator supplying the new text
+* @param status the error code status. Nothing is done if it already indicates
+*               a failure; set to U_MEMORY_ALLOCATION_ERROR if the copy of the
+*               text cannot be allocated.
+*/
+void CollationElementIterator::setText(CharacterIterator& source,
+                                       UErrorCode& status)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    const int32_t length = source.getLength();
+
+    /* empty text still gets a buffer: a single NUL code unit */
+    const int32_t unitsToAllocate = (length == 0) ? 1 : length;
+    UChar *buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * unitsToAllocate);
+    /* test for NULL */
+    if (buffer == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    if (length == 0) {
+        *buffer = 0;
+    }
+    else {
+        /* fetch the text into a temporary string and copy it out */
+        UnicodeString text;
+        source.getText(text);
+        u_memcpy(buffer, text.getBuffer(), length);
+    }
+
+    collIterate &iterState = m_data_->iteratordata_;
+
+    /* release the text owned so far, then take ownership of the new copy */
+    if (m_data_->isWritable && iterState.string != NULL) {
+        uprv_free((UChar *)iterState.string);
+    }
+    m_data_->isWritable = TRUE;
+
+    /* Free offsetBuffer before initializing it. */
+    ucol_freeOffsetBuffer(&iterState);
+    uprv_init_collIterate(iterState.coll, buffer, length, &iterState, &status);
+    m_data_->reset_ = TRUE;
+}
+
+/**
+* Masks a collation order according to the strength of this iterator's
+* collator.
+* @param order the collation order to mask
+* @return order masked with RuleBasedCollator::PRIMARYDIFFERENCEONLY for
+*         primary strength, with RuleBasedCollator::SECONDARYDIFFERENCEONLY
+*         for secondary strength, and unchanged for any other strength
+*/
+int32_t CollationElementIterator::strengthOrder(int32_t order) const
+{
+    // Mask off the unwanted differences.
+    switch (ucol_getStrength(m_data_->iteratordata_.coll)) {
+    case UCOL_PRIMARY:
+        order &= RuleBasedCollator::PRIMARYDIFFERENCEONLY;
+        break;
+    case UCOL_SECONDARY:
+        order &= RuleBasedCollator::SECONDARYDIFFERENCEONLY;
+        break;
+    default:
+        break;
+    }
+
+    return order;
+}
+
+/* CollationElementIterator private constructors/destructors --------------- */
+
+/**
+* This is the "real" constructor for this class; it constructs an iterator
+* over the source text using the specified collator.
+* The text is copied into a heap buffer that the iterator owns.
+* m_data_ starts out as NULL and isDataOwned_ as FALSE, so that the destructor
+* never closes an uninitialized pointer when construction stops early.
+* @param sourceText the text to iterate over
+* @param order the collator whose rules drive the iteration
+* @param status the error code status. Nothing is opened if it already
+*               indicates a failure; set to U_MEMORY_ALLOCATION_ERROR if the
+*               copy of the text cannot be allocated.
+*/
+CollationElementIterator::CollationElementIterator(
+                                               const UnicodeString& sourceText,
+                                               const RuleBasedCollator* order,
+                                               UErrorCode& status)
+                                               : isDataOwned_(FALSE)
+{
+    m_data_ = NULL;
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    int32_t length = sourceText.length();
+    UChar *string = NULL;
+
+    if (length > 0) {
+        string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
+        /* test for NULL */
+        if (string == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        u_memcpy(string, sourceText.getBuffer(), length);
+    }
+    else {
+        /* empty text is represented by a single NUL code unit */
+        string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
+        /* test for NULL */
+        if (string == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        *string = 0;
+    }
+    m_data_ = ucol_openElements(order->ucollator, string, length, &status);
+
+    if (m_data_ == NULL) {
+        /* no iterator exists that could hold on to the copy, so release it */
+        uprv_free(string);
+        return;
+    }
+    /* from here on the destructor is responsible for closing m_data_ */
+    isDataOwned_ = TRUE;
+
+    /* Test for buffer overflows */
+    if (U_FAILURE(status)) {
+        /* NOTE(review): string is neither freed nor marked as owned on this
+           path (as before); confirm what ucol_openElements keeps on failure */
+        return;
+    }
+    m_data_->isWritable = TRUE;
+}
+
+/**
+* This is the "real" constructor for this class; it constructs an iterator over
+* the text of a character iterator using the specified collator.
+* The text is copied into a heap buffer that the iterator owns.
+* m_data_ starts out as NULL and isDataOwned_ as FALSE, so that the destructor
+* never closes an uninitialized pointer when construction stops early.
+* @param sourceText the character iterator supplying the text
+* @param order the collator whose rules drive the iteration
+* @param status the error code status. Nothing is opened if it already
+*               indicates a failure; set to U_MEMORY_ALLOCATION_ERROR if the
+*               copy of the text cannot be allocated.
+*/
+CollationElementIterator::CollationElementIterator(
+                                           const CharacterIterator& sourceText,
+                                           const RuleBasedCollator* order,
+                                           UErrorCode& status)
+                                           : isDataOwned_(FALSE)
+{
+    m_data_ = NULL;
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    int32_t length = sourceText.getLength();
+    UChar *buffer;
+    if (length > 0) {
+        buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
+        /* test for NULL */
+        if (buffer == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        /*
+        The temporary string is constructed on top of buffer, so buffer is not
+        released when the string goes out of scope. getText() is not a const
+        method, hence the const_cast.
+        */
+        UnicodeString string(buffer, length, length);
+        const_cast<CharacterIterator &>(sourceText).getText(string);
+        const UChar *temp = string.getBuffer();
+        /* skip the copy when the text is already in buffer: source and
+           destination of u_memcpy must not be the same memory */
+        if (temp != buffer) {
+            u_memcpy(buffer, temp, length);
+        }
+    }
+    else {
+        /* empty text is represented by a single NUL code unit */
+        buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
+        /* test for NULL */
+        if (buffer == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        *buffer = 0;
+    }
+    m_data_ = ucol_openElements(order->ucollator, buffer, length, &status);
+
+    if (m_data_ == NULL) {
+        /* no iterator exists that could hold on to the copy, so release it */
+        uprv_free(buffer);
+        return;
+    }
+    /* from here on the destructor is responsible for closing m_data_ */
+    isDataOwned_ = TRUE;
+
+    /* Test for buffer overflows */
+    if (U_FAILURE(status)) {
+        /* NOTE(review): buffer is neither freed nor marked as owned on this
+           path (as before); confirm what ucol_openElements keeps on failure */
+        return;
+    }
+    m_data_->isWritable = TRUE;
+}
+
+/* CollationElementIterator protected methods ----------------------------- */
+
+/**
+* Assignment: turns this iterator into an independent duplicate of other.
+* The source text, the normalization buffer (when other is iterating in it),
+* the current position, the buffered collation elements, the FCD position,
+* the flags and the collator are copied. other is never modified.
+* Nothing is done for self-assignment, when both objects already share the
+* same underlying data, or when either object has no underlying data.
+* There is no way to report an allocation failure from here: if the text
+* cannot be copied the result iterates over no text, and if the extended CE
+* buffer cannot be copied the buffered collation elements are dropped.
+* @param other the iterator to copy from
+* @return this iterator
+*/
+const CollationElementIterator& CollationElementIterator::operator=(
+                                         const CollationElementIterator& other)
+{
+    if (this == &other || m_data_ == other.m_data_ ||
+        m_data_ == NULL || other.m_data_ == NULL)
+    {
+        return *this;
+    }
+
+    UCollationElements       *ucolelem      = this->m_data_;
+    const UCollationElements *otherucolelem = other.m_data_;
+    collIterate              *coliter       = &(ucolelem->iteratordata_);
+    const collIterate        *othercoliter  = &(otherucolelem->iteratordata_);
+
+    // checking only UCOL_ITER_HASLEN is not enough here as we may be in 
+    // the normalization buffer
+    int length = (int)(othercoliter->endp - othercoliter->string);
+
+    /* release the text this iterator owned so far before replacing it */
+    if (ucolelem->isWritable && coliter->string != NULL) {
+        uprv_free((UChar *)coliter->string);
+    }
+    ucolelem->reset_         = otherucolelem->reset_;
+    ucolelem->isWritable     = TRUE;
+
+    /* create a duplicate of string */
+    UChar *newString = NULL;
+    if (length > 0) {
+        newString = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR);
+        if (newString != NULL) {
+            uprv_memcpy(newString, othercoliter->string,
+                length * U_SIZEOF_UCHAR);
+        } else { // Error: couldn't allocate memory. No copying should be done
+            length = 0;
+        }
+    }
+    else {
+        /* empty text: a single NUL code unit, as setText() does, so that the
+           position pointers below have something valid to point at */
+        length = 0;
+        newString = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
+        if (newString != NULL) {
+            *newString = 0;
+        }
+    }
+    coliter->string = newString;
+
+    /* start and end of string */
+    coliter->endp = newString == NULL ? NULL : newString + length;
+
+    /* handle writable buffer here */
+
+    if (othercoliter->flags & UCOL_ITER_INNORMBUF) {
+        coliter->writableBuffer = othercoliter->writableBuffer;
+        coliter->writableBuffer.getTerminatedBuffer();
+    }
+
+    /* current position */
+    if (othercoliter->pos >= othercoliter->string && 
+        othercoliter->pos <= othercoliter->endp)
+    {
+        /* inside the source text; without a copy of the text there is no
+           position to point at */
+        coliter->pos = newString == NULL ? NULL :
+            newString + (othercoliter->pos - othercoliter->string);
+    }
+    else {
+        /* inside the normalization buffer */
+        coliter->pos = coliter->writableBuffer.getTerminatedBuffer() + 
+            (othercoliter->pos - othercoliter->writableBuffer.getBuffer());
+    }
+
+    /* CE buffer */
+    /* drop this iterator's own extended buffer, it is rebuilt if needed */
+    if (coliter->extendCEs != NULL) {
+        uprv_free(coliter->extendCEs);
+        coliter->extendCEs = NULL;
+        coliter->extendCEsSize = 0;
+    }
+
+    /* other keeps its CEs either in the heap extension or in the inline array;
+       toReturn and CEpos are offsets into whichever one is in use */
+    const uint32_t *otherCEs = othercoliter->extendCEs != NULL
+        ? othercoliter->extendCEs : othercoliter->CEs;
+    int32_t CEcount        = (int32_t)(othercoliter->CEpos - otherCEs);
+    int32_t toReturnOffset = (int32_t)(othercoliter->toReturn - otherCEs);
+    uint32_t *CEs = coliter->CEs;
+
+    if (othercoliter->extendCEs != NULL) {
+        /* NOTE(review): assumes extendCEsSize holds the capacity of extendCEs
+           in elements - confirm against the collIterate declaration */
+        int32_t capacity = othercoliter->extendCEsSize;
+        if (capacity < CEcount) {
+            capacity = CEcount;
+        }
+        if (capacity < 1) {
+            capacity = 1;
+        }
+        uint32_t *newCEs =
+            (uint32_t *)uprv_malloc(capacity * sizeof(uint32_t));
+        if (newCEs != NULL) {
+            coliter->extendCEs     = newCEs;
+            coliter->extendCEsSize = capacity;
+            CEs = newCEs;
+        } else { // Error: couldn't allocate memory. Drop the buffered CEs
+            CEcount        = 0;
+            toReturnOffset = 0;
+        }
+    }
+    if (CEcount > 0) {
+        /* the size is in bytes, CEcount is in elements */
+        uprv_memcpy(CEs, otherCEs, CEcount * sizeof(uint32_t));
+    }
+    coliter->toReturn = CEs + toReturnOffset;
+    coliter->CEpos    = CEs + CEcount;
+
+    /* NOTE(review): the offset buffer state of other is not duplicated here,
+       as before - confirm whether it needs to be */
+
+    if (othercoliter->fcdPosition != NULL && newString != NULL) {
+        coliter->fcdPosition = newString + 
+            (othercoliter->fcdPosition 
+            - othercoliter->string);
+    }
+    else {
+        coliter->fcdPosition = NULL;
+    }
+    coliter->flags       = othercoliter->flags/*| UCOL_ITER_HASLEN*/;
+    coliter->origFlags   = othercoliter->origFlags;
+    coliter->coll = othercoliter->coll;
+    this->isDataOwned_ = TRUE;
+
+    return *this;
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_COLLATION */
+
+/* eof */