2006-01-25 Anthony Balkissoon <abalkiss@redhat.com>

* java/lang/Character.java: (codePointCount(char[], int, int)): New API method. (codePointCount(CharSequence, int, int)): Likewise.
author: Anthony Balkissoon <abalkiss@redhat.com> 2006-01-25 22:00:43 +0000
committer: Anthony Balkissoon <abalkiss@redhat.com> 2006-01-25 22:00:43 +0000
commit: b0949fe19e1b1db83526b14c62bd94cebe209ed7 (patch)
tree: 1f8e3d2af28720e7522f3aabcd491d5e6d1009a9
parent: 4953d6b9346a0de4429ae7a32fae4a38a3129f32 (diff)
download: classpath-b0949fe19e1b1db83526b14c62bd94cebe209ed7.tar.gz
2 files changed, 79 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog
index e26e9b765..2dfbbb6fe 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2006-01-25  Anthony Balkissoon  <abalkiss@redhat.com>
+
+	* java/lang/Character.java:
+	(codePointCount(char[], int, int)): New API method.
+	(codePointCount(CharSequence, int, int)): Likewise.
+
 2006-01-25  Audrius Meskauskas  <AudriusA@Bioinformatics.org>
 
 	PR 25205
diff --git a/java/lang/Character.java b/java/lang/Character.java
index 3c88ff805..baffe1a1a 100644
--- a/java/lang/Character.java
+++ b/java/lang/Character.java
@@ -1739,6 +1739,79 @@ public final class Character implements Serializable, Comparable
                | (1 << MODIFIER_LETTER)
                | (1 << OTHER_LETTER))) != 0;
   }
+  
+  /**
+   * Counts the Unicode code points found in a sub-range of a CharSequence.
+   * The range starts at index beginIndex (inclusive) and stops at index
+   * endIndex (exclusive).  A high surrogate that is immediately followed,
+   * inside the range, by a low surrogate forms a supplementary character
+   * and contributes one code point; every other char, including an
+   * unpaired surrogate, contributes one code point by itself.
+   * @param seq the CharSequence to inspect
+   * @param beginIndex index of the first char of the range
+   * @param endIndex index just past the last char of the range
+   * @return the number of Unicode code points in the given range of the
+   * sequence
+   * @throws NullPointerException if seq is null
+   * @throws IndexOutOfBoundsException if beginIndex is negative, endIndex is
+   * larger than the length of seq, or if beginIndex is greater than endIndex.
+   * @since 1.5
+   */
+  public static int codePointCount(CharSequence seq, int beginIndex,
+                                   int endIndex)
+  {
+    final int length = seq.length();
+    if (beginIndex < 0 || endIndex > length || beginIndex > endIndex)
+      throw new IndexOutOfBoundsException();
+    int codePoints = 0;
+    int pos = beginIndex;
+    while (pos < endIndex)
+      {
+        codePoints++;
+        // Step past the low half of a surrogate pair so it is not recounted.
+        if (isHighSurrogate(seq.charAt(pos++)) && pos < endIndex
+            && isLowSurrogate(seq.charAt(pos)))
+          pos++;
+      }
+    return codePoints;
+  }
+  
+  /**
+   * Returns the number of Unicode code points in the specified range of the
+   * given char array.  The first char in the range is at position
+   * offset and the length of the range is count.  Paired surrogates
+   * (supplementary characters are represented by a pair of chars -
+   * one from the high surrogates and one from the low surrogates)
+   * count as just one code point.
+   * @param a the char array to inspect
+   * @param offset the beginning of the range
+   * @param count the length of the range
+   * @return the number of Unicode code points in the given range of the
+   * array
+   * @throws NullPointerException if a is null
+   * @throws IndexOutOfBoundsException if offset or count is negative or if
+   * offset + count is larger than the length of a.
+   * @since 1.5
+   */
+  public static int codePointCount(char[] a, int offset,
+                                   int count)
+  {
+    int len = a.length;
+    // Avoid computing offset + count before validation: the sum can overflow.
+    if (offset < 0 || count < 0 || count > len - offset)
+      throw new IndexOutOfBoundsException();
+    int end = offset + count;
+    int counter = 0;
+    for (int i = offset; i < end; i++)
+      {
+        counter++;
+        // If there is a pairing, count it only once.
+        if (isHighSurrogate(a[i]) && (i + 1) < end
+            && isLowSurrogate(a[i + 1]))
+          i ++;
+      }
+    return counter;
+  }
 
   /**
    * Determines if a character is a Unicode letter or a Unicode digit. This
author	Anthony Balkissoon <abalkiss@redhat.com>	2006-01-25 22:00:43 +0000
committer	Anthony Balkissoon <abalkiss@redhat.com>	2006-01-25 22:00:43 +0000
commit	b0949fe19e1b1db83526b14c62bd94cebe209ed7 (patch)
tree	1f8e3d2af28720e7522f3aabcd491d5e6d1009a9
parent	4953d6b9346a0de4429ae7a32fae4a38a3129f32 (diff)
download	classpath-b0949fe19e1b1db83526b14c62bd94cebe209ed7.tar.gz