summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnthony Balkissoon <abalkiss@redhat.com>2006-01-25 22:00:43 +0000
committerAnthony Balkissoon <abalkiss@redhat.com>2006-01-25 22:00:43 +0000
commitb0949fe19e1b1db83526b14c62bd94cebe209ed7 (patch)
tree1f8e3d2af28720e7522f3aabcd491d5e6d1009a9
parent4953d6b9346a0de4429ae7a32fae4a38a3129f32 (diff)
downloadclasspath-b0949fe19e1b1db83526b14c62bd94cebe209ed7.tar.gz
2006-01-25 Anthony Balkissoon <abalkiss@redhat.com>
* java/lang/Character.java: (codePointCount(char[], int, int)): New API method. (codePointCount(CharSequence, int, int)): Likewise.
-rw-r--r--ChangeLog6
-rw-r--r--java/lang/Character.java73
2 files changed, 79 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog
index e26e9b765..2dfbbb6fe 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2006-01-25 Anthony Balkissoon <abalkiss@redhat.com>
+
+ * java/lang/Character.java:
+ (codePointCount(char[], int, int)): New API method.
+ (codePointCount(CharSequence, int, int)): Likewise.
+
2006-01-25 Audrius Meskauskas <AudriusA@Bioinformatics.org>
PR 25205
diff --git a/java/lang/Character.java b/java/lang/Character.java
index 3c88ff805..baffe1a1a 100644
--- a/java/lang/Character.java
+++ b/java/lang/Character.java
@@ -1739,6 +1739,79 @@ public final class Character implements Serializable, Comparable
| (1 << MODIFIER_LETTER)
| (1 << OTHER_LETTER))) != 0;
}
+
+ /**
+  * Counts the Unicode code points contained in a sub-range of a
+  * CharSequence. The range starts at index beginIndex (inclusive) and
+  * stops at index endIndex (exclusive). A high surrogate that is
+  * immediately followed, inside the range, by a low surrogate forms one
+  * supplementary character and contributes a single code point; every
+  * other char, including an unpaired surrogate, contributes one code
+  * point of its own.
+  *
+  * @param seq the CharSequence to inspect
+  * @param beginIndex index of the first char of the range
+  * @param endIndex index one past the last char of the range
+  * @return the number of Unicode code points found in the range
+  * @throws NullPointerException if seq is null
+  * @throws IndexOutOfBoundsException if beginIndex is negative, if
+  * endIndex exceeds the length of seq, or if beginIndex is greater
+  * than endIndex
+  * @since 1.5
+  */
+ public static int codePointCount(CharSequence seq, int beginIndex,
+                                  int endIndex)
+ {
+   final int length = seq.length();
+   if (beginIndex < 0 || endIndex > length || beginIndex > endIndex)
+     throw new IndexOutOfBoundsException();
+
+   int codePoints = 0;
+   int pos = beginIndex;
+   while (pos < endIndex)
+     {
+       // Consume one char; pos now refers to the char that follows it.
+       char current = seq.charAt(pos++);
+       // A high surrogate followed in-range by a low surrogate is a
+       // single code point, so swallow the second half of the pair.
+       if (isHighSurrogate(current) && pos < endIndex
+           && isLowSurrogate(seq.charAt(pos)))
+         pos++;
+       codePoints++;
+     }
+   return codePoints;
+ }
+
+ /**
+  * Returns the number of Unicode code points in the specified range of the
+  * given char array. The first char in the range is at position
+  * offset and the length of the range is count. Paired surrogates
+  * (supplementary characters are represented by a pair of chars -
+  * one from the high surrogates and one from the low surrogates)
+  * count as just one code point. An unpaired surrogate, or a high
+  * surrogate whose low surrogate lies outside the range, counts as one
+  * code point on its own.
+  * @param a the char array to inspect
+  * @param offset the beginning of the range
+  * @param count the length of the range
+  * @return the number of Unicode code points in the given range of the
+  * array
+  * @throws NullPointerException if a is null
+  * @throws IndexOutOfBoundsException if offset or count is negative or if
+  * offset + count is larger than the length of a.
+  * @since 1.5
+  */
+ public static int codePointCount(char[] a, int offset,
+                                  int count)
+ {
+   int len = a.length;
+   // Compare count against the remaining space instead of computing
+   // offset + count first: that sum can overflow to a negative value
+   // and would then slip past an "end > len" test without throwing.
+   if (offset < 0 || count < 0 || count > len - offset)
+     throw new IndexOutOfBoundsException();
+
+   // Safe now: 0 <= offset + count <= len.
+   int end = offset + count;
+   int counter = 0;
+   for (int i = offset; i < end; i++)
+     {
+       counter++;
+       // If there is a pairing, count it only once.
+       if (isHighSurrogate(a[i]) && (i + 1) < end
+           && isLowSurrogate(a[i + 1]))
+         i++;
+     }
+   return counter;
+ }
/**
* Determines if a character is a Unicode letter or a Unicode digit. This