summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLukas Kalbertodt <lukas.kalbertodt@gmail.com>2017-10-01 21:46:17 +0200
committerLukas Kalbertodt <lukas.kalbertodt@gmail.com>2017-11-03 21:27:40 +0100
commit1916e3c4aad7b0e0de1cfd190819609f55520996 (patch)
tree38dbc0bdb12d7be38420776c4ed4a125f61698ef
parent5a1d11a733b856cfaedd82f1c1ff50b87541692d (diff)
downloadrust-1916e3c4aad7b0e0de1cfd190819609f55520996.tar.gz
Copy `AsciiExt` methods to `str` directly
This is done in order to deprecate AsciiExt eventually. Note that this commit contains a bunch of `cfg(stage0)` statements. This is due to a new compiler feature this commit depends on: the `slice_u8` lang item. Once this lang item is available in the stage0 compiler, all those cfg flags (and more) can be removed.
-rw-r--r--src/liballoc/slice.rs2
-rw-r--r--src/liballoc/str.rs276
-rw-r--r--src/libstd/ascii.rs16
-rw-r--r--src/libsyntax/feature_gate.rs1
4 files changed, 290 insertions, 5 deletions
diff --git a/src/liballoc/slice.rs b/src/liballoc/slice.rs
index 8268528acbd..7590003a681 100644
--- a/src/liballoc/slice.rs
+++ b/src/liballoc/slice.rs
@@ -1533,7 +1533,7 @@ impl<T> [T] {
}
}
-// TODO(LukasKalbertodt): the `not(stage0)` constraint can be removed in the
+// FIXME(LukasKalbertodt): the `not(stage0)` constraint can be removed in the
// future once the stage0 compiler is new enough to know about the `slice_u8`
// lang item.
#[lang = "slice_u8"]
diff --git a/src/liballoc/str.rs b/src/liballoc/str.rs
index 895607ff8d4..2c257b8c736 100644
--- a/src/liballoc/str.rs
+++ b/src/liballoc/str.rs
@@ -2070,6 +2070,282 @@ impl str {
s.extend((0..n).map(|_| self));
s
}
+
+ /// Checks if all characters in this string are within the ASCII range.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// let ascii = "hello!\n";
+ /// let non_ascii = "Grüße, Jürgen ❤";
+ ///
+ /// assert!(ascii.is_ascii());
+ /// assert!(!non_ascii.is_ascii());
+ /// ```
+ #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
+ #[inline]
+ pub fn is_ascii(&self) -> bool {
+ // We can treat each byte as character here: all multibyte characters
+ // start with a byte that is not in the ascii range, so we will stop
+ // there already.
+ self.bytes().all(|b| b.is_ascii())
+ }
+
+ /// Returns a copy of this string where each character is mapped to its
+ /// ASCII upper case equivalent.
+ ///
+ /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
+ /// but non-ASCII letters are unchanged.
+ ///
+ /// To uppercase the value in-place, use [`make_ascii_uppercase`].
+ ///
+ /// To uppercase ASCII characters in addition to non-ASCII characters, use
+ /// [`to_uppercase`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// let s = "Grüße, Jürgen ❤";
+ ///
+ /// assert_eq!("GRüßE, JüRGEN ❤", s.to_ascii_uppercase());
+ /// ```
+ ///
+ /// [`make_ascii_uppercase`]: #method.make_ascii_uppercase
+ /// [`to_uppercase`]: #method.to_uppercase
+ #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
+ #[inline]
+ #[cfg(not(stage0))]
+ pub fn to_ascii_uppercase(&self) -> String {
+ let mut bytes = self.as_bytes().to_vec();
+ bytes.make_ascii_uppercase();
+ // make_ascii_uppercase() preserves the UTF-8 invariant.
+ unsafe { String::from_utf8_unchecked(bytes) }
+ }
+
+ /// Returns a copy of this string where each character is mapped to its
+ /// ASCII lower case equivalent.
+ ///
+ /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
+ /// but non-ASCII letters are unchanged.
+ ///
+ /// To lowercase the value in-place, use [`make_ascii_lowercase`].
+ ///
+ /// To lowercase ASCII characters in addition to non-ASCII characters, use
+ /// [`to_lowercase`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// let s = "Grüße, Jürgen ❤";
+ ///
+ /// assert_eq!("grüße, jürgen ❤", s.to_ascii_lowercase());
+ /// ```
+ ///
+ /// [`make_ascii_lowercase`]: #method.make_ascii_lowercase
+ /// [`to_lowercase`]: #method.to_lowercase
+ #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
+ #[inline]
+ #[cfg(not(stage0))]
+ pub fn to_ascii_lowercase(&self) -> String {
+ let mut bytes = self.as_bytes().to_vec();
+ bytes.make_ascii_lowercase();
+ // make_ascii_lowercase() preserves the UTF-8 invariant.
+ unsafe { String::from_utf8_unchecked(bytes) }
+ }
+
+ /// Checks that two strings are an ASCII case-insensitive match.
+ ///
+ /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,
+ /// but without allocating and copying temporaries.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// assert!("Ferris".eq_ignore_ascii_case("FERRIS"));
+ /// assert!("Ferrös".eq_ignore_ascii_case("FERRöS"));
+ /// assert!(!"Ferrös".eq_ignore_ascii_case("FERRÖS"));
+ /// ```
+ #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
+ #[inline]
+ #[cfg(not(stage0))]
+ pub fn eq_ignore_ascii_case(&self, other: &str) -> bool {
+ self.as_bytes().eq_ignore_ascii_case(other.as_bytes())
+ }
+
+ /// Converts this string to its ASCII upper case equivalent in-place.
+ ///
+ /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
+ /// but non-ASCII letters are unchanged.
+ ///
+ /// To return a new uppercased value without modifying the existing one, use
+ /// [`to_ascii_uppercase`].
+ ///
+ /// [`to_ascii_uppercase`]: #method.to_ascii_uppercase
+ #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
+ #[cfg(not(stage0))]
+ pub fn make_ascii_uppercase(&mut self) {
+ let me = unsafe { self.as_bytes_mut() };
+ me.make_ascii_uppercase()
+ }
+
+ /// Converts this string to its ASCII lower case equivalent in-place.
+ ///
+ /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
+ /// but non-ASCII letters are unchanged.
+ ///
+ /// To return a new lowercased value without modifying the existing one, use
+ /// [`to_ascii_lowercase`].
+ ///
+ /// [`to_ascii_lowercase`]: #method.to_ascii_lowercase
+ #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
+ #[cfg(not(stage0))]
+ pub fn make_ascii_lowercase(&mut self) {
+ let me = unsafe { self.as_bytes_mut() };
+ me.make_ascii_lowercase()
+ }
+
+ /// Checks if all characters of this string are ASCII alphabetic
+ /// characters:
+ ///
+ /// - U+0041 'A' ... U+005A 'Z', or
+ /// - U+0061 'a' ... U+007A 'z'.
+ #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
+ #[inline]
+ pub fn is_ascii_alphabetic(&self) -> bool {
+ self.bytes().all(|b| b.is_ascii_alphabetic())
+ }
+
+ /// Checks if all characters of this string are ASCII uppercase characters:
+ /// U+0041 'A' ... U+005A 'Z'.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// // Only ascii uppercase characters
+ /// assert!("HELLO".is_ascii_uppercase());
+ ///
+ /// // While all characters are ascii, 'y' and 'e' are not uppercase
+ /// assert!(!"Bye".is_ascii_uppercase());
+ ///
+ /// // While all characters are uppercase, 'Ü' is not ascii
+ /// assert!(!"TSCHÜSS".is_ascii_uppercase());
+ /// ```
+ #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
+ #[inline]
+ pub fn is_ascii_uppercase(&self) -> bool {
+ self.bytes().all(|b| b.is_ascii_uppercase())
+ }
+
+ /// Checks if all characters of this string are ASCII lowercase characters:
+ /// U+0061 'a' ... U+007A 'z'.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// // Only ascii lowercase characters
+ /// assert!("hello".is_ascii_lowercase());
+ ///
+ /// // While all characters are ascii, 'B' is not lowercase
+ /// assert!(!"Bye".is_ascii_lowercase());
+ ///
+ /// // While all characters are lowercase, 'ü' is not ascii
+ /// assert!(!"tschüss".is_ascii_lowercase());
+ /// ```
+ #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
+ #[inline]
+ pub fn is_ascii_lowercase(&self) -> bool {
+ self.bytes().all(|b| b.is_ascii_lowercase())
+ }
+
+ /// Checks if all characters of this string are ASCII alphanumeric
+ /// characters:
+ ///
+ /// - U+0041 'A' ... U+005A 'Z', or
+ /// - U+0061 'a' ... U+007A 'z', or
+ /// - U+0030 '0' ... U+0039 '9'.
+ #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
+ #[inline]
+ pub fn is_ascii_alphanumeric(&self) -> bool {
+ self.bytes().all(|b| b.is_ascii_alphanumeric())
+ }
+
+ /// Checks if all characters of this string are ASCII decimal digits:
+ /// U+0030 '0' ... U+0039 '9'.
+ #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
+ #[inline]
+ pub fn is_ascii_digit(&self) -> bool {
+ self.bytes().all(|b| b.is_ascii_digit())
+ }
+
+ /// Checks if all characters of this string are ASCII hexadecimal digits:
+ ///
+ /// - U+0030 '0' ... U+0039 '9', or
+ /// - U+0041 'A' ... U+0046 'F', or
+ /// - U+0061 'a' ... U+0066 'f'.
+ #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
+ #[inline]
+ pub fn is_ascii_hexdigit(&self) -> bool {
+ self.bytes().all(|b| b.is_ascii_hexdigit())
+ }
+
+ /// Checks if all characters of this string are ASCII punctuation
+ /// characters:
+ ///
+ /// - U+0021 ... U+002F `! " # $ % & ' ( ) * + , - . /`, or
+ /// - U+003A ... U+0040 `: ; < = > ? @`, or
+ /// - U+005B ... U+0060 ``[ \ ] ^ _ ` ``, or
+ /// - U+007B ... U+007E `{ | } ~`
+ #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
+ #[inline]
+ pub fn is_ascii_punctuation(&self) -> bool {
+ self.bytes().all(|b| b.is_ascii_punctuation())
+ }
+
+ /// Checks if all characters of this string are ASCII graphic characters:
+ /// U+0021 '!' ... U+007E '~'.
+ #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
+ #[inline]
+ pub fn is_ascii_graphic(&self) -> bool {
+ self.bytes().all(|b| b.is_ascii_graphic())
+ }
+
+ /// Checks if all characters of this string are ASCII whitespace characters:
+ /// U+0020 SPACE, U+0009 HORIZONTAL TAB, U+000A LINE FEED,
+ /// U+000C FORM FEED, or U+000D CARRIAGE RETURN.
+ ///
+ /// Rust uses the WhatWG Infra Standard's [definition of ASCII
+ /// whitespace][infra-aw]. There are several other definitions in
+ /// wide use. For instance, [the POSIX locale][pct] includes
+ /// U+000B VERTICAL TAB as well as all the above characters,
+ /// but—from the very same specification—[the default rule for
+ /// "field splitting" in the Bourne shell][bfs] considers *only*
+ /// SPACE, HORIZONTAL TAB, and LINE FEED as whitespace.
+ ///
+ /// If you are writing a program that will process an existing
+ /// file format, check what that format's definition of whitespace is
+ /// before using this function.
+ ///
+ /// [infra-aw]: https://infra.spec.whatwg.org/#ascii-whitespace
+ /// [pct]: http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_01
+ /// [bfs]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05
+ #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
+ #[inline]
+ pub fn is_ascii_whitespace(&self) -> bool {
+ self.bytes().all(|b| b.is_ascii_whitespace())
+ }
+
+ /// Checks if all characters of this string are ASCII control characters:
+ ///
+ /// - U+0000 NUL ... U+001F UNIT SEPARATOR, or
+ /// - U+007F DELETE.
+ ///
+ /// Note that most ASCII whitespace characters are control
+ /// characters, but SPACE is not.
+ #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
+ #[inline]
+ pub fn is_ascii_control(&self) -> bool {
+ self.bytes().all(|b| b.is_ascii_control())
+ }
}
/// Converts a boxed slice of bytes to a boxed string slice without checking
diff --git a/src/libstd/ascii.rs b/src/libstd/ascii.rs
index 7a474c1f254..200264a2583 100644
--- a/src/libstd/ascii.rs
+++ b/src/libstd/ascii.rs
@@ -298,6 +298,10 @@ pub trait AsciiExt {
fn is_ascii_control(&self) -> bool { unimplemented!(); }
}
+// FIXME(LukasKalbertodt): this impl block can be removed in the future. This is
+// possible once the stage0 compiler is new enough to contain the inherent
+// ascii methods for `str`. See FIXME comment further down.
+#[cfg(stage0)]
#[stable(feature = "rust1", since = "1.0.0")]
impl AsciiExt for str {
type Owned = String;
@@ -389,9 +393,9 @@ impl AsciiExt for str {
}
}
-// TODO(LukasKalbertodt): this impl block can be removed in the future. This is
+// FIXME(LukasKalbertodt): this impl block can be removed in the future. This is
// possible once the stage0 compiler is new enough to contain the inherent
-// ascii methods for `[u8]`. See TODO comment further down.
+// ascii methods for `[u8]`. See FIXME comment further down.
#[cfg(stage0)]
#[stable(feature = "rust1", since = "1.0.0")]
impl AsciiExt for [u8] {
@@ -546,12 +550,18 @@ macro_rules! impl_by_delegating {
impl_by_delegating!(u8, u8);
impl_by_delegating!(char, char);
-// TODO(LukasKalbertodt): the macro invocation should replace the impl block
+// FIXME(LukasKalbertodt): the macro invocation should replace the impl block
// for `[u8]` above. But this is not possible until the stage0 compiler is new
// enough to contain the inherent ascii methods for `[u8]`.
#[cfg(not(stage0))]
impl_by_delegating!([u8], Vec<u8>);
+// FIXME(LukasKalbertodt): the macro invocation should replace the impl block
+// for `str` above. But this is not possible until the stage0 compiler is new
+// enough to contain the inherent ascii methods for `str`.
+#[cfg(not(stage0))]
+impl_by_delegating!(str, String);
+
/// An iterator over the escaped version of a byte.
///
/// This `struct` is created by the [`escape_default`] function. See its
diff --git a/src/libsyntax/feature_gate.rs b/src/libsyntax/feature_gate.rs
index 30451ec757a..195aac5292f 100644
--- a/src/libsyntax/feature_gate.rs
+++ b/src/libsyntax/feature_gate.rs
@@ -35,7 +35,6 @@ use visit::{self, FnKind, Visitor};
use parse::ParseSess;
use symbol::Symbol;
-use std::ascii::AsciiExt;
use std::env;
macro_rules! set {