Add the basic `ascii::Char` type

author: Scott McMurray <scottmcm@users.noreply.github.com> 2023-04-29 14:45:36 -0700
committer: Scott McMurray <scottmcm@users.noreply.github.com> 2023-05-03 22:09:33 -0700
commit: 8c781b0906209e81f3540d1495becddae9894a25 (patch)
tree: d081346058cea327d2c659352cc2b52cd98489fb
parent: 831c9298c8e6542e3ab395216e98aec21f60b470 (diff)
download: rust-8c781b0906209e81f3540d1495becddae9894a25.tar.gz
12 files changed, 724 insertions, 1 deletions
diff --git a/library/alloc/src/lib.rs b/library/alloc/src/lib.rs
index a002421aeef..18f25aec5fe 100644
--- a/library/alloc/src/lib.rs
+++ b/library/alloc/src/lib.rs
@@ -101,6 +101,7 @@
 #![feature(array_into_iter_constructors)]
 #![feature(array_methods)]
 #![feature(array_windows)]
+#![feature(ascii_char)]
 #![feature(assert_matches)]
 #![feature(async_iterator)]
 #![feature(coerce_unsized)]
diff --git a/library/alloc/src/string.rs b/library/alloc/src/string.rs
index cf16a3424a0..b9ef76c109a 100644
--- a/library/alloc/src/string.rs
+++ b/library/alloc/src/string.rs
@@ -2527,6 +2527,15 @@ impl<T: fmt::Display + ?Sized> ToString for T {
 }
 
 #[cfg(not(no_global_oom_handling))]
+#[unstable(feature = "ascii_char", issue = "110998")]
+impl ToString for core::ascii::Char {
+    #[inline]
+    fn to_string(&self) -> String {
+        self.as_str().to_owned()
+    }
+}
+
+#[cfg(not(no_global_oom_handling))]
 #[stable(feature = "char_to_string_specialization", since = "1.46.0")]
 impl ToString for char {
     #[inline]
diff --git a/library/core/src/array/ascii.rs b/library/core/src/array/ascii.rs
new file mode 100644
index 00000000000..6750d7c0711
--- /dev/null
+++ b/library/core/src/array/ascii.rs
@@ -0,0 +1,34 @@
+use crate::ascii;
+
+#[cfg(not(test))]
+impl<const N: usize> [u8; N] {
+    /// Converts this array of bytes into an array of ASCII characters,
+    /// or returns `None` if any of the characters is non-ASCII.
+    #[unstable(feature = "ascii_char", issue = "110998")]
+    #[must_use]
+    #[inline]
+    pub fn as_ascii(&self) -> Option<&[ascii::Char; N]> {
+        if self.is_ascii() {
+            // SAFETY: Just checked that it's ASCII
+            Some(unsafe { self.as_ascii_unchecked() })
+        } else {
+            None
+        }
+    }
+
+    /// Converts this array of bytes into an array of ASCII characters,
+    /// without checking whether they're valid.
+    ///
+    /// # Safety
+    ///
+    /// Every byte in the array must be in `0..=127`, or else this is UB.
+    #[unstable(feature = "ascii_char", issue = "110998")]
+    #[must_use]
+    #[inline]
+    pub const unsafe fn as_ascii_unchecked(&self) -> &[ascii::Char; N] {
+        let byte_ptr: *const [u8; N] = self;
+        let ascii_ptr = byte_ptr as *const [ascii::Char; N];
+        // SAFETY: The caller promised all the bytes are ASCII
+        unsafe { &*ascii_ptr }
+    }
+}
diff --git a/library/core/src/array/mod.rs b/library/core/src/array/mod.rs
index 940558974e6..bdb4c975909 100644
--- a/library/core/src/array/mod.rs
+++ b/library/core/src/array/mod.rs
@@ -17,6 +17,7 @@ use crate::ops::{
 };
 use crate::slice::{Iter, IterMut};
 
+mod ascii;
 mod drain;
 mod equality;
 mod iter;
diff --git a/library/core/src/ascii.rs b/library/core/src/ascii.rs
index 8a4cb78cc7f..6034c8afce9 100644
--- a/library/core/src/ascii.rs
+++ b/library/core/src/ascii.rs
@@ -14,6 +14,10 @@ use crate::iter::FusedIterator;
 use crate::ops::Range;
 use crate::str::from_utf8_unchecked;
 
+mod ascii_char;
+#[unstable(feature = "ascii_char", issue = "110998")]
+pub use ascii_char::AsciiChar as Char;
+
 /// An iterator over the escaped version of a byte.
 ///
 /// This `struct` is created by the [`escape_default`] function. See its
diff --git a/library/core/src/ascii/ascii_char.rs b/library/core/src/ascii/ascii_char.rs
new file mode 100644
index 00000000000..f093a0990d1
--- /dev/null
+++ b/library/core/src/ascii/ascii_char.rs
@@ -0,0 +1,565 @@
+//! This uses the name `AsciiChar`, even though it's not exposed that way right now,
+//! because it avoids a whole bunch of "are you sure you didn't mean `char`?"
+//! suggestions from rustc if you get anything slightly wrong in here, and overall
+//! helps with clarity as we're also referring to `char` intentionally in here.
+
+use crate::fmt;
+use crate::mem::transmute;
+
+/// One of the 128 Unicode characters from U+0000 through U+007F,
+/// often known as the [ASCII] subset.
+///
+/// Officially, this is the first [block] in Unicode, _Basic Latin_.
+/// For details, see the [*C0 Controls and Basic Latin*][chart] code chart.
+///
+/// This block was based on older 7-bit character code standards such as
+/// ANSI X3.4-1977, ISO 646-1973, and [NIST FIPS 1-2].
+///
+/// # When to use this
+///
+/// The main advantage of this subset is that it's always valid UTF-8.  As such,
+/// the `&[ascii::Char]` -> `&str` conversion function (as well as other related
+/// ones) are O(1): *no* runtime checks are needed.
+///
+/// If you're consuming strings, you should usually handle Unicode and thus
+/// accept `str`s, not limit yourself to `ascii::Char`s.
+///
+/// However, certain formats are intentionally designed to produce ASCII-only
+/// output in order to be 8-bit-clean.  In those cases, it can be simpler and
+/// faster to generate `ascii::Char`s instead of dealing with the variable width
+/// properties of general UTF-8 encoded strings, while still allowing the result
+/// to be used freely with other Rust things that deal in general `str`s.
+///
+/// For example, a UUID library might offer a way to produce the string
+/// representation of a UUID as an `[ascii::Char; 36]` to avoid memory
+/// allocation yet still allow it to be used as UTF-8 via `as_str` without
+/// paying for validation (or needing `unsafe` code) the way it would if it
+/// were provided as a `[u8; 36]`.
+///
+/// # Layout
+///
+/// This type is guaranteed to have a size and alignment of 1 byte.
+///
+/// # Names
+///
+/// The variants on this type are [Unicode names][NamesList] of the characters
+/// in upper camel case, with a few tweaks:
+/// - For `<control>` characters, the primary alias name is used.
+/// - `LATIN` is dropped, as this block has no non-latin letters.
+/// - `LETTER` is dropped, as `CAPITAL`/`SMALL` suffices in this block.
+/// - `DIGIT`s use a single digit rather than writing out `ZERO`, `ONE`, etc.
+///
+/// [ASCII]: https://www.unicode.org/glossary/index.html#ASCII
+/// [block]: https://www.unicode.org/glossary/index.html#block
+/// [chart]: https://www.unicode.org/charts/PDF/U0000.pdf
+/// [NIST FIPS 1-2]: https://nvlpubs.nist.gov/nistpubs/Legacy/FIPS/fipspub1-2-1977.pdf
+/// [NamesList]: https://www.unicode.org/Public/15.0.0/ucd/NamesList.txt
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
+#[unstable(feature = "ascii_char", issue = "110998")]
+#[repr(u8)]
+pub enum AsciiChar {
+    /// U+0000
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    Null = 0,
+    /// U+0001
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    StartOfHeading = 1,
+    /// U+0002
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    StartOfText = 2,
+    /// U+0003
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    EndOfText = 3,
+    /// U+0004
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    EndOfTransmission = 4,
+    /// U+0005
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    Enquiry = 5,
+    /// U+0006
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    Acknowledge = 6,
+    /// U+0007
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    Bell = 7,
+    /// U+0008
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    Backspace = 8,
+    /// U+0009
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    CharacterTabulation = 9,
+    /// U+000A
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    LineFeed = 10,
+    /// U+000B
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    LineTabulation = 11,
+    /// U+000C
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    FormFeed = 12,
+    /// U+000D
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    CarriageReturn = 13,
+    /// U+000E
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    ShiftOut = 14,
+    /// U+000F
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    ShiftIn = 15,
+    /// U+0010
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    DataLinkEscape = 16,
+    /// U+0011
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    DeviceControlOne = 17,
+    /// U+0012
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    DeviceControlTwo = 18,
+    /// U+0013
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    DeviceControlThree = 19,
+    /// U+0014
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    DeviceControlFour = 20,
+    /// U+0015
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    NegativeAcknowledge = 21,
+    /// U+0016
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    SynchronousIdle = 22,
+    /// U+0017
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    EndOfTransmissionBlock = 23,
+    /// U+0018
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    Cancel = 24,
+    /// U+0019
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    EndOfMedium = 25,
+    /// U+001A
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    Substitute = 26,
+    /// U+001B
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    Escape = 27,
+    /// U+001C
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    InformationSeparatorFour = 28,
+    /// U+001D
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    InformationSeparatorThree = 29,
+    /// U+001E
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    InformationSeparatorTwo = 30,
+    /// U+001F
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    InformationSeparatorOne = 31,
+    /// U+0020
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    Space = 32,
+    /// U+0021
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    ExclamationMark = 33,
+    /// U+0022
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    QuotationMark = 34,
+    /// U+0023
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    NumberSign = 35,
+    /// U+0024
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    DollarSign = 36,
+    /// U+0025
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    PercentSign = 37,
+    /// U+0026
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    Ampersand = 38,
+    /// U+0027
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    Apostrophe = 39,
+    /// U+0028
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    LeftParenthesis = 40,
+    /// U+0029
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    RightParenthesis = 41,
+    /// U+002A
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    Asterisk = 42,
+    /// U+002B
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    PlusSign = 43,
+    /// U+002C
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    Comma = 44,
+    /// U+002D
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    HyphenMinus = 45,
+    /// U+002E
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    FullStop = 46,
+    /// U+002F
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    Solidus = 47,
+    /// U+0030
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    Digit0 = 48,
+    /// U+0031
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    Digit1 = 49,
+    /// U+0032
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    Digit2 = 50,
+    /// U+0033
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    Digit3 = 51,
+    /// U+0034
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    Digit4 = 52,
+    /// U+0035
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    Digit5 = 53,
+    /// U+0036
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    Digit6 = 54,
+    /// U+0037
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    Digit7 = 55,
+    /// U+0038
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    Digit8 = 56,
+    /// U+0039
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    Digit9 = 57,
+    /// U+003A
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    Colon = 58,
+    /// U+003B
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    Semicolon = 59,
+    /// U+003C
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    LessThanSign = 60,
+    /// U+003D
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    EqualsSign = 61,
+    /// U+003E
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    GreaterThanSign = 62,
+    /// U+003F
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    QuestionMark = 63,
+    /// U+0040
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    CommercialAt = 64,
+    /// U+0041
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    CapitalA = 65,
+    /// U+0042
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    CapitalB = 66,
+    /// U+0043
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    CapitalC = 67,
+    /// U+0044
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    CapitalD = 68,
+    /// U+0045
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    CapitalE = 69,
+    /// U+0046
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    CapitalF = 70,
+    /// U+0047
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    CapitalG = 71,
+    /// U+0048
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    CapitalH = 72,
+    /// U+0049
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    CapitalI = 73,
+    /// U+004A
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    CapitalJ = 74,
+    /// U+004B
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    CapitalK = 75,
+    /// U+004C
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    CapitalL = 76,
+    /// U+004D
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    CapitalM = 77,
+    /// U+004E
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    CapitalN = 78,
+    /// U+004F
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    CapitalO = 79,
+    /// U+0050
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    CapitalP = 80,
+    /// U+0051
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    CapitalQ = 81,
+    /// U+0052
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    CapitalR = 82,
+    /// U+0053
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    CapitalS = 83,
+    /// U+0054
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    CapitalT = 84,
+    /// U+0055
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    CapitalU = 85,
+    /// U+0056
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    CapitalV = 86,
+    /// U+0057
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    CapitalW = 87,
+    /// U+0058
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    CapitalX = 88,
+    /// U+0059
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    CapitalY = 89,
+    /// U+005A
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    CapitalZ = 90,
+    /// U+005B
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    LeftSquareBracket = 91,
+    /// U+005C
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    ReverseSolidus = 92,
+    /// U+005D
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    RightSquareBracket = 93,
+    /// U+005E
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    CircumflexAccent = 94,
+    /// U+005F
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    LowLine = 95,
+    /// U+0060
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    GraveAccent = 96,
+    /// U+0061
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    SmallA = 97,
+    /// U+0062
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    SmallB = 98,
+    /// U+0063
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    SmallC = 99,
+    /// U+0064
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    SmallD = 100,
+    /// U+0065
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    SmallE = 101,
+    /// U+0066
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    SmallF = 102,
+    /// U+0067
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    SmallG = 103,
+    /// U+0068
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    SmallH = 104,
+    /// U+0069
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    SmallI = 105,
+    /// U+006A
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    SmallJ = 106,
+    /// U+006B
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    SmallK = 107,
+    /// U+006C
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    SmallL = 108,
+    /// U+006D
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    SmallM = 109,
+    /// U+006E
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    SmallN = 110,
+    /// U+006F
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    SmallO = 111,
+    /// U+0070
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    SmallP = 112,
+    /// U+0071
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    SmallQ = 113,
+    /// U+0072
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    SmallR = 114,
+    /// U+0073
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    SmallS = 115,
+    /// U+0074
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    SmallT = 116,
+    /// U+0075
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    SmallU = 117,
+    /// U+0076
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    SmallV = 118,
+    /// U+0077
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    SmallW = 119,
+    /// U+0078
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    SmallX = 120,
+    /// U+0079
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    SmallY = 121,
+    /// U+007A
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    SmallZ = 122,
+    /// U+007B
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    LeftCurlyBracket = 123,
+    /// U+007C
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    VerticalLine = 124,
+    /// U+007D
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    RightCurlyBracket = 125,
+    /// U+007E
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    Tilde = 126,
+    /// U+007F
+    #[unstable(feature = "ascii_char_variants", issue = "110998")]
+    Delete = 127,
+}
+
+impl AsciiChar {
+    /// Creates an ASCII character from the byte `b`,
+    /// or returns `None` if it's too large.
+    #[unstable(feature = "ascii_char", issue = "110998")]
+    #[inline]
+    pub const fn from_u8(b: u8) -> Option<Self> {
+        if b <= 127 {
+            // SAFETY: Just checked that `b` is in-range
+            Some(unsafe { Self::from_u8_unchecked(b) })
+        } else {
+            None
+        }
+    }
+
+    /// Creates an ASCII character from the byte `b`,
+    /// without checking whether it's valid.
+    ///
+    /// # Safety
+    ///
+    /// `b` must be in `0..=127`, or else this is UB.
+    #[unstable(feature = "ascii_char", issue = "110998")]
+    #[inline]
+    pub const unsafe fn from_u8_unchecked(b: u8) -> Self {
+        // SAFETY: Our safety precondition is that `b` is in-range.
+        unsafe { transmute(b) }
+    }
+
+    /// When passed the *number* `0`, `1`, …, `9`, returns the *character*
+    /// `'0'`, `'1'`, …, `'9'` respectively.
+    ///
+    /// If `d >= 10`, returns `None`.
+    #[unstable(feature = "ascii_char", issue = "110998")]
+    #[inline]
+    pub const fn digit(d: u8) -> Option<Self> {
+        if d < 10 {
+            // SAFETY: Just checked it's in-range.
+            Some(unsafe { Self::digit_unchecked(d) })
+        } else {
+            None
+        }
+    }
+
+    /// When passed the *number* `0`, `1`, …, `9`, returns the *character*
+    /// `'0'`, `'1'`, …, `'9'` respectively, without checking that it's in-range.
+    ///
+    /// # Safety
+    ///
+    /// This is immediate UB if called with `d > 64`.
+    ///
+    /// If `d >= 10` and `d <= 64`, this is allowed to return any value or panic.
+    /// Notably, it should not be expected to return hex digits, or any other
+    /// reasonable extension of the decimal digits.
+    ///
+    /// (This loose safety condition is intended to simplify soundness proofs
+    /// when writing code using this method, since the implementation doesn't
+    /// need something really specific, not to make those other arguments do
+    /// something useful. It might be tightened before stabilization.)
+    #[unstable(feature = "ascii_char", issue = "110998")]
+    #[inline]
+    pub const unsafe fn digit_unchecked(d: u8) -> Self {
+        debug_assert!(d < 10);
+
+        // SAFETY: `'0'` through `'9'` are U+0030 through U+0039,
+        // so because `d` must be 64 or less the addition can return at most
+        // 112 (0x70), which doesn't overflow and is within the ASCII range.
+        unsafe {
+            let byte = b'0'.unchecked_add(d);
+            Self::from_u8_unchecked(byte)
+        }
+    }
+
+    /// Gets this ASCII character as a byte.
+    #[unstable(feature = "ascii_char", issue = "110998")]
+    #[inline]
+    pub const fn as_u8(self) -> u8 {
+        self as u8
+    }
+
+    /// Gets this ASCII character as a `char` Unicode Scalar Value.
+    #[unstable(feature = "ascii_char", issue = "110998")]
+    #[inline]
+    pub const fn as_char(self) -> char {
+        self as u8 as char
+    }
+
+    /// Views this ASCII character as a one-code-unit UTF-8 `str`.
+    #[unstable(feature = "ascii_char", issue = "110998")]
+    #[inline]
+    pub const fn as_str(&self) -> &str {
+        crate::slice::from_ref(self).as_str()
+    }
+}
+
+impl [AsciiChar] {
+    /// Views this slice of ASCII characters as a UTF-8 `str`.
+    #[unstable(feature = "ascii_char", issue = "110998")]
+    #[inline]
+    pub const fn as_str(&self) -> &str {
+        let ascii_ptr: *const Self = self;
+        let str_ptr = ascii_ptr as *const str;
+        // SAFETY: Each ASCII codepoint in UTF-8 is encoded as one single-byte
+        // code unit having the same value as the ASCII byte.
+        unsafe { &*str_ptr }
+    }
+
+    /// Views this slice of ASCII characters as a slice of `u8` bytes.
+    #[unstable(feature = "ascii_char", issue = "110998")]
+    #[inline]
+    pub const fn as_bytes(&self) -> &[u8] {
+        self.as_str().as_bytes()
+    }
+}
+
+#[unstable(feature = "ascii_char", issue = "110998")]
+impl fmt::Display for AsciiChar {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        <str as fmt::Display>::fmt(self.as_str(), f)
+    }
+}
diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs
index 9bc97ea0bff..27e51054203 100644
--- a/library/core/src/char/methods.rs
+++ b/library/core/src/char/methods.rs
@@ -1,5 +1,6 @@
 //! impl char {}
 
+use crate::ascii;
 use crate::slice;
 use crate::str::from_utf8_unchecked_mut;
 use crate::unicode::printable::is_printable;
@@ -1116,6 +1117,24 @@ impl char {
         *self as u32 <= 0x7F
     }
 
+    /// Returns `Some` if the value is within the ASCII range,
+    /// or `None` if it's not.
+    ///
+    /// This is preferred to [`Self::is_ascii`] when you're passing the value
+    /// along to something else that can take [`ascii::Char`] rather than
+    /// needing to check again for itself whether the value is in ASCII.
+    #[must_use]
+    #[unstable(feature = "ascii_char", issue = "110998")]
+    #[inline]
+    pub const fn as_ascii(&self) -> Option<ascii::Char> {
+        if self.is_ascii() {
+            // SAFETY: Just checked that this is ASCII.
+            Some(unsafe { ascii::Char::from_u8_unchecked(*self as u8) })
+        } else {
+            None
+        }
+    }
+
     /// Makes a copy of the value in its ASCII upper case equivalent.
     ///
     /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
diff --git a/library/core/src/num/mod.rs b/library/core/src/num/mod.rs
index fdd7be625ed..08444421dca 100644
--- a/library/core/src/num/mod.rs
+++ b/library/core/src/num/mod.rs
@@ -472,7 +472,16 @@ impl u8 {
     #[rustc_const_stable(feature = "const_u8_is_ascii", since = "1.43.0")]
     #[inline]
     pub const fn is_ascii(&self) -> bool {
-        *self & 128 == 0
+        *self <= 127
+    }
+
+    /// If the value of this byte is within the ASCII range, returns it as an
+    /// [ASCII character](ascii::Char).  Otherwise, returns `None`.
+    #[must_use]
+    #[unstable(feature = "ascii_char", issue = "110998")]
+    #[inline]
+    pub const fn as_ascii(&self) -> Option<ascii::Char> {
+        ascii::Char::from_u8(*self)
     }
 
     /// Makes a copy of the value in its ASCII upper case equivalent.
diff --git a/library/core/src/slice/ascii.rs b/library/core/src/slice/ascii.rs
index 5e5399acc1b..7bae6692ad4 100644
--- a/library/core/src/slice/ascii.rs
+++ b/library/core/src/slice/ascii.rs
@@ -16,6 +16,36 @@ impl [u8] {
         is_ascii(self)
     }
 
+    /// If this slice [`is_ascii`](Self::is_ascii), returns it as a slice of
+    /// [ASCII characters](`ascii::Char`), otherwise returns `None`.
+    #[unstable(feature = "ascii_char", issue = "110998")]
+    #[must_use]
+    #[inline]
+    pub fn as_ascii(&self) -> Option<&[ascii::Char]> {
+        if self.is_ascii() {
+            // SAFETY: Just checked that it's ASCII
+            Some(unsafe { self.as_ascii_unchecked() })
+        } else {
+            None
+        }
+    }
+
+    /// Converts this slice of bytes into a slice of ASCII characters,
+    /// without checking whether they're valid.
+    ///
+    /// # Safety
+    ///
+    /// Every byte in the slice must be in `0..=127`, or else this is UB.
+    #[unstable(feature = "ascii_char", issue = "110998")]
+    #[must_use]
+    #[inline]
+    pub const unsafe fn as_ascii_unchecked(&self) -> &[ascii::Char] {
+        let byte_ptr: *const [u8] = self;
+        let ascii_ptr = byte_ptr as *const [ascii::Char];
+        // SAFETY: The caller promised all the bytes are ASCII
+        unsafe { &*ascii_ptr }
+    }
+
     /// Checks that two slices are an ASCII case-insensitive match.
     ///
     /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,
diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs
index a13107fd0de..66fa9cf6f64 100644
--- a/library/core/src/str/mod.rs
+++ b/library/core/src/str/mod.rs
@@ -16,6 +16,7 @@ mod validations;
 use self::pattern::Pattern;
 use self::pattern::{DoubleEndedSearcher, ReverseSearcher, Searcher};
 
+use crate::ascii;
 use crate::char::{self, EscapeDebugExtArgs};
 use crate::mem;
 use crate::slice::{self, SliceIndex};
@@ -2366,6 +2367,16 @@ impl str {
         self.as_bytes().is_ascii()
     }
 
+    /// If this string slice [`is_ascii`](Self::is_ascii), returns it as a slice
+    /// of [ASCII characters](`ascii::Char`), otherwise returns `None`.
+    #[unstable(feature = "ascii_char", issue = "110998")]
+    #[must_use]
+    #[inline]
+    pub fn as_ascii(&self) -> Option<&[ascii::Char]> {
+        // Like in `is_ascii`, we can work on the bytes directly.
+        self.as_bytes().as_ascii()
+    }
+
     /// Checks that two strings are an ASCII case-insensitive match.
     ///
     /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,
diff --git a/library/std/src/ascii.rs b/library/std/src/ascii.rs
index c29f015777f..b18ab50de12 100644
--- a/library/std/src/ascii.rs
+++ b/library/std/src/ascii.rs
@@ -16,6 +16,9 @@
 #[stable(feature = "rust1", since = "1.0.0")]
 pub use core::ascii::{escape_default, EscapeDefault};
 
+#[unstable(feature = "ascii_char", issue = "110998")]
+pub use core::ascii::Char;
+
 /// Extension methods for ASCII-subset only operations.
 ///
 /// Be aware that operations on seemingly non-ASCII characters can sometimes
diff --git a/tests/codegen/ascii-char.rs b/tests/codegen/ascii-char.rs
new file mode 100644
index 00000000000..4167becf5e9
--- /dev/null
+++ b/tests/codegen/ascii-char.rs
@@ -0,0 +1,37 @@
+// compile-flags: -C opt-level=1
+// ignore-debug (the extra assertions get in the way)
+
+#![crate_type = "lib"]
+#![feature(ascii_char)]
+
+use std::ascii::Char as AsciiChar;
+
+// CHECK-LABEL: i8 @unwrap_digit_from_remainder(i32
+#[no_mangle]
+pub fn unwrap_digit_from_remainder(v: u32) -> AsciiChar {
+    // CHECK-NOT: icmp
+    // CHECK-NOT: panic
+
+    // CHECK: %[[R:.+]] = urem i32 %v, 10
+    // CHECK-NEXT: %[[T:.+]] = trunc i32 %[[R]] to i8
+    // CHECK-NEXT: %[[D:.+]] = or i8 %[[T]], 48
+    // CHECK-NEXT: ret i8 %[[D]]
+
+    // CHECK-NOT: icmp
+    // CHECK-NOT: panic
+    AsciiChar::digit((v % 10) as u8).unwrap()
+}
+
+// CHECK-LABEL: i8 @unwrap_from_masked(i8
+#[no_mangle]
+pub fn unwrap_from_masked(b: u8) -> AsciiChar {
+    // CHECK-NOT: icmp
+    // CHECK-NOT: panic
+
+    // CHECK: %[[M:.+]] = and i8 %b, 127
+    // CHECK-NEXT: ret i8 %[[M]]
+
+    // CHECK-NOT: icmp
+    // CHECK-NOT: panic
+    AsciiChar::from_u8(b & 0x7f).unwrap()
+}
author	Scott McMurray <scottmcm@users.noreply.github.com>	2023-04-29 14:45:36 -0700
committer	Scott McMurray <scottmcm@users.noreply.github.com>	2023-05-03 22:09:33 -0700
commit	8c781b0906209e81f3540d1495becddae9894a25 (patch)
tree	d081346058cea327d2c659352cc2b52cd98489fb
parent	831c9298c8e6542e3ab395216e98aec21f60b470 (diff)
download	rust-8c781b0906209e81f3540d1495becddae9894a25.tar.gz