summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorScott McMurray <scottmcm@users.noreply.github.com>2023-04-29 14:45:36 -0700
committerScott McMurray <scottmcm@users.noreply.github.com>2023-05-03 22:09:33 -0700
commit8c781b0906209e81f3540d1495becddae9894a25 (patch)
treed081346058cea327d2c659352cc2b52cd98489fb
parent831c9298c8e6542e3ab395216e98aec21f60b470 (diff)
downloadrust-8c781b0906209e81f3540d1495becddae9894a25.tar.gz
Add the basic `ascii::Char` type
-rw-r--r--library/alloc/src/lib.rs1
-rw-r--r--library/alloc/src/string.rs9
-rw-r--r--library/core/src/array/ascii.rs34
-rw-r--r--library/core/src/array/mod.rs1
-rw-r--r--library/core/src/ascii.rs4
-rw-r--r--library/core/src/ascii/ascii_char.rs565
-rw-r--r--library/core/src/char/methods.rs19
-rw-r--r--library/core/src/num/mod.rs11
-rw-r--r--library/core/src/slice/ascii.rs30
-rw-r--r--library/core/src/str/mod.rs11
-rw-r--r--library/std/src/ascii.rs3
-rw-r--r--tests/codegen/ascii-char.rs37
12 files changed, 724 insertions, 1 deletions
diff --git a/library/alloc/src/lib.rs b/library/alloc/src/lib.rs
index a002421aeef..18f25aec5fe 100644
--- a/library/alloc/src/lib.rs
+++ b/library/alloc/src/lib.rs
@@ -101,6 +101,7 @@
#![feature(array_into_iter_constructors)]
#![feature(array_methods)]
#![feature(array_windows)]
+#![feature(ascii_char)]
#![feature(assert_matches)]
#![feature(async_iterator)]
#![feature(coerce_unsized)]
diff --git a/library/alloc/src/string.rs b/library/alloc/src/string.rs
index cf16a3424a0..b9ef76c109a 100644
--- a/library/alloc/src/string.rs
+++ b/library/alloc/src/string.rs
@@ -2527,6 +2527,15 @@ impl<T: fmt::Display + ?Sized> ToString for T {
}
#[cfg(not(no_global_oom_handling))]
+#[unstable(feature = "ascii_char", issue = "110998")]
+impl ToString for core::ascii::Char {
+ #[inline]
+ fn to_string(&self) -> String {
+ self.as_str().to_owned()
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
#[stable(feature = "char_to_string_specialization", since = "1.46.0")]
impl ToString for char {
#[inline]
diff --git a/library/core/src/array/ascii.rs b/library/core/src/array/ascii.rs
new file mode 100644
index 00000000000..6750d7c0711
--- /dev/null
+++ b/library/core/src/array/ascii.rs
@@ -0,0 +1,34 @@
+use crate::ascii;
+
+#[cfg(not(test))]
+impl<const N: usize> [u8; N] {
+ /// Converts this array of bytes into an array of ASCII characters,
+ /// or returns `None` if any of the bytes is non-ASCII.
+ #[unstable(feature = "ascii_char", issue = "110998")]
+ #[must_use]
+ #[inline]
+ pub fn as_ascii(&self) -> Option<&[ascii::Char; N]> {
+ if self.is_ascii() {
+ // SAFETY: Just checked that it's ASCII
+ Some(unsafe { self.as_ascii_unchecked() })
+ } else {
+ None
+ }
+ }
+
+ /// Converts this array of bytes into an array of ASCII characters,
+ /// without checking whether they're valid.
+ ///
+ /// # Safety
+ ///
+ /// Every byte in the array must be in `0..=127`, or else this is UB.
+ #[unstable(feature = "ascii_char", issue = "110998")]
+ #[must_use]
+ #[inline]
+ pub const unsafe fn as_ascii_unchecked(&self) -> &[ascii::Char; N] {
+ let byte_ptr: *const [u8; N] = self;
+ let ascii_ptr = byte_ptr as *const [ascii::Char; N];
+ // SAFETY: The caller promised all the bytes are ASCII
+ unsafe { &*ascii_ptr }
+ }
+}
diff --git a/library/core/src/array/mod.rs b/library/core/src/array/mod.rs
index 940558974e6..bdb4c975909 100644
--- a/library/core/src/array/mod.rs
+++ b/library/core/src/array/mod.rs
@@ -17,6 +17,7 @@ use crate::ops::{
};
use crate::slice::{Iter, IterMut};
+mod ascii;
mod drain;
mod equality;
mod iter;
diff --git a/library/core/src/ascii.rs b/library/core/src/ascii.rs
index 8a4cb78cc7f..6034c8afce9 100644
--- a/library/core/src/ascii.rs
+++ b/library/core/src/ascii.rs
@@ -14,6 +14,10 @@ use crate::iter::FusedIterator;
use crate::ops::Range;
use crate::str::from_utf8_unchecked;
+mod ascii_char;
+#[unstable(feature = "ascii_char", issue = "110998")]
+pub use ascii_char::AsciiChar as Char;
+
/// An iterator over the escaped version of a byte.
///
/// This `struct` is created by the [`escape_default`] function. See its
diff --git a/library/core/src/ascii/ascii_char.rs b/library/core/src/ascii/ascii_char.rs
new file mode 100644
index 00000000000..f093a0990d1
--- /dev/null
+++ b/library/core/src/ascii/ascii_char.rs
@@ -0,0 +1,565 @@
+//! This uses the name `AsciiChar`, even though it's not exposed that way right now,
+//! because it avoids a whole bunch of "are you sure you didn't mean `char`?"
+//! suggestions from rustc if you get anything slightly wrong in here, and overall
+//! helps with clarity as we're also referring to `char` intentionally in here.
+
+use crate::fmt;
+use crate::mem::transmute;
+
+/// One of the 128 Unicode characters from U+0000 through U+007F,
+/// often known as the [ASCII] subset.
+///
+/// Officially, this is the first [block] in Unicode, _Basic Latin_.
+/// For details, see the [*C0 Controls and Basic Latin*][chart] code chart.
+///
+/// This block was based on older 7-bit character code standards such as
+/// ANSI X3.4-1977, ISO 646-1973, and [NIST FIPS 1-2].
+///
+/// # When to use this
+///
+/// The main advantage of this subset is that it's always valid UTF-8. As such,
+/// the `&[ascii::Char]` -> `&str` conversion function (as well as other related
+/// ones) are O(1): *no* runtime checks are needed.
+///
+/// If you're consuming strings, you should usually handle Unicode and thus
+/// accept `str`s, not limit yourself to `ascii::Char`s.
+///
+/// However, certain formats are intentionally designed to produce ASCII-only
+/// output in order to be 8-bit-clean. In those cases, it can be simpler and
+/// faster to generate `ascii::Char`s instead of dealing with the variable width
+/// properties of general UTF-8 encoded strings, while still allowing the result
+/// to be used freely with other Rust things that deal in general `str`s.
+///
+/// For example, a UUID library might offer a way to produce the string
+/// representation of a UUID as an `[ascii::Char; 36]` to avoid memory
+/// allocation yet still allow it to be used as UTF-8 via `as_str` without
+/// paying for validation (or needing `unsafe` code) the way it would if it
+/// were provided as a `[u8; 36]`.
+///
+/// # Layout
+///
+/// This type is guaranteed to have a size and alignment of 1 byte.
+///
+/// # Names
+///
+/// The variants on this type are [Unicode names][NamesList] of the characters
+/// in upper camel case, with a few tweaks:
+/// - For `<control>` characters, the primary alias name is used.
+/// - `LATIN` is dropped, as this block has no non-Latin letters.
+/// - `LETTER` is dropped, as `CAPITAL`/`SMALL` suffices in this block.
+/// - `DIGIT`s use a single digit rather than writing out `ZERO`, `ONE`, etc.
+///
+/// [ASCII]: https://www.unicode.org/glossary/index.html#ASCII
+/// [block]: https://www.unicode.org/glossary/index.html#block
+/// [chart]: https://www.unicode.org/charts/PDF/U0000.pdf
+/// [NIST FIPS 1-2]: https://nvlpubs.nist.gov/nistpubs/Legacy/FIPS/fipspub1-2-1977.pdf
+/// [NamesList]: https://www.unicode.org/Public/15.0.0/ucd/NamesList.txt
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
+#[unstable(feature = "ascii_char", issue = "110998")]
+#[repr(u8)]
+pub enum AsciiChar {
+ /// U+0000
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ Null = 0,
+ /// U+0001
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ StartOfHeading = 1,
+ /// U+0002
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ StartOfText = 2,
+ /// U+0003
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ EndOfText = 3,
+ /// U+0004
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ EndOfTransmission = 4,
+ /// U+0005
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ Enquiry = 5,
+ /// U+0006
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ Acknowledge = 6,
+ /// U+0007
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ Bell = 7,
+ /// U+0008
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ Backspace = 8,
+ /// U+0009
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ CharacterTabulation = 9,
+ /// U+000A
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ LineFeed = 10,
+ /// U+000B
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ LineTabulation = 11,
+ /// U+000C
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ FormFeed = 12,
+ /// U+000D
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ CarriageReturn = 13,
+ /// U+000E
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ ShiftOut = 14,
+ /// U+000F
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ ShiftIn = 15,
+ /// U+0010
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ DataLinkEscape = 16,
+ /// U+0011
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ DeviceControlOne = 17,
+ /// U+0012
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ DeviceControlTwo = 18,
+ /// U+0013
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ DeviceControlThree = 19,
+ /// U+0014
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ DeviceControlFour = 20,
+ /// U+0015
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ NegativeAcknowledge = 21,
+ /// U+0016
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ SynchronousIdle = 22,
+ /// U+0017
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ EndOfTransmissionBlock = 23,
+ /// U+0018
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ Cancel = 24,
+ /// U+0019
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ EndOfMedium = 25,
+ /// U+001A
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ Substitute = 26,
+ /// U+001B
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ Escape = 27,
+ /// U+001C
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ InformationSeparatorFour = 28,
+ /// U+001D
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ InformationSeparatorThree = 29,
+ /// U+001E
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ InformationSeparatorTwo = 30,
+ /// U+001F
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ InformationSeparatorOne = 31,
+ /// U+0020
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ Space = 32,
+ /// U+0021
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ ExclamationMark = 33,
+ /// U+0022
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ QuotationMark = 34,
+ /// U+0023
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ NumberSign = 35,
+ /// U+0024
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ DollarSign = 36,
+ /// U+0025
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ PercentSign = 37,
+ /// U+0026
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ Ampersand = 38,
+ /// U+0027
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ Apostrophe = 39,
+ /// U+0028
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ LeftParenthesis = 40,
+ /// U+0029
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ RightParenthesis = 41,
+ /// U+002A
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ Asterisk = 42,
+ /// U+002B
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ PlusSign = 43,
+ /// U+002C
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ Comma = 44,
+ /// U+002D
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ HyphenMinus = 45,
+ /// U+002E
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ FullStop = 46,
+ /// U+002F
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ Solidus = 47,
+ /// U+0030
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ Digit0 = 48,
+ /// U+0031
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ Digit1 = 49,
+ /// U+0032
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ Digit2 = 50,
+ /// U+0033
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ Digit3 = 51,
+ /// U+0034
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ Digit4 = 52,
+ /// U+0035
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ Digit5 = 53,
+ /// U+0036
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ Digit6 = 54,
+ /// U+0037
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ Digit7 = 55,
+ /// U+0038
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ Digit8 = 56,
+ /// U+0039
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ Digit9 = 57,
+ /// U+003A
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ Colon = 58,
+ /// U+003B
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ Semicolon = 59,
+ /// U+003C
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ LessThanSign = 60,
+ /// U+003D
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ EqualsSign = 61,
+ /// U+003E
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ GreaterThanSign = 62,
+ /// U+003F
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ QuestionMark = 63,
+ /// U+0040
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ CommercialAt = 64,
+ /// U+0041
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ CapitalA = 65,
+ /// U+0042
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ CapitalB = 66,
+ /// U+0043
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ CapitalC = 67,
+ /// U+0044
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ CapitalD = 68,
+ /// U+0045
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ CapitalE = 69,
+ /// U+0046
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ CapitalF = 70,
+ /// U+0047
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ CapitalG = 71,
+ /// U+0048
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ CapitalH = 72,
+ /// U+0049
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ CapitalI = 73,
+ /// U+004A
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ CapitalJ = 74,
+ /// U+004B
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ CapitalK = 75,
+ /// U+004C
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ CapitalL = 76,
+ /// U+004D
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ CapitalM = 77,
+ /// U+004E
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ CapitalN = 78,
+ /// U+004F
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ CapitalO = 79,
+ /// U+0050
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ CapitalP = 80,
+ /// U+0051
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ CapitalQ = 81,
+ /// U+0052
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ CapitalR = 82,
+ /// U+0053
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ CapitalS = 83,
+ /// U+0054
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ CapitalT = 84,
+ /// U+0055
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ CapitalU = 85,
+ /// U+0056
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ CapitalV = 86,
+ /// U+0057
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ CapitalW = 87,
+ /// U+0058
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ CapitalX = 88,
+ /// U+0059
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ CapitalY = 89,
+ /// U+005A
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ CapitalZ = 90,
+ /// U+005B
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ LeftSquareBracket = 91,
+ /// U+005C
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ ReverseSolidus = 92,
+ /// U+005D
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ RightSquareBracket = 93,
+ /// U+005E
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ CircumflexAccent = 94,
+ /// U+005F
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ LowLine = 95,
+ /// U+0060
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ GraveAccent = 96,
+ /// U+0061
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ SmallA = 97,
+ /// U+0062
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ SmallB = 98,
+ /// U+0063
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ SmallC = 99,
+ /// U+0064
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ SmallD = 100,
+ /// U+0065
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ SmallE = 101,
+ /// U+0066
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ SmallF = 102,
+ /// U+0067
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ SmallG = 103,
+ /// U+0068
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ SmallH = 104,
+ /// U+0069
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ SmallI = 105,
+ /// U+006A
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ SmallJ = 106,
+ /// U+006B
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ SmallK = 107,
+ /// U+006C
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ SmallL = 108,
+ /// U+006D
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ SmallM = 109,
+ /// U+006E
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ SmallN = 110,
+ /// U+006F
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ SmallO = 111,
+ /// U+0070
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ SmallP = 112,
+ /// U+0071
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ SmallQ = 113,
+ /// U+0072
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ SmallR = 114,
+ /// U+0073
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ SmallS = 115,
+ /// U+0074
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ SmallT = 116,
+ /// U+0075
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ SmallU = 117,
+ /// U+0076
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ SmallV = 118,
+ /// U+0077
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ SmallW = 119,
+ /// U+0078
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ SmallX = 120,
+ /// U+0079
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ SmallY = 121,
+ /// U+007A
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ SmallZ = 122,
+ /// U+007B
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ LeftCurlyBracket = 123,
+ /// U+007C
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ VerticalLine = 124,
+ /// U+007D
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ RightCurlyBracket = 125,
+ /// U+007E
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ Tilde = 126,
+ /// U+007F
+ #[unstable(feature = "ascii_char_variants", issue = "110998")]
+ Delete = 127,
+}
+
+impl AsciiChar {
+ /// Creates an ASCII character from the byte `b`,
+ /// or returns `None` if it's not in `0..=127`.
+ #[unstable(feature = "ascii_char", issue = "110998")]
+ #[inline]
+ pub const fn from_u8(b: u8) -> Option<Self> {
+ if b <= 127 {
+ // SAFETY: Just checked that `b` is in-range
+ Some(unsafe { Self::from_u8_unchecked(b) })
+ } else {
+ None
+ }
+ }
+
+ /// Creates an ASCII character from the byte `b`,
+ /// without checking whether it's valid.
+ ///
+ /// # Safety
+ ///
+ /// `b` must be in `0..=127`, or else this is UB.
+ #[unstable(feature = "ascii_char", issue = "110998")]
+ #[inline]
+ pub const unsafe fn from_u8_unchecked(b: u8) -> Self {
+ // SAFETY: Our safety precondition is that `b` is in-range.
+ unsafe { transmute(b) }
+ }
+
+ /// When passed the *number* `0`, `1`, …, `9`, returns the *character*
+ /// `'0'`, `'1'`, …, `'9'` respectively.
+ ///
+ /// If `d >= 10`, returns `None`.
+ #[unstable(feature = "ascii_char", issue = "110998")]
+ #[inline]
+ pub const fn digit(d: u8) -> Option<Self> {
+ if d < 10 {
+ // SAFETY: Just checked it's in-range.
+ Some(unsafe { Self::digit_unchecked(d) })
+ } else {
+ None
+ }
+ }
+
+ /// When passed the *number* `0`, `1`, …, `9`, returns the *character*
+ /// `'0'`, `'1'`, …, `'9'` respectively, without checking that it's in-range.
+ ///
+ /// # Safety
+ ///
+ /// This is immediate UB if called with `d > 64`.
+ ///
+ /// If `d >= 10` and `d <= 64`, this is allowed to return any value or panic.
+ /// Notably, it should not be expected to return hex digits, or any other
+ /// reasonable extension of the decimal digits.
+ ///
+ /// (This loose safety condition is intended to simplify soundness proofs
+ /// when writing code using this method, since the implementation doesn't
+ /// need something really specific, not to make those other arguments do
+ /// something useful. It might be tightened before stabilization.)
+ #[unstable(feature = "ascii_char", issue = "110998")]
+ #[inline]
+ pub const unsafe fn digit_unchecked(d: u8) -> Self {
+ debug_assert!(d < 10);
+
+ // SAFETY: `'0'` through `'9'` are U+0030 through U+0039,
+ // so because `d` must be 64 or less the addition can return at most
+ // 112 (0x70), which doesn't overflow and is within the ASCII range.
+ unsafe {
+ let byte = b'0'.unchecked_add(d);
+ Self::from_u8_unchecked(byte)
+ }
+ }
+
+ /// Gets this ASCII character as a byte.
+ #[unstable(feature = "ascii_char", issue = "110998")]
+ #[inline]
+ pub const fn as_u8(self) -> u8 {
+ self as u8
+ }
+
+ /// Gets this ASCII character as a `char` Unicode Scalar Value.
+ #[unstable(feature = "ascii_char", issue = "110998")]
+ #[inline]
+ pub const fn as_char(self) -> char {
+ self as u8 as char
+ }
+
+ /// Views this ASCII character as a one-code-unit UTF-8 `str`.
+ #[unstable(feature = "ascii_char", issue = "110998")]
+ #[inline]
+ pub const fn as_str(&self) -> &str {
+ crate::slice::from_ref(self).as_str()
+ }
+}
+
+impl [AsciiChar] {
+ /// Views this slice of ASCII characters as a UTF-8 `str`.
+ #[unstable(feature = "ascii_char", issue = "110998")]
+ #[inline]
+ pub const fn as_str(&self) -> &str {
+ let ascii_ptr: *const Self = self;
+ let str_ptr = ascii_ptr as *const str;
+ // SAFETY: Each ASCII codepoint in UTF-8 is encoded as one single-byte
+ // code unit having the same value as the ASCII byte.
+ unsafe { &*str_ptr }
+ }
+
+ /// Views this slice of ASCII characters as a slice of `u8` bytes.
+ #[unstable(feature = "ascii_char", issue = "110998")]
+ #[inline]
+ pub const fn as_bytes(&self) -> &[u8] {
+ self.as_str().as_bytes()
+ }
+}
+
+#[unstable(feature = "ascii_char", issue = "110998")]
+impl fmt::Display for AsciiChar {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ <str as fmt::Display>::fmt(self.as_str(), f)
+ }
+}
diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs
index 9bc97ea0bff..27e51054203 100644
--- a/library/core/src/char/methods.rs
+++ b/library/core/src/char/methods.rs
@@ -1,5 +1,6 @@
//! impl char {}
+use crate::ascii;
use crate::slice;
use crate::str::from_utf8_unchecked_mut;
use crate::unicode::printable::is_printable;
@@ -1116,6 +1117,24 @@ impl char {
*self as u32 <= 0x7F
}
+ /// Returns this `char` as an [`ascii::Char`] if it is within the ASCII range,
+ /// or `None` if it's not.
+ ///
+ /// This is preferred to [`Self::is_ascii`] when you're passing the value
+ /// along to something else that can take [`ascii::Char`] rather than
+ /// needing to check again for itself whether the value is in ASCII.
+ #[must_use]
+ #[unstable(feature = "ascii_char", issue = "110998")]
+ #[inline]
+ pub const fn as_ascii(&self) -> Option<ascii::Char> {
+ if self.is_ascii() {
+ // SAFETY: Just checked that this is ASCII, so the value is in `0..=127`.
+ Some(unsafe { ascii::Char::from_u8_unchecked(*self as u8) })
+ } else {
+ None
+ }
+ }
+
/// Makes a copy of the value in its ASCII upper case equivalent.
///
/// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
diff --git a/library/core/src/num/mod.rs b/library/core/src/num/mod.rs
index fdd7be625ed..08444421dca 100644
--- a/library/core/src/num/mod.rs
+++ b/library/core/src/num/mod.rs
@@ -472,7 +472,16 @@ impl u8 {
#[rustc_const_stable(feature = "const_u8_is_ascii", since = "1.43.0")]
#[inline]
pub const fn is_ascii(&self) -> bool {
- *self & 128 == 0
+ *self <= 127
+ }
+
+ /// If the value of this byte is within the ASCII range, returns it as an
+ /// [ASCII character](ascii::Char). Otherwise, returns `None`.
+ #[must_use]
+ #[unstable(feature = "ascii_char", issue = "110998")]
+ #[inline]
+ pub const fn as_ascii(&self) -> Option<ascii::Char> {
+ ascii::Char::from_u8(*self)
}
/// Makes a copy of the value in its ASCII upper case equivalent.
diff --git a/library/core/src/slice/ascii.rs b/library/core/src/slice/ascii.rs
index 5e5399acc1b..7bae6692ad4 100644
--- a/library/core/src/slice/ascii.rs
+++ b/library/core/src/slice/ascii.rs
@@ -16,6 +16,36 @@ impl [u8] {
is_ascii(self)
}
+ /// If this slice [`is_ascii`](Self::is_ascii), returns it as a slice of
+ /// [ASCII characters](`ascii::Char`), otherwise returns `None`.
+ #[unstable(feature = "ascii_char", issue = "110998")]
+ #[must_use]
+ #[inline]
+ pub fn as_ascii(&self) -> Option<&[ascii::Char]> {
+ if self.is_ascii() {
+ // SAFETY: Just checked that it's ASCII
+ Some(unsafe { self.as_ascii_unchecked() })
+ } else {
+ None
+ }
+ }
+
+ /// Converts this slice of bytes into a slice of ASCII characters,
+ /// without checking whether they're valid.
+ ///
+ /// # Safety
+ ///
+ /// Every byte in the slice must be in `0..=127`, or else this is UB.
+ #[unstable(feature = "ascii_char", issue = "110998")]
+ #[must_use]
+ #[inline]
+ pub const unsafe fn as_ascii_unchecked(&self) -> &[ascii::Char] {
+ let byte_ptr: *const [u8] = self;
+ let ascii_ptr = byte_ptr as *const [ascii::Char];
+ // SAFETY: The caller promised all the bytes are ASCII
+ unsafe { &*ascii_ptr }
+ }
+
/// Checks that two slices are an ASCII case-insensitive match.
///
/// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,
diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs
index a13107fd0de..66fa9cf6f64 100644
--- a/library/core/src/str/mod.rs
+++ b/library/core/src/str/mod.rs
@@ -16,6 +16,7 @@ mod validations;
use self::pattern::Pattern;
use self::pattern::{DoubleEndedSearcher, ReverseSearcher, Searcher};
+use crate::ascii;
use crate::char::{self, EscapeDebugExtArgs};
use crate::mem;
use crate::slice::{self, SliceIndex};
@@ -2366,6 +2367,16 @@ impl str {
self.as_bytes().is_ascii()
}
+ /// If this string slice [`is_ascii`](Self::is_ascii), returns it as a slice
+ /// of [ASCII characters](`ascii::Char`), otherwise returns `None`.
+ #[unstable(feature = "ascii_char", issue = "110998")]
+ #[must_use]
+ #[inline]
+ pub fn as_ascii(&self) -> Option<&[ascii::Char]> {
+ // Like in `is_ascii`, we can work on the bytes directly.
+ self.as_bytes().as_ascii()
+ }
+
/// Checks that two strings are an ASCII case-insensitive match.
///
/// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,
diff --git a/library/std/src/ascii.rs b/library/std/src/ascii.rs
index c29f015777f..b18ab50de12 100644
--- a/library/std/src/ascii.rs
+++ b/library/std/src/ascii.rs
@@ -16,6 +16,9 @@
#[stable(feature = "rust1", since = "1.0.0")]
pub use core::ascii::{escape_default, EscapeDefault};
+#[unstable(feature = "ascii_char", issue = "110998")]
+pub use core::ascii::Char;
+
/// Extension methods for ASCII-subset only operations.
///
/// Be aware that operations on seemingly non-ASCII characters can sometimes
diff --git a/tests/codegen/ascii-char.rs b/tests/codegen/ascii-char.rs
new file mode 100644
index 00000000000..4167becf5e9
--- /dev/null
+++ b/tests/codegen/ascii-char.rs
@@ -0,0 +1,37 @@
+// compile-flags: -C opt-level=1
+// ignore-debug (the extra assertions get in the way)
+
+#![crate_type = "lib"]
+#![feature(ascii_char)]
+
+use std::ascii::Char as AsciiChar;
+
+// CHECK-LABEL: i8 @unwrap_digit_from_remainder(i32
+#[no_mangle]
+pub fn unwrap_digit_from_remainder(v: u32) -> AsciiChar {
+ // CHECK-NOT: icmp
+ // CHECK-NOT: panic
+
+ // CHECK: %[[R:.+]] = urem i32 %v, 10
+ // CHECK-NEXT: %[[T:.+]] = trunc i32 %[[R]] to i8
+ // CHECK-NEXT: %[[D:.+]] = or i8 %[[T]], 48
+ // CHECK-NEXT: ret i8 %[[D]]
+
+ // CHECK-NOT: icmp
+ // CHECK-NOT: panic
+ AsciiChar::digit((v % 10) as u8).unwrap()
+}
+
+// CHECK-LABEL: i8 @unwrap_from_masked(i8
+#[no_mangle]
+pub fn unwrap_from_masked(b: u8) -> AsciiChar {
+ // CHECK-NOT: icmp
+ // CHECK-NOT: panic
+
+ // CHECK: %[[M:.+]] = and i8 %b, 127
+ // CHECK-NEXT: ret i8 %[[M]]
+
+ // CHECK-NOT: icmp
+ // CHECK-NOT: panic
+ AsciiChar::from_u8(b & 0x7f).unwrap()
+}