Skip to content

Commit 79ebf53

Browse files
authored
Rollup merge of #67585 - ranma42:fix/char-is-ascii-codegen, r=Amanieu
Improve `char::is_ascii_*` codegen

This PR is an attempt to fix #65127.

A couple of warnings:

1. The generated code might be further improved (in LLVM and/or MIR) by emitting better comparison sequences; in particular, this would improve the performance of "complex" checks such as those in `is_ascii_punctuation`.
2. The second commit is currently marked "DO NOT MERGE", because it regresses SIMD on `u8` slices; this could likely be fixed by improving the computation/usage of demanded bits in LLVM.

An alternative approach to remove the code duplication might be the use of macros, but currently most of the duplication is actually in the doc comments, so maybe just keeping the redundancy could be ok.
2 parents cd5441f + 4e7aeaf commit 79ebf53

File tree

1 file changed

+40
-10
lines changed

1 file changed

+40
-10
lines changed

src/libcore/char/methods.rs

+40-10
Original file line numberDiff line numberDiff line change
@@ -1075,7 +1075,10 @@ impl char {
10751075
#[rustc_const_unstable(feature = "const_ascii_ctype_on_intrinsics", issue = "68983")]
10761076
#[inline]
10771077
pub const fn is_ascii_alphabetic(&self) -> bool {
1078-
self.is_ascii() && (*self as u8).is_ascii_alphabetic()
1078+
match *self {
1079+
'A'..='Z' | 'a'..='z' => true,
1080+
_ => false,
1081+
}
10791082
}
10801083

10811084
/// Checks if the value is an ASCII uppercase character:
@@ -1108,7 +1111,10 @@ impl char {
11081111
#[rustc_const_unstable(feature = "const_ascii_ctype_on_intrinsics", issue = "68983")]
11091112
#[inline]
11101113
pub const fn is_ascii_uppercase(&self) -> bool {
1111-
self.is_ascii() && (*self as u8).is_ascii_uppercase()
1114+
match *self {
1115+
'A'..='Z' => true,
1116+
_ => false,
1117+
}
11121118
}
11131119

11141120
/// Checks if the value is an ASCII lowercase character:
@@ -1141,7 +1147,10 @@ impl char {
11411147
#[rustc_const_unstable(feature = "const_ascii_ctype_on_intrinsics", issue = "68983")]
11421148
#[inline]
11431149
pub const fn is_ascii_lowercase(&self) -> bool {
1144-
self.is_ascii() && (*self as u8).is_ascii_lowercase()
1150+
match *self {
1151+
'a'..='z' => true,
1152+
_ => false,
1153+
}
11451154
}
11461155

11471156
/// Checks if the value is an ASCII alphanumeric character:
@@ -1177,7 +1186,10 @@ impl char {
11771186
#[rustc_const_unstable(feature = "const_ascii_ctype_on_intrinsics", issue = "68983")]
11781187
#[inline]
11791188
pub const fn is_ascii_alphanumeric(&self) -> bool {
1180-
self.is_ascii() && (*self as u8).is_ascii_alphanumeric()
1189+
match *self {
1190+
'0'..='9' | 'A'..='Z' | 'a'..='z' => true,
1191+
_ => false,
1192+
}
11811193
}
11821194

11831195
/// Checks if the value is an ASCII decimal digit:
@@ -1210,7 +1222,10 @@ impl char {
12101222
#[rustc_const_unstable(feature = "const_ascii_ctype_on_intrinsics", issue = "68983")]
12111223
#[inline]
12121224
pub const fn is_ascii_digit(&self) -> bool {
1213-
self.is_ascii() && (*self as u8).is_ascii_digit()
1225+
match *self {
1226+
'0'..='9' => true,
1227+
_ => false,
1228+
}
12141229
}
12151230

12161231
/// Checks if the value is an ASCII hexadecimal digit:
@@ -1246,7 +1261,10 @@ impl char {
12461261
#[rustc_const_unstable(feature = "const_ascii_ctype_on_intrinsics", issue = "68983")]
12471262
#[inline]
12481263
pub const fn is_ascii_hexdigit(&self) -> bool {
1249-
self.is_ascii() && (*self as u8).is_ascii_hexdigit()
1264+
match *self {
1265+
'0'..='9' | 'A'..='F' | 'a'..='f' => true,
1266+
_ => false,
1267+
}
12501268
}
12511269

12521270
/// Checks if the value is an ASCII punctuation character:
@@ -1283,7 +1301,10 @@ impl char {
12831301
#[rustc_const_unstable(feature = "const_ascii_ctype_on_intrinsics", issue = "68983")]
12841302
#[inline]
12851303
pub const fn is_ascii_punctuation(&self) -> bool {
1286-
self.is_ascii() && (*self as u8).is_ascii_punctuation()
1304+
match *self {
1305+
'!'..='/' | ':'..='@' | '['..='`' | '{'..='~' => true,
1306+
_ => false,
1307+
}
12871308
}
12881309

12891310
/// Checks if the value is an ASCII graphic character:
@@ -1316,7 +1337,10 @@ impl char {
13161337
#[rustc_const_unstable(feature = "const_ascii_ctype_on_intrinsics", issue = "68983")]
13171338
#[inline]
13181339
pub const fn is_ascii_graphic(&self) -> bool {
1319-
self.is_ascii() && (*self as u8).is_ascii_graphic()
1340+
match *self {
1341+
'!'..='~' => true,
1342+
_ => false,
1343+
}
13201344
}
13211345

13221346
/// Checks if the value is an ASCII whitespace character:
@@ -1366,7 +1390,10 @@ impl char {
13661390
#[rustc_const_unstable(feature = "const_ascii_ctype_on_intrinsics", issue = "68983")]
13671391
#[inline]
13681392
pub const fn is_ascii_whitespace(&self) -> bool {
1369-
self.is_ascii() && (*self as u8).is_ascii_whitespace()
1393+
match *self {
1394+
'\t' | '\n' | '\x0C' | '\r' | ' ' => true,
1395+
_ => false,
1396+
}
13701397
}
13711398

13721399
/// Checks if the value is an ASCII control character:
@@ -1401,6 +1428,9 @@ impl char {
14011428
#[rustc_const_unstable(feature = "const_ascii_ctype_on_intrinsics", issue = "68983")]
14021429
#[inline]
14031430
pub const fn is_ascii_control(&self) -> bool {
1404-
self.is_ascii() && (*self as u8).is_ascii_control()
1431+
match *self {
1432+
'\0'..='\x1F' | '\x7F' => true,
1433+
_ => false,
1434+
}
14051435
}
14061436
}

0 commit comments

Comments
 (0)