Refactor memchr to allow optimization

bugadani · bugadani · commit 89b8a97aead4 · 2020-09-27T15:10:48.000+02:00
diff --git a/library/core/src/slice/cmp.rs b/library/core/src/slice/cmp.rs
@@ -268,12 +268,14 @@ where
 }
 
 impl SliceContains for u8 {
+    #[inline]
     fn slice_contains(&self, x: &[Self]) -> bool {
         memchr::memchr(*self, x).is_some()
     }
 }
 
 impl SliceContains for i8 {
+    #[inline]
     fn slice_contains(&self, x: &[Self]) -> bool {
         let byte = *self as u8;
         // SAFETY: `i8` and `u8` have the same memory layout, thus casting `x.as_ptr()`
diff --git a/library/core/src/slice/memchr.rs b/library/core/src/slice/memchr.rs
@@ -12,6 +12,7 @@ const HI_U64: u64 = 0x8080808080808080;
 // Use truncation.
 const LO_USIZE: usize = LO_U64 as usize;
 const HI_USIZE: usize = HI_U64 as usize;
+const USIZE_BYTES: usize = mem::size_of::<usize>();
 
 /// Returns `true` if `x` contains any zero byte.
 ///
@@ -38,19 +39,29 @@ fn repeat_byte(b: u8) -> usize {
 }
 
 /// Returns the first index matching the byte `x` in `text`.
+#[inline]
 pub fn memchr(x: u8, text: &[u8]) -> Option<usize> {
+    // Fast path for small slices
+    if text.len() < 2 * USIZE_BYTES {
+        return text.iter().position(|elt| *elt == x);
+    }
+
+    memchr_general_case(x, text)
+}
+
+fn memchr_general_case(x: u8, text: &[u8]) -> Option<usize> {
     // Scan for a single byte value by reading two `usize` words at a time.
     //
     // Split `text` in three parts
     // - unaligned initial part, before the first word aligned address in text
     // - body, scan by 2 words at a time
     // - the last remaining part, < 2 word size
+
+    // search up to an aligned boundary
     let len = text.len();
     let ptr = text.as_ptr();
-    let usize_bytes = mem::size_of::<usize>();
+    let mut offset = ptr.align_offset(USIZE_BYTES);
 
-    // search up to an aligned boundary
-    let mut offset = ptr.align_offset(usize_bytes);
     if offset > 0 {
         offset = cmp::min(offset, len);
         if let Some(index) = text[..offset].iter().position(|elt| *elt == x) {
@@ -60,22 +71,19 @@ pub fn memchr(x: u8, text: &[u8]) -> Option<usize> {
 
     // search the body of the text
     let repeated_x = repeat_byte(x);
+    while offset <= len - 2 * USIZE_BYTES {
+        unsafe {
+            let u = *(ptr.add(offset) as *const usize);
+            let v = *(ptr.add(offset + USIZE_BYTES) as *const usize);
 
-    if len >= 2 * usize_bytes {
-        while offset <= len - 2 * usize_bytes {
-            unsafe {
-                let u = *(ptr.add(offset) as *const usize);
-                let v = *(ptr.add(offset + usize_bytes) as *const usize);
-
-                // break if there is a matching byte
-                let zu = contains_zero_byte(u ^ repeated_x);
-                let zv = contains_zero_byte(v ^ repeated_x);
-                if zu || zv {
-                    break;
-                }
+            // break if there is a matching byte
+            let zu = contains_zero_byte(u ^ repeated_x);
+            let zv = contains_zero_byte(v ^ repeated_x);
+            if zu || zv {
+                break;
             }
-            offset += usize_bytes * 2;
         }
+        offset += USIZE_BYTES * 2;
     }
 
     // Find the byte after the point the body loop stopped.
diff --git a/library/core/src/slice/mod.rs b/library/core/src/slice/mod.rs
@@ -1633,6 +1633,7 @@ impl<T> [T] {
     /// assert!(!v.iter().any(|e| e == "hi"));
     /// ```
     #[stable(feature = "rust1", since = "1.0.0")]
+    #[inline]
     pub fn contains(&self, x: &T) -> bool
     where
         T: PartialEq,
diff --git a/src/test/codegen/issue-75659.rs b/src/test/codegen/issue-75659.rs
@@ -0,0 +1,62 @@
+// This test checks that the call to memchr/slice_contains is optimized away
+// when searching in small slices.
+
+// compile-flags: -O
+
+#![crate_type = "lib"]
+
+// CHECK-LABEL: @foo1
+#[no_mangle]
+pub fn foo1(x: u8, data: &[u8; 1]) -> bool {
+    // CHECK-NOT: memchr
+    // CHECK-NOT: slice_contains
+    data.contains(&x)
+}
+
+// CHECK-LABEL: @foo2
+#[no_mangle]
+pub fn foo2(x: u8, data: &[u8; 2]) -> bool {
+    // CHECK-NOT: memchr
+    // CHECK-NOT: slice_contains
+    data.contains(&x)
+}
+
+// CHECK-LABEL: @foo3
+#[no_mangle]
+pub fn foo3(x: u8, data: &[u8; 3]) -> bool {
+    // CHECK-NOT: memchr
+    // CHECK-NOT: slice_contains
+    data.contains(&x)
+}
+
+// CHECK-LABEL: @foo4
+#[no_mangle]
+pub fn foo4(x: u8, data: &[u8; 4]) -> bool {
+    // CHECK-NOT: memchr
+    // CHECK-NOT: slice_contains
+    data.contains(&x)
+}
+
+// CHECK-LABEL: @foo8
+#[no_mangle]
+pub fn foo8(x: u8, data: &[u8; 8]) -> bool {
+    // CHECK-NOT: memchr
+    // CHECK-NOT: slice_contains
+    data.contains(&x)
+}
+
+// CHECK-LABEL: @foo8_i8
+#[no_mangle]
+pub fn foo8_i8(x: i8, data: &[i8; 8]) -> bool {
+    // CHECK-NOT: memchr
+    // CHECK-NOT: slice_contains
+    !data.contains(&x)
+}
+
+// Check that the general case isn't inlined
+// CHECK-LABEL: @foo80
+#[no_mangle]
+pub fn foo80(x: u8, data: &[u8; 80]) -> bool {
+    // CHECK: call core::slice::memchr
+    data.contains(&x)
+}

Original file line number	Diff line number	Diff line change
`@@ -268,12 +268,14 @@ where`
`268`	`268`	`}`
`269`	`269`
`270`	`270`	`impl SliceContains for u8 {`
	`271`	`+ #[inline]`
`271`	`272`	`fn slice_contains(&self, x: &[Self]) -> bool {`
`272`	`273`	`memchr::memchr(*self, x).is_some()`
`273`	`274`	`}`
`274`	`275`	`}`
`275`	`276`
`276`	`277`	`impl SliceContains for i8 {`
	`278`	`+ #[inline]`
`277`	`279`	`fn slice_contains(&self, x: &[Self]) -> bool {`
`278`	`280`	`let byte = *self as u8;`
`279`	`281`	// SAFETY: `i8` and `u8` have the same memory layout, thus casting `x.as_ptr()`