Skip to content

SimplifyCFG removes return statement #118855

@ilovepi

Description

@ilovepi

We're seeing a miscompile from Rust, but the IR seems valid, and the bug only occurs while lowering LLVM IR.

I've somewhat reduced the case down, but I haven't run llvm-reduce over it yet.

The following diff shows that after simplifycfg we drop the return branch altogether.

❯ diff /tmp/lto.small.ll /tmp/lto.small.bad.strip.ll
1c1
< ; ModuleID = '/tmp/lto.strip.ll'
---
> ; ModuleID = '/tmp/lto.small.ll'
17c17
<   br label %4
---
>   br label %3
19,29c19,23
< 3:                                                ; preds = %4
<   ret void
<
< 4:                                                ; preds = %6, %0
<   %5 = phi i8 [ %7, %6 ], [ -3, %0 ]
<   br i1 false, label %3, label %6
<
< 6:                                                ; preds = %4
<   %7 = add nuw nsw i8 %5, 1
<   call fastcc void @_RNvMCs7vJBeK7brjY_8lto_hangNtB2_4Test3add(ptr noalias noundef nonnull align 8 dereferenceable(24) %1, i8 noundef %5) #4
<   br label %4
---
> 3:                                                ; preds = %3, %0
>   %4 = phi i8 [ %5, %3 ], [ -3, %0 ]
>   %5 = add nuw nsw i8 %4, 1
>   call fastcc void @_RNvMCs7vJBeK7brjY_8lto_hangNtB2_4Test3add(ptr noalias noundef nonnull align 8 dereferenceable(24) %1, i8 noundef %4) #4
>   br label %3

For completeness I'm including the IR before and after simplifycfg:

Before simplifycfg

; ModuleID = '/tmp/lto.strip.ll'
source_filename = "lto_hang.577eccc79634c19c-cgu.0"
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "aarch64-unknown-fuchsia"

; Function Attrs: nounwind optsize shadowcallstack uwtable
declare hidden fastcc void @_RNvMCs7vJBeK7brjY_8lto_hangNtB2_4Test3add(ptr noalias nocapture noundef align 8 dereferenceable(24), i8 noundef) unnamed_addr #0

; Function Attrs: noinline nounwind optsize shadowcallstack uwtable
; Reproducer input (before simplifycfg): sets up a 24-byte stack object, calls
; the `...Test3add` callee once with -4, then enters a loop whose counter
; starts at -3. The only `ret` lives in block %3.
define hidden fastcc void @_RNvCs7vJBeK7brjY_8lto_hang11do_the_test() unnamed_addr #1 {
  %1 = alloca [24 x i8], align 8
  call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %1)
  ; %2 points at byte offset 16 of the stack object; the i16 store fills bytes 16-17.
  %2 = getelementptr inbounds i8, ptr %1, i64 16
  store i16 -32640, ptr %2, align 8, !alias.scope !6
  ; Zero the first 16 bytes of the stack object.
  call void @llvm.memset.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) %1, i8 0, i64 16, i1 false), !alias.scope !6
  call fastcc void @_RNvMCs7vJBeK7brjY_8lto_hangNtB2_4Test3add(ptr noalias noundef nonnull align 8 dereferenceable(24) %1, i8 noundef -4) #4
  br label %4

; Exit block: its only predecessor is the "true" edge of the branch in %4.
3:                                                ; preds = %4
  ret void

; Loop header: %5 is the loop counter, -3 on entry and %7 on the back edge.
4:                                                ; preds = %6, %0
  %5 = phi i8 [ %7, %6 ], [ -3, %0 ]
  ; The condition is the constant `false`, so this branch always selects %6
  ; and the edge to the exit block %3 is never taken in this IR as written.
  br i1 false, label %3, label %6

; Loop body: compute the next counter value, call the callee with the current
; (pre-increment) counter, then jump back to the header.
6:                                                ; preds = %4
  ; NOTE(review): %5 starts at -3 (0xFD); on the third iteration this `add nuw`
  ; would compute 0xFF + 1, which wraps unsigned -- confirm whether the nuw
  ; flag is correct in the unreduced IR, since the result feeds a noundef argument.
  %7 = add nuw nsw i8 %5, 1
  call fastcc void @_RNvMCs7vJBeK7brjY_8lto_hangNtB2_4Test3add(ptr noalias noundef nonnull align 8 dereferenceable(24) %1, i8 noundef %5) #4
  br label %4
}

; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #2

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #3

attributes #0 = { nounwind optsize shadowcallstack uwtable "frame-pointer"="all" "probe-stack"="inline-asm" "target-cpu"="generic" "target-features"="+v8a,+crc,+aes,+neon,+fp-armv8,+neon,+fp-armv8,+sha2" }
attributes #1 = { noinline nounwind optsize shadowcallstack uwtable "frame-pointer"="all" "probe-stack"="inline-asm" "target-cpu"="generic" "target-features"="+v8a,+crc,+aes,+neon,+fp-armv8,+neon,+fp-armv8,+sha2" }
attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: write) }
attributes #3 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
attributes #4 = { nounwind }

!llvm.module.flags = !{!0, !1, !2, !3, !4}
!llvm.ident = !{!5}

!0 = !{i32 8, !"PIC Level", i32 2}
!1 = !{i32 7, !"PIE Level", i32 2}
!2 = !{i32 4, !"EnableSplitLTOUnit", i32 1}
!3 = !{i32 2, !"Dwarf Version", i32 4}
!4 = !{i32 2, !"Debug Info Version", i32 3}
!5 = !{!"rustc version 1.84.0-nightly (583b25d8d 2024-11-12)"}
!6 = !{!7}
!7 = distinct !{!7, !8, !"_RNvMCs7vJBeK7brjY_8lto_hangNtB2_4Test3new: argument 0"}
!8 = distinct !{!8, !"_RNvMCs7vJBeK7brjY_8lto_hangNtB2_4Test3new"}

After simplifycfg:

; ModuleID = '/tmp/lto.small.ll'
source_filename = "lto_hang.577eccc79634c19c-cgu.0"
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "aarch64-unknown-fuchsia"

; Function Attrs: nounwind optsize shadowcallstack uwtable
declare hidden fastcc void @_RNvMCs7vJBeK7brjY_8lto_hangNtB2_4Test3add(ptr noalias nocapture noundef align 8 dereferenceable(24), i8 noundef) unnamed_addr #0

; Function Attrs: noinline nounwind optsize shadowcallstack uwtable
; Output of simplifycfg for the same function: the entry block is unchanged
; apart from its branch target, and the loop is now a single block %3 that
; branches to itself. No `ret` instruction remains in the function.
define hidden fastcc void @_RNvCs7vJBeK7brjY_8lto_hang11do_the_test() unnamed_addr #1 {
  %1 = alloca [24 x i8], align 8
  call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %1)
  ; %2 points at byte offset 16 of the stack object; the i16 store fills bytes 16-17.
  %2 = getelementptr inbounds i8, ptr %1, i64 16
  store i16 -32640, ptr %2, align 8, !alias.scope !6
  ; Zero the first 16 bytes of the stack object.
  call void @llvm.memset.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) %1, i8 0, i64 16, i1 false), !alias.scope !6
  call fastcc void @_RNvMCs7vJBeK7brjY_8lto_hangNtB2_4Test3add(ptr noalias noundef nonnull align 8 dereferenceable(24) %1, i8 noundef -4) #4
  br label %3

; Merged loop block: %4 is the counter (-3 on entry, %5 on the back edge).
; The callee receives the pre-increment counter %4, and the block ends in an
; unconditional branch to itself.
3:                                                ; preds = %3, %0
  %4 = phi i8 [ %5, %3 ], [ -3, %0 ]
  %5 = add nuw nsw i8 %4, 1
  call fastcc void @_RNvMCs7vJBeK7brjY_8lto_hangNtB2_4Test3add(ptr noalias noundef nonnull align 8 dereferenceable(24) %1, i8 noundef %4) #4
  br label %3
}

; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #2

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #3

attributes #0 = { nounwind optsize shadowcallstack uwtable "frame-pointer"="all" "probe-stack"="inline-asm" "target-cpu"="generic" "target-features"="+v8a,+crc,+aes,+neon,+fp-armv8,+neon,+fp-armv8,+sha2" }
attributes #1 = { noinline nounwind optsize shadowcallstack uwtable "frame-pointer"="all" "probe-stack"="inline-asm" "target-cpu"="generic" "target-features"="+v8a,+crc,+aes,+neon,+fp-armv8,+neon,+fp-armv8,+sha2" }
attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: write) }
attributes #3 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
attributes #4 = { nounwind }

!llvm.module.flags = !{!0, !1, !2, !3, !4}
!llvm.ident = !{!5}

!0 = !{i32 8, !"PIC Level", i32 2}
!1 = !{i32 7, !"PIE Level", i32 2}
!2 = !{i32 4, !"EnableSplitLTOUnit", i32 1}
!3 = !{i32 2, !"Dwarf Version", i32 4}
!4 = !{i32 2, !"Debug Info Version", i32 3}
!5 = !{!"rustc version 1.84.0-nightly (583b25d8d 2024-11-12)"}
!6 = !{!7}
!7 = distinct !{!7, !8, !"_RNvMCs7vJBeK7brjY_8lto_hangNtB2_4Test3new: argument 0"}
!8 = distinct !{!8, !"_RNvMCs7vJBeK7brjY_8lto_hangNtB2_4Test3new"}

The "after" module was generated with `opt -passes=simplifycfg < before.ll -S`.

Unfortunately, this issue originates from Rust (Fuchsia's toolchain, which follows a different release cadence than upstream), and it isn't clear when this behavior was introduced in the LLVM used by rustc, so we'll probably need to bisect quite a ways back. I did reproduce this with ToT LLVM, though, so it's still an issue on the LLVM side.

If no one has a thought on the underlying issue, I'll pick up the investigation again tomorrow, reduce the test further, and try to kick off a bisect to run this down a bit more.

CC: @nikic @tmandry

Metadata

Metadata

Assignees

No one assigned

    Type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions