diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 63493eb78c451..7b31eba9db638 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -5437,6 +5437,8 @@ void OpenMPOpt::registerAAsForFunction(Attributor &A, const Function &F) { bool UsedAssumedInformation = false; A.getAssumedSimplified(IRPosition::value(*LI), /* AA */ nullptr, UsedAssumedInformation, AA::Interprocedural); + A.getOrCreateAAFor( + IRPosition::value(*LI->getPointerOperand())); continue; } if (auto *CI = dyn_cast(&I)) { @@ -5446,6 +5448,8 @@ void OpenMPOpt::registerAAsForFunction(Attributor &A, const Function &F) { } if (auto *SI = dyn_cast(&I)) { A.getOrCreateAAFor(IRPosition::value(*SI)); + A.getOrCreateAAFor( + IRPosition::value(*SI->getPointerOperand())); continue; } if (auto *FI = dyn_cast(&I)) { diff --git a/llvm/test/Transforms/Attributor/reduced/openmp_opt_constant_type_crash.ll b/llvm/test/Transforms/Attributor/reduced/openmp_opt_constant_type_crash.ll index fda72a6e31a0c..518ed97f42bc1 100644 --- a/llvm/test/Transforms/Attributor/reduced/openmp_opt_constant_type_crash.ll +++ b/llvm/test/Transforms/Attributor/reduced/openmp_opt_constant_type_crash.ll @@ -116,6 +116,7 @@ cond.end: ; preds = %cond.true, %entry ; CHECK-LABEL: define {{[^@]+}}@_ZN6Kokkos4Impl14SubviewExtentsILj2ELj1EE3setIJLm0ELm0EEJiEEEbjjRKNS0_13ViewDimensionIJXspT_EEEENS0_5ALL_tEDpT0_.internalized ; CHECK-SAME: (ptr nocapture writeonly [[THIS:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[THIS]] to ptr addrspace(5) ; CHECK-NEXT: ret i1 false ; ; diff --git a/llvm/test/Transforms/OpenMP/barrier_removal.ll b/llvm/test/Transforms/OpenMP/barrier_removal.ll index e45f746cbf439..f6bddcc4b72ba 100644 --- a/llvm/test/Transforms/OpenMP/barrier_removal.ll +++ b/llvm/test/Transforms/OpenMP/barrier_removal.ll @@ -267,10 +267,9 @@ define void @neg_empty_2() "kernel" { define void @pos_constant_loads() "kernel" { ; CHECK-LABEL: define {{[^@]+}}@pos_constant_loads ; CHECK-SAME: () #[[ATTR4]] { -; CHECK-NEXT: [[ARG:%.*]] = load ptr addrspace(4), ptr addrspacecast (ptr addrspace(4) @GPtr4 to ptr), align 8 -; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspacecast (ptr addrspace(4) @GC2 to ptr), align 4 -; CHECK-NEXT: [[ARGC:%.*]] = addrspacecast ptr addrspace(4) [[ARG]] to ptr -; CHECK-NEXT: [[C:%.*]] = load i32, ptr [[ARGC]], align 4 +; CHECK-NEXT: [[ARG:%.*]] = load ptr addrspace(4), ptr addrspace(4) @GPtr4, align 8 +; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspace(4) @GC2, align 4 +; CHECK-NEXT: [[C:%.*]] = load i32, ptr addrspace(4) [[ARG]], align 4 ; CHECK-NEXT: [[D:%.*]] = add i32 42, [[B]] ; CHECK-NEXT: [[E:%.*]] = add i32 [[D]], [[C]] ; CHECK-NEXT: call void @useI32(i32 [[E]]) @@ -301,7 +300,7 @@ define void @neg_loads() "kernel" { ; CHECK-NEXT: [[ARG:%.*]] = load ptr, ptr @GPtr, align 8 ; CHECK-NEXT: [[A:%.*]] = load i32, ptr @G, align 4 ; CHECK-NEXT: call void @aligned_barrier() -; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @GS to ptr), align 4 +; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspace(3) @GS, align 4 ; CHECK-NEXT: call void @aligned_barrier() ; CHECK-NEXT: [[C:%.*]] = load i32, ptr [[ARG]], align 4 ; CHECK-NEXT: call void @aligned_barrier() @@ -333,9 +332,8 @@ define void @pos_priv_mem() "kernel" { ; CHECK-NEXT: [[LOC:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[A:%.*]] = load i32, ptr @PG1, align 4 ; CHECK-NEXT: store i32 [[A]], ptr [[LOC]], align 4 -; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspacecast (ptr addrspace(5) @PG2 to ptr), align 4 -; CHECK-NEXT: [[ARGC:%.*]] = addrspacecast ptr addrspace(5) [[ARG]] to ptr -; CHECK-NEXT: store i32 [[B]], ptr [[ARGC]], align 4 +; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspace(5) @PG2, align 4 +; CHECK-NEXT: store i32 [[B]], ptr addrspace(5) [[ARG]], align 4 ; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[LOC]], align 4 ; CHECK-NEXT: store i32 [[V]], ptr @PG1, align 4 ; CHECK-NEXT: ret void @@ -368,7 +366,7 @@ define void @neg_mem() "kernel" { ; CHECK-NEXT: store i32 [[A]], ptr [[ARG]], align 4 ; CHECK-NEXT: fence release ; CHECK-NEXT: call void @aligned_barrier() -; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @G2 to ptr), align 4 +; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspace(1) @G2, align 4 ; CHECK-NEXT: store i32 [[B]], ptr @G1, align 4 ; CHECK-NEXT: fence acquire ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/OpenMP/heap-to-shared-missing-declarations.ll b/llvm/test/Transforms/OpenMP/heap-to-shared-missing-declarations.ll index 7a5bba75f6c48..d81f34f3c4273 100644 --- a/llvm/test/Transforms/OpenMP/heap-to-shared-missing-declarations.ll +++ b/llvm/test/Transforms/OpenMP/heap-to-shared-missing-declarations.ll @@ -22,7 +22,7 @@ define internal void @func() { ; CHECK-LABEL: define {{[^@]+}}@func ; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: bb: -; CHECK-NEXT: [[I:%.*]] = load ptr, ptr null, align 4294967296 +; CHECK-NEXT: [[I:%.*]] = load ptr, ptr addrspace(5) null, align 4294967296 ; CHECK-NEXT: store i64 0, ptr [[I]], align 8 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/OpenMP/nested_parallelism.ll b/llvm/test/Transforms/OpenMP/nested_parallelism.ll index 03832b32c0b9a..fd4f3012e3f2b 100644 --- a/llvm/test/Transforms/OpenMP/nested_parallelism.ll +++ b/llvm/test/Transforms/OpenMP/nested_parallelism.ll @@ -60,11 +60,11 @@ define weak_odr protected void @__omp_offloading_10302_bd7e0_main_l13(i64 nounde ; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED_I:%.*]], label [[_Z3FOOI_INTERNALIZED_EXIT:%.*]] ; CHECK: region.guarded.i: ; CHECK-NEXT: [[I_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[I:%.*]] to i32 -; CHECK-NEXT: store i32 [[I_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspacecast (ptr addrspace(3) @i_shared to ptr), align 16 +; CHECK-NEXT: store i32 [[I_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(3) @i_shared, align 16 ; CHECK-NEXT: br label [[_Z3FOOI_INTERNALIZED_EXIT]] ; CHECK: _Z3fooi.internalized.exit: ; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @[[GLOB1]], i32 [[TMP2]]) #[[ATTR2]] -; CHECK-NEXT: store ptr addrspacecast (ptr addrspace(3) @i_shared to ptr), ptr [[CAPTURED_VARS_ADDRS_I]], align 8 +; CHECK-NEXT: store ptr addrspace(3) @i_shared, ptr [[CAPTURED_VARS_ADDRS_I]], align 8 ; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__, ptr nonnull @__omp_outlined___wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I]], i64 1) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]]) ; CHECK-NEXT: call void @__kmpc_target_deinit() @@ -140,8 +140,8 @@ define weak_odr protected void @__omp_offloading_10302_bd7e0_main_l16(i64 nounde ; CHECK-NEXT: [[I_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[I:%.*]] to i32 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]]) ; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]] -; CHECK-NEXT: store i32 [[I_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), align 16 -; CHECK-NEXT: store ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), ptr [[CAPTURED_VARS_ADDRS_I]], align 8 +; CHECK-NEXT: store i32 [[I_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(3) @i.i_shared, align 16 +; CHECK-NEXT: store ptr addrspace(3) @i.i_shared, ptr [[CAPTURED_VARS_ADDRS_I]], align 8 ; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I]], i64 1) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]]) ; CHECK-NEXT: call void @__kmpc_target_deinit() diff --git a/llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll b/llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll index 2ac0ae5b307e4..ebe99d856d793 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll @@ -27,21 +27,22 @@ define i32 @fputs() { define internal i32 @__kmpc_target_init(ptr %0) { ; AMDGPU-LABEL: define {{[^@]+}}@__kmpc_target_init ; AMDGPU-SAME: (ptr [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] { -; AMDGPU-NEXT: [[TMP2:%.*]] = load i8, ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(1) @__omp_offloading_10302_b20a40e_main_l4_kernel_environment to ptr), i64 2), align 2 -; AMDGPU-NEXT: [[TMP3:%.*]] = and i8 [[TMP2]], 2 -; AMDGPU-NEXT: [[TMP4:%.*]] = icmp ne i8 [[TMP3]], 0 -; AMDGPU-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #[[ATTR3:[0-9]+]] -; AMDGPU-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0 -; AMDGPU-NEXT: [[OR_COND:%.*]] = select i1 [[TMP4]], i1 [[TMP6]], i1 false -; AMDGPU-NEXT: br i1 [[OR_COND]], label [[TMP7:%.*]], label [[TMP8:%.*]] -; AMDGPU: 7: -; AMDGPU-NEXT: store i8 0, ptr addrspace(3) null, align 2147483648 -; AMDGPU-NEXT: br label [[TMP8]] +; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(1) @__omp_offloading_10302_b20a40e_main_l4_kernel_environment to ptr), i64 2) to ptr addrspace(1) +; AMDGPU-NEXT: [[TMP3:%.*]] = load i8, ptr addrspace(1) [[TMP2]], align 2 +; AMDGPU-NEXT: [[TMP4:%.*]] = and i8 [[TMP3]], 2 +; AMDGPU-NEXT: [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0 +; AMDGPU-NEXT: [[TMP6:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #[[ATTR3:[0-9]+]] +; AMDGPU-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 +; AMDGPU-NEXT: [[OR_COND:%.*]] = select i1 [[TMP5]], i1 [[TMP7]], i1 false +; AMDGPU-NEXT: br i1 [[OR_COND]], label [[TMP8:%.*]], label [[TMP9:%.*]] ; AMDGPU: 8: -; AMDGPU-NEXT: br label [[TMP10:%.*]] +; AMDGPU-NEXT: store i8 0, ptr addrspace(3) null, align 2147483648 +; AMDGPU-NEXT: br label [[TMP9]] ; AMDGPU: 9: -; AMDGPU-NEXT: unreachable +; AMDGPU-NEXT: br label [[TMP11:%.*]] ; AMDGPU: 10: +; AMDGPU-NEXT: unreachable +; AMDGPU: 11: ; AMDGPU-NEXT: ret i32 0 ; %2 = getelementptr %struct.ConfigurationEnvironmentTy.8, ptr %0, i64 0, i32 2