diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 20f3b717f8b1d8..9a5a48333904c6 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -2375,6 +2375,7 @@ bool RISCVTTIImpl::canSplatOperand(Instruction *I, int Operand) const { case Intrinsic::vp_ssub_sat: case Intrinsic::usub_sat: case Intrinsic::vp_usub_sat: + case Intrinsic::vp_select: /* vp.select: a splat is sinkable only as operand 1 (the first value operand after the mask), where the sink_splat_vp_select_op1 test shows it folding into vmerge.vxm; a splat in operand 2 stays a pre-loop vmv.v.x feeding vmerge.vvm (sink_splat_vp_select_op2). */ return Operand == 1; // These intrinsics are commutative. case Intrinsic::vp_add: diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll index c91b02e8f15e47..197ba085c03599 100644 --- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll @@ -5498,3 +5498,86 @@ vector.body: ; preds = %vector.body, %entry for.cond.cleanup: ; preds = %vector.body ret void } + +define void @sink_splat_vp_select_op1(ptr %a, i32 %x, i32 %vl) { +; CHECK-LABEL: sink_splat_vp_select_op1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a4, 1 +; CHECK-NEXT: li a3, 42 +; CHECK-NEXT: slli a5, a2, 32 +; CHECK-NEXT: add a2, a0, a4 +; CHECK-NEXT: srli a4, a5, 32 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: .LBB119_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmseq.vx v0, v8, a3 +; CHECK-NEXT: vsetvli zero, a4, e32, m1, ta, ma +; CHECK-NEXT: vmerge.vxm v8, v8, a1, v0 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: bne a0, a2, .LBB119_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 + %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer + br label %vector.body + +vector.body: + %index = phi 
i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i32, ptr %a, i64 %index + %load = load <4 x i32>, ptr %0, align 4 + %cond = icmp eq <4 x i32> %load, splat (i32 42) + %1 = call <4 x i32> @llvm.vp.select(<4 x i1> %cond, <4 x i32> %broadcast.splat, <4 x i32> %load, i32 %vl) + store <4 x i32> %1, ptr %0, align 4 + %index.next = add nuw i64 %index, 4 + %2 = icmp eq i64 %index.next, 1024 + br i1 %2, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: + ret void +} + +define void @sink_splat_vp_select_op2(ptr %a, i32 %x, i32 %vl) { +; CHECK-LABEL: sink_splat_vp_select_op2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: lui a3, 1 +; CHECK-NEXT: li a1, 42 +; CHECK-NEXT: slli a4, a2, 32 +; CHECK-NEXT: add a2, a0, a3 +; CHECK-NEXT: srli a3, a4, 32 +; CHECK-NEXT: .LBB120_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vle32.v v9, (a0) +; CHECK-NEXT: vmseq.vx v0, v9, a1 +; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, ma +; CHECK-NEXT: vmerge.vvm v9, v8, v9, v0 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vse32.v v9, (a0) +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: bne a0, a2, .LBB120_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 + %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer + br label %vector.body + +vector.body: + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i32, ptr %a, i64 %index + %load = load <4 x i32>, ptr %0, align 4 + %cond = icmp eq <4 x i32> %load, splat (i32 42) + %1 = call <4 x i32> @llvm.vp.select(<4 x i1> %cond, <4 x i32> %load, <4 x i32> %broadcast.splat, i32 %vl) + store <4 x i32> %1, ptr %0, align 4 + %index.next = add nuw i64 %index, 4 + %2 = icmp eq i64 %index.next, 1024 + br i1 
%2, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: + ret void +}