Skip to content

Commit

Permalink
[RISCV][TTI] Add llvm.vp.select into canSplatOperand. (llvm#117982)
Browse files Browse the repository at this point in the history
Operand 1 (the second operand) of llvm.vp.select can be a splat operand. Sinking
the splat lets llvm.vp.select fold vv instructions into vx instructions.
  • Loading branch information
LiqinWeng authored Dec 2, 2024
1 parent 5248e1d commit ede5709
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 0 deletions.
1 change: 1 addition & 0 deletions llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2375,6 +2375,7 @@ bool RISCVTTIImpl::canSplatOperand(Instruction *I, int Operand) const {
case Intrinsic::vp_ssub_sat:
case Intrinsic::usub_sat:
case Intrinsic::vp_usub_sat:
case Intrinsic::vp_select:
return Operand == 1;
// These intrinsics are commutative.
case Intrinsic::vp_add:
Expand Down
83 changes: 83 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5498,3 +5498,86 @@ vector.body: ; preds = %vector.body, %entry
for.cond.cleanup: ; preds = %vector.body
ret void
}

; Positive case: the splat of %x is operand 1 of llvm.vp.select (the value
; chosen where the mask is true). The expected assembly below merges directly
; from the scalar register a1 with vmerge.vxm inside the loop and contains no
; vmv.v.x, i.e. the splat was folded into the .vx form.
define void @sink_splat_vp_select_op1(ptr %a, i32 %x, i32 %vl) {
; CHECK-LABEL: sink_splat_vp_select_op1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a4, 1
; CHECK-NEXT: li a3, 42
; CHECK-NEXT: slli a5, a2, 32
; CHECK-NEXT: add a2, a0, a4
; CHECK-NEXT: srli a4, a5, 32
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: .LBB119_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vmseq.vx v0, v8, a3
; CHECK-NEXT: vsetvli zero, a4, e32, m1, ta, ma
; CHECK-NEXT: vmerge.vxm v8, v8, a1, v0
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bne a0, a2, .LBB119_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
; Build the <4 x i32> splat of %x outside the loop (insertelement into lane 0,
; then shufflevector with an all-zero mask).
%broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
br label %vector.body

vector.body:
; %index advances by 4 per iteration until it reaches 1024.
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
%0 = getelementptr inbounds i32, ptr %a, i64 %index
%load = load <4 x i32>, ptr %0, align 4
; Mask of lanes whose loaded value equals 42.
%cond = icmp eq <4 x i32> %load, splat (i32 42)
; Splat is operand 1 here; %load is operand 2; %vl is the trailing i32 operand.
%1 = call <4 x i32> @llvm.vp.select(<4 x i1> %cond, <4 x i32> %broadcast.splat, <4 x i32> %load, i32 %vl)
store <4 x i32> %1, ptr %0, align 4
%index.next = add nuw i64 %index, 4
%2 = icmp eq i64 %index.next, 1024
br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:
ret void
}

; Negative case: the splat of %x is operand 2 of llvm.vp.select (the value
; chosen where the mask is false). The expected assembly below materializes the
; splat once with vmv.v.x before the loop label and uses the vector-vector
; vmerge.vvm inside the loop, i.e. no .vx folding takes place for this operand.
define void @sink_splat_vp_select_op2(ptr %a, i32 %x, i32 %vl) {
; CHECK-LABEL: sink_splat_vp_select_op2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.x v8, a1
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: li a1, 42
; CHECK-NEXT: slli a4, a2, 32
; CHECK-NEXT: add a2, a0, a3
; CHECK-NEXT: srli a3, a4, 32
; CHECK-NEXT: .LBB120_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v9, (a0)
; CHECK-NEXT: vmseq.vx v0, v9, a1
; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, ma
; CHECK-NEXT: vmerge.vvm v9, v8, v9, v0
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v9, (a0)
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bne a0, a2, .LBB120_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
; Build the <4 x i32> splat of %x outside the loop (insertelement into lane 0,
; then shufflevector with an all-zero mask).
%broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
br label %vector.body

vector.body:
; %index advances by 4 per iteration until it reaches 1024.
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
%0 = getelementptr inbounds i32, ptr %a, i64 %index
%load = load <4 x i32>, ptr %0, align 4
; Mask of lanes whose loaded value equals 42.
%cond = icmp eq <4 x i32> %load, splat (i32 42)
; %load is operand 1 here; the splat is operand 2; %vl is the trailing i32 operand.
%1 = call <4 x i32> @llvm.vp.select(<4 x i1> %cond, <4 x i32> %load, <4 x i32> %broadcast.splat, i32 %vl)
store <4 x i32> %1, ptr %0, align 4
%index.next = add nuw i64 %index, 4
%2 = icmp eq i64 %index.next, 1024
br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:
ret void
}

0 comments on commit ede5709

Please sign in to comment.