[RISCV][Isel] Remove redundant vmerge for the scalable vwadd(u).wv #80079

sun-jacobi · 2024-01-30T23:37:56Z

Similar to #78403, but for scalable vwadd(u).wv, given that #76785 has been recommitted.

Code

define <vscale x 8 x i64> @vwadd_wv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x 8 x i64> %y) {
    %mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
    %a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
    %sa = sext <vscale x 8 x i32> %a to <vscale x 8 x i64>
    %ret = add <vscale x 8 x i64> %sa, %y
    ret <vscale x 8 x i64> %ret
}

Before this patch

Compiler Explorer

vwadd_wv_mask_v8i32:
        li      a0, 42
        vsetvli a1, zero, e32, m4, ta, ma
        vmslt.vx        v0, v8, a0
        vmv.v.i v12, 0
        vmerge.vvm      v24, v12, v8, v0
        vwadd.wv        v8, v16, v24
        ret

After this patch

vwadd_wv_mask_v8i32:
        li a0, 42
        vsetvli a1, zero, e32, m4, ta, ma
        vmslt.vx v0, v8, a0
        vsetvli zero, zero, e32, m4, tu, mu
        vwadd.wv v16, v16, v8, v0.t
        vmv8r.v v8, v16
        ret

…).wv.

llvmbot · 2024-01-30T23:38:25Z

@llvm/pr-subscribers-backend-risc-v

Author: Chia (sun-jacobi)

Changes

Similar to #78403, but for scalable vwadd(u).wv, given that #76785 has been recommitted.

Code

define <vscale x 8 x i64> @vwadd_wv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x 8 x i64> %y) {
    %mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
    %a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
    %sa = sext <vscale x 8 x i32> %a to <vscale x 8 x i64>
    %ret = add <vscale x 8 x i64> %sa, %y
    ret <vscale x 8 x i64> %ret
}

Before this patch

Compiler Explorer

vwadd_wv_mask_v8i32:
        li      a0, 42
        vsetvli a1, zero, e32, m4, ta, ma
        vmslt.vx        v0, v8, a0
        vmv.v.i v12, 0
        vmerge.vvm      v24, v12, v8, v0
        vwadd.wv        v8, v16, v24
        ret

After this patch

vwadd_wv_mask_v8i32:
        li a0, 42
        vsetvli a1, zero, e32, m4, ta, ma
        vmslt.vx v0, v8, a0
        vsetvli zero, zero, e32, m4, tu, mu
        vwadd.wv v16, v16, v8, v0.t
        vmv8r.v v8, v16
        ret

Full diff: https://github.com/llvm/llvm-project/pull/80079.diff

2 Files Affected:

(modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+18-6)
(added) llvm/test/CodeGen/RISCV/rvv/vwadd-mask-sdnode.ll (+90)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 82836346d8832..f63532ea07fab 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13776,8 +13776,11 @@ static SDValue combineVWADDWSelect(SDNode *N, SelectionDAG &DAG) {
 
   SDValue Y = N->getOperand(0);
   SDValue MergeOp = N->getOperand(1);
-  if (MergeOp.getOpcode() != RISCVISD::VMERGE_VL)
+  unsigned MergeOpc = MergeOp.getOpcode();
+
+  if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
     return SDValue();
+
   SDValue X = MergeOp->getOperand(1);
 
   if (!MergeOp.hasOneUse())
@@ -13795,13 +13798,22 @@ static SDValue combineVWADDWSelect(SDNode *N, SelectionDAG &DAG) {
 
   // False value of MergeOp should be all zeros
   SDValue Z = MergeOp->getOperand(2);
-  if (Z.getOpcode() != ISD::INSERT_SUBVECTOR)
-    return SDValue();
-  if (!ISD::isBuildVectorAllZeros(Z.getOperand(1).getNode()))
-    return SDValue();
-  if (!isNullOrNullSplat(Z.getOperand(0)) && !Z.getOperand(0).isUndef())
+
+  // Scalable vector
+  if (MergeOpc == ISD::VSELECT &&
+      !ISD::isConstantSplatVectorAllZeros(Z.getNode()))
     return SDValue();
 
+  // Fixed-length vector
+  if (MergeOpc == RISCVISD::VMERGE_VL) {
+    if (Z.getOpcode() != ISD::INSERT_SUBVECTOR)
+      return SDValue();
+    if (!ISD::isBuildVectorAllZeros(Z.getOperand(1).getNode()))
+      return SDValue();
+    if (!isNullOrNullSplat(Z.getOperand(0)) && !Z.getOperand(0).isUndef())
+      return SDValue();
+  }
+
   return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
                      {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
                      N->getFlags());
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd-mask-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd-mask-sdnode.ll
new file mode 100644
index 0000000000000..ad7ad991e082c
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vwadd-mask-sdnode.ll
@@ -0,0 +1,90 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+
+define <vscale x 8 x i64> @vwadd_wv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x 8 x i64> %y) {
+; CHECK-LABEL: vwadd_wv_mask_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 42
+; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, tu, mu
+; CHECK-NEXT:    vwadd.wv v16, v16, v8, v0.t
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+    %mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+    %a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
+    %sa = sext <vscale x 8 x i32> %a to <vscale x 8 x i64>
+    %ret = add <vscale x 8 x i64> %sa, %y
+    ret <vscale x 8 x i64> %ret
+}
+
+define <vscale x 8 x i64> @vwaddu_wv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x 8 x i64> %y) {
+; CHECK-LABEL: vwaddu_wv_mask_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 42
+; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, tu, mu
+; CHECK-NEXT:    vwaddu.wv v16, v16, v8, v0.t
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+    %mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+    %a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
+    %sa = zext <vscale x 8 x i32> %a to <vscale x 8 x i64>
+    %ret = add <vscale x 8 x i64> %sa, %y
+    ret <vscale x 8 x i64> %ret
+}
+
+define <vscale x 8 x i64> @vwaddu_vv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %y) {
+; CHECK-LABEL: vwaddu_vv_mask_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 42
+; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-NEXT:    vmv.v.i v16, 0
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT:    vwaddu.vv v16, v8, v12
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+    %mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+    %a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
+    %sa = zext <vscale x 8 x i32> %a to <vscale x 8 x i64>
+    %sy = zext <vscale x 8 x i32> %y to <vscale x 8 x i64>
+    %ret = add <vscale x 8 x i64> %sa, %sy
+    ret <vscale x 8 x i64> %ret
+}
+
+define <vscale x 8 x i64> @vwadd_wv_mask_v8i32_commutative(<vscale x 8 x i32> %x, <vscale x 8 x i64> %y) {
+; CHECK-LABEL: vwadd_wv_mask_v8i32_commutative:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 42
+; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, tu, mu
+; CHECK-NEXT:    vwadd.wv v16, v16, v8, v0.t
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+    %mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+    %a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
+    %sa = sext <vscale x 8 x i32> %a to <vscale x 8 x i64>
+    %ret = add <vscale x 8 x i64> %y, %sa
+    ret <vscale x 8 x i64> %ret
+}
+
+define <vscale x 8 x i64> @vwadd_wv_mask_v8i32_nonzero(<vscale x 8 x i32> %x, <vscale x 8 x i64> %y) {
+; CHECK-LABEL: vwadd_wv_mask_v8i32_nonzero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 42
+; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-NEXT:    vmv.v.i v12, 1
+; CHECK-NEXT:    vmerge.vvm v24, v12, v8, v0
+; CHECK-NEXT:    vwadd.wv v8, v16, v24
+; CHECK-NEXT:    ret
+    %mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+    %a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+    %sa = sext <vscale x 8 x i32> %a to <vscale x 8 x i64>
+    %ret = add <vscale x 8 x i64> %sa, %y
+    ret <vscale x 8 x i64> %ret
+}

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

lukel97

LGTM

[RISCV][Isel] Remove redundant vmerge for the scalable vector vwadd(u…

c3317fd

…).wv.

llvmbot added the backend:RISC-V label Jan 30, 2024

update vector-interleave.ll.

e034e74

sun-jacobi requested review from lukel97, topperc and yetingk January 31, 2024 03:50

lukel97 reviewed Jan 31, 2024

View reviewed changes

llvm/lib/Target/RISCV/RISCVISelLowering.cpp Outdated Show resolved Hide resolved

unify zero value checking for scalable and fixed-length.

0c67dba

sun-jacobi requested a review from lukel97 January 31, 2024 07:25

lukel97 reviewed Jan 31, 2024

View reviewed changes

llvm/lib/Target/RISCV/RISCVISelLowering.cpp Outdated Show resolved Hide resolved

drop the VMERGE_VL check.

2788180

sun-jacobi requested a review from lukel97 January 31, 2024 07:45

lukel97 approved these changes Jan 31, 2024

View reviewed changes

sun-jacobi merged commit dc5dca1 into llvm:main Jan 31, 2024

sun-jacobi deleted the vwaddw-scalable branch February 2, 2024 02:42

sun-jacobi mentioned this pull request Feb 3, 2024

[RISCV][ISel] Remove redundant vmerge for vwsub(u).wv. #80523

Merged

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[RISCV][Isel] Remove redundant vmerge for the scalable vwadd(u).wv #80079

[RISCV][Isel] Remove redundant vmerge for the scalable vwadd(u).wv #80079

Uh oh!

sun-jacobi commented Jan 30, 2024

Uh oh!

llvmbot commented Jan 30, 2024

Code

Before this patch

After this patch

Uh oh!

Uh oh!

Uh oh!

lukel97 left a comment

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants

[RISCV][Isel] Remove redundant vmerge for the scalable vwadd(u).wv #80079

[RISCV][Isel] Remove redundant vmerge for the scalable vwadd(u).wv #80079

Uh oh!

Conversation

sun-jacobi commented Jan 30, 2024

Code

Before this patch

After this patch

Uh oh!

llvmbot commented Jan 30, 2024

Code

Before this patch

After this patch

Uh oh!

Uh oh!

Uh oh!

lukel97 left a comment

Choose a reason for hiding this comment

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants