From 48660477e5d836785d6e97eac10f90a9a6a48389 Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Tue, 9 Jul 2024 13:34:15 +0800
Subject: [PATCH] [RISCV] Use VP strided load in concat_vectors combine

After #98112 and #98111, this should be the last use of
riscv_masked_strided_load. The test diff is due to vp_load not having
the same generic combine that folds bitcasts into loads,
`(conv (load x)) -> (load (conv*)x)`. I don't think this makes much of
a difference on RVV, and it doesn't seem to affect other patterns.
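To illustrate (a hand-written sketch of the DAG for the widen_2xv4i16
case, not actual compiler output; node numbers and operands are made
up): on a plain load, DAGCombiner folds the cast into the load,

    t1: v2i64,ch = load t0, %x, undef
    t2: v8i16 = bitcast t1
      -->
    t2: v8i16,ch = load t0, %x, undef

which is why these tests used to select vle16.v with VL=8. The VP
strided load keeps its v2i64 result type, so we now select vle64.v
with VL=2 instead; both are unit-stride loads of the same 16 bytes.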
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   | 15 +++++----------
 .../rvv/fixed-vectors-strided-load-combine.ll | 18 +++++++++---------
 2 files changed, 14 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 7972b9abc456c..a6f2641b48004 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16205,18 +16205,10 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
   if (MustNegateStride)
     Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
 
-  SDVTList VTs = DAG.getVTList({WideVecVT, MVT::Other});
-  SDValue IntID =
-      DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
-                            Subtarget.getXLenVT());
-
   SDValue AllOneMask =
       DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
                    DAG.getConstant(1, DL, MVT::i1));
 
-  SDValue Ops[] = {BaseLd->getChain(),   IntID,  DAG.getUNDEF(WideVecVT),
-                   BaseLd->getBasePtr(), Stride, AllOneMask};
-
   uint64_t MemSize;
   if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
       ConstStride && ConstStride->getSExtValue() >= 0)
@@ -16232,8 +16224,11 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
       BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
       Align);
 
-  SDValue StridedLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
-                                                Ops, WideVecVT, MMO);
+  SDValue StridedLoad = DAG.getStridedLoadVP(
+      WideVecVT, DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,
+      AllOneMask,
+      DAG.getConstant(N->getNumOperands(), DL, Subtarget.getXLenVT()), MMO);
+
   for (SDValue Ld : N->ops())
     DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll
index 0e1105848440a..cdf0d35843620 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll
@@ -9,9 +9,9 @@
 define void @widen_2xv4i16(ptr %x, ptr %z) {
 ; CHECK-LABEL: widen_2xv4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vse16.v v8, (a1)
+; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vse64.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <4 x i16>, ptr %x
   %b.gep = getelementptr i8, ptr %x, i64 8
@@ -52,9 +52,9 @@ define void @widen_3xv4i16(ptr %x, ptr %z) {
 define void @widen_4xv4i16(ptr %x, ptr %z) {
 ; CHECK-LABEL: widen_4xv4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vse16.v v8, (a1)
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vse64.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <4 x i16>, ptr %x
   %b.gep = getelementptr i8, ptr %x, i64 8
@@ -90,9 +90,9 @@ define void @widen_4xv4i16_unaligned(ptr %x, ptr %z) {
 ;
 ; RV64-MISALIGN-LABEL: widen_4xv4i16_unaligned:
 ; RV64-MISALIGN:       # %bb.0:
-; RV64-MISALIGN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; RV64-MISALIGN-NEXT:    vle16.v v8, (a0)
-; RV64-MISALIGN-NEXT:    vse16.v v8, (a1)
+; RV64-MISALIGN-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-MISALIGN-NEXT:    vle64.v v8, (a0)
+; RV64-MISALIGN-NEXT:    vse64.v v8, (a1)
 ; RV64-MISALIGN-NEXT:    ret
   %a = load <4 x i16>, ptr %x, align 1
   %b.gep = getelementptr i8, ptr %x, i64 8