diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index b8ed1dba6303e..a1804848221dd 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -24608,54 +24608,6 @@ Examples:
 
  llvm.experimental.vp.splice(, , 1, 2, 3);  ==>  index
  llvm.experimental.vp.splice(, , -2, 3, 2); ==>  trailing elements
-
-.. _int_experimental_vp_splat:
-
-
-'``llvm.experimental.vp.splat``' Intrinsic
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Syntax:
-"""""""
-This is an overloaded intrinsic.
-
-::
-
-      declare <2 x double> @llvm.experimental.vp.splat.v2f64(double %scalar, <2 x i1> %mask, i32 %evl)
-      declare <vscale x 4 x i32> @llvm.experimental.vp.splat.nxv4i32(i32 %scalar, <vscale x 4 x i1> %mask, i32 %evl)
-
-Overview:
-"""""""""
-
-The '``llvm.experimental.vp.splat.*``' intrinsic is to create a predicated splat
-with specific effective vector length.
-
-Arguments:
-""""""""""
-
-The result is a vector and it is a splat of the first scalar argument. The
-second argument ``mask`` is a vector mask and has the same number of elements as
-the result. The third argument is the explicit vector length of the operation.
-
-Semantics:
-""""""""""
-
-This intrinsic splats a vector with ``evl`` elements of a scalar argument.
-The lanes in the result vector disabled by ``mask`` are ``poison``. The
-elements past ``evl`` are poison.
-
-Examples:
-"""""""""
-
-.. code-block:: llvm
-
-      %r = call <4 x float> @llvm.vp.splat.v4f32(float %a, <4 x i1> %mask, i32 %evl)
-      ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
-      %e = insertelement <4 x float> poison, float %a, i32 0
-      %s = shufflevector <4 x float> %e, <4 x float> poison, <4 x i32> zeroinitializer
-      %also.r = select <4 x i1> %mask, <4 x float> %s, <4 x float> poison
-
-
 .. _int_experimental_vp_reverse:
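For IR that still uses the removed intrinsic, the deleted Examples block above already gives the unpredicated equivalent. A minimal migration sketch (not part of this patch; %a, %mask and %evl stand for whatever the caller already had); it is lane-wise equivalent for every lane below %evl and refines the poison lanes to a defined value, so it is a sound replacement for all uses:

      ; before:
      ;   %r = call <4 x float> @llvm.experimental.vp.splat.v4f32(float %a, <4 x i1> %mask, i32 %evl)
      ; after: an ordinary splat of %a
      %e = insertelement <4 x float> poison, float %a, i32 0
      %r = shufflevector <4 x float> %e, <4 x float> poison, <4 x i32> zeroinitializer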
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index c3c4718c3548f..35a4158a56da9 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -2505,13 +2505,6 @@ def int_experimental_vp_reverse:
                          llvm_i32_ty],
                         [IntrNoMem, IntrSpeculatable]>;
 
-def int_experimental_vp_splat:
-  DefaultAttrsIntrinsic<[llvm_anyvector_ty],
-                        [LLVMVectorElementType<0>,
-                         LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
-                         llvm_i32_ty],
-                        [IntrNoMem, IntrSpeculatable]>;
-
 def int_vp_is_fpclass:
   DefaultAttrsIntrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
                         [ llvm_anyvector_ty,
diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
index cd7343ff8df56..0b0c744487b92 100644
--- a/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -748,13 +748,6 @@ END_REGISTER_VP(experimental_vp_reverse, EXPERIMENTAL_VP_REVERSE)
 
 ///// } Shuffles
 
-// llvm.vp.splat(val,mask,vlen)
-BEGIN_REGISTER_VP_INTRINSIC(experimental_vp_splat, 1, 2)
-BEGIN_REGISTER_VP_SDNODE(EXPERIMENTAL_VP_SPLAT, -1, experimental_vp_splat, 1, 2)
-VP_PROPERTY_NO_FUNCTIONAL
-HELPER_MAP_VPID_TO_VPSD(experimental_vp_splat, EXPERIMENTAL_VP_SPLAT)
-END_REGISTER_VP(experimental_vp_splat, EXPERIMENTAL_VP_SPLAT)
-
 #undef BEGIN_REGISTER_VP
 #undef BEGIN_REGISTER_VP_INTRINSIC
 #undef BEGIN_REGISTER_VP_SDNODE
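The TableGen signature and VP registration removed here encoded the call shape that producers and the VP infrastructure agreed on: a scalar of the result's element type, a mask with the result's element count, and an i32 explicit vector length, with the mask and EVL at operand positions 1 and 2 (the "1, 2" arguments of BEGIN_REGISTER_VP_INTRINSIC). An illustrative call, with the vector type chosen only for the example:

      ; operand 0: scalar to splat, operand 1: mask, operand 2: EVL
      %v = call <vscale x 4 x i32> @llvm.experimental.vp.splat.nxv4i32(i32 %scalar, <vscale x 4 x i1> %mask, i32 %evl)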
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 08c6b90cc8a74..b9377fabb8634 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -145,7 +145,6 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
     break;
   case ISD::SPLAT_VECTOR:
   case ISD::SCALAR_TO_VECTOR:
-  case ISD::EXPERIMENTAL_VP_SPLAT:
     Res = PromoteIntRes_ScalarOp(N);
     break;
   case ISD::STEP_VECTOR: Res = PromoteIntRes_STEP_VECTOR(N); break;
@@ -2008,7 +2007,6 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
     break;
   case ISD::SPLAT_VECTOR:
   case ISD::SCALAR_TO_VECTOR:
-  case ISD::EXPERIMENTAL_VP_SPLAT:
     Res = PromoteIntOp_ScalarOp(N);
     break;
   case ISD::VSELECT:
@@ -2363,9 +2361,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N,
 
 SDValue DAGTypeLegalizer::PromoteIntOp_ScalarOp(SDNode *N) {
   SDValue Op = GetPromotedInteger(N->getOperand(0));
-  if (N->getOpcode() == ISD::EXPERIMENTAL_VP_SPLAT)
-    return SDValue(
-        DAG.UpdateNodeOperands(N, Op, N->getOperand(1), N->getOperand(2)), 0);
 
   // Integer SPLAT_VECTOR/SCALAR_TO_VECTOR operands are implicitly truncated,
   // so just promote the operand in place.
@@ -5456,7 +5451,6 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
     break;
   case ISD::INSERT_VECTOR_ELT:   Res = ExpandOp_INSERT_VECTOR_ELT(N); break;
   case ISD::SCALAR_TO_VECTOR:    Res = ExpandOp_SCALAR_TO_VECTOR(N); break;
-  case ISD::EXPERIMENTAL_VP_SPLAT:
   case ISD::SPLAT_VECTOR:        Res = ExpandIntOp_SPLAT_VECTOR(N); break;
   case ISD::SELECT_CC:           Res = ExpandIntOp_SELECT_CC(N); break;
   case ISD::SETCC:               Res = ExpandIntOp_SETCC(N); break;
@@ -6100,10 +6094,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ScalarOp(SDNode *N) {
   EVT NOutElemVT = NOutVT.getVectorElementType();
 
   SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutElemVT, N->getOperand(0));
-  if (N->isVPOpcode())
-    return DAG.getNode(N->getOpcode(), dl, NOutVT, Op, N->getOperand(1),
-                       N->getOperand(2));
-
   return DAG.getNode(N->getOpcode(), dl, NOutVT, Op);
 }
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index ede522eff6df3..79384de151780 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -990,7 +990,6 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
                         bool SplitSETCC = false);
   void SplitVecRes_VECTOR_COMPRESS(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo, SDValue &Hi);
-  void SplitVecRes_VP_SPLAT(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_STEP_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -1143,7 +1142,6 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo);
   SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo);
   SDValue WidenVecOp_VP_SCATTER(SDNode* N, unsigned OpNo);
-  SDValue WidenVecOp_VP_SPLAT(SDNode *N, unsigned OpNo);
   SDValue WidenVecOp_SETCC(SDNode* N);
   SDValue WidenVecOp_STRICT_FSETCC(SDNode* N);
   SDValue WidenVecOp_VSELECT(SDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 6e1e02f38113e..da3102d30e153 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1217,7 +1217,6 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::FCOPYSIGN:         SplitVecRes_FPOp_MultiType(N, Lo, Hi); break;
   case ISD::IS_FPCLASS:        SplitVecRes_IS_FPCLASS(N, Lo, Hi); break;
   case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
-  case ISD::EXPERIMENTAL_VP_SPLAT: SplitVecRes_VP_SPLAT(N, Lo, Hi); break;
   case ISD::SPLAT_VECTOR:
   case ISD::SCALAR_TO_VECTOR:
     SplitVecRes_ScalarOp(N, Lo, Hi);
@@ -2191,16 +2190,6 @@ void DAGTypeLegalizer::SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo,
   }
 }
 
-void DAGTypeLegalizer::SplitVecRes_VP_SPLAT(SDNode *N, SDValue &Lo,
-                                            SDValue &Hi) {
-  SDLoc dl(N);
-  auto [LoVT, HiVT] = DAG.GetSplitDestVTs(N->getValueType(0));
-  auto [MaskLo, MaskHi] = SplitMask(N->getOperand(1));
-  auto [EVLLo, EVLHi] = DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl);
-  Lo = DAG.getNode(N->getOpcode(), dl, LoVT, N->getOperand(0), MaskLo, EVLLo);
-  Hi = DAG.getNode(N->getOpcode(), dl, HiVT, N->getOperand(0), MaskHi, EVLHi);
-}
-
 void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
                                         SDValue &Hi) {
   assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!");
@@ -4875,7 +4864,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::STEP_VECTOR:
   case ISD::SPLAT_VECTOR:
   case ISD::SCALAR_TO_VECTOR:
-  case ISD::EXPERIMENTAL_VP_SPLAT:
     Res = WidenVecRes_ScalarOp(N);
     break;
   case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break;
@@ -6587,9 +6575,6 @@ SDValue DAGTypeLegalizer::WidenVecRes_VP_GATHER(VPGatherSDNode *N) {
 
 SDValue DAGTypeLegalizer::WidenVecRes_ScalarOp(SDNode *N) {
   EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-  if (N->isVPOpcode())
-    return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, N->getOperand(0),
-                       N->getOperand(1), N->getOperand(2));
   return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, N->getOperand(0));
 }
 
@@ -7132,10 +7117,6 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
     Res = WidenVecOp_FP_TO_XINT_SAT(N);
     break;
 
-  case ISD::EXPERIMENTAL_VP_SPLAT:
-    Res = WidenVecOp_VP_SPLAT(N, OpNo);
-    break;
-
   case ISD::VECREDUCE_FADD:
   case ISD::VECREDUCE_FMUL:
   case ISD::VECREDUCE_ADD:
@@ -7655,13 +7636,6 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
   report_fatal_error("Unable to widen vector store");
 }
 
-SDValue DAGTypeLegalizer::WidenVecOp_VP_SPLAT(SDNode *N, unsigned OpNo) {
-  assert(OpNo == 1 && "Can widen only mask operand of vp_splat");
-  return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
-                     N->getOperand(0), GetWidenedVector(N->getOperand(1)),
-                     N->getOperand(2));
-}
-
 SDValue DAGTypeLegalizer::WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo) {
   assert((OpNo == 1 || OpNo == 3) &&
          "Can widen only data or mask operand of vp_store");
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
index 23a4d1b5c615e..281cbd4388b58 100644
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -697,9 +697,6 @@ Function *VPIntrinsic::getOrInsertDeclarationForParams(
     VPFunc = Intrinsic::getOrInsertDeclaration(
         M, VPID, {Params[0]->getType(), Params[1]->getType()});
     break;
-  case Intrinsic::experimental_vp_splat:
-    VPFunc = Intrinsic::getOrInsertDeclaration(M, VPID, ReturnType);
-    break;
   }
   assert(VPFunc && "Could not declare VP intrinsic");
   return VPFunc;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 36416f9199f71..c595908fdedb7 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -89,7 +89,7 @@ static cl::opt
 
 // TODO: Support more ops
 static const unsigned ZvfbfaVPOps[] = {
-    ISD::VP_FNEG, ISD::VP_FABS, ISD::VP_FCOPYSIGN, ISD::EXPERIMENTAL_VP_SPLAT};
+    ISD::VP_FNEG, ISD::VP_FABS, ISD::VP_FCOPYSIGN};
 static const unsigned ZvfbfaOps[] = {
     ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::SPLAT_VECTOR,
     ISD::FADD, ISD::FSUB, ISD::FMUL};
@@ -795,8 +795,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       ISD::VP_SMAX,        ISD::VP_UMIN,        ISD::VP_UMAX,
       ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
       ISD::VP_SADDSAT,     ISD::VP_UADDSAT,     ISD::VP_SSUBSAT,
-      ISD::VP_USUBSAT,     ISD::VP_CTTZ_ELTS,   ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
-      ISD::EXPERIMENTAL_VP_SPLAT};
+      ISD::VP_USUBSAT,     ISD::VP_CTTZ_ELTS,   ISD::VP_CTTZ_ELTS_ZERO_UNDEF};
 
   static const unsigned FloatingPointVPOps[] = {
       ISD::VP_FADD,        ISD::VP_FSUB,        ISD::VP_FMUL,
@@ -811,7 +810,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       ISD::VP_FRINT,       ISD::VP_FNEARBYINT,  ISD::VP_IS_FPCLASS,
       ISD::VP_FMINIMUM,    ISD::VP_FMAXIMUM,    ISD::VP_LRINT,
       ISD::VP_LLRINT,      ISD::VP_REDUCE_FMINIMUM,
-      ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};
+      ISD::VP_REDUCE_FMAXIMUM};
 
   static const unsigned IntegerVecReduceOps[] = {
       ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
@@ -905,7 +904,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
 
       setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
       setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
-      setOperationAction(ISD::EXPERIMENTAL_VP_SPLAT, VT, Custom);
 
       setOperationPromotedToType(
           ISD::VECTOR_SPLICE, VT,
@@ -1250,12 +1248,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
         setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
         MVT EltVT = VT.getVectorElementType();
         if (isTypeLegal(EltVT))
-          setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
-                              ISD::EXTRACT_VECTOR_ELT},
+          setOperationAction({ISD::SPLAT_VECTOR, ISD::EXTRACT_VECTOR_ELT},
                              VT, Custom);
         else
-          setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
-                             EltVT, Custom);
+          setOperationAction(ISD::SPLAT_VECTOR, EltVT, Custom);
         setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
                             ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
                             ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
@@ -1303,7 +1299,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                            VT, Custom);
         setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
         setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
-        setOperationAction(ISD::EXPERIMENTAL_VP_SPLAT, VT, Custom);
 
         setOperationAction(ISD::FCOPYSIGN, VT, Legal);
         setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
@@ -8892,8 +8887,6 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     return lowerVPSpliceExperimental(Op, DAG);
   case ISD::EXPERIMENTAL_VP_REVERSE:
     return lowerVPReverseExperimental(Op, DAG);
-  case ISD::EXPERIMENTAL_VP_SPLAT:
-    return lowerVPSplatExperimental(Op, DAG);
   case ISD::CLEAR_CACHE: {
     assert(getTargetMachine().getTargetTriple().isOSLinux() &&
            "llvm.clear_cache only needs custom lower on Linux targets");
@@ -14113,47 +14106,6 @@ RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
   return convertFromScalableVector(VT, Result, DAG, Subtarget);
 }
 
-SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
-                                                      SelectionDAG &DAG) const {
-  SDLoc DL(Op);
-  SDValue Val = Op.getOperand(0);
-  SDValue Mask = Op.getOperand(1);
-  SDValue VL = Op.getOperand(2);
-  MVT VT = Op.getSimpleValueType();
-
-  MVT ContainerVT = VT;
-  if (VT.isFixedLengthVector()) {
-    ContainerVT = getContainerForFixedLengthVector(VT);
-    MVT MaskVT = getMaskTypeFor(ContainerVT);
-    Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
-  }
-
-  SDValue Result;
-  if (VT.getScalarType() == MVT::i1) {
-    if (auto *C = dyn_cast<ConstantSDNode>(Val)) {
-      Result =
-          DAG.getNode(C->isZero() ? RISCVISD::VMCLR_VL : RISCVISD::VMSET_VL, DL,
-                      ContainerVT, VL);
-    } else {
-      MVT WidenVT = ContainerVT.changeVectorElementType(MVT::i8);
-      SDValue LHS =
-          DAG.getNode(RISCVISD::VMV_V_X_VL, DL, WidenVT, DAG.getUNDEF(WidenVT),
-                      DAG.getZExtOrTrunc(Val, DL, Subtarget.getXLenVT()), VL);
-      SDValue RHS = DAG.getConstant(0, DL, WidenVT);
-      Result = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
-                           {LHS, RHS, DAG.getCondCode(ISD::SETNE),
-                            DAG.getUNDEF(ContainerVT), Mask, VL});
-    }
-  } else {
-    Result =
-        lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
-  }
-
-  if (!VT.isFixedLengthVector())
-    return Result;
-  return convertFromScalableVector(VT, Result, DAG, Subtarget);
-}
-
 SDValue
 RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
                                                 SelectionDAG &DAG) const {
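The deleted lowerVPSplatExperimental singled out i1 splats: a constant i1 became VMSET_VL or VMCLR_VL directly, while a non-constant i1 value was splatted into an i8 vector and compared against zero; all other element types went through lowerScalarSplat. A rough IR-level picture of the non-constant i1 path, given only to record what the removed DAG code computed (the names and the nxv4 width are illustrative, not from this patch):

      %w = zext i1 %val to i8
      %e = insertelement <vscale x 4 x i8> poison, i8 %w, i32 0
      %s = shufflevector <vscale x 4 x i8> %e, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
      %m = icmp ne <vscale x 4 x i8> %s, zeroinitializer   ; a <vscale x 4 x i1> splat of %val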
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 8a55a5634452c..2975e7d985e32 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -557,7 +557,6 @@ class RISCVTargetLowering : public TargetLowering {
   SDValue lowerVPExtMaskOp(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVPSetCCMaskOp(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVPMergeMask(SDValue Op, SelectionDAG &DAG) const;
-  SDValue lowerVPSplatExperimental(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVPSpliceExperimental(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVPReverseExperimental(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index afc2f2c9cd07b..ab49bc4263a71 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1622,16 +1622,6 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
 
     return Cost;
   }
-  case Intrinsic::experimental_vp_splat: {
-    auto LT = getTypeLegalizationCost(RetTy);
-    // TODO: Lower i1 experimental_vp_splat
-    if (!ST->hasVInstructions() || LT.second.getScalarType() == MVT::i1)
-      return InstructionCost::getInvalid();
-    return LT.first * getRISCVInstructionCost(LT.second.isFloatingPoint()
-                                                  ? RISCV::VFMV_V_F
-                                                  : RISCV::VMV_V_X,
-                                              LT.second, CostKind);
-  }
   case Intrinsic::experimental_vp_splice: {
     // To support type-based query from vectorizer, set the index to 0.
     // Note that index only change the cost from vslide.vx to vslide.vi and in
@@ -3416,11 +3406,8 @@ bool RISCVTTIImpl::isProfitableToSinkOperands(
     if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
       continue;
 
-    // We are looking for a splat/vp.splat that can be sunk.
-    bool IsVPSplat = match(Op, m_Intrinsic<Intrinsic::experimental_vp_splat>(
-                                   m_Value(), m_Value(), m_Value()));
-    if (!IsVPSplat &&
-        !match(Op, m_Shuffle(m_InsertElt(m_Value(), m_Value(), m_ZeroInt()),
+    // We are looking for a splat that can be sunk.
+    if (!match(Op, m_Shuffle(m_InsertElt(m_Value(), m_Value(), m_ZeroInt()),
                              m_Value(), m_ZeroMask())))
       continue;
 
@@ -3437,16 +3424,11 @@ bool RISCVTTIImpl::isProfitableToSinkOperands(
     }
 
     // Sink any fpexts since they might be used in a widening fp pattern.
- if (IsVPSplat) { - if (isa(Op->getOperand(0))) - Ops.push_back(&Op->getOperandUse(0)); - } else { - Use *InsertEltUse = &Op->getOperandUse(0); - auto *InsertElt = cast(InsertEltUse); - if (isa(InsertElt->getOperand(1))) - Ops.push_back(&InsertElt->getOperandUse(1)); - Ops.push_back(InsertEltUse); - } + Use *InsertEltUse = &Op->getOperandUse(0); + auto *InsertElt = cast(InsertEltUse); + if (isa(InsertElt->getOperand(1))) + Ops.push_back(&InsertElt->getOperandUse(1)); + Ops.push_back(InsertEltUse); Ops.push_back(&OpIdx.value()); } return true; diff --git a/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll b/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll index ba792d8f0955b..9e8f727978001 100644 --- a/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll +++ b/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll @@ -1579,157 +1579,6 @@ define void @vp_fdiv(){ ret void } -define void @splat() { -; CHECK-LABEL: 'splat' -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %1 = call <2 x i1> @llvm.experimental.vp.splat.v2i1(i1 undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %2 = call <4 x i1> @llvm.experimental.vp.splat.v4i1(i1 undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %3 = call <8 x i1> @llvm.experimental.vp.splat.v8i1(i1 undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %4 = call <16 x i1> @llvm.experimental.vp.splat.v16i1(i1 undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <2 x i8> @llvm.experimental.vp.splat.v2i8(i8 undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call <4 x i8> @llvm.experimental.vp.splat.v4i8(i8 undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <8 x i8> @llvm.experimental.vp.splat.v8i8(i8 undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call <16 x i8> @llvm.experimental.vp.splat.v16i8(i8 undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call <2 x i16> @llvm.experimental.vp.splat.v2i16(i16 undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = call <4 x i16> @llvm.experimental.vp.splat.v4i16(i16 undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call <8 x i16> @llvm.experimental.vp.splat.v8i16(i16 undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = call <16 x i16> @llvm.experimental.vp.splat.v16i16(i16 undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <2 x i32> @llvm.experimental.vp.splat.v2i32(i32 undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = call <4 x i32> @llvm.experimental.vp.splat.v4i32(i32 undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = call <8 x i32> @llvm.experimental.vp.splat.v8i32(i32 undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %16 = call <16 x i32> @llvm.experimental.vp.splat.v16i32(i32 undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %17 = call <2 x i64> @llvm.experimental.vp.splat.v2i64(i64 undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = call <4 x i64> @llvm.experimental.vp.splat.v4i64(i64 undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %19 = call <8 x i64> @llvm.experimental.vp.splat.v8i64(i64 undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %20 = call <16 x i64> @llvm.experimental.vp.splat.v16i64(i64 undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = call <2 x bfloat> @llvm.experimental.vp.splat.v2bf16(bfloat undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = call <4 x bfloat> @llvm.experimental.vp.splat.v4bf16(bfloat undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = call <8 x bfloat> @llvm.experimental.vp.splat.v8bf16(bfloat undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %24 = call <16 x bfloat> @llvm.experimental.vp.splat.v16bf16(bfloat undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = call <2 x half> @llvm.experimental.vp.splat.v2f16(half undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = call <4 x half> @llvm.experimental.vp.splat.v4f16(half undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = call <8 x half> @llvm.experimental.vp.splat.v8f16(half undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = call <16 x half> @llvm.experimental.vp.splat.v16f16(half undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = call <2 x float> @llvm.experimental.vp.splat.v2f32(float undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = call <4 x float> @llvm.experimental.vp.splat.v4f32(float undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %31 = call <8 x float> @llvm.experimental.vp.splat.v8f32(float undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = call <16 x float> @llvm.experimental.vp.splat.v16f32(float undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = call <2 x double> @llvm.experimental.vp.splat.v2f64(double undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %34 = call <4 x double> @llvm.experimental.vp.splat.v4f64(double undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %35 = call <8 x double> @llvm.experimental.vp.splat.v8f64(double undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %36 = call <16 x double> @llvm.experimental.vp.splat.v16f64(double undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %37 = call @llvm.experimental.vp.splat.nxv2i1(i1 undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: 
%38 = call @llvm.experimental.vp.splat.nxv4i1(i1 undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %39 = call @llvm.experimental.vp.splat.nxv8i1(i1 undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %40 = call @llvm.experimental.vp.splat.nxv16i1(i1 undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %41 = call @llvm.experimental.vp.splat.nxv2i8(i8 undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %42 = call @llvm.experimental.vp.splat.nxv4i8(i8 undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %43 = call @llvm.experimental.vp.splat.nxv8i8(i8 undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %44 = call @llvm.experimental.vp.splat.nxv16i8(i8 undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %45 = call @llvm.experimental.vp.splat.nxv2i16(i16 undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %46 = call @llvm.experimental.vp.splat.nxv4i16(i16 undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %47 = call @llvm.experimental.vp.splat.nxv8i16(i16 undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %48 = call @llvm.experimental.vp.splat.nxv16i16(i16 undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %49 = call @llvm.experimental.vp.splat.nxv2i32(i32 undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %50 = call @llvm.experimental.vp.splat.nxv4i32(i32 undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %51 = call @llvm.experimental.vp.splat.nxv8i32(i32 undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %52 = call @llvm.experimental.vp.splat.nxv16i32(i32 undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %53 = call @llvm.experimental.vp.splat.nxv2i64(i64 undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %54 = call @llvm.experimental.vp.splat.nxv4i64(i64 undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %55 = call @llvm.experimental.vp.splat.nxv8i64(i64 undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %56 = call @llvm.experimental.vp.splat.nxv16i64(i64 undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %57 = call @llvm.experimental.vp.splat.nxv2bf16(bfloat undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %58 = call @llvm.experimental.vp.splat.nxv4bf16(bfloat undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %59 = call @llvm.experimental.vp.splat.nxv8bf16(bfloat undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %60 = call @llvm.experimental.vp.splat.nxv16bf16(bfloat undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %61 = call @llvm.experimental.vp.splat.nxv2f16(half undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%62 = call @llvm.experimental.vp.splat.nxv4f16(half undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %63 = call @llvm.experimental.vp.splat.nxv8f16(half undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %64 = call @llvm.experimental.vp.splat.nxv16f16(half undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %65 = call @llvm.experimental.vp.splat.nxv2f32(float undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %66 = call @llvm.experimental.vp.splat.nxv4f32(float undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %67 = call @llvm.experimental.vp.splat.nxv8f32(float undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %68 = call @llvm.experimental.vp.splat.nxv16f32(float undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %69 = call @llvm.experimental.vp.splat.nxv2f64(double undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %70 = call @llvm.experimental.vp.splat.nxv4f64(double undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %71 = call @llvm.experimental.vp.splat.nxv8f64(double undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %72 = call @llvm.experimental.vp.splat.nxv16f64(double undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; - call <2 x i1> @llvm.experimental.vp.splat.v2i1(i1 undef, <2 x i1> undef, i32 undef) - call <4 x i1> @llvm.experimental.vp.splat.v4i1(i1 undef, <4 x i1> undef, i32 undef) - call <8 x i1> @llvm.experimental.vp.splat.v8i1(i1 undef, <8 x i1> undef, i32 undef) - call <16 x i1> @llvm.experimental.vp.splat.v16i1(i1 undef, <16 x i1> undef, i32 undef) - call <2 x i8> @llvm.experimental.vp.splat.v2i8(i8 undef, <2 x i1> undef, i32 undef) - call <4 x i8> @llvm.experimental.vp.splat.v4i8(i8 undef, <4 x i1> undef, i32 undef) - call <8 x i8> @llvm.experimental.vp.splat.v8i8(i8 undef, <8 x i1> undef, i32 undef) - call <16 x i8> @llvm.experimental.vp.splat.v16i8(i8 undef, <16 x i1> undef, i32 undef) - call <2 x i16> @llvm.experimental.vp.splat.v2i16(i16 undef, <2 x i1> undef, i32 undef) - call <4 x i16> @llvm.experimental.vp.splat.v4i16(i16 undef, <4 x i1> undef, i32 undef) - call <8 x i16> @llvm.experimental.vp.splat.v8i16(i16 undef, <8 x i1> undef, i32 undef) - call <16 x i16> @llvm.experimental.vp.splat.v16i16(i16 undef, <16 x i1> undef, i32 undef) - call <2 x i32> @llvm.experimental.vp.splat.v2i32(i32 undef, <2 x i1> undef, i32 undef) - call <4 x i32> @llvm.experimental.vp.splat.v4i32(i32 undef, <4 x i1> undef, i32 undef) - call <8 x i32> @llvm.experimental.vp.splat.v8i32(i32 undef, <8 x i1> undef, i32 undef) - call <16 x i32> @llvm.experimental.vp.splat.v16i32(i32 undef, <16 x i1> undef, i32 undef) - call <2 x i64> @llvm.experimental.vp.splat.v2i64(i64 undef, <2 x i1> undef, i32 undef) - call <4 x i64> @llvm.experimental.vp.splat.v4i64(i64 undef, <4 x i1> undef, i32 undef) - call <8 x i64> @llvm.experimental.vp.splat.v8i64(i64 undef, <8 x i1> undef, i32 undef) - call <16 x i64> @llvm.experimental.vp.splat.v16i64(i64 undef, <16 x i1> undef, i32 undef) - call <2 x bfloat> @llvm.experimental.vp.splat.v2bf16(bfloat undef, <2 x i1> undef, i32 undef) - call <4 
x bfloat> @llvm.experimental.vp.splat.v4bf16(bfloat undef, <4 x i1> undef, i32 undef) - call <8 x bfloat> @llvm.experimental.vp.splat.v8bf16(bfloat undef, <8 x i1> undef, i32 undef) - call <16 x bfloat> @llvm.experimental.vp.splat.v16bf16(bfloat undef, <16 x i1> undef, i32 undef) - call <2 x half> @llvm.experimental.vp.splat.v2f16(half undef, <2 x i1> undef, i32 undef) - call <4 x half> @llvm.experimental.vp.splat.v4f16(half undef, <4 x i1> undef, i32 undef) - call <8 x half> @llvm.experimental.vp.splat.v8f16(half undef, <8 x i1> undef, i32 undef) - call <16 x half> @llvm.experimental.vp.splat.v16f16(half undef, <16 x i1> undef, i32 undef) - call <2 x float> @llvm.experimental.vp.splat.v2f32(float undef, <2 x i1> undef, i32 undef) - call <4 x float> @llvm.experimental.vp.splat.v4f32(float undef, <4 x i1> undef, i32 undef) - call <8 x float> @llvm.experimental.vp.splat.v8f32(float undef, <8 x i1> undef, i32 undef) - call <16 x float> @llvm.experimental.vp.splat.v16f32(float undef, <16 x i1> undef, i32 undef) - call <2 x double> @llvm.experimental.vp.splat.v2f64(double undef, <2 x i1> undef, i32 undef) - call <4 x double> @llvm.experimental.vp.splat.v4f64(double undef, <4 x i1> undef, i32 undef) - call <8 x double> @llvm.experimental.vp.splat.v8f64(double undef, <8 x i1> undef, i32 undef) - call <16 x double> @llvm.experimental.vp.splat.v16f64(double undef, <16 x i1> undef, i32 undef) - call @llvm.experimental.vp.splat.nxv2i1(i1 undef, undef, i32 undef) - call @llvm.experimental.vp.splat.nxv4i1(i1 undef, undef, i32 undef) - call @llvm.experimental.vp.splat.nxv8i1(i1 undef, undef, i32 undef) - call @llvm.experimental.vp.splat.nxv16i1(i1 undef, undef, i32 undef) - call @llvm.experimental.vp.splat.nxv2i8(i8 undef, undef, i32 undef) - call @llvm.experimental.vp.splat.nxv4i8(i8 undef, undef, i32 undef) - call @llvm.experimental.vp.splat.nxv8i8(i8 undef, undef, i32 undef) - call @llvm.experimental.vp.splat.nxv16i8(i8 undef, undef, i32 undef) - call @llvm.experimental.vp.splat.nxv2i16(i16 undef, undef, i32 undef) - call @llvm.experimental.vp.splat.nxv4i16(i16 undef, undef, i32 undef) - call @llvm.experimental.vp.splat.nxv8i16(i16 undef, undef, i32 undef) - call @llvm.experimental.vp.splat.nxv16i16(i16 undef, undef, i32 undef) - call @llvm.experimental.vp.splat.nxv2i32(i32 undef, undef, i32 undef) - call @llvm.experimental.vp.splat.nxv4i32(i32 undef, undef, i32 undef) - call @llvm.experimental.vp.splat.nxv8i32(i32 undef, undef, i32 undef) - call @llvm.experimental.vp.splat.nxv16i32(i32 undef, undef, i32 undef) - call @llvm.experimental.vp.splat.nxv2i64(i64 undef, undef, i32 undef) - call @llvm.experimental.vp.splat.nxv4i64(i64 undef, undef, i32 undef) - call @llvm.experimental.vp.splat.nxv8i64(i64 undef, undef, i32 undef) - call @llvm.experimental.vp.splat.nxv16i64(i64 undef, undef, i32 undef) - call @llvm.experimental.vp.splat.nxv2bf16(bfloat undef, undef, i32 undef) - call @llvm.experimental.vp.splat.nxv4bf16(bfloat undef, undef, i32 undef) - call @llvm.experimental.vp.splat.nxv8bf16(bfloat undef, undef, i32 undef) - call @llvm.experimental.vp.splat.nxv16bf16(bfloat undef, undef, i32 undef) - call @llvm.experimental.vp.splat.nxv2f16(half undef, undef, i32 undef) - call @llvm.experimental.vp.splat.nxv4f16(half undef, undef, i32 undef) - call @llvm.experimental.vp.splat.nxv8f16(half undef, undef, i32 undef) - call @llvm.experimental.vp.splat.nxv16f16(half undef, undef, i32 undef) - call @llvm.experimental.vp.splat.nxv2f32(float undef, undef, i32 undef) - call 
@llvm.experimental.vp.splat.nxv4f32(float undef, undef, i32 undef) - call @llvm.experimental.vp.splat.nxv8f32(float undef, undef, i32 undef) - call @llvm.experimental.vp.splat.nxv16f32(float undef, undef, i32 undef) - call @llvm.experimental.vp.splat.nxv2f64(double undef, undef, i32 undef) - call @llvm.experimental.vp.splat.nxv4f64(double undef, undef, i32 undef) - call @llvm.experimental.vp.splat.nxv8f64(double undef, undef, i32 undef) - call @llvm.experimental.vp.splat.nxv16f64(double undef, undef, i32 undef) - ret void -} - define void @splice() { ; CHECK-LABEL: 'splice' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv16i8 = call @llvm.experimental.vp.splice.nxv16i8( zeroinitializer, zeroinitializer, i32 1, zeroinitializer, i32 poison, i32 poison) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splat.ll deleted file mode 100644 index 7901f8c290543..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splat.ll +++ /dev/null @@ -1,636 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH_RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH_RV64 -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zvfh,+experimental-zvfbfa -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA,ZVFBFA_RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zvfh,+experimental-zvfbfa -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA,ZVFBFA_RV64 - -define <1 x i8> @vp_splat_v1i8(i8 %val, <1 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_v1i8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call <1 x i8> @llvm.experimental.vp.splat.v1i8(i8 %val, <1 x i1> %m, i32 %evl) - ret <1 x i8> %splat -} - -define <2 x i8> @vp_splat_v2i8(i8 %val, <2 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_v2i8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call <2 x i8> @llvm.experimental.vp.splat.v2i8(i8 %val, <2 x i1> %m, i32 %evl) - ret <2 x i8> %splat -} - -define <4 x i8> @vp_splat_v4i8(i8 %val, <4 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_v4i8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call <4 x i8> @llvm.experimental.vp.splat.v4i8(i8 %val, <4 x i1> %m, i32 %evl) - ret <4 x i8> %splat -} - -define <8 x i8> @vp_splat_v8i8(i8 %val, <8 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_v8i8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call <8 x i8> @llvm.experimental.vp.splat.v8i8(i8 %val, <8 x i1> %m, i32 %evl) - ret <8 x i8> %splat -} - -define <16 x i8> @vp_splat_v16i8(i8 %val, <16 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_v16i8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call <16 x i8> @llvm.experimental.vp.splat.v16i8(i8 %val, <16 x i1> %m, i32 %evl) - ret <16 x i8> %splat -} - -define <32 x i8> @vp_splat_v32i8(i8 %val, <32 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: 
vp_splat_v32i8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call <32 x i8> @llvm.experimental.vp.splat.v32i8(i8 %val, <32 x i1> %m, i32 %evl) - ret <32 x i8> %splat -} - -define <64 x i8> @vp_splat_v64i8(i8 %val, <64 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_v64i8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call <64 x i8> @llvm.experimental.vp.splat.v64i8(i8 %val, <64 x i1> %m, i32 %evl) - ret <64 x i8> %splat -} - -define <1 x i16> @vp_splat_v1i16(i16 %val, <1 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_v1i16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call <1 x i16> @llvm.experimental.vp.splat.v1i16(i16 %val, <1 x i1> %m, i32 %evl) - ret <1 x i16> %splat -} - -define <2 x i16> @vp_splat_v2i16(i16 %val, <2 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_v2i16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call <2 x i16> @llvm.experimental.vp.splat.v2i16(i16 %val, <2 x i1> %m, i32 %evl) - ret <2 x i16> %splat -} - -define <4 x i16> @vp_splat_v4i16(i16 %val, <4 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_v4i16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call <4 x i16> @llvm.experimental.vp.splat.v4i16(i16 %val, <4 x i1> %m, i32 %evl) - ret <4 x i16> %splat -} - -define <8 x i16> @vp_splat_v8i16(i16 %val, <8 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_v8i16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call <8 x i16> @llvm.experimental.vp.splat.v8i16(i16 %val, <8 x i1> %m, i32 %evl) - ret <8 x i16> %splat -} - -define <16 x i16> @vp_splat_v16i16(i16 %val, <16 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_v16i16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call <16 x i16> @llvm.experimental.vp.splat.v16i16(i16 %val, <16 x i1> %m, i32 %evl) - ret <16 x i16> %splat -} - -define <32 x i16> @vp_splat_v32i16(i16 %val, <32 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_v32i16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call <32 x i16> @llvm.experimental.vp.splat.v32i16(i16 %val, <32 x i1> %m, i32 %evl) - ret <32 x i16> %splat -} - -define <1 x i32> @vp_splat_v1i32(i32 %val, <1 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_v1i32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call <1 x i32> @llvm.experimental.vp.splat.v1i32(i32 %val, <1 x i1> %m, i32 %evl) - ret <1 x i32> %splat -} - -define <2 x i32> @vp_splat_v2i32(i32 %val, <2 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_v2i32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call <2 x i32> @llvm.experimental.vp.splat.v2i32(i32 %val, <2 x i1> %m, i32 %evl) - ret <2 x i32> %splat -} - -define <4 x i32> @vp_splat_v4i32(i32 %val, <4 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_v4i32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, 
a1, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call <4 x i32> @llvm.experimental.vp.splat.v4i32(i32 %val, <4 x i1> %m, i32 %evl) - ret <4 x i32> %splat -} - -define <8 x i32> @vp_splat_v8i32(i32 %val, <8 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_v8i32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call <8 x i32> @llvm.experimental.vp.splat.v8i32(i32 %val, <8 x i1> %m, i32 %evl) - ret <8 x i32> %splat -} - -define <16 x i32> @vp_splat_v16i32(i32 %val, <16 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_v16i32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call <16 x i32> @llvm.experimental.vp.splat.v16i32(i32 %val, <16 x i1> %m, i32 %evl) - ret <16 x i32> %splat -} - -define <1 x i64> @vp_splat_v1i64(i64 %val, <1 x i1> %m, i32 zeroext %evl) { -; ZVFH_RV32-LABEL: vp_splat_v1i64: -; ZVFH_RV32: # %bb.0: -; ZVFH_RV32-NEXT: addi sp, sp, -16 -; ZVFH_RV32-NEXT: .cfi_def_cfa_offset 16 -; ZVFH_RV32-NEXT: sw a0, 8(sp) -; ZVFH_RV32-NEXT: sw a1, 12(sp) -; ZVFH_RV32-NEXT: addi a0, sp, 8 -; ZVFH_RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; ZVFH_RV32-NEXT: vlse64.v v8, (a0), zero -; ZVFH_RV32-NEXT: addi sp, sp, 16 -; ZVFH_RV32-NEXT: .cfi_def_cfa_offset 0 -; ZVFH_RV32-NEXT: ret -; -; ZVFH_RV64-LABEL: vp_splat_v1i64: -; ZVFH_RV64: # %bb.0: -; ZVFH_RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; ZVFH_RV64-NEXT: vmv.v.x v8, a0 -; ZVFH_RV64-NEXT: ret -; -; ZVFBFA_RV32-LABEL: vp_splat_v1i64: -; ZVFBFA_RV32: # %bb.0: -; ZVFBFA_RV32-NEXT: addi sp, sp, -16 -; ZVFBFA_RV32-NEXT: .cfi_def_cfa_offset 16 -; ZVFBFA_RV32-NEXT: sw a0, 8(sp) -; ZVFBFA_RV32-NEXT: sw a1, 12(sp) -; ZVFBFA_RV32-NEXT: addi a0, sp, 8 -; ZVFBFA_RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; ZVFBFA_RV32-NEXT: vlse64.v v8, (a0), zero -; ZVFBFA_RV32-NEXT: addi sp, sp, 16 -; ZVFBFA_RV32-NEXT: .cfi_def_cfa_offset 0 -; ZVFBFA_RV32-NEXT: ret -; -; ZVFBFA_RV64-LABEL: vp_splat_v1i64: -; ZVFBFA_RV64: # %bb.0: -; ZVFBFA_RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; ZVFBFA_RV64-NEXT: vmv.v.x v8, a0 -; ZVFBFA_RV64-NEXT: ret - %splat = call <1 x i64> @llvm.experimental.vp.splat.v1i64(i64 %val, <1 x i1> %m, i32 %evl) - ret <1 x i64> %splat -} - -define <2 x i64> @vp_splat_v2i64(i64 %val, <2 x i1> %m, i32 zeroext %evl) { -; ZVFH_RV32-LABEL: vp_splat_v2i64: -; ZVFH_RV32: # %bb.0: -; ZVFH_RV32-NEXT: addi sp, sp, -16 -; ZVFH_RV32-NEXT: .cfi_def_cfa_offset 16 -; ZVFH_RV32-NEXT: sw a0, 8(sp) -; ZVFH_RV32-NEXT: sw a1, 12(sp) -; ZVFH_RV32-NEXT: addi a0, sp, 8 -; ZVFH_RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; ZVFH_RV32-NEXT: vlse64.v v8, (a0), zero -; ZVFH_RV32-NEXT: addi sp, sp, 16 -; ZVFH_RV32-NEXT: .cfi_def_cfa_offset 0 -; ZVFH_RV32-NEXT: ret -; -; ZVFH_RV64-LABEL: vp_splat_v2i64: -; ZVFH_RV64: # %bb.0: -; ZVFH_RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; ZVFH_RV64-NEXT: vmv.v.x v8, a0 -; ZVFH_RV64-NEXT: ret -; -; ZVFBFA_RV32-LABEL: vp_splat_v2i64: -; ZVFBFA_RV32: # %bb.0: -; ZVFBFA_RV32-NEXT: addi sp, sp, -16 -; ZVFBFA_RV32-NEXT: .cfi_def_cfa_offset 16 -; ZVFBFA_RV32-NEXT: sw a0, 8(sp) -; ZVFBFA_RV32-NEXT: sw a1, 12(sp) -; ZVFBFA_RV32-NEXT: addi a0, sp, 8 -; ZVFBFA_RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; ZVFBFA_RV32-NEXT: vlse64.v v8, (a0), zero -; ZVFBFA_RV32-NEXT: addi sp, sp, 16 -; ZVFBFA_RV32-NEXT: .cfi_def_cfa_offset 0 -; ZVFBFA_RV32-NEXT: ret -; -; ZVFBFA_RV64-LABEL: vp_splat_v2i64: -; ZVFBFA_RV64: # %bb.0: -; 
ZVFBFA_RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; ZVFBFA_RV64-NEXT: vmv.v.x v8, a0 -; ZVFBFA_RV64-NEXT: ret - %splat = call <2 x i64> @llvm.experimental.vp.splat.v2i64(i64 %val, <2 x i1> %m, i32 %evl) - ret <2 x i64> %splat -} - -define <4 x i64> @vp_splat_v4i64(i64 %val, <4 x i1> %m, i32 zeroext %evl) { -; ZVFH_RV32-LABEL: vp_splat_v4i64: -; ZVFH_RV32: # %bb.0: -; ZVFH_RV32-NEXT: addi sp, sp, -16 -; ZVFH_RV32-NEXT: .cfi_def_cfa_offset 16 -; ZVFH_RV32-NEXT: sw a0, 8(sp) -; ZVFH_RV32-NEXT: sw a1, 12(sp) -; ZVFH_RV32-NEXT: addi a0, sp, 8 -; ZVFH_RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; ZVFH_RV32-NEXT: vlse64.v v8, (a0), zero -; ZVFH_RV32-NEXT: addi sp, sp, 16 -; ZVFH_RV32-NEXT: .cfi_def_cfa_offset 0 -; ZVFH_RV32-NEXT: ret -; -; ZVFH_RV64-LABEL: vp_splat_v4i64: -; ZVFH_RV64: # %bb.0: -; ZVFH_RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma -; ZVFH_RV64-NEXT: vmv.v.x v8, a0 -; ZVFH_RV64-NEXT: ret -; -; ZVFBFA_RV32-LABEL: vp_splat_v4i64: -; ZVFBFA_RV32: # %bb.0: -; ZVFBFA_RV32-NEXT: addi sp, sp, -16 -; ZVFBFA_RV32-NEXT: .cfi_def_cfa_offset 16 -; ZVFBFA_RV32-NEXT: sw a0, 8(sp) -; ZVFBFA_RV32-NEXT: sw a1, 12(sp) -; ZVFBFA_RV32-NEXT: addi a0, sp, 8 -; ZVFBFA_RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; ZVFBFA_RV32-NEXT: vlse64.v v8, (a0), zero -; ZVFBFA_RV32-NEXT: addi sp, sp, 16 -; ZVFBFA_RV32-NEXT: .cfi_def_cfa_offset 0 -; ZVFBFA_RV32-NEXT: ret -; -; ZVFBFA_RV64-LABEL: vp_splat_v4i64: -; ZVFBFA_RV64: # %bb.0: -; ZVFBFA_RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma -; ZVFBFA_RV64-NEXT: vmv.v.x v8, a0 -; ZVFBFA_RV64-NEXT: ret - %splat = call <4 x i64> @llvm.experimental.vp.splat.v4i64(i64 %val, <4 x i1> %m, i32 %evl) - ret <4 x i64> %splat -} - -define <8 x i64> @vp_splat_v8i64(i64 %val, <8 x i1> %m, i32 zeroext %evl) { -; ZVFH_RV32-LABEL: vp_splat_v8i64: -; ZVFH_RV32: # %bb.0: -; ZVFH_RV32-NEXT: addi sp, sp, -16 -; ZVFH_RV32-NEXT: .cfi_def_cfa_offset 16 -; ZVFH_RV32-NEXT: sw a0, 8(sp) -; ZVFH_RV32-NEXT: sw a1, 12(sp) -; ZVFH_RV32-NEXT: addi a0, sp, 8 -; ZVFH_RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; ZVFH_RV32-NEXT: vlse64.v v8, (a0), zero -; ZVFH_RV32-NEXT: addi sp, sp, 16 -; ZVFH_RV32-NEXT: .cfi_def_cfa_offset 0 -; ZVFH_RV32-NEXT: ret -; -; ZVFH_RV64-LABEL: vp_splat_v8i64: -; ZVFH_RV64: # %bb.0: -; ZVFH_RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; ZVFH_RV64-NEXT: vmv.v.x v8, a0 -; ZVFH_RV64-NEXT: ret -; -; ZVFBFA_RV32-LABEL: vp_splat_v8i64: -; ZVFBFA_RV32: # %bb.0: -; ZVFBFA_RV32-NEXT: addi sp, sp, -16 -; ZVFBFA_RV32-NEXT: .cfi_def_cfa_offset 16 -; ZVFBFA_RV32-NEXT: sw a0, 8(sp) -; ZVFBFA_RV32-NEXT: sw a1, 12(sp) -; ZVFBFA_RV32-NEXT: addi a0, sp, 8 -; ZVFBFA_RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; ZVFBFA_RV32-NEXT: vlse64.v v8, (a0), zero -; ZVFBFA_RV32-NEXT: addi sp, sp, 16 -; ZVFBFA_RV32-NEXT: .cfi_def_cfa_offset 0 -; ZVFBFA_RV32-NEXT: ret -; -; ZVFBFA_RV64-LABEL: vp_splat_v8i64: -; ZVFBFA_RV64: # %bb.0: -; ZVFBFA_RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; ZVFBFA_RV64-NEXT: vmv.v.x v8, a0 -; ZVFBFA_RV64-NEXT: ret - %splat = call <8 x i64> @llvm.experimental.vp.splat.v8i64(i64 %val, <8 x i1> %m, i32 %evl) - ret <8 x i64> %splat -} - -define <1 x bfloat> @vp_splat_v1bf16(bfloat %val, <1 x i1> %m, i32 zeroext %evl) { -; ZVFH-LABEL: vp_splat_v1bf16: -; ZVFH: # %bb.0: -; ZVFH-NEXT: fmv.x.w a1, fa0 -; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; ZVFH-NEXT: vmv.v.x v8, a1 -; ZVFH-NEXT: ret -; -; ZVFBFA-LABEL: vp_splat_v1bf16: -; ZVFBFA: # %bb.0: -; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma -; ZVFBFA-NEXT: vfmv.v.f v8, fa0 -; ZVFBFA-NEXT: ret - %splat = 
call <1 x bfloat> @llvm.experimental.vp.splat.v1bf16(bfloat %val, <1 x i1> %m, i32 %evl) - ret <1 x bfloat> %splat -} - -define <2 x bfloat> @vp_splat_v2bf16(bfloat %val, <2 x i1> %m, i32 zeroext %evl) { -; ZVFH-LABEL: vp_splat_v2bf16: -; ZVFH: # %bb.0: -; ZVFH-NEXT: fmv.x.w a1, fa0 -; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; ZVFH-NEXT: vmv.v.x v8, a1 -; ZVFH-NEXT: ret -; -; ZVFBFA-LABEL: vp_splat_v2bf16: -; ZVFBFA: # %bb.0: -; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma -; ZVFBFA-NEXT: vfmv.v.f v8, fa0 -; ZVFBFA-NEXT: ret - %splat = call <2 x bfloat> @llvm.experimental.vp.splat.v2bf16(bfloat %val, <2 x i1> %m, i32 %evl) - ret <2 x bfloat> %splat -} - -define <4 x bfloat> @vp_splat_v4bf16(bfloat %val, <4 x i1> %m, i32 zeroext %evl) { -; ZVFH-LABEL: vp_splat_v4bf16: -; ZVFH: # %bb.0: -; ZVFH-NEXT: fmv.x.w a1, fa0 -; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; ZVFH-NEXT: vmv.v.x v8, a1 -; ZVFH-NEXT: ret -; -; ZVFBFA-LABEL: vp_splat_v4bf16: -; ZVFBFA: # %bb.0: -; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma -; ZVFBFA-NEXT: vfmv.v.f v8, fa0 -; ZVFBFA-NEXT: ret - %splat = call <4 x bfloat> @llvm.experimental.vp.splat.v4bf16(bfloat %val, <4 x i1> %m, i32 %evl) - ret <4 x bfloat> %splat -} - -define <8 x bfloat> @vp_splat_v8bf16(bfloat %val, <8 x i1> %m, i32 zeroext %evl) { -; ZVFH-LABEL: vp_splat_v8bf16: -; ZVFH: # %bb.0: -; ZVFH-NEXT: fmv.x.w a1, fa0 -; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; ZVFH-NEXT: vmv.v.x v8, a1 -; ZVFH-NEXT: ret -; -; ZVFBFA-LABEL: vp_splat_v8bf16: -; ZVFBFA: # %bb.0: -; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma -; ZVFBFA-NEXT: vfmv.v.f v8, fa0 -; ZVFBFA-NEXT: ret - %splat = call <8 x bfloat> @llvm.experimental.vp.splat.v8bf16(bfloat %val, <8 x i1> %m, i32 %evl) - ret <8 x bfloat> %splat -} - -define <16 x bfloat> @vp_splat_v16bf16(bfloat %val, <16 x i1> %m, i32 zeroext %evl) { -; ZVFH-LABEL: vp_splat_v16bf16: -; ZVFH: # %bb.0: -; ZVFH-NEXT: fmv.x.w a1, fa0 -; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmv.v.x v8, a1 -; ZVFH-NEXT: ret -; -; ZVFBFA-LABEL: vp_splat_v16bf16: -; ZVFBFA: # %bb.0: -; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma -; ZVFBFA-NEXT: vfmv.v.f v8, fa0 -; ZVFBFA-NEXT: ret - %splat = call <16 x bfloat> @llvm.experimental.vp.splat.v16bf16(bfloat %val, <16 x i1> %m, i32 %evl) - ret <16 x bfloat> %splat -} - -define <32 x bfloat> @vp_splat_v32bf16(bfloat %val, <32 x i1> %m, i32 zeroext %evl) { -; ZVFH-LABEL: vp_splat_v32bf16: -; ZVFH: # %bb.0: -; ZVFH-NEXT: fmv.x.w a1, fa0 -; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFH-NEXT: vmv.v.x v8, a1 -; ZVFH-NEXT: ret -; -; ZVFBFA-LABEL: vp_splat_v32bf16: -; ZVFBFA: # %bb.0: -; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma -; ZVFBFA-NEXT: vfmv.v.f v8, fa0 -; ZVFBFA-NEXT: ret - %splat = call <32 x bfloat> @llvm.experimental.vp.splat.v32bf16(bfloat %val, <32 x i1> %m, i32 %evl) - ret <32 x bfloat> %splat -} - -define <1 x half> @vp_splat_v1f16(half %val, <1 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_v1f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret - %splat = call <1 x half> @llvm.experimental.vp.splat.v1f16(half %val, <1 x i1> %m, i32 %evl) - ret <1 x half> %splat -} - -define <2 x half> @vp_splat_v2f16(half %val, <2 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_v2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret - %splat = call <2 x half> 
@llvm.experimental.vp.splat.v2f16(half %val, <2 x i1> %m, i32 %evl) - ret <2 x half> %splat -} - -define <4 x half> @vp_splat_v4f16(half %val, <4 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_v4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret - %splat = call <4 x half> @llvm.experimental.vp.splat.v4f16(half %val, <4 x i1> %m, i32 %evl) - ret <4 x half> %splat -} - -define <8 x half> @vp_splat_v8f16(half %val, <8 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_v8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret - %splat = call <8 x half> @llvm.experimental.vp.splat.v8f16(half %val, <8 x i1> %m, i32 %evl) - ret <8 x half> %splat -} - -define <16 x half> @vp_splat_v16f16(half %val, <16 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_v16f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret - %splat = call <16 x half> @llvm.experimental.vp.splat.v16f16(half %val, <16 x i1> %m, i32 %evl) - ret <16 x half> %splat -} - -define <32 x half> @vp_splat_v32f16(half %val, <32 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_v32f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret - %splat = call <32 x half> @llvm.experimental.vp.splat.v32f16(half %val, <32 x i1> %m, i32 %evl) - ret <32 x half> %splat -} - -define <1 x float> @vp_splat_v1f32(float %val, <1 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_v1f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret - %splat = call <1 x float> @llvm.experimental.vp.splat.v1f32(float %val, <1 x i1> %m, i32 %evl) - ret <1 x float> %splat -} - -define <2 x float> @vp_splat_v2f32(float %val, <2 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_v2f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret - %splat = call <2 x float> @llvm.experimental.vp.splat.v2f32(float %val, <2 x i1> %m, i32 %evl) - ret <2 x float> %splat -} - -define <4 x float> @vp_splat_v4f32(float %val, <4 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret - %splat = call <4 x float> @llvm.experimental.vp.splat.v4f32(float %val, <4 x i1> %m, i32 %evl) - ret <4 x float> %splat -} - -define <8 x float> @vp_splat_v8f32(float %val, <8 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_v8f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret - %splat = call <8 x float> @llvm.experimental.vp.splat.v8f32(float %val, <8 x i1> %m, i32 %evl) - ret <8 x float> %splat -} - -define <16 x float> @vp_splat_v16f32(float %val, <16 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_v16f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret - %splat = call <16 x float> @llvm.experimental.vp.splat.v16f32(float %val, <16 x i1> %m, i32 %evl) - ret <16 x float> %splat -} - -define <1 x double> @vp_splat_v1f64(double %val, <1 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_v1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; 
CHECK-NEXT: ret - %splat = call <1 x double> @llvm.experimental.vp.splat.v1f64(double %val, <1 x i1> %m, i32 %evl) - ret <1 x double> %splat -} - -define <2 x double> @vp_splat_v2f64(double %val, <2 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_v2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret - %splat = call <2 x double> @llvm.experimental.vp.splat.v2f64(double %val, <2 x i1> %m, i32 %evl) - ret <2 x double> %splat -} - -define <4 x double> @vp_splat_v4f64(double %val, <4 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret - %splat = call <4 x double> @llvm.experimental.vp.splat.v4f64(double %val, <4 x i1> %m, i32 %evl) - ret <4 x double> %splat -} - -define <8 x double> @vp_splat_v8f64(double %val, <8 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_v8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret - %splat = call <8 x double> @llvm.experimental.vp.splat.v8f64(double %val, <8 x i1> %m, i32 %evl) - ret <8 x double> %splat -} - -define <16 x i31> @vp_splat_v16i31(i31 %val, <16 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_v16i31: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call <16 x i31> @llvm.experimental.vp.splat.v16i31(i31 %val, <16 x i1> %m, i32 %evl) - ret <16 x i31> %splat -} - -define <15 x i32> @vp_splat_v15i32(i32 %val, <15 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_v15i32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call <15 x i32> @llvm.experimental.vp.splat.v15i32(i32 %val, <15 x i1> %m, i32 %evl) - ret <15 x i32> %splat -} - -; Split case. 
-define <32 x i32> @vp_splat_v32i32(i32 %val, <32 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_v32i32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call <32 x i32> @llvm.experimental.vp.splat.v32i32(i32 %val, <32 x i1> %m, i32 %evl) - ret <32 x i32> %splat -} diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll index 519312766feeb..24e859d82a249 100644 --- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll @@ -5816,135 +5816,6 @@ for.cond.cleanup: ; preds = %vector.body ret void } -define void @sink_vp_splat(ptr nocapture %out, ptr nocapture %in) { -; CHECK-LABEL: sink_vp_splat: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: li a2, 0 -; CHECK-NEXT: li a3, 1024 -; CHECK-NEXT: li a4, 3 -; CHECK-NEXT: lui a5, 1 -; CHECK-NEXT: .LBB129_1: # %vector.body -; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB129_2 Depth 2 -; CHECK-NEXT: vsetvli a6, a3, e32, m4, ta, ma -; CHECK-NEXT: slli a7, a2, 2 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: add t0, a1, a7 -; CHECK-NEXT: li t1, 1024 -; CHECK-NEXT: .LBB129_2: # %for.body424 -; CHECK-NEXT: # Parent Loop BB129_1 Depth=1 -; CHECK-NEXT: # => This Inner Loop Header: Depth=2 -; CHECK-NEXT: vle32.v v12, (t0) -; CHECK-NEXT: addi t1, t1, -1 -; CHECK-NEXT: vmacc.vx v8, a4, v12 -; CHECK-NEXT: add t0, t0, a5 -; CHECK-NEXT: bnez t1, .LBB129_2 -; CHECK-NEXT: # %bb.3: # %vector.latch -; CHECK-NEXT: # in Loop: Header=BB129_1 Depth=1 -; CHECK-NEXT: add a7, a0, a7 -; CHECK-NEXT: sub a3, a3, a6 -; CHECK-NEXT: vse32.v v8, (a7) -; CHECK-NEXT: add a2, a2, a6 -; CHECK-NEXT: bnez a3, .LBB129_1 -; CHECK-NEXT: # %bb.4: # %for.cond.cleanup -; CHECK-NEXT: ret -entry: - br label %vector.body - -vector.body: ; preds = %vector.latch, %entry - %scalar.ind = phi i64 [ 0, %entry ], [ %next.ind, %vector.latch ] - %trip.count = phi i64 [ 1024, %entry ], [ %remaining.trip.count, %vector.latch ] - %evl = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %trip.count, i32 8, i1 true) - %vp.splat1 = tail call @llvm.experimental.vp.splat.nxv8i32(i32 0, splat(i1 true), i32 %evl) - %vp.splat2 = tail call @llvm.experimental.vp.splat.nxv8i32(i32 3, splat(i1 true), i32 %evl) - %evl.cast = zext i32 %evl to i64 - br label %for.body424 - -for.body424: ; preds = %for.body424, %vector.body - %scalar.phi = phi i64 [ 0, %vector.body ], [ %indvars.iv.next27, %for.body424 ] - %vector.phi = phi [ %vp.splat1, %vector.body ], [ %vp.binary26, %for.body424 ] - %arrayidx625 = getelementptr inbounds [1024 x i32], ptr %in, i64 %scalar.phi, i64 %scalar.ind - %widen.load = tail call @llvm.vp.load.nxv8i32.p0(ptr %arrayidx625, splat (i1 true), i32 %evl) - %vp.binary = tail call @llvm.vp.mul.nxv8i32( %widen.load, %vp.splat2, splat (i1 true), i32 %evl) - %vp.binary26 = tail call @llvm.vp.add.nxv8i32( %vector.phi, %vp.binary, splat (i1 true), i32 %evl) - %indvars.iv.next27 = add nuw nsw i64 %scalar.phi, 1 - %exitcond.not28 = icmp eq i64 %indvars.iv.next27, 1024 - br i1 %exitcond.not28, label %vector.latch, label %for.body424 - -vector.latch: ; preds = %for.body424 - %arrayidx830 = getelementptr inbounds i32, ptr %out, i64 %scalar.ind - tail call void @llvm.vp.store.nxv8i32.p0( %vp.binary26, ptr %arrayidx830, splat (i1 true), i32 %evl) - %remaining.trip.count = sub nuw i64 %trip.count, %evl.cast - %next.ind = add i64 %scalar.ind, %evl.cast - %6 = icmp eq i64 %remaining.trip.count, 
0 - br i1 %6, label %for.cond.cleanup, label %vector.body - -for.cond.cleanup: ; preds = %vector.latch - ret void -} - -define void @sink_vp_splat_vfwadd_wf(ptr nocapture %in, float %f) { -; CHECK-LABEL: sink_vp_splat_vfwadd_wf: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: li a2, 1024 -; CHECK-NEXT: lui a3, 2 -; CHECK-NEXT: .LBB130_1: # %vector.body -; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB130_2 Depth 2 -; CHECK-NEXT: vsetvli a4, a2, e8, m1, ta, ma -; CHECK-NEXT: slli a5, a1, 3 -; CHECK-NEXT: add a5, a0, a5 -; CHECK-NEXT: li a6, 1024 -; CHECK-NEXT: .LBB130_2: # %for.body419 -; CHECK-NEXT: # Parent Loop BB130_1 Depth=1 -; CHECK-NEXT: # => This Inner Loop Header: Depth=2 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vle64.v v8, (a5) -; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: vfwadd.wf v8, v8, fa0 -; CHECK-NEXT: vse64.v v8, (a5) -; CHECK-NEXT: add a5, a5, a3 -; CHECK-NEXT: bnez a6, .LBB130_2 -; CHECK-NEXT: # %bb.3: # %vector.latch -; CHECK-NEXT: # in Loop: Header=BB130_1 Depth=1 -; CHECK-NEXT: sub a2, a2, a4 -; CHECK-NEXT: add a1, a1, a4 -; CHECK-NEXT: bnez a2, .LBB130_1 -; CHECK-NEXT: # %bb.4: # %for.cond.cleanup -; CHECK-NEXT: ret -entry: - %conv = fpext float %f to double - br label %vector.body - -vector.body: ; preds = %vector.latch, %entry - %scalar.ind = phi i64 [ 0, %entry ], [ %next.ind, %vector.latch ] - %trip.count = phi i64 [ 1024, %entry ], [ %remaining.trip.count, %vector.latch ] - %evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %trip.count, i32 8, i1 true) - %vp.splat = call @llvm.experimental.vp.splat.nxv8f64(double %conv, splat (i1 true), i32 %evl) - %evl.cast = zext i32 %evl to i64 - br label %for.body419 - -for.body419: ; preds = %for.body419, %vector.body - %scalar.phi = phi i64 [ 0, %vector.body ], [ %indvars.iv.next21, %for.body419 ] - %arrayidx620 = getelementptr inbounds [1024 x double], ptr %in, i64 %scalar.phi, i64 %scalar.ind - %widen.load = call @llvm.vp.load.nxv8f64.p0(ptr %arrayidx620, splat (i1 true), i32 %evl) - %vp.binary = call @llvm.vp.fadd.nxv8f64( %widen.load, %vp.splat, splat (i1 true), i32 %evl) - call void @llvm.vp.store.nxv8f64.p0( %vp.binary, ptr %arrayidx620, splat (i1 true), i32 %evl) - %indvars.iv.next21 = add nuw nsw i64 %scalar.phi, 1 - %exitcond.not22 = icmp eq i64 %indvars.iv.next21, 1024 - br i1 %exitcond.not22, label %vector.latch, label %for.body419 - -vector.latch: ; preds = %for.body419 - %remaining.trip.count = sub nuw i64 %trip.count, %evl.cast - %next.ind = add i64 %scalar.ind, %evl.cast - %cond = icmp eq i64 %remaining.trip.count, 0 - br i1 %cond, label %for.cond.cleanup, label %vector.body - -for.cond.cleanup: ; preds = %vector.latch - ret void -} - ;; This is exactly like sink_add_splat except that the splat has operands ;; which haven't been converted to undef. 
define void @sink_non_canonical_splat(ptr nocapture %a, i32 signext %x) { @@ -5953,13 +5824,13 @@ define void @sink_non_canonical_splat(ptr nocapture %a, i32 signext %x) { ; CHECK-NEXT: lui a2, 1 ; CHECK-NEXT: add a2, a0, a2 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: .LBB131_1: # %vector.body +; CHECK-NEXT: .LBB129_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vadd.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: bne a0, a2, .LBB131_1 +; CHECK-NEXT: bne a0, a2, .LBB129_1 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splat-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splat-mask.ll deleted file mode 100644 index e970758a2610b..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vp-splat-mask.ll +++ /dev/null @@ -1,107 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 - -define @vp_splat_nxv1i1_true_unmasked(i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv1i1_true_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv1i1(i1 true, splat (i1 true), i32 %evl) - ret %splat -} - -define @vp_splat_nxv1i1_false_unmasked(i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv1i1_false_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vmclr.m v0 -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv1i1(i1 false, splat (i1 true), i32 %evl) - ret %splat -} - -define @vp_splat_nxv1i1(i1 %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv1i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: vmsne.vi v0, v8, 0, v0.t -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv1i1(i1 %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv2i1(i1 %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv2i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: vmsne.vi v0, v8, 0, v0.t -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv2i1(i1 %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv4i1(i1 %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv4i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: vmsne.vi v0, v8, 0, v0.t -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv4i1(i1 %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv8i1(i1 %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv8i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: vmsne.vi v0, v8, 0, v0.t -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv8i1(i1 %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv16i1(i1 %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv16i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vmsne.vi v8, v10, 0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: ret - %splat = call 
@llvm.experimental.vp.splat.nxv16i1(i1 %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv32i1(i1 %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv32i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vmsne.vi v8, v12, 0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv32i1(i1 %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv64i1(i1 %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv64i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vmsne.vi v8, v16, 0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv64i1(i1 %val, %m, i32 %evl) - ret %splat -} - -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; RV32: {{.*}} -; RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splat.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splat.ll deleted file mode 100644 index b8b2ba7c5e5d3..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vp-splat.ll +++ /dev/null @@ -1,744 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,NOZFMIN,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,NOZFMIN,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,NOZFMIN,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,NOZFMIN,ZVFHMIN -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfhmin,+zfbfmin,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZFMIN -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfhmin,+zfbfmin,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZFMIN -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfhmin,+zvfhmin,+experimental-zvfbfa -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZVFBFA -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfhmin,+zvfhmin,+experimental-zvfbfa -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZVFBFA - -define @vp_splat_nxv1i8(i8 %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv1i8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv1i8(i8 %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv2i8(i8 %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv2i8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv2i8(i8 %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv4i8(i8 %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv4i8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv4i8(i8 %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv8i8(i8 %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv8i8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; 
CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv8i8(i8 %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv16i8(i8 %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv16i8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv16i8(i8 %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv32i8(i8 %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv32i8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv32i8(i8 %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv64i8(i8 %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv64i8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv64i8(i8 %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv1i16(i16 %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv1i16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv1i16(i16 %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv2i16(i16 %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv2i16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv2i16(i16 %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv4i16(i16 %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv4i16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv4i16(i16 %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv8i16(i16 %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv8i16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv8i16(i16 %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv16i16(i16 %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv16i16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv16i16(i16 %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv32i16(i16 %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv32i16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv32i16(i16 %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv1i32(i32 %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv1i32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv1i32(i32 %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv2i32(i32 %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv2i32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv2i32(i32 %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv4i32(i32 %val, %m, i32 zeroext %evl) { 
-; CHECK-LABEL: vp_splat_nxv4i32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv4i32(i32 %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv8i32(i32 %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv8i32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv8i32(i32 %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv16i32(i32 %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv16i32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv16i32(i32 %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv1i64(i64 %val, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_splat_nxv1i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v8, (a0), zero -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: .cfi_def_cfa_offset 0 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_splat_nxv1i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; RV64-NEXT: vmv.v.x v8, a0 -; RV64-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv1i64(i64 %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv2i64(i64 %val, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_splat_nxv2i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v8, (a0), zero -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: .cfi_def_cfa_offset 0 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_splat_nxv2i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma -; RV64-NEXT: vmv.v.x v8, a0 -; RV64-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv2i64(i64 %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv4i64(i64 %val, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_splat_nxv4i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v8, (a0), zero -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: .cfi_def_cfa_offset 0 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_splat_nxv4i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmv.v.x v8, a0 -; RV64-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv4i64(i64 %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv8i64(i64 %val, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_splat_nxv8i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a0), zero -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: .cfi_def_cfa_offset 0 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_splat_nxv8i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmv.v.x v8, a0 -; RV64-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv8i64(i64 %val, %m, i32 %evl) - ret 
%splat -} - -define @vp_splat_nxv1bf16(bfloat %val, %m, i32 zeroext %evl) { -; NOZFMIN-LABEL: vp_splat_nxv1bf16: -; NOZFMIN: # %bb.0: -; NOZFMIN-NEXT: fmv.x.w a1, fa0 -; NOZFMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; NOZFMIN-NEXT: vmv.v.x v8, a1 -; NOZFMIN-NEXT: ret -; -; ZFMIN-LABEL: vp_splat_nxv1bf16: -; ZFMIN: # %bb.0: -; ZFMIN-NEXT: fmv.x.h a1, fa0 -; ZFMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; ZFMIN-NEXT: vmv.v.x v8, a1 -; ZFMIN-NEXT: ret -; -; ZVFBFA-LABEL: vp_splat_nxv1bf16: -; ZVFBFA: # %bb.0: -; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma -; ZVFBFA-NEXT: vfmv.v.f v8, fa0 -; ZVFBFA-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv1bf16(bfloat %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv2bf16(bfloat %val, %m, i32 zeroext %evl) { -; NOZFMIN-LABEL: vp_splat_nxv2bf16: -; NOZFMIN: # %bb.0: -; NOZFMIN-NEXT: fmv.x.w a1, fa0 -; NOZFMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; NOZFMIN-NEXT: vmv.v.x v8, a1 -; NOZFMIN-NEXT: ret -; -; ZFMIN-LABEL: vp_splat_nxv2bf16: -; ZFMIN: # %bb.0: -; ZFMIN-NEXT: fmv.x.h a1, fa0 -; ZFMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; ZFMIN-NEXT: vmv.v.x v8, a1 -; ZFMIN-NEXT: ret -; -; ZVFBFA-LABEL: vp_splat_nxv2bf16: -; ZVFBFA: # %bb.0: -; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma -; ZVFBFA-NEXT: vfmv.v.f v8, fa0 -; ZVFBFA-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv2bf16(bfloat %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv4bf16(bfloat %val, %m, i32 zeroext %evl) { -; NOZFMIN-LABEL: vp_splat_nxv4bf16: -; NOZFMIN: # %bb.0: -; NOZFMIN-NEXT: fmv.x.w a1, fa0 -; NOZFMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; NOZFMIN-NEXT: vmv.v.x v8, a1 -; NOZFMIN-NEXT: ret -; -; ZFMIN-LABEL: vp_splat_nxv4bf16: -; ZFMIN: # %bb.0: -; ZFMIN-NEXT: fmv.x.h a1, fa0 -; ZFMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; ZFMIN-NEXT: vmv.v.x v8, a1 -; ZFMIN-NEXT: ret -; -; ZVFBFA-LABEL: vp_splat_nxv4bf16: -; ZVFBFA: # %bb.0: -; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma -; ZVFBFA-NEXT: vfmv.v.f v8, fa0 -; ZVFBFA-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv4bf16(bfloat %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv8bf16(bfloat %val, %m, i32 zeroext %evl) { -; NOZFMIN-LABEL: vp_splat_nxv8bf16: -; NOZFMIN: # %bb.0: -; NOZFMIN-NEXT: fmv.x.w a1, fa0 -; NOZFMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; NOZFMIN-NEXT: vmv.v.x v8, a1 -; NOZFMIN-NEXT: ret -; -; ZFMIN-LABEL: vp_splat_nxv8bf16: -; ZFMIN: # %bb.0: -; ZFMIN-NEXT: fmv.x.h a1, fa0 -; ZFMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZFMIN-NEXT: vmv.v.x v8, a1 -; ZFMIN-NEXT: ret -; -; ZVFBFA-LABEL: vp_splat_nxv8bf16: -; ZVFBFA: # %bb.0: -; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma -; ZVFBFA-NEXT: vfmv.v.f v8, fa0 -; ZVFBFA-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv8bf16(bfloat %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv16bf16(bfloat %val, %m, i32 zeroext %evl) { -; NOZFMIN-LABEL: vp_splat_nxv16bf16: -; NOZFMIN: # %bb.0: -; NOZFMIN-NEXT: fmv.x.w a1, fa0 -; NOZFMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; NOZFMIN-NEXT: vmv.v.x v8, a1 -; NOZFMIN-NEXT: ret -; -; ZFMIN-LABEL: vp_splat_nxv16bf16: -; ZFMIN: # %bb.0: -; ZFMIN-NEXT: fmv.x.h a1, fa0 -; ZFMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZFMIN-NEXT: vmv.v.x v8, a1 -; ZFMIN-NEXT: ret -; -; ZVFBFA-LABEL: vp_splat_nxv16bf16: -; ZVFBFA: # %bb.0: -; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma -; ZVFBFA-NEXT: vfmv.v.f v8, fa0 -; ZVFBFA-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv16bf16(bfloat %val, %m, i32 %evl) - ret 
%splat -} - -define @vp_splat_nxv32bf16(bfloat %val, %m, i32 zeroext %evl) { -; NOZFMIN-LABEL: vp_splat_nxv32bf16: -; NOZFMIN: # %bb.0: -; NOZFMIN-NEXT: fmv.x.w a1, fa0 -; NOZFMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; NOZFMIN-NEXT: vmv.v.x v8, a1 -; NOZFMIN-NEXT: ret -; -; ZFMIN-LABEL: vp_splat_nxv32bf16: -; ZFMIN: # %bb.0: -; ZFMIN-NEXT: fmv.x.h a1, fa0 -; ZFMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZFMIN-NEXT: vmv.v.x v8, a1 -; ZFMIN-NEXT: ret -; -; ZVFBFA-LABEL: vp_splat_nxv32bf16: -; ZVFBFA: # %bb.0: -; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma -; ZVFBFA-NEXT: vfmv.v.f v8, fa0 -; ZVFBFA-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv32bf16(bfloat %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv1f16(half %val, %m, i32 zeroext %evl) { -; ZVFH-LABEL: vp_splat_nxv1f16: -; ZVFH: # %bb.0: -; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; ZVFH-NEXT: vfmv.v.f v8, fa0 -; ZVFH-NEXT: ret -; -; ZVFHMIN-LABEL: vp_splat_nxv1f16: -; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: fmv.x.w a1, fa0 -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v8, a1 -; ZVFHMIN-NEXT: ret -; -; ZFMIN-LABEL: vp_splat_nxv1f16: -; ZFMIN: # %bb.0: -; ZFMIN-NEXT: fmv.x.h a1, fa0 -; ZFMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; ZFMIN-NEXT: vmv.v.x v8, a1 -; ZFMIN-NEXT: ret -; -; ZVFBFA-LABEL: vp_splat_nxv1f16: -; ZVFBFA: # %bb.0: -; ZVFBFA-NEXT: fmv.x.h a1, fa0 -; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; ZVFBFA-NEXT: vmv.v.x v8, a1 -; ZVFBFA-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv1f16(half %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv2f16(half %val, %m, i32 zeroext %evl) { -; ZVFH-LABEL: vp_splat_nxv2f16: -; ZVFH: # %bb.0: -; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; ZVFH-NEXT: vfmv.v.f v8, fa0 -; ZVFH-NEXT: ret -; -; ZVFHMIN-LABEL: vp_splat_nxv2f16: -; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: fmv.x.w a1, fa0 -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v8, a1 -; ZVFHMIN-NEXT: ret -; -; ZFMIN-LABEL: vp_splat_nxv2f16: -; ZFMIN: # %bb.0: -; ZFMIN-NEXT: fmv.x.h a1, fa0 -; ZFMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; ZFMIN-NEXT: vmv.v.x v8, a1 -; ZFMIN-NEXT: ret -; -; ZVFBFA-LABEL: vp_splat_nxv2f16: -; ZVFBFA: # %bb.0: -; ZVFBFA-NEXT: fmv.x.h a1, fa0 -; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; ZVFBFA-NEXT: vmv.v.x v8, a1 -; ZVFBFA-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv2f16(half %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv4f16(half %val, %m, i32 zeroext %evl) { -; ZVFH-LABEL: vp_splat_nxv4f16: -; ZVFH: # %bb.0: -; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; ZVFH-NEXT: vfmv.v.f v8, fa0 -; ZVFH-NEXT: ret -; -; ZVFHMIN-LABEL: vp_splat_nxv4f16: -; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: fmv.x.w a1, fa0 -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v8, a1 -; ZVFHMIN-NEXT: ret -; -; ZFMIN-LABEL: vp_splat_nxv4f16: -; ZFMIN: # %bb.0: -; ZFMIN-NEXT: fmv.x.h a1, fa0 -; ZFMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; ZFMIN-NEXT: vmv.v.x v8, a1 -; ZFMIN-NEXT: ret -; -; ZVFBFA-LABEL: vp_splat_nxv4f16: -; ZVFBFA: # %bb.0: -; ZVFBFA-NEXT: fmv.x.h a1, fa0 -; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; ZVFBFA-NEXT: vmv.v.x v8, a1 -; ZVFBFA-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv4f16(half %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv8f16(half %val, %m, i32 zeroext %evl) { -; ZVFH-LABEL: vp_splat_nxv8f16: -; ZVFH: # %bb.0: -; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vfmv.v.f v8, fa0 
-; ZVFH-NEXT: ret -; -; ZVFHMIN-LABEL: vp_splat_nxv8f16: -; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: fmv.x.w a1, fa0 -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v8, a1 -; ZVFHMIN-NEXT: ret -; -; ZFMIN-LABEL: vp_splat_nxv8f16: -; ZFMIN: # %bb.0: -; ZFMIN-NEXT: fmv.x.h a1, fa0 -; ZFMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZFMIN-NEXT: vmv.v.x v8, a1 -; ZFMIN-NEXT: ret -; -; ZVFBFA-LABEL: vp_splat_nxv8f16: -; ZVFBFA: # %bb.0: -; ZVFBFA-NEXT: fmv.x.h a1, fa0 -; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFBFA-NEXT: vmv.v.x v8, a1 -; ZVFBFA-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv8f16(half %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv16f16(half %val, %m, i32 zeroext %evl) { -; ZVFH-LABEL: vp_splat_nxv16f16: -; ZVFH: # %bb.0: -; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFH-NEXT: vfmv.v.f v8, fa0 -; ZVFH-NEXT: ret -; -; ZVFHMIN-LABEL: vp_splat_nxv16f16: -; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: fmv.x.w a1, fa0 -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v8, a1 -; ZVFHMIN-NEXT: ret -; -; ZFMIN-LABEL: vp_splat_nxv16f16: -; ZFMIN: # %bb.0: -; ZFMIN-NEXT: fmv.x.h a1, fa0 -; ZFMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZFMIN-NEXT: vmv.v.x v8, a1 -; ZFMIN-NEXT: ret -; -; ZVFBFA-LABEL: vp_splat_nxv16f16: -; ZVFBFA: # %bb.0: -; ZVFBFA-NEXT: fmv.x.h a1, fa0 -; ZVFBFA-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFBFA-NEXT: vmv.v.x v8, a1 -; ZVFBFA-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv16f16(half %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv32f16(half %val, %m, i32 zeroext %evl) { -; ZVFH-LABEL: vp_splat_nxv32f16: -; ZVFH: # %bb.0: -; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFH-NEXT: vfmv.v.f v8, fa0 -; ZVFH-NEXT: ret -; -; ZVFHMIN-LABEL: vp_splat_nxv32f16: -; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: fmv.x.w a1, fa0 -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v8, a1 -; ZVFHMIN-NEXT: ret -; -; ZFMIN-LABEL: vp_splat_nxv32f16: -; ZFMIN: # %bb.0: -; ZFMIN-NEXT: fmv.x.h a1, fa0 -; ZFMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZFMIN-NEXT: vmv.v.x v8, a1 -; ZFMIN-NEXT: ret -; -; ZVFBFA-LABEL: vp_splat_nxv32f16: -; ZVFBFA: # %bb.0: -; ZVFBFA-NEXT: fmv.x.h a1, fa0 -; ZVFBFA-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFBFA-NEXT: vmv.v.x v8, a1 -; ZVFBFA-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv32f16(half %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv1f32(float %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv1f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv1f32(float %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv2f32(float %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv2f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv2f32(float %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv4f32(float %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv4f32(float %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv8f32(float %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv8f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; 
CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv8f32(float %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv16f32(float %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv16f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv16f32(float %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv1f64(double %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv1f64(double %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv2f64(double %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv2f64(double %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv4f64(double %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv4f64(double %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv8f64(double %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv8f64(double %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv16i31(i31 %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv16i31: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv16i31(i31 %val, %m, i32 %evl) - ret %splat -} - -define @vp_splat_nxv15i32(i32 %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv15i32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: ret - %splat = call @llvm.experimental.vp.splat.nxv15i32(i32 %val, %m, i32 %evl) - ret %splat -} - -; Split case. 
-define <vscale x 32 x i32> @vp_splat_nxv32i32(i32 %val, <vscale x 32 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vp_splat_nxv32i32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 1
-; CHECK-NEXT:    sub a3, a1, a2
-; CHECK-NEXT:    sltu a4, a1, a3
-; CHECK-NEXT:    addi a4, a4, -1
-; CHECK-NEXT:    and a3, a4, a3
-; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    bltu a1, a2, .LBB45_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, a2
-; CHECK-NEXT:  .LBB45_2:
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT:    vmv.v.x v8, a0
-; CHECK-NEXT:    ret
-  %splat = call <vscale x 32 x i32> @llvm.experimental.vp.splat.nxv32i32(i32 %val, <vscale x 32 x i1> %m, i32 %evl)
-  ret <vscale x 32 x i32> %splat
-}
diff --git a/llvm/unittests/IR/VPIntrinsicTest.cpp b/llvm/unittests/IR/VPIntrinsicTest.cpp
index 0dd352a94f1c7..5dd979133da66 100644
--- a/llvm/unittests/IR/VPIntrinsicTest.cpp
+++ b/llvm/unittests/IR/VPIntrinsicTest.cpp
@@ -111,8 +111,6 @@ class VPIntrinsicTest : public testing::Test {
            "addrspace(1)*, i32, <8 x i1>, i32) ";
     Str << " declare <8 x i32> @llvm.vp.gather.v8i32.v8p0i32(<8 x i32*>, <8 x "
            "i1>, i32) ";
-    Str << " declare <8 x i32> @llvm.experimental.vp.splat.v8i32(i32, <8 x "
-           "i1>, i32) ";
     for (const char *ReductionOpcode : ReductionIntOpcodes)
       Str << " declare i32 @llvm.vp.reduce." << ReductionOpcode