diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def
index adb6c941e852a..daac7776168f7 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -96,6 +96,10 @@ TARGET_BUILTIN(__builtin_arm_jcvt, "Zid", "nc", "v8.3a")
 // Prefetch
 BUILTIN(__builtin_arm_prefetch, "vvC*UiUiUiUi", "nc")
 
+// Range Prefetch
+TARGET_HEADER_BUILTIN(__builtin_arm_range_prefetch_x, "vvC*UiUiiUiiz", "n", ARM_ACLE_H, ALL_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(__builtin_arm_range_prefetch, "vvC*UiUiULLi", "n", ARM_ACLE_H, ALL_LANGUAGES, "")
+
 // System Registers
 BUILTIN(__builtin_arm_rsr, "UicC*", "nc")
 BUILTIN(__builtin_arm_rsr64, "WUicC*", "nc")
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index d7f36c0f9b79a..8cb8f4debc2e9 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -474,6 +474,9 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
   Builder.defineMacro("__ARM_SIZEOF_MINIMAL_ENUM",
                       Opts.ShortEnums ? "1" : "4");
 
+  // Clang supports range prefetch intrinsics
+  Builder.defineMacro("__ARM_PREFETCH_RANGE", "1");
+
   if (FPU & NeonMode) {
     Builder.defineMacro("__ARM_NEON", "1");
     // 64-bit NEON supports half, single and double precision operations.
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index d4b0b81d3d87f..15610fa055cd2 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -2660,6 +2660,56 @@ static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
   return Builder.CreateCall(F, { Metadata, ArgValue });
 }
 
+static Value *EmitRangePrefetchBuiltin(CodeGenFunction &CGF, unsigned BuiltinID,
+                                       const CallExpr *E) {
+  CodeGen::CGBuilderTy &Builder = CGF.Builder;
+  CodeGen::CodeGenModule &CGM = CGF.CGM;
+  SmallVector<Value *> Ops;
+
+  auto getIntArg = [&](unsigned ArgNo) {
+    Expr::EvalResult Result;
+    if (!E->getArg(ArgNo)->EvaluateAsInt(Result, CGM.getContext()))
+      llvm_unreachable("Expected constant argument to range prefetch.");
+    return Result.Val.getInt().getExtValue();
+  };
+
+  Ops.push_back(CGF.EmitScalarExpr(E->getArg(0))); /*Addr*/
+  Ops.push_back(CGF.EmitScalarExpr(E->getArg(1))); /*Access Kind*/
+  Ops.push_back(CGF.EmitScalarExpr(E->getArg(2))); /*Policy*/
+
+  if (BuiltinID == clang::AArch64::BI__builtin_arm_range_prefetch_x) {
+    auto Length = getIntArg(3);
+    auto Count = getIntArg(4) - 1;
+    auto Stride = getIntArg(5);
+    auto Distance = getIntArg(6);
+
+    // Map ReuseDistance given in bytes to four bits representing decreasing
+    // powers of two in the range 512MiB (0b0001) to 32KiB (0b1111). Values
+    // are rounded up to the nearest power of 2, starting at 32KiB. Any value
+    // over the maximum is represented by 0 (distance not known).
+    if (Distance > 0) {
+      Distance = llvm::Log2_32_Ceil(Distance);
+      if (Distance < 15)
+        Distance = 15;
+      else if (Distance > 29)
+        Distance = 0;
+      else
+        Distance = 30 - Distance;
+    }
+
+    uint64_t Mask22 = (1ULL << 22) - 1;
+    uint64_t Mask16 = (1ULL << 16) - 1;
+    uint64_t Metadata = ((uint64_t)Distance << 60) | ((Stride & Mask22) << 38) |
+                        ((Count & Mask16) << 22) | (Length & Mask22);
+
+    Ops.push_back(llvm::ConstantInt::get(Builder.getInt64Ty(), Metadata));
+  } else
+    Ops.push_back(CGF.EmitScalarExpr(E->getArg(3)));
+
+  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_range_prefetch),
+                            Ops);
+}
+
 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
 /// argument that specifies the vector type.
 static bool HasExtraNeonArgument(unsigned BuiltinID) {
@@ -5415,6 +5465,10 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
         CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
   }
 
+  if (BuiltinID == AArch64::BI__builtin_arm_range_prefetch ||
+      BuiltinID == AArch64::BI__builtin_arm_range_prefetch_x)
+    return EmitRangePrefetchBuiltin(*this, BuiltinID, E);
+
   // Memory Tagging Extensions (MTE) Intrinsics
   Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
   switch (BuiltinID) {
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index 97f63e8ecf71f..622e8f3d6aa7b 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -98,6 +98,12 @@ __swp(uint32_t __x, volatile uint32_t *__p) {
 #else
 #define __pldx(access_kind, cache_level, retention_policy, addr) \
   __builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1)
+#define __pldx_range(access_kind, retention_policy, length, count, stride, \
+                     reuse_distance, addr) \
+  __builtin_arm_range_prefetch_x(addr, access_kind, retention_policy, length, \
+                                 count, stride, reuse_distance)
+#define __pld_range(access_kind, retention_policy, metadata, addr) \
+  __builtin_arm_range_prefetch(addr, access_kind, retention_policy, metadata)
 #endif
 
 /* 7.6.2 Instruction prefetch */
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index a5164a94b57fa..c6e12cf4ae770 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -1122,6 +1122,19 @@ bool SemaARM::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI,
            SemaRef.BuiltinConstantArgRange(TheCall, 4, 0, 1);
   }
 
+  if (BuiltinID == AArch64::BI__builtin_arm_range_prefetch_x) {
+    return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 1) ||
+           SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 1) ||
+           SemaRef.BuiltinConstantArgRange(TheCall, 3, -2097152, 2097151) ||
+           SemaRef.BuiltinConstantArgRange(TheCall, 4, 1, 65536) ||
+           SemaRef.BuiltinConstantArgRange(TheCall, 5, -2097152, 2097151);
+  }
+
+  if (BuiltinID == AArch64::BI__builtin_arm_range_prefetch) {
+    return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 1) ||
+           SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 1);
+  }
+
   if (BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
       BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
       BuiltinID == AArch64::BI__builtin_arm_rsr128 ||
diff --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c
index 0f539cba5c758..607772d641a0b 100644
--- a/clang/test/CodeGen/arm_acle.c
+++ b/clang/test/CodeGen/arm_acle.c
@@ -164,6 +164,28 @@ void test_pld() {
   __pld(0);
 }
 
+#if defined(__ARM_64BIT_STATE) && defined(__ARM_PREFETCH_RANGE)
+
+// AArch64-LABEL: @test_pld_range(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    call void @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 1, i64 [[MD:%.*]])
+// AArch64-NEXT:    ret void
+//
+void test_pld_range(unsigned long md) {
+  __pld_range(0, 1, md, 0);
+}
+
+// AArch64-LABEL: @test_pldx_range(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    call void @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 1, i64 -576460477427613697)
+// AArch64-NEXT:    ret void
+//
+void test_pldx_range() {
+  __pldx_range(0, 1, 2097151, 65536, -2097152, 15, 0);
+}
+
+#endif
+
 // AArch32-LABEL: @test_pldx(
 // AArch32-NEXT:  entry:
 // AArch32-NEXT:    call void @llvm.prefetch.p0(ptr null, i32 1, i32 3, i32 1)
diff --git a/clang/test/CodeGen/builtins-arm64.c b/clang/test/CodeGen/builtins-arm64.c
index 86c2812434643..c1fd348371f38 100644
--- a/clang/test/CodeGen/builtins-arm64.c
+++ b/clang/test/CodeGen/builtins-arm64.c
@@ -62,6 +62,55 @@ void prefetch(void) {
   // CHECK: call {{.*}} @llvm.aarch64.prefetch(ptr null, i32 0, i32 3, i32 0, i32 1)
 }
 
+void range_prefetch(void) {
+  __builtin_arm_range_prefetch(0, 0, 0, 0); // pldkeep
+  // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, i64 0)
+
+  __builtin_arm_range_prefetch(0, 0, 1, 0); // pldstrm
+  // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 1, i64 0)
+
+  __builtin_arm_range_prefetch(0, 1, 0, 0); // pstkeep
+  // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 1, i32 0, i64 0)
+
+  __builtin_arm_range_prefetch(0, 1, 1, 0); // pststrm
+  // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 1, i32 1, i64 0)
+}
+
+void range_prefetch_x(void) {
+  __builtin_arm_range_prefetch_x(0, 0, 0, 0, 1, 0, 0); // pldkeep
+  // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, i64 0)
+  __builtin_arm_range_prefetch_x(0, 0, 1, 0, 1, 0, 0); // pldstrm
+  // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 1, i64 0)
+  __builtin_arm_range_prefetch_x(0, 1, 0, 0, 1, 0, 0); // pstkeep
+  // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 1, i32 0, i64 0)
+  __builtin_arm_range_prefetch_x(0, 1, 1, 0, 1, 0, 0); // pststrm
+  // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 1, i32 1, i64 0)
+
+  // Lower limits (length, count & stride)
+  __builtin_arm_range_prefetch_x(0, 0, 0, -2097152, 1, -2097152, 0);
+  // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, i64 576460752305520640)
+
+  // Upper limits (length, count & stride)
+  __builtin_arm_range_prefetch_x(0, 0, 0, 2097151, 65536, 2097151, 0);
+  // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, i64 576460752301326335)
+
+  // Distance less than minimum, round up to first power of two (1111)
+  __builtin_arm_range_prefetch_x(0, 0, 0, 0, 1, 0, 1);
+  // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, i64 -1152921504606846976)
+
+  // Distance 1 over minimum, round up to next power of 2 (1110)
+  __builtin_arm_range_prefetch_x(0, 0, 0, 0, 1, 0, 32769);
+  // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, i64 -2305843009213693952)
+
+  // Distance is a power of two in range (1010)
+  __builtin_arm_range_prefetch_x(0, 0, 0, 0, 1, 0, 1048576);
+  // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, i64 -6917529027641081856)
+
+  // Distance is out of range, set to 0 (0000)
+  __builtin_arm_range_prefetch_x(0, 0, 0, 0, 1, 0, 536870913);
+  // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, i64 0)
+}
+
 __attribute__((target("v8.5a")))
 int32_t jcvt(double v) {
   //CHECK-LABEL: @jcvt(
diff --git a/clang/test/Preprocessor/aarch64-target-features.c b/clang/test/Preprocessor/aarch64-target-features.c
index 4dd243e57a63e..137840f6d2864 100644
--- a/clang/test/Preprocessor/aarch64-target-features.c
+++ b/clang/test/Preprocessor/aarch64-target-features.c
@@ -41,6 +41,7 @@
 // CHECK: __ARM_NEON_FP 0xE
 // CHECK: __ARM_NEON_SVE_BRIDGE 1
 // CHECK: __ARM_PCS_AAPCS64 1
+// CHECK: __ARM_PREFETCH_RANGE 1
 // CHECK-NOT: __ARM_PCS 1
 // CHECK-NOT: __ARM_PCS_VFP 1
 // CHECK-NOT: __ARM_SIZEOF_MINIMAL_ENUM 1
diff --git a/clang/test/Preprocessor/init-aarch64.c b/clang/test/Preprocessor/init-aarch64.c
index 460778f39d003..09e3fc926a309 100644
--- a/clang/test/Preprocessor/init-aarch64.c
+++ b/clang/test/Preprocessor/init-aarch64.c
@@ -32,6 +32,7 @@
 // AARCH64-NEXT: #define __ARM_FP16_FORMAT_IEEE 1
 // AARCH64-NEXT: #define __ARM_NEON_SVE_BRIDGE 1
 // AARCH64-NEXT: #define __ARM_PCS_AAPCS64 1
+// AARCH64-NEXT: #define __ARM_PREFETCH_RANGE 1
 // AARCH64-NEXT: #define __ARM_SIZEOF_MINIMAL_ENUM 4
 // AARCH64-NEXT: #define __ARM_SIZEOF_WCHAR_T 4
 // AARCH64-NEXT: #define __ARM_STATE_ZA 1
diff --git a/clang/test/Sema/builtins-arm64.c b/clang/test/Sema/builtins-arm64.c
index f094162b3aadc..41cffd7ebb1a0 100644
--- a/clang/test/Sema/builtins-arm64.c
+++ b/clang/test/Sema/builtins-arm64.c
@@ -30,6 +30,19 @@ void test_prefetch(void) {
   __builtin_arm_prefetch(0, 0, 0, 0, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 }
 
+void test_range_prefetch(void) {
+  __builtin_arm_range_prefetch(0, 2, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  __builtin_arm_range_prefetch(0, 0, 2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+  __builtin_arm_range_prefetch_x(0, 2, 0, 0, 0, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  __builtin_arm_range_prefetch_x(0, 0, 2, 0, 0, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  __builtin_arm_range_prefetch_x(0, 0, 0, -2097153, 0, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  __builtin_arm_range_prefetch_x(0, 0, 0, 2097152, 0, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  __builtin_arm_range_prefetch_x(0, 0, 0, 0, 65537, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  __builtin_arm_range_prefetch_x(0, 0, 0, 0, 1, -2097153, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  __builtin_arm_range_prefetch_x(0, 0, 0, 0, 1, 2097152, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
 void test_trap(short s, unsigned short us) {
   __builtin_arm_trap(42);
   __builtin_arm_trap(65535);
@@ -37,4 +50,4 @@ void test_trap(short s, unsigned short us) {
   __builtin_arm_trap(65536); // expected-warning {{implicit conversion from 'int' to 'unsigned short' changes value from 65536 to 0}}
   __builtin_arm_trap(s); // expected-error {{argument to '__builtin_arm_trap' must be a constant integer}}
   __builtin_arm_trap(us); // expected-error {{argument to '__builtin_arm_trap' must be a constant integer}}
-}
\ No newline at end of file
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 1c86c6815f049..8a0459d1fa0a2 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -76,6 +76,12 @@ def int_aarch64_prefetch : Intrinsic<[],
     ]>,
     ClangBuiltin<"__builtin_arm_prefetch">;
 
+def int_aarch64_range_prefetch : Intrinsic<[],
+    [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty],
+    [IntrInaccessibleMemOrArgMemOnly, IntrWillReturn, ReadOnly<ArgIndex<0>>,
+     ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>,
+    ClangBuiltin<"__builtin_arm_range_prefetch">;
+
 //===----------------------------------------------------------------------===//
 // Data Barrier Instructions
 
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index a1e14d8f25bf7..8bbc3aec486c2 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -6701,6 +6701,14 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
           "isdata argument to llvm.aarch64.prefetch must be 0 or 1", Call);
     break;
   }
+  case Intrinsic::aarch64_range_prefetch: {
+    Check(cast<ConstantInt>(Call.getArgOperand(1))->getZExtValue() < 2,
+          "write argument to llvm.aarch64.range.prefetch must be 0 or 1", Call);
+    Check(cast<ConstantInt>(Call.getArgOperand(2))->getZExtValue() < 2,
+          "stream argument to llvm.aarch64.range.prefetch must be 0 or 1",
+          Call);
+    break;
+  }
   case Intrinsic::callbr_landingpad: {
     const auto *CBR = dyn_cast<CallBrInst>(Call.getOperand(0));
     Check(CBR, "intrinstic requires callbr operand", &Call);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2ce8f6d924a78..c294ea75b05c0 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6161,6 +6161,19 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_VOID(SDValue Op,
     return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Chain,
                        DAG.getTargetConstant(PrfOp, DL, MVT::i32), Addr);
   }
+  case Intrinsic::aarch64_range_prefetch: {
+    SDValue Chain = Op.getOperand(0);
+    SDValue Addr = Op.getOperand(2);
+
+    unsigned IsWrite = Op.getConstantOperandVal(3);
+    unsigned IsStream = Op.getConstantOperandVal(4);
+    unsigned PrfOp = (IsStream << 2) | IsWrite;
+
+    SDValue Metadata = Op.getOperand(5);
+    return DAG.getNode(AArch64ISD::RANGE_PREFETCH, DL, MVT::Other, Chain,
+                       DAG.getTargetConstant(PrfOp, DL, MVT::i32), Addr,
+                       Metadata);
+  }
   case Intrinsic::aarch64_sme_str:
   case Intrinsic::aarch64_sme_ldr: {
     return LowerSMELdrStr(Op, DAG, IntNo == Intrinsic::aarch64_sme_ldr);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 7d99786830e3d..c40a9e34b37a2 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -192,6 +192,12 @@ def G_AARCH64_PREFETCH : AArch64GenericInstruction {
   let hasSideEffects = 1;
 }
 
+def G_AARCH64_RANGE_PREFETCH : AArch64GenericInstruction {
+  let OutOperandList = (outs);
+  let InOperandList = (ins type0:$imm, ptype0:$src1, type1:$src2);
+  let hasSideEffects = 1;
+}
+
 def G_UMULL : AArch64GenericInstruction {
   let OutOperandList = (outs type0:$dst);
   let InOperandList = (ins type0:$src1, type0:$src2);
@@ -303,6 +309,7 @@
 def : GINodeEquiv<G_UMULL, AArch64umull>;
 def : GINodeEquiv<G_SMULL, AArch64smull>;
 
 def : GINodeEquiv<G_AARCH64_PREFETCH, AArch64Prefetch>;
+def : GINodeEquiv<G_AARCH64_RANGE_PREFETCH, AArch64RangePrefetch>;
 
 def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index da93a2b13fc11..710beb11f4dcc 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -536,6 +536,7 @@ def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                              SDTCisSameAs<0,3>]>;
 def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;
 def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>;
+def SDT_AArch64RANGE_PREFETCH: SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisPtrTy<2>]>;
 
 def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>;
 
@@ -1038,6 +1039,10 @@ def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET,
 def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH,
                              [SDNPHasChain, SDNPSideEffect]>;
 
+def AArch64RangePrefetch: SDNode<"AArch64ISD::RANGE_PREFETCH",
+                                 SDT_AArch64RANGE_PREFETCH,
+                                 [SDNPHasChain, SDNPSideEffect]>;
+
 // {s|u}int to FP within a FP register.
 def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
 def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;
@@ -10980,6 +10985,9 @@ def RPRFM:
   let DecoderNamespace = "Fallback";
 }
 
+def : Pat<(AArch64RangePrefetch rprfop:$Rt, GPR64sp:$Rn, GPR64:$Rm),
+          (RPRFM rprfop:$Rt, GPR64:$Rm, GPR64sp:$Rn)>;
+
 //===----------------------------------------------------------------------===//
 // 128-bit Atomics (FEAT_LSE128)
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 1025b2502211a..902378cc6f46d 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1749,6 +1749,20 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
     MI.eraseFromParent();
     return true;
   }
+  case Intrinsic::aarch64_range_prefetch: {
+    auto &AddrVal = MI.getOperand(1);
+
+    int64_t IsWrite = MI.getOperand(2).getImm();
+    int64_t IsStream = MI.getOperand(3).getImm();
+    unsigned PrfOp = (IsStream << 2) | IsWrite;
+
+    MIB.buildInstr(AArch64::G_AARCH64_RANGE_PREFETCH)
+        .addImm(PrfOp)
+        .add(AddrVal)
+        .addUse(MI.getOperand(4).getReg()); // Metadata
+    MI.eraseFromParent();
+    return true;
+  }
   case Intrinsic::aarch64_neon_uaddv:
   case Intrinsic::aarch64_neon_saddv:
   case Intrinsic::aarch64_neon_umaxv:
@@ -2506,4 +2520,4 @@ bool AArch64LegalizerInfo::legalizeFptrunc(MachineInstr &MI,
   MRI.replaceRegWith(Dst, Fin);
   MI.eraseFromParent();
   return true;
-}
\ No newline at end of file
+}
diff --git a/llvm/test/CodeGen/AArch64/range-prefetch.ll b/llvm/test/CodeGen/AArch64/range-prefetch.ll
new file mode 100644
index 0000000000000..b0242c7b7e20a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/range-prefetch.ll
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=aarch64 -mattr=+v8.9a --global-isel=0 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64 -mattr=+v8.9a --global-isel=1 --global-isel-abort=1 < %s | FileCheck %s
+
+define void @range_prefetch_metadata_accesses(ptr %a, i64 %metadata) {
+; CHECK-LABEL: range_prefetch_metadata_accesses:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rprfm pldkeep, x1, [x0]
+; CHECK-NEXT:    rprfm pstkeep, x1, [x0]
+; CHECK-NEXT:    rprfm pldstrm, x1, [x0]
+; CHECK-NEXT:    rprfm pststrm, x1, [x0]
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.range.prefetch(ptr %a, i32 0, i32 0, i64 %metadata)
+  call void @llvm.aarch64.range.prefetch(ptr %a, i32 1, i32 0, i64 %metadata)
+  call void @llvm.aarch64.range.prefetch(ptr %a, i32 0, i32 1, i64 %metadata)
+  call void @llvm.aarch64.range.prefetch(ptr %a, i32 1, i32 1, i64 %metadata)
+  ret void
+}
+
+define void @range_prefetch_metadata_const(ptr %a) {
+; CHECK-LABEL: range_prefetch_metadata_const:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #2097152 // =0x200000
+; CHECK-NEXT:    movk x8, #2048, lsl #48
+; CHECK-NEXT:    rprfm pldkeep, x8, [x0]
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.range.prefetch(ptr %a, i32 0, i32 0, i64 576460752305520640)
+  ret void
+}
diff --git a/llvm/test/Verifier/AArch64/intrinsic-immarg.ll b/llvm/test/Verifier/AArch64/intrinsic-immarg.ll
new file mode 100644
index 0000000000000..e17c11d66dac4
--- /dev/null
+++ b/llvm/test/Verifier/AArch64/intrinsic-immarg.ll
@@ -0,0 +1,13 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+define void @range_prefetch(ptr %src, i64 %metadata) {
+  ; CHECK: write argument to llvm.aarch64.range.prefetch must be 0 or 1
+  ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch(ptr %src, i32 2, i32 0, i64 %metadata)
+  call void @llvm.aarch64.range.prefetch(ptr %src, i32 2, i32 0, i64 %metadata)
+
+  ; CHECK-NEXT: stream argument to llvm.aarch64.range.prefetch must be 0 or 1
+  ; CHECK-NEXT: call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 2, i64 %metadata)
+  call void @llvm.aarch64.range.prefetch(ptr %src, i32 0, i32 2, i64 %metadata)
+
+  ret void
+}
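
A usage sketch, illustrative only and not part of the patch: assuming the arm_acle.h additions above are present and the target defines __ARM_PREFETCH_RANGE, a fixed-width column walk can describe its access pattern through the new __pldx_range macro. The function name, matrix width, and the count of 64 accesses below are hypothetical; the argument order follows the macro added to arm_acle.h, and length, count, stride, and reuse_distance must be integer constant expressions, matching the SemaARM checks above.

// Hypothetical example using the __pldx_range macro introduced by this patch.
#include <arm_acle.h>
#include <stddef.h>

#define COLS 1024 /* fixed row width so the stride is a constant expression */

double sum_column(const double m[][COLS], size_t rows, size_t col) {
  double s = 0.0;
#if defined(__ARM_PREFETCH_RANGE)
  /* Read access (0), keep policy (0): each access touches sizeof(double)
     bytes, 64 accesses are expected, separated by one row of
     COLS * sizeof(double) bytes; reuse distance unknown (0). */
  __pldx_range(0, 0, sizeof(double), 64, COLS * sizeof(double), 0, &m[0][col]);
#endif
  for (size_t r = 0; r < rows; ++r)
    s += m[r][col];
  return s;
}

For the packed __pld_range form, the metadata operand uses the layout computed in EmitRangePrefetchBuiltin above: bits 21:0 hold the length, bits 37:22 hold count minus one, bits 59:38 hold the stride, and bits 63:60 hold the encoded reuse distance. For example, the constant 576460752305520640 (0x0800000000200000) used in the tests packs the lower-limit length and stride (-2097152, which masks to 0x200000 in 22 bits), a count of 1, and no reuse-distance hint.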