Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions clang/include/clang/Basic/arm_sve.td
Original file line number Diff line number Diff line change
Expand Up @@ -1841,6 +1841,23 @@ def SVPMULLT_PAIR_U64 : SInst<"svpmullt_pair[_{d}]", "ddd", "Ul", MergeNone,
def SVPMULLT_PAIR_N_U64 : SInst<"svpmullt_pair[_n_{d}]", "dda", "Ul", MergeNone, "aarch64_sve_pmullt_pair", [VerifyRuntimeMode]>;
}

let SVETargetGuard = "sve-aes2", SMETargetGuard = "sve-aes2,ssve-aes" in {
def SVAESD_X2 : SInst<"svaesd_lane[_{d}_x2]", "22di", "Uc", MergeNone, "aarch64_sve_aesd_lane_x2", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>;
def SVAESDIMC_X2 : SInst<"svaesdimc_lane[_{d}_x2]", "22di", "Uc", MergeNone, "aarch64_sve_aesdimc_lane_x2", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>;
def SVAESE_X2 : SInst<"svaese_lane[_{d}_x2]", "22di", "Uc", MergeNone, "aarch64_sve_aese_lane_x2", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>;
def SVAESEMC_X2 : SInst<"svaesemc_lane[_{d}_x2]", "22di", "Uc", MergeNone, "aarch64_sve_aesemc_lane_x2", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>;

def SVAESD_X4 : SInst<"svaesd_lane[_{d}_x4]", "44di", "Uc", MergeNone, "aarch64_sve_aesd_lane_x4", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>;
def SVAESDIMC_X4 : SInst<"svaesdimc_lane[_{d}_x4]", "44di", "Uc", MergeNone, "aarch64_sve_aesdimc_lane_x4", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>;
def SVAESE_X4 : SInst<"svaese_lane[_{d}_x4]", "44di", "Uc", MergeNone, "aarch64_sve_aese_lane_x4", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>;
def SVAESEMC_X4 : SInst<"svaesemc_lane[_{d}_x4]", "44di", "Uc", MergeNone, "aarch64_sve_aesemc_lane_x4", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>;

def SVPMULL_PAIR_U64 : SInst<"svpmull_pair[_{d}_x2]", "2dd", "Ul", MergeNone, "aarch64_sve_pmull_pair_x2", [IsOverloadNone, VerifyRuntimeMode]>;
def SVPMULL_PAIR_N_U64 : SInst<"svpmull_pair[_n_{d}_x2]", "2da", "Ul", MergeNone, "aarch64_sve_pmull_pair_x2", [IsOverloadNone, VerifyRuntimeMode]>;
def SVPMLAL_PAIR_U64 : SInst<"svpmlal_pair[_{d}_x2]", "22dd", "Ul", MergeNone, "aarch64_sve_pmlal_pair_x2", [IsOverloadNone, VerifyRuntimeMode]>;
def SVPMLAL_PAIR_N_U64 : SInst<"svpmlal_pair[_n_{d}_x2]", "22da", "Ul", MergeNone, "aarch64_sve_pmlal_pair_x2", [IsOverloadNone, VerifyRuntimeMode]>;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The ACLE change describes variants of pmull & pmlal for _s64x2 and _f64x2. Should these be included here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. That's actually an ACLE issue. Since this instruction is essentially a mix of pmullb and pmullt, the intrinsics should be the same as for those (so only the u64 variant should exist).

}

// svrax1 builtin, lowered to the aarch64_sve_rax1 LLVM intrinsic. It is gated
// on the "sve-sha3" target guard for SVE and on "sme2p1,sve-sha3" for SME.
// NOTE(review): the "lUl" type spec presumably selects the signed and unsigned
// 64-bit element variants -- confirm against the type-spec legend in this file.
let SVETargetGuard = "sve-sha3", SMETargetGuard = "sme2p1,sve-sha3" in {
def SVRAX1 : SInst<"svrax1[_{d}]", "ddd", "lUl", MergeNone, "aarch64_sve_rax1", [IsOverloadNone, VerifyRuntimeMode]>;
}
Expand Down
217 changes: 217 additions & 0 deletions clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_crypto.c

Large diffs are not rendered by default.

26 changes: 26 additions & 0 deletions clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -254,4 +254,30 @@ void test_svdup_laneq(){
svdup_laneq_f32(zn_f32,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
svdup_laneq_f64(zn_f64,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
svdup_laneq_bf16(zn_bf16,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
}

// Negative test for the AES lane intrinsics taking a two-vector tuple
// (svuint8x2_t): the trailing immediate argument must be diagnosed when it
// lies outside [0, 3]. Both an underflowing value (-1) and the first value
// past the upper bound (4) are checked for each of the four intrinsics.
__attribute__((target("+sve-aes2")))
void test_aes_x2_imm_0_3(svuint8x2_t op1, svuint8_t op2) {
svaesd_lane(op1, op2, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
svaesdimc_lane(op1, op2, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
svaese_lane(op1, op2, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
svaesemc_lane(op1, op2, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}

svaesd_lane(op1, op2, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
svaesdimc_lane(op1, op2, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
svaese_lane(op1, op2, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
svaesemc_lane(op1, op2, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
}

// Negative test for the AES lane intrinsics taking a four-vector tuple
// (svuint8x4_t): the trailing immediate argument must be diagnosed when it
// lies outside [0, 3]. Both an underflowing value (-1) and the first value
// past the upper bound (4) are checked for each of the four intrinsics.
__attribute__((target("+sve-aes2")))
void test_aes_x4_imm_0_3(svuint8x4_t op1, svuint8_t op2) {
svaesd_lane(op1, op2, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
svaesdimc_lane(op1, op2, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
svaese_lane(op1, op2, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
svaesemc_lane(op1, op2, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}

svaesd_lane(op1, op2, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
svaesdimc_lane(op1, op2, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
svaese_lane(op1, op2, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
svaesemc_lane(op1, op2, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
}
9 changes: 8 additions & 1 deletion clang/utils/TableGen/SveEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -267,14 +267,21 @@ class Intrinsic {
unsigned getSplatIdx() const {
unsigned I = 1, Param = 0;
for (; I < Proto.size(); ++I, ++Param) {
assert(Proto[I] != '4' &&
"Handling for '4' prototype modifier not implemented");
if (Proto[I] == 'a' || Proto[I] == 'j' || Proto[I] == 'f' ||
Proto[I] == 'r' || Proto[I] == 'K' || Proto[I] == 'L' ||
Proto[I] == 'R' || Proto[I] == '@' || Proto[I] == '!')
break;

if (Proto[I] == '2')
Param += 1;

// Multivector modifier can be skipped
if (Proto[I] == '.')
if (Proto[I] == '.') {
Param -= 1; // Adjust for the increment at the top of the loop
I += 2;
}
}
assert(I != Proto.size() && "Prototype has no splat operand");
return Param;
Expand Down
27 changes: 27 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsAArch64.td
Original file line number Diff line number Diff line change
Expand Up @@ -4194,4 +4194,31 @@ let TargetPrefix = "aarch64" in {
def int_aarch64_sme_fp8_fvdot_lane_za16_vg1x2 : SME_FP8_ZA_LANE_VGx2_Intrinsic;
def int_aarch64_sme_fp8_fvdotb_lane_za32_vg1x4 : SME_FP8_ZA_LANE_VGx2_Intrinsic;
def int_aarch64_sme_fp8_fvdott_lane_za32_vg1x4 : SME_FP8_ZA_LANE_VGx2_Intrinsic;

// AES2
class SVE2_Crypto_LANE_X2_Intrinsic
: DefaultAttrsIntrinsic<[llvm_nxv16i8_ty, llvm_nxv16i8_ty],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These intrinsics look like they need to have this in front of them: GCCBuiltin<"__builtin_sve_svaesd_u8">,

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I cannot find any reference to GCCBuiltin anywhere in the code. What is it supposed to do?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I did not see that they've changed the name. It is now: ClangBuiltin

def int_aarch64_sve_aese : ClangBuiltin<"__builtin_sve_svaese_u8">,
DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],

Can you also add this to the new ones?

Copy link
Contributor Author

@Lukacma Lukacma Dec 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think there is any purpose to it, though. This mapping of builtins to intrinsics is already generated by SVEEmitter, so there is no point in doing it manually here. Unless I am missing something, of course?

[llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty],
[ImmArg<ArgIndex<3>>, IntrNoMem]>;
// Signature class for the four-vector AES lane intrinsics: produces four
// nxv16i8 results from five nxv16i8 operands followed by an i32. The i32
// (argument index 5) must be an immediate, and the intrinsic does not access
// memory (IntrNoMem).
class SVE2_Crypto_LANE_X4_Intrinsic
: DefaultAttrsIntrinsic<[llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty],
[llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty,
llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty],
[ImmArg<ArgIndex<5>>, IntrNoMem]>;

// Two-vector AES lane intrinsics; all share the X2 signature class.
def int_aarch64_sve_aesd_lane_x2 : SVE2_Crypto_LANE_X2_Intrinsic;
def int_aarch64_sve_aesdimc_lane_x2 : SVE2_Crypto_LANE_X2_Intrinsic;
def int_aarch64_sve_aese_lane_x2 : SVE2_Crypto_LANE_X2_Intrinsic;
def int_aarch64_sve_aesemc_lane_x2 : SVE2_Crypto_LANE_X2_Intrinsic;

// Four-vector AES lane intrinsics; all share the X4 signature class.
def int_aarch64_sve_aesd_lane_x4 : SVE2_Crypto_LANE_X4_Intrinsic;
def int_aarch64_sve_aesdimc_lane_x4 : SVE2_Crypto_LANE_X4_Intrinsic;
def int_aarch64_sve_aese_lane_x4 : SVE2_Crypto_LANE_X4_Intrinsic;
def int_aarch64_sve_aesemc_lane_x4 : SVE2_Crypto_LANE_X4_Intrinsic;

// pmull_pair_x2: two nxv2i64 results from two nxv2i64 operands; no memory access.
def int_aarch64_sve_pmull_pair_x2 : DefaultAttrsIntrinsic<[llvm_nxv2i64_ty, llvm_nxv2i64_ty],
[llvm_nxv2i64_ty, llvm_nxv2i64_ty], [IntrNoMem]>;
// pmlal_pair_x2: two nxv2i64 results from four nxv2i64 operands; no memory access.
// NOTE(review): presumably the first two operands are the accumulator pair -- confirm.
def int_aarch64_sve_pmlal_pair_x2 : DefaultAttrsIntrinsic<[llvm_nxv2i64_ty, llvm_nxv2i64_ty],
[llvm_nxv2i64_ty, llvm_nxv2i64_ty, llvm_nxv2i64_ty, llvm_nxv2i64_ty], [IntrNoMem]>;
}

65 changes: 53 additions & 12 deletions llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1973,27 +1973,28 @@ void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,

SDLoc DL(N);
EVT VT = N->getValueType(0);
unsigned FirstVecIdx = HasPred ? 2 : 1;
SDUse *OpsIter = N->op_begin() + 1; // Skip intrinsic ID
SmallVector<SDValue, 4> Ops;

auto GetMultiVecOperand = [=](unsigned StartIdx) {
SmallVector<SDValue, 4> Regs(N->ops().slice(StartIdx, NumVecs));
auto GetMultiVecOperand = [&]() {
SmallVector<SDValue, 4> Regs(OpsIter, OpsIter + NumVecs);
OpsIter += NumVecs;
return createZMulTuple(Regs);
};

SDValue Zdn = GetMultiVecOperand(FirstVecIdx);
if (HasPred)
Ops.push_back(*OpsIter++);

SDValue Zm;
Ops.push_back(GetMultiVecOperand());
if (IsZmMulti)
Zm = GetMultiVecOperand(NumVecs + FirstVecIdx);
Ops.push_back(GetMultiVecOperand());
else
Zm = N->getOperand(NumVecs + FirstVecIdx);
Ops.push_back(*OpsIter++);

// Append any remaining operands.
Ops.append(OpsIter, N->op_end());
SDNode *Intrinsic;
if (HasPred)
Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped,
N->getOperand(1), Zdn, Zm);
else
Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm);
Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
SDValue SuperReg = SDValue(Intrinsic, 0);
for (unsigned i = 0; i < NumVecs; ++i)
ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
Expand Down Expand Up @@ -6248,6 +6249,46 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
return;
case Intrinsic::aarch64_sve_aese_lane_x2:
SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESE_2ZZI_B);
return;
case Intrinsic::aarch64_sve_aesd_lane_x2:
SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESD_2ZZI_B);
return;
case Intrinsic::aarch64_sve_aesemc_lane_x2:
SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESEMC_2ZZI_B);
return;
case Intrinsic::aarch64_sve_aesdimc_lane_x2:
SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESDIMC_2ZZI_B);
return;
case Intrinsic::aarch64_sve_aese_lane_x4:
SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESE_4ZZI_B);
return;
case Intrinsic::aarch64_sve_aesd_lane_x4:
SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESD_4ZZI_B);
return;
case Intrinsic::aarch64_sve_aesemc_lane_x4:
SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESEMC_4ZZI_B);
return;
case Intrinsic::aarch64_sve_aesdimc_lane_x4:
SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESDIMC_4ZZI_B);
return;
case Intrinsic::aarch64_sve_pmlal_pair_x2:
SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::PMLAL_2ZZZ_Q);
return;
case Intrinsic::aarch64_sve_pmull_pair_x2: {
SDLoc DL(Node);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we use one of the existing functions to do this?
I did not look into it too deeply, but what about SelectCVTIntrinsic?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I couldn't find a function which would do what I need here. SelectCVTIntrinsic wouldn't work, as it creates a Ztuple, which is not correct here.

SmallVector<SDValue, 4> Regs(Node->ops().slice(1, 2));
SDNode *Res =
CurDAG->getMachineNode(AArch64::PMULL_2ZZZ_Q, DL, MVT::Untyped, Regs);
SDValue SuperReg = SDValue(Res, 0);
for (unsigned I = 0; I < 2; I++)
ReplaceUses(SDValue(Node, I),
CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
SuperReg));
CurDAG->RemoveDeadNode(Node);
return;
}
case Intrinsic::aarch64_sve_fcvtzs_x2:
SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
return;
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -4219,12 +4219,12 @@ let Predicates = [HasSVEAES2, HasNonStreamingSVE_or_SSVE_AES] in {
def AESE_2ZZI_B : sve_crypto_binary_multi2<0b000, "aese">;
def AESD_2ZZI_B : sve_crypto_binary_multi2<0b010, "aesd">;
def AESEMC_2ZZI_B : sve_crypto_binary_multi2<0b100, "aesemc">;
def AESDMIC_2ZZI_B : sve_crypto_binary_multi2<0b110, "aesdimc">;
def AESDIMC_2ZZI_B : sve_crypto_binary_multi2<0b110, "aesdimc">;
// SVE_AES2 multi-vector instructions (x4)
def AESE_4ZZI_B : sve_crypto_binary_multi4<0b0000, "aese">;
def AESD_4ZZI_B : sve_crypto_binary_multi4<0b0100, "aesd">;
def AESEMC_4ZZI_B : sve_crypto_binary_multi4<0b1000, "aesemc">;
def AESDMIC_4ZZI_B : sve_crypto_binary_multi4<0b1100, "aesdimc">;
def AESDIMC_4ZZI_B : sve_crypto_binary_multi4<0b1100, "aesdimc">;

// SVE_AES2 multi-vector polynomial multiply
def PMLAL_2ZZZ_Q : sve_crypto_pmlal_multi<"pmlal">;
Expand Down
Loading