-
Notifications
You must be signed in to change notification settings - Fork 15.6k
[AArch64] Add intrinsics for 9.6 crypto instructions #165545
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
44338e5
d86b9eb
6bb197d
28de915
c38d53d
743c28f
d59eaa3
7ac37fa
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4194,4 +4194,31 @@ let TargetPrefix = "aarch64" in { | |
| def int_aarch64_sme_fp8_fvdot_lane_za16_vg1x2 : SME_FP8_ZA_LANE_VGx2_Intrinsic; | ||
| def int_aarch64_sme_fp8_fvdotb_lane_za32_vg1x4 : SME_FP8_ZA_LANE_VGx2_Intrinsic; | ||
| def int_aarch64_sme_fp8_fvdott_lane_za32_vg1x4 : SME_FP8_ZA_LANE_VGx2_Intrinsic; | ||
|
|
||
| // AES2 | ||
| class SVE2_Crypto_LANE_X2_Intrinsic | ||
| : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty, llvm_nxv16i8_ty], | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. These intrinsics look like they need to have this in front of them: GCCBuiltin<"__builtin_sve_svaesd_u8">,
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. I cannot find any reference to GCCBuiltin anywhere in the code. What is it supposed to do?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Sorry, I did not see that they've changed the name. It is now ClangBuiltin, e.g.: def int_aarch64_sve_aese : ClangBuiltin<"__builtin_sve_svaese_u8">, Can you also add this to the new ones?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. I don't think it serves any purpose, though. This mapping of builtins to intrinsics is already generated by SVEEmitter, so there is no point in doing it manually here. Unless I am missing something, of course? |
||
| [llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty], | ||
| [ImmArg<ArgIndex<3>>, IntrNoMem]>; | ||
| class SVE2_Crypto_LANE_X4_Intrinsic | ||
| : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty], | ||
| [llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, | ||
| llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty], | ||
| [ImmArg<ArgIndex<5>>, IntrNoMem]>; | ||
|
|
||
| def int_aarch64_sve_aesd_lane_x2 : SVE2_Crypto_LANE_X2_Intrinsic; | ||
| def int_aarch64_sve_aesdimc_lane_x2 : SVE2_Crypto_LANE_X2_Intrinsic; | ||
| def int_aarch64_sve_aese_lane_x2 : SVE2_Crypto_LANE_X2_Intrinsic; | ||
| def int_aarch64_sve_aesemc_lane_x2 : SVE2_Crypto_LANE_X2_Intrinsic; | ||
|
|
||
| def int_aarch64_sve_aesd_lane_x4 : SVE2_Crypto_LANE_X4_Intrinsic; | ||
| def int_aarch64_sve_aesdimc_lane_x4 : SVE2_Crypto_LANE_X4_Intrinsic; | ||
| def int_aarch64_sve_aese_lane_x4 : SVE2_Crypto_LANE_X4_Intrinsic; | ||
| def int_aarch64_sve_aesemc_lane_x4 : SVE2_Crypto_LANE_X4_Intrinsic; | ||
|
|
||
| def int_aarch64_sve_pmull_pair_x2 : DefaultAttrsIntrinsic<[llvm_nxv2i64_ty, llvm_nxv2i64_ty], | ||
| [llvm_nxv2i64_ty, llvm_nxv2i64_ty], [IntrNoMem]>; | ||
| def int_aarch64_sve_pmlal_pair_x2 : DefaultAttrsIntrinsic<[llvm_nxv2i64_ty, llvm_nxv2i64_ty], | ||
| [llvm_nxv2i64_ty, llvm_nxv2i64_ty, llvm_nxv2i64_ty, llvm_nxv2i64_ty], [IntrNoMem]>; | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1973,27 +1973,28 @@ void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N, | |
|
|
||
| SDLoc DL(N); | ||
| EVT VT = N->getValueType(0); | ||
| unsigned FirstVecIdx = HasPred ? 2 : 1; | ||
| SDUse *OpsIter = N->op_begin() + 1; // Skip intrinsic ID | ||
| SmallVector<SDValue, 4> Ops; | ||
|
|
||
| auto GetMultiVecOperand = [=](unsigned StartIdx) { | ||
| SmallVector<SDValue, 4> Regs(N->ops().slice(StartIdx, NumVecs)); | ||
| auto GetMultiVecOperand = [&]() { | ||
| SmallVector<SDValue, 4> Regs(OpsIter, OpsIter + NumVecs); | ||
| OpsIter += NumVecs; | ||
| return createZMulTuple(Regs); | ||
| }; | ||
|
|
||
| SDValue Zdn = GetMultiVecOperand(FirstVecIdx); | ||
| if (HasPred) | ||
| Ops.push_back(*OpsIter++); | ||
|
|
||
| SDValue Zm; | ||
| Ops.push_back(GetMultiVecOperand()); | ||
| if (IsZmMulti) | ||
| Zm = GetMultiVecOperand(NumVecs + FirstVecIdx); | ||
| Ops.push_back(GetMultiVecOperand()); | ||
| else | ||
| Zm = N->getOperand(NumVecs + FirstVecIdx); | ||
| Ops.push_back(*OpsIter++); | ||
|
|
||
| // Append any remaining operands. | ||
| Ops.append(OpsIter, N->op_end()); | ||
| SDNode *Intrinsic; | ||
| if (HasPred) | ||
| Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, | ||
| N->getOperand(1), Zdn, Zm); | ||
| else | ||
| Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm); | ||
| Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops); | ||
| SDValue SuperReg = SDValue(Intrinsic, 0); | ||
| for (unsigned i = 0; i < NumVecs; ++i) | ||
| ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( | ||
|
|
@@ -6248,6 +6249,46 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { | |
| AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D})) | ||
| SelectDestructiveMultiIntrinsic(Node, 4, true, Op); | ||
| return; | ||
| case Intrinsic::aarch64_sve_aese_lane_x2: | ||
| SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESE_2ZZI_B); | ||
| return; | ||
| case Intrinsic::aarch64_sve_aesd_lane_x2: | ||
| SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESD_2ZZI_B); | ||
| return; | ||
| case Intrinsic::aarch64_sve_aesemc_lane_x2: | ||
| SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESEMC_2ZZI_B); | ||
| return; | ||
| case Intrinsic::aarch64_sve_aesdimc_lane_x2: | ||
| SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESDIMC_2ZZI_B); | ||
| return; | ||
| case Intrinsic::aarch64_sve_aese_lane_x4: | ||
| SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESE_4ZZI_B); | ||
| return; | ||
| case Intrinsic::aarch64_sve_aesd_lane_x4: | ||
| SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESD_4ZZI_B); | ||
| return; | ||
| case Intrinsic::aarch64_sve_aesemc_lane_x4: | ||
| SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESEMC_4ZZI_B); | ||
| return; | ||
| case Intrinsic::aarch64_sve_aesdimc_lane_x4: | ||
| SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESDIMC_4ZZI_B); | ||
| return; | ||
| case Intrinsic::aarch64_sve_pmlal_pair_x2: | ||
| SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::PMLAL_2ZZZ_Q); | ||
| return; | ||
| case Intrinsic::aarch64_sve_pmull_pair_x2: { | ||
| SDLoc DL(Node); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Can we use one of the existing functions to do this?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. I couldn't find a function which would do what I need here. SelectCVTIntrinsic wouldn't work, as it creates a Ztuple, which is not correct here. |
||
| SmallVector<SDValue, 4> Regs(Node->ops().slice(1, 2)); | ||
| SDNode *Res = | ||
| CurDAG->getMachineNode(AArch64::PMULL_2ZZZ_Q, DL, MVT::Untyped, Regs); | ||
| SDValue SuperReg = SDValue(Res, 0); | ||
| for (unsigned I = 0; I < 2; I++) | ||
| ReplaceUses(SDValue(Node, I), | ||
| CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT, | ||
| SuperReg)); | ||
| CurDAG->RemoveDeadNode(Node); | ||
| return; | ||
| } | ||
| case Intrinsic::aarch64_sve_fcvtzs_x2: | ||
| SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS); | ||
| return; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The ACLE change describes variants of
pmull & pmlal for _s64x2 and _f64x2; should these be included here? There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks. That's actually an ACLE issue. Since this instruction is kind of a mix of pmullb and pmullt, the intrinsics should be the same as for those (so only the u64 variant should exist).