// Repro 1: ConditionalSelect whose mask is produced by Sve.CreateFalseMaskInt32().
// The "true" operand is Sve.Add(op1, op2) and the "false" operand is op1, so with
// a mask that is false in every lane the Add result is never selected.
static void falsecndselect1(Vector<int> op1, Vector<int> op2) {
var result1 = Sve.ConditionalSelect(Sve.CreateFalseMaskInt32(), Sve.Add(op1, op2), op1);
// Pass the selected vector to Consume (defined outside this snippet).
Consume(result1);
}
;; Recorded JIT output; appears to be the codegen for falsecndselect1 (pfalse + predicated add).
;; Prologue: push fp/lr and establish the frame pointer.
G_M42078_IG01: ;; offset=0x0000
stp fp, lr, [sp, #-0x10]!
mov fp, sp
;; size=8 bbWeight=1 PerfScore 1.50
G_M42078_IG02: ;; offset=0x0008
;; p0 = all-false predicate.
pfalse p0.b
;; Merging-predicated add under p0: with no active lanes, z0 is left unchanged.
;; NOTE(review): z0/z1 presumably hold op1/op2 on entry -- confirm against the ABI.
add z0.s, p0/m, z0.s, z1.s
;; Build a constant address in x0 16 bits at a time, load the call target from it, and call.
movz x0, #0x72B8 // code for CSharpTutorials.Program:Consume[System.Numerics.Vector`1[int]](System.Numerics.Vector`1[int])
movk x0, #0x2218 LSL #16
movk x0, #0xE088 LSL #32
ldr x0, [x0]
blr x0
;; size=28 bbWeight=1 PerfScore 8.50
;; Epilogue: restore fp/lr and return.
G_M42078_IG03: ;; offset=0x0024
ldp fp, lr, [sp], #0x10
ret lr
;; size=8 bbWeight=1 PerfScore 2.00
// Repro 2: same select as falsecndselect1, but the mask argument is the constant
// Vector<int>.Zero instead of a mask created by Sve.CreateFalseMaskInt32().
// The "true" operand is Sve.Add(op1, op2) and the "false" operand is op1.
static void falsecndselect2(Vector<int> op1, Vector<int> op2) {
var result2 = Sve.ConditionalSelect(Vector<int>.Zero, Sve.Add(op1, op2), op1);
// Pass the selected vector to Consume (defined outside this snippet).
Consume(result2);
}
;; Recorded JIT output; appears to be the codegen for falsecndselect2 (zero vector turned into a mask).
;; Prologue: push fp/lr and establish the frame pointer.
G_M39997_IG01: ;; offset=0x0000
stp fp, lr, [sp, #-0x10]!
mov fp, sp
;; size=8 bbWeight=1 PerfScore 1.50
G_M39997_IG02: ;; offset=0x0008
;; Convert the zero vector into a predicate: start from an all-true p0, zero v16,
;; then keep only lanes where z16 != 0. Every lane is 0, so p0 ends up all false.
ptrue p0.s
movi v16.4s, #0
cmpne p0.s, p0/z, z16.s, #0
;; Merging-predicated add under p0: with no active lanes, z0 is left unchanged.
;; NOTE(review): z0/z1 presumably hold op1/op2 on entry -- confirm against the ABI.
add z0.s, p0/m, z0.s, z1.s
;; Build a constant address in x0 16 bits at a time, load the call target from it, and call.
movz x0, #0x72B8 // code for CSharpTutorials.Program:Consume[System.Numerics.Vector`1[int]](System.Numerics.Vector`1[int])
movk x0, #0x2218 LSL #16
movk x0, #0xE088 LSL #32
ldr x0, [x0]
blr x0
;; size=36 bbWeight=1 PerfScore 12.00
;; Epilogue: restore fp/lr and return.
G_M39997_IG03: ;; offset=0x002C
ldp fp, lr, [sp], #0x10
ret lr
;; size=8 bbWeight=1 PerfScore 2.00
;; NOTE(review): presumably the desired codegen once the all-false mask and the
;; predicated add are removed -- only the frame setup and the call to Consume remain.
;; Prologue: push fp/lr and establish the frame pointer.
stp fp, lr, [sp, #-0x10]!
mov fp, sp
;; Build a constant address in x0 16 bits at a time, load the call target from it, and call.
movz x0, #0x72B8 // code for CSharpTutorials.Program:Consume[System.Numerics.Vector`1[int]](System.Numerics.Vector`1[int])
movk x0, #0x2218 LSL #16
movk x0, #0xE088 LSL #32
ldr x0, [x0]
blr x0
;; Epilogue: restore fp/lr and return.
ldp fp, lr, [sp], #0x10
ret lr
This can potentially be applied to most predicated instructions. However, we should check each individually for any possible side effects such as exceptions.
Abs,
AbsoluteCompareGreaterThan,
AbsoluteCompareGreaterThanOrEqual,
AbsoluteCompareLessThan,
AbsoluteCompareLessThanOrEqual,
AbsoluteDifference,
AddAcross,
AddRotateComplex,
AddSequentialAcross,
AndAcross,
BooleanNot,
CompareEqual,
CompareGreaterThan,
CompareGreaterThanOrEqual,
CompareLessThan,
CompareLessThanOrEqual,
CompareNotEqualTo,
CompareUnordered,
ConvertToDouble,
ConvertToInt32,
ConvertToInt64,
ConvertToSingle,
ConvertToUInt32,
ConvertToUInt64,
CreateBreakPropagateMask,
Divide,
FusedMultiplyAdd,
FusedMultiplyAddNegated,
FusedMultiplySubtract,
FusedMultiplySubtractNegated,
LeadingSignCount,
LeadingZeroCount,
LoadVectorByteNonFaultingZeroExtendToInt16,
LoadVectorByteNonFaultingZeroExtendToInt32,
LoadVectorByteNonFaultingZeroExtendToInt64,
LoadVectorByteNonFaultingZeroExtendToUInt16,
LoadVectorByteNonFaultingZeroExtendToUInt32,
LoadVectorByteNonFaultingZeroExtendToUInt64,
LoadVectorInt16NonFaultingSignExtendToInt32,
LoadVectorInt16NonFaultingSignExtendToInt64,
LoadVectorInt16NonFaultingSignExtendToUInt32,
LoadVectorInt16NonFaultingSignExtendToUInt64,
LoadVectorInt32NonFaultingSignExtendToInt64,
LoadVectorInt32NonFaultingSignExtendToUInt64,
LoadVectorNonFaulting,
LoadVectorSByteNonFaultingSignExtendToInt16,
LoadVectorSByteNonFaultingSignExtendToInt32,
LoadVectorSByteNonFaultingSignExtendToInt64,
LoadVectorSByteNonFaultingSignExtendToUInt16,
LoadVectorSByteNonFaultingSignExtendToUInt32,
LoadVectorSByteNonFaultingSignExtendToUInt64,
LoadVectorUInt16NonFaultingZeroExtendToInt32,
LoadVectorUInt16NonFaultingZeroExtendToInt64,
LoadVectorUInt16NonFaultingZeroExtendToUInt32,
LoadVectorUInt16NonFaultingZeroExtendToUInt64,
LoadVectorUInt32NonFaultingZeroExtendToInt64,
LoadVectorUInt32NonFaultingZeroExtendToUInt64,
Max,
MaxAcross,
MaxNumber,
MaxNumberAcross,
Min,
MinAcross,
MinNumber,
MinNumberAcross,
Multiply,
MultiplyAdd,
MultiplyAddRotateComplex,
MultiplyExtended,
MultiplySubtract,
Negate,
Not,
OrAcross,
PopCount,
ReciprocalExponent,
ReverseBits,
ReverseElement16,
ReverseElement32,
ReverseElement8,
RoundAwayFromZero,
RoundToNearest,
RoundToNegativeInfinity,
RoundToPositiveInfinity,
RoundToZero,
Scale,
ShiftLeftLogical,
ShiftRightArithmetic,
ShiftRightArithmeticForDivide,
ShiftRightLogical,
SignExtend16,
SignExtend32,
SignExtend8,
Sqrt,
XorAcross,
ZeroExtend16,
ZeroExtend32,
ZeroExtend8,
Consider:
For both of these, because the mask is all false, the `ADD` will do nothing. Both the `ADD` and the mask can be optimised away, becoming:
This can potentially be applied to most predicated instructions. However, we should check each individually for any possible side effects such as exceptions.
Full list of SVE1 APIs with HW_Flag_ExplicitMaskedOperation: