| | | 1 | | using System; |
| | | 2 | | |
| | | 3 | | namespace Renci.SshNet.Security.Chaos.NaCl.Internal.Ed25519Ref10 |
| | | 4 | | { |
| | | 5 | | internal static partial class FieldOperations |
| | | 6 | | { |
| | | 7 | | /* |
| | | 8 | | h = f * g |
| | | 9 | | Can overlap h with f or g. |
| | | 10 | | |
| | | 11 | | Preconditions: |
| | | 12 | | |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. |
| | | 13 | | |g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. |
| | | 14 | | |
| | | 15 | | Postconditions: |
| | | 16 | | |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. |
| | | 17 | | */ |
| | | 18 | | |
| | | 19 | | /* |
| | | 20 | | Notes on implementation strategy: |
| | | 21 | | |
| | | 22 | | Using schoolbook multiplication. |
| | | 23 | | Karatsuba would save a little in some cost models. |
| | | 24 | | |
| | | 25 | | Most multiplications by 2 and 19 are 32-bit precomputations; |
| | | 26 | | cheaper than 64-bit postcomputations. |
| | | 27 | | |
| | | 28 | | There is one remaining multiplication by 19 in the carry chain; |
| | | 29 | | one *19 precomputation can be merged into this, |
| | | 30 | | but the resulting data flow is considerably less clean. |
| | | 31 | | |
| | | 32 | | There are 12 carries below. |
| | | 33 | | 10 of them are 2-way parallelizable and vectorizable. |
| | | 34 | | Can get away with 11 carries, but then data flow is much deeper. |
| | | 35 | | |
| | | 36 | | With tighter constraints on inputs can squeeze carries into int32. |
| | | 37 | | */ |
| | | 38 | | |
| | | 39 | | internal static void fe_mul(out FieldElement h, ref FieldElement f, ref FieldElement g) |
| | 3024968 | 40 | | { |
| | 3024968 | 41 | | Int32 f0 = f.x0; |
| | 3024968 | 42 | | Int32 f1 = f.x1; |
| | 3024968 | 43 | | Int32 f2 = f.x2; |
| | 3024968 | 44 | | Int32 f3 = f.x3; |
| | 3024968 | 45 | | Int32 f4 = f.x4; |
| | 3024968 | 46 | | Int32 f5 = f.x5; |
| | 3024968 | 47 | | Int32 f6 = f.x6; |
| | 3024968 | 48 | | Int32 f7 = f.x7; |
| | 3024968 | 49 | | Int32 f8 = f.x8; |
| | 3024968 | 50 | | Int32 f9 = f.x9; |
| | 3024968 | 51 | | Int32 g0 = g.x0; |
| | 3024968 | 52 | | Int32 g1 = g.x1; |
| | 3024968 | 53 | | Int32 g2 = g.x2; |
| | 3024968 | 54 | | Int32 g3 = g.x3; |
| | 3024968 | 55 | | Int32 g4 = g.x4; |
| | 3024968 | 56 | | Int32 g5 = g.x5; |
| | 3024968 | 57 | | Int32 g6 = g.x6; |
| | 3024968 | 58 | | Int32 g7 = g.x7; |
| | 3024968 | 59 | | Int32 g8 = g.x8; |
| | 3024968 | 60 | | Int32 g9 = g.x9; |
| | 3024968 | 61 | | Int32 g1_19 = 19 * g1; /* 1.959375*2^29 */ |
| | 3024968 | 62 | | Int32 g2_19 = 19 * g2; /* 1.959375*2^30; still ok */ |
| | 3024968 | 63 | | Int32 g3_19 = 19 * g3; |
| | 3024968 | 64 | | Int32 g4_19 = 19 * g4; |
| | 3024968 | 65 | | Int32 g5_19 = 19 * g5; |
| | 3024968 | 66 | | Int32 g6_19 = 19 * g6; |
| | 3024968 | 67 | | Int32 g7_19 = 19 * g7; |
| | 3024968 | 68 | | Int32 g8_19 = 19 * g8; |
| | 3024968 | 69 | | Int32 g9_19 = 19 * g9; |
| | 3024968 | 70 | | Int32 f1_2 = 2 * f1; |
| | 3024968 | 71 | | Int32 f3_2 = 2 * f3; |
| | 3024968 | 72 | | Int32 f5_2 = 2 * f5; |
| | 3024968 | 73 | | Int32 f7_2 = 2 * f7; |
| | 3024968 | 74 | | Int32 f9_2 = 2 * f9; |
| | 3024968 | 75 | | Int64 f0g0 = f0 * (Int64)g0; |
| | 3024968 | 76 | | Int64 f0g1 = f0 * (Int64)g1; |
| | 3024968 | 77 | | Int64 f0g2 = f0 * (Int64)g2; |
| | 3024968 | 78 | | Int64 f0g3 = f0 * (Int64)g3; |
| | 3024968 | 79 | | Int64 f0g4 = f0 * (Int64)g4; |
| | 3024968 | 80 | | Int64 f0g5 = f0 * (Int64)g5; |
| | 3024968 | 81 | | Int64 f0g6 = f0 * (Int64)g6; |
| | 3024968 | 82 | | Int64 f0g7 = f0 * (Int64)g7; |
| | 3024968 | 83 | | Int64 f0g8 = f0 * (Int64)g8; |
| | 3024968 | 84 | | Int64 f0g9 = f0 * (Int64)g9; |
| | 3024968 | 85 | | Int64 f1g0 = f1 * (Int64)g0; |
| | 3024968 | 86 | | Int64 f1g1_2 = f1_2 * (Int64)g1; |
| | 3024968 | 87 | | Int64 f1g2 = f1 * (Int64)g2; |
| | 3024968 | 88 | | Int64 f1g3_2 = f1_2 * (Int64)g3; |
| | 3024968 | 89 | | Int64 f1g4 = f1 * (Int64)g4; |
| | 3024968 | 90 | | Int64 f1g5_2 = f1_2 * (Int64)g5; |
| | 3024968 | 91 | | Int64 f1g6 = f1 * (Int64)g6; |
| | 3024968 | 92 | | Int64 f1g7_2 = f1_2 * (Int64)g7; |
| | 3024968 | 93 | | Int64 f1g8 = f1 * (Int64)g8; |
| | 3024968 | 94 | | Int64 f1g9_38 = f1_2 * (Int64)g9_19; |
| | 3024968 | 95 | | Int64 f2g0 = f2 * (Int64)g0; |
| | 3024968 | 96 | | Int64 f2g1 = f2 * (Int64)g1; |
| | 3024968 | 97 | | Int64 f2g2 = f2 * (Int64)g2; |
| | 3024968 | 98 | | Int64 f2g3 = f2 * (Int64)g3; |
| | 3024968 | 99 | | Int64 f2g4 = f2 * (Int64)g4; |
| | 3024968 | 100 | | Int64 f2g5 = f2 * (Int64)g5; |
| | 3024968 | 101 | | Int64 f2g6 = f2 * (Int64)g6; |
| | 3024968 | 102 | | Int64 f2g7 = f2 * (Int64)g7; |
| | 3024968 | 103 | | Int64 f2g8_19 = f2 * (Int64)g8_19; |
| | 3024968 | 104 | | Int64 f2g9_19 = f2 * (Int64)g9_19; |
| | 3024968 | 105 | | Int64 f3g0 = f3 * (Int64)g0; |
| | 3024968 | 106 | | Int64 f3g1_2 = f3_2 * (Int64)g1; |
| | 3024968 | 107 | | Int64 f3g2 = f3 * (Int64)g2; |
| | 3024968 | 108 | | Int64 f3g3_2 = f3_2 * (Int64)g3; |
| | 3024968 | 109 | | Int64 f3g4 = f3 * (Int64)g4; |
| | 3024968 | 110 | | Int64 f3g5_2 = f3_2 * (Int64)g5; |
| | 3024968 | 111 | | Int64 f3g6 = f3 * (Int64)g6; |
| | 3024968 | 112 | | Int64 f3g7_38 = f3_2 * (Int64)g7_19; |
| | 3024968 | 113 | | Int64 f3g8_19 = f3 * (Int64)g8_19; |
| | 3024968 | 114 | | Int64 f3g9_38 = f3_2 * (Int64)g9_19; |
| | 3024968 | 115 | | Int64 f4g0 = f4 * (Int64)g0; |
| | 3024968 | 116 | | Int64 f4g1 = f4 * (Int64)g1; |
| | 3024968 | 117 | | Int64 f4g2 = f4 * (Int64)g2; |
| | 3024968 | 118 | | Int64 f4g3 = f4 * (Int64)g3; |
| | 3024968 | 119 | | Int64 f4g4 = f4 * (Int64)g4; |
| | 3024968 | 120 | | Int64 f4g5 = f4 * (Int64)g5; |
| | 3024968 | 121 | | Int64 f4g6_19 = f4 * (Int64)g6_19; |
| | 3024968 | 122 | | Int64 f4g7_19 = f4 * (Int64)g7_19; |
| | 3024968 | 123 | | Int64 f4g8_19 = f4 * (Int64)g8_19; |
| | 3024968 | 124 | | Int64 f4g9_19 = f4 * (Int64)g9_19; |
| | 3024968 | 125 | | Int64 f5g0 = f5 * (Int64)g0; |
| | 3024968 | 126 | | Int64 f5g1_2 = f5_2 * (Int64)g1; |
| | 3024968 | 127 | | Int64 f5g2 = f5 * (Int64)g2; |
| | 3024968 | 128 | | Int64 f5g3_2 = f5_2 * (Int64)g3; |
| | 3024968 | 129 | | Int64 f5g4 = f5 * (Int64)g4; |
| | 3024968 | 130 | | Int64 f5g5_38 = f5_2 * (Int64)g5_19; |
| | 3024968 | 131 | | Int64 f5g6_19 = f5 * (Int64)g6_19; |
| | 3024968 | 132 | | Int64 f5g7_38 = f5_2 * (Int64)g7_19; |
| | 3024968 | 133 | | Int64 f5g8_19 = f5 * (Int64)g8_19; |
| | 3024968 | 134 | | Int64 f5g9_38 = f5_2 * (Int64)g9_19; |
| | 3024968 | 135 | | Int64 f6g0 = f6 * (Int64)g0; |
| | 3024968 | 136 | | Int64 f6g1 = f6 * (Int64)g1; |
| | 3024968 | 137 | | Int64 f6g2 = f6 * (Int64)g2; |
| | 3024968 | 138 | | Int64 f6g3 = f6 * (Int64)g3; |
| | 3024968 | 139 | | Int64 f6g4_19 = f6 * (Int64)g4_19; |
| | 3024968 | 140 | | Int64 f6g5_19 = f6 * (Int64)g5_19; |
| | 3024968 | 141 | | Int64 f6g6_19 = f6 * (Int64)g6_19; |
| | 3024968 | 142 | | Int64 f6g7_19 = f6 * (Int64)g7_19; |
| | 3024968 | 143 | | Int64 f6g8_19 = f6 * (Int64)g8_19; |
| | 3024968 | 144 | | Int64 f6g9_19 = f6 * (Int64)g9_19; |
| | 3024968 | 145 | | Int64 f7g0 = f7 * (Int64)g0; |
| | 3024968 | 146 | | Int64 f7g1_2 = f7_2 * (Int64)g1; |
| | 3024968 | 147 | | Int64 f7g2 = f7 * (Int64)g2; |
| | 3024968 | 148 | | Int64 f7g3_38 = f7_2 * (Int64)g3_19; |
| | 3024968 | 149 | | Int64 f7g4_19 = f7 * (Int64)g4_19; |
| | 3024968 | 150 | | Int64 f7g5_38 = f7_2 * (Int64)g5_19; |
| | 3024968 | 151 | | Int64 f7g6_19 = f7 * (Int64)g6_19; |
| | 3024968 | 152 | | Int64 f7g7_38 = f7_2 * (Int64)g7_19; |
| | 3024968 | 153 | | Int64 f7g8_19 = f7 * (Int64)g8_19; |
| | 3024968 | 154 | | Int64 f7g9_38 = f7_2 * (Int64)g9_19; |
| | 3024968 | 155 | | Int64 f8g0 = f8 * (Int64)g0; |
| | 3024968 | 156 | | Int64 f8g1 = f8 * (Int64)g1; |
| | 3024968 | 157 | | Int64 f8g2_19 = f8 * (Int64)g2_19; |
| | 3024968 | 158 | | Int64 f8g3_19 = f8 * (Int64)g3_19; |
| | 3024968 | 159 | | Int64 f8g4_19 = f8 * (Int64)g4_19; |
| | 3024968 | 160 | | Int64 f8g5_19 = f8 * (Int64)g5_19; |
| | 3024968 | 161 | | Int64 f8g6_19 = f8 * (Int64)g6_19; |
| | 3024968 | 162 | | Int64 f8g7_19 = f8 * (Int64)g7_19; |
| | 3024968 | 163 | | Int64 f8g8_19 = f8 * (Int64)g8_19; |
| | 3024968 | 164 | | Int64 f8g9_19 = f8 * (Int64)g9_19; |
| | 3024968 | 165 | | Int64 f9g0 = f9 * (Int64)g0; |
| | 3024968 | 166 | | Int64 f9g1_38 = f9_2 * (Int64)g1_19; |
| | 3024968 | 167 | | Int64 f9g2_19 = f9 * (Int64)g2_19; |
| | 3024968 | 168 | | Int64 f9g3_38 = f9_2 * (Int64)g3_19; |
| | 3024968 | 169 | | Int64 f9g4_19 = f9 * (Int64)g4_19; |
| | 3024968 | 170 | | Int64 f9g5_38 = f9_2 * (Int64)g5_19; |
| | 3024968 | 171 | | Int64 f9g6_19 = f9 * (Int64)g6_19; |
| | 3024968 | 172 | | Int64 f9g7_38 = f9_2 * (Int64)g7_19; |
| | 3024968 | 173 | | Int64 f9g8_19 = f9 * (Int64)g8_19; |
| | 3024968 | 174 | | Int64 f9g9_38 = f9_2 * (Int64)g9_19; |
| | 3024968 | 175 | | Int64 h0 = f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38; |
| | 3024968 | 176 | | Int64 h1 = f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19; |
| | 3024968 | 177 | | Int64 h2 = f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38; |
| | 3024968 | 178 | | Int64 h3 = f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19; |
| | 3024968 | 179 | | Int64 h4 = f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38; |
| | 3024968 | 180 | | Int64 h5 = f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19; |
| | 3024968 | 181 | | Int64 h6 = f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38; |
| | 3024968 | 182 | | Int64 h7 = f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19; |
| | 3024968 | 183 | | Int64 h8 = f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38; |
| | 3024968 | 184 | | Int64 h9 = f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0; |
| | | 185 | | Int64 carry0; |
| | | 186 | | Int64 carry1; |
| | | 187 | | Int64 carry2; |
| | | 188 | | Int64 carry3; |
| | | 189 | | Int64 carry4; |
| | | 190 | | Int64 carry5; |
| | | 191 | | Int64 carry6; |
| | | 192 | | Int64 carry7; |
| | | 193 | | Int64 carry8; |
| | | 194 | | Int64 carry9; |
| | | 195 | | |
| | | 196 | | /* |
| | | 197 | | |h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38)) |
| | | 198 | | i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8 |
| | | 199 | | |h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19)) |
| | | 200 | | i.e. |h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9 |
| | | 201 | | */ |
| | | 202 | | |
| | 9074904 | 203 | | carry0 = (h0 + (Int64)(1 << 25)) >> 26; h1 += carry0; h0 -= carry0 << 26; |
| | 9074904 | 204 | | carry4 = (h4 + (Int64)(1 << 25)) >> 26; h5 += carry4; h4 -= carry4 << 26; |
| | | 205 | | /* |h0| <= 2^25 */ |
| | | 206 | | /* |h4| <= 2^25 */ |
| | | 207 | | /* |h1| <= 1.71*2^59 */ |
| | | 208 | | /* |h5| <= 1.71*2^59 */ |
| | | 209 | | |
| | 9074904 | 210 | | carry1 = (h1 + (Int64)(1 << 24)) >> 25; h2 += carry1; h1 -= carry1 << 25; |
| | 9074904 | 211 | | carry5 = (h5 + (Int64)(1 << 24)) >> 25; h6 += carry5; h5 -= carry5 << 25; |
| | | 212 | | /* |h1| <= 2^24; from now on fits into int32 */ |
| | | 213 | | /* |h5| <= 2^24; from now on fits into int32 */ |
| | | 214 | | /* |h2| <= 1.41*2^60 */ |
| | | 215 | | /* |h6| <= 1.41*2^60 */ |
| | | 216 | | |
| | 9074904 | 217 | | carry2 = (h2 + (Int64)(1 << 25)) >> 26; h3 += carry2; h2 -= carry2 << 26; |
| | 9074904 | 218 | | carry6 = (h6 + (Int64)(1 << 25)) >> 26; h7 += carry6; h6 -= carry6 << 26; |
| | | 219 | | /* |h2| <= 2^25; from now on fits into int32 unchanged */ |
| | | 220 | | /* |h6| <= 2^25; from now on fits into int32 unchanged */ |
| | | 221 | | /* |h3| <= 1.71*2^59 */ |
| | | 222 | | /* |h7| <= 1.71*2^59 */ |
| | | 223 | | |
| | 9074904 | 224 | | carry3 = (h3 + (Int64)(1 << 24)) >> 25; h4 += carry3; h3 -= carry3 << 25; |
| | 9074904 | 225 | | carry7 = (h7 + (Int64)(1 << 24)) >> 25; h8 += carry7; h7 -= carry7 << 25; |
| | | 226 | | /* |h3| <= 2^24; from now on fits into int32 unchanged */ |
| | | 227 | | /* |h7| <= 2^24; from now on fits into int32 unchanged */ |
| | | 228 | | /* |h4| <= 1.72*2^34 */ |
| | | 229 | | /* |h8| <= 1.41*2^60 */ |
| | | 230 | | |
| | 9074904 | 231 | | carry4 = (h4 + (Int64)(1 << 25)) >> 26; h5 += carry4; h4 -= carry4 << 26; |
| | 9074904 | 232 | | carry8 = (h8 + (Int64)(1 << 25)) >> 26; h9 += carry8; h8 -= carry8 << 26; |
| | | 233 | | /* |h4| <= 2^25; from now on fits into int32 unchanged */ |
| | | 234 | | /* |h8| <= 2^25; from now on fits into int32 unchanged */ |
| | | 235 | | /* |h5| <= 1.01*2^24 */ |
| | | 236 | | /* |h9| <= 1.71*2^59 */ |
| | | 237 | | |
| | 9074904 | 238 | | carry9 = (h9 + (Int64)(1 << 24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; |
| | | 239 | | /* |h9| <= 2^24; from now on fits into int32 unchanged */ |
| | | 240 | | /* |h0| <= 1.1*2^39 */ |
| | | 241 | | |
| | 9074904 | 242 | | carry0 = (h0 + (Int64)(1 << 25)) >> 26; h1 += carry0; h0 -= carry0 << 26; |
| | | 243 | | /* |h0| <= 2^25; from now on fits into int32 unchanged */ |
| | | 244 | | /* |h1| <= 1.01*2^24 */ |
| | | 245 | | |
| | 3024968 | 246 | | h.x0 = (Int32)h0; |
| | 3024968 | 247 | | h.x1 = (Int32)h1; |
| | 3024968 | 248 | | h.x2 = (Int32)h2; |
| | 3024968 | 249 | | h.x3 = (Int32)h3; |
| | 3024968 | 250 | | h.x4 = (Int32)h4; |
| | 3024968 | 251 | | h.x5 = (Int32)h5; |
| | 3024968 | 252 | | h.x6 = (Int32)h6; |
| | 3024968 | 253 | | h.x7 = (Int32)h7; |
| | 3024968 | 254 | | h.x8 = (Int32)h8; |
| | 3024968 | 255 | | h.x9 = (Int32)h9; |
| | 3024968 | 256 | | } |
| | | 257 | | } |
| | | 258 | | } |