generic.c 19 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711
  1. /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
  2. * All rights reserved.
  3. *
  4. * This package is an SSL implementation written
  5. * by Eric Young (eay@cryptsoft.com).
  6. * The implementation was written so as to conform with Netscapes SSL.
  7. *
  8. * This library is free for commercial and non-commercial use as long as
  9. * the following conditions are aheared to. The following conditions
  10. * apply to all code found in this distribution, be it the RC4, RSA,
  11. * lhash, DES, etc., code; not just the SSL code. The SSL documentation
  12. * included with this distribution is covered by the same copyright terms
  13. * except that the holder is Tim Hudson (tjh@cryptsoft.com).
  14. *
  15. * Copyright remains Eric Young's, and as such any Copyright notices in
  16. * the code are not to be removed.
  17. * If this package is used in a product, Eric Young should be given attribution
  18. * as the author of the parts of the library used.
  19. * This can be in the form of a textual message at program startup or
  20. * in documentation (online or textual) provided with the package.
  21. *
  22. * Redistribution and use in source and binary forms, with or without
  23. * modification, are permitted provided that the following conditions
  24. * are met:
  25. * 1. Redistributions of source code must retain the copyright
  26. * notice, this list of conditions and the following disclaimer.
  27. * 2. Redistributions in binary form must reproduce the above copyright
  28. * notice, this list of conditions and the following disclaimer in the
  29. * documentation and/or other materials provided with the distribution.
  30. * 3. All advertising materials mentioning features or use of this software
  31. * must display the following acknowledgement:
  32. * "This product includes cryptographic software written by
  33. * Eric Young (eay@cryptsoft.com)"
  34. * The word 'cryptographic' can be left out if the rouines from the library
  35. * being used are not cryptographic related :-).
  36. * 4. If you include any Windows specific code (or a derivative thereof) from
  37. * the apps directory (application code) you must include an acknowledgement:
  38. * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
  39. *
  40. * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
  41. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  42. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  43. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  44. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  45. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  46. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  47. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  48. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  49. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  50. * SUCH DAMAGE.
  51. *
  52. * The licence and distribution terms for any publically available version or
  53. * derivative of this code cannot be changed. i.e. this code cannot simply be
  54. * copied and put under another distribution licence
  55. * [including the GNU Public Licence.] */
  56. #include <openssl/bn.h>
  57. #include <assert.h>
  58. #include "internal.h"
  59. // This file has two other implementations: x86 assembly language in
  60. // asm/bn-586.pl and x86_64 inline assembly in asm/x86_64-gcc.c.
  61. #if defined(OPENSSL_NO_ASM) || \
  62. !(defined(OPENSSL_X86) || \
  63. (defined(OPENSSL_X86_64) && (defined(__GNUC__) || defined(__clang__))))
  64. #ifdef BN_ULLONG
  // mul_add(r, a, w, c): evaluates w * a + r + c in a BN_ULLONG, then stores
  // Lw() of that value in |r| and Hw() of it in |c|. |r| and |c| must be
  // lvalues and are each evaluated twice.
  #define mul_add(r, a, w, c) \
    do { \
      BN_ULLONG wide = (BN_ULLONG)(w) * (a) + (r) + (c); \
      (r) = Lw(wide); \
      (c) = Hw(wide); \
    } while (0)
  // mul(r, a, w, c): evaluates w * a + c in a BN_ULLONG, then stores Lw() of
  // that value in |r| and Hw() of it in |c|. |c| must be an lvalue and is
  // evaluated twice.
  #define mul(r, a, w, c) \
    do { \
      BN_ULLONG wide = (BN_ULLONG)(w) * (a) + (c); \
      (r) = Lw(wide); \
      (c) = Hw(wide); \
    } while (0)
  // sqr(r0, r1, a): evaluates a * a in a BN_ULLONG, then stores Lw() of the
  // product in |r0| and Hw() of it in |r1|. |a| is evaluated twice.
  #define sqr(r0, r1, a) \
    do { \
      BN_ULLONG wide = (BN_ULLONG)(a) * (a); \
      (r0) = Lw(wide); \
      (r1) = Hw(wide); \
    } while (0)
  86. #else
  // mul_add(r, a, w, c): adds the two-word product of |w| and |a| plus the
  // incoming carry |c| to |r|. The resulting word is written back to |r| and
  // the carry out of it to |c|. |r| and |c| must be lvalues; |c| is evaluated
  // several times.
  #define mul_add(r, a, w, c) \
    do { \
      BN_ULONG factor = (a); \
      BN_ULONG acc = (r); \
      BN_ULONG prod_hi, prod_lo; \
      BN_UMULT_LOHI(prod_lo, prod_hi, w, factor); \
      acc += (c); \
      (c) = prod_hi + ((acc < (c)) ? 1 : 0); \
      acc += prod_lo; \
      (c) += (acc < prod_lo) ? 1 : 0; \
      (r) = acc; \
    } while (0)
  // mul(r, a, w, c): forms the two-word product of |w| and |a|, adds the
  // incoming carry |c| to its low word, stores that word in |r| and leaves
  // the high word plus any carry from the addition in |c|.
  #define mul(r, a, w, c) \
    do { \
      BN_ULONG factor = (a); \
      BN_ULONG prod_hi, prod_lo, sum; \
      BN_UMULT_LOHI(prod_lo, prod_hi, w, factor); \
      sum = prod_lo + (c); \
      (c) = prod_hi + ((sum < prod_lo) ? 1 : 0); \
      (r) = sum; \
    } while (0)
  // sqr(r0, r1, a): squares |a| with BN_UMULT_LOHI, passing |r0| as its first
  // (low) output and |r1| as its second (high) output. |a| is read once.
  #define sqr(r0, r1, a) \
    do { \
      BN_ULONG operand = (a); \
      BN_UMULT_LOHI(r0, r1, operand, operand); \
    } while (0)
  113. #endif // !BN_ULLONG
  114. BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num,
  115. BN_ULONG w) {
  116. BN_ULONG c1 = 0;
  117. if (num == 0) {
  118. return c1;
  119. }
  120. while (num & ~3) {
  121. mul_add(rp[0], ap[0], w, c1);
  122. mul_add(rp[1], ap[1], w, c1);
  123. mul_add(rp[2], ap[2], w, c1);
  124. mul_add(rp[3], ap[3], w, c1);
  125. ap += 4;
  126. rp += 4;
  127. num -= 4;
  128. }
  129. while (num) {
  130. mul_add(rp[0], ap[0], w, c1);
  131. ap++;
  132. rp++;
  133. num--;
  134. }
  135. return c1;
  136. }
  137. BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num,
  138. BN_ULONG w) {
  139. BN_ULONG c1 = 0;
  140. if (num == 0) {
  141. return c1;
  142. }
  143. while (num & ~3) {
  144. mul(rp[0], ap[0], w, c1);
  145. mul(rp[1], ap[1], w, c1);
  146. mul(rp[2], ap[2], w, c1);
  147. mul(rp[3], ap[3], w, c1);
  148. ap += 4;
  149. rp += 4;
  150. num -= 4;
  151. }
  152. while (num) {
  153. mul(rp[0], ap[0], w, c1);
  154. ap++;
  155. rp++;
  156. num--;
  157. }
  158. return c1;
  159. }
  160. void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, size_t n) {
  161. if (n == 0) {
  162. return;
  163. }
  164. while (n & ~3) {
  165. sqr(r[0], r[1], a[0]);
  166. sqr(r[2], r[3], a[1]);
  167. sqr(r[4], r[5], a[2]);
  168. sqr(r[6], r[7], a[3]);
  169. a += 4;
  170. r += 8;
  171. n -= 4;
  172. }
  173. while (n) {
  174. sqr(r[0], r[1], a[0]);
  175. a++;
  176. r += 2;
  177. n--;
  178. }
  179. }
  180. #ifdef BN_ULLONG
  181. BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
  182. size_t n) {
  183. BN_ULLONG ll = 0;
  184. if (n == 0) {
  185. return 0;
  186. }
  187. while (n & ~3) {
  188. ll += (BN_ULLONG)a[0] + b[0];
  189. r[0] = (BN_ULONG)ll;
  190. ll >>= BN_BITS2;
  191. ll += (BN_ULLONG)a[1] + b[1];
  192. r[1] = (BN_ULONG)ll;
  193. ll >>= BN_BITS2;
  194. ll += (BN_ULLONG)a[2] + b[2];
  195. r[2] = (BN_ULONG)ll;
  196. ll >>= BN_BITS2;
  197. ll += (BN_ULLONG)a[3] + b[3];
  198. r[3] = (BN_ULONG)ll;
  199. ll >>= BN_BITS2;
  200. a += 4;
  201. b += 4;
  202. r += 4;
  203. n -= 4;
  204. }
  205. while (n) {
  206. ll += (BN_ULLONG)a[0] + b[0];
  207. r[0] = (BN_ULONG)ll;
  208. ll >>= BN_BITS2;
  209. a++;
  210. b++;
  211. r++;
  212. n--;
  213. }
  214. return (BN_ULONG)ll;
  215. }
  216. #else // !BN_ULLONG
  217. BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
  218. size_t n) {
  219. BN_ULONG c, l, t;
  220. if (n == 0) {
  221. return (BN_ULONG)0;
  222. }
  223. c = 0;
  224. while (n & ~3) {
  225. t = a[0];
  226. t += c;
  227. c = (t < c);
  228. l = t + b[0];
  229. c += (l < t);
  230. r[0] = l;
  231. t = a[1];
  232. t += c;
  233. c = (t < c);
  234. l = t + b[1];
  235. c += (l < t);
  236. r[1] = l;
  237. t = a[2];
  238. t += c;
  239. c = (t < c);
  240. l = t + b[2];
  241. c += (l < t);
  242. r[2] = l;
  243. t = a[3];
  244. t += c;
  245. c = (t < c);
  246. l = t + b[3];
  247. c += (l < t);
  248. r[3] = l;
  249. a += 4;
  250. b += 4;
  251. r += 4;
  252. n -= 4;
  253. }
  254. while (n) {
  255. t = a[0];
  256. t += c;
  257. c = (t < c);
  258. l = t + b[0];
  259. c += (l < t);
  260. r[0] = l;
  261. a++;
  262. b++;
  263. r++;
  264. n--;
  265. }
  266. return (BN_ULONG)c;
  267. }
  268. #endif // !BN_ULLONG
  269. BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
  270. size_t n) {
  271. BN_ULONG t1, t2;
  272. int c = 0;
  273. if (n == 0) {
  274. return (BN_ULONG)0;
  275. }
  276. while (n & ~3) {
  277. t1 = a[0];
  278. t2 = b[0];
  279. r[0] = t1 - t2 - c;
  280. if (t1 != t2) {
  281. c = (t1 < t2);
  282. }
  283. t1 = a[1];
  284. t2 = b[1];
  285. r[1] = t1 - t2 - c;
  286. if (t1 != t2) {
  287. c = (t1 < t2);
  288. }
  289. t1 = a[2];
  290. t2 = b[2];
  291. r[2] = t1 - t2 - c;
  292. if (t1 != t2) {
  293. c = (t1 < t2);
  294. }
  295. t1 = a[3];
  296. t2 = b[3];
  297. r[3] = t1 - t2 - c;
  298. if (t1 != t2) {
  299. c = (t1 < t2);
  300. }
  301. a += 4;
  302. b += 4;
  303. r += 4;
  304. n -= 4;
  305. }
  306. while (n) {
  307. t1 = a[0];
  308. t2 = b[0];
  309. r[0] = t1 - t2 - c;
  310. if (t1 != t2) {
  311. c = (t1 < t2);
  312. }
  313. a++;
  314. b++;
  315. r++;
  316. n--;
  317. }
  318. return c;
  319. }
  // mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0)
  // mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0)
  // sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0)
  // sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0)
  324. #ifdef BN_ULLONG
  325. // Keep in mind that additions to multiplication result can not overflow,
  326. // because its high half cannot be all-ones.
  // mul_add_c(a, b, c0, c1, c2): adds the double-width product a * b to the
  // three-word accumulator (c2, c1, c0). The product plus |c0| is formed in a
  // BN_ULLONG; Lw() of it becomes the new |c0|, Hw() of it is added to |c1|,
  // and |c2| is bumped when that addition wraps.
  #define mul_add_c(a, b, c0, c1, c2) \
    do { \
      BN_ULLONG wide = (BN_ULLONG)(a) * (b) + (c0); /* no carry */ \
      BN_ULONG upper = (BN_ULONG)Hw(wide); \
      (c0) = (BN_ULONG)Lw(wide); \
      (c1) += upper; \
      (c2) += ((c1) < upper) ? 1 : 0; \
    } while (0)
  // mul_add_c2(a, b, c0, c1, c2): adds 2 * a * b to the three-word accumulator
  // (c2, c1, c0). The double-width product is computed once and then folded
  // into the accumulator twice, each time the same way mul_add_c does it.
  #define mul_add_c2(a, b, c0, c1, c2) \
    do { \
      BN_ULLONG prod = (BN_ULLONG)(a) * (b); \
      BN_ULLONG wide = prod + (c0); /* no carry */ \
      BN_ULONG upper = (BN_ULONG)Hw(wide); \
      (c0) = (BN_ULONG)Lw(wide); \
      (c1) += upper; \
      (c2) += ((c1) < upper) ? 1 : 0; \
      wide = prod + (c0); /* no carry */ \
      upper = (BN_ULONG)Hw(wide); \
      (c0) = (BN_ULONG)Lw(wide); \
      (c1) += upper; \
      (c2) += ((c1) < upper) ? 1 : 0; \
    } while (0)
  // sqr_add_c(a, i, c0, c1, c2): adds a[i] * a[i] to the three-word
  // accumulator (c2, c1, c0). |a|[i] is evaluated twice.
  #define sqr_add_c(a, i, c0, c1, c2) \
    do { \
      BN_ULLONG wide = (BN_ULLONG)(a)[i] * (a)[i] + (c0); /* no carry */ \
      BN_ULONG upper = (BN_ULONG)Hw(wide); \
      (c0) = (BN_ULONG)Lw(wide); \
      (c1) += upper; \
      (c2) += ((c1) < upper) ? 1 : 0; \
    } while (0)
  // sqr_add_c2(a, i, j, c0, c1, c2): adds 2 * a[i] * a[j] to the three-word
  // accumulator (c2, c1, c0) by forwarding to mul_add_c2.
  #define sqr_add_c2(a, i, j, c0, c1, c2) \
    mul_add_c2((a)[i], (a)[j], c0, c1, c2)
  371. #else
  372. // Keep in mind that additions to hi can not overflow, because the high word of
  373. // a multiplication result cannot be all-ones.
  // mul_add_c(a, b, c0, c1, c2): adds the two-word product a * b to the
  // three-word accumulator (c2, c1, c0). The low product word is added to
  // |c0|; a wrap there is folded into the high product word, which is then
  // added to |c1|; a wrap there increments |c2|.
  #define mul_add_c(a, b, c0, c1, c2) \
    do { \
      BN_ULONG lhs = (a); \
      BN_ULONG rhs = (b); \
      BN_ULONG prod_lo, prod_hi; \
      BN_UMULT_LOHI(prod_lo, prod_hi, lhs, rhs); \
      (c0) += prod_lo; \
      if ((c0) < prod_lo) { \
        prod_hi++; \
      } \
      (c1) += prod_hi; \
      if ((c1) < prod_hi) { \
        (c2)++; \
      } \
    } while (0)
  // mul_add_c2(a, b, c0, c1, c2): adds 2 * a * b to the three-word accumulator
  // (c2, c1, c0). |c0|, |c1| and |c2| must be lvalues and are evaluated
  // several times; |a| and |b| are evaluated once.
  //
  // The two-word product (hi, lo) is computed once and folded into the
  // accumulator twice. The first pass uses a scratch copy |tt| of the high
  // word so that |hi| is still the unmodified product word for the second
  // pass. Every use of a macro parameter is parenthesized.
  #define mul_add_c2(a, b, c0, c1, c2) \
    do { \
      BN_ULONG ta = (a), tb = (b); \
      BN_ULONG lo, hi, tt; \
      BN_UMULT_LOHI(lo, hi, ta, tb); \
      /* First addition of the product. */ \
      (c0) += lo; \
      tt = hi + (((c0) < lo) ? 1 : 0); \
      (c1) += tt; \
      (c2) += ((c1) < tt) ? 1 : 0; \
      /* Second addition of the same product. */ \
      (c0) += lo; \
      hi += ((c0) < lo) ? 1 : 0; \
      (c1) += hi; \
      (c2) += ((c1) < hi) ? 1 : 0; \
    } while (0)
  // sqr_add_c(a, i, c0, c1, c2): adds a[i] * a[i] to the three-word
  // accumulator (c2, c1, c0). |c0|, |c1| and |c2| must be lvalues and are
  // evaluated several times; a[i] is read once. Every use of a macro
  // parameter is parenthesized.
  #define sqr_add_c(a, i, c0, c1, c2) \
    do { \
      BN_ULONG ta = (a)[i]; \
      BN_ULONG lo, hi; \
      BN_UMULT_LOHI(lo, hi, ta, ta); \
      (c0) += lo; \
      hi += ((c0) < lo) ? 1 : 0; \
      (c1) += hi; \
      (c2) += ((c1) < hi) ? 1 : 0; \
    } while (0)
  // sqr_add_c2(a, i, j, c0, c1, c2): adds 2 * a[i] * a[j] to the three-word
  // accumulator (c2, c1, c0) by forwarding to mul_add_c2.
  #define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2)
  409. #endif // !BN_ULLONG
  410. void bn_mul_comba8(BN_ULONG r[16], const BN_ULONG a[8], const BN_ULONG b[8]) {
  411. BN_ULONG c1, c2, c3;
  412. c1 = 0;
  413. c2 = 0;
  414. c3 = 0;
  415. mul_add_c(a[0], b[0], c1, c2, c3);
  416. r[0] = c1;
  417. c1 = 0;
  418. mul_add_c(a[0], b[1], c2, c3, c1);
  419. mul_add_c(a[1], b[0], c2, c3, c1);
  420. r[1] = c2;
  421. c2 = 0;
  422. mul_add_c(a[2], b[0], c3, c1, c2);
  423. mul_add_c(a[1], b[1], c3, c1, c2);
  424. mul_add_c(a[0], b[2], c3, c1, c2);
  425. r[2] = c3;
  426. c3 = 0;
  427. mul_add_c(a[0], b[3], c1, c2, c3);
  428. mul_add_c(a[1], b[2], c1, c2, c3);
  429. mul_add_c(a[2], b[1], c1, c2, c3);
  430. mul_add_c(a[3], b[0], c1, c2, c3);
  431. r[3] = c1;
  432. c1 = 0;
  433. mul_add_c(a[4], b[0], c2, c3, c1);
  434. mul_add_c(a[3], b[1], c2, c3, c1);
  435. mul_add_c(a[2], b[2], c2, c3, c1);
  436. mul_add_c(a[1], b[3], c2, c3, c1);
  437. mul_add_c(a[0], b[4], c2, c3, c1);
  438. r[4] = c2;
  439. c2 = 0;
  440. mul_add_c(a[0], b[5], c3, c1, c2);
  441. mul_add_c(a[1], b[4], c3, c1, c2);
  442. mul_add_c(a[2], b[3], c3, c1, c2);
  443. mul_add_c(a[3], b[2], c3, c1, c2);
  444. mul_add_c(a[4], b[1], c3, c1, c2);
  445. mul_add_c(a[5], b[0], c3, c1, c2);
  446. r[5] = c3;
  447. c3 = 0;
  448. mul_add_c(a[6], b[0], c1, c2, c3);
  449. mul_add_c(a[5], b[1], c1, c2, c3);
  450. mul_add_c(a[4], b[2], c1, c2, c3);
  451. mul_add_c(a[3], b[3], c1, c2, c3);
  452. mul_add_c(a[2], b[4], c1, c2, c3);
  453. mul_add_c(a[1], b[5], c1, c2, c3);
  454. mul_add_c(a[0], b[6], c1, c2, c3);
  455. r[6] = c1;
  456. c1 = 0;
  457. mul_add_c(a[0], b[7], c2, c3, c1);
  458. mul_add_c(a[1], b[6], c2, c3, c1);
  459. mul_add_c(a[2], b[5], c2, c3, c1);
  460. mul_add_c(a[3], b[4], c2, c3, c1);
  461. mul_add_c(a[4], b[3], c2, c3, c1);
  462. mul_add_c(a[5], b[2], c2, c3, c1);
  463. mul_add_c(a[6], b[1], c2, c3, c1);
  464. mul_add_c(a[7], b[0], c2, c3, c1);
  465. r[7] = c2;
  466. c2 = 0;
  467. mul_add_c(a[7], b[1], c3, c1, c2);
  468. mul_add_c(a[6], b[2], c3, c1, c2);
  469. mul_add_c(a[5], b[3], c3, c1, c2);
  470. mul_add_c(a[4], b[4], c3, c1, c2);
  471. mul_add_c(a[3], b[5], c3, c1, c2);
  472. mul_add_c(a[2], b[6], c3, c1, c2);
  473. mul_add_c(a[1], b[7], c3, c1, c2);
  474. r[8] = c3;
  475. c3 = 0;
  476. mul_add_c(a[2], b[7], c1, c2, c3);
  477. mul_add_c(a[3], b[6], c1, c2, c3);
  478. mul_add_c(a[4], b[5], c1, c2, c3);
  479. mul_add_c(a[5], b[4], c1, c2, c3);
  480. mul_add_c(a[6], b[3], c1, c2, c3);
  481. mul_add_c(a[7], b[2], c1, c2, c3);
  482. r[9] = c1;
  483. c1 = 0;
  484. mul_add_c(a[7], b[3], c2, c3, c1);
  485. mul_add_c(a[6], b[4], c2, c3, c1);
  486. mul_add_c(a[5], b[5], c2, c3, c1);
  487. mul_add_c(a[4], b[6], c2, c3, c1);
  488. mul_add_c(a[3], b[7], c2, c3, c1);
  489. r[10] = c2;
  490. c2 = 0;
  491. mul_add_c(a[4], b[7], c3, c1, c2);
  492. mul_add_c(a[5], b[6], c3, c1, c2);
  493. mul_add_c(a[6], b[5], c3, c1, c2);
  494. mul_add_c(a[7], b[4], c3, c1, c2);
  495. r[11] = c3;
  496. c3 = 0;
  497. mul_add_c(a[7], b[5], c1, c2, c3);
  498. mul_add_c(a[6], b[6], c1, c2, c3);
  499. mul_add_c(a[5], b[7], c1, c2, c3);
  500. r[12] = c1;
  501. c1 = 0;
  502. mul_add_c(a[6], b[7], c2, c3, c1);
  503. mul_add_c(a[7], b[6], c2, c3, c1);
  504. r[13] = c2;
  505. c2 = 0;
  506. mul_add_c(a[7], b[7], c3, c1, c2);
  507. r[14] = c3;
  508. r[15] = c1;
  509. }
  510. void bn_mul_comba4(BN_ULONG r[8], const BN_ULONG a[4], const BN_ULONG b[4]) {
  511. BN_ULONG c1, c2, c3;
  512. c1 = 0;
  513. c2 = 0;
  514. c3 = 0;
  515. mul_add_c(a[0], b[0], c1, c2, c3);
  516. r[0] = c1;
  517. c1 = 0;
  518. mul_add_c(a[0], b[1], c2, c3, c1);
  519. mul_add_c(a[1], b[0], c2, c3, c1);
  520. r[1] = c2;
  521. c2 = 0;
  522. mul_add_c(a[2], b[0], c3, c1, c2);
  523. mul_add_c(a[1], b[1], c3, c1, c2);
  524. mul_add_c(a[0], b[2], c3, c1, c2);
  525. r[2] = c3;
  526. c3 = 0;
  527. mul_add_c(a[0], b[3], c1, c2, c3);
  528. mul_add_c(a[1], b[2], c1, c2, c3);
  529. mul_add_c(a[2], b[1], c1, c2, c3);
  530. mul_add_c(a[3], b[0], c1, c2, c3);
  531. r[3] = c1;
  532. c1 = 0;
  533. mul_add_c(a[3], b[1], c2, c3, c1);
  534. mul_add_c(a[2], b[2], c2, c3, c1);
  535. mul_add_c(a[1], b[3], c2, c3, c1);
  536. r[4] = c2;
  537. c2 = 0;
  538. mul_add_c(a[2], b[3], c3, c1, c2);
  539. mul_add_c(a[3], b[2], c3, c1, c2);
  540. r[5] = c3;
  541. c3 = 0;
  542. mul_add_c(a[3], b[3], c1, c2, c3);
  543. r[6] = c1;
  544. r[7] = c2;
  545. }
  546. void bn_sqr_comba8(BN_ULONG r[16], const BN_ULONG a[8]) {
  547. BN_ULONG c1, c2, c3;
  548. c1 = 0;
  549. c2 = 0;
  550. c3 = 0;
  551. sqr_add_c(a, 0, c1, c2, c3);
  552. r[0] = c1;
  553. c1 = 0;
  554. sqr_add_c2(a, 1, 0, c2, c3, c1);
  555. r[1] = c2;
  556. c2 = 0;
  557. sqr_add_c(a, 1, c3, c1, c2);
  558. sqr_add_c2(a, 2, 0, c3, c1, c2);
  559. r[2] = c3;
  560. c3 = 0;
  561. sqr_add_c2(a, 3, 0, c1, c2, c3);
  562. sqr_add_c2(a, 2, 1, c1, c2, c3);
  563. r[3] = c1;
  564. c1 = 0;
  565. sqr_add_c(a, 2, c2, c3, c1);
  566. sqr_add_c2(a, 3, 1, c2, c3, c1);
  567. sqr_add_c2(a, 4, 0, c2, c3, c1);
  568. r[4] = c2;
  569. c2 = 0;
  570. sqr_add_c2(a, 5, 0, c3, c1, c2);
  571. sqr_add_c2(a, 4, 1, c3, c1, c2);
  572. sqr_add_c2(a, 3, 2, c3, c1, c2);
  573. r[5] = c3;
  574. c3 = 0;
  575. sqr_add_c(a, 3, c1, c2, c3);
  576. sqr_add_c2(a, 4, 2, c1, c2, c3);
  577. sqr_add_c2(a, 5, 1, c1, c2, c3);
  578. sqr_add_c2(a, 6, 0, c1, c2, c3);
  579. r[6] = c1;
  580. c1 = 0;
  581. sqr_add_c2(a, 7, 0, c2, c3, c1);
  582. sqr_add_c2(a, 6, 1, c2, c3, c1);
  583. sqr_add_c2(a, 5, 2, c2, c3, c1);
  584. sqr_add_c2(a, 4, 3, c2, c3, c1);
  585. r[7] = c2;
  586. c2 = 0;
  587. sqr_add_c(a, 4, c3, c1, c2);
  588. sqr_add_c2(a, 5, 3, c3, c1, c2);
  589. sqr_add_c2(a, 6, 2, c3, c1, c2);
  590. sqr_add_c2(a, 7, 1, c3, c1, c2);
  591. r[8] = c3;
  592. c3 = 0;
  593. sqr_add_c2(a, 7, 2, c1, c2, c3);
  594. sqr_add_c2(a, 6, 3, c1, c2, c3);
  595. sqr_add_c2(a, 5, 4, c1, c2, c3);
  596. r[9] = c1;
  597. c1 = 0;
  598. sqr_add_c(a, 5, c2, c3, c1);
  599. sqr_add_c2(a, 6, 4, c2, c3, c1);
  600. sqr_add_c2(a, 7, 3, c2, c3, c1);
  601. r[10] = c2;
  602. c2 = 0;
  603. sqr_add_c2(a, 7, 4, c3, c1, c2);
  604. sqr_add_c2(a, 6, 5, c3, c1, c2);
  605. r[11] = c3;
  606. c3 = 0;
  607. sqr_add_c(a, 6, c1, c2, c3);
  608. sqr_add_c2(a, 7, 5, c1, c2, c3);
  609. r[12] = c1;
  610. c1 = 0;
  611. sqr_add_c2(a, 7, 6, c2, c3, c1);
  612. r[13] = c2;
  613. c2 = 0;
  614. sqr_add_c(a, 7, c3, c1, c2);
  615. r[14] = c3;
  616. r[15] = c1;
  617. }
  618. void bn_sqr_comba4(BN_ULONG r[8], const BN_ULONG a[4]) {
  619. BN_ULONG c1, c2, c3;
  620. c1 = 0;
  621. c2 = 0;
  622. c3 = 0;
  623. sqr_add_c(a, 0, c1, c2, c3);
  624. r[0] = c1;
  625. c1 = 0;
  626. sqr_add_c2(a, 1, 0, c2, c3, c1);
  627. r[1] = c2;
  628. c2 = 0;
  629. sqr_add_c(a, 1, c3, c1, c2);
  630. sqr_add_c2(a, 2, 0, c3, c1, c2);
  631. r[2] = c3;
  632. c3 = 0;
  633. sqr_add_c2(a, 3, 0, c1, c2, c3);
  634. sqr_add_c2(a, 2, 1, c1, c2, c3);
  635. r[3] = c1;
  636. c1 = 0;
  637. sqr_add_c(a, 2, c2, c3, c1);
  638. sqr_add_c2(a, 3, 1, c2, c3, c1);
  639. r[4] = c2;
  640. c2 = 0;
  641. sqr_add_c2(a, 3, 2, c3, c1, c2);
  642. r[5] = c3;
  643. c3 = 0;
  644. sqr_add_c(a, 3, c1, c2, c3);
  645. r[6] = c1;
  646. r[7] = c2;
  647. }
  648. #undef mul_add
  649. #undef mul
  650. #undef sqr
  651. #undef mul_add_c
  652. #undef mul_add_c2
  653. #undef sqr_add_c
  654. #undef sqr_add_c2
  655. #endif