generic.c 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710
  1. /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
  2. * All rights reserved.
  3. *
  4. * This package is an SSL implementation written
  5. * by Eric Young (eay@cryptsoft.com).
  6. * The implementation was written so as to conform with Netscapes SSL.
  7. *
  8. * This library is free for commercial and non-commercial use as long as
  9. * the following conditions are aheared to. The following conditions
  10. * apply to all code found in this distribution, be it the RC4, RSA,
  11. * lhash, DES, etc., code; not just the SSL code. The SSL documentation
  12. * included with this distribution is covered by the same copyright terms
  13. * except that the holder is Tim Hudson (tjh@cryptsoft.com).
  14. *
  15. * Copyright remains Eric Young's, and as such any Copyright notices in
  16. * the code are not to be removed.
  17. * If this package is used in a product, Eric Young should be given attribution
  18. * as the author of the parts of the library used.
  19. * This can be in the form of a textual message at program startup or
  20. * in documentation (online or textual) provided with the package.
  21. *
  22. * Redistribution and use in source and binary forms, with or without
  23. * modification, are permitted provided that the following conditions
  24. * are met:
  25. * 1. Redistributions of source code must retain the copyright
  26. * notice, this list of conditions and the following disclaimer.
  27. * 2. Redistributions in binary form must reproduce the above copyright
  28. * notice, this list of conditions and the following disclaimer in the
  29. * documentation and/or other materials provided with the distribution.
  30. * 3. All advertising materials mentioning features or use of this software
  31. * must display the following acknowledgement:
  32. * "This product includes cryptographic software written by
  33. * Eric Young (eay@cryptsoft.com)"
  34. * The word 'cryptographic' can be left out if the rouines from the library
  35. * being used are not cryptographic related :-).
  36. * 4. If you include any Windows specific code (or a derivative thereof) from
  37. * the apps directory (application code) you must include an acknowledgement:
  38. * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
  39. *
  40. * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
  41. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  42. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  43. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  44. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  45. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  46. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  47. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  48. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  49. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  50. * SUCH DAMAGE.
  51. *
  52. * The licence and distribution terms for any publically available version or
  53. * derivative of this code cannot be changed. i.e. this code cannot simply be
  54. * copied and put under another distribution licence
  55. * [including the GNU Public Licence.] */
  56. #include <openssl/bn.h>
  57. #include <assert.h>
  58. #include "internal.h"
  59. // This file has two other implementations: x86 assembly language in
  60. // asm/bn-586.pl and x86_64 inline assembly in asm/x86_64-gcc.c.
  61. #if defined(OPENSSL_NO_ASM) || \
  62. !(defined(OPENSSL_X86) || (defined(OPENSSL_X86_64) && defined(__GNUC__)))
  63. #ifdef BN_ULLONG
  64. #define mul_add(r, a, w, c) \
  65. do { \
  66. BN_ULLONG t; \
  67. t = (BN_ULLONG)(w) * (a) + (r) + (c); \
  68. (r) = Lw(t); \
  69. (c) = Hw(t); \
  70. } while (0)
  71. #define mul(r, a, w, c) \
  72. do { \
  73. BN_ULLONG t; \
  74. t = (BN_ULLONG)(w) * (a) + (c); \
  75. (r) = Lw(t); \
  76. (c) = Hw(t); \
  77. } while (0)
  78. #define sqr(r0, r1, a) \
  79. do { \
  80. BN_ULLONG t; \
  81. t = (BN_ULLONG)(a) * (a); \
  82. (r0) = Lw(t); \
  83. (r1) = Hw(t); \
  84. } while (0)
  85. #else
  86. #define mul_add(r, a, w, c) \
  87. do { \
  88. BN_ULONG high, low, ret, tmp = (a); \
  89. ret = (r); \
  90. BN_UMULT_LOHI(low, high, w, tmp); \
  91. ret += (c); \
  92. (c) = (ret < (c)) ? 1 : 0; \
  93. (c) += high; \
  94. ret += low; \
  95. (c) += (ret < low) ? 1 : 0; \
  96. (r) = ret; \
  97. } while (0)
  98. #define mul(r, a, w, c) \
  99. do { \
  100. BN_ULONG high, low, ret, ta = (a); \
  101. BN_UMULT_LOHI(low, high, w, ta); \
  102. ret = low + (c); \
  103. (c) = high; \
  104. (c) += (ret < low) ? 1 : 0; \
  105. (r) = ret; \
  106. } while (0)
  107. #define sqr(r0, r1, a) \
  108. do { \
  109. BN_ULONG tmp = (a); \
  110. BN_UMULT_LOHI(r0, r1, tmp, tmp); \
  111. } while (0)
  112. #endif // !BN_ULLONG
  113. BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num,
  114. BN_ULONG w) {
  115. BN_ULONG c1 = 0;
  116. if (num == 0) {
  117. return c1;
  118. }
  119. while (num & ~3) {
  120. mul_add(rp[0], ap[0], w, c1);
  121. mul_add(rp[1], ap[1], w, c1);
  122. mul_add(rp[2], ap[2], w, c1);
  123. mul_add(rp[3], ap[3], w, c1);
  124. ap += 4;
  125. rp += 4;
  126. num -= 4;
  127. }
  128. while (num) {
  129. mul_add(rp[0], ap[0], w, c1);
  130. ap++;
  131. rp++;
  132. num--;
  133. }
  134. return c1;
  135. }
  136. BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num,
  137. BN_ULONG w) {
  138. BN_ULONG c1 = 0;
  139. if (num == 0) {
  140. return c1;
  141. }
  142. while (num & ~3) {
  143. mul(rp[0], ap[0], w, c1);
  144. mul(rp[1], ap[1], w, c1);
  145. mul(rp[2], ap[2], w, c1);
  146. mul(rp[3], ap[3], w, c1);
  147. ap += 4;
  148. rp += 4;
  149. num -= 4;
  150. }
  151. while (num) {
  152. mul(rp[0], ap[0], w, c1);
  153. ap++;
  154. rp++;
  155. num--;
  156. }
  157. return c1;
  158. }
  159. void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, size_t n) {
  160. if (n == 0) {
  161. return;
  162. }
  163. while (n & ~3) {
  164. sqr(r[0], r[1], a[0]);
  165. sqr(r[2], r[3], a[1]);
  166. sqr(r[4], r[5], a[2]);
  167. sqr(r[6], r[7], a[3]);
  168. a += 4;
  169. r += 8;
  170. n -= 4;
  171. }
  172. while (n) {
  173. sqr(r[0], r[1], a[0]);
  174. a++;
  175. r += 2;
  176. n--;
  177. }
  178. }
  179. #ifdef BN_ULLONG
  180. BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
  181. size_t n) {
  182. BN_ULLONG ll = 0;
  183. if (n == 0) {
  184. return 0;
  185. }
  186. while (n & ~3) {
  187. ll += (BN_ULLONG)a[0] + b[0];
  188. r[0] = (BN_ULONG)ll;
  189. ll >>= BN_BITS2;
  190. ll += (BN_ULLONG)a[1] + b[1];
  191. r[1] = (BN_ULONG)ll;
  192. ll >>= BN_BITS2;
  193. ll += (BN_ULLONG)a[2] + b[2];
  194. r[2] = (BN_ULONG)ll;
  195. ll >>= BN_BITS2;
  196. ll += (BN_ULLONG)a[3] + b[3];
  197. r[3] = (BN_ULONG)ll;
  198. ll >>= BN_BITS2;
  199. a += 4;
  200. b += 4;
  201. r += 4;
  202. n -= 4;
  203. }
  204. while (n) {
  205. ll += (BN_ULLONG)a[0] + b[0];
  206. r[0] = (BN_ULONG)ll;
  207. ll >>= BN_BITS2;
  208. a++;
  209. b++;
  210. r++;
  211. n--;
  212. }
  213. return (BN_ULONG)ll;
  214. }
  215. #else // !BN_ULLONG
  216. BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
  217. size_t n) {
  218. BN_ULONG c, l, t;
  219. if (n == 0) {
  220. return (BN_ULONG)0;
  221. }
  222. c = 0;
  223. while (n & ~3) {
  224. t = a[0];
  225. t += c;
  226. c = (t < c);
  227. l = t + b[0];
  228. c += (l < t);
  229. r[0] = l;
  230. t = a[1];
  231. t += c;
  232. c = (t < c);
  233. l = t + b[1];
  234. c += (l < t);
  235. r[1] = l;
  236. t = a[2];
  237. t += c;
  238. c = (t < c);
  239. l = t + b[2];
  240. c += (l < t);
  241. r[2] = l;
  242. t = a[3];
  243. t += c;
  244. c = (t < c);
  245. l = t + b[3];
  246. c += (l < t);
  247. r[3] = l;
  248. a += 4;
  249. b += 4;
  250. r += 4;
  251. n -= 4;
  252. }
  253. while (n) {
  254. t = a[0];
  255. t += c;
  256. c = (t < c);
  257. l = t + b[0];
  258. c += (l < t);
  259. r[0] = l;
  260. a++;
  261. b++;
  262. r++;
  263. n--;
  264. }
  265. return (BN_ULONG)c;
  266. }
  267. #endif // !BN_ULLONG
  268. BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
  269. size_t n) {
  270. BN_ULONG t1, t2;
  271. int c = 0;
  272. if (n == 0) {
  273. return (BN_ULONG)0;
  274. }
  275. while (n & ~3) {
  276. t1 = a[0];
  277. t2 = b[0];
  278. r[0] = t1 - t2 - c;
  279. if (t1 != t2) {
  280. c = (t1 < t2);
  281. }
  282. t1 = a[1];
  283. t2 = b[1];
  284. r[1] = t1 - t2 - c;
  285. if (t1 != t2) {
  286. c = (t1 < t2);
  287. }
  288. t1 = a[2];
  289. t2 = b[2];
  290. r[2] = t1 - t2 - c;
  291. if (t1 != t2) {
  292. c = (t1 < t2);
  293. }
  294. t1 = a[3];
  295. t2 = b[3];
  296. r[3] = t1 - t2 - c;
  297. if (t1 != t2) {
  298. c = (t1 < t2);
  299. }
  300. a += 4;
  301. b += 4;
  302. r += 4;
  303. n -= 4;
  304. }
  305. while (n) {
  306. t1 = a[0];
  307. t2 = b[0];
  308. r[0] = t1 - t2 - c;
  309. if (t1 != t2) {
  310. c = (t1 < t2);
  311. }
  312. a++;
  313. b++;
  314. r++;
  315. n--;
  316. }
  317. return c;
  318. }
  319. // mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0)
  320. // mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0)
  321. // sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0)
  322. // sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0)
  323. #ifdef BN_ULLONG
  324. // Keep in mind that additions to multiplication result can not overflow,
  325. // because its high half cannot be all-ones.
  326. #define mul_add_c(a, b, c0, c1, c2) \
  327. do { \
  328. BN_ULONG hi; \
  329. BN_ULLONG t = (BN_ULLONG)(a) * (b); \
  330. t += (c0); /* no carry */ \
  331. (c0) = (BN_ULONG)Lw(t); \
  332. hi = (BN_ULONG)Hw(t); \
  333. (c1) += (hi); \
  334. if ((c1) < hi) { \
  335. (c2)++; \
  336. } \
  337. } while (0)
  338. #define mul_add_c2(a, b, c0, c1, c2) \
  339. do { \
  340. BN_ULONG hi; \
  341. BN_ULLONG t = (BN_ULLONG)(a) * (b); \
  342. BN_ULLONG tt = t + (c0); /* no carry */ \
  343. (c0) = (BN_ULONG)Lw(tt); \
  344. hi = (BN_ULONG)Hw(tt); \
  345. (c1) += hi; \
  346. if ((c1) < hi) { \
  347. (c2)++; \
  348. } \
  349. t += (c0); /* no carry */ \
  350. (c0) = (BN_ULONG)Lw(t); \
  351. hi = (BN_ULONG)Hw(t); \
  352. (c1) += hi; \
  353. if ((c1) < hi) { \
  354. (c2)++; \
  355. } \
  356. } while (0)
  357. #define sqr_add_c(a, i, c0, c1, c2) \
  358. do { \
  359. BN_ULONG hi; \
  360. BN_ULLONG t = (BN_ULLONG)(a)[i] * (a)[i]; \
  361. t += (c0); /* no carry */ \
  362. (c0) = (BN_ULONG)Lw(t); \
  363. hi = (BN_ULONG)Hw(t); \
  364. (c1) += hi; \
  365. if ((c1) < hi) { \
  366. (c2)++; \
  367. } \
  368. } while (0)
  369. #define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2)
  370. #else
  371. // Keep in mind that additions to hi can not overflow, because the high word of
  372. // a multiplication result cannot be all-ones.
  373. #define mul_add_c(a, b, c0, c1, c2) \
  374. do { \
  375. BN_ULONG ta = (a), tb = (b); \
  376. BN_ULONG lo, hi; \
  377. BN_UMULT_LOHI(lo, hi, ta, tb); \
  378. (c0) += lo; \
  379. hi += ((c0) < lo) ? 1 : 0; \
  380. (c1) += hi; \
  381. (c2) += ((c1) < hi) ? 1 : 0; \
  382. } while (0)
  383. #define mul_add_c2(a, b, c0, c1, c2) \
  384. do { \
  385. BN_ULONG ta = (a), tb = (b); \
  386. BN_ULONG lo, hi, tt; \
  387. BN_UMULT_LOHI(lo, hi, ta, tb); \
  388. (c0) += lo; \
  389. tt = hi + (((c0) < lo) ? 1 : 0); \
  390. (c1) += tt; \
  391. (c2) += ((c1) < tt) ? 1 : 0; \
  392. (c0) += lo; \
  393. hi += (c0 < lo) ? 1 : 0; \
  394. (c1) += hi; \
  395. (c2) += ((c1) < hi) ? 1 : 0; \
  396. } while (0)
  397. #define sqr_add_c(a, i, c0, c1, c2) \
  398. do { \
  399. BN_ULONG ta = (a)[i]; \
  400. BN_ULONG lo, hi; \
  401. BN_UMULT_LOHI(lo, hi, ta, ta); \
  402. (c0) += lo; \
  403. hi += (c0 < lo) ? 1 : 0; \
  404. (c1) += hi; \
  405. (c2) += ((c1) < hi) ? 1 : 0; \
  406. } while (0)
  407. #define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2)
  408. #endif // !BN_ULLONG
  409. void bn_mul_comba8(BN_ULONG r[16], const BN_ULONG a[8], const BN_ULONG b[8]) {
  410. BN_ULONG c1, c2, c3;
  411. c1 = 0;
  412. c2 = 0;
  413. c3 = 0;
  414. mul_add_c(a[0], b[0], c1, c2, c3);
  415. r[0] = c1;
  416. c1 = 0;
  417. mul_add_c(a[0], b[1], c2, c3, c1);
  418. mul_add_c(a[1], b[0], c2, c3, c1);
  419. r[1] = c2;
  420. c2 = 0;
  421. mul_add_c(a[2], b[0], c3, c1, c2);
  422. mul_add_c(a[1], b[1], c3, c1, c2);
  423. mul_add_c(a[0], b[2], c3, c1, c2);
  424. r[2] = c3;
  425. c3 = 0;
  426. mul_add_c(a[0], b[3], c1, c2, c3);
  427. mul_add_c(a[1], b[2], c1, c2, c3);
  428. mul_add_c(a[2], b[1], c1, c2, c3);
  429. mul_add_c(a[3], b[0], c1, c2, c3);
  430. r[3] = c1;
  431. c1 = 0;
  432. mul_add_c(a[4], b[0], c2, c3, c1);
  433. mul_add_c(a[3], b[1], c2, c3, c1);
  434. mul_add_c(a[2], b[2], c2, c3, c1);
  435. mul_add_c(a[1], b[3], c2, c3, c1);
  436. mul_add_c(a[0], b[4], c2, c3, c1);
  437. r[4] = c2;
  438. c2 = 0;
  439. mul_add_c(a[0], b[5], c3, c1, c2);
  440. mul_add_c(a[1], b[4], c3, c1, c2);
  441. mul_add_c(a[2], b[3], c3, c1, c2);
  442. mul_add_c(a[3], b[2], c3, c1, c2);
  443. mul_add_c(a[4], b[1], c3, c1, c2);
  444. mul_add_c(a[5], b[0], c3, c1, c2);
  445. r[5] = c3;
  446. c3 = 0;
  447. mul_add_c(a[6], b[0], c1, c2, c3);
  448. mul_add_c(a[5], b[1], c1, c2, c3);
  449. mul_add_c(a[4], b[2], c1, c2, c3);
  450. mul_add_c(a[3], b[3], c1, c2, c3);
  451. mul_add_c(a[2], b[4], c1, c2, c3);
  452. mul_add_c(a[1], b[5], c1, c2, c3);
  453. mul_add_c(a[0], b[6], c1, c2, c3);
  454. r[6] = c1;
  455. c1 = 0;
  456. mul_add_c(a[0], b[7], c2, c3, c1);
  457. mul_add_c(a[1], b[6], c2, c3, c1);
  458. mul_add_c(a[2], b[5], c2, c3, c1);
  459. mul_add_c(a[3], b[4], c2, c3, c1);
  460. mul_add_c(a[4], b[3], c2, c3, c1);
  461. mul_add_c(a[5], b[2], c2, c3, c1);
  462. mul_add_c(a[6], b[1], c2, c3, c1);
  463. mul_add_c(a[7], b[0], c2, c3, c1);
  464. r[7] = c2;
  465. c2 = 0;
  466. mul_add_c(a[7], b[1], c3, c1, c2);
  467. mul_add_c(a[6], b[2], c3, c1, c2);
  468. mul_add_c(a[5], b[3], c3, c1, c2);
  469. mul_add_c(a[4], b[4], c3, c1, c2);
  470. mul_add_c(a[3], b[5], c3, c1, c2);
  471. mul_add_c(a[2], b[6], c3, c1, c2);
  472. mul_add_c(a[1], b[7], c3, c1, c2);
  473. r[8] = c3;
  474. c3 = 0;
  475. mul_add_c(a[2], b[7], c1, c2, c3);
  476. mul_add_c(a[3], b[6], c1, c2, c3);
  477. mul_add_c(a[4], b[5], c1, c2, c3);
  478. mul_add_c(a[5], b[4], c1, c2, c3);
  479. mul_add_c(a[6], b[3], c1, c2, c3);
  480. mul_add_c(a[7], b[2], c1, c2, c3);
  481. r[9] = c1;
  482. c1 = 0;
  483. mul_add_c(a[7], b[3], c2, c3, c1);
  484. mul_add_c(a[6], b[4], c2, c3, c1);
  485. mul_add_c(a[5], b[5], c2, c3, c1);
  486. mul_add_c(a[4], b[6], c2, c3, c1);
  487. mul_add_c(a[3], b[7], c2, c3, c1);
  488. r[10] = c2;
  489. c2 = 0;
  490. mul_add_c(a[4], b[7], c3, c1, c2);
  491. mul_add_c(a[5], b[6], c3, c1, c2);
  492. mul_add_c(a[6], b[5], c3, c1, c2);
  493. mul_add_c(a[7], b[4], c3, c1, c2);
  494. r[11] = c3;
  495. c3 = 0;
  496. mul_add_c(a[7], b[5], c1, c2, c3);
  497. mul_add_c(a[6], b[6], c1, c2, c3);
  498. mul_add_c(a[5], b[7], c1, c2, c3);
  499. r[12] = c1;
  500. c1 = 0;
  501. mul_add_c(a[6], b[7], c2, c3, c1);
  502. mul_add_c(a[7], b[6], c2, c3, c1);
  503. r[13] = c2;
  504. c2 = 0;
  505. mul_add_c(a[7], b[7], c3, c1, c2);
  506. r[14] = c3;
  507. r[15] = c1;
  508. }
  509. void bn_mul_comba4(BN_ULONG r[8], const BN_ULONG a[4], const BN_ULONG b[4]) {
  510. BN_ULONG c1, c2, c3;
  511. c1 = 0;
  512. c2 = 0;
  513. c3 = 0;
  514. mul_add_c(a[0], b[0], c1, c2, c3);
  515. r[0] = c1;
  516. c1 = 0;
  517. mul_add_c(a[0], b[1], c2, c3, c1);
  518. mul_add_c(a[1], b[0], c2, c3, c1);
  519. r[1] = c2;
  520. c2 = 0;
  521. mul_add_c(a[2], b[0], c3, c1, c2);
  522. mul_add_c(a[1], b[1], c3, c1, c2);
  523. mul_add_c(a[0], b[2], c3, c1, c2);
  524. r[2] = c3;
  525. c3 = 0;
  526. mul_add_c(a[0], b[3], c1, c2, c3);
  527. mul_add_c(a[1], b[2], c1, c2, c3);
  528. mul_add_c(a[2], b[1], c1, c2, c3);
  529. mul_add_c(a[3], b[0], c1, c2, c3);
  530. r[3] = c1;
  531. c1 = 0;
  532. mul_add_c(a[3], b[1], c2, c3, c1);
  533. mul_add_c(a[2], b[2], c2, c3, c1);
  534. mul_add_c(a[1], b[3], c2, c3, c1);
  535. r[4] = c2;
  536. c2 = 0;
  537. mul_add_c(a[2], b[3], c3, c1, c2);
  538. mul_add_c(a[3], b[2], c3, c1, c2);
  539. r[5] = c3;
  540. c3 = 0;
  541. mul_add_c(a[3], b[3], c1, c2, c3);
  542. r[6] = c1;
  543. r[7] = c2;
  544. }
  545. void bn_sqr_comba8(BN_ULONG r[16], const BN_ULONG a[8]) {
  546. BN_ULONG c1, c2, c3;
  547. c1 = 0;
  548. c2 = 0;
  549. c3 = 0;
  550. sqr_add_c(a, 0, c1, c2, c3);
  551. r[0] = c1;
  552. c1 = 0;
  553. sqr_add_c2(a, 1, 0, c2, c3, c1);
  554. r[1] = c2;
  555. c2 = 0;
  556. sqr_add_c(a, 1, c3, c1, c2);
  557. sqr_add_c2(a, 2, 0, c3, c1, c2);
  558. r[2] = c3;
  559. c3 = 0;
  560. sqr_add_c2(a, 3, 0, c1, c2, c3);
  561. sqr_add_c2(a, 2, 1, c1, c2, c3);
  562. r[3] = c1;
  563. c1 = 0;
  564. sqr_add_c(a, 2, c2, c3, c1);
  565. sqr_add_c2(a, 3, 1, c2, c3, c1);
  566. sqr_add_c2(a, 4, 0, c2, c3, c1);
  567. r[4] = c2;
  568. c2 = 0;
  569. sqr_add_c2(a, 5, 0, c3, c1, c2);
  570. sqr_add_c2(a, 4, 1, c3, c1, c2);
  571. sqr_add_c2(a, 3, 2, c3, c1, c2);
  572. r[5] = c3;
  573. c3 = 0;
  574. sqr_add_c(a, 3, c1, c2, c3);
  575. sqr_add_c2(a, 4, 2, c1, c2, c3);
  576. sqr_add_c2(a, 5, 1, c1, c2, c3);
  577. sqr_add_c2(a, 6, 0, c1, c2, c3);
  578. r[6] = c1;
  579. c1 = 0;
  580. sqr_add_c2(a, 7, 0, c2, c3, c1);
  581. sqr_add_c2(a, 6, 1, c2, c3, c1);
  582. sqr_add_c2(a, 5, 2, c2, c3, c1);
  583. sqr_add_c2(a, 4, 3, c2, c3, c1);
  584. r[7] = c2;
  585. c2 = 0;
  586. sqr_add_c(a, 4, c3, c1, c2);
  587. sqr_add_c2(a, 5, 3, c3, c1, c2);
  588. sqr_add_c2(a, 6, 2, c3, c1, c2);
  589. sqr_add_c2(a, 7, 1, c3, c1, c2);
  590. r[8] = c3;
  591. c3 = 0;
  592. sqr_add_c2(a, 7, 2, c1, c2, c3);
  593. sqr_add_c2(a, 6, 3, c1, c2, c3);
  594. sqr_add_c2(a, 5, 4, c1, c2, c3);
  595. r[9] = c1;
  596. c1 = 0;
  597. sqr_add_c(a, 5, c2, c3, c1);
  598. sqr_add_c2(a, 6, 4, c2, c3, c1);
  599. sqr_add_c2(a, 7, 3, c2, c3, c1);
  600. r[10] = c2;
  601. c2 = 0;
  602. sqr_add_c2(a, 7, 4, c3, c1, c2);
  603. sqr_add_c2(a, 6, 5, c3, c1, c2);
  604. r[11] = c3;
  605. c3 = 0;
  606. sqr_add_c(a, 6, c1, c2, c3);
  607. sqr_add_c2(a, 7, 5, c1, c2, c3);
  608. r[12] = c1;
  609. c1 = 0;
  610. sqr_add_c2(a, 7, 6, c2, c3, c1);
  611. r[13] = c2;
  612. c2 = 0;
  613. sqr_add_c(a, 7, c3, c1, c2);
  614. r[14] = c3;
  615. r[15] = c1;
  616. }
  617. void bn_sqr_comba4(BN_ULONG r[8], const BN_ULONG a[4]) {
  618. BN_ULONG c1, c2, c3;
  619. c1 = 0;
  620. c2 = 0;
  621. c3 = 0;
  622. sqr_add_c(a, 0, c1, c2, c3);
  623. r[0] = c1;
  624. c1 = 0;
  625. sqr_add_c2(a, 1, 0, c2, c3, c1);
  626. r[1] = c2;
  627. c2 = 0;
  628. sqr_add_c(a, 1, c3, c1, c2);
  629. sqr_add_c2(a, 2, 0, c3, c1, c2);
  630. r[2] = c3;
  631. c3 = 0;
  632. sqr_add_c2(a, 3, 0, c1, c2, c3);
  633. sqr_add_c2(a, 2, 1, c1, c2, c3);
  634. r[3] = c1;
  635. c1 = 0;
  636. sqr_add_c(a, 2, c2, c3, c1);
  637. sqr_add_c2(a, 3, 1, c2, c3, c1);
  638. r[4] = c2;
  639. c2 = 0;
  640. sqr_add_c2(a, 3, 2, c3, c1, c2);
  641. r[5] = c3;
  642. c3 = 0;
  643. sqr_add_c(a, 3, c1, c2, c3);
  644. r[6] = c1;
  645. r[7] = c2;
  646. }
  647. #undef mul_add
  648. #undef mul
  649. #undef sqr
  650. #undef mul_add_c
  651. #undef mul_add_c2
  652. #undef sqr_add_c
  653. #undef sqr_add_c2
  654. #endif