generic.c 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783
  1. /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
  2. * All rights reserved.
  3. *
  4. * This package is an SSL implementation written
  5. * by Eric Young (eay@cryptsoft.com).
  6. * The implementation was written so as to conform with Netscapes SSL.
  7. *
  8. * This library is free for commercial and non-commercial use as long as
  9. * the following conditions are aheared to. The following conditions
  10. * apply to all code found in this distribution, be it the RC4, RSA,
  11. * lhash, DES, etc., code; not just the SSL code. The SSL documentation
  12. * included with this distribution is covered by the same copyright terms
  13. * except that the holder is Tim Hudson (tjh@cryptsoft.com).
  14. *
  15. * Copyright remains Eric Young's, and as such any Copyright notices in
  16. * the code are not to be removed.
  17. * If this package is used in a product, Eric Young should be given attribution
  18. * as the author of the parts of the library used.
  19. * This can be in the form of a textual message at program startup or
  20. * in documentation (online or textual) provided with the package.
  21. *
  22. * Redistribution and use in source and binary forms, with or without
  23. * modification, are permitted provided that the following conditions
  24. * are met:
  25. * 1. Redistributions of source code must retain the copyright
  26. * notice, this list of conditions and the following disclaimer.
  27. * 2. Redistributions in binary form must reproduce the above copyright
  28. * notice, this list of conditions and the following disclaimer in the
  29. * documentation and/or other materials provided with the distribution.
  30. * 3. All advertising materials mentioning features or use of this software
  31. * must display the following acknowledgement:
  32. * "This product includes cryptographic software written by
  33. * Eric Young (eay@cryptsoft.com)"
  34. * The word 'cryptographic' can be left out if the rouines from the library
  35. * being used are not cryptographic related :-).
  36. * 4. If you include any Windows specific code (or a derivative thereof) from
  37. * the apps directory (application code) you must include an acknowledgement:
  38. * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
  39. *
  40. * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
  41. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  42. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  43. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  44. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  45. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  46. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  47. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  48. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  49. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  50. * SUCH DAMAGE.
  51. *
  52. * The licence and distribution terms for any publically available version or
  53. * derivative of this code cannot be changed. i.e. this code cannot simply be
  54. * copied and put under another distribution licence
  55. * [including the GNU Public Licence.] */
  56. #include <openssl/bn.h>
  57. #include <assert.h>
  58. #include "internal.h"
  59. /* This file has two other implementations: x86 assembly language in
  60. * asm/bn-586.pl and x86_64 inline assembly in asm/x86_64-gcc.c. */
  61. #if defined(OPENSSL_NO_ASM) || \
  62. !(defined(OPENSSL_X86) || (defined(OPENSSL_X86_64) && defined(__GNUC__)))
  63. #ifdef BN_ULLONG
  64. #define mul_add(r, a, w, c) \
  65. { \
  66. BN_ULLONG t; \
  67. t = (BN_ULLONG)w * (a) + (r) + (c); \
  68. (r) = Lw(t); \
  69. (c) = Hw(t); \
  70. }
  71. #define mul(r, a, w, c) \
  72. { \
  73. BN_ULLONG t; \
  74. t = (BN_ULLONG)w * (a) + (c); \
  75. (r) = Lw(t); \
  76. (c) = Hw(t); \
  77. }
  78. #define sqr(r0, r1, a) \
  79. { \
  80. BN_ULLONG t; \
  81. t = (BN_ULLONG)(a) * (a); \
  82. (r0) = Lw(t); \
  83. (r1) = Hw(t); \
  84. }
  85. #else
  86. #define mul_add(r, a, w, c) \
  87. { \
  88. BN_ULONG high, low, ret, tmp = (a); \
  89. ret = (r); \
  90. BN_UMULT_LOHI(low, high, w, tmp); \
  91. ret += (c); \
  92. (c) = (ret < (c)) ? 1 : 0; \
  93. (c) += high; \
  94. ret += low; \
  95. (c) += (ret < low) ? 1 : 0; \
  96. (r) = ret; \
  97. }
  98. #define mul(r, a, w, c) \
  99. { \
  100. BN_ULONG high, low, ret, ta = (a); \
  101. BN_UMULT_LOHI(low, high, w, ta); \
  102. ret = low + (c); \
  103. (c) = high; \
  104. (c) += (ret < low) ? 1 : 0; \
  105. (r) = ret; \
  106. }
  107. #define sqr(r0, r1, a) \
  108. { \
  109. BN_ULONG tmp = (a); \
  110. BN_UMULT_LOHI(r0, r1, tmp, tmp); \
  111. }
  112. #endif /* !BN_ULLONG */
  113. BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
  114. BN_ULONG w) {
  115. BN_ULONG c1 = 0;
  116. assert(num >= 0);
  117. if (num <= 0) {
  118. return c1;
  119. }
  120. while (num & ~3) {
  121. mul_add(rp[0], ap[0], w, c1);
  122. mul_add(rp[1], ap[1], w, c1);
  123. mul_add(rp[2], ap[2], w, c1);
  124. mul_add(rp[3], ap[3], w, c1);
  125. ap += 4;
  126. rp += 4;
  127. num -= 4;
  128. }
  129. while (num) {
  130. mul_add(rp[0], ap[0], w, c1);
  131. ap++;
  132. rp++;
  133. num--;
  134. }
  135. return c1;
  136. }
  137. BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) {
  138. BN_ULONG c1 = 0;
  139. assert(num >= 0);
  140. if (num <= 0) {
  141. return c1;
  142. }
  143. while (num & ~3) {
  144. mul(rp[0], ap[0], w, c1);
  145. mul(rp[1], ap[1], w, c1);
  146. mul(rp[2], ap[2], w, c1);
  147. mul(rp[3], ap[3], w, c1);
  148. ap += 4;
  149. rp += 4;
  150. num -= 4;
  151. }
  152. while (num) {
  153. mul(rp[0], ap[0], w, c1);
  154. ap++;
  155. rp++;
  156. num--;
  157. }
  158. return c1;
  159. }
  160. void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) {
  161. assert(n >= 0);
  162. if (n <= 0) {
  163. return;
  164. }
  165. while (n & ~3) {
  166. sqr(r[0], r[1], a[0]);
  167. sqr(r[2], r[3], a[1]);
  168. sqr(r[4], r[5], a[2]);
  169. sqr(r[6], r[7], a[3]);
  170. a += 4;
  171. r += 8;
  172. n -= 4;
  173. }
  174. while (n) {
  175. sqr(r[0], r[1], a[0]);
  176. a++;
  177. r += 2;
  178. n--;
  179. }
  180. }
  181. #if defined(BN_ULLONG)
  182. BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) {
  183. return (BN_ULONG)(((((BN_ULLONG)h) << BN_BITS2) | l) / (BN_ULLONG)d);
  184. }
  185. #else
  186. /* Divide h,l by d and return the result. */
  187. BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) {
  188. BN_ULONG dh, dl, q, ret = 0, th, tl, t;
  189. int i, count = 2;
  190. if (d == 0) {
  191. return BN_MASK2;
  192. }
  193. i = BN_num_bits_word(d);
  194. assert((i == BN_BITS2) || (h <= (BN_ULONG)1 << i));
  195. i = BN_BITS2 - i;
  196. if (h >= d) {
  197. h -= d;
  198. }
  199. if (i) {
  200. d <<= i;
  201. h = (h << i) | (l >> (BN_BITS2 - i));
  202. l <<= i;
  203. }
  204. dh = (d & BN_MASK2h) >> BN_BITS4;
  205. dl = (d & BN_MASK2l);
  206. for (;;) {
  207. if ((h >> BN_BITS4) == dh) {
  208. q = BN_MASK2l;
  209. } else {
  210. q = h / dh;
  211. }
  212. th = q * dh;
  213. tl = dl * q;
  214. for (;;) {
  215. t = h - th;
  216. if ((t & BN_MASK2h) ||
  217. ((tl) <= ((t << BN_BITS4) | ((l & BN_MASK2h) >> BN_BITS4)))) {
  218. break;
  219. }
  220. q--;
  221. th -= dh;
  222. tl -= dl;
  223. }
  224. t = (tl >> BN_BITS4);
  225. tl = (tl << BN_BITS4) & BN_MASK2h;
  226. th += t;
  227. if (l < tl) {
  228. th++;
  229. }
  230. l -= tl;
  231. if (h < th) {
  232. h += d;
  233. q--;
  234. }
  235. h -= th;
  236. if (--count == 0) {
  237. break;
  238. }
  239. ret = q << BN_BITS4;
  240. h = ((h << BN_BITS4) | (l >> BN_BITS4)) & BN_MASK2;
  241. l = (l & BN_MASK2l) << BN_BITS4;
  242. }
  243. ret |= q;
  244. return ret;
  245. }
  246. #endif /* !defined(BN_ULLONG) */
  247. #ifdef BN_ULLONG
  248. BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
  249. int n) {
  250. BN_ULLONG ll = 0;
  251. assert(n >= 0);
  252. if (n <= 0) {
  253. return (BN_ULONG)0;
  254. }
  255. while (n & ~3) {
  256. ll += (BN_ULLONG)a[0] + b[0];
  257. r[0] = (BN_ULONG)ll & BN_MASK2;
  258. ll >>= BN_BITS2;
  259. ll += (BN_ULLONG)a[1] + b[1];
  260. r[1] = (BN_ULONG)ll & BN_MASK2;
  261. ll >>= BN_BITS2;
  262. ll += (BN_ULLONG)a[2] + b[2];
  263. r[2] = (BN_ULONG)ll & BN_MASK2;
  264. ll >>= BN_BITS2;
  265. ll += (BN_ULLONG)a[3] + b[3];
  266. r[3] = (BN_ULONG)ll & BN_MASK2;
  267. ll >>= BN_BITS2;
  268. a += 4;
  269. b += 4;
  270. r += 4;
  271. n -= 4;
  272. }
  273. while (n) {
  274. ll += (BN_ULLONG)a[0] + b[0];
  275. r[0] = (BN_ULONG)ll & BN_MASK2;
  276. ll >>= BN_BITS2;
  277. a++;
  278. b++;
  279. r++;
  280. n--;
  281. }
  282. return (BN_ULONG)ll;
  283. }
  284. #else /* !BN_ULLONG */
  285. BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
  286. int n) {
  287. BN_ULONG c, l, t;
  288. assert(n >= 0);
  289. if (n <= 0) {
  290. return (BN_ULONG)0;
  291. }
  292. c = 0;
  293. while (n & ~3) {
  294. t = a[0];
  295. t = (t + c) & BN_MASK2;
  296. c = (t < c);
  297. l = (t + b[0]) & BN_MASK2;
  298. c += (l < t);
  299. r[0] = l;
  300. t = a[1];
  301. t = (t + c) & BN_MASK2;
  302. c = (t < c);
  303. l = (t + b[1]) & BN_MASK2;
  304. c += (l < t);
  305. r[1] = l;
  306. t = a[2];
  307. t = (t + c) & BN_MASK2;
  308. c = (t < c);
  309. l = (t + b[2]) & BN_MASK2;
  310. c += (l < t);
  311. r[2] = l;
  312. t = a[3];
  313. t = (t + c) & BN_MASK2;
  314. c = (t < c);
  315. l = (t + b[3]) & BN_MASK2;
  316. c += (l < t);
  317. r[3] = l;
  318. a += 4;
  319. b += 4;
  320. r += 4;
  321. n -= 4;
  322. }
  323. while (n) {
  324. t = a[0];
  325. t = (t + c) & BN_MASK2;
  326. c = (t < c);
  327. l = (t + b[0]) & BN_MASK2;
  328. c += (l < t);
  329. r[0] = l;
  330. a++;
  331. b++;
  332. r++;
  333. n--;
  334. }
  335. return (BN_ULONG)c;
  336. }
  337. #endif /* !BN_ULLONG */
  338. BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
  339. int n) {
  340. BN_ULONG t1, t2;
  341. int c = 0;
  342. assert(n >= 0);
  343. if (n <= 0) {
  344. return (BN_ULONG)0;
  345. }
  346. while (n & ~3) {
  347. t1 = a[0];
  348. t2 = b[0];
  349. r[0] = (t1 - t2 - c) & BN_MASK2;
  350. if (t1 != t2) {
  351. c = (t1 < t2);
  352. }
  353. t1 = a[1];
  354. t2 = b[1];
  355. r[1] = (t1 - t2 - c) & BN_MASK2;
  356. if (t1 != t2) {
  357. c = (t1 < t2);
  358. }
  359. t1 = a[2];
  360. t2 = b[2];
  361. r[2] = (t1 - t2 - c) & BN_MASK2;
  362. if (t1 != t2) {
  363. c = (t1 < t2);
  364. }
  365. t1 = a[3];
  366. t2 = b[3];
  367. r[3] = (t1 - t2 - c) & BN_MASK2;
  368. if (t1 != t2) {
  369. c = (t1 < t2);
  370. }
  371. a += 4;
  372. b += 4;
  373. r += 4;
  374. n -= 4;
  375. }
  376. while (n) {
  377. t1 = a[0];
  378. t2 = b[0];
  379. r[0] = (t1 - t2 - c) & BN_MASK2;
  380. if (t1 != t2) {
  381. c = (t1 < t2);
  382. }
  383. a++;
  384. b++;
  385. r++;
  386. n--;
  387. }
  388. return c;
  389. }
  390. /* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */
  391. /* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
  392. /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
  393. /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
  394. #ifdef BN_ULLONG
  395. /* Keep in mind that additions to multiplication result can not overflow,
  396. * because its high half cannot be all-ones. */
  397. #define mul_add_c(a, b, c0, c1, c2) \
  398. do { \
  399. BN_ULONG hi; \
  400. BN_ULLONG t = (BN_ULLONG)(a) * (b); \
  401. t += c0; /* no carry */ \
  402. c0 = (BN_ULONG)Lw(t); \
  403. hi = (BN_ULONG)Hw(t); \
  404. c1 = (c1 + hi) & BN_MASK2; \
  405. if (c1 < hi) \
  406. c2++; \
  407. } while (0)
  408. #define mul_add_c2(a, b, c0, c1, c2) \
  409. do { \
  410. BN_ULONG hi; \
  411. BN_ULLONG t = (BN_ULLONG)(a) * (b); \
  412. BN_ULLONG tt = t + c0; /* no carry */ \
  413. c0 = (BN_ULONG)Lw(tt); \
  414. hi = (BN_ULONG)Hw(tt); \
  415. c1 = (c1 + hi) & BN_MASK2; \
  416. if (c1 < hi) \
  417. c2++; \
  418. t += c0; /* no carry */ \
  419. c0 = (BN_ULONG)Lw(t); \
  420. hi = (BN_ULONG)Hw(t); \
  421. c1 = (c1 + hi) & BN_MASK2; \
  422. if (c1 < hi) \
  423. c2++; \
  424. } while (0)
  425. #define sqr_add_c(a, i, c0, c1, c2) \
  426. do { \
  427. BN_ULONG hi; \
  428. BN_ULLONG t = (BN_ULLONG)a[i] * a[i]; \
  429. t += c0; /* no carry */ \
  430. c0 = (BN_ULONG)Lw(t); \
  431. hi = (BN_ULONG)Hw(t); \
  432. c1 = (c1 + hi) & BN_MASK2; \
  433. if (c1 < hi) \
  434. c2++; \
  435. } while (0)
  436. #define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2)
  437. #else
  438. /* Keep in mind that additions to hi can not overflow, because the high word of
  439. * a multiplication result cannot be all-ones. */
  440. #define mul_add_c(a, b, c0, c1, c2) \
  441. do { \
  442. BN_ULONG ta = (a), tb = (b); \
  443. BN_ULONG lo, hi; \
  444. BN_UMULT_LOHI(lo, hi, ta, tb); \
  445. c0 += lo; \
  446. hi += (c0 < lo) ? 1 : 0; \
  447. c1 += hi; \
  448. c2 += (c1 < hi) ? 1 : 0; \
  449. } while (0)
  450. #define mul_add_c2(a, b, c0, c1, c2) \
  451. do { \
  452. BN_ULONG ta = (a), tb = (b); \
  453. BN_ULONG lo, hi, tt; \
  454. BN_UMULT_LOHI(lo, hi, ta, tb); \
  455. c0 += lo; \
  456. tt = hi + ((c0 < lo) ? 1 : 0); \
  457. c1 += tt; \
  458. c2 += (c1 < tt) ? 1 : 0; \
  459. c0 += lo; \
  460. hi += (c0 < lo) ? 1 : 0; \
  461. c1 += hi; \
  462. c2 += (c1 < hi) ? 1 : 0; \
  463. } while (0)
  464. #define sqr_add_c(a, i, c0, c1, c2) \
  465. do { \
  466. BN_ULONG ta = (a)[i]; \
  467. BN_ULONG lo, hi; \
  468. BN_UMULT_LOHI(lo, hi, ta, ta); \
  469. c0 += lo; \
  470. hi += (c0 < lo) ? 1 : 0; \
  471. c1 += hi; \
  472. c2 += (c1 < hi) ? 1 : 0; \
  473. } while (0)
  474. #define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2)
  475. #endif /* !BN_ULLONG */
  476. void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) {
  477. BN_ULONG c1, c2, c3;
  478. c1 = 0;
  479. c2 = 0;
  480. c3 = 0;
  481. mul_add_c(a[0], b[0], c1, c2, c3);
  482. r[0] = c1;
  483. c1 = 0;
  484. mul_add_c(a[0], b[1], c2, c3, c1);
  485. mul_add_c(a[1], b[0], c2, c3, c1);
  486. r[1] = c2;
  487. c2 = 0;
  488. mul_add_c(a[2], b[0], c3, c1, c2);
  489. mul_add_c(a[1], b[1], c3, c1, c2);
  490. mul_add_c(a[0], b[2], c3, c1, c2);
  491. r[2] = c3;
  492. c3 = 0;
  493. mul_add_c(a[0], b[3], c1, c2, c3);
  494. mul_add_c(a[1], b[2], c1, c2, c3);
  495. mul_add_c(a[2], b[1], c1, c2, c3);
  496. mul_add_c(a[3], b[0], c1, c2, c3);
  497. r[3] = c1;
  498. c1 = 0;
  499. mul_add_c(a[4], b[0], c2, c3, c1);
  500. mul_add_c(a[3], b[1], c2, c3, c1);
  501. mul_add_c(a[2], b[2], c2, c3, c1);
  502. mul_add_c(a[1], b[3], c2, c3, c1);
  503. mul_add_c(a[0], b[4], c2, c3, c1);
  504. r[4] = c2;
  505. c2 = 0;
  506. mul_add_c(a[0], b[5], c3, c1, c2);
  507. mul_add_c(a[1], b[4], c3, c1, c2);
  508. mul_add_c(a[2], b[3], c3, c1, c2);
  509. mul_add_c(a[3], b[2], c3, c1, c2);
  510. mul_add_c(a[4], b[1], c3, c1, c2);
  511. mul_add_c(a[5], b[0], c3, c1, c2);
  512. r[5] = c3;
  513. c3 = 0;
  514. mul_add_c(a[6], b[0], c1, c2, c3);
  515. mul_add_c(a[5], b[1], c1, c2, c3);
  516. mul_add_c(a[4], b[2], c1, c2, c3);
  517. mul_add_c(a[3], b[3], c1, c2, c3);
  518. mul_add_c(a[2], b[4], c1, c2, c3);
  519. mul_add_c(a[1], b[5], c1, c2, c3);
  520. mul_add_c(a[0], b[6], c1, c2, c3);
  521. r[6] = c1;
  522. c1 = 0;
  523. mul_add_c(a[0], b[7], c2, c3, c1);
  524. mul_add_c(a[1], b[6], c2, c3, c1);
  525. mul_add_c(a[2], b[5], c2, c3, c1);
  526. mul_add_c(a[3], b[4], c2, c3, c1);
  527. mul_add_c(a[4], b[3], c2, c3, c1);
  528. mul_add_c(a[5], b[2], c2, c3, c1);
  529. mul_add_c(a[6], b[1], c2, c3, c1);
  530. mul_add_c(a[7], b[0], c2, c3, c1);
  531. r[7] = c2;
  532. c2 = 0;
  533. mul_add_c(a[7], b[1], c3, c1, c2);
  534. mul_add_c(a[6], b[2], c3, c1, c2);
  535. mul_add_c(a[5], b[3], c3, c1, c2);
  536. mul_add_c(a[4], b[4], c3, c1, c2);
  537. mul_add_c(a[3], b[5], c3, c1, c2);
  538. mul_add_c(a[2], b[6], c3, c1, c2);
  539. mul_add_c(a[1], b[7], c3, c1, c2);
  540. r[8] = c3;
  541. c3 = 0;
  542. mul_add_c(a[2], b[7], c1, c2, c3);
  543. mul_add_c(a[3], b[6], c1, c2, c3);
  544. mul_add_c(a[4], b[5], c1, c2, c3);
  545. mul_add_c(a[5], b[4], c1, c2, c3);
  546. mul_add_c(a[6], b[3], c1, c2, c3);
  547. mul_add_c(a[7], b[2], c1, c2, c3);
  548. r[9] = c1;
  549. c1 = 0;
  550. mul_add_c(a[7], b[3], c2, c3, c1);
  551. mul_add_c(a[6], b[4], c2, c3, c1);
  552. mul_add_c(a[5], b[5], c2, c3, c1);
  553. mul_add_c(a[4], b[6], c2, c3, c1);
  554. mul_add_c(a[3], b[7], c2, c3, c1);
  555. r[10] = c2;
  556. c2 = 0;
  557. mul_add_c(a[4], b[7], c3, c1, c2);
  558. mul_add_c(a[5], b[6], c3, c1, c2);
  559. mul_add_c(a[6], b[5], c3, c1, c2);
  560. mul_add_c(a[7], b[4], c3, c1, c2);
  561. r[11] = c3;
  562. c3 = 0;
  563. mul_add_c(a[7], b[5], c1, c2, c3);
  564. mul_add_c(a[6], b[6], c1, c2, c3);
  565. mul_add_c(a[5], b[7], c1, c2, c3);
  566. r[12] = c1;
  567. c1 = 0;
  568. mul_add_c(a[6], b[7], c2, c3, c1);
  569. mul_add_c(a[7], b[6], c2, c3, c1);
  570. r[13] = c2;
  571. c2 = 0;
  572. mul_add_c(a[7], b[7], c3, c1, c2);
  573. r[14] = c3;
  574. r[15] = c1;
  575. }
  576. void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) {
  577. BN_ULONG c1, c2, c3;
  578. c1 = 0;
  579. c2 = 0;
  580. c3 = 0;
  581. mul_add_c(a[0], b[0], c1, c2, c3);
  582. r[0] = c1;
  583. c1 = 0;
  584. mul_add_c(a[0], b[1], c2, c3, c1);
  585. mul_add_c(a[1], b[0], c2, c3, c1);
  586. r[1] = c2;
  587. c2 = 0;
  588. mul_add_c(a[2], b[0], c3, c1, c2);
  589. mul_add_c(a[1], b[1], c3, c1, c2);
  590. mul_add_c(a[0], b[2], c3, c1, c2);
  591. r[2] = c3;
  592. c3 = 0;
  593. mul_add_c(a[0], b[3], c1, c2, c3);
  594. mul_add_c(a[1], b[2], c1, c2, c3);
  595. mul_add_c(a[2], b[1], c1, c2, c3);
  596. mul_add_c(a[3], b[0], c1, c2, c3);
  597. r[3] = c1;
  598. c1 = 0;
  599. mul_add_c(a[3], b[1], c2, c3, c1);
  600. mul_add_c(a[2], b[2], c2, c3, c1);
  601. mul_add_c(a[1], b[3], c2, c3, c1);
  602. r[4] = c2;
  603. c2 = 0;
  604. mul_add_c(a[2], b[3], c3, c1, c2);
  605. mul_add_c(a[3], b[2], c3, c1, c2);
  606. r[5] = c3;
  607. c3 = 0;
  608. mul_add_c(a[3], b[3], c1, c2, c3);
  609. r[6] = c1;
  610. r[7] = c2;
  611. }
  612. void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) {
  613. BN_ULONG c1, c2, c3;
  614. c1 = 0;
  615. c2 = 0;
  616. c3 = 0;
  617. sqr_add_c(a, 0, c1, c2, c3);
  618. r[0] = c1;
  619. c1 = 0;
  620. sqr_add_c2(a, 1, 0, c2, c3, c1);
  621. r[1] = c2;
  622. c2 = 0;
  623. sqr_add_c(a, 1, c3, c1, c2);
  624. sqr_add_c2(a, 2, 0, c3, c1, c2);
  625. r[2] = c3;
  626. c3 = 0;
  627. sqr_add_c2(a, 3, 0, c1, c2, c3);
  628. sqr_add_c2(a, 2, 1, c1, c2, c3);
  629. r[3] = c1;
  630. c1 = 0;
  631. sqr_add_c(a, 2, c2, c3, c1);
  632. sqr_add_c2(a, 3, 1, c2, c3, c1);
  633. sqr_add_c2(a, 4, 0, c2, c3, c1);
  634. r[4] = c2;
  635. c2 = 0;
  636. sqr_add_c2(a, 5, 0, c3, c1, c2);
  637. sqr_add_c2(a, 4, 1, c3, c1, c2);
  638. sqr_add_c2(a, 3, 2, c3, c1, c2);
  639. r[5] = c3;
  640. c3 = 0;
  641. sqr_add_c(a, 3, c1, c2, c3);
  642. sqr_add_c2(a, 4, 2, c1, c2, c3);
  643. sqr_add_c2(a, 5, 1, c1, c2, c3);
  644. sqr_add_c2(a, 6, 0, c1, c2, c3);
  645. r[6] = c1;
  646. c1 = 0;
  647. sqr_add_c2(a, 7, 0, c2, c3, c1);
  648. sqr_add_c2(a, 6, 1, c2, c3, c1);
  649. sqr_add_c2(a, 5, 2, c2, c3, c1);
  650. sqr_add_c2(a, 4, 3, c2, c3, c1);
  651. r[7] = c2;
  652. c2 = 0;
  653. sqr_add_c(a, 4, c3, c1, c2);
  654. sqr_add_c2(a, 5, 3, c3, c1, c2);
  655. sqr_add_c2(a, 6, 2, c3, c1, c2);
  656. sqr_add_c2(a, 7, 1, c3, c1, c2);
  657. r[8] = c3;
  658. c3 = 0;
  659. sqr_add_c2(a, 7, 2, c1, c2, c3);
  660. sqr_add_c2(a, 6, 3, c1, c2, c3);
  661. sqr_add_c2(a, 5, 4, c1, c2, c3);
  662. r[9] = c1;
  663. c1 = 0;
  664. sqr_add_c(a, 5, c2, c3, c1);
  665. sqr_add_c2(a, 6, 4, c2, c3, c1);
  666. sqr_add_c2(a, 7, 3, c2, c3, c1);
  667. r[10] = c2;
  668. c2 = 0;
  669. sqr_add_c2(a, 7, 4, c3, c1, c2);
  670. sqr_add_c2(a, 6, 5, c3, c1, c2);
  671. r[11] = c3;
  672. c3 = 0;
  673. sqr_add_c(a, 6, c1, c2, c3);
  674. sqr_add_c2(a, 7, 5, c1, c2, c3);
  675. r[12] = c1;
  676. c1 = 0;
  677. sqr_add_c2(a, 7, 6, c2, c3, c1);
  678. r[13] = c2;
  679. c2 = 0;
  680. sqr_add_c(a, 7, c3, c1, c2);
  681. r[14] = c3;
  682. r[15] = c1;
  683. }
  684. void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) {
  685. BN_ULONG c1, c2, c3;
  686. c1 = 0;
  687. c2 = 0;
  688. c3 = 0;
  689. sqr_add_c(a, 0, c1, c2, c3);
  690. r[0] = c1;
  691. c1 = 0;
  692. sqr_add_c2(a, 1, 0, c2, c3, c1);
  693. r[1] = c2;
  694. c2 = 0;
  695. sqr_add_c(a, 1, c3, c1, c2);
  696. sqr_add_c2(a, 2, 0, c3, c1, c2);
  697. r[2] = c3;
  698. c3 = 0;
  699. sqr_add_c2(a, 3, 0, c1, c2, c3);
  700. sqr_add_c2(a, 2, 1, c1, c2, c3);
  701. r[3] = c1;
  702. c1 = 0;
  703. sqr_add_c(a, 2, c2, c3, c1);
  704. sqr_add_c2(a, 3, 1, c2, c3, c1);
  705. r[4] = c2;
  706. c2 = 0;
  707. sqr_add_c2(a, 3, 2, c3, c1, c2);
  708. r[5] = c3;
  709. c3 = 0;
  710. sqr_add_c(a, 3, c1, c2, c3);
  711. r[6] = c1;
  712. r[7] = c2;
  713. }
  714. #endif