exponentiation.c 34 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240
  1. /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
  2. * All rights reserved.
  3. *
  4. * This package is an SSL implementation written
  5. * by Eric Young (eay@cryptsoft.com).
  6. * The implementation was written so as to conform with Netscapes SSL.
  7. *
  8. * This library is free for commercial and non-commercial use as long as
  9. * the following conditions are aheared to. The following conditions
  10. * apply to all code found in this distribution, be it the RC4, RSA,
  11. * lhash, DES, etc., code; not just the SSL code. The SSL documentation
  12. * included with this distribution is covered by the same copyright terms
  13. * except that the holder is Tim Hudson (tjh@cryptsoft.com).
  14. *
  15. * Copyright remains Eric Young's, and as such any Copyright notices in
  16. * the code are not to be removed.
  17. * If this package is used in a product, Eric Young should be given attribution
  18. * as the author of the parts of the library used.
  19. * This can be in the form of a textual message at program startup or
  20. * in documentation (online or textual) provided with the package.
  21. *
  22. * Redistribution and use in source and binary forms, with or without
  23. * modification, are permitted provided that the following conditions
  24. * are met:
  25. * 1. Redistributions of source code must retain the copyright
  26. * notice, this list of conditions and the following disclaimer.
  27. * 2. Redistributions in binary form must reproduce the above copyright
  28. * notice, this list of conditions and the following disclaimer in the
  29. * documentation and/or other materials provided with the distribution.
  30. * 3. All advertising materials mentioning features or use of this software
  31. * must display the following acknowledgement:
  32. * "This product includes cryptographic software written by
  33. * Eric Young (eay@cryptsoft.com)"
  34. * The word 'cryptographic' can be left out if the rouines from the library
  35. * being used are not cryptographic related :-).
  36. * 4. If you include any Windows specific code (or a derivative thereof) from
  37. * the apps directory (application code) you must include an acknowledgement:
  38. * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
  39. *
  40. * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
  41. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  42. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  43. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  44. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  45. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  46. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  47. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  48. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  49. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  50. * SUCH DAMAGE.
  51. *
  52. * The licence and distribution terms for any publically available version or
  53. * derivative of this code cannot be changed. i.e. this code cannot simply be
  54. * copied and put under another distribution licence
  55. * [including the GNU Public Licence.]
  56. */
  57. /* ====================================================================
  58. * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved.
  59. *
  60. * Redistribution and use in source and binary forms, with or without
  61. * modification, are permitted provided that the following conditions
  62. * are met:
  63. *
  64. * 1. Redistributions of source code must retain the above copyright
  65. * notice, this list of conditions and the following disclaimer.
  66. *
  67. * 2. Redistributions in binary form must reproduce the above copyright
  68. * notice, this list of conditions and the following disclaimer in
  69. * the documentation and/or other materials provided with the
  70. * distribution.
  71. *
  72. * 3. All advertising materials mentioning features or use of this
  73. * software must display the following acknowledgment:
  74. * "This product includes software developed by the OpenSSL Project
  75. * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
  76. *
  77. * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
  78. * endorse or promote products derived from this software without
  79. * prior written permission. For written permission, please contact
  80. * openssl-core@openssl.org.
  81. *
  82. * 5. Products derived from this software may not be called "OpenSSL"
  83. * nor may "OpenSSL" appear in their names without prior written
  84. * permission of the OpenSSL Project.
  85. *
  86. * 6. Redistributions of any form whatsoever must retain the following
  87. * acknowledgment:
  88. * "This product includes software developed by the OpenSSL Project
  89. * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
  90. *
  91. * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
  92. * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  93. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  94. * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
  95. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  96. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  97. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  98. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  99. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  100. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  101. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
  102. * OF THE POSSIBILITY OF SUCH DAMAGE.
  103. * ====================================================================
  104. *
  105. * This product includes cryptographic software written by Eric Young
  106. * (eay@cryptsoft.com). This product includes software written by Tim
  107. * Hudson (tjh@cryptsoft.com). */
  108. #include <openssl/bn.h>
  109. #include <assert.h>
  110. #include <string.h>
  111. #include <openssl/cpu.h>
  112. #include <openssl/err.h>
  113. #include <openssl/mem.h>
  114. #include "internal.h"
  115. #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64)
  116. #define OPENSSL_BN_ASM_MONT5
  117. #define RSAZ_ENABLED
  118. #include "rsaz_exp.h"
  119. void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, const void *table,
  120. const BN_ULONG *np, const BN_ULONG *n0, int num,
  121. int power);
  122. void bn_scatter5(const BN_ULONG *inp, size_t num, void *table, size_t power);
  123. void bn_gather5(BN_ULONG *out, size_t num, void *table, size_t power);
  124. void bn_power5(BN_ULONG *rp, const BN_ULONG *ap, const void *table,
  125. const BN_ULONG *np, const BN_ULONG *n0, int num, int power);
  126. int bn_from_montgomery(BN_ULONG *rp, const BN_ULONG *ap,
  127. const BN_ULONG *not_used, const BN_ULONG *np,
  128. const BN_ULONG *n0, int num);
  129. #endif
  130. int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) {
  131. int i, bits, ret = 0;
  132. BIGNUM *v, *rr;
  133. BN_CTX_start(ctx);
  134. if (r == a || r == p) {
  135. rr = BN_CTX_get(ctx);
  136. } else {
  137. rr = r;
  138. }
  139. v = BN_CTX_get(ctx);
  140. if (rr == NULL || v == NULL) {
  141. goto err;
  142. }
  143. if (BN_copy(v, a) == NULL) {
  144. goto err;
  145. }
  146. bits = BN_num_bits(p);
  147. if (BN_is_odd(p)) {
  148. if (BN_copy(rr, a) == NULL) {
  149. goto err;
  150. }
  151. } else {
  152. if (!BN_one(rr)) {
  153. goto err;
  154. }
  155. }
  156. for (i = 1; i < bits; i++) {
  157. if (!BN_sqr(v, v, ctx)) {
  158. goto err;
  159. }
  160. if (BN_is_bit_set(p, i)) {
  161. if (!BN_mul(rr, rr, v, ctx)) {
  162. goto err;
  163. }
  164. }
  165. }
  166. if (r != rr && !BN_copy(r, rr)) {
  167. goto err;
  168. }
  169. ret = 1;
  170. err:
  171. BN_CTX_end(ctx);
  172. return ret;
  173. }
  174. /* maximum precomputation table size for *variable* sliding windows */
  175. #define TABLE_SIZE 32
  176. typedef struct bn_recp_ctx_st {
  177. BIGNUM N; /* the divisor */
  178. BIGNUM Nr; /* the reciprocal */
  179. int num_bits;
  180. int shift;
  181. int flags;
  182. } BN_RECP_CTX;
  183. static void BN_RECP_CTX_init(BN_RECP_CTX *recp) {
  184. BN_init(&recp->N);
  185. BN_init(&recp->Nr);
  186. recp->num_bits = 0;
  187. recp->shift = 0;
  188. recp->flags = 0;
  189. }
  190. static void BN_RECP_CTX_free(BN_RECP_CTX *recp) {
  191. if (recp == NULL) {
  192. return;
  193. }
  194. BN_free(&recp->N);
  195. BN_free(&recp->Nr);
  196. }
  197. static int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx) {
  198. if (!BN_copy(&(recp->N), d)) {
  199. return 0;
  200. }
  201. BN_zero(&recp->Nr);
  202. recp->num_bits = BN_num_bits(d);
  203. recp->shift = 0;
  204. return 1;
  205. }
  206. /* len is the expected size of the result We actually calculate with an extra
  207. * word of precision, so we can do faster division if the remainder is not
  208. * required.
  209. * r := 2^len / m */
  210. static int BN_reciprocal(BIGNUM *r, const BIGNUM *m, int len, BN_CTX *ctx) {
  211. int ret = -1;
  212. BIGNUM *t;
  213. BN_CTX_start(ctx);
  214. t = BN_CTX_get(ctx);
  215. if (t == NULL) {
  216. goto err;
  217. }
  218. if (!BN_set_bit(t, len)) {
  219. goto err;
  220. }
  221. if (!BN_div(r, NULL, t, m, ctx)) {
  222. goto err;
  223. }
  224. ret = len;
  225. err:
  226. BN_CTX_end(ctx);
  227. return ret;
  228. }
  229. static int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
  230. BN_RECP_CTX *recp, BN_CTX *ctx) {
  231. int i, j, ret = 0;
  232. BIGNUM *a, *b, *d, *r;
  233. BN_CTX_start(ctx);
  234. a = BN_CTX_get(ctx);
  235. b = BN_CTX_get(ctx);
  236. if (dv != NULL) {
  237. d = dv;
  238. } else {
  239. d = BN_CTX_get(ctx);
  240. }
  241. if (rem != NULL) {
  242. r = rem;
  243. } else {
  244. r = BN_CTX_get(ctx);
  245. }
  246. if (a == NULL || b == NULL || d == NULL || r == NULL) {
  247. goto err;
  248. }
  249. if (BN_ucmp(m, &recp->N) < 0) {
  250. BN_zero(d);
  251. if (!BN_copy(r, m)) {
  252. goto err;
  253. }
  254. BN_CTX_end(ctx);
  255. return 1;
  256. }
  257. /* We want the remainder
  258. * Given input of ABCDEF / ab
  259. * we need multiply ABCDEF by 3 digests of the reciprocal of ab */
  260. /* i := max(BN_num_bits(m), 2*BN_num_bits(N)) */
  261. i = BN_num_bits(m);
  262. j = recp->num_bits << 1;
  263. if (j > i) {
  264. i = j;
  265. }
  266. /* Nr := round(2^i / N) */
  267. if (i != recp->shift) {
  268. recp->shift =
  269. BN_reciprocal(&(recp->Nr), &(recp->N), i,
  270. ctx); /* BN_reciprocal returns i, or -1 for an error */
  271. }
  272. if (recp->shift == -1) {
  273. goto err;
  274. }
  275. /* d := |round(round(m / 2^BN_num_bits(N)) * recp->Nr / 2^(i -
  276. * BN_num_bits(N)))|
  277. * = |round(round(m / 2^BN_num_bits(N)) * round(2^i / N) / 2^(i -
  278. * BN_num_bits(N)))|
  279. * <= |(m / 2^BN_num_bits(N)) * (2^i / N) * (2^BN_num_bits(N) / 2^i)|
  280. * = |m/N| */
  281. if (!BN_rshift(a, m, recp->num_bits)) {
  282. goto err;
  283. }
  284. if (!BN_mul(b, a, &(recp->Nr), ctx)) {
  285. goto err;
  286. }
  287. if (!BN_rshift(d, b, i - recp->num_bits)) {
  288. goto err;
  289. }
  290. d->neg = 0;
  291. if (!BN_mul(b, &(recp->N), d, ctx)) {
  292. goto err;
  293. }
  294. if (!BN_usub(r, m, b)) {
  295. goto err;
  296. }
  297. r->neg = 0;
  298. j = 0;
  299. while (BN_ucmp(r, &(recp->N)) >= 0) {
  300. if (j++ > 2) {
  301. OPENSSL_PUT_ERROR(BN, BN_R_BAD_RECIPROCAL);
  302. goto err;
  303. }
  304. if (!BN_usub(r, r, &(recp->N))) {
  305. goto err;
  306. }
  307. if (!BN_add_word(d, 1)) {
  308. goto err;
  309. }
  310. }
  311. r->neg = BN_is_zero(r) ? 0 : m->neg;
  312. d->neg = m->neg ^ recp->N.neg;
  313. ret = 1;
  314. err:
  315. BN_CTX_end(ctx);
  316. return ret;
  317. }
  318. static int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y,
  319. BN_RECP_CTX *recp, BN_CTX *ctx) {
  320. int ret = 0;
  321. BIGNUM *a;
  322. const BIGNUM *ca;
  323. BN_CTX_start(ctx);
  324. a = BN_CTX_get(ctx);
  325. if (a == NULL) {
  326. goto err;
  327. }
  328. if (y != NULL) {
  329. if (x == y) {
  330. if (!BN_sqr(a, x, ctx)) {
  331. goto err;
  332. }
  333. } else {
  334. if (!BN_mul(a, x, y, ctx)) {
  335. goto err;
  336. }
  337. }
  338. ca = a;
  339. } else {
  340. ca = x; /* Just do the mod */
  341. }
  342. ret = BN_div_recp(NULL, r, ca, recp, ctx);
  343. err:
  344. BN_CTX_end(ctx);
  345. return ret;
  346. }
  347. /* BN_window_bits_for_exponent_size -- macro for sliding window mod_exp
  348. * functions
  349. *
  350. * For window size 'w' (w >= 2) and a random 'b' bits exponent, the number of
  351. * multiplications is a constant plus on average
  352. *
  353. * 2^(w-1) + (b-w)/(w+1);
  354. *
  355. * here 2^(w-1) is for precomputing the table (we actually need entries only
  356. * for windows that have the lowest bit set), and (b-w)/(w+1) is an
  357. * approximation for the expected number of w-bit windows, not counting the
  358. * first one.
  359. *
  360. * Thus we should use
  361. *
  362. * w >= 6 if b > 671
  363. * w = 5 if 671 > b > 239
  364. * w = 4 if 239 > b > 79
  365. * w = 3 if 79 > b > 23
  366. * w <= 2 if 23 > b
  367. *
  368. * (with draws in between). Very small exponents are often selected
  369. * with low Hamming weight, so we use w = 1 for b <= 23. */
  370. #define BN_window_bits_for_exponent_size(b) \
  371. ((b) > 671 ? 6 : \
  372. (b) > 239 ? 5 : \
  373. (b) > 79 ? 4 : \
  374. (b) > 23 ? 3 : 1)
  375. static int mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
  376. const BIGNUM *m, BN_CTX *ctx) {
  377. int i, j, bits, ret = 0, wstart, window;
  378. int start = 1;
  379. BIGNUM *aa;
  380. /* Table of variables obtained from 'ctx' */
  381. BIGNUM *val[TABLE_SIZE];
  382. BN_RECP_CTX recp;
  383. bits = BN_num_bits(p);
  384. if (bits == 0) {
  385. /* x**0 mod 1 is still zero. */
  386. if (BN_is_one(m)) {
  387. BN_zero(r);
  388. return 1;
  389. }
  390. return BN_one(r);
  391. }
  392. BN_CTX_start(ctx);
  393. aa = BN_CTX_get(ctx);
  394. val[0] = BN_CTX_get(ctx);
  395. if (!aa || !val[0]) {
  396. goto err;
  397. }
  398. BN_RECP_CTX_init(&recp);
  399. if (m->neg) {
  400. /* ignore sign of 'm' */
  401. if (!BN_copy(aa, m)) {
  402. goto err;
  403. }
  404. aa->neg = 0;
  405. if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0) {
  406. goto err;
  407. }
  408. } else {
  409. if (BN_RECP_CTX_set(&recp, m, ctx) <= 0) {
  410. goto err;
  411. }
  412. }
  413. if (!BN_nnmod(val[0], a, m, ctx)) {
  414. goto err; /* 1 */
  415. }
  416. if (BN_is_zero(val[0])) {
  417. BN_zero(r);
  418. ret = 1;
  419. goto err;
  420. }
  421. window = BN_window_bits_for_exponent_size(bits);
  422. if (window > 1) {
  423. if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx)) {
  424. goto err; /* 2 */
  425. }
  426. j = 1 << (window - 1);
  427. for (i = 1; i < j; i++) {
  428. if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
  429. !BN_mod_mul_reciprocal(val[i], val[i - 1], aa, &recp, ctx)) {
  430. goto err;
  431. }
  432. }
  433. }
  434. start = 1; /* This is used to avoid multiplication etc
  435. * when there is only the value '1' in the
  436. * buffer. */
  437. wstart = bits - 1; /* The top bit of the window */
  438. if (!BN_one(r)) {
  439. goto err;
  440. }
  441. for (;;) {
  442. int wvalue; /* The 'value' of the window */
  443. int wend; /* The bottom bit of the window */
  444. if (BN_is_bit_set(p, wstart) == 0) {
  445. if (!start) {
  446. if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) {
  447. goto err;
  448. }
  449. }
  450. if (wstart == 0) {
  451. break;
  452. }
  453. wstart--;
  454. continue;
  455. }
  456. /* We now have wstart on a 'set' bit, we now need to work out
  457. * how bit a window to do. To do this we need to scan
  458. * forward until the last set bit before the end of the
  459. * window */
  460. wvalue = 1;
  461. wend = 0;
  462. for (i = 1; i < window; i++) {
  463. if (wstart - i < 0) {
  464. break;
  465. }
  466. if (BN_is_bit_set(p, wstart - i)) {
  467. wvalue <<= (i - wend);
  468. wvalue |= 1;
  469. wend = i;
  470. }
  471. }
  472. /* wend is the size of the current window */
  473. j = wend + 1;
  474. /* add the 'bytes above' */
  475. if (!start) {
  476. for (i = 0; i < j; i++) {
  477. if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) {
  478. goto err;
  479. }
  480. }
  481. }
  482. /* wvalue will be an odd number < 2^window */
  483. if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], &recp, ctx)) {
  484. goto err;
  485. }
  486. /* move the 'window' down further */
  487. wstart -= wend + 1;
  488. start = 0;
  489. if (wstart < 0) {
  490. break;
  491. }
  492. }
  493. ret = 1;
  494. err:
  495. BN_CTX_end(ctx);
  496. BN_RECP_CTX_free(&recp);
  497. return ret;
  498. }
  499. int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
  500. BN_CTX *ctx) {
  501. if (BN_is_odd(m)) {
  502. return BN_mod_exp_mont(r, a, p, m, ctx, NULL);
  503. }
  504. return mod_exp_recp(r, a, p, m, ctx);
  505. }
  506. int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
  507. const BIGNUM *m, BN_CTX *ctx, const BN_MONT_CTX *mont) {
  508. int i, j, bits, ret = 0, wstart, window;
  509. int start = 1;
  510. BIGNUM *d, *r;
  511. const BIGNUM *aa;
  512. /* Table of variables obtained from 'ctx' */
  513. BIGNUM *val[TABLE_SIZE];
  514. BN_MONT_CTX *new_mont = NULL;
  515. if (!BN_is_odd(m)) {
  516. OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS);
  517. return 0;
  518. }
  519. bits = BN_num_bits(p);
  520. if (bits == 0) {
  521. /* x**0 mod 1 is still zero. */
  522. if (BN_is_one(m)) {
  523. BN_zero(rr);
  524. return 1;
  525. }
  526. return BN_one(rr);
  527. }
  528. BN_CTX_start(ctx);
  529. d = BN_CTX_get(ctx);
  530. r = BN_CTX_get(ctx);
  531. val[0] = BN_CTX_get(ctx);
  532. if (!d || !r || !val[0]) {
  533. goto err;
  534. }
  535. /* Allocate a montgomery context if it was not supplied by the caller. */
  536. if (mont == NULL) {
  537. new_mont = BN_MONT_CTX_new();
  538. if (new_mont == NULL || !BN_MONT_CTX_set(new_mont, m, ctx)) {
  539. goto err;
  540. }
  541. mont = new_mont;
  542. }
  543. if (a->neg || BN_ucmp(a, m) >= 0) {
  544. if (!BN_nnmod(val[0], a, m, ctx)) {
  545. goto err;
  546. }
  547. aa = val[0];
  548. } else {
  549. aa = a;
  550. }
  551. if (BN_is_zero(aa)) {
  552. BN_zero(rr);
  553. ret = 1;
  554. goto err;
  555. }
  556. if (!BN_to_montgomery(val[0], aa, mont, ctx)) {
  557. goto err; /* 1 */
  558. }
  559. window = BN_window_bits_for_exponent_size(bits);
  560. if (window > 1) {
  561. if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) {
  562. goto err; /* 2 */
  563. }
  564. j = 1 << (window - 1);
  565. for (i = 1; i < j; i++) {
  566. if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
  567. !BN_mod_mul_montgomery(val[i], val[i - 1], d, mont, ctx)) {
  568. goto err;
  569. }
  570. }
  571. }
  572. start = 1; /* This is used to avoid multiplication etc
  573. * when there is only the value '1' in the
  574. * buffer. */
  575. wstart = bits - 1; /* The top bit of the window */
  576. j = m->top; /* borrow j */
  577. if (m->d[j - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) {
  578. if (bn_wexpand(r, j) == NULL) {
  579. goto err;
  580. }
  581. /* 2^(top*BN_BITS2) - m */
  582. r->d[0] = (0 - m->d[0]) & BN_MASK2;
  583. for (i = 1; i < j; i++) {
  584. r->d[i] = (~m->d[i]) & BN_MASK2;
  585. }
  586. r->top = j;
  587. /* Upper words will be zero if the corresponding words of 'm'
  588. * were 0xfff[...], so decrement r->top accordingly. */
  589. bn_correct_top(r);
  590. } else if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) {
  591. goto err;
  592. }
  593. for (;;) {
  594. int wvalue; /* The 'value' of the window */
  595. int wend; /* The bottom bit of the window */
  596. if (BN_is_bit_set(p, wstart) == 0) {
  597. if (!start && !BN_mod_mul_montgomery(r, r, r, mont, ctx)) {
  598. goto err;
  599. }
  600. if (wstart == 0) {
  601. break;
  602. }
  603. wstart--;
  604. continue;
  605. }
  606. /* We now have wstart on a 'set' bit, we now need to work out how bit a
  607. * window to do. To do this we need to scan forward until the last set bit
  608. * before the end of the window */
  609. wvalue = 1;
  610. wend = 0;
  611. for (i = 1; i < window; i++) {
  612. if (wstart - i < 0) {
  613. break;
  614. }
  615. if (BN_is_bit_set(p, wstart - i)) {
  616. wvalue <<= (i - wend);
  617. wvalue |= 1;
  618. wend = i;
  619. }
  620. }
  621. /* wend is the size of the current window */
  622. j = wend + 1;
  623. /* add the 'bytes above' */
  624. if (!start) {
  625. for (i = 0; i < j; i++) {
  626. if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) {
  627. goto err;
  628. }
  629. }
  630. }
  631. /* wvalue will be an odd number < 2^window */
  632. if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) {
  633. goto err;
  634. }
  635. /* move the 'window' down further */
  636. wstart -= wend + 1;
  637. start = 0;
  638. if (wstart < 0) {
  639. break;
  640. }
  641. }
  642. if (!BN_from_montgomery(rr, r, mont, ctx)) {
  643. goto err;
  644. }
  645. ret = 1;
  646. err:
  647. BN_MONT_CTX_free(new_mont);
  648. BN_CTX_end(ctx);
  649. return ret;
  650. }
  651. /* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific
  652. * layout so that accessing any of these table values shows the same access
  653. * pattern as far as cache lines are concerned. The following functions are
  654. * used to transfer a BIGNUM from/to that table. */
  655. static int copy_to_prebuf(const BIGNUM *b, int top, unsigned char *buf, int idx,
  656. int window) {
  657. int i, j;
  658. const int width = 1 << window;
  659. BN_ULONG *table = (BN_ULONG *) buf;
  660. if (top > b->top) {
  661. top = b->top; /* this works because 'buf' is explicitly zeroed */
  662. }
  663. for (i = 0, j = idx; i < top; i++, j += width) {
  664. table[j] = b->d[i];
  665. }
  666. return 1;
  667. }
  668. static int copy_from_prebuf(BIGNUM *b, int top, unsigned char *buf, int idx,
  669. int window) {
  670. int i, j;
  671. const int width = 1 << window;
  672. volatile BN_ULONG *table = (volatile BN_ULONG *)buf;
  673. if (bn_wexpand(b, top) == NULL) {
  674. return 0;
  675. }
  676. if (window <= 3) {
  677. for (i = 0; i < top; i++, table += width) {
  678. BN_ULONG acc = 0;
  679. for (j = 0; j < width; j++) {
  680. acc |= table[j] & ((BN_ULONG)0 - (constant_time_eq_int(j, idx) & 1));
  681. }
  682. b->d[i] = acc;
  683. }
  684. } else {
  685. int xstride = 1 << (window - 2);
  686. BN_ULONG y0, y1, y2, y3;
  687. i = idx >> (window - 2); /* equivalent of idx / xstride */
  688. idx &= xstride - 1; /* equivalent of idx % xstride */
  689. y0 = (BN_ULONG)0 - (constant_time_eq_int(i, 0) & 1);
  690. y1 = (BN_ULONG)0 - (constant_time_eq_int(i, 1) & 1);
  691. y2 = (BN_ULONG)0 - (constant_time_eq_int(i, 2) & 1);
  692. y3 = (BN_ULONG)0 - (constant_time_eq_int(i, 3) & 1);
  693. for (i = 0; i < top; i++, table += width) {
  694. BN_ULONG acc = 0;
  695. for (j = 0; j < xstride; j++) {
  696. acc |= ((table[j + 0 * xstride] & y0) | (table[j + 1 * xstride] & y1) |
  697. (table[j + 2 * xstride] & y2) | (table[j + 3 * xstride] & y3)) &
  698. ((BN_ULONG)0 - (constant_time_eq_int(j, idx) & 1));
  699. }
  700. b->d[i] = acc;
  701. }
  702. }
  703. b->top = top;
  704. bn_correct_top(b);
  705. return 1;
  706. }
  707. /* BN_mod_exp_mont_conttime is based on the assumption that the L1 data cache
  708. * line width of the target processor is at least the following value. */
  709. #define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH (64)
  710. #define MOD_EXP_CTIME_MIN_CACHE_LINE_MASK \
  711. (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - 1)
  712. /* Window sizes optimized for fixed window size modular exponentiation
  713. * algorithm (BN_mod_exp_mont_consttime).
  714. *
  715. * To achieve the security goals of BN_mode_exp_mont_consttime, the maximum
  716. * size of the window must not exceed
  717. * log_2(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH).
  718. *
  719. * Window size thresholds are defined for cache line sizes of 32 and 64, cache
  720. * line sizes where log_2(32)=5 and log_2(64)=6 respectively. A window size of
  721. * 7 should only be used on processors that have a 128 byte or greater cache
  722. * line size. */
  723. #if MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 64
  724. #define BN_window_bits_for_ctime_exponent_size(b) \
  725. ((b) > 937 ? 6 : (b) > 306 ? 5 : (b) > 89 ? 4 : (b) > 22 ? 3 : 1)
  726. #define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (6)
  727. #elif MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 32
  728. #define BN_window_bits_for_ctime_exponent_size(b) \
  729. ((b) > 306 ? 5 : (b) > 89 ? 4 : (b) > 22 ? 3 : 1)
  730. #define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (5)
  731. #endif
  732. /* Given a pointer value, compute the next address that is a cache line
  733. * multiple. */
  734. #define MOD_EXP_CTIME_ALIGN(x_) \
  735. ((unsigned char *)(x_) + \
  736. (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - \
  737. (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
  738. /* This variant of BN_mod_exp_mont() uses fixed windows and the special
  739. * precomputation memory layout to limit data-dependency to a minimum
  740. * to protect secret exponents (cf. the hyper-threading timing attacks
  741. * pointed out by Colin Percival,
  742. * http://www.daemonology.net/hyperthreading-considered-harmful/)
  743. */
  744. int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
  745. const BIGNUM *m, BN_CTX *ctx,
  746. const BN_MONT_CTX *mont) {
  747. int i, bits, ret = 0, window, wvalue;
  748. int top;
  749. BN_MONT_CTX *new_mont = NULL;
  750. int numPowers;
  751. unsigned char *powerbufFree = NULL;
  752. int powerbufLen = 0;
  753. unsigned char *powerbuf = NULL;
  754. BIGNUM tmp, am;
  755. BIGNUM *new_a = NULL;
  756. if (!BN_is_odd(m)) {
  757. OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS);
  758. return 0;
  759. }
  760. top = m->top;
  761. bits = BN_num_bits(p);
  762. if (bits == 0) {
  763. /* x**0 mod 1 is still zero. */
  764. if (BN_is_one(m)) {
  765. BN_zero(rr);
  766. return 1;
  767. }
  768. return BN_one(rr);
  769. }
  770. /* Allocate a montgomery context if it was not supplied by the caller. */
  771. if (mont == NULL) {
  772. new_mont = BN_MONT_CTX_new();
  773. if (new_mont == NULL || !BN_MONT_CTX_set(new_mont, m, ctx)) {
  774. goto err;
  775. }
  776. mont = new_mont;
  777. }
  778. if (a->neg || BN_ucmp(a, m) >= 0) {
  779. new_a = BN_new();
  780. if (new_a == NULL ||
  781. !BN_nnmod(new_a, a, m, ctx)) {
  782. goto err;
  783. }
  784. a = new_a;
  785. }
  786. #ifdef RSAZ_ENABLED
  787. /* If the size of the operands allow it, perform the optimized
  788. * RSAZ exponentiation. For further information see
  789. * crypto/bn/rsaz_exp.c and accompanying assembly modules. */
  790. if ((16 == a->top) && (16 == p->top) && (BN_num_bits(m) == 1024) &&
  791. rsaz_avx2_eligible()) {
  792. if (NULL == bn_wexpand(rr, 16)) {
  793. goto err;
  794. }
  795. RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d, mont->n0[0]);
  796. rr->top = 16;
  797. rr->neg = 0;
  798. bn_correct_top(rr);
  799. ret = 1;
  800. goto err;
  801. }
  802. #endif
  803. /* Get the window size to use with size of p. */
  804. window = BN_window_bits_for_ctime_exponent_size(bits);
  805. #if defined(OPENSSL_BN_ASM_MONT5)
  806. if (window >= 5) {
  807. window = 5; /* ~5% improvement for RSA2048 sign, and even for RSA4096 */
  808. /* reserve space for mont->N.d[] copy */
  809. powerbufLen += top * sizeof(mont->N.d[0]);
  810. }
  811. #endif
  812. /* Allocate a buffer large enough to hold all of the pre-computed
  813. * powers of am, am itself and tmp.
  814. */
  815. numPowers = 1 << window;
  816. powerbufLen +=
  817. sizeof(m->d[0]) *
  818. (top * numPowers + ((2 * top) > numPowers ? (2 * top) : numPowers));
  819. #ifdef alloca
  820. if (powerbufLen < 3072) {
  821. powerbufFree = alloca(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
  822. } else
  823. #endif
  824. {
  825. if ((powerbufFree = OPENSSL_malloc(
  826. powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL) {
  827. goto err;
  828. }
  829. }
  830. powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
  831. OPENSSL_memset(powerbuf, 0, powerbufLen);
  832. #ifdef alloca
  833. if (powerbufLen < 3072) {
  834. powerbufFree = NULL;
  835. }
  836. #endif
  837. /* lay down tmp and am right after powers table */
  838. tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers);
  839. am.d = tmp.d + top;
  840. tmp.top = am.top = 0;
  841. tmp.dmax = am.dmax = top;
  842. tmp.neg = am.neg = 0;
  843. tmp.flags = am.flags = BN_FLG_STATIC_DATA;
  844. /* prepare a^0 in Montgomery domain */
  845. /* by Shay Gueron's suggestion */
  846. if (m->d[top - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) {
  847. /* 2^(top*BN_BITS2) - m */
  848. tmp.d[0] = (0 - m->d[0]) & BN_MASK2;
  849. for (i = 1; i < top; i++) {
  850. tmp.d[i] = (~m->d[i]) & BN_MASK2;
  851. }
  852. tmp.top = top;
  853. } else if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx)) {
  854. goto err;
  855. }
  856. /* prepare a^1 in Montgomery domain */
  857. assert(!a->neg);
  858. assert(BN_ucmp(a, m) < 0);
  859. if (!BN_to_montgomery(&am, a, mont, ctx)) {
  860. goto err;
  861. }
  862. #if defined(OPENSSL_BN_ASM_MONT5)
  863. /* This optimization uses ideas from http://eprint.iacr.org/2011/239,
  864. * specifically optimization of cache-timing attack countermeasures
  865. * and pre-computation optimization. */
  866. /* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
  867. * 512-bit RSA is hardly relevant, we omit it to spare size... */
  868. if (window == 5 && top > 1) {
  869. const BN_ULONG *n0 = mont->n0;
  870. BN_ULONG *np;
  871. /* BN_to_montgomery can contaminate words above .top
  872. * [in BN_DEBUG[_DEBUG] build]... */
  873. for (i = am.top; i < top; i++) {
  874. am.d[i] = 0;
  875. }
  876. for (i = tmp.top; i < top; i++) {
  877. tmp.d[i] = 0;
  878. }
  879. /* copy mont->N.d[] to improve cache locality */
  880. for (np = am.d + top, i = 0; i < top; i++) {
  881. np[i] = mont->N.d[i];
  882. }
  883. bn_scatter5(tmp.d, top, powerbuf, 0);
  884. bn_scatter5(am.d, am.top, powerbuf, 1);
  885. bn_mul_mont(tmp.d, am.d, am.d, np, n0, top);
  886. bn_scatter5(tmp.d, top, powerbuf, 2);
  887. /* same as above, but uses squaring for 1/2 of operations */
  888. for (i = 4; i < 32; i *= 2) {
  889. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  890. bn_scatter5(tmp.d, top, powerbuf, i);
  891. }
  892. for (i = 3; i < 8; i += 2) {
  893. int j;
  894. bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
  895. bn_scatter5(tmp.d, top, powerbuf, i);
  896. for (j = 2 * i; j < 32; j *= 2) {
  897. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  898. bn_scatter5(tmp.d, top, powerbuf, j);
  899. }
  900. }
  901. for (; i < 16; i += 2) {
  902. bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
  903. bn_scatter5(tmp.d, top, powerbuf, i);
  904. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  905. bn_scatter5(tmp.d, top, powerbuf, 2 * i);
  906. }
  907. for (; i < 32; i += 2) {
  908. bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
  909. bn_scatter5(tmp.d, top, powerbuf, i);
  910. }
  911. bits--;
  912. for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) {
  913. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  914. }
  915. bn_gather5(tmp.d, top, powerbuf, wvalue);
  916. /* At this point |bits| is 4 mod 5 and at least -1. (|bits| is the first bit
  917. * that has not been read yet.) */
  918. assert(bits >= -1 && (bits == -1 || bits % 5 == 4));
  919. /* Scan the exponent one window at a time starting from the most
  920. * significant bits.
  921. */
  922. if (top & 7) {
  923. while (bits >= 0) {
  924. for (wvalue = 0, i = 0; i < 5; i++, bits--) {
  925. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  926. }
  927. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  928. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  929. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  930. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  931. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  932. bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
  933. }
  934. } else {
  935. const uint8_t *p_bytes = (const uint8_t *)p->d;
  936. int max_bits = p->top * BN_BITS2;
  937. assert(bits < max_bits);
  938. /* |p = 0| has been handled as a special case, so |max_bits| is at least
  939. * one word. */
  940. assert(max_bits >= 64);
  941. /* If the first bit to be read lands in the last byte, unroll the first
  942. * iteration to avoid reading past the bounds of |p->d|. (After the first
  943. * iteration, we are guaranteed to be past the last byte.) Note |bits|
  944. * here is the top bit, inclusive. */
  945. if (bits - 4 >= max_bits - 8) {
  946. /* Read five bits from |bits-4| through |bits|, inclusive. */
  947. wvalue = p_bytes[p->top * BN_BYTES - 1];
  948. wvalue >>= (bits - 4) & 7;
  949. wvalue &= 0x1f;
  950. bits -= 5;
  951. bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
  952. }
  953. while (bits >= 0) {
  954. /* Read five bits from |bits-4| through |bits|, inclusive. */
  955. int first_bit = bits - 4;
  956. wvalue = *(const uint16_t *) (p_bytes + (first_bit >> 3));
  957. wvalue >>= first_bit & 7;
  958. wvalue &= 0x1f;
  959. bits -= 5;
  960. bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
  961. }
  962. }
  963. ret = bn_from_montgomery(tmp.d, tmp.d, NULL, np, n0, top);
  964. tmp.top = top;
  965. bn_correct_top(&tmp);
  966. if (ret) {
  967. if (!BN_copy(rr, &tmp)) {
  968. ret = 0;
  969. }
  970. goto err; /* non-zero ret means it's not error */
  971. }
  972. } else
  973. #endif
  974. {
  975. if (!copy_to_prebuf(&tmp, top, powerbuf, 0, window) ||
  976. !copy_to_prebuf(&am, top, powerbuf, 1, window)) {
  977. goto err;
  978. }
  979. /* If the window size is greater than 1, then calculate
  980. * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
  981. * (even powers could instead be computed as (a^(i/2))^2
  982. * to use the slight performance advantage of sqr over mul).
  983. */
  984. if (window > 1) {
  985. if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx) ||
  986. !copy_to_prebuf(&tmp, top, powerbuf, 2, window)) {
  987. goto err;
  988. }
  989. for (i = 3; i < numPowers; i++) {
  990. /* Calculate a^i = a^(i-1) * a */
  991. if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx) ||
  992. !copy_to_prebuf(&tmp, top, powerbuf, i, window)) {
  993. goto err;
  994. }
  995. }
  996. }
  997. bits--;
  998. for (wvalue = 0, i = bits % window; i >= 0; i--, bits--) {
  999. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  1000. }
  1001. if (!copy_from_prebuf(&tmp, top, powerbuf, wvalue, window)) {
  1002. goto err;
  1003. }
  1004. /* Scan the exponent one window at a time starting from the most
  1005. * significant bits.
  1006. */
  1007. while (bits >= 0) {
  1008. wvalue = 0; /* The 'value' of the window */
  1009. /* Scan the window, squaring the result as we go */
  1010. for (i = 0; i < window; i++, bits--) {
  1011. if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, mont, ctx)) {
  1012. goto err;
  1013. }
  1014. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  1015. }
  1016. /* Fetch the appropriate pre-computed value from the pre-buf */
  1017. if (!copy_from_prebuf(&am, top, powerbuf, wvalue, window)) {
  1018. goto err;
  1019. }
  1020. /* Multiply the result into the intermediate result */
  1021. if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx)) {
  1022. goto err;
  1023. }
  1024. }
  1025. }
  1026. /* Convert the final result from montgomery to standard format */
  1027. if (!BN_from_montgomery(rr, &tmp, mont, ctx)) {
  1028. goto err;
  1029. }
  1030. ret = 1;
  1031. err:
  1032. BN_MONT_CTX_free(new_mont);
  1033. BN_clear_free(new_a);
  1034. if (powerbuf != NULL) {
  1035. OPENSSL_cleanse(powerbuf, powerbufLen);
  1036. OPENSSL_free(powerbufFree);
  1037. }
  1038. return (ret);
  1039. }
  1040. int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
  1041. const BIGNUM *m, BN_CTX *ctx,
  1042. const BN_MONT_CTX *mont) {
  1043. BIGNUM a_bignum;
  1044. BN_init(&a_bignum);
  1045. int ret = 0;
  1046. if (!BN_set_word(&a_bignum, a)) {
  1047. OPENSSL_PUT_ERROR(BN, ERR_R_INTERNAL_ERROR);
  1048. goto err;
  1049. }
  1050. ret = BN_mod_exp_mont(rr, &a_bignum, p, m, ctx, mont);
  1051. err:
  1052. BN_free(&a_bignum);
  1053. return ret;
  1054. }
  1055. #define TABLE_SIZE 32
  1056. int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1,
  1057. const BIGNUM *a2, const BIGNUM *p2, const BIGNUM *m,
  1058. BN_CTX *ctx, const BN_MONT_CTX *mont) {
  1059. BIGNUM tmp;
  1060. BN_init(&tmp);
  1061. int ret = 0;
  1062. BN_MONT_CTX *new_mont = NULL;
  1063. /* Allocate a montgomery context if it was not supplied by the caller. */
  1064. if (mont == NULL) {
  1065. new_mont = BN_MONT_CTX_new();
  1066. if (new_mont == NULL || !BN_MONT_CTX_set(new_mont, m, ctx)) {
  1067. goto err;
  1068. }
  1069. mont = new_mont;
  1070. }
  1071. /* BN_mod_mul_montgomery removes one Montgomery factor, so passing one
  1072. * Montgomery-encoded and one non-Montgomery-encoded value gives a
  1073. * non-Montgomery-encoded result. */
  1074. if (!BN_mod_exp_mont(rr, a1, p1, m, ctx, mont) ||
  1075. !BN_mod_exp_mont(&tmp, a2, p2, m, ctx, mont) ||
  1076. !BN_to_montgomery(rr, rr, mont, ctx) ||
  1077. !BN_mod_mul_montgomery(rr, rr, &tmp, mont, ctx)) {
  1078. goto err;
  1079. }
  1080. ret = 1;
  1081. err:
  1082. BN_MONT_CTX_free(new_mont);
  1083. BN_free(&tmp);
  1084. return ret;
  1085. }