exponentiation.c 39 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390
  1. /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
  2. * All rights reserved.
  3. *
  4. * This package is an SSL implementation written
  5. * by Eric Young (eay@cryptsoft.com).
  6. * The implementation was written so as to conform with Netscapes SSL.
  7. *
  8. * This library is free for commercial and non-commercial use as long as
  9. * the following conditions are aheared to. The following conditions
  10. * apply to all code found in this distribution, be it the RC4, RSA,
  11. * lhash, DES, etc., code; not just the SSL code. The SSL documentation
  12. * included with this distribution is covered by the same copyright terms
  13. * except that the holder is Tim Hudson (tjh@cryptsoft.com).
  14. *
  15. * Copyright remains Eric Young's, and as such any Copyright notices in
  16. * the code are not to be removed.
  17. * If this package is used in a product, Eric Young should be given attribution
  18. * as the author of the parts of the library used.
  19. * This can be in the form of a textual message at program startup or
  20. * in documentation (online or textual) provided with the package.
  21. *
  22. * Redistribution and use in source and binary forms, with or without
  23. * modification, are permitted provided that the following conditions
  24. * are met:
  25. * 1. Redistributions of source code must retain the copyright
  26. * notice, this list of conditions and the following disclaimer.
  27. * 2. Redistributions in binary form must reproduce the above copyright
  28. * notice, this list of conditions and the following disclaimer in the
  29. * documentation and/or other materials provided with the distribution.
  30. * 3. All advertising materials mentioning features or use of this software
  31. * must display the following acknowledgement:
  32. * "This product includes cryptographic software written by
  33. * Eric Young (eay@cryptsoft.com)"
  34. * The word 'cryptographic' can be left out if the rouines from the library
  35. * being used are not cryptographic related :-).
  36. * 4. If you include any Windows specific code (or a derivative thereof) from
  37. * the apps directory (application code) you must include an acknowledgement:
  38. * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
  39. *
  40. * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
  41. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  42. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  43. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  44. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  45. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  46. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  47. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  48. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  49. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  50. * SUCH DAMAGE.
  51. *
  52. * The licence and distribution terms for any publically available version or
  53. * derivative of this code cannot be changed. i.e. this code cannot simply be
  54. * copied and put under another distribution licence
  55. * [including the GNU Public Licence.]
  56. */
  57. /* ====================================================================
  58. * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved.
  59. *
  60. * Redistribution and use in source and binary forms, with or without
  61. * modification, are permitted provided that the following conditions
  62. * are met:
  63. *
  64. * 1. Redistributions of source code must retain the above copyright
  65. * notice, this list of conditions and the following disclaimer.
  66. *
  67. * 2. Redistributions in binary form must reproduce the above copyright
  68. * notice, this list of conditions and the following disclaimer in
  69. * the documentation and/or other materials provided with the
  70. * distribution.
  71. *
  72. * 3. All advertising materials mentioning features or use of this
  73. * software must display the following acknowledgment:
  74. * "This product includes software developed by the OpenSSL Project
  75. * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
  76. *
  77. * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
  78. * endorse or promote products derived from this software without
  79. * prior written permission. For written permission, please contact
  80. * openssl-core@openssl.org.
  81. *
  82. * 5. Products derived from this software may not be called "OpenSSL"
  83. * nor may "OpenSSL" appear in their names without prior written
  84. * permission of the OpenSSL Project.
  85. *
  86. * 6. Redistributions of any form whatsoever must retain the following
  87. * acknowledgment:
  88. * "This product includes software developed by the OpenSSL Project
  89. * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
  90. *
  91. * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
  92. * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  93. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  94. * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
  95. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  96. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  97. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  98. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  99. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  100. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  101. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
  102. * OF THE POSSIBILITY OF SUCH DAMAGE.
  103. * ====================================================================
  104. *
  105. * This product includes cryptographic software written by Eric Young
  106. * (eay@cryptsoft.com). This product includes software written by Tim
  107. * Hudson (tjh@cryptsoft.com). */
  108. #include <openssl/bn.h>
  109. #include <assert.h>
  110. #include <string.h>
  111. #include <openssl/cpu.h>
  112. #include <openssl/err.h>
  113. #include <openssl/mem.h>
  114. #include "internal.h"
  115. #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64)
  116. #define OPENSSL_BN_ASM_MONT5
  117. #define RSAZ_ENABLED
  118. #include "rsaz_exp.h"
  119. void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, const void *table,
  120. const BN_ULONG *np, const BN_ULONG *n0, int num,
  121. int power);
  122. void bn_scatter5(const BN_ULONG *inp, size_t num, void *table, size_t power);
  123. void bn_gather5(BN_ULONG *out, size_t num, void *table, size_t power);
  124. void bn_power5(BN_ULONG *rp, const BN_ULONG *ap, const void *table,
  125. const BN_ULONG *np, const BN_ULONG *n0, int num, int power);
  126. int bn_from_montgomery(BN_ULONG *rp, const BN_ULONG *ap,
  127. const BN_ULONG *not_used, const BN_ULONG *np,
  128. const BN_ULONG *n0, int num);
  129. #endif
  130. int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) {
  131. int i, bits, ret = 0;
  132. BIGNUM *v, *rr;
  133. BN_CTX_start(ctx);
  134. if (r == a || r == p) {
  135. rr = BN_CTX_get(ctx);
  136. } else {
  137. rr = r;
  138. }
  139. v = BN_CTX_get(ctx);
  140. if (rr == NULL || v == NULL) {
  141. goto err;
  142. }
  143. if (BN_copy(v, a) == NULL) {
  144. goto err;
  145. }
  146. bits = BN_num_bits(p);
  147. if (BN_is_odd(p)) {
  148. if (BN_copy(rr, a) == NULL) {
  149. goto err;
  150. }
  151. } else {
  152. if (!BN_one(rr)) {
  153. goto err;
  154. }
  155. }
  156. for (i = 1; i < bits; i++) {
  157. if (!BN_sqr(v, v, ctx)) {
  158. goto err;
  159. }
  160. if (BN_is_bit_set(p, i)) {
  161. if (!BN_mul(rr, rr, v, ctx)) {
  162. goto err;
  163. }
  164. }
  165. }
  166. if (r != rr && !BN_copy(r, rr)) {
  167. goto err;
  168. }
  169. ret = 1;
  170. err:
  171. BN_CTX_end(ctx);
  172. return ret;
  173. }
  174. typedef struct bn_recp_ctx_st {
  175. BIGNUM N; // the divisor
  176. BIGNUM Nr; // the reciprocal
  177. int num_bits;
  178. int shift;
  179. int flags;
  180. } BN_RECP_CTX;
  181. static void BN_RECP_CTX_init(BN_RECP_CTX *recp) {
  182. BN_init(&recp->N);
  183. BN_init(&recp->Nr);
  184. recp->num_bits = 0;
  185. recp->shift = 0;
  186. recp->flags = 0;
  187. }
  188. static void BN_RECP_CTX_free(BN_RECP_CTX *recp) {
  189. if (recp == NULL) {
  190. return;
  191. }
  192. BN_free(&recp->N);
  193. BN_free(&recp->Nr);
  194. }
  195. static int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx) {
  196. if (!BN_copy(&(recp->N), d)) {
  197. return 0;
  198. }
  199. BN_zero(&recp->Nr);
  200. recp->num_bits = BN_num_bits(d);
  201. recp->shift = 0;
  202. return 1;
  203. }
  204. // len is the expected size of the result We actually calculate with an extra
  205. // word of precision, so we can do faster division if the remainder is not
  206. // required.
  207. // r := 2^len / m
  208. static int BN_reciprocal(BIGNUM *r, const BIGNUM *m, int len, BN_CTX *ctx) {
  209. int ret = -1;
  210. BIGNUM *t;
  211. BN_CTX_start(ctx);
  212. t = BN_CTX_get(ctx);
  213. if (t == NULL) {
  214. goto err;
  215. }
  216. if (!BN_set_bit(t, len)) {
  217. goto err;
  218. }
  219. if (!BN_div(r, NULL, t, m, ctx)) {
  220. goto err;
  221. }
  222. ret = len;
  223. err:
  224. BN_CTX_end(ctx);
  225. return ret;
  226. }
  227. static int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
  228. BN_RECP_CTX *recp, BN_CTX *ctx) {
  229. int i, j, ret = 0;
  230. BIGNUM *a, *b, *d, *r;
  231. BN_CTX_start(ctx);
  232. a = BN_CTX_get(ctx);
  233. b = BN_CTX_get(ctx);
  234. if (dv != NULL) {
  235. d = dv;
  236. } else {
  237. d = BN_CTX_get(ctx);
  238. }
  239. if (rem != NULL) {
  240. r = rem;
  241. } else {
  242. r = BN_CTX_get(ctx);
  243. }
  244. if (a == NULL || b == NULL || d == NULL || r == NULL) {
  245. goto err;
  246. }
  247. if (BN_ucmp(m, &recp->N) < 0) {
  248. BN_zero(d);
  249. if (!BN_copy(r, m)) {
  250. goto err;
  251. }
  252. BN_CTX_end(ctx);
  253. return 1;
  254. }
  255. // We want the remainder
  256. // Given input of ABCDEF / ab
  257. // we need multiply ABCDEF by 3 digests of the reciprocal of ab
  258. // i := max(BN_num_bits(m), 2*BN_num_bits(N))
  259. i = BN_num_bits(m);
  260. j = recp->num_bits << 1;
  261. if (j > i) {
  262. i = j;
  263. }
  264. // Nr := round(2^i / N)
  265. if (i != recp->shift) {
  266. recp->shift =
  267. BN_reciprocal(&(recp->Nr), &(recp->N), i,
  268. ctx); // BN_reciprocal returns i, or -1 for an error
  269. }
  270. if (recp->shift == -1) {
  271. goto err;
  272. }
  273. // d := |round(round(m / 2^BN_num_bits(N)) * recp->Nr / 2^(i -
  274. // BN_num_bits(N)))|
  275. // = |round(round(m / 2^BN_num_bits(N)) * round(2^i / N) / 2^(i -
  276. // BN_num_bits(N)))|
  277. // <= |(m / 2^BN_num_bits(N)) * (2^i / N) * (2^BN_num_bits(N) / 2^i)|
  278. // = |m/N|
  279. if (!BN_rshift(a, m, recp->num_bits)) {
  280. goto err;
  281. }
  282. if (!BN_mul(b, a, &(recp->Nr), ctx)) {
  283. goto err;
  284. }
  285. if (!BN_rshift(d, b, i - recp->num_bits)) {
  286. goto err;
  287. }
  288. d->neg = 0;
  289. if (!BN_mul(b, &(recp->N), d, ctx)) {
  290. goto err;
  291. }
  292. if (!BN_usub(r, m, b)) {
  293. goto err;
  294. }
  295. r->neg = 0;
  296. j = 0;
  297. while (BN_ucmp(r, &(recp->N)) >= 0) {
  298. if (j++ > 2) {
  299. OPENSSL_PUT_ERROR(BN, BN_R_BAD_RECIPROCAL);
  300. goto err;
  301. }
  302. if (!BN_usub(r, r, &(recp->N))) {
  303. goto err;
  304. }
  305. if (!BN_add_word(d, 1)) {
  306. goto err;
  307. }
  308. }
  309. r->neg = BN_is_zero(r) ? 0 : m->neg;
  310. d->neg = m->neg ^ recp->N.neg;
  311. ret = 1;
  312. err:
  313. BN_CTX_end(ctx);
  314. return ret;
  315. }
  316. static int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y,
  317. BN_RECP_CTX *recp, BN_CTX *ctx) {
  318. int ret = 0;
  319. BIGNUM *a;
  320. const BIGNUM *ca;
  321. BN_CTX_start(ctx);
  322. a = BN_CTX_get(ctx);
  323. if (a == NULL) {
  324. goto err;
  325. }
  326. if (y != NULL) {
  327. if (x == y) {
  328. if (!BN_sqr(a, x, ctx)) {
  329. goto err;
  330. }
  331. } else {
  332. if (!BN_mul(a, x, y, ctx)) {
  333. goto err;
  334. }
  335. }
  336. ca = a;
  337. } else {
  338. ca = x; // Just do the mod
  339. }
  340. ret = BN_div_recp(NULL, r, ca, recp, ctx);
  341. err:
  342. BN_CTX_end(ctx);
  343. return ret;
  344. }
  345. // BN_window_bits_for_exponent_size returns sliding window size for mod_exp with
  346. // a |b| bit exponent.
  347. //
  348. // For window size 'w' (w >= 2) and a random 'b' bits exponent, the number of
  349. // multiplications is a constant plus on average
  350. //
  351. // 2^(w-1) + (b-w)/(w+1);
  352. //
  353. // here 2^(w-1) is for precomputing the table (we actually need entries only
  354. // for windows that have the lowest bit set), and (b-w)/(w+1) is an
  355. // approximation for the expected number of w-bit windows, not counting the
  356. // first one.
  357. //
  358. // Thus we should use
  359. //
  360. // w >= 6 if b > 671
  361. // w = 5 if 671 > b > 239
  362. // w = 4 if 239 > b > 79
  363. // w = 3 if 79 > b > 23
  364. // w <= 2 if 23 > b
  365. //
  366. // (with draws in between). Very small exponents are often selected
  367. // with low Hamming weight, so we use w = 1 for b <= 23.
  368. static int BN_window_bits_for_exponent_size(int b) {
  369. if (b > 671) {
  370. return 6;
  371. }
  372. if (b > 239) {
  373. return 5;
  374. }
  375. if (b > 79) {
  376. return 4;
  377. }
  378. if (b > 23) {
  379. return 3;
  380. }
  381. return 1;
  382. }
  383. // TABLE_SIZE is the maximum precomputation table size for *variable* sliding
  384. // windows. This must be 2^(max_window - 1), where max_window is the largest
  385. // value returned from |BN_window_bits_for_exponent_size|.
  386. #define TABLE_SIZE 32
  387. // TABLE_BITS_SMALL is the smallest value returned from
  388. // |BN_window_bits_for_exponent_size| when |b| is at most |BN_BITS2| *
  389. // |BN_SMALL_MAX_WORDS| words.
  390. #define TABLE_BITS_SMALL 5
  391. // TABLE_SIZE_SMALL is the same as |TABLE_SIZE|, but when |b| is at most
  392. // |BN_BITS2| * |BN_SMALL_MAX_WORDS|.
  393. #define TABLE_SIZE_SMALL (1 << (TABLE_BITS_SMALL - 1))
  394. static int mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
  395. const BIGNUM *m, BN_CTX *ctx) {
  396. int i, j, bits, ret = 0, wstart, window;
  397. int start = 1;
  398. BIGNUM *aa;
  399. // Table of variables obtained from 'ctx'
  400. BIGNUM *val[TABLE_SIZE];
  401. BN_RECP_CTX recp;
  402. bits = BN_num_bits(p);
  403. if (bits == 0) {
  404. // x**0 mod 1 is still zero.
  405. if (BN_is_one(m)) {
  406. BN_zero(r);
  407. return 1;
  408. }
  409. return BN_one(r);
  410. }
  411. BN_CTX_start(ctx);
  412. aa = BN_CTX_get(ctx);
  413. val[0] = BN_CTX_get(ctx);
  414. if (!aa || !val[0]) {
  415. goto err;
  416. }
  417. BN_RECP_CTX_init(&recp);
  418. if (m->neg) {
  419. // ignore sign of 'm'
  420. if (!BN_copy(aa, m)) {
  421. goto err;
  422. }
  423. aa->neg = 0;
  424. if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0) {
  425. goto err;
  426. }
  427. } else {
  428. if (BN_RECP_CTX_set(&recp, m, ctx) <= 0) {
  429. goto err;
  430. }
  431. }
  432. if (!BN_nnmod(val[0], a, m, ctx)) {
  433. goto err; // 1
  434. }
  435. if (BN_is_zero(val[0])) {
  436. BN_zero(r);
  437. ret = 1;
  438. goto err;
  439. }
  440. window = BN_window_bits_for_exponent_size(bits);
  441. if (window > 1) {
  442. if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx)) {
  443. goto err; // 2
  444. }
  445. j = 1 << (window - 1);
  446. for (i = 1; i < j; i++) {
  447. if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
  448. !BN_mod_mul_reciprocal(val[i], val[i - 1], aa, &recp, ctx)) {
  449. goto err;
  450. }
  451. }
  452. }
  453. start = 1; // This is used to avoid multiplication etc
  454. // when there is only the value '1' in the
  455. // buffer.
  456. wstart = bits - 1; // The top bit of the window
  457. if (!BN_one(r)) {
  458. goto err;
  459. }
  460. for (;;) {
  461. int wvalue; // The 'value' of the window
  462. int wend; // The bottom bit of the window
  463. if (!BN_is_bit_set(p, wstart)) {
  464. if (!start) {
  465. if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) {
  466. goto err;
  467. }
  468. }
  469. if (wstart == 0) {
  470. break;
  471. }
  472. wstart--;
  473. continue;
  474. }
  475. // We now have wstart on a 'set' bit, we now need to work out
  476. // how bit a window to do. To do this we need to scan
  477. // forward until the last set bit before the end of the
  478. // window
  479. wvalue = 1;
  480. wend = 0;
  481. for (i = 1; i < window; i++) {
  482. if (wstart - i < 0) {
  483. break;
  484. }
  485. if (BN_is_bit_set(p, wstart - i)) {
  486. wvalue <<= (i - wend);
  487. wvalue |= 1;
  488. wend = i;
  489. }
  490. }
  491. // wend is the size of the current window
  492. j = wend + 1;
  493. // add the 'bytes above'
  494. if (!start) {
  495. for (i = 0; i < j; i++) {
  496. if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) {
  497. goto err;
  498. }
  499. }
  500. }
  501. // wvalue will be an odd number < 2^window
  502. if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], &recp, ctx)) {
  503. goto err;
  504. }
  505. // move the 'window' down further
  506. wstart -= wend + 1;
  507. start = 0;
  508. if (wstart < 0) {
  509. break;
  510. }
  511. }
  512. ret = 1;
  513. err:
  514. BN_CTX_end(ctx);
  515. BN_RECP_CTX_free(&recp);
  516. return ret;
  517. }
  518. int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
  519. BN_CTX *ctx) {
  520. if (BN_is_odd(m)) {
  521. return BN_mod_exp_mont(r, a, p, m, ctx, NULL);
  522. }
  523. return mod_exp_recp(r, a, p, m, ctx);
  524. }
  525. int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
  526. const BIGNUM *m, BN_CTX *ctx, const BN_MONT_CTX *mont) {
  527. if (!BN_is_odd(m)) {
  528. OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS);
  529. return 0;
  530. }
  531. int bits = BN_num_bits(p);
  532. if (bits == 0) {
  533. // x**0 mod 1 is still zero.
  534. if (BN_is_one(m)) {
  535. BN_zero(rr);
  536. return 1;
  537. }
  538. return BN_one(rr);
  539. }
  540. int ret = 0;
  541. BIGNUM *val[TABLE_SIZE];
  542. BN_MONT_CTX *new_mont = NULL;
  543. BN_CTX_start(ctx);
  544. BIGNUM *d = BN_CTX_get(ctx);
  545. BIGNUM *r = BN_CTX_get(ctx);
  546. val[0] = BN_CTX_get(ctx);
  547. if (!d || !r || !val[0]) {
  548. goto err;
  549. }
  550. // Allocate a montgomery context if it was not supplied by the caller.
  551. if (mont == NULL) {
  552. new_mont = BN_MONT_CTX_new();
  553. if (new_mont == NULL || !BN_MONT_CTX_set(new_mont, m, ctx)) {
  554. goto err;
  555. }
  556. mont = new_mont;
  557. }
  558. const BIGNUM *aa;
  559. if (a->neg || BN_ucmp(a, m) >= 0) {
  560. if (!BN_nnmod(val[0], a, m, ctx)) {
  561. goto err;
  562. }
  563. aa = val[0];
  564. } else {
  565. aa = a;
  566. }
  567. if (BN_is_zero(aa)) {
  568. BN_zero(rr);
  569. ret = 1;
  570. goto err;
  571. }
  572. // We exponentiate by looking at sliding windows of the exponent and
  573. // precomputing powers of |aa|. Windows may be shifted so they always end on a
  574. // set bit, so only precompute odd powers. We compute val[i] = aa^(2*i + 1)
  575. // for i = 0 to 2^(window-1), all in Montgomery form.
  576. int window = BN_window_bits_for_exponent_size(bits);
  577. if (!BN_to_montgomery(val[0], aa, mont, ctx)) {
  578. goto err;
  579. }
  580. if (window > 1) {
  581. if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) {
  582. goto err;
  583. }
  584. for (int i = 1; i < 1 << (window - 1); i++) {
  585. val[i] = BN_CTX_get(ctx);
  586. if (val[i] == NULL ||
  587. !BN_mod_mul_montgomery(val[i], val[i - 1], d, mont, ctx)) {
  588. goto err;
  589. }
  590. }
  591. }
  592. // Set |r| to one in Montgomery form. If the high bit of |m| is set, |m| is
  593. // close to R and we subtract rather than perform Montgomery reduction.
  594. if (m->d[m->top - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) {
  595. if (!bn_wexpand(r, m->top)) {
  596. goto err;
  597. }
  598. // r = 2^(top*BN_BITS2) - m
  599. r->d[0] = 0 - m->d[0];
  600. for (int i = 1; i < m->top; i++) {
  601. r->d[i] = ~m->d[i];
  602. }
  603. r->top = m->top;
  604. // The upper words will be zero if the corresponding words of |m| were
  605. // 0xfff[...], so call |bn_correct_top|.
  606. bn_correct_top(r);
  607. } else if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) {
  608. goto err;
  609. }
  610. int r_is_one = 1;
  611. int wstart = bits - 1; // The top bit of the window.
  612. for (;;) {
  613. if (!BN_is_bit_set(p, wstart)) {
  614. if (!r_is_one && !BN_mod_mul_montgomery(r, r, r, mont, ctx)) {
  615. goto err;
  616. }
  617. if (wstart == 0) {
  618. break;
  619. }
  620. wstart--;
  621. continue;
  622. }
  623. // We now have wstart on a set bit. Find the largest window we can use.
  624. int wvalue = 1;
  625. int wsize = 0;
  626. for (int i = 1; i < window && i <= wstart; i++) {
  627. if (BN_is_bit_set(p, wstart - i)) {
  628. wvalue <<= (i - wsize);
  629. wvalue |= 1;
  630. wsize = i;
  631. }
  632. }
  633. // Shift |r| to the end of the window.
  634. if (!r_is_one) {
  635. for (int i = 0; i < wsize + 1; i++) {
  636. if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) {
  637. goto err;
  638. }
  639. }
  640. }
  641. assert(wvalue & 1);
  642. assert(wvalue < (1 << window));
  643. if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) {
  644. goto err;
  645. }
  646. r_is_one = 0;
  647. if (wstart == wsize) {
  648. break;
  649. }
  650. wstart -= wsize + 1;
  651. }
  652. if (!BN_from_montgomery(rr, r, mont, ctx)) {
  653. goto err;
  654. }
  655. ret = 1;
  656. err:
  657. BN_MONT_CTX_free(new_mont);
  658. BN_CTX_end(ctx);
  659. return ret;
  660. }
  661. int bn_mod_exp_mont_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a,
  662. size_t num_a, const BN_ULONG *p, size_t num_p,
  663. const BN_MONT_CTX *mont) {
  664. const BN_ULONG *n = mont->N.d;
  665. size_t num_n = mont->N.top;
  666. if (num_n != num_a || num_n != num_r || num_n > BN_SMALL_MAX_WORDS) {
  667. OPENSSL_PUT_ERROR(BN, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
  668. return 0;
  669. }
  670. if (!BN_is_odd(&mont->N)) {
  671. OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS);
  672. return 0;
  673. }
  674. unsigned bits = 0;
  675. if (num_p != 0) {
  676. bits = BN_num_bits_word(p[num_p - 1]) + (num_p - 1) * BN_BITS2;
  677. }
  678. if (bits == 0) {
  679. OPENSSL_memset(r, 0, num_r * sizeof(BN_ULONG));
  680. if (!BN_is_one(&mont->N)) {
  681. r[0] = 1;
  682. }
  683. return 1;
  684. }
  685. // We exponentiate by looking at sliding windows of the exponent and
  686. // precomputing powers of |a|. Windows may be shifted so they always end on a
  687. // set bit, so only precompute odd powers. We compute val[i] = a^(2*i + 1) for
  688. // i = 0 to 2^(window-1), all in Montgomery form.
  689. unsigned window = BN_window_bits_for_exponent_size(bits);
  690. if (window > TABLE_BITS_SMALL) {
  691. window = TABLE_BITS_SMALL; // Tolerate excessively large |p|.
  692. }
  693. int ret = 0;
  694. BN_ULONG val[TABLE_SIZE_SMALL][BN_SMALL_MAX_WORDS];
  695. OPENSSL_memcpy(val[0], a, num_n * sizeof(BN_ULONG));
  696. if (window > 1) {
  697. BN_ULONG d[BN_SMALL_MAX_WORDS];
  698. if (!bn_mod_mul_montgomery_small(d, num_n, val[0], num_n, val[0], num_n,
  699. mont)) {
  700. goto err;
  701. }
  702. for (unsigned i = 1; i < 1u << (window - 1); i++) {
  703. if (!bn_mod_mul_montgomery_small(val[i], num_n, val[i - 1], num_n, d,
  704. num_n, mont)) {
  705. goto err;
  706. }
  707. }
  708. }
  709. // Set |r| to one in Montgomery form. If the high bit of |m| is set, |m| is
  710. // close to R and we subtract rather than perform Montgomery reduction.
  711. if (n[num_n - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) {
  712. // r = 2^(top*BN_BITS2) - m
  713. r[0] = 0 - n[0];
  714. for (size_t i = 1; i < num_n; i++) {
  715. r[i] = ~n[i];
  716. }
  717. } else if (!bn_from_montgomery_small(r, num_r, mont->RR.d, mont->RR.top,
  718. mont)) {
  719. goto err;
  720. }
  721. int r_is_one = 1;
  722. unsigned wstart = bits - 1; // The top bit of the window.
  723. for (;;) {
  724. if (!bn_is_bit_set_words(p, num_p, wstart)) {
  725. if (!r_is_one &&
  726. !bn_mod_mul_montgomery_small(r, num_r, r, num_r, r, num_r, mont)) {
  727. goto err;
  728. }
  729. if (wstart == 0) {
  730. break;
  731. }
  732. wstart--;
  733. continue;
  734. }
  735. // We now have wstart on a set bit. Find the largest window we can use.
  736. unsigned wvalue = 1;
  737. unsigned wsize = 0;
  738. for (unsigned i = 1; i < window && i <= wstart; i++) {
  739. if (bn_is_bit_set_words(p, num_p, wstart - i)) {
  740. wvalue <<= (i - wsize);
  741. wvalue |= 1;
  742. wsize = i;
  743. }
  744. }
  745. // Shift |r| to the end of the window.
  746. if (!r_is_one) {
  747. for (unsigned i = 0; i < wsize + 1; i++) {
  748. if (!bn_mod_mul_montgomery_small(r, num_r, r, num_r, r, num_r, mont)) {
  749. goto err;
  750. }
  751. }
  752. }
  753. assert(wvalue & 1);
  754. assert(wvalue < (1u << window));
  755. if (!bn_mod_mul_montgomery_small(r, num_r, r, num_r, val[wvalue >> 1],
  756. num_n, mont)) {
  757. goto err;
  758. }
  759. r_is_one = 0;
  760. if (wstart == wsize) {
  761. break;
  762. }
  763. wstart -= wsize + 1;
  764. }
  765. ret = 1;
  766. err:
  767. OPENSSL_cleanse(val, sizeof(val));
  768. return ret;
  769. }
  770. int bn_mod_inverse_prime_mont_small(BN_ULONG *r, size_t num_r,
  771. const BN_ULONG *a, size_t num_a,
  772. const BN_MONT_CTX *mont) {
  773. const BN_ULONG *p = mont->N.d;
  774. size_t num_p = mont->N.top;
  775. if (num_p > BN_SMALL_MAX_WORDS || num_p == 0) {
  776. OPENSSL_PUT_ERROR(BN, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
  777. return 0;
  778. }
  779. // Per Fermat's Little Theorem, a^-1 = a^(p-2) (mod p) for p prime.
  780. BN_ULONG p_minus_two[BN_SMALL_MAX_WORDS];
  781. OPENSSL_memcpy(p_minus_two, p, num_p * sizeof(BN_ULONG));
  782. if (p_minus_two[0] >= 2) {
  783. p_minus_two[0] -= 2;
  784. } else {
  785. p_minus_two[0] -= 2;
  786. for (size_t i = 1; i < num_p; i++) {
  787. if (p_minus_two[i]-- != 0) {
  788. break;
  789. }
  790. }
  791. }
  792. return bn_mod_exp_mont_small(r, num_r, a, num_a, p_minus_two, num_p, mont);
  793. }
  794. // |BN_mod_exp_mont_consttime| stores the precomputed powers in a specific
  795. // layout so that accessing any of these table values shows the same access
  796. // pattern as far as cache lines are concerned. The following functions are
  797. // used to transfer a BIGNUM from/to that table.
  798. static void copy_to_prebuf(const BIGNUM *b, int top, unsigned char *buf,
  799. int idx, int window) {
  800. int i, j;
  801. const int width = 1 << window;
  802. BN_ULONG *table = (BN_ULONG *) buf;
  803. if (top > b->top) {
  804. top = b->top; // this works because 'buf' is explicitly zeroed
  805. }
  806. for (i = 0, j = idx; i < top; i++, j += width) {
  807. table[j] = b->d[i];
  808. }
  809. }
  810. static int copy_from_prebuf(BIGNUM *b, int top, unsigned char *buf, int idx,
  811. int window) {
  812. int i, j;
  813. const int width = 1 << window;
  814. volatile BN_ULONG *table = (volatile BN_ULONG *)buf;
  815. if (!bn_wexpand(b, top)) {
  816. return 0;
  817. }
  818. if (window <= 3) {
  819. for (i = 0; i < top; i++, table += width) {
  820. BN_ULONG acc = 0;
  821. for (j = 0; j < width; j++) {
  822. acc |= table[j] & ((BN_ULONG)0 - (constant_time_eq_int(j, idx) & 1));
  823. }
  824. b->d[i] = acc;
  825. }
  826. } else {
  827. int xstride = 1 << (window - 2);
  828. BN_ULONG y0, y1, y2, y3;
  829. i = idx >> (window - 2); // equivalent of idx / xstride
  830. idx &= xstride - 1; // equivalent of idx % xstride
  831. y0 = (BN_ULONG)0 - (constant_time_eq_int(i, 0) & 1);
  832. y1 = (BN_ULONG)0 - (constant_time_eq_int(i, 1) & 1);
  833. y2 = (BN_ULONG)0 - (constant_time_eq_int(i, 2) & 1);
  834. y3 = (BN_ULONG)0 - (constant_time_eq_int(i, 3) & 1);
  835. for (i = 0; i < top; i++, table += width) {
  836. BN_ULONG acc = 0;
  837. for (j = 0; j < xstride; j++) {
  838. acc |= ((table[j + 0 * xstride] & y0) | (table[j + 1 * xstride] & y1) |
  839. (table[j + 2 * xstride] & y2) | (table[j + 3 * xstride] & y3)) &
  840. ((BN_ULONG)0 - (constant_time_eq_int(j, idx) & 1));
  841. }
  842. b->d[i] = acc;
  843. }
  844. }
  845. b->top = top;
  846. bn_correct_top(b);
  847. return 1;
  848. }
  849. // BN_mod_exp_mont_conttime is based on the assumption that the L1 data cache
  850. // line width of the target processor is at least the following value.
  851. #define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH (64)
  852. #define MOD_EXP_CTIME_MIN_CACHE_LINE_MASK \
  853. (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - 1)
  854. // Window sizes optimized for fixed window size modular exponentiation
  855. // algorithm (BN_mod_exp_mont_consttime).
  856. //
  857. // To achieve the security goals of BN_mode_exp_mont_consttime, the maximum
  858. // size of the window must not exceed
  859. // log_2(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH).
  860. //
  861. // Window size thresholds are defined for cache line sizes of 32 and 64, cache
  862. // line sizes where log_2(32)=5 and log_2(64)=6 respectively. A window size of
  863. // 7 should only be used on processors that have a 128 byte or greater cache
  864. // line size.
  865. #if MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 64
  866. #define BN_window_bits_for_ctime_exponent_size(b) \
  867. ((b) > 937 ? 6 : (b) > 306 ? 5 : (b) > 89 ? 4 : (b) > 22 ? 3 : 1)
  868. #define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (6)
  869. #elif MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 32
  870. #define BN_window_bits_for_ctime_exponent_size(b) \
  871. ((b) > 306 ? 5 : (b) > 89 ? 4 : (b) > 22 ? 3 : 1)
  872. #define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (5)
  873. #endif
  874. // Given a pointer value, compute the next address that is a cache line
  875. // multiple.
  876. #define MOD_EXP_CTIME_ALIGN(x_) \
  877. ((unsigned char *)(x_) + \
  878. (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - \
  879. (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
  880. // This variant of BN_mod_exp_mont() uses fixed windows and the special
  881. // precomputation memory layout to limit data-dependency to a minimum
  882. // to protect secret exponents (cf. the hyper-threading timing attacks
  883. // pointed out by Colin Percival,
  884. // http://www.daemonology.net/hyperthreading-considered-harmful/)
  885. int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
  886. const BIGNUM *m, BN_CTX *ctx,
  887. const BN_MONT_CTX *mont) {
  888. int i, bits, ret = 0, window, wvalue;
  889. int top;
  890. BN_MONT_CTX *new_mont = NULL;
  891. int numPowers;
  892. unsigned char *powerbufFree = NULL;
  893. int powerbufLen = 0;
  894. unsigned char *powerbuf = NULL;
  895. BIGNUM tmp, am;
  896. BIGNUM *new_a = NULL;
  897. if (!BN_is_odd(m)) {
  898. OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS);
  899. return 0;
  900. }
  901. top = m->top;
  902. bits = BN_num_bits(p);
  903. if (bits == 0) {
  904. // x**0 mod 1 is still zero.
  905. if (BN_is_one(m)) {
  906. BN_zero(rr);
  907. return 1;
  908. }
  909. return BN_one(rr);
  910. }
  911. // Allocate a montgomery context if it was not supplied by the caller.
  912. if (mont == NULL) {
  913. new_mont = BN_MONT_CTX_new();
  914. if (new_mont == NULL || !BN_MONT_CTX_set(new_mont, m, ctx)) {
  915. goto err;
  916. }
  917. mont = new_mont;
  918. }
  919. if (a->neg || BN_ucmp(a, m) >= 0) {
  920. new_a = BN_new();
  921. if (new_a == NULL ||
  922. !BN_nnmod(new_a, a, m, ctx)) {
  923. goto err;
  924. }
  925. a = new_a;
  926. }
  927. #ifdef RSAZ_ENABLED
  928. // If the size of the operands allow it, perform the optimized
  929. // RSAZ exponentiation. For further information see
  930. // crypto/bn/rsaz_exp.c and accompanying assembly modules.
  931. if ((16 == a->top) && (16 == p->top) && (BN_num_bits(m) == 1024) &&
  932. rsaz_avx2_eligible()) {
  933. if (!bn_wexpand(rr, 16)) {
  934. goto err;
  935. }
  936. RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d, mont->n0[0]);
  937. rr->top = 16;
  938. rr->neg = 0;
  939. bn_correct_top(rr);
  940. ret = 1;
  941. goto err;
  942. }
  943. #endif
  944. // Get the window size to use with size of p.
  945. window = BN_window_bits_for_ctime_exponent_size(bits);
  946. #if defined(OPENSSL_BN_ASM_MONT5)
  947. if (window >= 5) {
  948. window = 5; // ~5% improvement for RSA2048 sign, and even for RSA4096
  949. // reserve space for mont->N.d[] copy
  950. powerbufLen += top * sizeof(mont->N.d[0]);
  951. }
  952. #endif
  953. // Allocate a buffer large enough to hold all of the pre-computed
  954. // powers of am, am itself and tmp.
  955. numPowers = 1 << window;
  956. powerbufLen +=
  957. sizeof(m->d[0]) *
  958. (top * numPowers + ((2 * top) > numPowers ? (2 * top) : numPowers));
  959. #ifdef alloca
  960. if (powerbufLen < 3072) {
  961. powerbufFree = alloca(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
  962. } else
  963. #endif
  964. {
  965. if ((powerbufFree = OPENSSL_malloc(
  966. powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL) {
  967. goto err;
  968. }
  969. }
  970. powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
  971. OPENSSL_memset(powerbuf, 0, powerbufLen);
  972. #ifdef alloca
  973. if (powerbufLen < 3072) {
  974. powerbufFree = NULL;
  975. }
  976. #endif
  977. // lay down tmp and am right after powers table
  978. tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers);
  979. am.d = tmp.d + top;
  980. tmp.top = am.top = 0;
  981. tmp.dmax = am.dmax = top;
  982. tmp.neg = am.neg = 0;
  983. tmp.flags = am.flags = BN_FLG_STATIC_DATA;
  984. // prepare a^0 in Montgomery domain
  985. // by Shay Gueron's suggestion
  986. if (m->d[top - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) {
  987. // 2^(top*BN_BITS2) - m
  988. tmp.d[0] = 0 - m->d[0];
  989. for (i = 1; i < top; i++) {
  990. tmp.d[i] = ~m->d[i];
  991. }
  992. tmp.top = top;
  993. } else if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx)) {
  994. goto err;
  995. }
  996. // prepare a^1 in Montgomery domain
  997. assert(!a->neg);
  998. assert(BN_ucmp(a, m) < 0);
  999. if (!BN_to_montgomery(&am, a, mont, ctx)) {
  1000. goto err;
  1001. }
  1002. #if defined(OPENSSL_BN_ASM_MONT5)
  1003. // This optimization uses ideas from http://eprint.iacr.org/2011/239,
  1004. // specifically optimization of cache-timing attack countermeasures
  1005. // and pre-computation optimization.
  1006. // Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
  1007. // 512-bit RSA is hardly relevant, we omit it to spare size...
  1008. if (window == 5 && top > 1) {
  1009. const BN_ULONG *n0 = mont->n0;
  1010. BN_ULONG *np;
  1011. // BN_to_montgomery can contaminate words above .top
  1012. // [in BN_DEBUG[_DEBUG] build]...
  1013. for (i = am.top; i < top; i++) {
  1014. am.d[i] = 0;
  1015. }
  1016. for (i = tmp.top; i < top; i++) {
  1017. tmp.d[i] = 0;
  1018. }
  1019. // copy mont->N.d[] to improve cache locality
  1020. for (np = am.d + top, i = 0; i < top; i++) {
  1021. np[i] = mont->N.d[i];
  1022. }
  1023. bn_scatter5(tmp.d, top, powerbuf, 0);
  1024. bn_scatter5(am.d, am.top, powerbuf, 1);
  1025. bn_mul_mont(tmp.d, am.d, am.d, np, n0, top);
  1026. bn_scatter5(tmp.d, top, powerbuf, 2);
  1027. // same as above, but uses squaring for 1/2 of operations
  1028. for (i = 4; i < 32; i *= 2) {
  1029. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  1030. bn_scatter5(tmp.d, top, powerbuf, i);
  1031. }
  1032. for (i = 3; i < 8; i += 2) {
  1033. int j;
  1034. bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
  1035. bn_scatter5(tmp.d, top, powerbuf, i);
  1036. for (j = 2 * i; j < 32; j *= 2) {
  1037. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  1038. bn_scatter5(tmp.d, top, powerbuf, j);
  1039. }
  1040. }
  1041. for (; i < 16; i += 2) {
  1042. bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
  1043. bn_scatter5(tmp.d, top, powerbuf, i);
  1044. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  1045. bn_scatter5(tmp.d, top, powerbuf, 2 * i);
  1046. }
  1047. for (; i < 32; i += 2) {
  1048. bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
  1049. bn_scatter5(tmp.d, top, powerbuf, i);
  1050. }
  1051. bits--;
  1052. for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) {
  1053. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  1054. }
  1055. bn_gather5(tmp.d, top, powerbuf, wvalue);
  1056. // At this point |bits| is 4 mod 5 and at least -1. (|bits| is the first bit
  1057. // that has not been read yet.)
  1058. assert(bits >= -1 && (bits == -1 || bits % 5 == 4));
  1059. // Scan the exponent one window at a time starting from the most
  1060. // significant bits.
  1061. if (top & 7) {
  1062. while (bits >= 0) {
  1063. for (wvalue = 0, i = 0; i < 5; i++, bits--) {
  1064. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  1065. }
  1066. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  1067. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  1068. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  1069. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  1070. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  1071. bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
  1072. }
  1073. } else {
  1074. const uint8_t *p_bytes = (const uint8_t *)p->d;
  1075. int max_bits = p->top * BN_BITS2;
  1076. assert(bits < max_bits);
  1077. // |p = 0| has been handled as a special case, so |max_bits| is at least
  1078. // one word.
  1079. assert(max_bits >= 64);
  1080. // If the first bit to be read lands in the last byte, unroll the first
  1081. // iteration to avoid reading past the bounds of |p->d|. (After the first
  1082. // iteration, we are guaranteed to be past the last byte.) Note |bits|
  1083. // here is the top bit, inclusive.
  1084. if (bits - 4 >= max_bits - 8) {
  1085. // Read five bits from |bits-4| through |bits|, inclusive.
  1086. wvalue = p_bytes[p->top * BN_BYTES - 1];
  1087. wvalue >>= (bits - 4) & 7;
  1088. wvalue &= 0x1f;
  1089. bits -= 5;
  1090. bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
  1091. }
  1092. while (bits >= 0) {
  1093. // Read five bits from |bits-4| through |bits|, inclusive.
  1094. int first_bit = bits - 4;
  1095. uint16_t val;
  1096. OPENSSL_memcpy(&val, p_bytes + (first_bit >> 3), sizeof(val));
  1097. val >>= first_bit & 7;
  1098. val &= 0x1f;
  1099. bits -= 5;
  1100. bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, val);
  1101. }
  1102. }
  1103. ret = bn_from_montgomery(tmp.d, tmp.d, NULL, np, n0, top);
  1104. tmp.top = top;
  1105. bn_correct_top(&tmp);
  1106. if (ret) {
  1107. if (!BN_copy(rr, &tmp)) {
  1108. ret = 0;
  1109. }
  1110. goto err; // non-zero ret means it's not error
  1111. }
  1112. } else
  1113. #endif
  1114. {
  1115. copy_to_prebuf(&tmp, top, powerbuf, 0, window);
  1116. copy_to_prebuf(&am, top, powerbuf, 1, window);
  1117. // If the window size is greater than 1, then calculate
  1118. // val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
  1119. // (even powers could instead be computed as (a^(i/2))^2
  1120. // to use the slight performance advantage of sqr over mul).
  1121. if (window > 1) {
  1122. if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx)) {
  1123. goto err;
  1124. }
  1125. copy_to_prebuf(&tmp, top, powerbuf, 2, window);
  1126. for (i = 3; i < numPowers; i++) {
  1127. // Calculate a^i = a^(i-1) * a
  1128. if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx)) {
  1129. goto err;
  1130. }
  1131. copy_to_prebuf(&tmp, top, powerbuf, i, window);
  1132. }
  1133. }
  1134. bits--;
  1135. for (wvalue = 0, i = bits % window; i >= 0; i--, bits--) {
  1136. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  1137. }
  1138. if (!copy_from_prebuf(&tmp, top, powerbuf, wvalue, window)) {
  1139. goto err;
  1140. }
  1141. // Scan the exponent one window at a time starting from the most
  1142. // significant bits.
  1143. while (bits >= 0) {
  1144. wvalue = 0; // The 'value' of the window
  1145. // Scan the window, squaring the result as we go
  1146. for (i = 0; i < window; i++, bits--) {
  1147. if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, mont, ctx)) {
  1148. goto err;
  1149. }
  1150. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  1151. }
  1152. // Fetch the appropriate pre-computed value from the pre-buf
  1153. if (!copy_from_prebuf(&am, top, powerbuf, wvalue, window)) {
  1154. goto err;
  1155. }
  1156. // Multiply the result into the intermediate result
  1157. if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx)) {
  1158. goto err;
  1159. }
  1160. }
  1161. }
  1162. // Convert the final result from montgomery to standard format
  1163. if (!BN_from_montgomery(rr, &tmp, mont, ctx)) {
  1164. goto err;
  1165. }
  1166. ret = 1;
  1167. err:
  1168. BN_MONT_CTX_free(new_mont);
  1169. BN_clear_free(new_a);
  1170. OPENSSL_free(powerbufFree);
  1171. return (ret);
  1172. }
  1173. int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
  1174. const BIGNUM *m, BN_CTX *ctx,
  1175. const BN_MONT_CTX *mont) {
  1176. BIGNUM a_bignum;
  1177. BN_init(&a_bignum);
  1178. int ret = 0;
  1179. if (!BN_set_word(&a_bignum, a)) {
  1180. OPENSSL_PUT_ERROR(BN, ERR_R_INTERNAL_ERROR);
  1181. goto err;
  1182. }
  1183. ret = BN_mod_exp_mont(rr, &a_bignum, p, m, ctx, mont);
  1184. err:
  1185. BN_free(&a_bignum);
  1186. return ret;
  1187. }
  1188. #define TABLE_SIZE 32
  1189. int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1,
  1190. const BIGNUM *a2, const BIGNUM *p2, const BIGNUM *m,
  1191. BN_CTX *ctx, const BN_MONT_CTX *mont) {
  1192. BIGNUM tmp;
  1193. BN_init(&tmp);
  1194. int ret = 0;
  1195. BN_MONT_CTX *new_mont = NULL;
  1196. // Allocate a montgomery context if it was not supplied by the caller.
  1197. if (mont == NULL) {
  1198. new_mont = BN_MONT_CTX_new();
  1199. if (new_mont == NULL || !BN_MONT_CTX_set(new_mont, m, ctx)) {
  1200. goto err;
  1201. }
  1202. mont = new_mont;
  1203. }
  1204. // BN_mod_mul_montgomery removes one Montgomery factor, so passing one
  1205. // Montgomery-encoded and one non-Montgomery-encoded value gives a
  1206. // non-Montgomery-encoded result.
  1207. if (!BN_mod_exp_mont(rr, a1, p1, m, ctx, mont) ||
  1208. !BN_mod_exp_mont(&tmp, a2, p2, m, ctx, mont) ||
  1209. !BN_to_montgomery(rr, rr, mont, ctx) ||
  1210. !BN_mod_mul_montgomery(rr, rr, &tmp, mont, ctx)) {
  1211. goto err;
  1212. }
  1213. ret = 1;
  1214. err:
  1215. BN_MONT_CTX_free(new_mont);
  1216. BN_free(&tmp);
  1217. return ret;
  1218. }