exponentiation.c 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553
  1. /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
  2. * All rights reserved.
  3. *
  4. * This package is an SSL implementation written
  5. * by Eric Young (eay@cryptsoft.com).
  6. * The implementation was written so as to conform with Netscapes SSL.
  7. *
  8. * This library is free for commercial and non-commercial use as long as
  9. * the following conditions are aheared to. The following conditions
  10. * apply to all code found in this distribution, be it the RC4, RSA,
  11. * lhash, DES, etc., code; not just the SSL code. The SSL documentation
  12. * included with this distribution is covered by the same copyright terms
  13. * except that the holder is Tim Hudson (tjh@cryptsoft.com).
  14. *
  15. * Copyright remains Eric Young's, and as such any Copyright notices in
  16. * the code are not to be removed.
  17. * If this package is used in a product, Eric Young should be given attribution
  18. * as the author of the parts of the library used.
  19. * This can be in the form of a textual message at program startup or
  20. * in documentation (online or textual) provided with the package.
  21. *
  22. * Redistribution and use in source and binary forms, with or without
  23. * modification, are permitted provided that the following conditions
  24. * are met:
  25. * 1. Redistributions of source code must retain the copyright
  26. * notice, this list of conditions and the following disclaimer.
  27. * 2. Redistributions in binary form must reproduce the above copyright
  28. * notice, this list of conditions and the following disclaimer in the
  29. * documentation and/or other materials provided with the distribution.
  30. * 3. All advertising materials mentioning features or use of this software
  31. * must display the following acknowledgement:
  32. * "This product includes cryptographic software written by
  33. * Eric Young (eay@cryptsoft.com)"
  34. * The word 'cryptographic' can be left out if the rouines from the library
  35. * being used are not cryptographic related :-).
  36. * 4. If you include any Windows specific code (or a derivative thereof) from
  37. * the apps directory (application code) you must include an acknowledgement:
  38. * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
  39. *
  40. * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
  41. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  42. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  43. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  44. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  45. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  46. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  47. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  48. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  49. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  50. * SUCH DAMAGE.
  51. *
  52. * The licence and distribution terms for any publically available version or
  53. * derivative of this code cannot be changed. i.e. this code cannot simply be
  54. * copied and put under another distribution licence
  55. * [including the GNU Public Licence.]
  56. */
  57. /* ====================================================================
  58. * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved.
  59. *
  60. * Redistribution and use in source and binary forms, with or without
  61. * modification, are permitted provided that the following conditions
  62. * are met:
  63. *
  64. * 1. Redistributions of source code must retain the above copyright
  65. * notice, this list of conditions and the following disclaimer.
  66. *
  67. * 2. Redistributions in binary form must reproduce the above copyright
  68. * notice, this list of conditions and the following disclaimer in
  69. * the documentation and/or other materials provided with the
  70. * distribution.
  71. *
  72. * 3. All advertising materials mentioning features or use of this
  73. * software must display the following acknowledgment:
  74. * "This product includes software developed by the OpenSSL Project
  75. * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
  76. *
  77. * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
  78. * endorse or promote products derived from this software without
  79. * prior written permission. For written permission, please contact
  80. * openssl-core@openssl.org.
  81. *
  82. * 5. Products derived from this software may not be called "OpenSSL"
  83. * nor may "OpenSSL" appear in their names without prior written
  84. * permission of the OpenSSL Project.
  85. *
  86. * 6. Redistributions of any form whatsoever must retain the following
  87. * acknowledgment:
  88. * "This product includes software developed by the OpenSSL Project
  89. * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
  90. *
  91. * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
  92. * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  93. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  94. * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
  95. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  96. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  97. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  98. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  99. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  100. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  101. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
  102. * OF THE POSSIBILITY OF SUCH DAMAGE.
  103. * ====================================================================
  104. *
  105. * This product includes cryptographic software written by Eric Young
  106. * (eay@cryptsoft.com). This product includes software written by Tim
  107. * Hudson (tjh@cryptsoft.com). */
  108. #include <openssl/bn.h>
  109. #include <assert.h>
  110. #include <string.h>
  111. #include <openssl/cpu.h>
  112. #include <openssl/err.h>
  113. #include <openssl/mem.h>
  114. #include "internal.h"
  115. #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64)
  116. #define OPENSSL_BN_ASM_MONT5
  117. #define RSAZ_ENABLED
  118. #include "rsaz_exp.h"
  119. void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, const void *table,
  120. const BN_ULONG *np, const BN_ULONG *n0, int num,
  121. int power);
  122. void bn_scatter5(const BN_ULONG *inp, size_t num, void *table, size_t power);
  123. void bn_gather5(BN_ULONG *out, size_t num, void *table, size_t power);
  124. void bn_power5(BN_ULONG *rp, const BN_ULONG *ap, const void *table,
  125. const BN_ULONG *np, const BN_ULONG *n0, int num, int power);
  126. int bn_from_montgomery(BN_ULONG *rp, const BN_ULONG *ap,
  127. const BN_ULONG *not_used, const BN_ULONG *np,
  128. const BN_ULONG *n0, int num);
  129. #endif
  130. int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) {
  131. int i, bits, ret = 0;
  132. BIGNUM *v, *rr;
  133. if ((p->flags & BN_FLG_CONSTTIME) != 0) {
  134. /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
  135. OPENSSL_PUT_ERROR(BN, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
  136. return 0;
  137. }
  138. BN_CTX_start(ctx);
  139. if (r == a || r == p) {
  140. rr = BN_CTX_get(ctx);
  141. } else {
  142. rr = r;
  143. }
  144. v = BN_CTX_get(ctx);
  145. if (rr == NULL || v == NULL) {
  146. goto err;
  147. }
  148. if (BN_copy(v, a) == NULL) {
  149. goto err;
  150. }
  151. bits = BN_num_bits(p);
  152. if (BN_is_odd(p)) {
  153. if (BN_copy(rr, a) == NULL) {
  154. goto err;
  155. }
  156. } else {
  157. if (!BN_one(rr)) {
  158. goto err;
  159. }
  160. }
  161. for (i = 1; i < bits; i++) {
  162. if (!BN_sqr(v, v, ctx)) {
  163. goto err;
  164. }
  165. if (BN_is_bit_set(p, i)) {
  166. if (!BN_mul(rr, rr, v, ctx)) {
  167. goto err;
  168. }
  169. }
  170. }
  171. if (r != rr && !BN_copy(r, rr)) {
  172. goto err;
  173. }
  174. ret = 1;
  175. err:
  176. BN_CTX_end(ctx);
  177. return ret;
  178. }
  179. /* maximum precomputation table size for *variable* sliding windows */
  180. #define TABLE_SIZE 32
  181. typedef struct bn_recp_ctx_st {
  182. BIGNUM N; /* the divisor */
  183. BIGNUM Nr; /* the reciprocal */
  184. int num_bits;
  185. int shift;
  186. int flags;
  187. } BN_RECP_CTX;
  188. static void BN_RECP_CTX_init(BN_RECP_CTX *recp) {
  189. BN_init(&recp->N);
  190. BN_init(&recp->Nr);
  191. recp->num_bits = 0;
  192. recp->flags = 0;
  193. }
  194. static void BN_RECP_CTX_free(BN_RECP_CTX *recp) {
  195. if (recp == NULL) {
  196. return;
  197. }
  198. BN_free(&recp->N);
  199. BN_free(&recp->Nr);
  200. }
  201. static int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx) {
  202. if (!BN_copy(&(recp->N), d)) {
  203. return 0;
  204. }
  205. BN_zero(&recp->Nr);
  206. recp->num_bits = BN_num_bits(d);
  207. recp->shift = 0;
  208. return 1;
  209. }
  210. /* len is the expected size of the result We actually calculate with an extra
  211. * word of precision, so we can do faster division if the remainder is not
  212. * required.
  213. * r := 2^len / m */
  214. static int BN_reciprocal(BIGNUM *r, const BIGNUM *m, int len, BN_CTX *ctx) {
  215. int ret = -1;
  216. BIGNUM *t;
  217. BN_CTX_start(ctx);
  218. t = BN_CTX_get(ctx);
  219. if (t == NULL) {
  220. goto err;
  221. }
  222. if (!BN_set_bit(t, len)) {
  223. goto err;
  224. }
  225. if (!BN_div(r, NULL, t, m, ctx)) {
  226. goto err;
  227. }
  228. ret = len;
  229. err:
  230. BN_CTX_end(ctx);
  231. return ret;
  232. }
  233. static int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
  234. BN_RECP_CTX *recp, BN_CTX *ctx) {
  235. int i, j, ret = 0;
  236. BIGNUM *a, *b, *d, *r;
  237. BN_CTX_start(ctx);
  238. a = BN_CTX_get(ctx);
  239. b = BN_CTX_get(ctx);
  240. if (dv != NULL) {
  241. d = dv;
  242. } else {
  243. d = BN_CTX_get(ctx);
  244. }
  245. if (rem != NULL) {
  246. r = rem;
  247. } else {
  248. r = BN_CTX_get(ctx);
  249. }
  250. if (a == NULL || b == NULL || d == NULL || r == NULL) {
  251. goto err;
  252. }
  253. if (BN_ucmp(m, &recp->N) < 0) {
  254. BN_zero(d);
  255. if (!BN_copy(r, m)) {
  256. goto err;
  257. }
  258. BN_CTX_end(ctx);
  259. return 1;
  260. }
  261. /* We want the remainder
  262. * Given input of ABCDEF / ab
  263. * we need multiply ABCDEF by 3 digests of the reciprocal of ab */
  264. /* i := max(BN_num_bits(m), 2*BN_num_bits(N)) */
  265. i = BN_num_bits(m);
  266. j = recp->num_bits << 1;
  267. if (j > i) {
  268. i = j;
  269. }
  270. /* Nr := round(2^i / N) */
  271. if (i != recp->shift) {
  272. recp->shift =
  273. BN_reciprocal(&(recp->Nr), &(recp->N), i,
  274. ctx); /* BN_reciprocal returns i, or -1 for an error */
  275. }
  276. if (recp->shift == -1) {
  277. goto err;
  278. }
  279. /* d := |round(round(m / 2^BN_num_bits(N)) * recp->Nr / 2^(i -
  280. * BN_num_bits(N)))|
  281. * = |round(round(m / 2^BN_num_bits(N)) * round(2^i / N) / 2^(i -
  282. * BN_num_bits(N)))|
  283. * <= |(m / 2^BN_num_bits(N)) * (2^i / N) * (2^BN_num_bits(N) / 2^i)|
  284. * = |m/N| */
  285. if (!BN_rshift(a, m, recp->num_bits)) {
  286. goto err;
  287. }
  288. if (!BN_mul(b, a, &(recp->Nr), ctx)) {
  289. goto err;
  290. }
  291. if (!BN_rshift(d, b, i - recp->num_bits)) {
  292. goto err;
  293. }
  294. d->neg = 0;
  295. if (!BN_mul(b, &(recp->N), d, ctx)) {
  296. goto err;
  297. }
  298. if (!BN_usub(r, m, b)) {
  299. goto err;
  300. }
  301. r->neg = 0;
  302. j = 0;
  303. while (BN_ucmp(r, &(recp->N)) >= 0) {
  304. if (j++ > 2) {
  305. OPENSSL_PUT_ERROR(BN, BN_R_BAD_RECIPROCAL);
  306. goto err;
  307. }
  308. if (!BN_usub(r, r, &(recp->N))) {
  309. goto err;
  310. }
  311. if (!BN_add_word(d, 1)) {
  312. goto err;
  313. }
  314. }
  315. r->neg = BN_is_zero(r) ? 0 : m->neg;
  316. d->neg = m->neg ^ recp->N.neg;
  317. ret = 1;
  318. err:
  319. BN_CTX_end(ctx);
  320. return ret;
  321. }
  322. static int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y,
  323. BN_RECP_CTX *recp, BN_CTX *ctx) {
  324. int ret = 0;
  325. BIGNUM *a;
  326. const BIGNUM *ca;
  327. BN_CTX_start(ctx);
  328. a = BN_CTX_get(ctx);
  329. if (a == NULL) {
  330. goto err;
  331. }
  332. if (y != NULL) {
  333. if (x == y) {
  334. if (!BN_sqr(a, x, ctx)) {
  335. goto err;
  336. }
  337. } else {
  338. if (!BN_mul(a, x, y, ctx)) {
  339. goto err;
  340. }
  341. }
  342. ca = a;
  343. } else {
  344. ca = x; /* Just do the mod */
  345. }
  346. ret = BN_div_recp(NULL, r, ca, recp, ctx);
  347. err:
  348. BN_CTX_end(ctx);
  349. return ret;
  350. }
  351. /* BN_window_bits_for_exponent_size -- macro for sliding window mod_exp
  352. * functions
  353. *
  354. * For window size 'w' (w >= 2) and a random 'b' bits exponent, the number of
  355. * multiplications is a constant plus on average
  356. *
  357. * 2^(w-1) + (b-w)/(w+1);
  358. *
  359. * here 2^(w-1) is for precomputing the table (we actually need entries only
  360. * for windows that have the lowest bit set), and (b-w)/(w+1) is an
  361. * approximation for the expected number of w-bit windows, not counting the
  362. * first one.
  363. *
  364. * Thus we should use
  365. *
  366. * w >= 6 if b > 671
  367. * w = 5 if 671 > b > 239
  368. * w = 4 if 239 > b > 79
  369. * w = 3 if 79 > b > 23
  370. * w <= 2 if 23 > b
  371. *
  372. * (with draws in between). Very small exponents are often selected
  373. * with low Hamming weight, so we use w = 1 for b <= 23. */
  374. #define BN_window_bits_for_exponent_size(b) \
  375. ((b) > 671 ? 6 : \
  376. (b) > 239 ? 5 : \
  377. (b) > 79 ? 4 : \
  378. (b) > 23 ? 3 : 1)
  379. static int mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
  380. const BIGNUM *m, BN_CTX *ctx) {
  381. int i, j, bits, ret = 0, wstart, window;
  382. int start = 1;
  383. BIGNUM *aa;
  384. /* Table of variables obtained from 'ctx' */
  385. BIGNUM *val[TABLE_SIZE];
  386. BN_RECP_CTX recp;
  387. if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
  388. /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
  389. OPENSSL_PUT_ERROR(BN, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
  390. return 0;
  391. }
  392. bits = BN_num_bits(p);
  393. if (bits == 0) {
  394. /* x**0 mod 1 is still zero. */
  395. if (BN_is_one(m)) {
  396. BN_zero(r);
  397. return 1;
  398. }
  399. return BN_one(r);
  400. }
  401. BN_CTX_start(ctx);
  402. aa = BN_CTX_get(ctx);
  403. val[0] = BN_CTX_get(ctx);
  404. if (!aa || !val[0]) {
  405. goto err;
  406. }
  407. BN_RECP_CTX_init(&recp);
  408. if (m->neg) {
  409. /* ignore sign of 'm' */
  410. if (!BN_copy(aa, m)) {
  411. goto err;
  412. }
  413. aa->neg = 0;
  414. if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0) {
  415. goto err;
  416. }
  417. } else {
  418. if (BN_RECP_CTX_set(&recp, m, ctx) <= 0) {
  419. goto err;
  420. }
  421. }
  422. if (!BN_nnmod(val[0], a, m, ctx)) {
  423. goto err; /* 1 */
  424. }
  425. if (BN_is_zero(val[0])) {
  426. BN_zero(r);
  427. ret = 1;
  428. goto err;
  429. }
  430. window = BN_window_bits_for_exponent_size(bits);
  431. if (window > 1) {
  432. if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx)) {
  433. goto err; /* 2 */
  434. }
  435. j = 1 << (window - 1);
  436. for (i = 1; i < j; i++) {
  437. if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
  438. !BN_mod_mul_reciprocal(val[i], val[i - 1], aa, &recp, ctx)) {
  439. goto err;
  440. }
  441. }
  442. }
  443. start = 1; /* This is used to avoid multiplication etc
  444. * when there is only the value '1' in the
  445. * buffer. */
  446. wstart = bits - 1; /* The top bit of the window */
  447. if (!BN_one(r)) {
  448. goto err;
  449. }
  450. for (;;) {
  451. int wvalue; /* The 'value' of the window */
  452. int wend; /* The bottom bit of the window */
  453. if (BN_is_bit_set(p, wstart) == 0) {
  454. if (!start) {
  455. if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) {
  456. goto err;
  457. }
  458. }
  459. if (wstart == 0) {
  460. break;
  461. }
  462. wstart--;
  463. continue;
  464. }
  465. /* We now have wstart on a 'set' bit, we now need to work out
  466. * how bit a window to do. To do this we need to scan
  467. * forward until the last set bit before the end of the
  468. * window */
  469. wvalue = 1;
  470. wend = 0;
  471. for (i = 1; i < window; i++) {
  472. if (wstart - i < 0) {
  473. break;
  474. }
  475. if (BN_is_bit_set(p, wstart - i)) {
  476. wvalue <<= (i - wend);
  477. wvalue |= 1;
  478. wend = i;
  479. }
  480. }
  481. /* wend is the size of the current window */
  482. j = wend + 1;
  483. /* add the 'bytes above' */
  484. if (!start) {
  485. for (i = 0; i < j; i++) {
  486. if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) {
  487. goto err;
  488. }
  489. }
  490. }
  491. /* wvalue will be an odd number < 2^window */
  492. if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], &recp, ctx)) {
  493. goto err;
  494. }
  495. /* move the 'window' down further */
  496. wstart -= wend + 1;
  497. start = 0;
  498. if (wstart < 0) {
  499. break;
  500. }
  501. }
  502. ret = 1;
  503. err:
  504. BN_CTX_end(ctx);
  505. BN_RECP_CTX_free(&recp);
  506. return ret;
  507. }
  508. int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
  509. BN_CTX *ctx) {
  510. /* For even modulus m = 2^k*m_odd, it might make sense to compute
  511. * a^p mod m_odd and a^p mod 2^k separately (with Montgomery
  512. * exponentiation for the odd part), using appropriate exponent
  513. * reductions, and combine the results using the CRT.
  514. *
  515. * For now, we use Montgomery only if the modulus is odd; otherwise,
  516. * exponentiation using the reciprocal-based quick remaindering
  517. * algorithm is used.
  518. *
  519. * (Timing obtained with expspeed.c [computations a^p mod m
  520. * where a, p, m are of the same length: 256, 512, 1024, 2048,
  521. * 4096, 8192 bits], compared to the running time of the
  522. * standard algorithm:
  523. *
  524. * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration]
  525. * 55 .. 77 % [UltraSparc processor, but
  526. * debug-solaris-sparcv8-gcc conf.]
  527. *
  528. * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration]
  529. * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
  530. *
  531. * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
  532. * at 2048 and more bits, but at 512 and 1024 bits, it was
  533. * slower even than the standard algorithm!
  534. *
  535. * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
  536. * should be obtained when the new Montgomery reduction code
  537. * has been integrated into OpenSSL.) */
  538. if (BN_is_odd(m)) {
  539. if (a->top == 1 && !a->neg && BN_get_flags(p, BN_FLG_CONSTTIME) == 0) {
  540. BN_ULONG A = a->d[0];
  541. return BN_mod_exp_mont_word(r, A, p, m, ctx, NULL);
  542. }
  543. return BN_mod_exp_mont(r, a, p, m, ctx, NULL);
  544. }
  545. return mod_exp_recp(r, a, p, m, ctx);
  546. }
  547. int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
  548. const BIGNUM *m, BN_CTX *ctx, const BN_MONT_CTX *mont) {
  549. int i, j, bits, ret = 0, wstart, window;
  550. int start = 1;
  551. BIGNUM *d, *r;
  552. const BIGNUM *aa;
  553. /* Table of variables obtained from 'ctx' */
  554. BIGNUM *val[TABLE_SIZE];
  555. BN_MONT_CTX *new_mont = NULL;
  556. if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
  557. return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, mont);
  558. }
  559. if (!BN_is_odd(m)) {
  560. OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS);
  561. return 0;
  562. }
  563. bits = BN_num_bits(p);
  564. if (bits == 0) {
  565. /* x**0 mod 1 is still zero. */
  566. if (BN_is_one(m)) {
  567. BN_zero(rr);
  568. return 1;
  569. }
  570. return BN_one(rr);
  571. }
  572. BN_CTX_start(ctx);
  573. d = BN_CTX_get(ctx);
  574. r = BN_CTX_get(ctx);
  575. val[0] = BN_CTX_get(ctx);
  576. if (!d || !r || !val[0]) {
  577. goto err;
  578. }
  579. /* Allocate a montgomery context if it was not supplied by the caller. */
  580. if (mont == NULL) {
  581. new_mont = BN_MONT_CTX_new();
  582. if (new_mont == NULL || !BN_MONT_CTX_set(new_mont, m, ctx)) {
  583. goto err;
  584. }
  585. mont = new_mont;
  586. }
  587. if (a->neg || BN_ucmp(a, m) >= 0) {
  588. if (!BN_nnmod(val[0], a, m, ctx)) {
  589. goto err;
  590. }
  591. aa = val[0];
  592. } else {
  593. aa = a;
  594. }
  595. if (BN_is_zero(aa)) {
  596. BN_zero(rr);
  597. ret = 1;
  598. goto err;
  599. }
  600. if (!BN_to_montgomery(val[0], aa, mont, ctx)) {
  601. goto err; /* 1 */
  602. }
  603. window = BN_window_bits_for_exponent_size(bits);
  604. if (window > 1) {
  605. if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) {
  606. goto err; /* 2 */
  607. }
  608. j = 1 << (window - 1);
  609. for (i = 1; i < j; i++) {
  610. if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
  611. !BN_mod_mul_montgomery(val[i], val[i - 1], d, mont, ctx)) {
  612. goto err;
  613. }
  614. }
  615. }
  616. start = 1; /* This is used to avoid multiplication etc
  617. * when there is only the value '1' in the
  618. * buffer. */
  619. wstart = bits - 1; /* The top bit of the window */
  620. j = m->top; /* borrow j */
  621. if (m->d[j - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) {
  622. if (bn_wexpand(r, j) == NULL) {
  623. goto err;
  624. }
  625. /* 2^(top*BN_BITS2) - m */
  626. r->d[0] = (0 - m->d[0]) & BN_MASK2;
  627. for (i = 1; i < j; i++) {
  628. r->d[i] = (~m->d[i]) & BN_MASK2;
  629. }
  630. r->top = j;
  631. /* Upper words will be zero if the corresponding words of 'm'
  632. * were 0xfff[...], so decrement r->top accordingly. */
  633. bn_correct_top(r);
  634. } else if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) {
  635. goto err;
  636. }
  637. for (;;) {
  638. int wvalue; /* The 'value' of the window */
  639. int wend; /* The bottom bit of the window */
  640. if (BN_is_bit_set(p, wstart) == 0) {
  641. if (!start && !BN_mod_mul_montgomery(r, r, r, mont, ctx)) {
  642. goto err;
  643. }
  644. if (wstart == 0) {
  645. break;
  646. }
  647. wstart--;
  648. continue;
  649. }
  650. /* We now have wstart on a 'set' bit, we now need to work out how bit a
  651. * window to do. To do this we need to scan forward until the last set bit
  652. * before the end of the window */
  653. wvalue = 1;
  654. wend = 0;
  655. for (i = 1; i < window; i++) {
  656. if (wstart - i < 0) {
  657. break;
  658. }
  659. if (BN_is_bit_set(p, wstart - i)) {
  660. wvalue <<= (i - wend);
  661. wvalue |= 1;
  662. wend = i;
  663. }
  664. }
  665. /* wend is the size of the current window */
  666. j = wend + 1;
  667. /* add the 'bytes above' */
  668. if (!start) {
  669. for (i = 0; i < j; i++) {
  670. if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) {
  671. goto err;
  672. }
  673. }
  674. }
  675. /* wvalue will be an odd number < 2^window */
  676. if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) {
  677. goto err;
  678. }
  679. /* move the 'window' down further */
  680. wstart -= wend + 1;
  681. start = 0;
  682. if (wstart < 0) {
  683. break;
  684. }
  685. }
  686. if (!BN_from_montgomery(rr, r, mont, ctx)) {
  687. goto err;
  688. }
  689. ret = 1;
  690. err:
  691. BN_MONT_CTX_free(new_mont);
  692. BN_CTX_end(ctx);
  693. return ret;
  694. }
  695. /* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific
  696. * layout so that accessing any of these table values shows the same access
  697. * pattern as far as cache lines are concerned. The following functions are
  698. * used to transfer a BIGNUM from/to that table. */
  699. static int copy_to_prebuf(const BIGNUM *b, int top, unsigned char *buf, int idx,
  700. int width) {
  701. size_t i, j;
  702. if (top > b->top) {
  703. top = b->top; /* this works because 'buf' is explicitly zeroed */
  704. }
  705. for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) {
  706. buf[j] = ((unsigned char *)b->d)[i];
  707. }
  708. return 1;
  709. }
  710. static int copy_from_prebuf(BIGNUM *b, int top, unsigned char *buf, int idx,
  711. int width) {
  712. size_t i, j;
  713. if (bn_wexpand(b, top) == NULL) {
  714. return 0;
  715. }
  716. for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) {
  717. ((unsigned char *)b->d)[i] = buf[j];
  718. }
  719. b->top = top;
  720. bn_correct_top(b);
  721. return 1;
  722. }
  723. /* BN_mod_exp_mont_conttime is based on the assumption that the L1 data cache
  724. * line width of the target processor is at least the following value. */
  725. #define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH (64)
  726. #define MOD_EXP_CTIME_MIN_CACHE_LINE_MASK \
  727. (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - 1)
  728. /* Window sizes optimized for fixed window size modular exponentiation
  729. * algorithm (BN_mod_exp_mont_consttime).
  730. *
  731. * To achieve the security goals of BN_mode_exp_mont_consttime, the maximum
  732. * size of the window must not exceed
  733. * log_2(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH).
  734. *
  735. * Window size thresholds are defined for cache line sizes of 32 and 64, cache
  736. * line sizes where log_2(32)=5 and log_2(64)=6 respectively. A window size of
  737. * 7 should only be used on processors that have a 128 byte or greater cache
  738. * line size. */
  739. #if MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 64
  740. #define BN_window_bits_for_ctime_exponent_size(b) \
  741. ((b) > 937 ? 6 : (b) > 306 ? 5 : (b) > 89 ? 4 : (b) > 22 ? 3 : 1)
  742. #define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (6)
  743. #elif MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 32
  744. #define BN_window_bits_for_ctime_exponent_size(b) \
  745. ((b) > 306 ? 5 : (b) > 89 ? 4 : (b) > 22 ? 3 : 1)
  746. #define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (5)
  747. #endif
  748. /* Given a pointer value, compute the next address that is a cache line
  749. * multiple. */
  750. #define MOD_EXP_CTIME_ALIGN(x_) \
  751. ((unsigned char *)(x_) + \
  752. (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - \
  753. (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
  754. /* This variant of BN_mod_exp_mont() uses fixed windows and the special
  755. * precomputation memory layout to limit data-dependency to a minimum
  756. * to protect secret exponents (cf. the hyper-threading timing attacks
  757. * pointed out by Colin Percival,
  758. * http://www.daemonology.net/hyperthreading-considered-harmful/)
  759. */
  760. int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
  761. const BIGNUM *m, BN_CTX *ctx,
  762. const BN_MONT_CTX *mont) {
  763. int i, bits, ret = 0, window, wvalue;
  764. int top;
  765. BN_MONT_CTX *new_mont = NULL;
  766. int numPowers;
  767. unsigned char *powerbufFree = NULL;
  768. int powerbufLen = 0;
  769. unsigned char *powerbuf = NULL;
  770. BIGNUM tmp, am;
  771. if (!BN_is_odd(m)) {
  772. OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS);
  773. return 0;
  774. }
  775. top = m->top;
  776. bits = BN_num_bits(p);
  777. if (bits == 0) {
  778. /* x**0 mod 1 is still zero. */
  779. if (BN_is_one(m)) {
  780. BN_zero(rr);
  781. return 1;
  782. }
  783. return BN_one(rr);
  784. }
  785. BN_CTX_start(ctx);
  786. /* Allocate a montgomery context if it was not supplied by the caller. */
  787. if (mont == NULL) {
  788. new_mont = BN_MONT_CTX_new();
  789. if (new_mont == NULL || !BN_MONT_CTX_set(new_mont, m, ctx)) {
  790. goto err;
  791. }
  792. mont = new_mont;
  793. }
  794. #ifdef RSAZ_ENABLED
  795. /* If the size of the operands allow it, perform the optimized
  796. * RSAZ exponentiation. For further information see
  797. * crypto/bn/rsaz_exp.c and accompanying assembly modules. */
  798. if ((16 == a->top) && (16 == p->top) && (BN_num_bits(m) == 1024) &&
  799. rsaz_avx2_eligible()) {
  800. if (NULL == bn_wexpand(rr, 16)) {
  801. goto err;
  802. }
  803. RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d, mont->n0[0]);
  804. rr->top = 16;
  805. rr->neg = 0;
  806. bn_correct_top(rr);
  807. ret = 1;
  808. goto err;
  809. } else if ((8 == a->top) && (8 == p->top) && (BN_num_bits(m) == 512)) {
  810. if (NULL == bn_wexpand(rr, 8)) {
  811. goto err;
  812. }
  813. RSAZ_512_mod_exp(rr->d, a->d, p->d, m->d, mont->n0[0], mont->RR.d);
  814. rr->top = 8;
  815. rr->neg = 0;
  816. bn_correct_top(rr);
  817. ret = 1;
  818. goto err;
  819. }
  820. #endif
  821. /* Get the window size to use with size of p. */
  822. window = BN_window_bits_for_ctime_exponent_size(bits);
  823. #if defined(OPENSSL_BN_ASM_MONT5)
  824. if (window >= 5) {
  825. window = 5; /* ~5% improvement for RSA2048 sign, and even for RSA4096 */
  826. if ((top & 7) == 0) {
  827. powerbufLen += 2 * top * sizeof(m->d[0]);
  828. }
  829. }
  830. #endif
  831. /* Allocate a buffer large enough to hold all of the pre-computed
  832. * powers of am, am itself and tmp.
  833. */
  834. numPowers = 1 << window;
  835. powerbufLen +=
  836. sizeof(m->d[0]) *
  837. (top * numPowers + ((2 * top) > numPowers ? (2 * top) : numPowers));
  838. #ifdef alloca
  839. if (powerbufLen < 3072) {
  840. powerbufFree = alloca(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
  841. } else
  842. #endif
  843. {
  844. if ((powerbufFree = OPENSSL_malloc(
  845. powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL) {
  846. goto err;
  847. }
  848. }
  849. powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
  850. memset(powerbuf, 0, powerbufLen);
  851. #ifdef alloca
  852. if (powerbufLen < 3072) {
  853. powerbufFree = NULL;
  854. }
  855. #endif
  856. /* lay down tmp and am right after powers table */
  857. tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers);
  858. am.d = tmp.d + top;
  859. tmp.top = am.top = 0;
  860. tmp.dmax = am.dmax = top;
  861. tmp.neg = am.neg = 0;
  862. tmp.flags = am.flags = BN_FLG_STATIC_DATA;
  863. /* prepare a^0 in Montgomery domain */
  864. /* by Shay Gueron's suggestion */
  865. if (m->d[top - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) {
  866. /* 2^(top*BN_BITS2) - m */
  867. tmp.d[0] = (0 - m->d[0]) & BN_MASK2;
  868. for (i = 1; i < top; i++) {
  869. tmp.d[i] = (~m->d[i]) & BN_MASK2;
  870. }
  871. tmp.top = top;
  872. } else if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx)) {
  873. goto err;
  874. }
  875. /* prepare a^1 in Montgomery domain */
  876. if (a->neg || BN_ucmp(a, m) >= 0) {
  877. if (!BN_mod(&am, a, m, ctx) ||
  878. !BN_to_montgomery(&am, &am, mont, ctx)) {
  879. goto err;
  880. }
  881. } else if (!BN_to_montgomery(&am, a, mont, ctx)) {
  882. goto err;
  883. }
  884. #if defined(OPENSSL_BN_ASM_MONT5)
  885. /* This optimization uses ideas from http://eprint.iacr.org/2011/239,
  886. * specifically optimization of cache-timing attack countermeasures
  887. * and pre-computation optimization. */
  888. /* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
  889. * 512-bit RSA is hardly relevant, we omit it to spare size... */
  890. if (window == 5 && top > 1) {
  891. const BN_ULONG *np = mont->N.d, *n0 = mont->n0, *np2;
  892. /* BN_to_montgomery can contaminate words above .top
  893. * [in BN_DEBUG[_DEBUG] build]... */
  894. for (i = am.top; i < top; i++) {
  895. am.d[i] = 0;
  896. }
  897. for (i = tmp.top; i < top; i++) {
  898. tmp.d[i] = 0;
  899. }
  900. if (top & 7) {
  901. np2 = np;
  902. } else {
  903. BN_ULONG *np_double = am.d + top;
  904. for (i = 0; i < top; i++) {
  905. np_double[2 * i] = np[i];
  906. }
  907. np2 = np_double;
  908. }
  909. bn_scatter5(tmp.d, top, powerbuf, 0);
  910. bn_scatter5(am.d, am.top, powerbuf, 1);
  911. bn_mul_mont(tmp.d, am.d, am.d, np, n0, top);
  912. bn_scatter5(tmp.d, top, powerbuf, 2);
  913. /* same as above, but uses squaring for 1/2 of operations */
  914. for (i = 4; i < 32; i *= 2) {
  915. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  916. bn_scatter5(tmp.d, top, powerbuf, i);
  917. }
  918. for (i = 3; i < 8; i += 2) {
  919. int j;
  920. bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
  921. bn_scatter5(tmp.d, top, powerbuf, i);
  922. for (j = 2 * i; j < 32; j *= 2) {
  923. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  924. bn_scatter5(tmp.d, top, powerbuf, j);
  925. }
  926. }
  927. for (; i < 16; i += 2) {
  928. bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
  929. bn_scatter5(tmp.d, top, powerbuf, i);
  930. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  931. bn_scatter5(tmp.d, top, powerbuf, 2 * i);
  932. }
  933. for (; i < 32; i += 2) {
  934. bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
  935. bn_scatter5(tmp.d, top, powerbuf, i);
  936. }
  937. bits--;
  938. for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) {
  939. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  940. }
  941. bn_gather5(tmp.d, top, powerbuf, wvalue);
  942. /* At this point |bits| is 4 mod 5 and at least -1. (|bits| is the first bit
  943. * that has not been read yet.) */
  944. assert(bits >= -1 && (bits == -1 || bits % 5 == 4));
  945. /* Scan the exponent one window at a time starting from the most
  946. * significant bits.
  947. */
  948. if (top & 7) {
  949. while (bits >= 0) {
  950. for (wvalue = 0, i = 0; i < 5; i++, bits--) {
  951. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  952. }
  953. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  954. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  955. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  956. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  957. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  958. bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
  959. }
  960. } else {
  961. const uint8_t *p_bytes = (const uint8_t *)p->d;
  962. int max_bits = p->top * BN_BITS2;
  963. assert(bits < max_bits);
  964. /* |p = 0| has been handled as a special case, so |max_bits| is at least
  965. * one word. */
  966. assert(max_bits >= 64);
  967. /* If the first bit to be read lands in the last byte, unroll the first
  968. * iteration to avoid reading past the bounds of |p->d|. (After the first
  969. * iteration, we are guaranteed to be past the last byte.) Note |bits|
  970. * here is the top bit, inclusive. */
  971. if (bits - 4 >= max_bits - 8) {
  972. /* Read five bits from |bits-4| through |bits|, inclusive. */
  973. wvalue = p_bytes[p->top * BN_BYTES - 1];
  974. wvalue >>= (bits - 4) & 7;
  975. wvalue &= 0x1f;
  976. bits -= 5;
  977. bn_power5(tmp.d, tmp.d, powerbuf, np2, n0, top, wvalue);
  978. }
  979. while (bits >= 0) {
  980. /* Read five bits from |bits-4| through |bits|, inclusive. */
  981. int first_bit = bits - 4;
  982. wvalue = *(const uint16_t *) (p_bytes + (first_bit >> 3));
  983. wvalue >>= first_bit & 7;
  984. wvalue &= 0x1f;
  985. bits -= 5;
  986. bn_power5(tmp.d, tmp.d, powerbuf, np2, n0, top, wvalue);
  987. }
  988. }
  989. ret = bn_from_montgomery(tmp.d, tmp.d, NULL, np2, n0, top);
  990. tmp.top = top;
  991. bn_correct_top(&tmp);
  992. if (ret) {
  993. if (!BN_copy(rr, &tmp)) {
  994. ret = 0;
  995. }
  996. goto err; /* non-zero ret means it's not error */
  997. }
  998. } else
  999. #endif
  1000. {
  1001. if (!copy_to_prebuf(&tmp, top, powerbuf, 0, numPowers) ||
  1002. !copy_to_prebuf(&am, top, powerbuf, 1, numPowers)) {
  1003. goto err;
  1004. }
  1005. /* If the window size is greater than 1, then calculate
  1006. * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
  1007. * (even powers could instead be computed as (a^(i/2))^2
  1008. * to use the slight performance advantage of sqr over mul).
  1009. */
  1010. if (window > 1) {
  1011. if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx) ||
  1012. !copy_to_prebuf(&tmp, top, powerbuf, 2, numPowers)) {
  1013. goto err;
  1014. }
  1015. for (i = 3; i < numPowers; i++) {
  1016. /* Calculate a^i = a^(i-1) * a */
  1017. if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx) ||
  1018. !copy_to_prebuf(&tmp, top, powerbuf, i, numPowers)) {
  1019. goto err;
  1020. }
  1021. }
  1022. }
  1023. bits--;
  1024. for (wvalue = 0, i = bits % window; i >= 0; i--, bits--) {
  1025. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  1026. }
  1027. if (!copy_from_prebuf(&tmp, top, powerbuf, wvalue, numPowers)) {
  1028. goto err;
  1029. }
  1030. /* Scan the exponent one window at a time starting from the most
  1031. * significant bits.
  1032. */
  1033. while (bits >= 0) {
  1034. wvalue = 0; /* The 'value' of the window */
  1035. /* Scan the window, squaring the result as we go */
  1036. for (i = 0; i < window; i++, bits--) {
  1037. if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, mont, ctx)) {
  1038. goto err;
  1039. }
  1040. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  1041. }
  1042. /* Fetch the appropriate pre-computed value from the pre-buf */
  1043. if (!copy_from_prebuf(&am, top, powerbuf, wvalue, numPowers)) {
  1044. goto err;
  1045. }
  1046. /* Multiply the result into the intermediate result */
  1047. if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx)) {
  1048. goto err;
  1049. }
  1050. }
  1051. }
  1052. /* Convert the final result from montgomery to standard format */
  1053. if (!BN_from_montgomery(rr, &tmp, mont, ctx)) {
  1054. goto err;
  1055. }
  1056. ret = 1;
  1057. err:
  1058. BN_MONT_CTX_free(new_mont);
  1059. if (powerbuf != NULL) {
  1060. OPENSSL_cleanse(powerbuf, powerbufLen);
  1061. OPENSSL_free(powerbufFree);
  1062. }
  1063. BN_CTX_end(ctx);
  1064. return (ret);
  1065. }
  1066. int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
  1067. const BIGNUM *m, BN_CTX *ctx,
  1068. const BN_MONT_CTX *mont) {
  1069. BN_MONT_CTX *new_mont = NULL;
  1070. int b, bits, ret = 0;
  1071. int r_is_one;
  1072. BN_ULONG w, next_w;
  1073. BIGNUM *d, *r, *t;
  1074. BIGNUM *swap_tmp;
  1075. #define BN_MOD_MUL_WORD(r, w, m) \
  1076. (BN_mul_word(r, (w)) && \
  1077. (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \
  1078. (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
  1079. /* BN_MOD_MUL_WORD is only used with 'w' large, so the BN_ucmp test is
  1080. * probably more overhead than always using BN_mod (which uses BN_copy if a
  1081. * similar test returns true). We can use BN_mod and do not need BN_nnmod
  1082. * because our accumulator is never negative (the result of BN_mod does not
  1083. * depend on the sign of the modulus). */
  1084. #define BN_TO_MONTGOMERY_WORD(r, w, mont) \
  1085. (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
  1086. if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
  1087. /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
  1088. OPENSSL_PUT_ERROR(BN, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
  1089. return 0;
  1090. }
  1091. if (!BN_is_odd(m)) {
  1092. OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS);
  1093. return 0;
  1094. }
  1095. if (m->top == 1) {
  1096. a %= m->d[0]; /* make sure that 'a' is reduced */
  1097. }
  1098. bits = BN_num_bits(p);
  1099. if (bits == 0) {
  1100. /* x**0 mod 1 is still zero. */
  1101. if (BN_is_one(m)) {
  1102. BN_zero(rr);
  1103. return 1;
  1104. }
  1105. return BN_one(rr);
  1106. }
  1107. if (a == 0) {
  1108. BN_zero(rr);
  1109. return 1;
  1110. }
  1111. BN_CTX_start(ctx);
  1112. d = BN_CTX_get(ctx);
  1113. r = BN_CTX_get(ctx);
  1114. t = BN_CTX_get(ctx);
  1115. if (d == NULL || r == NULL || t == NULL) {
  1116. goto err;
  1117. }
  1118. /* Allocate a montgomery context if it was not supplied by the caller. */
  1119. if (mont == NULL) {
  1120. new_mont = BN_MONT_CTX_new();
  1121. if (new_mont == NULL || !BN_MONT_CTX_set(new_mont, m, ctx)) {
  1122. goto err;
  1123. }
  1124. mont = new_mont;
  1125. }
  1126. r_is_one = 1; /* except for Montgomery factor */
  1127. /* bits-1 >= 0 */
  1128. /* The result is accumulated in the product r*w. */
  1129. w = a; /* bit 'bits-1' of 'p' is always set */
  1130. for (b = bits - 2; b >= 0; b--) {
  1131. /* First, square r*w. */
  1132. next_w = w * w;
  1133. if ((next_w / w) != w) {
  1134. /* overflow */
  1135. if (r_is_one) {
  1136. if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) {
  1137. goto err;
  1138. }
  1139. r_is_one = 0;
  1140. } else {
  1141. if (!BN_MOD_MUL_WORD(r, w, m)) {
  1142. goto err;
  1143. }
  1144. }
  1145. next_w = 1;
  1146. }
  1147. w = next_w;
  1148. if (!r_is_one) {
  1149. if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) {
  1150. goto err;
  1151. }
  1152. }
  1153. /* Second, multiply r*w by 'a' if exponent bit is set. */
  1154. if (BN_is_bit_set(p, b)) {
  1155. next_w = w * a;
  1156. if ((next_w / a) != w) {
  1157. /* overflow */
  1158. if (r_is_one) {
  1159. if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) {
  1160. goto err;
  1161. }
  1162. r_is_one = 0;
  1163. } else {
  1164. if (!BN_MOD_MUL_WORD(r, w, m)) {
  1165. goto err;
  1166. }
  1167. }
  1168. next_w = a;
  1169. }
  1170. w = next_w;
  1171. }
  1172. }
  1173. /* Finally, set r:=r*w. */
  1174. if (w != 1) {
  1175. if (r_is_one) {
  1176. if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) {
  1177. goto err;
  1178. }
  1179. r_is_one = 0;
  1180. } else {
  1181. if (!BN_MOD_MUL_WORD(r, w, m)) {
  1182. goto err;
  1183. }
  1184. }
  1185. }
  1186. if (r_is_one) {
  1187. /* can happen only if a == 1*/
  1188. if (!BN_one(rr)) {
  1189. goto err;
  1190. }
  1191. } else {
  1192. if (!BN_from_montgomery(rr, r, mont, ctx)) {
  1193. goto err;
  1194. }
  1195. }
  1196. ret = 1;
  1197. err:
  1198. BN_MONT_CTX_free(new_mont);
  1199. BN_CTX_end(ctx);
  1200. return ret;
  1201. }
  1202. #define TABLE_SIZE 32
  1203. int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1,
  1204. const BIGNUM *a2, const BIGNUM *p2, const BIGNUM *m,
  1205. BN_CTX *ctx, const BN_MONT_CTX *mont) {
  1206. int i, j, bits, b, bits1, bits2, ret = 0, wpos1, wpos2, window1, window2,
  1207. wvalue1, wvalue2;
  1208. int r_is_one = 1;
  1209. BIGNUM *d, *r;
  1210. const BIGNUM *a_mod_m;
  1211. /* Tables of variables obtained from 'ctx' */
  1212. BIGNUM *val1[TABLE_SIZE], *val2[TABLE_SIZE];
  1213. BN_MONT_CTX *new_mont = NULL;
  1214. if (!(m->d[0] & 1)) {
  1215. OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS);
  1216. return 0;
  1217. }
  1218. bits1 = BN_num_bits(p1);
  1219. bits2 = BN_num_bits(p2);
  1220. if (bits1 == 0 && bits2 == 0) {
  1221. ret = BN_one(rr);
  1222. return ret;
  1223. }
  1224. bits = (bits1 > bits2) ? bits1 : bits2;
  1225. BN_CTX_start(ctx);
  1226. d = BN_CTX_get(ctx);
  1227. r = BN_CTX_get(ctx);
  1228. val1[0] = BN_CTX_get(ctx);
  1229. val2[0] = BN_CTX_get(ctx);
  1230. if (!d || !r || !val1[0] || !val2[0]) {
  1231. goto err;
  1232. }
  1233. /* Allocate a montgomery context if it was not supplied by the caller. */
  1234. if (mont == NULL) {
  1235. new_mont = BN_MONT_CTX_new();
  1236. if (new_mont == NULL || !BN_MONT_CTX_set(new_mont, m, ctx)) {
  1237. goto err;
  1238. }
  1239. mont = new_mont;
  1240. }
  1241. window1 = BN_window_bits_for_exponent_size(bits1);
  1242. window2 = BN_window_bits_for_exponent_size(bits2);
  1243. /* Build table for a1: val1[i] := a1^(2*i + 1) mod m for i = 0 ..
  1244. * 2^(window1-1) */
  1245. if (a1->neg || BN_ucmp(a1, m) >= 0) {
  1246. if (!BN_mod(val1[0], a1, m, ctx)) {
  1247. goto err;
  1248. }
  1249. a_mod_m = val1[0];
  1250. } else {
  1251. a_mod_m = a1;
  1252. }
  1253. if (BN_is_zero(a_mod_m)) {
  1254. BN_zero(rr);
  1255. ret = 1;
  1256. goto err;
  1257. }
  1258. if (!BN_to_montgomery(val1[0], a_mod_m, mont, ctx)) {
  1259. goto err;
  1260. }
  1261. if (window1 > 1) {
  1262. if (!BN_mod_mul_montgomery(d, val1[0], val1[0], mont, ctx)) {
  1263. goto err;
  1264. }
  1265. j = 1 << (window1 - 1);
  1266. for (i = 1; i < j; i++) {
  1267. if (((val1[i] = BN_CTX_get(ctx)) == NULL) ||
  1268. !BN_mod_mul_montgomery(val1[i], val1[i - 1], d, mont, ctx)) {
  1269. goto err;
  1270. }
  1271. }
  1272. }
  1273. /* Build table for a2: val2[i] := a2^(2*i + 1) mod m for i = 0 ..
  1274. * 2^(window2-1) */
  1275. if (a2->neg || BN_ucmp(a2, m) >= 0) {
  1276. if (!BN_mod(val2[0], a2, m, ctx)) {
  1277. goto err;
  1278. }
  1279. a_mod_m = val2[0];
  1280. } else {
  1281. a_mod_m = a2;
  1282. }
  1283. if (BN_is_zero(a_mod_m)) {
  1284. BN_zero(rr);
  1285. ret = 1;
  1286. goto err;
  1287. }
  1288. if (!BN_to_montgomery(val2[0], a_mod_m, mont, ctx)) {
  1289. goto err;
  1290. }
  1291. if (window2 > 1) {
  1292. if (!BN_mod_mul_montgomery(d, val2[0], val2[0], mont, ctx)) {
  1293. goto err;
  1294. }
  1295. j = 1 << (window2 - 1);
  1296. for (i = 1; i < j; i++) {
  1297. if (((val2[i] = BN_CTX_get(ctx)) == NULL) ||
  1298. !BN_mod_mul_montgomery(val2[i], val2[i - 1], d, mont, ctx)) {
  1299. goto err;
  1300. }
  1301. }
  1302. }
  1303. /* Now compute the power product, using independent windows. */
  1304. r_is_one = 1;
  1305. wvalue1 = 0; /* The 'value' of the first window */
  1306. wvalue2 = 0; /* The 'value' of the second window */
  1307. wpos1 = 0; /* If wvalue1 > 0, the bottom bit of the first window */
  1308. wpos2 = 0; /* If wvalue2 > 0, the bottom bit of the second window */
  1309. if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) {
  1310. goto err;
  1311. }
  1312. for (b = bits - 1; b >= 0; b--) {
  1313. if (!r_is_one) {
  1314. if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) {
  1315. goto err;
  1316. }
  1317. }
  1318. if (!wvalue1 && BN_is_bit_set(p1, b)) {
  1319. /* consider bits b-window1+1 .. b for this window */
  1320. i = b - window1 + 1;
  1321. /* works for i<0 */
  1322. while (!BN_is_bit_set(p1, i)) {
  1323. i++;
  1324. }
  1325. wpos1 = i;
  1326. wvalue1 = 1;
  1327. for (i = b - 1; i >= wpos1; i--) {
  1328. wvalue1 <<= 1;
  1329. if (BN_is_bit_set(p1, i)) {
  1330. wvalue1++;
  1331. }
  1332. }
  1333. }
  1334. if (!wvalue2 && BN_is_bit_set(p2, b)) {
  1335. /* consider bits b-window2+1 .. b for this window */
  1336. i = b - window2 + 1;
  1337. while (!BN_is_bit_set(p2, i)) {
  1338. i++;
  1339. }
  1340. wpos2 = i;
  1341. wvalue2 = 1;
  1342. for (i = b - 1; i >= wpos2; i--) {
  1343. wvalue2 <<= 1;
  1344. if (BN_is_bit_set(p2, i)) {
  1345. wvalue2++;
  1346. }
  1347. }
  1348. }
  1349. if (wvalue1 && b == wpos1) {
  1350. /* wvalue1 is odd and < 2^window1 */
  1351. if (!BN_mod_mul_montgomery(r, r, val1[wvalue1 >> 1], mont, ctx)) {
  1352. goto err;
  1353. }
  1354. wvalue1 = 0;
  1355. r_is_one = 0;
  1356. }
  1357. if (wvalue2 && b == wpos2) {
  1358. /* wvalue2 is odd and < 2^window2 */
  1359. if (!BN_mod_mul_montgomery(r, r, val2[wvalue2 >> 1], mont, ctx)) {
  1360. goto err;
  1361. }
  1362. wvalue2 = 0;
  1363. r_is_one = 0;
  1364. }
  1365. }
  1366. if (!BN_from_montgomery(rr, r, mont, ctx)) {
  1367. goto err;
  1368. }
  1369. ret = 1;
  1370. err:
  1371. BN_MONT_CTX_free(new_mont);
  1372. BN_CTX_end(ctx);
  1373. return ret;
  1374. }