/* ====================================================================
 * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
 *
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For written permission, please contact
 *    openssl-core@openssl.org.
 *
 * 5. Products derived from this software may not be called "OpenSSL"
 *    nor may "OpenSSL" appear in their names without prior written
 *    permission of the OpenSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ==================================================================== */

#include <openssl/base.h>

#include <assert.h>
#include <string.h>

#include <openssl/mem.h>
#include <openssl/cpu.h>

#include "internal.h"
#include "../../internal.h"


#if !defined(OPENSSL_NO_ASM) &&                          \
    (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) ||  \
     defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64) || \
     defined(OPENSSL_PPC64LE))
#define GHASH_ASM
#endif

#define PACK(s) ((size_t)(s) << (sizeof(size_t) * 8 - 16))
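
// REDUCE1BIT shifts |V| right by one bit and, when a bit falls off the low
// end, folds the reduction back in by XOR-ing a constant into the top word.
// The 0xe1... constant is how the GHASH field polynomial
// x^128 + x^7 + x^2 + x + 1 is represented in GHASH's bit order.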
#define REDUCE1BIT(V)                                                 \
  do {                                                                \
    if (sizeof(size_t) == 8) {                                        \
      uint64_t T = UINT64_C(0xe100000000000000) & (0 - ((V).lo & 1)); \
      (V).lo = ((V).hi << 63) | ((V).lo >> 1);                        \
      (V).hi = ((V).hi >> 1) ^ T;                                     \
    } else {                                                          \
      uint32_t T = 0xe1000000U & (0 - (uint32_t)((V).lo & 1));        \
      (V).lo = ((V).hi << 63) | ((V).lo >> 1);                        \
      (V).hi = ((V).hi >> 1) ^ ((uint64_t)T << 32);                   \
    }                                                                 \
  } while (0)

// kSizeTWithoutLower4Bits is a mask that can be used to zero the lower four
// bits of a |size_t|.
static const size_t kSizeTWithoutLower4Bits = (size_t) -16;
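
// gcm_init_4bit fills Htable with the products of the hash key H and every
// 4-bit value (in GHASH's bit order), so the generic code below can process
// Xi one nibble at a time (Shoup's 4-bit table method): entries 8, 4, 2 and 1
// are successive reduced shifts of H and the remaining entries are XOR
// combinations of those. The trailing loop appears to swap the halves of each
// entry into the layout the 32-bit ARM assembly expects.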
static void gcm_init_4bit(u128 Htable[16], uint64_t H[2]) {
  u128 V;

  Htable[0].hi = 0;
  Htable[0].lo = 0;
  V.hi = H[0];
  V.lo = H[1];

  Htable[8] = V;
  REDUCE1BIT(V);
  Htable[4] = V;
  REDUCE1BIT(V);
  Htable[2] = V;
  REDUCE1BIT(V);
  Htable[1] = V;
  Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
  V = Htable[4];
  Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
  Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
  Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
  V = Htable[8];
  Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
  Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
  Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
  Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
  Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
  Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
  Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;

#if defined(GHASH_ASM) && defined(OPENSSL_ARM)
  for (int j = 0; j < 16; ++j) {
    V = Htable[j];
    Htable[j].hi = V.lo;
    Htable[j].lo = V.hi;
  }
#endif
}

#if !defined(GHASH_ASM) || defined(OPENSSL_AARCH64) || defined(OPENSSL_PPC64LE)
static const size_t rem_4bit[16] = {
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)};
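
// gcm_gmult_4bit computes Xi = Xi * H using the precomputed Htable, consuming
// one 4-bit nibble of Xi per iteration. Each 4-bit shift of the accumulator
// pushes four bits off the low end; rem_4bit holds the sixteen pre-reduced
// values for those bits, packed into the top 16 bits of a size_t by PACK so
// they can be XOR-ed into the high word (shifted up first on 32-bit builds).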
static void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]) {
  u128 Z;
  int cnt = 15;
  size_t rem, nlo, nhi;

  nlo = ((const uint8_t *)Xi)[15];
  nhi = nlo >> 4;
  nlo &= 0xf;

  Z.hi = Htable[nlo].hi;
  Z.lo = Htable[nlo].lo;

  while (1) {
    rem = (size_t)Z.lo & 0xf;
    Z.lo = (Z.hi << 60) | (Z.lo >> 4);
    Z.hi = (Z.hi >> 4);
    if (sizeof(size_t) == 8) {
      Z.hi ^= rem_4bit[rem];
    } else {
      Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
    }

    Z.hi ^= Htable[nhi].hi;
    Z.lo ^= Htable[nhi].lo;

    if (--cnt < 0) {
      break;
    }

    nlo = ((const uint8_t *)Xi)[cnt];
    nhi = nlo >> 4;
    nlo &= 0xf;

    rem = (size_t)Z.lo & 0xf;
    Z.lo = (Z.hi << 60) | (Z.lo >> 4);
    Z.hi = (Z.hi >> 4);
    if (sizeof(size_t) == 8) {
      Z.hi ^= rem_4bit[rem];
    } else {
      Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
    }

    Z.hi ^= Htable[nlo].hi;
    Z.lo ^= Htable[nlo].lo;
  }

  Xi[0] = CRYPTO_bswap8(Z.hi);
  Xi[1] = CRYPTO_bswap8(Z.lo);
}

// gcm_ghash_4bit is the streamed version of gcm_gmult_4bit; see
// CRYPTO_gcm128_encrypt and CRYPTO_gcm128_decrypt for how it is used.
// Compiler-generated code does not seem to give any performance
// improvement, at least not on x86[_64]. It is kept mostly as a reference
// and a placeholder for possible future non-trivial optimizations.
static void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16],
                           const uint8_t *inp, size_t len) {
  u128 Z;
  int cnt;
  size_t rem, nlo, nhi;

  do {
    cnt = 15;
    nlo = ((const uint8_t *)Xi)[15];
    nlo ^= inp[15];
    nhi = nlo >> 4;
    nlo &= 0xf;

    Z.hi = Htable[nlo].hi;
    Z.lo = Htable[nlo].lo;

    while (1) {
      rem = (size_t)Z.lo & 0xf;
      Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      Z.hi = (Z.hi >> 4);
      if (sizeof(size_t) == 8) {
        Z.hi ^= rem_4bit[rem];
      } else {
        Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
      }

      Z.hi ^= Htable[nhi].hi;
      Z.lo ^= Htable[nhi].lo;

      if (--cnt < 0) {
        break;
      }

      nlo = ((const uint8_t *)Xi)[cnt];
      nlo ^= inp[cnt];
      nhi = nlo >> 4;
      nlo &= 0xf;

      rem = (size_t)Z.lo & 0xf;
      Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      Z.hi = (Z.hi >> 4);
      if (sizeof(size_t) == 8) {
        Z.hi ^= rem_4bit[rem];
      } else {
        Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
      }

      Z.hi ^= Htable[nlo].hi;
      Z.lo ^= Htable[nlo].lo;
    }

    Xi[0] = CRYPTO_bswap8(Z.hi);
    Xi[1] = CRYPTO_bswap8(Z.lo);
  } while (inp += 16, len -= 16);
}
#else  // GHASH_ASM
void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                    size_t len);
#endif

#define GCM_MUL(ctx, Xi) gcm_gmult_4bit((ctx)->Xi.u, (ctx)->Htable)

#if defined(GHASH_ASM)
#define GHASH(ctx, in, len) gcm_ghash_4bit((ctx)->Xi.u, (ctx)->Htable, in, len)
// GHASH_CHUNK is a "stride" parameter intended to mitigate cache thrashing:
// the idea is to hash data while it is still in the L1 cache after the
// encryption pass.
#define GHASH_CHUNK (3 * 1024)
#endif

#if defined(GHASH_ASM)

#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
#define GCM_FUNCREF_4BIT
void gcm_init_clmul(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                     size_t len);

#if defined(OPENSSL_X86_64)
#define GHASH_ASM_X86_64
void gcm_init_avx(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_avx(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_avx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *in,
                   size_t len);
#define AESNI_GCM
size_t aesni_gcm_encrypt(const uint8_t *in, uint8_t *out, size_t len,
                         const void *key, uint8_t ivec[16], uint64_t *Xi);
size_t aesni_gcm_decrypt(const uint8_t *in, uint8_t *out, size_t len,
                         const void *key, uint8_t ivec[16], uint64_t *Xi);
#endif

#if defined(OPENSSL_X86)
#define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(uint64_t Xi[2], const u128 Htable[16],
                        const uint8_t *inp, size_t len);
#endif

#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
#include <openssl/arm_arch.h>
#if __ARM_ARCH__ >= 7
#define GHASH_ASM_ARM
#define GCM_FUNCREF_4BIT

static int pmull_capable(void) {
  return CRYPTO_is_ARMv8_PMULL_capable();
}

void gcm_init_v8(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_v8(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_v8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                  size_t len);

#if defined(OPENSSL_ARM)
// 32-bit ARM also has support for doing GCM with NEON instructions.
static int neon_capable(void) {
  return CRYPTO_is_NEON_capable();
}

void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                    size_t len);
#else
// AArch64 only has the ARMv8 versions of functions.
static int neon_capable(void) {
  return 0;
}
static void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]) {
  abort();
}
static void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]) {
  abort();
}
static void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16],
                           const uint8_t *inp, size_t len) {
  abort();
}
#endif

#endif
#elif defined(OPENSSL_PPC64LE)
#define GHASH_ASM_PPC64LE
#define GCM_FUNCREF_4BIT
void gcm_init_p8(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_p8(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_p8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                  size_t len);
#endif
#endif

#ifdef GCM_FUNCREF_4BIT
#undef GCM_MUL
#define GCM_MUL(ctx, Xi) (*gcm_gmult_p)((ctx)->Xi.u, (ctx)->Htable)
#ifdef GHASH
#undef GHASH
#define GHASH(ctx, in, len) (*gcm_ghash_p)((ctx)->Xi.u, (ctx)->Htable, in, len)
#endif
#endif
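
// CRYPTO_ghash_init selects a GHASH implementation at runtime: on x86-64 it
// prefers the AVX+MOVBE code (which also enables the fused AES-NI GCM path),
// then the CLMUL code; on ARM it prefers the PMULL code and then NEON; on
// ppc64le it uses the vector-crypto code; otherwise it falls back to the
// generic 4-bit table implementation above.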
void CRYPTO_ghash_init(gmult_func *out_mult, ghash_func *out_hash,
                       u128 *out_key, u128 out_table[16], int *out_is_avx,
                       const uint8_t *gcm_key) {
  *out_is_avx = 0;

  union {
    uint64_t u[2];
    uint8_t c[16];
  } H;

  OPENSSL_memcpy(H.c, gcm_key, 16);

  // H is stored in host byte order
  H.u[0] = CRYPTO_bswap8(H.u[0]);
  H.u[1] = CRYPTO_bswap8(H.u[1]);

  OPENSSL_memcpy(out_key, H.c, 16);

#if defined(GHASH_ASM_X86_64)
  if (crypto_gcm_clmul_enabled()) {
    if (((OPENSSL_ia32cap_get()[1] >> 22) & 0x41) == 0x41) {  // AVX+MOVBE
      gcm_init_avx(out_table, H.u);
      *out_mult = gcm_gmult_avx;
      *out_hash = gcm_ghash_avx;
      *out_is_avx = 1;
      return;
    }
    gcm_init_clmul(out_table, H.u);
    *out_mult = gcm_gmult_clmul;
    *out_hash = gcm_ghash_clmul;
    return;
  }
#elif defined(GHASH_ASM_X86)
  if (crypto_gcm_clmul_enabled()) {
    gcm_init_clmul(out_table, H.u);
    *out_mult = gcm_gmult_clmul;
    *out_hash = gcm_ghash_clmul;
    return;
  }
#elif defined(GHASH_ASM_ARM)
  if (pmull_capable()) {
    gcm_init_v8(out_table, H.u);
    *out_mult = gcm_gmult_v8;
    *out_hash = gcm_ghash_v8;
    return;
  }

  if (neon_capable()) {
    gcm_init_neon(out_table, H.u);
    *out_mult = gcm_gmult_neon;
    *out_hash = gcm_ghash_neon;
    return;
  }
#elif defined(GHASH_ASM_PPC64LE)
  if (CRYPTO_is_PPC64LE_vcrypto_capable()) {
    gcm_init_p8(out_table, H.u);
    *out_mult = gcm_gmult_p8;
    *out_hash = gcm_ghash_p8;
    return;
  }
#endif

  gcm_init_4bit(out_table, H.u);
#if defined(GHASH_ASM_X86)
  *out_mult = gcm_gmult_4bit_mmx;
  *out_hash = gcm_ghash_4bit_mmx;
#else
  *out_mult = gcm_gmult_4bit;
  *out_hash = gcm_ghash_4bit;
#endif
}
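
// A GCM128_CONTEXT is used roughly as follows (a sketch based on the
// functions in this file): CRYPTO_gcm128_init once per key (it derives the
// hash key by encrypting the all-zero block), CRYPTO_gcm128_setiv once per
// nonce, CRYPTO_gcm128_aad for any additional authenticated data, then
// CRYPTO_gcm128_encrypt/_decrypt (or the _ctr32 variants) over the payload,
// and finally CRYPTO_gcm128_tag to produce, or CRYPTO_gcm128_finish to
// verify, the authentication tag.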
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, const void *aes_key,
                        block128_f block, int is_aesni_encrypt) {
  OPENSSL_memset(ctx, 0, sizeof(*ctx));
  ctx->block = block;

  uint8_t gcm_key[16];
  OPENSSL_memset(gcm_key, 0, sizeof(gcm_key));
  (*block)(gcm_key, gcm_key, aes_key);

  int is_avx;
  CRYPTO_ghash_init(&ctx->gmult, &ctx->ghash, &ctx->H, ctx->Htable, &is_avx,
                    gcm_key);

  ctx->use_aesni_gcm_crypt = (is_avx && is_aesni_encrypt) ? 1 : 0;
}
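
// CRYPTO_gcm128_setiv computes the pre-counter block J0 as specified for GCM:
// a 96-bit IV is used directly as IV || 0^31 || 1 (the fast path below), while
// any other IV length is run through GHASH together with its bit length. It
// also computes EK0, the encryption of J0, which is later XOR-ed into the tag.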
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const void *key,
                         const uint8_t *iv, size_t len) {
  unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

  ctx->Yi.u[0] = 0;
  ctx->Yi.u[1] = 0;
  ctx->Xi.u[0] = 0;
  ctx->Xi.u[1] = 0;
  ctx->len.u[0] = 0;  // AAD length
  ctx->len.u[1] = 0;  // message length
  ctx->ares = 0;
  ctx->mres = 0;

  if (len == 12) {
    OPENSSL_memcpy(ctx->Yi.c, iv, 12);
    ctx->Yi.c[15] = 1;
    ctr = 1;
  } else {
    uint64_t len0 = len;

    while (len >= 16) {
      for (size_t i = 0; i < 16; ++i) {
        ctx->Yi.c[i] ^= iv[i];
      }
      GCM_MUL(ctx, Yi);
      iv += 16;
      len -= 16;
    }
    if (len) {
      for (size_t i = 0; i < len; ++i) {
        ctx->Yi.c[i] ^= iv[i];
      }
      GCM_MUL(ctx, Yi);
    }
    len0 <<= 3;
    ctx->Yi.u[1] ^= CRYPTO_bswap8(len0);

    GCM_MUL(ctx, Yi);
    ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
  }

  (*ctx->block)(ctx->Yi.c, ctx->EK0.c, key);
  ++ctr;
  ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
}
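
// CRYPTO_gcm128_aad absorbs additional authenticated data into the GHASH
// state. It must be called before any payload has been encrypted or decrypted
// (hence the ctx->len.u[1] check), and per the GCM spec the AAD may be at
// most 2^64 - 1 bits; the length check below enforces (approximately) that
// limit in bytes.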
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const uint8_t *aad, size_t len) {
  unsigned int n;
  uint64_t alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  if (ctx->len.u[1]) {
    return 0;
  }

  alen += len;
  if (alen > (UINT64_C(1) << 61) || (sizeof(len) == 8 && alen < len)) {
    return 0;
  }
  ctx->len.u[0] = alen;

  n = ctx->ares;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(aad++);
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->ares = n;
      return 1;
    }
  }

  // Process a whole number of blocks.
#ifdef GHASH
  size_t len_blocks = len & kSizeTWithoutLower4Bits;
  if (len_blocks != 0) {
    GHASH(ctx, aad, len_blocks);
    aad += len_blocks;
    len -= len_blocks;
  }
#else
  while (len >= 16) {
    for (size_t i = 0; i < 16; ++i) {
      ctx->Xi.c[i] ^= aad[i];
    }
    GCM_MUL(ctx, Xi);
    aad += 16;
    len -= 16;
  }
#endif

  // Process the remainder.
  if (len != 0) {
    n = (unsigned int)len;
    for (size_t i = 0; i < len; ++i) {
      ctx->Xi.c[i] ^= aad[i];
    }
  }

  ctx->ares = n;
  return 1;
}
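
// CRYPTO_gcm128_encrypt runs CTR-mode encryption and GHASHes the resulting
// ciphertext. The length check below enforces GCM's per-message plaintext
// limit of 2^39 - 256 bits, i.e. 2^36 - 32 bytes.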
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const void *key,
                          const uint8_t *in, uint8_t *out, size_t len) {
  unsigned int n, ctr;
  uint64_t mlen = ctx->len.u[1];
  block128_f block = ctx->block;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    // First call to encrypt finalizes GHASH(AAD)
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  ctr = CRYPTO_bswap4(ctx->Yi.d[3]);

  n = ctx->mres;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

  if (STRICT_ALIGNMENT &&
      ((uintptr_t)in | (uintptr_t)out) % sizeof(size_t) != 0) {
    for (size_t i = 0; i < len; ++i) {
      if (n == 0) {
        (*block)(ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      }
      ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
      n = (n + 1) % 16;
      if (n == 0) {
        GCM_MUL(ctx, Xi);
      }
    }

    ctx->mres = n;
    return 1;
  }

#if defined(GHASH) && defined(GHASH_CHUNK)
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

    while (j) {
      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      for (size_t i = 0; i < 16; i += sizeof(size_t)) {
        store_word_le(out + i,
                      load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)]);
      }
      out += 16;
      in += 16;
      j -= 16;
    }
    GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
    len -= GHASH_CHUNK;
  }
  size_t len_blocks = len & kSizeTWithoutLower4Bits;
  if (len_blocks != 0) {
    while (len >= 16) {
      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      for (size_t i = 0; i < 16; i += sizeof(size_t)) {
        store_word_le(out + i,
                      load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)]);
      }
      out += 16;
      in += 16;
      len -= 16;
    }
    GHASH(ctx, out - len_blocks, len_blocks);
  }
#else
  while (len >= 16) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    for (size_t i = 0; i < 16; i += sizeof(size_t)) {
      size_t tmp = load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)];
      store_word_le(out + i, tmp);
      ctx->Xi.t[i / sizeof(size_t)] ^= tmp;
    }
    GCM_MUL(ctx, Xi);
    out += 16;
    in += 16;
    len -= 16;
  }
#endif

  if (len) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const void *key,
                          const unsigned char *in, unsigned char *out,
                          size_t len) {
  unsigned int n, ctr;
  uint64_t mlen = ctx->len.u[1];
  block128_f block = ctx->block;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    // First call to decrypt finalizes GHASH(AAD)
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  ctr = CRYPTO_bswap4(ctx->Yi.d[3]);

  n = ctx->mres;
  if (n) {
    while (n && len) {
      uint8_t c = *(in++);
      *(out++) = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

  if (STRICT_ALIGNMENT &&
      ((uintptr_t)in | (uintptr_t)out) % sizeof(size_t) != 0) {
    for (size_t i = 0; i < len; ++i) {
      uint8_t c;
      if (n == 0) {
        (*block)(ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      }
      c = in[i];
      out[i] = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      n = (n + 1) % 16;
      if (n == 0) {
        GCM_MUL(ctx, Xi);
      }
    }

    ctx->mres = n;
    return 1;
  }

#if defined(GHASH) && defined(GHASH_CHUNK)
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

    GHASH(ctx, in, GHASH_CHUNK);
    while (j) {
      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      for (size_t i = 0; i < 16; i += sizeof(size_t)) {
        store_word_le(out + i,
                      load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)]);
      }
      out += 16;
      in += 16;
      j -= 16;
    }
    len -= GHASH_CHUNK;
  }
  size_t len_blocks = len & kSizeTWithoutLower4Bits;
  if (len_blocks != 0) {
    GHASH(ctx, in, len_blocks);
    while (len >= 16) {
      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      for (size_t i = 0; i < 16; i += sizeof(size_t)) {
        store_word_le(out + i,
                      load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)]);
      }
      out += 16;
      in += 16;
      len -= 16;
    }
  }
#else
  while (len >= 16) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    for (size_t i = 0; i < 16; i += sizeof(size_t)) {
      size_t c = load_word_le(in + i);
      store_word_le(out + i, c ^ ctx->EKi.t[i / sizeof(size_t)]);
      ctx->Xi.t[i / sizeof(size_t)] ^= c;
    }
    GCM_MUL(ctx, Xi);
    out += 16;
    in += 16;
    len -= 16;
  }
#endif

  if (len) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    while (len--) {
      uint8_t c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}
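
// The _ctr32 variants take a |ctr128_f| function that encrypts a run of
// counter blocks at once (for example a hardware-accelerated CTR routine).
// On x86-64, when the AVX GHASH code and AES-NI are in use, they additionally
// hand the bulk of the data to the fused aesni_gcm_encrypt/decrypt routines
// first.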
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, const void *key,
                                const uint8_t *in, uint8_t *out, size_t len,
                                ctr128_f stream) {
  unsigned int n, ctr;
  uint64_t mlen = ctx->len.u[1];
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    // First call to encrypt finalizes GHASH(AAD)
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

#if defined(AESNI_GCM)
  if (ctx->use_aesni_gcm_crypt) {
    // |aesni_gcm_encrypt| may not process all the input given to it. It may
    // not process *any* of its input if it is deemed too small.
    size_t bulk = aesni_gcm_encrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u);
    in += bulk;
    out += bulk;
    len -= bulk;
  }
#endif

  ctr = CRYPTO_bswap4(ctx->Yi.d[3]);

#if defined(GHASH)
  while (len >= GHASH_CHUNK) {
    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    ctr += GHASH_CHUNK / 16;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    GHASH(ctx, out, GHASH_CHUNK);
    out += GHASH_CHUNK;
    in += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
#endif
  size_t i = len & kSizeTWithoutLower4Bits;
  if (i != 0) {
    size_t j = i / 16;

    (*stream)(in, out, j, key, ctx->Yi.c);
    ctr += (unsigned int)j;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    in += i;
    len -= i;
#if defined(GHASH)
    GHASH(ctx, out, i);
    out += i;
#else
    while (j--) {
      for (i = 0; i < 16; ++i) {
        ctx->Xi.c[i] ^= out[i];
      }
      GCM_MUL(ctx, Xi);
      out += 16;
    }
#endif
  }
  if (len) {
    (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, const void *key,
                                const uint8_t *in, uint8_t *out, size_t len,
                                ctr128_f stream) {
  unsigned int n, ctr;
  uint64_t mlen = ctx->len.u[1];
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    // First call to decrypt finalizes GHASH(AAD)
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      uint8_t c = *(in++);
      *(out++) = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

#if defined(AESNI_GCM)
  if (ctx->use_aesni_gcm_crypt) {
    // |aesni_gcm_decrypt| may not process all the input given to it. It may
    // not process *any* of its input if it is deemed too small.
    size_t bulk = aesni_gcm_decrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u);
    in += bulk;
    out += bulk;
    len -= bulk;
  }
#endif

  ctr = CRYPTO_bswap4(ctx->Yi.d[3]);

#if defined(GHASH)
  while (len >= GHASH_CHUNK) {
    GHASH(ctx, in, GHASH_CHUNK);
    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    ctr += GHASH_CHUNK / 16;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    out += GHASH_CHUNK;
    in += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
#endif
  size_t i = len & kSizeTWithoutLower4Bits;
  if (i != 0) {
    size_t j = i / 16;

#if defined(GHASH)
    GHASH(ctx, in, i);
#else
    while (j--) {
      size_t k;
      for (k = 0; k < 16; ++k) {
        ctx->Xi.c[k] ^= in[k];
      }
      GCM_MUL(ctx, Xi);
      in += 16;
    }
    j = i / 16;
    in -= i;
#endif
    (*stream)(in, out, j, key, ctx->Yi.c);
    ctr += (unsigned int)j;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    out += i;
    in += i;
    len -= i;
  }
  if (len) {
    (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    while (len--) {
      uint8_t c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}
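
// CRYPTO_gcm128_finish folds the AAD and ciphertext bit lengths into the
// GHASH state and XORs in EK0 (the encrypted pre-counter block) to form the
// tag, then compares it against the caller's tag with the constant-time
// CRYPTO_memcmp. CRYPTO_gcm128_tag reuses the same computation to output a
// tag instead of verifying one.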
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const uint8_t *tag, size_t len) {
  uint64_t alen = ctx->len.u[0] << 3;
  uint64_t clen = ctx->len.u[1] << 3;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

  if (ctx->mres || ctx->ares) {
    GCM_MUL(ctx, Xi);
  }

  alen = CRYPTO_bswap8(alen);
  clen = CRYPTO_bswap8(clen);

  ctx->Xi.u[0] ^= alen;
  ctx->Xi.u[1] ^= clen;
  GCM_MUL(ctx, Xi);

  ctx->Xi.u[0] ^= ctx->EK0.u[0];
  ctx->Xi.u[1] ^= ctx->EK0.u[1];

  if (tag && len <= sizeof(ctx->Xi)) {
    return CRYPTO_memcmp(ctx->Xi.c, tag, len) == 0;
  } else {
    return 0;
  }
}

void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len) {
  CRYPTO_gcm128_finish(ctx, NULL, 0);
  OPENSSL_memcpy(tag, ctx->Xi.c,
                 len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
}

#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
int crypto_gcm_clmul_enabled(void) {
#ifdef GHASH_ASM
  const uint32_t *ia32cap = OPENSSL_ia32cap_get();
  return (ia32cap[0] & (1 << 24)) &&  // check FXSR bit
         (ia32cap[1] & (1 << 1));     // check PCLMULQDQ bit
#else
  return 0;
#endif
}
#endif