gcm.c

/* ====================================================================
 * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
 *
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For written permission, please contact
 *    openssl-core@openssl.org.
 *
 * 5. Products derived from this software may not be called "OpenSSL"
 *    nor may "OpenSSL" appear in their names without prior written
 *    permission of the OpenSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ==================================================================== */

#include <openssl/base.h>

#include <assert.h>
#include <string.h>

#include <openssl/mem.h>
#include <openssl/cpu.h>

#include "internal.h"


#if !defined(OPENSSL_NO_ASM) &&                         \
    (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
     defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64))
#define GHASH_ASM
#endif

#if defined(BSWAP4) && STRICT_ALIGNMENT == 1
/* redefine, because alignment is ensured */
#undef GETU32
#define GETU32(p) BSWAP4(*(const uint32_t *)(p))
#undef PUTU32
#define PUTU32(p, v) *(uint32_t *)(p) = BSWAP4(v)
#endif

#define PACK(s) ((size_t)(s) << (sizeof(size_t) * 8 - 16))
#define REDUCE1BIT(V)                                               \
  do {                                                              \
    if (sizeof(size_t) == 8) {                                      \
      uint64_t T = UINT64_C(0xe100000000000000) & (0 - (V.lo & 1)); \
      V.lo = (V.hi << 63) | (V.lo >> 1);                            \
      V.hi = (V.hi >> 1) ^ T;                                       \
    } else {                                                        \
      uint32_t T = 0xe1000000U & (0 - (uint32_t)(V.lo & 1));        \
      V.lo = (V.hi << 63) | (V.lo >> 1);                            \
      V.hi = (V.hi >> 1) ^ ((uint64_t)T << 32);                     \
    }                                                               \
  } while (0)

// kSizeTWithoutLower4Bits is a mask that can be used to zero the lower four
// bits of a |size_t|.
static const size_t kSizeTWithoutLower4Bits = (size_t) -16;

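/* gcm_init_4bit precomputes a 16-entry table of multiples of the hash key H
 * so that GHASH can be evaluated four bits (one nibble) at a time, in the
 * style of Shoup's 4-bit table method: Htable[8] holds H itself, Htable[4],
 * Htable[2] and Htable[1] are derived from it by repeated REDUCE1BIT (a
 * one-bit shift with polynomial reduction), and every remaining entry is the
 * XOR of the entries selected by its index bits. */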
static void gcm_init_4bit(u128 Htable[16], uint64_t H[2]) {
  u128 V;

  Htable[0].hi = 0;
  Htable[0].lo = 0;
  V.hi = H[0];
  V.lo = H[1];

  Htable[8] = V;
  REDUCE1BIT(V);
  Htable[4] = V;
  REDUCE1BIT(V);
  Htable[2] = V;
  REDUCE1BIT(V);
  Htable[1] = V;
  Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
  V = Htable[4];
  Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
  Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
  Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
  V = Htable[8];
  Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
  Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
  Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
  Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
  Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
  Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
  Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;

#if defined(GHASH_ASM) && defined(OPENSSL_ARM)
  /* ARM assembler expects specific dword order in Htable. */
  {
    int j;
    const union {
      long one;
      char little;
    } is_endian = {1};

    if (is_endian.little) {
      for (j = 0; j < 16; ++j) {
        V = Htable[j];
        Htable[j].hi = V.lo;
        Htable[j].lo = V.hi;
      }
    } else {
      for (j = 0; j < 16; ++j) {
        V = Htable[j];
        Htable[j].hi = V.lo << 32 | V.lo >> 32;
        Htable[j].lo = V.hi << 32 | V.hi >> 32;
      }
    }
  }
#endif
}

#if !defined(GHASH_ASM) || defined(OPENSSL_AARCH64)
static const size_t rem_4bit[16] = {
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)};

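/* gcm_gmult_4bit computes Xi = Xi * H in GF(2^128), in GCM's bit-reversed
 * representation. It walks Xi one nibble at a time from the last byte to the
 * first, looking each nibble up in Htable and folding the four bits shifted
 * out of Z back in via the rem_4bit reduction table. */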
static void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]) {
  u128 Z;
  int cnt = 15;
  size_t rem, nlo, nhi;
  const union {
    long one;
    char little;
  } is_endian = {1};

  nlo = ((const uint8_t *)Xi)[15];
  nhi = nlo >> 4;
  nlo &= 0xf;

  Z.hi = Htable[nlo].hi;
  Z.lo = Htable[nlo].lo;

  while (1) {
    rem = (size_t)Z.lo & 0xf;
    Z.lo = (Z.hi << 60) | (Z.lo >> 4);
    Z.hi = (Z.hi >> 4);
    if (sizeof(size_t) == 8) {
      Z.hi ^= rem_4bit[rem];
    } else {
      Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
    }

    Z.hi ^= Htable[nhi].hi;
    Z.lo ^= Htable[nhi].lo;

    if (--cnt < 0) {
      break;
    }

    nlo = ((const uint8_t *)Xi)[cnt];
    nhi = nlo >> 4;
    nlo &= 0xf;

    rem = (size_t)Z.lo & 0xf;
    Z.lo = (Z.hi << 60) | (Z.lo >> 4);
    Z.hi = (Z.hi >> 4);
    if (sizeof(size_t) == 8) {
      Z.hi ^= rem_4bit[rem];
    } else {
      Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
    }

    Z.hi ^= Htable[nlo].hi;
    Z.lo ^= Htable[nlo].lo;
  }

  if (is_endian.little) {
#ifdef BSWAP8
    Xi[0] = BSWAP8(Z.hi);
    Xi[1] = BSWAP8(Z.lo);
#else
    uint8_t *p = (uint8_t *)Xi;
    uint32_t v;
    v = (uint32_t)(Z.hi >> 32);
    PUTU32(p, v);
    v = (uint32_t)(Z.hi);
    PUTU32(p + 4, v);
    v = (uint32_t)(Z.lo >> 32);
    PUTU32(p + 8, v);
    v = (uint32_t)(Z.lo);
    PUTU32(p + 12, v);
#endif
  } else {
    Xi[0] = Z.hi;
    Xi[1] = Z.lo;
  }
}

/* Streamed variant of gcm_gmult_4bit; see CRYPTO_gcm128_[en|de]crypt for
 * details. Compiler-generated code doesn't seem to give any performance
 * improvement, at least not on x86[_64]. It's here mostly as reference and a
 * placeholder for possible future non-trivial optimization[s]... */
static void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16],
                           const uint8_t *inp, size_t len) {
  u128 Z;
  int cnt;
  size_t rem, nlo, nhi;
  const union {
    long one;
    char little;
  } is_endian = {1};

  do {
    cnt = 15;
    nlo = ((const uint8_t *)Xi)[15];
    nlo ^= inp[15];
    nhi = nlo >> 4;
    nlo &= 0xf;

    Z.hi = Htable[nlo].hi;
    Z.lo = Htable[nlo].lo;

    while (1) {
      rem = (size_t)Z.lo & 0xf;
      Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      Z.hi = (Z.hi >> 4);
      if (sizeof(size_t) == 8) {
        Z.hi ^= rem_4bit[rem];
      } else {
        Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
      }

      Z.hi ^= Htable[nhi].hi;
      Z.lo ^= Htable[nhi].lo;

      if (--cnt < 0) {
        break;
      }

      nlo = ((const uint8_t *)Xi)[cnt];
      nlo ^= inp[cnt];
      nhi = nlo >> 4;
      nlo &= 0xf;

      rem = (size_t)Z.lo & 0xf;
      Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      Z.hi = (Z.hi >> 4);
      if (sizeof(size_t) == 8) {
        Z.hi ^= rem_4bit[rem];
      } else {
        Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
      }

      Z.hi ^= Htable[nlo].hi;
      Z.lo ^= Htable[nlo].lo;
    }

    if (is_endian.little) {
#ifdef BSWAP8
      Xi[0] = BSWAP8(Z.hi);
      Xi[1] = BSWAP8(Z.lo);
#else
      uint8_t *p = (uint8_t *)Xi;
      uint32_t v;
      v = (uint32_t)(Z.hi >> 32);
      PUTU32(p, v);
      v = (uint32_t)(Z.hi);
      PUTU32(p + 4, v);
      v = (uint32_t)(Z.lo >> 32);
      PUTU32(p + 8, v);
      v = (uint32_t)(Z.lo);
      PUTU32(p + 12, v);
#endif
    } else {
      Xi[0] = Z.hi;
      Xi[1] = Z.lo;
    }
  } while (inp += 16, len -= 16);
}

#else /* GHASH_ASM */
void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                    size_t len);
#endif

#define GCM_MUL(ctx, Xi) gcm_gmult_4bit(ctx->Xi.u, ctx->Htable)

#if defined(GHASH_ASM)
#define GHASH(ctx, in, len) gcm_ghash_4bit((ctx)->Xi.u, (ctx)->Htable, in, len)
/* GHASH_CHUNK is a "stride parameter" intended to mitigate cache-thrashing:
 * the idea is to hash data while it is still in L1 cache after the
 * encryption pass... */
#define GHASH_CHUNK (3 * 1024)
#endif

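/* GCM_MUL(ctx, Xi) folds the current 16-byte block into the running GHASH
 * state, i.e. Xi <- Xi * H. GHASH(ctx, in, len), when available, absorbs
 * |len| bytes (a multiple of 16) in one call. When GCM_FUNCREF_4BIT is
 * defined below, both are redirected through the per-context |gmult| and
 * |ghash| function pointers chosen in CRYPTO_gcm128_init. */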
#if defined(GHASH_ASM)
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
#define GHASH_ASM_X86_OR_64
#define GCM_FUNCREF_4BIT
void gcm_init_clmul(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                     size_t len);

#if defined(OPENSSL_X86)
#define gcm_init_avx gcm_init_clmul
#define gcm_gmult_avx gcm_gmult_clmul
#define gcm_ghash_avx gcm_ghash_clmul
#else
void gcm_init_avx(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_avx(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_avx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                   size_t len);
#endif

#if defined(OPENSSL_X86)
#define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(uint64_t Xi[2], const u128 Htable[16],
                        const uint8_t *inp, size_t len);

void gcm_gmult_4bit_x86(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_x86(uint64_t Xi[2], const u128 Htable[16],
                        const uint8_t *inp, size_t len);
#endif
#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
#include <openssl/arm_arch.h>
#if __ARM_ARCH__ >= 7
#define GHASH_ASM_ARM
#define GCM_FUNCREF_4BIT

static int pmull_capable(void) {
  return CRYPTO_is_ARMv8_PMULL_capable();
}

void gcm_init_v8(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_v8(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_v8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                  size_t len);

#if defined(OPENSSL_ARM)
/* 32-bit ARM also has support for doing GCM with NEON instructions. */
static int neon_capable(void) {
  return CRYPTO_is_NEON_capable();
}

void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                    size_t len);
#else
/* AArch64 only has the ARMv8 versions of functions. */
static int neon_capable(void) {
  return 0;
}
void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]) {
  abort();
}
void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]) {
  abort();
}
void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                    size_t len) {
  abort();
}
#endif

#endif
#endif
#endif

#ifdef GCM_FUNCREF_4BIT
#undef GCM_MUL
#define GCM_MUL(ctx, Xi) (*gcm_gmult_p)(ctx->Xi.u, ctx->Htable)
#ifdef GHASH
#undef GHASH
#define GHASH(ctx, in, len) (*gcm_ghash_p)(ctx->Xi.u, ctx->Htable, in, len)
#endif
#endif

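/* Typical use of this API, sketched as a hedged example: it assumes an AES
 * key schedule |aes_key| and uses AES_encrypt from <openssl/aes.h> as the
 * |block128_f| (any other 128-bit block cipher with that signature works):
 *
 *   GCM128_CONTEXT *gcm = CRYPTO_gcm128_new(&aes_key, (block128_f)AES_encrypt);
 *   CRYPTO_gcm128_setiv(gcm, &aes_key, iv, iv_len);
 *   CRYPTO_gcm128_aad(gcm, aad, aad_len);
 *   CRYPTO_gcm128_encrypt(gcm, &aes_key, plaintext, ciphertext, plaintext_len);
 *   CRYPTO_gcm128_tag(gcm, tag, 16);
 *   CRYPTO_gcm128_release(gcm);
 *
 * Decryption follows the same sequence with CRYPTO_gcm128_decrypt and a final
 * CRYPTO_gcm128_finish(gcm, expected_tag, 16) to verify the tag. */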
GCM128_CONTEXT *CRYPTO_gcm128_new(const void *key, block128_f block) {
  GCM128_CONTEXT *ret;

  ret = OPENSSL_malloc(sizeof(GCM128_CONTEXT));
  if (ret != NULL) {
    CRYPTO_gcm128_init(ret, key, block);
  }

  return ret;
}

void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, const void *key,
                        block128_f block) {
  const union {
    long one;
    char little;
  } is_endian = {1};

  memset(ctx, 0, sizeof(*ctx));
  ctx->block = block;

  (*block)(ctx->H.c, ctx->H.c, key);

  if (is_endian.little) {
    /* H is stored in host byte order */
#ifdef BSWAP8
    ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
    ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
    uint8_t *p = ctx->H.c;
    uint64_t hi, lo;
    hi = (uint64_t)GETU32(p) << 32 | GETU32(p + 4);
    lo = (uint64_t)GETU32(p + 8) << 32 | GETU32(p + 12);
    ctx->H.u[0] = hi;
    ctx->H.u[1] = lo;
#endif
  }

#if defined(GHASH_ASM_X86_OR_64)
  if (crypto_gcm_clmul_enabled()) {
    if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
      gcm_init_avx(ctx->Htable, ctx->H.u);
      ctx->gmult = gcm_gmult_avx;
      ctx->ghash = gcm_ghash_avx;
    } else {
      gcm_init_clmul(ctx->Htable, ctx->H.u);
      ctx->gmult = gcm_gmult_clmul;
      ctx->ghash = gcm_ghash_clmul;
    }
    return;
  }
  gcm_init_4bit(ctx->Htable, ctx->H.u);
#if defined(GHASH_ASM_X86) /* x86 only */
  if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
    ctx->gmult = gcm_gmult_4bit_mmx;
    ctx->ghash = gcm_ghash_4bit_mmx;
  } else {
    ctx->gmult = gcm_gmult_4bit_x86;
    ctx->ghash = gcm_ghash_4bit_x86;
  }
#else
  ctx->gmult = gcm_gmult_4bit;
  ctx->ghash = gcm_ghash_4bit;
#endif
#elif defined(GHASH_ASM_ARM)
  if (pmull_capable()) {
    gcm_init_v8(ctx->Htable, ctx->H.u);
    ctx->gmult = gcm_gmult_v8;
    ctx->ghash = gcm_ghash_v8;
  } else if (neon_capable()) {
    gcm_init_neon(ctx->Htable, ctx->H.u);
    ctx->gmult = gcm_gmult_neon;
    ctx->ghash = gcm_ghash_neon;
  } else {
    gcm_init_4bit(ctx->Htable, ctx->H.u);
    ctx->gmult = gcm_gmult_4bit;
    ctx->ghash = gcm_ghash_4bit;
  }
#else
  gcm_init_4bit(ctx->Htable, ctx->H.u);
  ctx->gmult = gcm_gmult_4bit;
  ctx->ghash = gcm_ghash_4bit;
#endif
}

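/* CRYPTO_gcm128_setiv resets the per-message state and derives the initial
 * counter block Y0. A 96-bit IV is used directly with the 32-bit counter set
 * to one; any other IV length is GHASHed together with its bit length, as
 * specified for GCM. EK0, the encryption of Y0, is kept for the final tag
 * computation. */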
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const void *key,
                         const uint8_t *iv, size_t len) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

  ctx->Yi.u[0] = 0;
  ctx->Yi.u[1] = 0;
  ctx->Xi.u[0] = 0;
  ctx->Xi.u[1] = 0;
  ctx->len.u[0] = 0; /* AAD length */
  ctx->len.u[1] = 0; /* message length */
  ctx->ares = 0;
  ctx->mres = 0;

  if (len == 12) {
    memcpy(ctx->Yi.c, iv, 12);
    ctx->Yi.c[15] = 1;
    ctr = 1;
  } else {
    size_t i;
    uint64_t len0 = len;

    while (len >= 16) {
      for (i = 0; i < 16; ++i) {
        ctx->Yi.c[i] ^= iv[i];
      }
      GCM_MUL(ctx, Yi);
      iv += 16;
      len -= 16;
    }
    if (len) {
      for (i = 0; i < len; ++i) {
        ctx->Yi.c[i] ^= iv[i];
      }
      GCM_MUL(ctx, Yi);
    }
    len0 <<= 3;
    if (is_endian.little) {
#ifdef BSWAP8
      ctx->Yi.u[1] ^= BSWAP8(len0);
#else
      ctx->Yi.c[8] ^= (uint8_t)(len0 >> 56);
      ctx->Yi.c[9] ^= (uint8_t)(len0 >> 48);
      ctx->Yi.c[10] ^= (uint8_t)(len0 >> 40);
      ctx->Yi.c[11] ^= (uint8_t)(len0 >> 32);
      ctx->Yi.c[12] ^= (uint8_t)(len0 >> 24);
      ctx->Yi.c[13] ^= (uint8_t)(len0 >> 16);
      ctx->Yi.c[14] ^= (uint8_t)(len0 >> 8);
      ctx->Yi.c[15] ^= (uint8_t)(len0);
#endif
    } else {
      ctx->Yi.u[1] ^= len0;
    }

    GCM_MUL(ctx, Yi);

    if (is_endian.little) {
      ctr = GETU32(ctx->Yi.c + 12);
    } else {
      ctr = ctx->Yi.d[3];
    }
  }

  (*ctx->block)(ctx->Yi.c, ctx->EK0.c, key);
  ++ctr;
  if (is_endian.little) {
    PUTU32(ctx->Yi.c + 12, ctr);
  } else {
    ctx->Yi.d[3] = ctr;
  }
}

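/* CRYPTO_gcm128_aad absorbs additional authenticated data into the GHASH
 * state. It must be called before any encrypt/decrypt call (it fails once
 * ctx->len.u[1] is non-zero) and enforces the GCM limit of 2^61 bytes of
 * AAD. A partial final block is XORed into Xi and its length recorded in
 * ctx->ares, so a later call (or the first encrypt/decrypt call) can finish
 * that block. */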
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const uint8_t *aad, size_t len) {
  size_t i;
  unsigned int n;
  uint64_t alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  if (ctx->len.u[1]) {
    return 0;
  }

  alen += len;
  if (alen > (UINT64_C(1) << 61) || (sizeof(len) == 8 && alen < len)) {
    return 0;
  }
  ctx->len.u[0] = alen;

  n = ctx->ares;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(aad++);
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->ares = n;
      return 1;
    }
  }

#ifdef GHASH
  i = len & kSizeTWithoutLower4Bits;
  if (i != 0) {
    GHASH(ctx, aad, i);
    aad += i;
    len -= i;
  }
#else
  while (len >= 16) {
    for (i = 0; i < 16; ++i) {
      ctx->Xi.c[i] ^= aad[i];
    }
    GCM_MUL(ctx, Xi);
    aad += 16;
    len -= 16;
  }
#endif
  if (len) {
    n = (unsigned int)len;
    for (i = 0; i < len; ++i) {
      ctx->Xi.c[i] ^= aad[i];
    }
  }

  ctx->ares = n;
  return 1;
}

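/* CRYPTO_gcm128_encrypt encrypts |len| bytes in CTR mode with ctx->block and
 * folds the resulting ciphertext into the GHASH state. It enforces GCM's
 * plaintext limit of 2^36 - 32 bytes, handles misaligned buffers byte by
 * byte, and, when an assembly GHASH is available, batches hashing in
 * GHASH_CHUNK-sized stretches so data is hashed while still in L1 cache. */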
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const void *key,
                          const unsigned char *in, unsigned char *out,
                          size_t len) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int n, ctr;
  size_t i;
  uint64_t mlen = ctx->len.u[1];
  block128_f block = ctx->block;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to encrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  if (is_endian.little) {
    ctr = GETU32(ctx->Yi.c + 12);
  } else {
    ctr = ctx->Yi.d[3];
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

  if (STRICT_ALIGNMENT && ((size_t)in | (size_t)out) % sizeof(size_t) != 0) {
    for (i = 0; i < len; ++i) {
      if (n == 0) {
        (*block)(ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (is_endian.little) {
          PUTU32(ctx->Yi.c + 12, ctr);
        } else {
          ctx->Yi.d[3] = ctr;
        }
      }
      ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
      n = (n + 1) % 16;
      if (n == 0) {
        GCM_MUL(ctx, Xi);
      }
    }

    ctx->mres = n;
    return 1;
  }

#if defined(GHASH) && defined(GHASH_CHUNK)
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

    while (j) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      if (is_endian.little) {
        PUTU32(ctx->Yi.c + 12, ctr);
      } else {
        ctx->Yi.d[3] = ctr;
      }
      for (i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      j -= 16;
    }
    GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
    len -= GHASH_CHUNK;
  }
  if ((i = (len & (size_t)-16))) {
    size_t j = i;

    while (len >= 16) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      if (is_endian.little) {
        PUTU32(ctx->Yi.c + 12, ctr);
      } else {
        ctx->Yi.d[3] = ctr;
      }
      for (i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      len -= 16;
    }
    GHASH(ctx, out - j, j);
  }
#else
  while (len >= 16) {
    size_t *out_t = (size_t *)out;
    const size_t *in_t = (const size_t *)in;

    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    for (i = 0; i < 16 / sizeof(size_t); ++i) {
      ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
    }
    GCM_MUL(ctx, Xi);
    out += 16;
    in += 16;
    len -= 16;
  }
#endif
  if (len) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

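/* CRYPTO_gcm128_decrypt mirrors CRYPTO_gcm128_encrypt, except that the
 * incoming ciphertext is folded into the GHASH state before (or as) it is
 * decrypted, so the authentication tag is always taken over the ciphertext. */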
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const void *key,
                          const unsigned char *in, unsigned char *out,
                          size_t len) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int n, ctr;
  size_t i;
  uint64_t mlen = ctx->len.u[1];
  block128_f block = ctx->block;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to decrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  if (is_endian.little) {
    ctr = GETU32(ctx->Yi.c + 12);
  } else {
    ctr = ctx->Yi.d[3];
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      uint8_t c = *(in++);
      *(out++) = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

  if (STRICT_ALIGNMENT && ((size_t)in | (size_t)out) % sizeof(size_t) != 0) {
    for (i = 0; i < len; ++i) {
      uint8_t c;
      if (n == 0) {
        (*block)(ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (is_endian.little) {
          PUTU32(ctx->Yi.c + 12, ctr);
        } else {
          ctx->Yi.d[3] = ctr;
        }
      }
      c = in[i];
      out[i] = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      n = (n + 1) % 16;
      if (n == 0) {
        GCM_MUL(ctx, Xi);
      }
    }

    ctx->mres = n;
    return 1;
  }

#if defined(GHASH) && defined(GHASH_CHUNK)
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

    GHASH(ctx, in, GHASH_CHUNK);
    while (j) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      if (is_endian.little) {
        PUTU32(ctx->Yi.c + 12, ctr);
      } else {
        ctx->Yi.d[3] = ctr;
      }
      for (i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      j -= 16;
    }
    len -= GHASH_CHUNK;
  }
  i = len & kSizeTWithoutLower4Bits;
  if (i != 0) {
    GHASH(ctx, in, i);
    while (len >= 16) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      if (is_endian.little) {
        PUTU32(ctx->Yi.c + 12, ctr);
      } else {
        ctx->Yi.d[3] = ctr;
      }
      for (i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      len -= 16;
    }
  }
#else
  while (len >= 16) {
    size_t *out_t = (size_t *)out;
    const size_t *in_t = (const size_t *)in;

    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    for (i = 0; i < 16 / sizeof(size_t); ++i) {
      size_t c = in_t[i];
      out_t[i] = c ^ ctx->EKi.t[i];
      ctx->Xi.t[i] ^= c;
    }
    GCM_MUL(ctx, Xi);
    out += 16;
    in += 16;
    len -= 16;
  }
#endif
  if (len) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    while (len--) {
      uint8_t c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

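/* CRYPTO_gcm128_encrypt_ctr32 is the bulk variant of CRYPTO_gcm128_encrypt:
 * |stream| is a ctr128_f that encrypts many counter blocks per call (for
 * example a hardware-accelerated AES-CTR routine), incrementing only the low
 * 32 bits of the counter. Leftover bytes fall back to ctx->block. */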
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, const void *key,
                                const uint8_t *in, uint8_t *out, size_t len,
                                ctr128_f stream) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int n, ctr;
  uint64_t mlen = ctx->len.u[1];
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to encrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  if (is_endian.little) {
    ctr = GETU32(ctx->Yi.c + 12);
  } else {
    ctr = ctx->Yi.d[3];
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

#if defined(GHASH)
  while (len >= GHASH_CHUNK) {
    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    ctr += GHASH_CHUNK / 16;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    GHASH(ctx, out, GHASH_CHUNK);
    out += GHASH_CHUNK;
    in += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
#endif
  size_t i = len & kSizeTWithoutLower4Bits;
  if (i != 0) {
    size_t j = i / 16;

    (*stream)(in, out, j, key, ctx->Yi.c);
    ctr += (unsigned int)j;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    in += i;
    len -= i;
#if defined(GHASH)
    GHASH(ctx, out, i);
    out += i;
#else
    while (j--) {
      for (i = 0; i < 16; ++i) {
        ctx->Xi.c[i] ^= out[i];
      }
      GCM_MUL(ctx, Xi);
      out += 16;
    }
#endif
  }
  if (len) {
    (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

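/* CRYPTO_gcm128_decrypt_ctr32 is the corresponding bulk decryption path; as
 * in CRYPTO_gcm128_decrypt, the ciphertext is hashed before it is decrypted
 * with |stream|. */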
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, const void *key,
                                const uint8_t *in, uint8_t *out, size_t len,
                                ctr128_f stream) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int n, ctr;
  uint64_t mlen = ctx->len.u[1];
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to decrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  if (is_endian.little) {
    ctr = GETU32(ctx->Yi.c + 12);
  } else {
    ctr = ctx->Yi.d[3];
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      uint8_t c = *(in++);
      *(out++) = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

#if defined(GHASH)
  while (len >= GHASH_CHUNK) {
    GHASH(ctx, in, GHASH_CHUNK);
    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    ctr += GHASH_CHUNK / 16;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    out += GHASH_CHUNK;
    in += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
#endif
  size_t i = len & kSizeTWithoutLower4Bits;
  if (i != 0) {
    size_t j = i / 16;

#if defined(GHASH)
    GHASH(ctx, in, i);
#else
    while (j--) {
      size_t k;
      for (k = 0; k < 16; ++k) {
        ctx->Xi.c[k] ^= in[k];
      }
      GCM_MUL(ctx, Xi);
      in += 16;
    }
    j = i / 16;
    in -= i;
#endif
    (*stream)(in, out, j, key, ctx->Yi.c);
    ctr += (unsigned int)j;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    out += i;
    in += i;
    len -= i;
  }
  if (len) {
    (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    while (len--) {
      uint8_t c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

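/* CRYPTO_gcm128_finish completes the tag: the bit lengths of the AAD and the
 * ciphertext are folded into Xi, one final multiplication by H is performed,
 * and the result is XORed with EK0. If |tag| is non-NULL the computed tag is
 * compared against it with CRYPTO_memcmp and 1 is returned on a match. */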
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const uint8_t *tag, size_t len) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  uint64_t alen = ctx->len.u[0] << 3;
  uint64_t clen = ctx->len.u[1] << 3;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

  if (ctx->mres || ctx->ares) {
    GCM_MUL(ctx, Xi);
  }

  if (is_endian.little) {
#ifdef BSWAP8
    alen = BSWAP8(alen);
    clen = BSWAP8(clen);
#else
    uint8_t *p = ctx->len.c;

    ctx->len.u[0] = alen;
    ctx->len.u[1] = clen;

    alen = (uint64_t)GETU32(p) << 32 | GETU32(p + 4);
    clen = (uint64_t)GETU32(p + 8) << 32 | GETU32(p + 12);
#endif
  }

  ctx->Xi.u[0] ^= alen;
  ctx->Xi.u[1] ^= clen;
  GCM_MUL(ctx, Xi);

  ctx->Xi.u[0] ^= ctx->EK0.u[0];
  ctx->Xi.u[1] ^= ctx->EK0.u[1];

  if (tag && len <= sizeof(ctx->Xi)) {
    return CRYPTO_memcmp(ctx->Xi.c, tag, len) == 0;
  } else {
    return 0;
  }
}

void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len) {
  CRYPTO_gcm128_finish(ctx, NULL, 0);
  memcpy(tag, ctx->Xi.c, len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
}

void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx) {
  if (ctx) {
    OPENSSL_cleanse(ctx, sizeof(*ctx));
    OPENSSL_free(ctx);
  }
}

#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
int crypto_gcm_clmul_enabled(void) {
#ifdef GHASH_ASM
  return OPENSSL_ia32cap_P[0] & (1 << 24) && /* check FXSR bit */
         OPENSSL_ia32cap_P[1] & (1 << 1);    /* check PCLMULQDQ bit */
#else
  return 0;
#endif
}
#endif