From 3cb7fdea950dd2d0377f0d9ad8a88fcb7c48b842 Mon Sep 17 00:00:00 2001 From: Jedidiah Barber Date: Wed, 14 Jul 2021 11:27:03 +1200 Subject: Initial mirror commit --- Crypto/Aes.h | 215 +++++++ Crypto/AesSmall.c | 953 +++++++++++++++++++++++++++++++ Crypto/AesSmall.h | 169 ++++++ Crypto/AesSmall_x86.asm | 1444 +++++++++++++++++++++++++++++++++++++++++++++++ Crypto/Aes_hw_cpu.asm | 330 +++++++++++ Crypto/Aes_hw_cpu.h | 30 + Crypto/Aes_x64.asm | 907 +++++++++++++++++++++++++++++ Crypto/Aes_x86.asm | 646 +++++++++++++++++++++ Crypto/Aescrypt.c | 311 ++++++++++ Crypto/Aeskey.c | 573 +++++++++++++++++++ Crypto/Aesopt.h | 734 ++++++++++++++++++++++++ Crypto/Aestab.c | 428 ++++++++++++++ Crypto/Aestab.h | 174 ++++++ Crypto/Blowfish.c | 382 +++++++++++++ Crypto/Blowfish.h | 25 + Crypto/Cast.c | 703 +++++++++++++++++++++++ Crypto/Cast.h | 24 + Crypto/Crypto.vcproj | 318 +++++++++++ Crypto/Des.c | 406 +++++++++++++ Crypto/Des.h | 28 + Crypto/Makefile | 1 + Crypto/Makefile.inc | 15 + Crypto/Rmd160.c | 490 ++++++++++++++++ Crypto/Rmd160.h | 33 ++ Crypto/Serpent.c | 943 +++++++++++++++++++++++++++++++ Crypto/Serpent.h | 19 + Crypto/Sha1.c | 282 +++++++++ Crypto/Sha1.h | 80 +++ Crypto/Sha2.c | 770 +++++++++++++++++++++++++ Crypto/Sha2.h | 155 +++++ Crypto/Sources | 23 + Crypto/Twofish.c | 548 ++++++++++++++++++ Crypto/Twofish.h | 55 ++ Crypto/Whirlpool.c | 1058 ++++++++++++++++++++++++++++++++++ Crypto/Whirlpool.h | 151 +++++ 35 files changed, 13423 insertions(+) create mode 100644 Crypto/Aes.h create mode 100644 Crypto/AesSmall.c create mode 100644 Crypto/AesSmall.h create mode 100644 Crypto/AesSmall_x86.asm create mode 100644 Crypto/Aes_hw_cpu.asm create mode 100644 Crypto/Aes_hw_cpu.h create mode 100644 Crypto/Aes_x64.asm create mode 100644 Crypto/Aes_x86.asm create mode 100644 Crypto/Aescrypt.c create mode 100644 Crypto/Aeskey.c create mode 100644 Crypto/Aesopt.h create mode 100644 Crypto/Aestab.c create mode 100644 Crypto/Aestab.h create mode 100644 Crypto/Blowfish.c create mode 100644 Crypto/Blowfish.h create mode 100644 Crypto/Cast.c create mode 100644 Crypto/Cast.h create mode 100644 Crypto/Crypto.vcproj create mode 100644 Crypto/Des.c create mode 100644 Crypto/Des.h create mode 100644 Crypto/Makefile create mode 100644 Crypto/Makefile.inc create mode 100644 Crypto/Rmd160.c create mode 100644 Crypto/Rmd160.h create mode 100644 Crypto/Serpent.c create mode 100644 Crypto/Serpent.h create mode 100644 Crypto/Sha1.c create mode 100644 Crypto/Sha1.h create mode 100644 Crypto/Sha2.c create mode 100644 Crypto/Sha2.h create mode 100644 Crypto/Sources create mode 100644 Crypto/Twofish.c create mode 100644 Crypto/Twofish.h create mode 100644 Crypto/Whirlpool.c create mode 100644 Crypto/Whirlpool.h (limited to 'Crypto') diff --git a/Crypto/Aes.h b/Crypto/Aes.h new file mode 100644 index 0000000..e12c6fc --- /dev/null +++ b/Crypto/Aes.h @@ -0,0 +1,215 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software is allowed (with or without + changes) provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue Date: 20/12/2007 + + This file contains the definitions required to use AES in C. See aesopt.h + for optimisation details. +*/ + +/* Adapted for TrueCrypt */ + +#ifndef _AES_H +#define _AES_H + +#include "Common/Tcdefs.h" + +#ifndef EXIT_SUCCESS +#define EXIT_SUCCESS 0 +#define EXIT_FAILURE 1 +#endif +#define INT_RETURN int + +#if defined(__cplusplus) +extern "C" +{ +#endif + +// #define AES_128 /* define if AES with 128 bit keys is needed */ +// #define AES_192 /* define if AES with 192 bit keys is needed */ +#define AES_256 /* define if AES with 256 bit keys is needed */ +// #define AES_VAR /* define if a variable key size is needed */ +// #define AES_MODES /* define if support is needed for modes */ + +/* The following must also be set in assembler files if being used */ + +#define AES_ENCRYPT /* if support for encryption is needed */ +#define AES_DECRYPT /* if support for decryption is needed */ +#define AES_ERR_CHK /* for parameter checks & error return codes */ +#define AES_REV_DKS /* define to reverse decryption key schedule */ + +#define AES_BLOCK_SIZE 16 /* the AES block size in bytes */ +#define N_COLS 4 /* the number of columns in the state */ + +/* The key schedule length is 11, 13 or 15 16-byte blocks for 128, */ +/* 192 or 256-bit keys respectively. That is 176, 208 or 240 bytes */ +/* or 44, 52 or 60 32-bit words. */ + +#if defined( AES_VAR ) || defined( AES_256 ) +#define KS_LENGTH 60 +#elif defined( AES_192 ) +#define KS_LENGTH 52 +#else +#define KS_LENGTH 44 +#endif + +#if defined( AES_ERR_CHK ) +#define AES_RETURN INT_RETURN +#else +#define AES_RETURN VOID_RETURN +#endif + +/* the character array 'inf' in the following structures is used */ +/* to hold AES context information. This AES code uses cx->inf.b[0] */ +/* to hold the number of rounds multiplied by 16. The other three */ +/* elements can be used by code that implements additional modes */ + +typedef union +{ uint_32t l; + uint_8t b[4]; +} aes_inf; + +typedef struct +{ uint_32t ks[KS_LENGTH]; + aes_inf inf; +} aes_encrypt_ctx; + +typedef struct +{ uint_32t ks[KS_LENGTH]; + aes_inf inf; +} aes_decrypt_ctx; + +/* This routine must be called before first use if non-static */ +/* tables are being used */ + +AES_RETURN aes_init(void); + +/* Key lengths in the range 16 <= key_len <= 32 are given in bytes, */ +/* those in the range 128 <= key_len <= 256 are given in bits */ + +#if defined( AES_ENCRYPT ) + +#if defined(AES_128) || defined(AES_VAR) +AES_RETURN aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]); +#endif + +#if defined(AES_192) || defined(AES_VAR) +AES_RETURN aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]); +#endif + +#if defined(AES_256) || defined(AES_VAR) +AES_RETURN aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]); +#endif + +#if defined(AES_VAR) +AES_RETURN aes_encrypt_key(const unsigned char *key, int key_len, aes_encrypt_ctx cx[1]); +#endif + +AES_RETURN aes_encrypt(const unsigned char *in, unsigned char *out, const aes_encrypt_ctx cx[1]); + +#endif + +#if defined( AES_DECRYPT ) + +#if defined(AES_128) || defined(AES_VAR) +AES_RETURN aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]); +#endif + +#if defined(AES_192) || defined(AES_VAR) +AES_RETURN aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]); +#endif + +#if defined(AES_256) || defined(AES_VAR) +AES_RETURN aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]); +#endif + +#if defined(AES_VAR) +AES_RETURN aes_decrypt_key(const unsigned char *key, int key_len, aes_decrypt_ctx cx[1]); +#endif + +AES_RETURN aes_decrypt(const unsigned char *in, unsigned char *out, const aes_decrypt_ctx cx[1]); + +#endif + +#if defined(AES_MODES) + +/* Multiple calls to the following subroutines for multiple block */ +/* ECB, CBC, CFB, OFB and CTR mode encryption can be used to handle */ +/* long messages incremantally provided that the context AND the iv */ +/* are preserved between all such calls. For the ECB and CBC modes */ +/* each individual call within a series of incremental calls must */ +/* process only full blocks (i.e. len must be a multiple of 16) but */ +/* the CFB, OFB and CTR mode calls can handle multiple incremental */ +/* calls of any length. Each mode is reset when a new AES key is */ +/* set but ECB and CBC operations can be reset without setting a */ +/* new key by setting a new IV value. To reset CFB, OFB and CTR */ +/* without setting the key, aes_mode_reset() must be called and the */ +/* IV must be set. NOTE: All these calls update the IV on exit so */ +/* this has to be reset if a new operation with the same IV as the */ +/* previous one is required (or decryption follows encryption with */ +/* the same IV array). */ + +AES_RETURN aes_test_alignment_detection(unsigned int n); + +AES_RETURN aes_ecb_encrypt(const unsigned char *ibuf, unsigned char *obuf, + int len, const aes_encrypt_ctx cx[1]); + +AES_RETURN aes_ecb_decrypt(const unsigned char *ibuf, unsigned char *obuf, + int len, const aes_decrypt_ctx cx[1]); + +AES_RETURN aes_cbc_encrypt(const unsigned char *ibuf, unsigned char *obuf, + int len, unsigned char *iv, const aes_encrypt_ctx cx[1]); + +AES_RETURN aes_cbc_decrypt(const unsigned char *ibuf, unsigned char *obuf, + int len, unsigned char *iv, const aes_decrypt_ctx cx[1]); + +AES_RETURN aes_mode_reset(aes_encrypt_ctx cx[1]); + +AES_RETURN aes_cfb_encrypt(const unsigned char *ibuf, unsigned char *obuf, + int len, unsigned char *iv, aes_encrypt_ctx cx[1]); + +AES_RETURN aes_cfb_decrypt(const unsigned char *ibuf, unsigned char *obuf, + int len, unsigned char *iv, aes_encrypt_ctx cx[1]); + +#define aes_ofb_encrypt aes_ofb_crypt +#define aes_ofb_decrypt aes_ofb_crypt + +AES_RETURN aes_ofb_crypt(const unsigned char *ibuf, unsigned char *obuf, + int len, unsigned char *iv, aes_encrypt_ctx cx[1]); + +typedef void cbuf_inc(unsigned char *cbuf); + +#define aes_ctr_encrypt aes_ctr_crypt +#define aes_ctr_decrypt aes_ctr_crypt + +AES_RETURN aes_ctr_crypt(const unsigned char *ibuf, unsigned char *obuf, + int len, unsigned char *cbuf, cbuf_inc ctr_inc, aes_encrypt_ctx cx[1]); + +#endif + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/Crypto/AesSmall.c b/Crypto/AesSmall.c new file mode 100644 index 0000000..10e7cf8 --- /dev/null +++ b/Crypto/AesSmall.c @@ -0,0 +1,953 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2006, Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software is allowed (with or without + changes) provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue 09/09/2006 + + This is an AES implementation that uses only 8-bit byte operations on the + cipher state (there are options to use 32-bit types if available). + + The combination of mix columns and byte substitution used here is based on + that developed by Karl Malbrain. His contribution is acknowledged. + */ + +/* Adapted for TrueCrypt: + - Macro-generated tables were replaced with static data to enable compiling + with MSVC++ 1.5 which runs out of resources when expanding large macros. +*/ + +#pragma optimize ("t", on) + +/* define if you have a fast memcpy function on your system */ +#if 1 +# define HAVE_MEMCPY +# include +# if defined( _MSC_VER ) +# ifndef DEBUG +# pragma intrinsic( memcpy ) +# endif +# endif +#endif + +/* define if you have fast 32-bit types on your system */ +#if 1 +# define HAVE_UINT_32T +#endif + +/* alternative versions (test for performance on your system) */ +#if 0 +# define VERSION_1 +#endif + +#include "AesSmall.h" + +#define WPOLY 0x011b +#define DPOLY 0x008d +#define f1(x) (x) +#define f2(x) ((x<<1) ^ (((x>>7) & 1) * WPOLY)) +#define f4(x) ((x<<2) ^ (((x>>6) & 1) * WPOLY) ^ (((x>>6) & 2) * WPOLY)) +#define f8(x) ((x<<3) ^ (((x>>5) & 1) * WPOLY) ^ (((x>>5) & 2) * WPOLY) \ + ^ (((x>>5) & 4) * WPOLY)) +#define d2(x) (((x) >> 1) ^ ((x) & 1 ? DPOLY : 0)) + +#define f3(x) (f2(x) ^ x) +#define f9(x) (f8(x) ^ x) +#define fb(x) (f8(x) ^ f2(x) ^ x) +#define fd(x) (f8(x) ^ f4(x) ^ x) +#define fe(x) (f8(x) ^ f4(x) ^ f2(x)) + +static const uint_8t s_box[256] = { + 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5, + 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76, + 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0, + 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0, + 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc, + 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15, + 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a, + 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75, + 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0, + 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84, + 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b, + 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf, + 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85, + 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8, + 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5, + 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2, + 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17, + 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73, + 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88, + 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb, + 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c, + 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79, + 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9, + 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08, + 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6, + 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a, + 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e, + 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e, + 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94, + 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf, + 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68, + 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +}; + +static const uint_8t inv_s_box[256] = { + 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38, + 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb, + 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87, + 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb, + 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d, + 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e, + 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2, + 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25, + 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16, + 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92, + 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda, + 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84, + 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a, + 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06, + 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02, + 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b, + 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea, + 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73, + 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85, + 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e, + 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89, + 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b, + 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20, + 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4, + 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31, + 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f, + 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d, + 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef, + 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0, + 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61, + 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26, + 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +}; + +static const uint_8t gfm2_s_box[256] = { + 0xc6,0xf8,0xee,0xf6,0xff,0xd6,0xde,0x91, + 0x60,0x02,0xce,0x56,0xe7,0xb5,0x4d,0xec, + 0x8f,0x1f,0x89,0xfa,0xef,0xb2,0x8e,0xfb, + 0x41,0xb3,0x5f,0x45,0x23,0x53,0xe4,0x9b, + 0x75,0xe1,0x3d,0x4c,0x6c,0x7e,0xf5,0x83, + 0x68,0x51,0xd1,0xf9,0xe2,0xab,0x62,0x2a, + 0x08,0x95,0x46,0x9d,0x30,0x37,0x0a,0x2f, + 0x0e,0x24,0x1b,0xdf,0xcd,0x4e,0x7f,0xea, + 0x12,0x1d,0x58,0x34,0x36,0xdc,0xb4,0x5b, + 0xa4,0x76,0xb7,0x7d,0x52,0xdd,0x5e,0x13, + 0xa6,0xb9,0x00,0xc1,0x40,0xe3,0x79,0xb6, + 0xd4,0x8d,0x67,0x72,0x94,0x98,0xb0,0x85, + 0xbb,0xc5,0x4f,0xed,0x86,0x9a,0x66,0x11, + 0x8a,0xe9,0x04,0xfe,0xa0,0x78,0x25,0x4b, + 0xa2,0x5d,0x80,0x05,0x3f,0x21,0x70,0xf1, + 0x63,0x77,0xaf,0x42,0x20,0xe5,0xfd,0xbf, + 0x81,0x18,0x26,0xc3,0xbe,0x35,0x88,0x2e, + 0x93,0x55,0xfc,0x7a,0xc8,0xba,0x32,0xe6, + 0xc0,0x19,0x9e,0xa3,0x44,0x54,0x3b,0x0b, + 0x8c,0xc7,0x6b,0x28,0xa7,0xbc,0x16,0xad, + 0xdb,0x64,0x74,0x14,0x92,0x0c,0x48,0xb8, + 0x9f,0xbd,0x43,0xc4,0x39,0x31,0xd3,0xf2, + 0xd5,0x8b,0x6e,0xda,0x01,0xb1,0x9c,0x49, + 0xd8,0xac,0xf3,0xcf,0xca,0xf4,0x47,0x10, + 0x6f,0xf0,0x4a,0x5c,0x38,0x57,0x73,0x97, + 0xcb,0xa1,0xe8,0x3e,0x96,0x61,0x0d,0x0f, + 0xe0,0x7c,0x71,0xcc,0x90,0x06,0xf7,0x1c, + 0xc2,0x6a,0xae,0x69,0x17,0x99,0x3a,0x27, + 0xd9,0xeb,0x2b,0x22,0xd2,0xa9,0x07,0x33, + 0x2d,0x3c,0x15,0xc9,0x87,0xaa,0x50,0xa5, + 0x03,0x59,0x09,0x1a,0x65,0xd7,0x84,0xd0, + 0x82,0x29,0x5a,0x1e,0x7b,0xa8,0x6d,0x2c +}; + +static const uint_8t gfm3_s_box[256] = { + 0xa5,0x84,0x99,0x8d,0x0d,0xbd,0xb1,0x54, + 0x50,0x03,0xa9,0x7d,0x19,0x62,0xe6,0x9a, + 0x45,0x9d,0x40,0x87,0x15,0xeb,0xc9,0x0b, + 0xec,0x67,0xfd,0xea,0xbf,0xf7,0x96,0x5b, + 0xc2,0x1c,0xae,0x6a,0x5a,0x41,0x02,0x4f, + 0x5c,0xf4,0x34,0x08,0x93,0x73,0x53,0x3f, + 0x0c,0x52,0x65,0x5e,0x28,0xa1,0x0f,0xb5, + 0x09,0x36,0x9b,0x3d,0x26,0x69,0xcd,0x9f, + 0x1b,0x9e,0x74,0x2e,0x2d,0xb2,0xee,0xfb, + 0xf6,0x4d,0x61,0xce,0x7b,0x3e,0x71,0x97, + 0xf5,0x68,0x00,0x2c,0x60,0x1f,0xc8,0xed, + 0xbe,0x46,0xd9,0x4b,0xde,0xd4,0xe8,0x4a, + 0x6b,0x2a,0xe5,0x16,0xc5,0xd7,0x55,0x94, + 0xcf,0x10,0x06,0x81,0xf0,0x44,0xba,0xe3, + 0xf3,0xfe,0xc0,0x8a,0xad,0xbc,0x48,0x04, + 0xdf,0xc1,0x75,0x63,0x30,0x1a,0x0e,0x6d, + 0x4c,0x14,0x35,0x2f,0xe1,0xa2,0xcc,0x39, + 0x57,0xf2,0x82,0x47,0xac,0xe7,0x2b,0x95, + 0xa0,0x98,0xd1,0x7f,0x66,0x7e,0xab,0x83, + 0xca,0x29,0xd3,0x3c,0x79,0xe2,0x1d,0x76, + 0x3b,0x56,0x4e,0x1e,0xdb,0x0a,0x6c,0xe4, + 0x5d,0x6e,0xef,0xa6,0xa8,0xa4,0x37,0x8b, + 0x32,0x43,0x59,0xb7,0x8c,0x64,0xd2,0xe0, + 0xb4,0xfa,0x07,0x25,0xaf,0x8e,0xe9,0x18, + 0xd5,0x88,0x6f,0x72,0x24,0xf1,0xc7,0x51, + 0x23,0x7c,0x9c,0x21,0xdd,0xdc,0x86,0x85, + 0x90,0x42,0xc4,0xaa,0xd8,0x05,0x01,0x12, + 0xa3,0x5f,0xf9,0xd0,0x91,0x58,0x27,0xb9, + 0x38,0x13,0xb3,0x33,0xbb,0x70,0x89,0xa7, + 0xb6,0x22,0x92,0x20,0x49,0xff,0x78,0x7a, + 0x8f,0xf8,0x80,0x17,0xda,0x31,0xc6,0xb8, + 0xc3,0xb0,0x77,0x11,0xcb,0xfc,0xd6,0x3a +}; + +static const uint_8t gfmul_9[256] = { + 0x00,0x09,0x12,0x1b,0x24,0x2d,0x36,0x3f, + 0x48,0x41,0x5a,0x53,0x6c,0x65,0x7e,0x77, + 0x90,0x99,0x82,0x8b,0xb4,0xbd,0xa6,0xaf, + 0xd8,0xd1,0xca,0xc3,0xfc,0xf5,0xee,0xe7, + 0x3b,0x32,0x29,0x20,0x1f,0x16,0x0d,0x04, + 0x73,0x7a,0x61,0x68,0x57,0x5e,0x45,0x4c, + 0xab,0xa2,0xb9,0xb0,0x8f,0x86,0x9d,0x94, + 0xe3,0xea,0xf1,0xf8,0xc7,0xce,0xd5,0xdc, + 0x76,0x7f,0x64,0x6d,0x52,0x5b,0x40,0x49, + 0x3e,0x37,0x2c,0x25,0x1a,0x13,0x08,0x01, + 0xe6,0xef,0xf4,0xfd,0xc2,0xcb,0xd0,0xd9, + 0xae,0xa7,0xbc,0xb5,0x8a,0x83,0x98,0x91, + 0x4d,0x44,0x5f,0x56,0x69,0x60,0x7b,0x72, + 0x05,0x0c,0x17,0x1e,0x21,0x28,0x33,0x3a, + 0xdd,0xd4,0xcf,0xc6,0xf9,0xf0,0xeb,0xe2, + 0x95,0x9c,0x87,0x8e,0xb1,0xb8,0xa3,0xaa, + 0xec,0xe5,0xfe,0xf7,0xc8,0xc1,0xda,0xd3, + 0xa4,0xad,0xb6,0xbf,0x80,0x89,0x92,0x9b, + 0x7c,0x75,0x6e,0x67,0x58,0x51,0x4a,0x43, + 0x34,0x3d,0x26,0x2f,0x10,0x19,0x02,0x0b, + 0xd7,0xde,0xc5,0xcc,0xf3,0xfa,0xe1,0xe8, + 0x9f,0x96,0x8d,0x84,0xbb,0xb2,0xa9,0xa0, + 0x47,0x4e,0x55,0x5c,0x63,0x6a,0x71,0x78, + 0x0f,0x06,0x1d,0x14,0x2b,0x22,0x39,0x30, + 0x9a,0x93,0x88,0x81,0xbe,0xb7,0xac,0xa5, + 0xd2,0xdb,0xc0,0xc9,0xf6,0xff,0xe4,0xed, + 0x0a,0x03,0x18,0x11,0x2e,0x27,0x3c,0x35, + 0x42,0x4b,0x50,0x59,0x66,0x6f,0x74,0x7d, + 0xa1,0xa8,0xb3,0xba,0x85,0x8c,0x97,0x9e, + 0xe9,0xe0,0xfb,0xf2,0xcd,0xc4,0xdf,0xd6, + 0x31,0x38,0x23,0x2a,0x15,0x1c,0x07,0x0e, + 0x79,0x70,0x6b,0x62,0x5d,0x54,0x4f,0x46 +}; + +static const uint_8t gfmul_b[256] = { + 0x00,0x0b,0x16,0x1d,0x2c,0x27,0x3a,0x31, + 0x58,0x53,0x4e,0x45,0x74,0x7f,0x62,0x69, + 0xb0,0xbb,0xa6,0xad,0x9c,0x97,0x8a,0x81, + 0xe8,0xe3,0xfe,0xf5,0xc4,0xcf,0xd2,0xd9, + 0x7b,0x70,0x6d,0x66,0x57,0x5c,0x41,0x4a, + 0x23,0x28,0x35,0x3e,0x0f,0x04,0x19,0x12, + 0xcb,0xc0,0xdd,0xd6,0xe7,0xec,0xf1,0xfa, + 0x93,0x98,0x85,0x8e,0xbf,0xb4,0xa9,0xa2, + 0xf6,0xfd,0xe0,0xeb,0xda,0xd1,0xcc,0xc7, + 0xae,0xa5,0xb8,0xb3,0x82,0x89,0x94,0x9f, + 0x46,0x4d,0x50,0x5b,0x6a,0x61,0x7c,0x77, + 0x1e,0x15,0x08,0x03,0x32,0x39,0x24,0x2f, + 0x8d,0x86,0x9b,0x90,0xa1,0xaa,0xb7,0xbc, + 0xd5,0xde,0xc3,0xc8,0xf9,0xf2,0xef,0xe4, + 0x3d,0x36,0x2b,0x20,0x11,0x1a,0x07,0x0c, + 0x65,0x6e,0x73,0x78,0x49,0x42,0x5f,0x54, + 0xf7,0xfc,0xe1,0xea,0xdb,0xd0,0xcd,0xc6, + 0xaf,0xa4,0xb9,0xb2,0x83,0x88,0x95,0x9e, + 0x47,0x4c,0x51,0x5a,0x6b,0x60,0x7d,0x76, + 0x1f,0x14,0x09,0x02,0x33,0x38,0x25,0x2e, + 0x8c,0x87,0x9a,0x91,0xa0,0xab,0xb6,0xbd, + 0xd4,0xdf,0xc2,0xc9,0xf8,0xf3,0xee,0xe5, + 0x3c,0x37,0x2a,0x21,0x10,0x1b,0x06,0x0d, + 0x64,0x6f,0x72,0x79,0x48,0x43,0x5e,0x55, + 0x01,0x0a,0x17,0x1c,0x2d,0x26,0x3b,0x30, + 0x59,0x52,0x4f,0x44,0x75,0x7e,0x63,0x68, + 0xb1,0xba,0xa7,0xac,0x9d,0x96,0x8b,0x80, + 0xe9,0xe2,0xff,0xf4,0xc5,0xce,0xd3,0xd8, + 0x7a,0x71,0x6c,0x67,0x56,0x5d,0x40,0x4b, + 0x22,0x29,0x34,0x3f,0x0e,0x05,0x18,0x13, + 0xca,0xc1,0xdc,0xd7,0xe6,0xed,0xf0,0xfb, + 0x92,0x99,0x84,0x8f,0xbe,0xb5,0xa8,0xa3 +}; + +static const uint_8t gfmul_d[256] = { + 0x00,0x0d,0x1a,0x17,0x34,0x39,0x2e,0x23, + 0x68,0x65,0x72,0x7f,0x5c,0x51,0x46,0x4b, + 0xd0,0xdd,0xca,0xc7,0xe4,0xe9,0xfe,0xf3, + 0xb8,0xb5,0xa2,0xaf,0x8c,0x81,0x96,0x9b, + 0xbb,0xb6,0xa1,0xac,0x8f,0x82,0x95,0x98, + 0xd3,0xde,0xc9,0xc4,0xe7,0xea,0xfd,0xf0, + 0x6b,0x66,0x71,0x7c,0x5f,0x52,0x45,0x48, + 0x03,0x0e,0x19,0x14,0x37,0x3a,0x2d,0x20, + 0x6d,0x60,0x77,0x7a,0x59,0x54,0x43,0x4e, + 0x05,0x08,0x1f,0x12,0x31,0x3c,0x2b,0x26, + 0xbd,0xb0,0xa7,0xaa,0x89,0x84,0x93,0x9e, + 0xd5,0xd8,0xcf,0xc2,0xe1,0xec,0xfb,0xf6, + 0xd6,0xdb,0xcc,0xc1,0xe2,0xef,0xf8,0xf5, + 0xbe,0xb3,0xa4,0xa9,0x8a,0x87,0x90,0x9d, + 0x06,0x0b,0x1c,0x11,0x32,0x3f,0x28,0x25, + 0x6e,0x63,0x74,0x79,0x5a,0x57,0x40,0x4d, + 0xda,0xd7,0xc0,0xcd,0xee,0xe3,0xf4,0xf9, + 0xb2,0xbf,0xa8,0xa5,0x86,0x8b,0x9c,0x91, + 0x0a,0x07,0x10,0x1d,0x3e,0x33,0x24,0x29, + 0x62,0x6f,0x78,0x75,0x56,0x5b,0x4c,0x41, + 0x61,0x6c,0x7b,0x76,0x55,0x58,0x4f,0x42, + 0x09,0x04,0x13,0x1e,0x3d,0x30,0x27,0x2a, + 0xb1,0xbc,0xab,0xa6,0x85,0x88,0x9f,0x92, + 0xd9,0xd4,0xc3,0xce,0xed,0xe0,0xf7,0xfa, + 0xb7,0xba,0xad,0xa0,0x83,0x8e,0x99,0x94, + 0xdf,0xd2,0xc5,0xc8,0xeb,0xe6,0xf1,0xfc, + 0x67,0x6a,0x7d,0x70,0x53,0x5e,0x49,0x44, + 0x0f,0x02,0x15,0x18,0x3b,0x36,0x21,0x2c, + 0x0c,0x01,0x16,0x1b,0x38,0x35,0x22,0x2f, + 0x64,0x69,0x7e,0x73,0x50,0x5d,0x4a,0x47, + 0xdc,0xd1,0xc6,0xcb,0xe8,0xe5,0xf2,0xff, + 0xb4,0xb9,0xae,0xa3,0x80,0x8d,0x9a,0x97 +}; + +static const uint_8t gfmul_e[256] = { + 0x00,0x0e,0x1c,0x12,0x38,0x36,0x24,0x2a, + 0x70,0x7e,0x6c,0x62,0x48,0x46,0x54,0x5a, + 0xe0,0xee,0xfc,0xf2,0xd8,0xd6,0xc4,0xca, + 0x90,0x9e,0x8c,0x82,0xa8,0xa6,0xb4,0xba, + 0xdb,0xd5,0xc7,0xc9,0xe3,0xed,0xff,0xf1, + 0xab,0xa5,0xb7,0xb9,0x93,0x9d,0x8f,0x81, + 0x3b,0x35,0x27,0x29,0x03,0x0d,0x1f,0x11, + 0x4b,0x45,0x57,0x59,0x73,0x7d,0x6f,0x61, + 0xad,0xa3,0xb1,0xbf,0x95,0x9b,0x89,0x87, + 0xdd,0xd3,0xc1,0xcf,0xe5,0xeb,0xf9,0xf7, + 0x4d,0x43,0x51,0x5f,0x75,0x7b,0x69,0x67, + 0x3d,0x33,0x21,0x2f,0x05,0x0b,0x19,0x17, + 0x76,0x78,0x6a,0x64,0x4e,0x40,0x52,0x5c, + 0x06,0x08,0x1a,0x14,0x3e,0x30,0x22,0x2c, + 0x96,0x98,0x8a,0x84,0xae,0xa0,0xb2,0xbc, + 0xe6,0xe8,0xfa,0xf4,0xde,0xd0,0xc2,0xcc, + 0x41,0x4f,0x5d,0x53,0x79,0x77,0x65,0x6b, + 0x31,0x3f,0x2d,0x23,0x09,0x07,0x15,0x1b, + 0xa1,0xaf,0xbd,0xb3,0x99,0x97,0x85,0x8b, + 0xd1,0xdf,0xcd,0xc3,0xe9,0xe7,0xf5,0xfb, + 0x9a,0x94,0x86,0x88,0xa2,0xac,0xbe,0xb0, + 0xea,0xe4,0xf6,0xf8,0xd2,0xdc,0xce,0xc0, + 0x7a,0x74,0x66,0x68,0x42,0x4c,0x5e,0x50, + 0x0a,0x04,0x16,0x18,0x32,0x3c,0x2e,0x20, + 0xec,0xe2,0xf0,0xfe,0xd4,0xda,0xc8,0xc6, + 0x9c,0x92,0x80,0x8e,0xa4,0xaa,0xb8,0xb6, + 0x0c,0x02,0x10,0x1e,0x34,0x3a,0x28,0x26, + 0x7c,0x72,0x60,0x6e,0x44,0x4a,0x58,0x56, + 0x37,0x39,0x2b,0x25,0x0f,0x01,0x13,0x1d, + 0x47,0x49,0x5b,0x55,0x7f,0x71,0x63,0x6d, + 0xd7,0xd9,0xcb,0xc5,0xef,0xe1,0xf3,0xfd, + 0xa7,0xa9,0xbb,0xb5,0x9f,0x91,0x83,0x8d +}; + +#if defined( HAVE_UINT_32T ) + typedef unsigned long uint_32t; +#endif + +#if defined( HAVE_MEMCPY ) +# define block_copy(d, s, l) memcpy(d, s, l) +# define block16_copy(d, s) memcpy(d, s, N_BLOCK) +#else +# define block_copy(d, s, l) copy_block(d, s, l) +# define block16_copy(d, s) copy_block16(d, s) +#endif + +/* block size 'nn' must be a multiple of four */ + +static void copy_block16( void *d, const void *s ) +{ +#if defined( HAVE_UINT_32T ) + ((uint_32t*)d)[ 0] = ((uint_32t*)s)[ 0]; + ((uint_32t*)d)[ 1] = ((uint_32t*)s)[ 1]; + ((uint_32t*)d)[ 2] = ((uint_32t*)s)[ 2]; + ((uint_32t*)d)[ 3] = ((uint_32t*)s)[ 3]; +#else + ((uint_8t*)d)[ 0] = ((uint_8t*)s)[ 0]; + ((uint_8t*)d)[ 1] = ((uint_8t*)s)[ 1]; + ((uint_8t*)d)[ 2] = ((uint_8t*)s)[ 2]; + ((uint_8t*)d)[ 3] = ((uint_8t*)s)[ 3]; + ((uint_8t*)d)[ 4] = ((uint_8t*)s)[ 4]; + ((uint_8t*)d)[ 5] = ((uint_8t*)s)[ 5]; + ((uint_8t*)d)[ 6] = ((uint_8t*)s)[ 6]; + ((uint_8t*)d)[ 7] = ((uint_8t*)s)[ 7]; + ((uint_8t*)d)[ 8] = ((uint_8t*)s)[ 8]; + ((uint_8t*)d)[ 9] = ((uint_8t*)s)[ 9]; + ((uint_8t*)d)[10] = ((uint_8t*)s)[10]; + ((uint_8t*)d)[11] = ((uint_8t*)s)[11]; + ((uint_8t*)d)[12] = ((uint_8t*)s)[12]; + ((uint_8t*)d)[13] = ((uint_8t*)s)[13]; + ((uint_8t*)d)[14] = ((uint_8t*)s)[14]; + ((uint_8t*)d)[15] = ((uint_8t*)s)[15]; +#endif +} + +static void copy_block( void * d, void *s, uint_8t nn ) +{ + while( nn-- ) + *((uint_8t*)d)++ = *((uint_8t*)s)++; +} + +static void xor_block( void *d, const void *s ) +{ +#if defined( HAVE_UINT_32T ) + ((uint_32t*)d)[ 0] ^= ((uint_32t*)s)[ 0]; + ((uint_32t*)d)[ 1] ^= ((uint_32t*)s)[ 1]; + ((uint_32t*)d)[ 2] ^= ((uint_32t*)s)[ 2]; + ((uint_32t*)d)[ 3] ^= ((uint_32t*)s)[ 3]; +#else + ((uint_8t*)d)[ 0] ^= ((uint_8t*)s)[ 0]; + ((uint_8t*)d)[ 1] ^= ((uint_8t*)s)[ 1]; + ((uint_8t*)d)[ 2] ^= ((uint_8t*)s)[ 2]; + ((uint_8t*)d)[ 3] ^= ((uint_8t*)s)[ 3]; + ((uint_8t*)d)[ 4] ^= ((uint_8t*)s)[ 4]; + ((uint_8t*)d)[ 5] ^= ((uint_8t*)s)[ 5]; + ((uint_8t*)d)[ 6] ^= ((uint_8t*)s)[ 6]; + ((uint_8t*)d)[ 7] ^= ((uint_8t*)s)[ 7]; + ((uint_8t*)d)[ 8] ^= ((uint_8t*)s)[ 8]; + ((uint_8t*)d)[ 9] ^= ((uint_8t*)s)[ 9]; + ((uint_8t*)d)[10] ^= ((uint_8t*)s)[10]; + ((uint_8t*)d)[11] ^= ((uint_8t*)s)[11]; + ((uint_8t*)d)[12] ^= ((uint_8t*)s)[12]; + ((uint_8t*)d)[13] ^= ((uint_8t*)s)[13]; + ((uint_8t*)d)[14] ^= ((uint_8t*)s)[14]; + ((uint_8t*)d)[15] ^= ((uint_8t*)s)[15]; +#endif +} + +static void copy_and_key( void *d, const void *s, const void *k ) +{ +#if defined( HAVE_UINT_32T ) + ((uint_32t*)d)[ 0] = ((uint_32t*)s)[ 0] ^ ((uint_32t*)k)[ 0]; + ((uint_32t*)d)[ 1] = ((uint_32t*)s)[ 1] ^ ((uint_32t*)k)[ 1]; + ((uint_32t*)d)[ 2] = ((uint_32t*)s)[ 2] ^ ((uint_32t*)k)[ 2]; + ((uint_32t*)d)[ 3] = ((uint_32t*)s)[ 3] ^ ((uint_32t*)k)[ 3]; +#elif 1 + ((uint_8t*)d)[ 0] = ((uint_8t*)s)[ 0] ^ ((uint_8t*)k)[ 0]; + ((uint_8t*)d)[ 1] = ((uint_8t*)s)[ 1] ^ ((uint_8t*)k)[ 1]; + ((uint_8t*)d)[ 2] = ((uint_8t*)s)[ 2] ^ ((uint_8t*)k)[ 2]; + ((uint_8t*)d)[ 3] = ((uint_8t*)s)[ 3] ^ ((uint_8t*)k)[ 3]; + ((uint_8t*)d)[ 4] = ((uint_8t*)s)[ 4] ^ ((uint_8t*)k)[ 4]; + ((uint_8t*)d)[ 5] = ((uint_8t*)s)[ 5] ^ ((uint_8t*)k)[ 5]; + ((uint_8t*)d)[ 6] = ((uint_8t*)s)[ 6] ^ ((uint_8t*)k)[ 6]; + ((uint_8t*)d)[ 7] = ((uint_8t*)s)[ 7] ^ ((uint_8t*)k)[ 7]; + ((uint_8t*)d)[ 8] = ((uint_8t*)s)[ 8] ^ ((uint_8t*)k)[ 8]; + ((uint_8t*)d)[ 9] = ((uint_8t*)s)[ 9] ^ ((uint_8t*)k)[ 9]; + ((uint_8t*)d)[10] = ((uint_8t*)s)[10] ^ ((uint_8t*)k)[10]; + ((uint_8t*)d)[11] = ((uint_8t*)s)[11] ^ ((uint_8t*)k)[11]; + ((uint_8t*)d)[12] = ((uint_8t*)s)[12] ^ ((uint_8t*)k)[12]; + ((uint_8t*)d)[13] = ((uint_8t*)s)[13] ^ ((uint_8t*)k)[13]; + ((uint_8t*)d)[14] = ((uint_8t*)s)[14] ^ ((uint_8t*)k)[14]; + ((uint_8t*)d)[15] = ((uint_8t*)s)[15] ^ ((uint_8t*)k)[15]; +#else + block16_copy(d, s); + xor_block(d, k); +#endif +} + +static void add_round_key( uint_8t d[N_BLOCK], const uint_8t k[N_BLOCK] ) +{ + xor_block(d, k); +} + +static void shift_sub_rows( uint_8t st[N_BLOCK] ) +{ uint_8t tt; + + st[ 0] = s_box[st[ 0]]; st[ 4] = s_box[st[ 4]]; + st[ 8] = s_box[st[ 8]]; st[12] = s_box[st[12]]; + + tt = st[1]; st[ 1] = s_box[st[ 5]]; st[ 5] = s_box[st[ 9]]; + st[ 9] = s_box[st[13]]; st[13] = s_box[ tt ]; + + tt = st[2]; st[ 2] = s_box[st[10]]; st[10] = s_box[ tt ]; + tt = st[6]; st[ 6] = s_box[st[14]]; st[14] = s_box[ tt ]; + + tt = st[15]; st[15] = s_box[st[11]]; st[11] = s_box[st[ 7]]; + st[ 7] = s_box[st[ 3]]; st[ 3] = s_box[ tt ]; +} + +static void inv_shift_sub_rows( uint_8t st[N_BLOCK] ) +{ uint_8t tt; + + st[ 0] = inv_s_box[st[ 0]]; st[ 4] = inv_s_box[st[ 4]]; + st[ 8] = inv_s_box[st[ 8]]; st[12] = inv_s_box[st[12]]; + + tt = st[13]; st[13] = inv_s_box[st[9]]; st[ 9] = inv_s_box[st[5]]; + st[ 5] = inv_s_box[st[1]]; st[ 1] = inv_s_box[ tt ]; + + tt = st[2]; st[ 2] = inv_s_box[st[10]]; st[10] = inv_s_box[ tt ]; + tt = st[6]; st[ 6] = inv_s_box[st[14]]; st[14] = inv_s_box[ tt ]; + + tt = st[3]; st[ 3] = inv_s_box[st[ 7]]; st[ 7] = inv_s_box[st[11]]; + st[11] = inv_s_box[st[15]]; st[15] = inv_s_box[ tt ]; +} + +#if defined( VERSION_1 ) + static void mix_sub_columns( uint_8t dt[N_BLOCK] ) + { uint_8t st[N_BLOCK]; + block16_copy(st, dt); +#else + static void mix_sub_columns( uint_8t dt[N_BLOCK], uint_8t st[N_BLOCK] ) + { +#endif + dt[ 0] = gfm2_s_box[st[0]] ^ gfm3_s_box[st[5]] ^ s_box[st[10]] ^ s_box[st[15]]; + dt[ 1] = s_box[st[0]] ^ gfm2_s_box[st[5]] ^ gfm3_s_box[st[10]] ^ s_box[st[15]]; + dt[ 2] = s_box[st[0]] ^ s_box[st[5]] ^ gfm2_s_box[st[10]] ^ gfm3_s_box[st[15]]; + dt[ 3] = gfm3_s_box[st[0]] ^ s_box[st[5]] ^ s_box[st[10]] ^ gfm2_s_box[st[15]]; + + dt[ 4] = gfm2_s_box[st[4]] ^ gfm3_s_box[st[9]] ^ s_box[st[14]] ^ s_box[st[3]]; + dt[ 5] = s_box[st[4]] ^ gfm2_s_box[st[9]] ^ gfm3_s_box[st[14]] ^ s_box[st[3]]; + dt[ 6] = s_box[st[4]] ^ s_box[st[9]] ^ gfm2_s_box[st[14]] ^ gfm3_s_box[st[3]]; + dt[ 7] = gfm3_s_box[st[4]] ^ s_box[st[9]] ^ s_box[st[14]] ^ gfm2_s_box[st[3]]; + + dt[ 8] = gfm2_s_box[st[8]] ^ gfm3_s_box[st[13]] ^ s_box[st[2]] ^ s_box[st[7]]; + dt[ 9] = s_box[st[8]] ^ gfm2_s_box[st[13]] ^ gfm3_s_box[st[2]] ^ s_box[st[7]]; + dt[10] = s_box[st[8]] ^ s_box[st[13]] ^ gfm2_s_box[st[2]] ^ gfm3_s_box[st[7]]; + dt[11] = gfm3_s_box[st[8]] ^ s_box[st[13]] ^ s_box[st[2]] ^ gfm2_s_box[st[7]]; + + dt[12] = gfm2_s_box[st[12]] ^ gfm3_s_box[st[1]] ^ s_box[st[6]] ^ s_box[st[11]]; + dt[13] = s_box[st[12]] ^ gfm2_s_box[st[1]] ^ gfm3_s_box[st[6]] ^ s_box[st[11]]; + dt[14] = s_box[st[12]] ^ s_box[st[1]] ^ gfm2_s_box[st[6]] ^ gfm3_s_box[st[11]]; + dt[15] = gfm3_s_box[st[12]] ^ s_box[st[1]] ^ s_box[st[6]] ^ gfm2_s_box[st[11]]; + } + +#if defined( VERSION_1 ) + static void inv_mix_sub_columns( uint_8t dt[N_BLOCK] ) + { uint_8t st[N_BLOCK]; + block16_copy(st, dt); +#else + static void inv_mix_sub_columns( uint_8t dt[N_BLOCK], uint_8t st[N_BLOCK] ) + { +#endif + dt[ 0] = inv_s_box[gfmul_e[st[ 0]] ^ gfmul_b[st[ 1]] ^ gfmul_d[st[ 2]] ^ gfmul_9[st[ 3]]]; + dt[ 5] = inv_s_box[gfmul_9[st[ 0]] ^ gfmul_e[st[ 1]] ^ gfmul_b[st[ 2]] ^ gfmul_d[st[ 3]]]; + dt[10] = inv_s_box[gfmul_d[st[ 0]] ^ gfmul_9[st[ 1]] ^ gfmul_e[st[ 2]] ^ gfmul_b[st[ 3]]]; + dt[15] = inv_s_box[gfmul_b[st[ 0]] ^ gfmul_d[st[ 1]] ^ gfmul_9[st[ 2]] ^ gfmul_e[st[ 3]]]; + + dt[ 4] = inv_s_box[gfmul_e[st[ 4]] ^ gfmul_b[st[ 5]] ^ gfmul_d[st[ 6]] ^ gfmul_9[st[ 7]]]; + dt[ 9] = inv_s_box[gfmul_9[st[ 4]] ^ gfmul_e[st[ 5]] ^ gfmul_b[st[ 6]] ^ gfmul_d[st[ 7]]]; + dt[14] = inv_s_box[gfmul_d[st[ 4]] ^ gfmul_9[st[ 5]] ^ gfmul_e[st[ 6]] ^ gfmul_b[st[ 7]]]; + dt[ 3] = inv_s_box[gfmul_b[st[ 4]] ^ gfmul_d[st[ 5]] ^ gfmul_9[st[ 6]] ^ gfmul_e[st[ 7]]]; + + dt[ 8] = inv_s_box[gfmul_e[st[ 8]] ^ gfmul_b[st[ 9]] ^ gfmul_d[st[10]] ^ gfmul_9[st[11]]]; + dt[13] = inv_s_box[gfmul_9[st[ 8]] ^ gfmul_e[st[ 9]] ^ gfmul_b[st[10]] ^ gfmul_d[st[11]]]; + dt[ 2] = inv_s_box[gfmul_d[st[ 8]] ^ gfmul_9[st[ 9]] ^ gfmul_e[st[10]] ^ gfmul_b[st[11]]]; + dt[ 7] = inv_s_box[gfmul_b[st[ 8]] ^ gfmul_d[st[ 9]] ^ gfmul_9[st[10]] ^ gfmul_e[st[11]]]; + + dt[12] = inv_s_box[gfmul_e[st[12]] ^ gfmul_b[st[13]] ^ gfmul_d[st[14]] ^ gfmul_9[st[15]]]; + dt[ 1] = inv_s_box[gfmul_9[st[12]] ^ gfmul_e[st[13]] ^ gfmul_b[st[14]] ^ gfmul_d[st[15]]]; + dt[ 6] = inv_s_box[gfmul_d[st[12]] ^ gfmul_9[st[13]] ^ gfmul_e[st[14]] ^ gfmul_b[st[15]]]; + dt[11] = inv_s_box[gfmul_b[st[12]] ^ gfmul_d[st[13]] ^ gfmul_9[st[14]] ^ gfmul_e[st[15]]]; + } + +#if defined( AES_ENC_PREKEYED ) || defined( AES_DEC_PREKEYED ) + +/* Set the cipher key for the pre-keyed version */ + +return_type aes_set_key( const unsigned char key[], length_type keylen, aes_context ctx[1] ) +{ + uint_8t cc, rc, hi; + + switch( keylen ) + { + case 16: + case 128: + keylen = 16; + break; + case 24: + case 192: + keylen = 24; + break; + case 32: + case 256: + keylen = 32; + break; + default: + ctx->rnd = 0; + return (return_type) -1; + } + block_copy(ctx->ksch, key, keylen); + hi = (keylen + 28) << 2; + ctx->rnd = (hi >> 4) - 1; + for( cc = keylen, rc = 1; cc < hi; cc += 4 ) + { uint_8t tt, t0, t1, t2, t3; + + t0 = ctx->ksch[cc - 4]; + t1 = ctx->ksch[cc - 3]; + t2 = ctx->ksch[cc - 2]; + t3 = ctx->ksch[cc - 1]; + if( cc % keylen == 0 ) + { + tt = t0; + t0 = s_box[t1] ^ rc; + t1 = s_box[t2]; + t2 = s_box[t3]; + t3 = s_box[tt]; + rc = f2(rc); + } + else if( keylen > 24 && cc % keylen == 16 ) + { + t0 = s_box[t0]; + t1 = s_box[t1]; + t2 = s_box[t2]; + t3 = s_box[t3]; + } + tt = cc - keylen; + ctx->ksch[cc + 0] = ctx->ksch[tt + 0] ^ t0; + ctx->ksch[cc + 1] = ctx->ksch[tt + 1] ^ t1; + ctx->ksch[cc + 2] = ctx->ksch[tt + 2] ^ t2; + ctx->ksch[cc + 3] = ctx->ksch[tt + 3] ^ t3; + } + return 0; +} + +#endif + +#if defined( AES_ENC_PREKEYED ) + +/* Encrypt a single block of 16 bytes */ + +return_type aes_encrypt( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK], const aes_context ctx[1] ) +{ + if( ctx->rnd ) + { + uint_8t s1[N_BLOCK], r; + copy_and_key( s1, in, ctx->ksch ); + + for( r = 1 ; r < ctx->rnd ; ++r ) +#if defined( VERSION_1 ) + { + mix_sub_columns( s1 ); + add_round_key( s1, ctx->ksch + r * N_BLOCK); + } +#else + { uint_8t s2[N_BLOCK]; + mix_sub_columns( s2, s1 ); + copy_and_key( s1, s2, ctx->ksch + r * N_BLOCK); + } +#endif + shift_sub_rows( s1 ); + copy_and_key( out, s1, ctx->ksch + r * N_BLOCK ); + } + else + return (return_type) -1; + return 0; +} + +#endif + +#if defined( AES_DEC_PREKEYED ) + +/* Decrypt a single block of 16 bytes */ + +return_type aes_decrypt( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK], const aes_context ctx[1] ) +{ + if( ctx->rnd ) + { + uint_8t s1[N_BLOCK], r; + copy_and_key( s1, in, ctx->ksch + ctx->rnd * N_BLOCK ); + inv_shift_sub_rows( s1 ); + + for( r = ctx->rnd ; --r ; ) +#if defined( VERSION_1 ) + { + add_round_key( s1, ctx->ksch + r * N_BLOCK ); + inv_mix_sub_columns( s1 ); + } +#else + { uint_8t s2[N_BLOCK]; + copy_and_key( s2, s1, ctx->ksch + r * N_BLOCK ); + inv_mix_sub_columns( s1, s2 ); + } +#endif + copy_and_key( out, s1, ctx->ksch ); + } + else + return (return_type) -1; + return 0; +} + +#endif + +#if defined( AES_ENC_128_OTFK ) + +/* The 'on the fly' encryption key update for for 128 bit keys */ + +static void update_encrypt_key_128( uint_8t k[N_BLOCK], uint_8t *rc ) +{ uint_8t cc; + + k[0] ^= s_box[k[13]] ^ *rc; + k[1] ^= s_box[k[14]]; + k[2] ^= s_box[k[15]]; + k[3] ^= s_box[k[12]]; + *rc = f2( *rc ); + + for(cc = 4; cc < 16; cc += 4 ) + { + k[cc + 0] ^= k[cc - 4]; + k[cc + 1] ^= k[cc - 3]; + k[cc + 2] ^= k[cc - 2]; + k[cc + 3] ^= k[cc - 1]; + } +} + +/* Encrypt a single block of 16 bytes with 'on the fly' 128 bit keying */ + +void aes_encrypt_128( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK], + const unsigned char key[N_BLOCK], unsigned char o_key[N_BLOCK] ) +{ uint_8t s1[N_BLOCK], r, rc = 1; + + if(o_key != key) + block16_copy( o_key, key ); + copy_and_key( s1, in, o_key ); + + for( r = 1 ; r < 10 ; ++r ) +#if defined( VERSION_1 ) + { + mix_sub_columns( s1 ); + update_encrypt_key_128( o_key, &rc ); + add_round_key( s1, o_key ); + } +#else + { uint_8t s2[N_BLOCK]; + mix_sub_columns( s2, s1 ); + update_encrypt_key_128( o_key, &rc ); + copy_and_key( s1, s2, o_key ); + } +#endif + + shift_sub_rows( s1 ); + update_encrypt_key_128( o_key, &rc ); + copy_and_key( out, s1, o_key ); +} + +#endif + +#if defined( AES_DEC_128_OTFK ) + +/* The 'on the fly' decryption key update for for 128 bit keys */ + +static void update_decrypt_key_128( uint_8t k[N_BLOCK], uint_8t *rc ) +{ uint_8t cc; + + for( cc = 12; cc > 0; cc -= 4 ) + { + k[cc + 0] ^= k[cc - 4]; + k[cc + 1] ^= k[cc - 3]; + k[cc + 2] ^= k[cc - 2]; + k[cc + 3] ^= k[cc - 1]; + } + *rc = d2(*rc); + k[0] ^= s_box[k[13]] ^ *rc; + k[1] ^= s_box[k[14]]; + k[2] ^= s_box[k[15]]; + k[3] ^= s_box[k[12]]; +} + +/* Decrypt a single block of 16 bytes with 'on the fly' 128 bit keying */ + +void aes_decrypt_128( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK], + const unsigned char key[N_BLOCK], unsigned char o_key[N_BLOCK] ) +{ + uint_8t s1[N_BLOCK], r, rc = 0x6c; + if(o_key != key) + block16_copy( o_key, key ); + + copy_and_key( s1, in, o_key ); + inv_shift_sub_rows( s1 ); + + for( r = 10 ; --r ; ) +#if defined( VERSION_1 ) + { + update_decrypt_key_128( o_key, &rc ); + add_round_key( s1, o_key ); + inv_mix_sub_columns( s1 ); + } +#else + { uint_8t s2[N_BLOCK]; + update_decrypt_key_128( o_key, &rc ); + copy_and_key( s2, s1, o_key ); + inv_mix_sub_columns( s1, s2 ); + } +#endif + update_decrypt_key_128( o_key, &rc ); + copy_and_key( out, s1, o_key ); +} + +#endif + +#if defined( AES_ENC_256_OTFK ) + +/* The 'on the fly' encryption key update for for 256 bit keys */ + +static void update_encrypt_key_256( uint_8t k[2 * N_BLOCK], uint_8t *rc ) +{ uint_8t cc; + + k[0] ^= s_box[k[29]] ^ *rc; + k[1] ^= s_box[k[30]]; + k[2] ^= s_box[k[31]]; + k[3] ^= s_box[k[28]]; + *rc = f2( *rc ); + + for(cc = 4; cc < 16; cc += 4) + { + k[cc + 0] ^= k[cc - 4]; + k[cc + 1] ^= k[cc - 3]; + k[cc + 2] ^= k[cc - 2]; + k[cc + 3] ^= k[cc - 1]; + } + + k[16] ^= s_box[k[12]]; + k[17] ^= s_box[k[13]]; + k[18] ^= s_box[k[14]]; + k[19] ^= s_box[k[15]]; + + for( cc = 20; cc < 32; cc += 4 ) + { + k[cc + 0] ^= k[cc - 4]; + k[cc + 1] ^= k[cc - 3]; + k[cc + 2] ^= k[cc - 2]; + k[cc + 3] ^= k[cc - 1]; + } +} + +/* Encrypt a single block of 16 bytes with 'on the fly' 256 bit keying */ + +void aes_encrypt_256( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK], + const unsigned char key[2 * N_BLOCK], unsigned char o_key[2 * N_BLOCK] ) +{ + uint_8t s1[N_BLOCK], r, rc = 1; + if(o_key != key) + { + block16_copy( o_key, key ); + block16_copy( o_key + 16, key + 16 ); + } + copy_and_key( s1, in, o_key ); + + for( r = 1 ; r < 14 ; ++r ) +#if defined( VERSION_1 ) + { + mix_sub_columns(s1); + if( r & 1 ) + add_round_key( s1, o_key + 16 ); + else + { + update_encrypt_key_256( o_key, &rc ); + add_round_key( s1, o_key ); + } + } +#else + { uint_8t s2[N_BLOCK]; + mix_sub_columns( s2, s1 ); + if( r & 1 ) + copy_and_key( s1, s2, o_key + 16 ); + else + { + update_encrypt_key_256( o_key, &rc ); + copy_and_key( s1, s2, o_key ); + } + } +#endif + + shift_sub_rows( s1 ); + update_encrypt_key_256( o_key, &rc ); + copy_and_key( out, s1, o_key ); +} + +#endif + +#if defined( AES_DEC_256_OTFK ) + +/* The 'on the fly' encryption key update for for 256 bit keys */ + +static void update_decrypt_key_256( uint_8t k[2 * N_BLOCK], uint_8t *rc ) +{ uint_8t cc; + + for(cc = 28; cc > 16; cc -= 4) + { + k[cc + 0] ^= k[cc - 4]; + k[cc + 1] ^= k[cc - 3]; + k[cc + 2] ^= k[cc - 2]; + k[cc + 3] ^= k[cc - 1]; + } + + k[16] ^= s_box[k[12]]; + k[17] ^= s_box[k[13]]; + k[18] ^= s_box[k[14]]; + k[19] ^= s_box[k[15]]; + + for(cc = 12; cc > 0; cc -= 4) + { + k[cc + 0] ^= k[cc - 4]; + k[cc + 1] ^= k[cc - 3]; + k[cc + 2] ^= k[cc - 2]; + k[cc + 3] ^= k[cc - 1]; + } + + *rc = d2(*rc); + k[0] ^= s_box[k[29]] ^ *rc; + k[1] ^= s_box[k[30]]; + k[2] ^= s_box[k[31]]; + k[3] ^= s_box[k[28]]; +} + +/* Decrypt a single block of 16 bytes with 'on the fly' + 256 bit keying +*/ +void aes_decrypt_256( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK], + const unsigned char key[2 * N_BLOCK], unsigned char o_key[2 * N_BLOCK] ) +{ + uint_8t s1[N_BLOCK], r, rc = 0x80; + + if(o_key != key) + { + block16_copy( o_key, key ); + block16_copy( o_key + 16, key + 16 ); + } + + copy_and_key( s1, in, o_key ); + inv_shift_sub_rows( s1 ); + + for( r = 14 ; --r ; ) +#if defined( VERSION_1 ) + { + if( ( r & 1 ) ) + { + update_decrypt_key_256( o_key, &rc ); + add_round_key( s1, o_key + 16 ); + } + else + add_round_key( s1, o_key ); + inv_mix_sub_columns( s1 ); + } +#else + { uint_8t s2[N_BLOCK]; + if( ( r & 1 ) ) + { + update_decrypt_key_256( o_key, &rc ); + copy_and_key( s2, s1, o_key + 16 ); + } + else + copy_and_key( s2, s1, o_key ); + inv_mix_sub_columns( s1, s2 ); + } +#endif + copy_and_key( out, s1, o_key ); +} + +#endif diff --git a/Crypto/AesSmall.h b/Crypto/AesSmall.h new file mode 100644 index 0000000..ebeb24e --- /dev/null +++ b/Crypto/AesSmall.h @@ -0,0 +1,169 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2006, Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software in both source and binary + form is allowed (with or without changes) provided that: + + 1. distributions of this source code include the above copyright + notice, this list of conditions and the following disclaimer; + + 2. distributions in binary form include the above copyright + notice, this list of conditions and the following disclaimer + in the documentation and/or other associated materials; + + 3. the copyright holder's name is not used to endorse products + built using this software without specific written permission. + + ALTERNATIVELY, provided that this notice is retained in full, this product + may be distributed under the terms of the GNU General Public License (GPL), + in which case the provisions of the GPL apply INSTEAD OF those given above. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue 09/09/2006 + + This is an AES implementation that uses only 8-bit byte operations on the + cipher state. + */ + +#ifndef AES_H +#define AES_H + +#if defined(__cplusplus) +extern "C" +{ +#endif + +/* This provides speed optimisation opportunities if 32-bit word + operations are available +*/ +#if 1 +# define HAVE_UINT_32T +#endif + +#if 1 +# define AES_ENC_PREKEYED /* AES encryption with a precomputed key schedule */ +#endif +#if 1 +# define AES_DEC_PREKEYED /* AES decryption with a precomputed key schedule */ +#endif +#if 0 +# define AES_ENC_128_OTFK /* AES encryption with 'on the fly' 128 bit keying */ +#endif +#if 0 +# define AES_DEC_128_OTFK /* AES decryption with 'on the fly' 128 bit keying */ +#endif +#if 0 +# define AES_ENC_256_OTFK /* AES encryption with 'on the fly' 256 bit keying */ +#endif +#if 0 +# define AES_DEC_256_OTFK /* AES decryption with 'on the fly' 256 bit keying */ +#endif + +#define N_ROW 4 +#define N_COL 4 +#define N_BLOCK (N_ROW * N_COL) +#define N_MAX_ROUNDS 14 + +typedef unsigned char uint_8t; + +typedef uint_8t return_type; +typedef uint_8t length_type; +typedef uint_8t uint_type; + +typedef unsigned char uint_8t; + +typedef struct +{ uint_8t ksch[(N_MAX_ROUNDS + 1) * N_BLOCK]; + uint_8t rnd; +} aes_context; + +/* The following calls are for a precomputed key schedule + + NOTE: If the length_type used for the key length is an + unsigned 8-bit character, a key length of 256 bits must + be entered as a length in bytes (valid inputs are hence + 128, 192, 16, 24 and 32). +*/ + +#if defined( AES_ENC_PREKEYED ) || defined( AES_DEC_PREKEYED ) + +return_type aes_set_key( const unsigned char key[], + length_type keylen, + aes_context ctx[1] ); +#endif + +#if defined( AES_ENC_PREKEYED ) + +return_type aes_encrypt( const unsigned char in[N_BLOCK], + unsigned char out[N_BLOCK], + const aes_context ctx[1] ); +#endif + +#if defined( AES_DEC_PREKEYED ) + +return_type aes_decrypt( const unsigned char in[N_BLOCK], + unsigned char out[N_BLOCK], + const aes_context ctx[1] ); +#endif + +/* The following calls are for 'on the fly' keying. In this case the + encryption and decryption keys are different. + + The encryption subroutines take a key in an array of bytes in + key[L] where L is 16, 24 or 32 bytes for key lengths of 128, + 192, and 256 bits respectively. They then encrypts the input + data, in[] with this key and put the reult in the output array + out[]. In addition, the second key array, o_key[L], is used + to output the key that is needed by the decryption subroutine + to reverse the encryption operation. The two key arrays can + be the same array but in this case the original key will be + overwritten. + + In the same way, the decryption subroutines output keys that + can be used to reverse their effect when used for encryption. + + Only 128 and 256 bit keys are supported in these 'on the fly' + modes. +*/ + +#if defined( AES_ENC_128_OTFK ) +void aes_encrypt_128( const unsigned char in[N_BLOCK], + unsigned char out[N_BLOCK], + const unsigned char key[N_BLOCK], + uint_8t o_key[N_BLOCK] ); +#endif + +#if defined( AES_DEC_128_OTFK ) +void aes_decrypt_128( const unsigned char in[N_BLOCK], + unsigned char out[N_BLOCK], + const unsigned char key[N_BLOCK], + unsigned char o_key[N_BLOCK] ); +#endif + +#if defined( AES_ENC_256_OTFK ) +void aes_encrypt_256( const unsigned char in[N_BLOCK], + unsigned char out[N_BLOCK], + const unsigned char key[2 * N_BLOCK], + unsigned char o_key[2 * N_BLOCK] ); +#endif + +#if defined( AES_DEC_256_OTFK ) +void aes_decrypt_256( const unsigned char in[N_BLOCK], + unsigned char out[N_BLOCK], + const unsigned char key[2 * N_BLOCK], + unsigned char o_key[2 * N_BLOCK] ); +#endif + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/Crypto/AesSmall_x86.asm b/Crypto/AesSmall_x86.asm new file mode 100644 index 0000000..fe7dc47 --- /dev/null +++ b/Crypto/AesSmall_x86.asm @@ -0,0 +1,1444 @@ + +; --------------------------------------------------------------------------- +; Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved. +; +; LICENSE TERMS +; +; The free distribution and use of this software is allowed (with or without +; changes) provided that: +; +; 1. source code distributions include the above copyright notice, this +; list of conditions and the following disclaimer; +; +; 2. binary distributions include the above copyright notice, this list +; of conditions and the following disclaimer in their documentation; +; +; 3. the name of the copyright holder is not used to endorse products +; built using this software without specific written permission. +; +; DISCLAIMER +; +; This software is provided 'as is' with no explicit or implied warranties +; in respect of its properties, including, but not limited to, correctness +; and/or fitness for purpose. +; --------------------------------------------------------------------------- +; Issue 20/12/2007 +; +; This code requires either ASM_X86_V2 or ASM_X86_V2C to be set in aesopt.h +; and the same define to be set here as well. If AES_V2C is set this file +; requires the C files aeskey.c and aestab.c for support. + +; An AES implementation for x86 processors using the YASM (or NASM) assembler. +; This is a full assembler implementation covering encryption, decryption and +; key scheduling. It uses 2k bytes of tables but its encryption and decryption +; performance is very close to that obtained using large tables. Key schedule +; expansion is slower for both encryption and decryption but this is likely to +; be offset by the much smaller load that this version places on the processor +; cache. I acknowledge the contribution made by Daniel Bernstein to aspects of +; the design of the AES round function used here. +; +; This code provides the standard AES block size (128 bits, 16 bytes) and the +; three standard AES key sizes (128, 192 and 256 bits). It has the same call +; interface as my C implementation. The ebx, esi, edi and ebp registers are +; preserved across calls but eax, ecx and edx and the artihmetic status flags +; are not. Although this is a full assembler implementation, it can be used +; in conjunction with my C code which provides faster key scheduling using +; large tables. In this case aeskey.c should be compiled with ASM_X86_V2C +; defined. It is also important that the defines below match those used in the +; C code. This code uses the VC++ register saving conentions; if it is used +; with another compiler, conventions for using and saving registers may need +; to be checked (and calling conventions). The YASM command line for the VC++ +; custom build step is: +; +; yasm -Xvc -f win32 -D -o "$(TargetDir)\$(InputName).obj" "$(InputPath)" +; +; For the cryptlib build this is (pcg): +; +; yasm -Xvc -f win32 -D ASM_X86_V2C -o aescrypt2.obj aes_x86_v2.asm +; +; where is ASM_X86_V2 or ASM_X86_V2C. The calling intefaces are: +; +; AES_RETURN aes_encrypt(const unsigned char in_blk[], +; unsigned char out_blk[], const aes_encrypt_ctx cx[1]); +; +; AES_RETURN aes_decrypt(const unsigned char in_blk[], +; unsigned char out_blk[], const aes_decrypt_ctx cx[1]); +; +; AES_RETURN aes_encrypt_key(const unsigned char key[], +; const aes_encrypt_ctx cx[1]); +; +; AES_RETURN aes_decrypt_key(const unsigned char key[], +; const aes_decrypt_ctx cx[1]); +; +; AES_RETURN aes_encrypt_key(const unsigned char key[], +; unsigned int len, const aes_decrypt_ctx cx[1]); +; +; AES_RETURN aes_decrypt_key(const unsigned char key[], +; unsigned int len, const aes_decrypt_ctx cx[1]); +; +; where is 128, 102 or 256. In the last two calls the length can be in +; either bits or bytes. + +; The DLL interface must use the _stdcall convention in which the number +; of bytes of parameter space is added after an @ to the sutine's name. +; We must also remove our parameters from the stack before return (see +; the do_exit macro). Define DLL_EXPORT for the Dynamic Link Library version. + +; +; Adapted for TrueCrypt: +; - All tables generated at run-time +; - Adapted for 16-bit environment +; + +CPU 386 +USE16 +SEGMENT _TEXT PUBLIC CLASS=CODE USE16 +SEGMENT _DATA PUBLIC CLASS=DATA USE16 + +GROUP DGROUP _TEXT _DATA + +extern _aes_dec_tab ; Aestab.c +extern _aes_enc_tab + +; %define DLL_EXPORT + +; The size of the code can be reduced by using functions for the encryption +; and decryption rounds in place of macro expansion + +%define REDUCE_CODE_SIZE + +; Comment in/out the following lines to obtain the desired subroutines. These +; selections MUST match those in the C header file aes.h + +; %define AES_128 ; define if AES with 128 bit keys is needed +; %define AES_192 ; define if AES with 192 bit keys is needed +%define AES_256 ; define if AES with 256 bit keys is needed +; %define AES_VAR ; define if a variable key size is needed +%define ENCRYPTION ; define if encryption is needed +%define DECRYPTION ; define if decryption is needed +; %define AES_REV_DKS ; define if key decryption schedule is reversed + +%ifndef ASM_X86_V2C +%define ENCRYPTION_KEY_SCHEDULE ; define if encryption key expansion is needed +%define DECRYPTION_KEY_SCHEDULE ; define if decryption key expansion is needed +%endif + +; The encryption key schedule has the following in memory layout where N is the +; number of rounds (10, 12 or 14): +; +; lo: | input key (round 0) | ; each round is four 32-bit words +; | encryption round 1 | +; | encryption round 2 | +; .... +; | encryption round N-1 | +; hi: | encryption round N | +; +; The decryption key schedule is normally set up so that it has the same +; layout as above by actually reversing the order of the encryption key +; schedule in memory (this happens when AES_REV_DKS is set): +; +; lo: | decryption round 0 | = | encryption round N | +; | decryption round 1 | = INV_MIX_COL[ | encryption round N-1 | ] +; | decryption round 2 | = INV_MIX_COL[ | encryption round N-2 | ] +; .... .... +; | decryption round N-1 | = INV_MIX_COL[ | encryption round 1 | ] +; hi: | decryption round N | = | input key (round 0) | +; +; with rounds except the first and last modified using inv_mix_column() +; But if AES_REV_DKS is NOT set the order of keys is left as it is for +; encryption so that it has to be accessed in reverse when used for +; decryption (although the inverse mix column modifications are done) +; +; lo: | decryption round 0 | = | input key (round 0) | +; | decryption round 1 | = INV_MIX_COL[ | encryption round 1 | ] +; | decryption round 2 | = INV_MIX_COL[ | encryption round 2 | ] +; .... .... +; | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ] +; hi: | decryption round N | = | encryption round N | +; +; This layout is faster when the assembler key scheduling provided here +; is used. +; +; End of user defines + +%ifdef AES_VAR +%ifndef AES_128 +%define AES_128 +%endif +%ifndef AES_192 +%define AES_192 +%endif +%ifndef AES_256 +%define AES_256 +%endif +%endif + +%ifdef AES_VAR +%define KS_LENGTH 60 +%elifdef AES_256 +%define KS_LENGTH 60 +%elifdef AES_192 +%define KS_LENGTH 52 +%else +%define KS_LENGTH 44 +%endif + +; These macros implement stack based local variables + +%macro save 2 + mov [esp+4*%1],%2 +%endmacro + +%macro restore 2 + mov %1,[esp+4*%2] +%endmacro + +%ifdef REDUCE_CODE_SIZE + %macro mf_call 1 + call %1 + %endmacro +%else + %macro mf_call 1 + %1 + %endmacro +%endif + +; the DLL has to implement the _stdcall calling interface on return +; In this case we have to take our parameters (3 4-byte pointers) +; off the stack + +%define parms 12 + +%macro do_name 1-2 parms +%ifndef DLL_EXPORT + global %1 +%1: +%else + global %1@%2 + export %1@%2 +%1@%2: +%endif +%endmacro + +%macro do_call 1-2 parms +%ifndef DLL_EXPORT + call %1 + add esp,%2 +%else + call %1@%2 +%endif +%endmacro + +%macro do_exit 0-1 parms +%ifdef DLL_EXPORT + ret %1 +%else + ret +%endif +%endmacro + +; finite field multiplies by {02}, {04} and {08} + +%define f2(x) ((x<<1)^(((x>>7)&1)*0x11b)) +%define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b)) +%define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b)) + +; finite field multiplies required in table generation + +%define f3(x) (f2(x) ^ x) +%define f9(x) (f8(x) ^ x) +%define fb(x) (f8(x) ^ f2(x) ^ x) +%define fd(x) (f8(x) ^ f4(x) ^ x) +%define fe(x) (f8(x) ^ f4(x) ^ f2(x)) + +%define etab_0(x) [_aes_enc_tab+4+8*x] +%define etab_1(x) [_aes_enc_tab+3+8*x] +%define etab_2(x) [_aes_enc_tab+2+8*x] +%define etab_3(x) [_aes_enc_tab+1+8*x] +%define etab_b(x) byte [_aes_enc_tab+1+8*x] ; used with movzx for 0x000000xx +%define etab_w(x) word [_aes_enc_tab+8*x] ; used with movzx for 0x0000xx00 + +%define btab_0(x) [_aes_enc_tab+6+8*x] +%define btab_1(x) [_aes_enc_tab+5+8*x] +%define btab_2(x) [_aes_enc_tab+4+8*x] +%define btab_3(x) [_aes_enc_tab+3+8*x] + +; ROUND FUNCTION. Build column[2] on ESI and column[3] on EDI that have the +; round keys pre-loaded. Build column[0] in EBP and column[1] in EBX. +; +; Input: +; +; EAX column[0] +; EBX column[1] +; ECX column[2] +; EDX column[3] +; ESI column key[round][2] +; EDI column key[round][3] +; EBP scratch +; +; Output: +; +; EBP column[0] unkeyed +; EBX column[1] unkeyed +; ESI column[2] keyed +; EDI column[3] keyed +; EAX scratch +; ECX scratch +; EDX scratch + +%macro rnd_fun 2 + + rol ebx,16 + %1 esi, cl, 0, ebp + %1 esi, dh, 1, ebp + %1 esi, bh, 3, ebp + %1 edi, dl, 0, ebp + %1 edi, ah, 1, ebp + %1 edi, bl, 2, ebp + %2 ebp, al, 0, ebp + shr ebx,16 + and eax,0xffff0000 + or eax,ebx + shr edx,16 + %1 ebp, ah, 1, ebx + %1 ebp, dh, 3, ebx + %2 ebx, dl, 2, ebx + %1 ebx, ch, 1, edx + %1 ebx, al, 0, edx + shr eax,16 + shr ecx,16 + %1 ebp, cl, 2, edx + %1 edi, ch, 3, edx + %1 esi, al, 2, edx + %1 ebx, ah, 3, edx + +%endmacro + +; Basic MOV and XOR Operations for normal rounds + +%macro nr_xor 4 + movzx %4,%2 + xor %1,etab_%3(%4) +%endmacro + +%macro nr_mov 4 + movzx %4,%2 + mov %1,etab_%3(%4) +%endmacro + +; Basic MOV and XOR Operations for last round + +%if 1 + + %macro lr_xor 4 + movzx %4,%2 + movzx %4,etab_b(%4) + %if %3 != 0 + shl %4,8*%3 + %endif + xor %1,%4 + %endmacro + + %macro lr_mov 4 + movzx %4,%2 + movzx %1,etab_b(%4) + %if %3 != 0 + shl %1,8*%3 + %endif + %endmacro + +%else ; less effective but worth leaving as an option + + %macro lr_xor 4 + movzx %4,%2 + mov %4,btab_%3(%4) + and %4,0x000000ff << 8 * %3 + xor %1,%4 + %endmacro + + %macro lr_mov 4 + movzx %4,%2 + mov %1,btab_%3(%4) + and %1,0x000000ff << 8 * %3 + %endmacro + +%endif + +; Apply S-Box to the 4 bytes in a 32-bit word and rotate byte positions + +%ifdef REDUCE_CODE_SIZE + +l3s_col: + movzx ecx,al ; in eax + movzx ecx, etab_b(ecx) ; out eax + xor edx,ecx ; scratch ecx,edx + movzx ecx,ah + movzx ecx, etab_b(ecx) + shl ecx,8 + xor edx,ecx + shr eax,16 + movzx ecx,al + movzx ecx, etab_b(ecx) + shl ecx,16 + xor edx,ecx + movzx ecx,ah + movzx ecx, etab_b(ecx) + shl ecx,24 + xor edx,ecx + mov eax,edx + ret + +%else + +%macro l3s_col 0 + + movzx ecx,al ; in eax + movzx ecx, etab_b(ecx) ; out eax + xor edx,ecx ; scratch ecx,edx + movzx ecx,ah + movzx ecx, etab_b(ecx) + shl ecx,8 + xor edx,ecx + shr eax,16 + movzx ecx,al + movzx ecx, etab_b(ecx) + shl ecx,16 + xor edx,ecx + movzx ecx,ah + movzx ecx, etab_b(ecx) + shl ecx,24 + xor edx,ecx + mov eax,edx + +%endmacro + +%endif + +; offsets to parameters + +in_blk equ 2 ; input byte array address parameter +out_blk equ 4 ; output byte array address parameter +ctx equ 6 ; AES context structure +stk_spc equ 20 ; stack space + +%ifdef ENCRYPTION + +; %define ENCRYPTION_TABLE + +%ifdef REDUCE_CODE_SIZE + +enc_round: + sub sp, 2 + add ebp,16 + save 1,ebp + mov esi,[ebp+8] + mov edi,[ebp+12] + + rnd_fun nr_xor, nr_mov + + mov eax,ebp + mov ecx,esi + mov edx,edi + restore ebp,1 + xor eax,[ebp] + xor ebx,[ebp+4] + add sp, 2 + ret + +%else + +%macro enc_round 0 + + add ebp,16 + save 0,ebp + mov esi,[ebp+8] + mov edi,[ebp+12] + + rnd_fun nr_xor, nr_mov + + mov eax,ebp + mov ecx,esi + mov edx,edi + restore ebp,0 + xor eax,[ebp] + xor ebx,[ebp+4] + +%endmacro + +%endif + +%macro enc_last_round 0 + + add ebp,16 + save 0,ebp + mov esi,[ebp+8] + mov edi,[ebp+12] + + rnd_fun lr_xor, lr_mov + + mov eax,ebp + restore ebp,0 + xor eax,[ebp] + xor ebx,[ebp+4] + +%endmacro + + section _TEXT + +; AES Encryption Subroutine + + do_name _aes_encrypt,12 + + mov ax, sp + movzx esp, ax + + sub esp,stk_spc + mov [esp+16],ebp + mov [esp+12],ebx + mov [esp+ 8],esi + mov [esp+ 4],edi + + movzx esi,word [esp+in_blk+stk_spc] ; input pointer + mov eax,[esi ] + mov ebx,[esi+ 4] + mov ecx,[esi+ 8] + mov edx,[esi+12] + + movzx ebp,word [esp+ctx+stk_spc] ; key pointer + movzx edi,byte [ebp+4*KS_LENGTH] + xor eax,[ebp ] + xor ebx,[ebp+ 4] + xor ecx,[ebp+ 8] + xor edx,[ebp+12] + +; determine the number of rounds + +%ifndef AES_256 + cmp edi,10*16 + je .3 + cmp edi,12*16 + je .2 + cmp edi,14*16 + je .1 + mov eax,-1 + jmp .5 +%endif + +.1: mf_call enc_round + mf_call enc_round +.2: mf_call enc_round + mf_call enc_round +.3: mf_call enc_round + mf_call enc_round + mf_call enc_round + mf_call enc_round + mf_call enc_round + mf_call enc_round + mf_call enc_round + mf_call enc_round + mf_call enc_round + enc_last_round + + movzx edx,word [esp+out_blk+stk_spc] + mov [edx],eax + mov [edx+4],ebx + mov [edx+8],esi + mov [edx+12],edi + xor eax,eax + +.5: mov ebp,[esp+16] + mov ebx,[esp+12] + mov esi,[esp+ 8] + mov edi,[esp+ 4] + add esp,stk_spc + do_exit 12 + +%endif + +%macro f_key 2 + + push ecx + push edx + mov edx,esi + ror eax,8 + mf_call l3s_col + mov esi,eax + pop edx + pop ecx + xor esi,rc_val + + mov [ebp+%1*%2],esi + xor edi,esi + mov [ebp+%1*%2+4],edi + xor ecx,edi + mov [ebp+%1*%2+8],ecx + xor edx,ecx + mov [ebp+%1*%2+12],edx + mov eax,edx + +%if %2 == 24 + +%if %1 < 7 + xor eax,[ebp+%1*%2+16-%2] + mov [ebp+%1*%2+16],eax + xor eax,[ebp+%1*%2+20-%2] + mov [ebp+%1*%2+20],eax +%endif + +%elif %2 == 32 + +%if %1 < 6 + push ecx + push edx + mov edx,[ebp+%1*%2+16-%2] + mf_call l3s_col + pop edx + pop ecx + mov [ebp+%1*%2+16],eax + xor eax,[ebp+%1*%2+20-%2] + mov [ebp+%1*%2+20],eax + xor eax,[ebp+%1*%2+24-%2] + mov [ebp+%1*%2+24],eax + xor eax,[ebp+%1*%2+28-%2] + mov [ebp+%1*%2+28],eax +%endif + +%endif + +%assign rc_val f2(rc_val) + +%endmacro + +%ifdef ENCRYPTION_KEY_SCHEDULE + +%ifdef AES_128 + +%ifndef ENCRYPTION_TABLE +; %define ENCRYPTION_TABLE +%endif + +%assign rc_val 1 + + do_name _aes_encrypt_key128,8 + + push ebp + push ebx + push esi + push edi + + mov ebp,[esp+24] + mov [ebp+4*KS_LENGTH],dword 10*16 + mov ebx,[esp+20] + + mov esi,[ebx] + mov [ebp],esi + mov edi,[ebx+4] + mov [ebp+4],edi + mov ecx,[ebx+8] + mov [ebp+8],ecx + mov edx,[ebx+12] + mov [ebp+12],edx + add ebp,16 + mov eax,edx + + f_key 0,16 ; 11 * 4 = 44 unsigned longs + f_key 1,16 ; 4 + 4 * 10 generated = 44 + f_key 2,16 + f_key 3,16 + f_key 4,16 + f_key 5,16 + f_key 6,16 + f_key 7,16 + f_key 8,16 + f_key 9,16 + + pop edi + pop esi + pop ebx + pop ebp + xor eax,eax + do_exit 8 + +%endif + +%ifdef AES_192 + +%ifndef ENCRYPTION_TABLE +; %define ENCRYPTION_TABLE +%endif + +%assign rc_val 1 + + do_name _aes_encrypt_key192,8 + + push ebp + push ebx + push esi + push edi + + mov ebp,[esp+24] + mov [ebp+4*KS_LENGTH],dword 12 * 16 + mov ebx,[esp+20] + + mov esi,[ebx] + mov [ebp],esi + mov edi,[ebx+4] + mov [ebp+4],edi + mov ecx,[ebx+8] + mov [ebp+8],ecx + mov edx,[ebx+12] + mov [ebp+12],edx + mov eax,[ebx+16] + mov [ebp+16],eax + mov eax,[ebx+20] + mov [ebp+20],eax + add ebp,24 + + f_key 0,24 ; 13 * 4 = 52 unsigned longs + f_key 1,24 ; 6 + 6 * 8 generated = 54 + f_key 2,24 + f_key 3,24 + f_key 4,24 + f_key 5,24 + f_key 6,24 + f_key 7,24 + + pop edi + pop esi + pop ebx + pop ebp + xor eax,eax + do_exit 8 + +%endif + +%ifdef AES_256 + +%ifndef ENCRYPTION_TABLE +; %define ENCRYPTION_TABLE +%endif + +%assign rc_val 1 + + do_name _aes_encrypt_key256,8 + + mov ax, sp + movzx esp, ax + + push ebp + push ebx + push esi + push edi + + movzx ebp, word [esp+20] ; ks + mov [ebp+4*KS_LENGTH],dword 14 * 16 + movzx ebx, word [esp+18] ; key + + mov esi,[ebx] + mov [ebp],esi + mov edi,[ebx+4] + mov [ebp+4],edi + mov ecx,[ebx+8] + mov [ebp+8],ecx + mov edx,[ebx+12] + mov [ebp+12],edx + mov eax,[ebx+16] + mov [ebp+16],eax + mov eax,[ebx+20] + mov [ebp+20],eax + mov eax,[ebx+24] + mov [ebp+24],eax + mov eax,[ebx+28] + mov [ebp+28],eax + add ebp,32 + + f_key 0,32 ; 15 * 4 = 60 unsigned longs + f_key 1,32 ; 8 + 8 * 7 generated = 64 + f_key 2,32 + f_key 3,32 + f_key 4,32 + f_key 5,32 + f_key 6,32 + + pop edi + pop esi + pop ebx + pop ebp + xor eax,eax + do_exit 8 + +%endif + +%ifdef AES_VAR + +%ifndef ENCRYPTION_TABLE +; %define ENCRYPTION_TABLE +%endif + + do_name _aes_encrypt_key,12 + + mov ecx,[esp+4] + mov eax,[esp+8] + mov edx,[esp+12] + push edx + push ecx + + cmp eax,16 + je .1 + cmp eax,128 + je .1 + + cmp eax,24 + je .2 + cmp eax,192 + je .2 + + cmp eax,32 + je .3 + cmp eax,256 + je .3 + mov eax,-1 + add esp,8 + do_exit 12 + +.1: do_call _aes_encrypt_key128,8 + do_exit 12 +.2: do_call _aes_encrypt_key192,8 + do_exit 12 +.3: do_call _aes_encrypt_key256,8 + do_exit 12 + +%endif + +%endif + +%ifdef ENCRYPTION_TABLE + +; S-box data - 256 entries + + section _DATA + +%define u8(x) 0, x, x, f3(x), f2(x), x, x, f3(x) + +_aes_enc_tab: + db u8(0x63),u8(0x7c),u8(0x77),u8(0x7b),u8(0xf2),u8(0x6b),u8(0x6f),u8(0xc5) + db u8(0x30),u8(0x01),u8(0x67),u8(0x2b),u8(0xfe),u8(0xd7),u8(0xab),u8(0x76) + db u8(0xca),u8(0x82),u8(0xc9),u8(0x7d),u8(0xfa),u8(0x59),u8(0x47),u8(0xf0) + db u8(0xad),u8(0xd4),u8(0xa2),u8(0xaf),u8(0x9c),u8(0xa4),u8(0x72),u8(0xc0) + db u8(0xb7),u8(0xfd),u8(0x93),u8(0x26),u8(0x36),u8(0x3f),u8(0xf7),u8(0xcc) + db u8(0x34),u8(0xa5),u8(0xe5),u8(0xf1),u8(0x71),u8(0xd8),u8(0x31),u8(0x15) + db u8(0x04),u8(0xc7),u8(0x23),u8(0xc3),u8(0x18),u8(0x96),u8(0x05),u8(0x9a) + db u8(0x07),u8(0x12),u8(0x80),u8(0xe2),u8(0xeb),u8(0x27),u8(0xb2),u8(0x75) + db u8(0x09),u8(0x83),u8(0x2c),u8(0x1a),u8(0x1b),u8(0x6e),u8(0x5a),u8(0xa0) + db u8(0x52),u8(0x3b),u8(0xd6),u8(0xb3),u8(0x29),u8(0xe3),u8(0x2f),u8(0x84) + db u8(0x53),u8(0xd1),u8(0x00),u8(0xed),u8(0x20),u8(0xfc),u8(0xb1),u8(0x5b) + db u8(0x6a),u8(0xcb),u8(0xbe),u8(0x39),u8(0x4a),u8(0x4c),u8(0x58),u8(0xcf) + db u8(0xd0),u8(0xef),u8(0xaa),u8(0xfb),u8(0x43),u8(0x4d),u8(0x33),u8(0x85) + db u8(0x45),u8(0xf9),u8(0x02),u8(0x7f),u8(0x50),u8(0x3c),u8(0x9f),u8(0xa8) + db u8(0x51),u8(0xa3),u8(0x40),u8(0x8f),u8(0x92),u8(0x9d),u8(0x38),u8(0xf5) + db u8(0xbc),u8(0xb6),u8(0xda),u8(0x21),u8(0x10),u8(0xff),u8(0xf3),u8(0xd2) + db u8(0xcd),u8(0x0c),u8(0x13),u8(0xec),u8(0x5f),u8(0x97),u8(0x44),u8(0x17) + db u8(0xc4),u8(0xa7),u8(0x7e),u8(0x3d),u8(0x64),u8(0x5d),u8(0x19),u8(0x73) + db u8(0x60),u8(0x81),u8(0x4f),u8(0xdc),u8(0x22),u8(0x2a),u8(0x90),u8(0x88) + db u8(0x46),u8(0xee),u8(0xb8),u8(0x14),u8(0xde),u8(0x5e),u8(0x0b),u8(0xdb) + db u8(0xe0),u8(0x32),u8(0x3a),u8(0x0a),u8(0x49),u8(0x06),u8(0x24),u8(0x5c) + db u8(0xc2),u8(0xd3),u8(0xac),u8(0x62),u8(0x91),u8(0x95),u8(0xe4),u8(0x79) + db u8(0xe7),u8(0xc8),u8(0x37),u8(0x6d),u8(0x8d),u8(0xd5),u8(0x4e),u8(0xa9) + db u8(0x6c),u8(0x56),u8(0xf4),u8(0xea),u8(0x65),u8(0x7a),u8(0xae),u8(0x08) + db u8(0xba),u8(0x78),u8(0x25),u8(0x2e),u8(0x1c),u8(0xa6),u8(0xb4),u8(0xc6) + db u8(0xe8),u8(0xdd),u8(0x74),u8(0x1f),u8(0x4b),u8(0xbd),u8(0x8b),u8(0x8a) + db u8(0x70),u8(0x3e),u8(0xb5),u8(0x66),u8(0x48),u8(0x03),u8(0xf6),u8(0x0e) + db u8(0x61),u8(0x35),u8(0x57),u8(0xb9),u8(0x86),u8(0xc1),u8(0x1d),u8(0x9e) + db u8(0xe1),u8(0xf8),u8(0x98),u8(0x11),u8(0x69),u8(0xd9),u8(0x8e),u8(0x94) + db u8(0x9b),u8(0x1e),u8(0x87),u8(0xe9),u8(0xce),u8(0x55),u8(0x28),u8(0xdf) + db u8(0x8c),u8(0xa1),u8(0x89),u8(0x0d),u8(0xbf),u8(0xe6),u8(0x42),u8(0x68) + db u8(0x41),u8(0x99),u8(0x2d),u8(0x0f),u8(0xb0),u8(0x54),u8(0xbb),u8(0x16) + +%endif + +%ifdef DECRYPTION + +; %define DECRYPTION_TABLE + +%define dtab_0(x) [_aes_dec_tab+ 8*x] +%define dtab_1(x) [_aes_dec_tab+3+8*x] +%define dtab_2(x) [_aes_dec_tab+2+8*x] +%define dtab_3(x) [_aes_dec_tab+1+8*x] +%define dtab_x(x) byte [_aes_dec_tab+7+8*x] + +%macro irn_fun 2 + + rol eax,16 + %1 esi, cl, 0, ebp + %1 esi, bh, 1, ebp + %1 esi, al, 2, ebp + %1 edi, dl, 0, ebp + %1 edi, ch, 1, ebp + %1 edi, ah, 3, ebp + %2 ebp, bl, 0, ebp + shr eax,16 + and ebx,0xffff0000 + or ebx,eax + shr ecx,16 + %1 ebp, bh, 1, eax + %1 ebp, ch, 3, eax + %2 eax, cl, 2, ecx + %1 eax, bl, 0, ecx + %1 eax, dh, 1, ecx + shr ebx,16 + shr edx,16 + %1 esi, dh, 3, ecx + %1 ebp, dl, 2, ecx + %1 eax, bh, 3, ecx + %1 edi, bl, 2, ecx + +%endmacro + +; Basic MOV and XOR Operations for normal rounds + +%macro ni_xor 4 + movzx %4,%2 + xor %1,dtab_%3(%4) +%endmacro + +%macro ni_mov 4 + movzx %4,%2 + mov %1,dtab_%3(%4) +%endmacro + +; Basic MOV and XOR Operations for last round + +%macro li_xor 4 + movzx %4,%2 + movzx %4,dtab_x(%4) +%if %3 != 0 + shl %4,8*%3 +%endif + xor %1,%4 +%endmacro + +%macro li_mov 4 + movzx %4,%2 + movzx %1,dtab_x(%4) +%if %3 != 0 + shl %1,8*%3 +%endif +%endmacro + +%ifdef REDUCE_CODE_SIZE + +dec_round: + sub sp, 2 +%ifdef AES_REV_DKS + add ebp,16 +%else + sub ebp,16 +%endif + save 1,ebp + mov esi,[ebp+8] + mov edi,[ebp+12] + + irn_fun ni_xor, ni_mov + + mov ebx,ebp + mov ecx,esi + mov edx,edi + restore ebp,1 + xor eax,[ebp] + xor ebx,[ebp+4] + add sp, 2 + ret + +%else + +%macro dec_round 0 + +%ifdef AES_REV_DKS + add ebp,16 +%else + sub ebp,16 +%endif + save 0,ebp + mov esi,[ebp+8] + mov edi,[ebp+12] + + irn_fun ni_xor, ni_mov + + mov ebx,ebp + mov ecx,esi + mov edx,edi + restore ebp,0 + xor eax,[ebp] + xor ebx,[ebp+4] + +%endmacro + +%endif + +%macro dec_last_round 0 + +%ifdef AES_REV_DKS + add ebp,16 +%else + sub ebp,16 +%endif + save 0,ebp + mov esi,[ebp+8] + mov edi,[ebp+12] + + irn_fun li_xor, li_mov + + mov ebx,ebp + restore ebp,0 + xor eax,[ebp] + xor ebx,[ebp+4] + +%endmacro + + section _TEXT + +; AES Decryption Subroutine + + do_name _aes_decrypt,12 + + mov ax, sp + movzx esp, ax + + sub esp,stk_spc + mov [esp+16],ebp + mov [esp+12],ebx + mov [esp+ 8],esi + mov [esp+ 4],edi + +; input four columns and xor in first round key + + movzx esi,word [esp+in_blk+stk_spc] ; input pointer + mov eax,[esi ] + mov ebx,[esi+ 4] + mov ecx,[esi+ 8] + mov edx,[esi+12] + lea esi,[esi+16] + + movzx ebp, word [esp+ctx+stk_spc] ; key pointer + movzx edi,byte[ebp+4*KS_LENGTH] +%ifndef AES_REV_DKS ; if decryption key schedule is not reversed + lea ebp,[ebp+edi] ; we have to access it from the top down +%endif + xor eax,[ebp ] ; key schedule + xor ebx,[ebp+ 4] + xor ecx,[ebp+ 8] + xor edx,[ebp+12] + +; determine the number of rounds + +%ifndef AES_256 + cmp edi,10*16 + je .3 + cmp edi,12*16 + je .2 + cmp edi,14*16 + je .1 + mov eax,-1 + jmp .5 +%endif + +.1: mf_call dec_round + mf_call dec_round +.2: mf_call dec_round + mf_call dec_round +.3: mf_call dec_round + mf_call dec_round + mf_call dec_round + mf_call dec_round + mf_call dec_round + mf_call dec_round + mf_call dec_round + mf_call dec_round + mf_call dec_round + dec_last_round + +; move final values to the output array. + + movzx ebp,word [esp+out_blk+stk_spc] + mov [ebp],eax + mov [ebp+4],ebx + mov [ebp+8],esi + mov [ebp+12],edi + xor eax,eax + +.5: mov ebp,[esp+16] + mov ebx,[esp+12] + mov esi,[esp+ 8] + mov edi,[esp+ 4] + add esp,stk_spc + do_exit 12 + +%endif + +%ifdef REDUCE_CODE_SIZE + +inv_mix_col: + movzx ecx,dl ; input eax, edx + movzx ecx,etab_b(ecx) ; output eax + mov eax,dtab_0(ecx) ; used ecx + movzx ecx,dh + shr edx,16 + movzx ecx,etab_b(ecx) + xor eax,dtab_1(ecx) + movzx ecx,dl + movzx ecx,etab_b(ecx) + xor eax,dtab_2(ecx) + movzx ecx,dh + movzx ecx,etab_b(ecx) + xor eax,dtab_3(ecx) + ret + +%else + +%macro inv_mix_col 0 + + movzx ecx,dl ; input eax, edx + movzx ecx,etab_b(ecx) ; output eax + mov eax,dtab_0(ecx) ; used ecx + movzx ecx,dh + shr edx,16 + movzx ecx,etab_b(ecx) + xor eax,dtab_1(ecx) + movzx ecx,dl + movzx ecx,etab_b(ecx) + xor eax,dtab_2(ecx) + movzx ecx,dh + movzx ecx,etab_b(ecx) + xor eax,dtab_3(ecx) + +%endmacro + +%endif + +%ifdef DECRYPTION_KEY_SCHEDULE + +%ifdef AES_128 + +%ifndef DECRYPTION_TABLE +; %define DECRYPTION_TABLE +%endif + + do_name _aes_decrypt_key128,8 + + push ebp + push ebx + push esi + push edi + mov eax,[esp+24] ; context + mov edx,[esp+20] ; key + push eax + push edx + do_call _aes_encrypt_key128,8 ; generate expanded encryption key + mov eax,10*16 + mov esi,[esp+24] ; pointer to first round key + lea edi,[esi+eax] ; pointer to last round key + add esi,32 + ; the inverse mix column transformation + mov edx,[esi-16] ; needs to be applied to all round keys + mf_call inv_mix_col ; except first and last. Hence start by + mov [esi-16],eax ; transforming the four sub-keys in the + mov edx,[esi-12] ; second round key + mf_call inv_mix_col + mov [esi-12],eax ; transformations for subsequent rounds + mov edx,[esi-8] ; can then be made more efficient by + mf_call inv_mix_col ; noting that for three of the four sub-keys + mov [esi-8],eax ; in the encryption round key ek[r]: + mov edx,[esi-4] ; + mf_call inv_mix_col ; ek[r][n] = ek[r][n-1] ^ ek[r-1][n] + mov [esi-4],eax ; + ; where n is 1..3. Hence the corresponding +.0: mov edx,[esi] ; subkeys in the decryption round key dk[r] + mf_call inv_mix_col ; also obey since inv_mix_col is linear in + mov [esi],eax ; GF(256): + xor eax,[esi-12] ; + mov [esi+4],eax ; dk[r][n] = dk[r][n-1] ^ dk[r-1][n] + xor eax,[esi-8] ; + mov [esi+8],eax ; So we only need one inverse mix column + xor eax,[esi-4] ; operation (n = 0) for each four word cycle + mov [esi+12],eax ; in the expanded key. + add esi,16 + cmp edi,esi + jg .0 + jmp dec_end + +%endif + +%ifdef AES_192 + +%ifndef DECRYPTION_TABLE +; %define DECRYPTION_TABLE +%endif + + do_name _aes_decrypt_key192,8 + + push ebp + push ebx + push esi + push edi + mov eax,[esp+24] ; context + mov edx,[esp+20] ; key + push eax + push edx + do_call _aes_encrypt_key192,8 ; generate expanded encryption key + mov eax,12*16 + mov esi,[esp+24] ; first round key + lea edi,[esi+eax] ; last round key + add esi,48 ; the first 6 words are the key, of + ; which the top 2 words are part of + mov edx,[esi-32] ; the second round key and hence + mf_call inv_mix_col ; need to be modified. After this we + mov [esi-32],eax ; need to do a further six values prior + mov edx,[esi-28] ; to using a more efficient technique + mf_call inv_mix_col ; based on: + mov [esi-28],eax ; + ; dk[r][n] = dk[r][n-1] ^ dk[r-1][n] + mov edx,[esi-24] ; + mf_call inv_mix_col ; for n = 1 .. 5 where the key expansion + mov [esi-24],eax ; cycle is now 6 words long + mov edx,[esi-20] + mf_call inv_mix_col + mov [esi-20],eax + mov edx,[esi-16] + mf_call inv_mix_col + mov [esi-16],eax + mov edx,[esi-12] + mf_call inv_mix_col + mov [esi-12],eax + mov edx,[esi-8] + mf_call inv_mix_col + mov [esi-8],eax + mov edx,[esi-4] + mf_call inv_mix_col + mov [esi-4],eax + +.0: mov edx,[esi] ; the expanded key is 13 * 4 = 44 32-bit words + mf_call inv_mix_col ; of which 11 * 4 = 44 have to be modified + mov [esi],eax ; using inv_mix_col. We have already done 8 + xor eax,[esi-20] ; of these so 36 are left - hence we need + mov [esi+4],eax ; exactly 6 loops of six here + xor eax,[esi-16] + mov [esi+8],eax + xor eax,[esi-12] + mov [esi+12],eax + xor eax,[esi-8] + mov [esi+16],eax + xor eax,[esi-4] + mov [esi+20],eax + add esi,24 + cmp edi,esi + jg .0 + jmp dec_end + +%endif + +%ifdef AES_256 + +%ifndef DECRYPTION_TABLE +; %define DECRYPTION_TABLE +%endif + + do_name _aes_decrypt_key256,8 + + mov ax, sp + movzx esp, ax + push ebp + push ebx + push esi + push edi + + movzx eax, word [esp+20] ; ks + movzx edx, word [esp+18] ; key + push ax + push dx + do_call _aes_encrypt_key256,4 ; generate expanded encryption key + mov eax,14*16 + movzx esi, word [esp+20] ; ks + lea edi,[esi+eax] + add esi,64 + + mov edx,[esi-48] ; the primary key is 8 words, of which + mf_call inv_mix_col ; the top four require modification + mov [esi-48],eax + mov edx,[esi-44] + mf_call inv_mix_col + mov [esi-44],eax + mov edx,[esi-40] + mf_call inv_mix_col + mov [esi-40],eax + mov edx,[esi-36] + mf_call inv_mix_col + mov [esi-36],eax + + mov edx,[esi-32] ; the encryption key expansion cycle is + mf_call inv_mix_col ; now eight words long so we need to + mov [esi-32],eax ; start by doing one complete block + mov edx,[esi-28] + mf_call inv_mix_col + mov [esi-28],eax + mov edx,[esi-24] + mf_call inv_mix_col + mov [esi-24],eax + mov edx,[esi-20] + mf_call inv_mix_col + mov [esi-20],eax + mov edx,[esi-16] + mf_call inv_mix_col + mov [esi-16],eax + mov edx,[esi-12] + mf_call inv_mix_col + mov [esi-12],eax + mov edx,[esi-8] + mf_call inv_mix_col + mov [esi-8],eax + mov edx,[esi-4] + mf_call inv_mix_col + mov [esi-4],eax + +.0: mov edx,[esi] ; we can now speed up the remaining + mf_call inv_mix_col ; rounds by using the technique + mov [esi],eax ; outlined earlier. But note that + xor eax,[esi-28] ; there is one extra inverse mix + mov [esi+4],eax ; column operation as the 256 bit + xor eax,[esi-24] ; key has an extra non-linear step + mov [esi+8],eax ; for the midway element. + xor eax,[esi-20] + mov [esi+12],eax ; the expanded key is 15 * 4 = 60 + mov edx,[esi+16] ; 32-bit words of which 52 need to + mf_call inv_mix_col ; be modified. We have already done + mov [esi+16],eax ; 12 so 40 are left - which means + xor eax,[esi-12] ; that we need exactly 5 loops of 8 + mov [esi+20],eax + xor eax,[esi-8] + mov [esi+24],eax + xor eax,[esi-4] + mov [esi+28],eax + add esi,32 + cmp edi,esi + jg .0 + +%endif + +dec_end: + +%ifdef AES_REV_DKS + + movzx esi,word [esp+20] ; this reverses the order of the +.1: mov eax,[esi] ; round keys if required + mov ebx,[esi+4] + mov ebp,[edi] + mov edx,[edi+4] + mov [esi],ebp + mov [esi+4],edx + mov [edi],eax + mov [edi+4],ebx + + mov eax,[esi+8] + mov ebx,[esi+12] + mov ebp,[edi+8] + mov edx,[edi+12] + mov [esi+8],ebp + mov [esi+12],edx + mov [edi+8],eax + mov [edi+12],ebx + + add esi,16 + sub edi,16 + cmp edi,esi + jg .1 + +%endif + + pop edi + pop esi + pop ebx + pop ebp + xor eax,eax + do_exit 8 + +%ifdef AES_VAR + + do_name _aes_decrypt_key,12 + + mov ecx,[esp+4] + mov eax,[esp+8] + mov edx,[esp+12] + push edx + push ecx + + cmp eax,16 + je .1 + cmp eax,128 + je .1 + + cmp eax,24 + je .2 + cmp eax,192 + je .2 + + cmp eax,32 + je .3 + cmp eax,256 + je .3 + mov eax,-1 + add esp,8 + do_exit 12 + +.1: do_call _aes_decrypt_key128,8 + do_exit 12 +.2: do_call _aes_decrypt_key192,8 + do_exit 12 +.3: do_call _aes_decrypt_key256,8 + do_exit 12 + +%endif + +%endif + +%ifdef DECRYPTION_TABLE + +; Inverse S-box data - 256 entries + + section _DATA + +%define v8(x) fe(x), f9(x), fd(x), fb(x), fe(x), f9(x), fd(x), x + +_aes_dec_tab: + db v8(0x52),v8(0x09),v8(0x6a),v8(0xd5),v8(0x30),v8(0x36),v8(0xa5),v8(0x38) + db v8(0xbf),v8(0x40),v8(0xa3),v8(0x9e),v8(0x81),v8(0xf3),v8(0xd7),v8(0xfb) + db v8(0x7c),v8(0xe3),v8(0x39),v8(0x82),v8(0x9b),v8(0x2f),v8(0xff),v8(0x87) + db v8(0x34),v8(0x8e),v8(0x43),v8(0x44),v8(0xc4),v8(0xde),v8(0xe9),v8(0xcb) + db v8(0x54),v8(0x7b),v8(0x94),v8(0x32),v8(0xa6),v8(0xc2),v8(0x23),v8(0x3d) + db v8(0xee),v8(0x4c),v8(0x95),v8(0x0b),v8(0x42),v8(0xfa),v8(0xc3),v8(0x4e) + db v8(0x08),v8(0x2e),v8(0xa1),v8(0x66),v8(0x28),v8(0xd9),v8(0x24),v8(0xb2) + db v8(0x76),v8(0x5b),v8(0xa2),v8(0x49),v8(0x6d),v8(0x8b),v8(0xd1),v8(0x25) + db v8(0x72),v8(0xf8),v8(0xf6),v8(0x64),v8(0x86),v8(0x68),v8(0x98),v8(0x16) + db v8(0xd4),v8(0xa4),v8(0x5c),v8(0xcc),v8(0x5d),v8(0x65),v8(0xb6),v8(0x92) + db v8(0x6c),v8(0x70),v8(0x48),v8(0x50),v8(0xfd),v8(0xed),v8(0xb9),v8(0xda) + db v8(0x5e),v8(0x15),v8(0x46),v8(0x57),v8(0xa7),v8(0x8d),v8(0x9d),v8(0x84) + db v8(0x90),v8(0xd8),v8(0xab),v8(0x00),v8(0x8c),v8(0xbc),v8(0xd3),v8(0x0a) + db v8(0xf7),v8(0xe4),v8(0x58),v8(0x05),v8(0xb8),v8(0xb3),v8(0x45),v8(0x06) + db v8(0xd0),v8(0x2c),v8(0x1e),v8(0x8f),v8(0xca),v8(0x3f),v8(0x0f),v8(0x02) + db v8(0xc1),v8(0xaf),v8(0xbd),v8(0x03),v8(0x01),v8(0x13),v8(0x8a),v8(0x6b) + db v8(0x3a),v8(0x91),v8(0x11),v8(0x41),v8(0x4f),v8(0x67),v8(0xdc),v8(0xea) + db v8(0x97),v8(0xf2),v8(0xcf),v8(0xce),v8(0xf0),v8(0xb4),v8(0xe6),v8(0x73) + db v8(0x96),v8(0xac),v8(0x74),v8(0x22),v8(0xe7),v8(0xad),v8(0x35),v8(0x85) + db v8(0xe2),v8(0xf9),v8(0x37),v8(0xe8),v8(0x1c),v8(0x75),v8(0xdf),v8(0x6e) + db v8(0x47),v8(0xf1),v8(0x1a),v8(0x71),v8(0x1d),v8(0x29),v8(0xc5),v8(0x89) + db v8(0x6f),v8(0xb7),v8(0x62),v8(0x0e),v8(0xaa),v8(0x18),v8(0xbe),v8(0x1b) + db v8(0xfc),v8(0x56),v8(0x3e),v8(0x4b),v8(0xc6),v8(0xd2),v8(0x79),v8(0x20) + db v8(0x9a),v8(0xdb),v8(0xc0),v8(0xfe),v8(0x78),v8(0xcd),v8(0x5a),v8(0xf4) + db v8(0x1f),v8(0xdd),v8(0xa8),v8(0x33),v8(0x88),v8(0x07),v8(0xc7),v8(0x31) + db v8(0xb1),v8(0x12),v8(0x10),v8(0x59),v8(0x27),v8(0x80),v8(0xec),v8(0x5f) + db v8(0x60),v8(0x51),v8(0x7f),v8(0xa9),v8(0x19),v8(0xb5),v8(0x4a),v8(0x0d) + db v8(0x2d),v8(0xe5),v8(0x7a),v8(0x9f),v8(0x93),v8(0xc9),v8(0x9c),v8(0xef) + db v8(0xa0),v8(0xe0),v8(0x3b),v8(0x4d),v8(0xae),v8(0x2a),v8(0xf5),v8(0xb0) + db v8(0xc8),v8(0xeb),v8(0xbb),v8(0x3c),v8(0x83),v8(0x53),v8(0x99),v8(0x61) + db v8(0x17),v8(0x2b),v8(0x04),v8(0x7e),v8(0xba),v8(0x77),v8(0xd6),v8(0x26) + db v8(0xe1),v8(0x69),v8(0x14),v8(0x63),v8(0x55),v8(0x21),v8(0x0c),v8(0x7d) + +%endif diff --git a/Crypto/Aes_hw_cpu.asm b/Crypto/Aes_hw_cpu.asm new file mode 100644 index 0000000..64c3bad --- /dev/null +++ b/Crypto/Aes_hw_cpu.asm @@ -0,0 +1,330 @@ +; +; Copyright (c) 2010 TrueCrypt Developers Association. All rights reserved. +; +; Governed by the TrueCrypt License 3.0 the full text of which is contained in +; the file License.txt included in TrueCrypt binary and source code distribution +; packages. +; + + +%ifidn __BITS__, 16 + %define R e +%elifidn __BITS__, 32 + %define R e +%elifidn __BITS__, 64 + %define R r +%endif + + +%macro export_function 1-2 0 + + %ifdef MS_STDCALL + global %1@%2 + export _%1@%2 + %1@%2: + %elifidn __BITS__, 16 + global _%1 + _%1: + %else + global %1 + %1: + %endif + +%endmacro + + +%macro aes_function_entry 1 + + ; void (const byte *ks, byte *data); + + export_function %1, 8 + + %ifidn __BITS__, 32 + mov ecx, [esp + 4 + 4 * 0] + mov edx, [esp + 4 + 4 * 1] + %elifidn __BITS__, 64 + %ifnidn __OUTPUT_FORMAT__, win64 + mov rcx, rdi + mov rdx, rsi + %endif + %endif + + ; ecx/rcx = ks + ; edx/rdx = data + +%endmacro + + +%macro aes_function_exit 0 + + ; void (const byte *, byte *); + + %ifdef MS_STDCALL + ret 8 + %else + ret + %endif + +%endmacro + + +%macro push_xmm 2 + sub rsp, 16 * (%2 - %1 + 1) + + %assign stackoffset 0 + %assign regnumber %1 + + %rep (%2 - %1 + 1) + movdqu [rsp + 16 * stackoffset], xmm%[regnumber] + + %assign stackoffset stackoffset+1 + %assign regnumber regnumber+1 + %endrep +%endmacro + + +%macro pop_xmm 2 + %assign stackoffset 0 + %assign regnumber %1 + + %rep (%2 - %1 + 1) + movdqu xmm%[regnumber], [rsp + 16 * stackoffset] + + %assign stackoffset stackoffset+1 + %assign regnumber regnumber+1 + %endrep + + add rsp, 16 * (%2 - %1 + 1) +%endmacro + + +%macro aes_hw_cpu 2 + %define OPERATION %1 + %define BLOCK_COUNT %2 + + ; Load data blocks + %assign block 1 + %rep BLOCK_COUNT + movdqu xmm%[block], [%[R]dx + 16 * (block - 1)] + %assign block block+1 + %endrep + + ; Encrypt/decrypt data blocks + %assign round 0 + %rep 15 + movdqu xmm0, [%[R]cx + 16 * round] + + %assign block 1 + %rep BLOCK_COUNT + + %if round = 0 + pxor xmm%[block], xmm0 + %else + %if round < 14 + aes%[OPERATION] xmm%[block], xmm0 + %else + aes%[OPERATION]last xmm%[block], xmm0 + %endif + %endif + + %assign block block+1 + %endrep + + %assign round round+1 + %endrep + + ; Store data blocks + %assign block 1 + %rep BLOCK_COUNT + movdqu [%[R]dx + 16 * (block - 1)], xmm%[block] + %assign block block+1 + %endrep + + %undef OPERATION + %undef BLOCK_COUNT +%endmacro + + +%macro aes_hw_cpu_32_blocks 1 + %define OPERATION_32_BLOCKS %1 + + %ifidn __BITS__, 64 + %define MAX_REG_BLOCK_COUNT 15 + %else + %define MAX_REG_BLOCK_COUNT 7 + %endif + + %ifidn __OUTPUT_FORMAT__, win64 + %if MAX_REG_BLOCK_COUNT > 5 + push_xmm 6, MAX_REG_BLOCK_COUNT + %endif + %endif + + mov eax, 32 / MAX_REG_BLOCK_COUNT + .1: + aes_hw_cpu %[OPERATION_32_BLOCKS], MAX_REG_BLOCK_COUNT + + add %[R]dx, 16 * MAX_REG_BLOCK_COUNT + dec eax + jnz .1 + + %if (32 % MAX_REG_BLOCK_COUNT) != 0 + aes_hw_cpu %[OPERATION_32_BLOCKS], (32 % MAX_REG_BLOCK_COUNT) + %endif + + %ifidn __OUTPUT_FORMAT__, win64 + %if MAX_REG_BLOCK_COUNT > 5 + pop_xmm 6, MAX_REG_BLOCK_COUNT + %endif + %endif + + %undef OPERATION_32_BLOCKS + %undef MAX_REG_BLOCK_COUNT +%endmacro + + +%ifidn __BITS__, 16 + + USE16 + SEGMENT _TEXT PUBLIC CLASS=CODE USE16 + SEGMENT _DATA PUBLIC CLASS=DATA USE16 + GROUP DGROUP _TEXT _DATA + SECTION _TEXT + +%else + + SECTION .text + +%endif + + +; void aes_hw_cpu_enable_sse (); + + export_function aes_hw_cpu_enable_sse + mov %[R]ax, cr4 + or ax, 1 << 9 + mov cr4, %[R]ax + ret + + +%ifidn __BITS__, 16 + + +; byte is_aes_hw_cpu_supported (); + + export_function is_aes_hw_cpu_supported + mov eax, 1 + cpuid + mov eax, ecx + shr eax, 25 + and al, 1 + ret + + +; void aes_hw_cpu_decrypt (const byte *ks, byte *data); + + export_function aes_hw_cpu_decrypt + mov ax, -16 + jmp aes_hw_cpu_encrypt_decrypt + +; void aes_hw_cpu_encrypt (const byte *ks, byte *data); + + export_function aes_hw_cpu_encrypt + mov ax, 16 + + aes_hw_cpu_encrypt_decrypt: + push bp + mov bp, sp + push di + push si + + mov si, [bp + 4] ; ks + mov di, [bp + 4 + 2] ; data + + movdqu xmm0, [si] + movdqu xmm1, [di] + + pxor xmm1, xmm0 + + mov cx, 13 + + .round1_13: + add si, ax + movdqu xmm0, [si] + + cmp ax, 0 + jl .decrypt + + aesenc xmm1, xmm0 + jmp .2 + .decrypt: + aesdec xmm1, xmm0 + .2: + loop .round1_13 + + add si, ax + movdqu xmm0, [si] + + cmp ax, 0 + jl .decrypt_last + + aesenclast xmm1, xmm0 + jmp .3 + .decrypt_last: + aesdeclast xmm1, xmm0 + .3: + movdqu [di], xmm1 + + pop si + pop di + pop bp + ret + + +%else ; __BITS__ != 16 + + +; byte is_aes_hw_cpu_supported (); + + export_function is_aes_hw_cpu_supported + push %[R]bx + + mov eax, 1 + cpuid + mov eax, ecx + shr eax, 25 + and eax, 1 + + pop %[R]bx + ret + + +; void aes_hw_cpu_decrypt (const byte *ks, byte *data); + + aes_function_entry aes_hw_cpu_decrypt + aes_hw_cpu dec, 1 + aes_function_exit + + +; void aes_hw_cpu_decrypt_32_blocks (const byte *ks, byte *data); + + aes_function_entry aes_hw_cpu_decrypt_32_blocks + aes_hw_cpu_32_blocks dec + aes_function_exit + + +; void aes_hw_cpu_encrypt (const byte *ks, byte *data); + + aes_function_entry aes_hw_cpu_encrypt + aes_hw_cpu enc, 1 + aes_function_exit + + +; void aes_hw_cpu_encrypt_32_blocks (const byte *ks, byte *data); + + aes_function_entry aes_hw_cpu_encrypt_32_blocks + aes_hw_cpu_32_blocks enc + aes_function_exit + + +%endif ; __BITS__ != 16 diff --git a/Crypto/Aes_hw_cpu.h b/Crypto/Aes_hw_cpu.h new file mode 100644 index 0000000..b38032c --- /dev/null +++ b/Crypto/Aes_hw_cpu.h @@ -0,0 +1,30 @@ +/* + Copyright (c) 2010 TrueCrypt Developers Association. All rights reserved. + + Governed by the TrueCrypt License 3.0 the full text of which is contained in + the file License.txt included in TrueCrypt binary and source code distribution + packages. +*/ + +#ifndef TC_HEADER_Crypto_Aes_Hw_Cpu +#define TC_HEADER_Crypto_Aes_Hw_Cpu + +#include "Common/Tcdefs.h" + +#if defined(__cplusplus) +extern "C" +{ +#endif + +byte is_aes_hw_cpu_supported (); +void aes_hw_cpu_enable_sse (); +void aes_hw_cpu_decrypt (const byte *ks, byte *data); +void aes_hw_cpu_decrypt_32_blocks (const byte *ks, byte *data); +void aes_hw_cpu_encrypt (const byte *ks, byte *data); +void aes_hw_cpu_encrypt_32_blocks (const byte *ks, byte *data); + +#if defined(__cplusplus) +} +#endif + +#endif // TC_HEADER_Crypto_Aes_Hw_Cpu diff --git a/Crypto/Aes_x64.asm b/Crypto/Aes_x64.asm new file mode 100644 index 0000000..b29fdca --- /dev/null +++ b/Crypto/Aes_x64.asm @@ -0,0 +1,907 @@ + +; --------------------------------------------------------------------------- +; Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved. +; +; LICENSE TERMS +; +; The free distribution and use of this software is allowed (with or without +; changes) provided that: +; +; 1. source code distributions include the above copyright notice, this +; list of conditions and the following disclaimer; +; +; 2. binary distributions include the above copyright notice, this list +; of conditions and the following disclaimer in their documentation; +; +; 3. the name of the copyright holder is not used to endorse products +; built using this software without specific written permission. +; +; DISCLAIMER +; +; This software is provided 'as is' with no explicit or implied warranties +; in respect of its properties, including, but not limited to, correctness +; and/or fitness for purpose. +; --------------------------------------------------------------------------- +; Issue 20/12/2007 +; +; I am grateful to Dag Arne Osvik for many discussions of the techniques that +; can be used to optimise AES assembler code on AMD64/EM64T architectures. +; Some of the techniques used in this implementation are the result of +; suggestions made by him for which I am most grateful. + +; +; Adapted for TrueCrypt: +; - Compatibility with NASM +; + +; An AES implementation for AMD64 processors using the YASM assembler. This +; implemetation provides only encryption, decryption and hence requires key +; scheduling support in C. It uses 8k bytes of tables but its encryption and +; decryption performance is very close to that obtained using large tables. +; It can use either Windows or Gnu/Linux calling conventions, which are as +; follows: +; windows gnu/linux +; +; in_blk rcx rdi +; out_blk rdx rsi +; context (cx) r8 rdx +; +; preserved rsi - + rbx, rbp, rsp, r12, r13, r14 & r15 +; registers rdi - on both +; +; destroyed - rsi + rax, rcx, rdx, r8, r9, r10 & r11 +; registers - rdi on both +; +; The default convention is that for windows, the gnu/linux convention being +; used if __GNUC__ is defined. +; +; Define _SEH_ to include support for Win64 structured exception handling +; (this requires YASM version 0.6 or later). +; +; This code provides the standard AES block size (128 bits, 16 bytes) and the +; three standard AES key sizes (128, 192 and 256 bits). It has the same call +; interface as my C implementation. It uses the Microsoft C AMD64 calling +; conventions in which the three parameters are placed in rcx, rdx and r8 +; respectively. The rbx, rsi, rdi, rbp and r12..r15 registers are preserved. +; +; AES_RETURN aes_encrypt(const unsigned char in_blk[], +; unsigned char out_blk[], const aes_encrypt_ctx cx[1]); +; +; AES_RETURN aes_decrypt(const unsigned char in_blk[], +; unsigned char out_blk[], const aes_decrypt_ctx cx[1]); +; +; AES_RETURN aes_encrypt_key(const unsigned char key[], +; const aes_encrypt_ctx cx[1]); +; +; AES_RETURN aes_decrypt_key(const unsigned char key[], +; const aes_decrypt_ctx cx[1]); +; +; AES_RETURN aes_encrypt_key(const unsigned char key[], +; unsigned int len, const aes_decrypt_ctx cx[1]); +; +; AES_RETURN aes_decrypt_key(const unsigned char key[], +; unsigned int len, const aes_decrypt_ctx cx[1]); +; +; where is 128, 102 or 256. In the last two calls the length can be in +; either bits or bytes. +; +; Comment in/out the following lines to obtain the desired subroutines. These +; selections MUST match those in the C header file aes.h + +; %define AES_128 ; define if AES with 128 bit keys is needed +; %define AES_192 ; define if AES with 192 bit keys is needed +%define AES_256 ; define if AES with 256 bit keys is needed +; %define AES_VAR ; define if a variable key size is needed +%define ENCRYPTION ; define if encryption is needed +%define DECRYPTION ; define if decryption is needed +%define AES_REV_DKS ; define if key decryption schedule is reversed +%define LAST_ROUND_TABLES ; define for the faster version using extra tables + +; The encryption key schedule has the following in memory layout where N is the +; number of rounds (10, 12 or 14): +; +; lo: | input key (round 0) | ; each round is four 32-bit words +; | encryption round 1 | +; | encryption round 2 | +; .... +; | encryption round N-1 | +; hi: | encryption round N | +; +; The decryption key schedule is normally set up so that it has the same +; layout as above by actually reversing the order of the encryption key +; schedule in memory (this happens when AES_REV_DKS is set): +; +; lo: | decryption round 0 | = | encryption round N | +; | decryption round 1 | = INV_MIX_COL[ | encryption round N-1 | ] +; | decryption round 2 | = INV_MIX_COL[ | encryption round N-2 | ] +; .... .... +; | decryption round N-1 | = INV_MIX_COL[ | encryption round 1 | ] +; hi: | decryption round N | = | input key (round 0) | +; +; with rounds except the first and last modified using inv_mix_column() +; But if AES_REV_DKS is NOT set the order of keys is left as it is for +; encryption so that it has to be accessed in reverse when used for +; decryption (although the inverse mix column modifications are done) +; +; lo: | decryption round 0 | = | input key (round 0) | +; | decryption round 1 | = INV_MIX_COL[ | encryption round 1 | ] +; | decryption round 2 | = INV_MIX_COL[ | encryption round 2 | ] +; .... .... +; | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ] +; hi: | decryption round N | = | encryption round N | +; +; This layout is faster when the assembler key scheduling provided here +; is used. +; +; The DLL interface must use the _stdcall convention in which the number +; of bytes of parameter space is added after an @ to the sutine's name. +; We must also remove our parameters from the stack before return (see +; the do_exit macro). Define DLL_EXPORT for the Dynamic Link Library version. + +;%define DLL_EXPORT + +; End of user defines + +%ifdef AES_VAR +%ifndef AES_128 +%define AES_128 +%endif +%ifndef AES_192 +%define AES_192 +%endif +%ifndef AES_256 +%define AES_256 +%endif +%endif + +%ifdef AES_VAR +%define KS_LENGTH 60 +%elifdef AES_256 +%define KS_LENGTH 60 +%elifdef AES_192 +%define KS_LENGTH 52 +%else +%define KS_LENGTH 44 +%endif + +%define r0 rax +%define r1 rdx +%define r2 rcx +%define r3 rbx +%define r4 rsi +%define r5 rdi +%define r6 rbp +%define r7 rsp + +%define raxd eax +%define rdxd edx +%define rcxd ecx +%define rbxd ebx +%define rsid esi +%define rdid edi +%define rbpd ebp +%define rspd esp + +%define raxb al +%define rdxb dl +%define rcxb cl +%define rbxb bl +%define rsib sil +%define rdib dil +%define rbpb bpl +%define rspb spl + +%define r0h ah +%define r1h dh +%define r2h ch +%define r3h bh + +%define r0d eax +%define r1d edx +%define r2d ecx +%define r3d ebx + +; finite field multiplies by {02}, {04} and {08} + +%define f2(x) ((x<<1)^(((x>>7)&1)*0x11b)) +%define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b)) +%define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b)) + +; finite field multiplies required in table generation + +%define f3(x) (f2(x) ^ x) +%define f9(x) (f8(x) ^ x) +%define fb(x) (f8(x) ^ f2(x) ^ x) +%define fd(x) (f8(x) ^ f4(x) ^ x) +%define fe(x) (f8(x) ^ f4(x) ^ f2(x)) + +; macro for expanding S-box data + +%macro enc_vals 1 + db %1(0x63),%1(0x7c),%1(0x77),%1(0x7b),%1(0xf2),%1(0x6b),%1(0x6f),%1(0xc5) + db %1(0x30),%1(0x01),%1(0x67),%1(0x2b),%1(0xfe),%1(0xd7),%1(0xab),%1(0x76) + db %1(0xca),%1(0x82),%1(0xc9),%1(0x7d),%1(0xfa),%1(0x59),%1(0x47),%1(0xf0) + db %1(0xad),%1(0xd4),%1(0xa2),%1(0xaf),%1(0x9c),%1(0xa4),%1(0x72),%1(0xc0) + db %1(0xb7),%1(0xfd),%1(0x93),%1(0x26),%1(0x36),%1(0x3f),%1(0xf7),%1(0xcc) + db %1(0x34),%1(0xa5),%1(0xe5),%1(0xf1),%1(0x71),%1(0xd8),%1(0x31),%1(0x15) + db %1(0x04),%1(0xc7),%1(0x23),%1(0xc3),%1(0x18),%1(0x96),%1(0x05),%1(0x9a) + db %1(0x07),%1(0x12),%1(0x80),%1(0xe2),%1(0xeb),%1(0x27),%1(0xb2),%1(0x75) + db %1(0x09),%1(0x83),%1(0x2c),%1(0x1a),%1(0x1b),%1(0x6e),%1(0x5a),%1(0xa0) + db %1(0x52),%1(0x3b),%1(0xd6),%1(0xb3),%1(0x29),%1(0xe3),%1(0x2f),%1(0x84) + db %1(0x53),%1(0xd1),%1(0x00),%1(0xed),%1(0x20),%1(0xfc),%1(0xb1),%1(0x5b) + db %1(0x6a),%1(0xcb),%1(0xbe),%1(0x39),%1(0x4a),%1(0x4c),%1(0x58),%1(0xcf) + db %1(0xd0),%1(0xef),%1(0xaa),%1(0xfb),%1(0x43),%1(0x4d),%1(0x33),%1(0x85) + db %1(0x45),%1(0xf9),%1(0x02),%1(0x7f),%1(0x50),%1(0x3c),%1(0x9f),%1(0xa8) + db %1(0x51),%1(0xa3),%1(0x40),%1(0x8f),%1(0x92),%1(0x9d),%1(0x38),%1(0xf5) + db %1(0xbc),%1(0xb6),%1(0xda),%1(0x21),%1(0x10),%1(0xff),%1(0xf3),%1(0xd2) + db %1(0xcd),%1(0x0c),%1(0x13),%1(0xec),%1(0x5f),%1(0x97),%1(0x44),%1(0x17) + db %1(0xc4),%1(0xa7),%1(0x7e),%1(0x3d),%1(0x64),%1(0x5d),%1(0x19),%1(0x73) + db %1(0x60),%1(0x81),%1(0x4f),%1(0xdc),%1(0x22),%1(0x2a),%1(0x90),%1(0x88) + db %1(0x46),%1(0xee),%1(0xb8),%1(0x14),%1(0xde),%1(0x5e),%1(0x0b),%1(0xdb) + db %1(0xe0),%1(0x32),%1(0x3a),%1(0x0a),%1(0x49),%1(0x06),%1(0x24),%1(0x5c) + db %1(0xc2),%1(0xd3),%1(0xac),%1(0x62),%1(0x91),%1(0x95),%1(0xe4),%1(0x79) + db %1(0xe7),%1(0xc8),%1(0x37),%1(0x6d),%1(0x8d),%1(0xd5),%1(0x4e),%1(0xa9) + db %1(0x6c),%1(0x56),%1(0xf4),%1(0xea),%1(0x65),%1(0x7a),%1(0xae),%1(0x08) + db %1(0xba),%1(0x78),%1(0x25),%1(0x2e),%1(0x1c),%1(0xa6),%1(0xb4),%1(0xc6) + db %1(0xe8),%1(0xdd),%1(0x74),%1(0x1f),%1(0x4b),%1(0xbd),%1(0x8b),%1(0x8a) + db %1(0x70),%1(0x3e),%1(0xb5),%1(0x66),%1(0x48),%1(0x03),%1(0xf6),%1(0x0e) + db %1(0x61),%1(0x35),%1(0x57),%1(0xb9),%1(0x86),%1(0xc1),%1(0x1d),%1(0x9e) + db %1(0xe1),%1(0xf8),%1(0x98),%1(0x11),%1(0x69),%1(0xd9),%1(0x8e),%1(0x94) + db %1(0x9b),%1(0x1e),%1(0x87),%1(0xe9),%1(0xce),%1(0x55),%1(0x28),%1(0xdf) + db %1(0x8c),%1(0xa1),%1(0x89),%1(0x0d),%1(0xbf),%1(0xe6),%1(0x42),%1(0x68) + db %1(0x41),%1(0x99),%1(0x2d),%1(0x0f),%1(0xb0),%1(0x54),%1(0xbb),%1(0x16) +%endmacro + +%macro dec_vals 1 + db %1(0x52),%1(0x09),%1(0x6a),%1(0xd5),%1(0x30),%1(0x36),%1(0xa5),%1(0x38) + db %1(0xbf),%1(0x40),%1(0xa3),%1(0x9e),%1(0x81),%1(0xf3),%1(0xd7),%1(0xfb) + db %1(0x7c),%1(0xe3),%1(0x39),%1(0x82),%1(0x9b),%1(0x2f),%1(0xff),%1(0x87) + db %1(0x34),%1(0x8e),%1(0x43),%1(0x44),%1(0xc4),%1(0xde),%1(0xe9),%1(0xcb) + db %1(0x54),%1(0x7b),%1(0x94),%1(0x32),%1(0xa6),%1(0xc2),%1(0x23),%1(0x3d) + db %1(0xee),%1(0x4c),%1(0x95),%1(0x0b),%1(0x42),%1(0xfa),%1(0xc3),%1(0x4e) + db %1(0x08),%1(0x2e),%1(0xa1),%1(0x66),%1(0x28),%1(0xd9),%1(0x24),%1(0xb2) + db %1(0x76),%1(0x5b),%1(0xa2),%1(0x49),%1(0x6d),%1(0x8b),%1(0xd1),%1(0x25) + db %1(0x72),%1(0xf8),%1(0xf6),%1(0x64),%1(0x86),%1(0x68),%1(0x98),%1(0x16) + db %1(0xd4),%1(0xa4),%1(0x5c),%1(0xcc),%1(0x5d),%1(0x65),%1(0xb6),%1(0x92) + db %1(0x6c),%1(0x70),%1(0x48),%1(0x50),%1(0xfd),%1(0xed),%1(0xb9),%1(0xda) + db %1(0x5e),%1(0x15),%1(0x46),%1(0x57),%1(0xa7),%1(0x8d),%1(0x9d),%1(0x84) + db %1(0x90),%1(0xd8),%1(0xab),%1(0x00),%1(0x8c),%1(0xbc),%1(0xd3),%1(0x0a) + db %1(0xf7),%1(0xe4),%1(0x58),%1(0x05),%1(0xb8),%1(0xb3),%1(0x45),%1(0x06) + db %1(0xd0),%1(0x2c),%1(0x1e),%1(0x8f),%1(0xca),%1(0x3f),%1(0x0f),%1(0x02) + db %1(0xc1),%1(0xaf),%1(0xbd),%1(0x03),%1(0x01),%1(0x13),%1(0x8a),%1(0x6b) + db %1(0x3a),%1(0x91),%1(0x11),%1(0x41),%1(0x4f),%1(0x67),%1(0xdc),%1(0xea) + db %1(0x97),%1(0xf2),%1(0xcf),%1(0xce),%1(0xf0),%1(0xb4),%1(0xe6),%1(0x73) + db %1(0x96),%1(0xac),%1(0x74),%1(0x22),%1(0xe7),%1(0xad),%1(0x35),%1(0x85) + db %1(0xe2),%1(0xf9),%1(0x37),%1(0xe8),%1(0x1c),%1(0x75),%1(0xdf),%1(0x6e) + db %1(0x47),%1(0xf1),%1(0x1a),%1(0x71),%1(0x1d),%1(0x29),%1(0xc5),%1(0x89) + db %1(0x6f),%1(0xb7),%1(0x62),%1(0x0e),%1(0xaa),%1(0x18),%1(0xbe),%1(0x1b) + db %1(0xfc),%1(0x56),%1(0x3e),%1(0x4b),%1(0xc6),%1(0xd2),%1(0x79),%1(0x20) + db %1(0x9a),%1(0xdb),%1(0xc0),%1(0xfe),%1(0x78),%1(0xcd),%1(0x5a),%1(0xf4) + db %1(0x1f),%1(0xdd),%1(0xa8),%1(0x33),%1(0x88),%1(0x07),%1(0xc7),%1(0x31) + db %1(0xb1),%1(0x12),%1(0x10),%1(0x59),%1(0x27),%1(0x80),%1(0xec),%1(0x5f) + db %1(0x60),%1(0x51),%1(0x7f),%1(0xa9),%1(0x19),%1(0xb5),%1(0x4a),%1(0x0d) + db %1(0x2d),%1(0xe5),%1(0x7a),%1(0x9f),%1(0x93),%1(0xc9),%1(0x9c),%1(0xef) + db %1(0xa0),%1(0xe0),%1(0x3b),%1(0x4d),%1(0xae),%1(0x2a),%1(0xf5),%1(0xb0) + db %1(0xc8),%1(0xeb),%1(0xbb),%1(0x3c),%1(0x83),%1(0x53),%1(0x99),%1(0x61) + db %1(0x17),%1(0x2b),%1(0x04),%1(0x7e),%1(0xba),%1(0x77),%1(0xd6),%1(0x26) + db %1(0xe1),%1(0x69),%1(0x14),%1(0x63),%1(0x55),%1(0x21),%1(0x0c),%1(0x7d) +%endmacro + +%define u8(x) f2(x), x, x, f3(x), f2(x), x, x, f3(x) +%define v8(x) fe(x), f9(x), fd(x), fb(x), fe(x), f9(x), fd(x), x +%define w8(x) x, 0, 0, 0, x, 0, 0, 0 + +%define tptr rbp ; table pointer +%define kptr r8 ; key schedule pointer +%define fofs 128 ; adjust offset in key schedule to keep |disp| < 128 +%define fk_ref(x,y) [kptr-16*x+fofs+4*y] +%ifdef AES_REV_DKS +%define rofs 128 +%define ik_ref(x,y) [kptr-16*x+rofs+4*y] +%else +%define rofs -128 +%define ik_ref(x,y) [kptr+16*x+rofs+4*y] +%endif + +%define tab_0(x) [tptr+8*x] +%define tab_1(x) [tptr+8*x+3] +%define tab_2(x) [tptr+8*x+2] +%define tab_3(x) [tptr+8*x+1] +%define tab_f(x) byte [tptr+8*x+1] +%define tab_i(x) byte [tptr+8*x+7] +%define t_ref(x,r) tab_ %+ x(r) + +%macro ff_rnd 5 ; normal forward round + mov %1d, fk_ref(%5,0) + mov %2d, fk_ref(%5,1) + mov %3d, fk_ref(%5,2) + mov %4d, fk_ref(%5,3) + + movzx esi, al + movzx edi, ah + shr eax, 16 + xor %1d, t_ref(0,rsi) + xor %4d, t_ref(1,rdi) + movzx esi, al + movzx edi, ah + xor %3d, t_ref(2,rsi) + xor %2d, t_ref(3,rdi) + + movzx esi, bl + movzx edi, bh + shr ebx, 16 + xor %2d, t_ref(0,rsi) + xor %1d, t_ref(1,rdi) + movzx esi, bl + movzx edi, bh + xor %4d, t_ref(2,rsi) + xor %3d, t_ref(3,rdi) + + movzx esi, cl + movzx edi, ch + shr ecx, 16 + xor %3d, t_ref(0,rsi) + xor %2d, t_ref(1,rdi) + movzx esi, cl + movzx edi, ch + xor %1d, t_ref(2,rsi) + xor %4d, t_ref(3,rdi) + + movzx esi, dl + movzx edi, dh + shr edx, 16 + xor %4d, t_ref(0,rsi) + xor %3d, t_ref(1,rdi) + movzx esi, dl + movzx edi, dh + xor %2d, t_ref(2,rsi) + xor %1d, t_ref(3,rdi) + + mov eax,%1d + mov ebx,%2d + mov ecx,%3d + mov edx,%4d +%endmacro + +%ifdef LAST_ROUND_TABLES + +%macro fl_rnd 5 ; last forward round + add tptr, 2048 + mov %1d, fk_ref(%5,0) + mov %2d, fk_ref(%5,1) + mov %3d, fk_ref(%5,2) + mov %4d, fk_ref(%5,3) + + movzx esi, al + movzx edi, ah + shr eax, 16 + xor %1d, t_ref(0,rsi) + xor %4d, t_ref(1,rdi) + movzx esi, al + movzx edi, ah + xor %3d, t_ref(2,rsi) + xor %2d, t_ref(3,rdi) + + movzx esi, bl + movzx edi, bh + shr ebx, 16 + xor %2d, t_ref(0,rsi) + xor %1d, t_ref(1,rdi) + movzx esi, bl + movzx edi, bh + xor %4d, t_ref(2,rsi) + xor %3d, t_ref(3,rdi) + + movzx esi, cl + movzx edi, ch + shr ecx, 16 + xor %3d, t_ref(0,rsi) + xor %2d, t_ref(1,rdi) + movzx esi, cl + movzx edi, ch + xor %1d, t_ref(2,rsi) + xor %4d, t_ref(3,rdi) + + movzx esi, dl + movzx edi, dh + shr edx, 16 + xor %4d, t_ref(0,rsi) + xor %3d, t_ref(1,rdi) + movzx esi, dl + movzx edi, dh + xor %2d, t_ref(2,rsi) + xor %1d, t_ref(3,rdi) +%endmacro + +%else + +%macro fl_rnd 5 ; last forward round + mov %1d, fk_ref(%5,0) + mov %2d, fk_ref(%5,1) + mov %3d, fk_ref(%5,2) + mov %4d, fk_ref(%5,3) + + movzx esi, al + movzx edi, ah + shr eax, 16 + movzx esi, t_ref(f,rsi) + movzx edi, t_ref(f,rdi) + xor %1d, esi + rol edi, 8 + xor %4d, edi + movzx esi, al + movzx edi, ah + movzx esi, t_ref(f,rsi) + movzx edi, t_ref(f,rdi) + rol esi, 16 + rol edi, 24 + xor %3d, esi + xor %2d, edi + + movzx esi, bl + movzx edi, bh + shr ebx, 16 + movzx esi, t_ref(f,rsi) + movzx edi, t_ref(f,rdi) + xor %2d, esi + rol edi, 8 + xor %1d, edi + movzx esi, bl + movzx edi, bh + movzx esi, t_ref(f,rsi) + movzx edi, t_ref(f,rdi) + rol esi, 16 + rol edi, 24 + xor %4d, esi + xor %3d, edi + + movzx esi, cl + movzx edi, ch + movzx esi, t_ref(f,rsi) + movzx edi, t_ref(f,rdi) + shr ecx, 16 + xor %3d, esi + rol edi, 8 + xor %2d, edi + movzx esi, cl + movzx edi, ch + movzx esi, t_ref(f,rsi) + movzx edi, t_ref(f,rdi) + rol esi, 16 + rol edi, 24 + xor %1d, esi + xor %4d, edi + + movzx esi, dl + movzx edi, dh + movzx esi, t_ref(f,rsi) + movzx edi, t_ref(f,rdi) + shr edx, 16 + xor %4d, esi + rol edi, 8 + xor %3d, edi + movzx esi, dl + movzx edi, dh + movzx esi, t_ref(f,rsi) + movzx edi, t_ref(f,rdi) + rol esi, 16 + rol edi, 24 + xor %2d, esi + xor %1d, edi +%endmacro + +%endif + +%macro ii_rnd 5 ; normal inverse round + mov %1d, ik_ref(%5,0) + mov %2d, ik_ref(%5,1) + mov %3d, ik_ref(%5,2) + mov %4d, ik_ref(%5,3) + + movzx esi, al + movzx edi, ah + shr eax, 16 + xor %1d, t_ref(0,rsi) + xor %2d, t_ref(1,rdi) + movzx esi, al + movzx edi, ah + xor %3d, t_ref(2,rsi) + xor %4d, t_ref(3,rdi) + + movzx esi, bl + movzx edi, bh + shr ebx, 16 + xor %2d, t_ref(0,rsi) + xor %3d, t_ref(1,rdi) + movzx esi, bl + movzx edi, bh + xor %4d, t_ref(2,rsi) + xor %1d, t_ref(3,rdi) + + movzx esi, cl + movzx edi, ch + shr ecx, 16 + xor %3d, t_ref(0,rsi) + xor %4d, t_ref(1,rdi) + movzx esi, cl + movzx edi, ch + xor %1d, t_ref(2,rsi) + xor %2d, t_ref(3,rdi) + + movzx esi, dl + movzx edi, dh + shr edx, 16 + xor %4d, t_ref(0,rsi) + xor %1d, t_ref(1,rdi) + movzx esi, dl + movzx edi, dh + xor %2d, t_ref(2,rsi) + xor %3d, t_ref(3,rdi) + + mov eax,%1d + mov ebx,%2d + mov ecx,%3d + mov edx,%4d +%endmacro + +%ifdef LAST_ROUND_TABLES + +%macro il_rnd 5 ; last inverse round + add tptr, 2048 + mov %1d, ik_ref(%5,0) + mov %2d, ik_ref(%5,1) + mov %3d, ik_ref(%5,2) + mov %4d, ik_ref(%5,3) + + movzx esi, al + movzx edi, ah + shr eax, 16 + xor %1d, t_ref(0,rsi) + xor %2d, t_ref(1,rdi) + movzx esi, al + movzx edi, ah + xor %3d, t_ref(2,rsi) + xor %4d, t_ref(3,rdi) + + movzx esi, bl + movzx edi, bh + shr ebx, 16 + xor %2d, t_ref(0,rsi) + xor %3d, t_ref(1,rdi) + movzx esi, bl + movzx edi, bh + xor %4d, t_ref(2,rsi) + xor %1d, t_ref(3,rdi) + + movzx esi, cl + movzx edi, ch + shr ecx, 16 + xor %3d, t_ref(0,rsi) + xor %4d, t_ref(1,rdi) + movzx esi, cl + movzx edi, ch + xor %1d, t_ref(2,rsi) + xor %2d, t_ref(3,rdi) + + movzx esi, dl + movzx edi, dh + shr edx, 16 + xor %4d, t_ref(0,rsi) + xor %1d, t_ref(1,rdi) + movzx esi, dl + movzx edi, dh + xor %2d, t_ref(2,rsi) + xor %3d, t_ref(3,rdi) +%endmacro + +%else + +%macro il_rnd 5 ; last inverse round + mov %1d, ik_ref(%5,0) + mov %2d, ik_ref(%5,1) + mov %3d, ik_ref(%5,2) + mov %4d, ik_ref(%5,3) + + movzx esi, al + movzx edi, ah + movzx esi, t_ref(i,rsi) + movzx edi, t_ref(i,rdi) + shr eax, 16 + xor %1d, esi + rol edi, 8 + xor %2d, edi + movzx esi, al + movzx edi, ah + movzx esi, t_ref(i,rsi) + movzx edi, t_ref(i,rdi) + rol esi, 16 + rol edi, 24 + xor %3d, esi + xor %4d, edi + + movzx esi, bl + movzx edi, bh + movzx esi, t_ref(i,rsi) + movzx edi, t_ref(i,rdi) + shr ebx, 16 + xor %2d, esi + rol edi, 8 + xor %3d, edi + movzx esi, bl + movzx edi, bh + movzx esi, t_ref(i,rsi) + movzx edi, t_ref(i,rdi) + rol esi, 16 + rol edi, 24 + xor %4d, esi + xor %1d, edi + + movzx esi, cl + movzx edi, ch + movzx esi, t_ref(i,rsi) + movzx edi, t_ref(i,rdi) + shr ecx, 16 + xor %3d, esi + rol edi, 8 + xor %4d, edi + movzx esi, cl + movzx edi, ch + movzx esi, t_ref(i,rsi) + movzx edi, t_ref(i,rdi) + rol esi, 16 + rol edi, 24 + xor %1d, esi + xor %2d, edi + + movzx esi, dl + movzx edi, dh + movzx esi, t_ref(i,rsi) + movzx edi, t_ref(i,rdi) + shr edx, 16 + xor %4d, esi + rol edi, 8 + xor %1d, edi + movzx esi, dl + movzx edi, dh + movzx esi, t_ref(i,rsi) + movzx edi, t_ref(i,rdi) + rol esi, 16 + rol edi, 24 + xor %2d, esi + xor %3d, edi +%endmacro + +%endif + +%ifdef ENCRYPTION + + global aes_encrypt +%ifdef DLL_EXPORT + export aes_encrypt +%endif + + section .data align=64 + align 64 +enc_tab: + enc_vals u8 +%ifdef LAST_ROUND_TABLES + enc_vals w8 +%endif + + section .text align=16 + align 16 + +%ifdef _SEH_ +proc_frame aes_encrypt + alloc_stack 7*8 ; 7 to align stack to 16 bytes + save_reg rsi,4*8 + save_reg rdi,5*8 + save_reg rbx,1*8 + save_reg rbp,2*8 + save_reg r12,3*8 +end_prologue + mov rdi, rcx ; input pointer + mov [rsp+0*8], rdx ; output pointer +%else + aes_encrypt: + %ifdef __GNUC__ + sub rsp, 4*8 ; gnu/linux binary interface + mov [rsp+0*8], rsi ; output pointer + mov r8, rdx ; context + %else + sub rsp, 6*8 ; windows binary interface + mov [rsp+4*8], rsi + mov [rsp+5*8], rdi + mov rdi, rcx ; input pointer + mov [rsp+0*8], rdx ; output pointer + %endif + mov [rsp+1*8], rbx ; input pointer in rdi + mov [rsp+2*8], rbp ; output pointer in [rsp] + mov [rsp+3*8], r12 ; context in r8 +%endif + + movzx esi, byte [kptr+4*KS_LENGTH] + lea tptr, [rel enc_tab] + sub kptr, fofs + + mov eax, [rdi+0*4] + mov ebx, [rdi+1*4] + mov ecx, [rdi+2*4] + mov edx, [rdi+3*4] + + xor eax, [kptr+fofs] + xor ebx, [kptr+fofs+4] + xor ecx, [kptr+fofs+8] + xor edx, [kptr+fofs+12] + + lea kptr,[kptr+rsi] + cmp esi, 10*16 + je .3 + cmp esi, 12*16 + je .2 + cmp esi, 14*16 + je .1 + mov rax, -1 + jmp .4 + +.1: ff_rnd r9, r10, r11, r12, 13 + ff_rnd r9, r10, r11, r12, 12 +.2: ff_rnd r9, r10, r11, r12, 11 + ff_rnd r9, r10, r11, r12, 10 +.3: ff_rnd r9, r10, r11, r12, 9 + ff_rnd r9, r10, r11, r12, 8 + ff_rnd r9, r10, r11, r12, 7 + ff_rnd r9, r10, r11, r12, 6 + ff_rnd r9, r10, r11, r12, 5 + ff_rnd r9, r10, r11, r12, 4 + ff_rnd r9, r10, r11, r12, 3 + ff_rnd r9, r10, r11, r12, 2 + ff_rnd r9, r10, r11, r12, 1 + fl_rnd r9, r10, r11, r12, 0 + + mov rbx, [rsp] + mov [rbx], r9d + mov [rbx+4], r10d + mov [rbx+8], r11d + mov [rbx+12], r12d + xor rax, rax +.4: + mov rbx, [rsp+1*8] + mov rbp, [rsp+2*8] + mov r12, [rsp+3*8] +%ifdef __GNUC__ + add rsp, 4*8 + ret +%else + mov rsi, [rsp+4*8] + mov rdi, [rsp+5*8] + %ifdef _SEH_ + add rsp, 7*8 + ret + endproc_frame + %else + add rsp, 6*8 + ret + %endif +%endif + +%endif + +%ifdef DECRYPTION + + global aes_decrypt +%ifdef DLL_EXPORT + export aes_decrypt +%endif + + section .data + align 64 +dec_tab: + dec_vals v8 +%ifdef LAST_ROUND_TABLES + dec_vals w8 +%endif + + section .text + align 16 + +%ifdef _SEH_ +proc_frame aes_decrypt + alloc_stack 7*8 ; 7 to align stack to 16 bytes + save_reg rsi,4*8 + save_reg rdi,5*8 + save_reg rbx,1*8 + save_reg rbp,2*8 + save_reg r12,3*8 +end_prologue + mov rdi, rcx ; input pointer + mov [rsp+0*8], rdx ; output pointer +%else + aes_decrypt: + %ifdef __GNUC__ + sub rsp, 4*8 ; gnu/linux binary interface + mov [rsp+0*8], rsi ; output pointer + mov r8, rdx ; context + %else + sub rsp, 6*8 ; windows binary interface + mov [rsp+4*8], rsi + mov [rsp+5*8], rdi + mov rdi, rcx ; input pointer + mov [rsp+0*8], rdx ; output pointer + %endif + mov [rsp+1*8], rbx ; input pointer in rdi + mov [rsp+2*8], rbp ; output pointer in [rsp] + mov [rsp+3*8], r12 ; context in r8 +%endif + + movzx esi,byte[kptr+4*KS_LENGTH] + lea tptr, [rel dec_tab] + sub kptr, rofs + + mov eax, [rdi+0*4] + mov ebx, [rdi+1*4] + mov ecx, [rdi+2*4] + mov edx, [rdi+3*4] + +%ifdef AES_REV_DKS + mov rdi, kptr + lea kptr,[kptr+rsi] +%else + lea rdi,[kptr+rsi] +%endif + + xor eax, [rdi+rofs] + xor ebx, [rdi+rofs+4] + xor ecx, [rdi+rofs+8] + xor edx, [rdi+rofs+12] + + cmp esi, 10*16 + je .3 + cmp esi, 12*16 + je .2 + cmp esi, 14*16 + je .1 + mov rax, -1 + jmp .4 + +.1: ii_rnd r9, r10, r11, r12, 13 + ii_rnd r9, r10, r11, r12, 12 +.2: ii_rnd r9, r10, r11, r12, 11 + ii_rnd r9, r10, r11, r12, 10 +.3: ii_rnd r9, r10, r11, r12, 9 + ii_rnd r9, r10, r11, r12, 8 + ii_rnd r9, r10, r11, r12, 7 + ii_rnd r9, r10, r11, r12, 6 + ii_rnd r9, r10, r11, r12, 5 + ii_rnd r9, r10, r11, r12, 4 + ii_rnd r9, r10, r11, r12, 3 + ii_rnd r9, r10, r11, r12, 2 + ii_rnd r9, r10, r11, r12, 1 + il_rnd r9, r10, r11, r12, 0 + + mov rbx, [rsp] + mov [rbx], r9d + mov [rbx+4], r10d + mov [rbx+8], r11d + mov [rbx+12], r12d + xor rax, rax +.4: mov rbx, [rsp+1*8] + mov rbp, [rsp+2*8] + mov r12, [rsp+3*8] +%ifdef __GNUC__ + add rsp, 4*8 + ret +%else + mov rsi, [rsp+4*8] + mov rdi, [rsp+5*8] + %ifdef _SEH_ + add rsp, 7*8 + ret + endproc_frame + %else + add rsp, 6*8 + ret + %endif +%endif + +%endif diff --git a/Crypto/Aes_x86.asm b/Crypto/Aes_x86.asm new file mode 100644 index 0000000..239da3e --- /dev/null +++ b/Crypto/Aes_x86.asm @@ -0,0 +1,646 @@ + +; --------------------------------------------------------------------------- +; Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved. +; +; LICENSE TERMS +; +; The free distribution and use of this software is allowed (with or without +; changes) provided that: +; +; 1. source code distributions include the above copyright notice, this +; list of conditions and the following disclaimer; +; +; 2. binary distributions include the above copyright notice, this list +; of conditions and the following disclaimer in their documentation; +; +; 3. the name of the copyright holder is not used to endorse products +; built using this software without specific written permission. +; +; DISCLAIMER +; +; This software is provided 'as is' with no explicit or implied warranties +; in respect of its properties, including, but not limited to, correctness +; and/or fitness for purpose. +; --------------------------------------------------------------------------- +; Issue 20/12/2007 +; +; This code requires ASM_X86_V1C to be set in aesopt.h. It requires the C files +; aeskey.c and aestab.c for support. + +; +; Adapted for TrueCrypt: +; - Compatibility with NASM and GCC +; + +; An AES implementation for x86 processors using the YASM (or NASM) assembler. +; This is an assembler implementation that covers encryption and decryption +; only and is intended as a replacement of the C file aescrypt.c. It hence +; requires the file aeskey.c for keying and aestab.c for the AES tables. It +; employs full tables rather than compressed tables. + +; This code provides the standard AES block size (128 bits, 16 bytes) and the +; three standard AES key sizes (128, 192 and 256 bits). It has the same call +; interface as my C implementation. The ebx, esi, edi and ebp registers are +; preserved across calls but eax, ecx and edx and the artihmetic status flags +; are not. It is also important that the defines below match those used in the +; C code. This code uses the VC++ register saving conentions; if it is used +; with another compiler, conventions for using and saving registers may need to +; be checked (and calling conventions). The YASM command line for the VC++ +; custom build step is: +; +; yasm -Xvc -f win32 -o "$(TargetDir)\$(InputName).obj" "$(InputPath)" +; +; The calling intefaces are: +; +; AES_RETURN aes_encrypt(const unsigned char in_blk[], +; unsigned char out_blk[], const aes_encrypt_ctx cx[1]); +; +; AES_RETURN aes_decrypt(const unsigned char in_blk[], +; unsigned char out_blk[], const aes_decrypt_ctx cx[1]); +; +; AES_RETURN aes_encrypt_key(const unsigned char key[], +; const aes_encrypt_ctx cx[1]); +; +; AES_RETURN aes_decrypt_key(const unsigned char key[], +; const aes_decrypt_ctx cx[1]); +; +; AES_RETURN aes_encrypt_key(const unsigned char key[], +; unsigned int len, const aes_decrypt_ctx cx[1]); +; +; AES_RETURN aes_decrypt_key(const unsigned char key[], +; unsigned int len, const aes_decrypt_ctx cx[1]); +; +; where is 128, 102 or 256. In the last two calls the length can be in +; either bits or bytes. +; +; Comment in/out the following lines to obtain the desired subroutines. These +; selections MUST match those in the C header file aes.h + +; %define AES_128 ; define if AES with 128 bit keys is needed +; %define AES_192 ; define if AES with 192 bit keys is needed +%define AES_256 ; define if AES with 256 bit keys is needed +; %define AES_VAR ; define if a variable key size is needed +%define ENCRYPTION ; define if encryption is needed +%define DECRYPTION ; define if decryption is needed +%define AES_REV_DKS ; define if key decryption schedule is reversed +%define LAST_ROUND_TABLES ; define if tables are to be used for last round + +; offsets to parameters + +in_blk equ 4 ; input byte array address parameter +out_blk equ 8 ; output byte array address parameter +ctx equ 12 ; AES context structure +stk_spc equ 20 ; stack space +%define parms 12 ; parameter space on stack + +; The encryption key schedule has the following in memory layout where N is the +; number of rounds (10, 12 or 14): +; +; lo: | input key (round 0) | ; each round is four 32-bit words +; | encryption round 1 | +; | encryption round 2 | +; .... +; | encryption round N-1 | +; hi: | encryption round N | +; +; The decryption key schedule is normally set up so that it has the same +; layout as above by actually reversing the order of the encryption key +; schedule in memory (this happens when AES_REV_DKS is set): +; +; lo: | decryption round 0 | = | encryption round N | +; | decryption round 1 | = INV_MIX_COL[ | encryption round N-1 | ] +; | decryption round 2 | = INV_MIX_COL[ | encryption round N-2 | ] +; .... .... +; | decryption round N-1 | = INV_MIX_COL[ | encryption round 1 | ] +; hi: | decryption round N | = | input key (round 0) | +; +; with rounds except the first and last modified using inv_mix_column() +; But if AES_REV_DKS is NOT set the order of keys is left as it is for +; encryption so that it has to be accessed in reverse when used for +; decryption (although the inverse mix column modifications are done) +; +; lo: | decryption round 0 | = | input key (round 0) | +; | decryption round 1 | = INV_MIX_COL[ | encryption round 1 | ] +; | decryption round 2 | = INV_MIX_COL[ | encryption round 2 | ] +; .... .... +; | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ] +; hi: | decryption round N | = | encryption round N | +; +; This layout is faster when the assembler key scheduling provided here +; is used. +; +; The DLL interface must use the _stdcall convention in which the number +; of bytes of parameter space is added after an @ to the sutine's name. +; We must also remove our parameters from the stack before return (see +; the do_exit macro). Define DLL_EXPORT for the Dynamic Link Library version. + +;%define DLL_EXPORT + +; End of user defines + +%ifdef AES_VAR +%ifndef AES_128 +%define AES_128 +%endif +%ifndef AES_192 +%define AES_192 +%endif +%ifndef AES_256 +%define AES_256 +%endif +%endif + +%ifdef AES_VAR +%define KS_LENGTH 60 +%elifdef AES_256 +%define KS_LENGTH 60 +%elifdef AES_192 +%define KS_LENGTH 52 +%else +%define KS_LENGTH 44 +%endif + +; These macros implement stack based local variables + +%macro save 2 + mov [esp+4*%1],%2 +%endmacro + +%macro restore 2 + mov %1,[esp+4*%2] +%endmacro + +; the DLL has to implement the _stdcall calling interface on return +; In this case we have to take our parameters (3 4-byte pointers) +; off the stack + +%macro do_name 1-2 parms +%ifndef DLL_EXPORT + align 32 + global %1 +%1: +%else + align 32 + global %1@%2 + export _%1@%2 +%1@%2: +%endif +%endmacro + +%macro do_call 1-2 parms +%ifndef DLL_EXPORT + call %1 + add esp,%2 +%else + call %1@%2 +%endif +%endmacro + +%macro do_exit 0-1 parms +%ifdef DLL_EXPORT + ret %1 +%else + ret +%endif +%endmacro + +%ifdef ENCRYPTION + + extern t_fn + +%define etab_0(x) [t_fn+4*x] +%define etab_1(x) [t_fn+1024+4*x] +%define etab_2(x) [t_fn+2048+4*x] +%define etab_3(x) [t_fn+3072+4*x] + +%ifdef LAST_ROUND_TABLES + + extern t_fl + +%define eltab_0(x) [t_fl+4*x] +%define eltab_1(x) [t_fl+1024+4*x] +%define eltab_2(x) [t_fl+2048+4*x] +%define eltab_3(x) [t_fl+3072+4*x] + +%else + +%define etab_b(x) byte [t_fn+3072+4*x] + +%endif + +; ROUND FUNCTION. Build column[2] on ESI and column[3] on EDI that have the +; round keys pre-loaded. Build column[0] in EBP and column[1] in EBX. +; +; Input: +; +; EAX column[0] +; EBX column[1] +; ECX column[2] +; EDX column[3] +; ESI column key[round][2] +; EDI column key[round][3] +; EBP scratch +; +; Output: +; +; EBP column[0] unkeyed +; EBX column[1] unkeyed +; ESI column[2] keyed +; EDI column[3] keyed +; EAX scratch +; ECX scratch +; EDX scratch + +%macro rnd_fun 2 + + rol ebx,16 + %1 esi, cl, 0, ebp + %1 esi, dh, 1, ebp + %1 esi, bh, 3, ebp + %1 edi, dl, 0, ebp + %1 edi, ah, 1, ebp + %1 edi, bl, 2, ebp + %2 ebp, al, 0, ebp + shr ebx,16 + and eax,0xffff0000 + or eax,ebx + shr edx,16 + %1 ebp, ah, 1, ebx + %1 ebp, dh, 3, ebx + %2 ebx, dl, 2, ebx + %1 ebx, ch, 1, edx + %1 ebx, al, 0, edx + shr eax,16 + shr ecx,16 + %1 ebp, cl, 2, edx + %1 edi, ch, 3, edx + %1 esi, al, 2, edx + %1 ebx, ah, 3, edx + +%endmacro + +; Basic MOV and XOR Operations for normal rounds + +%macro nr_xor 4 + movzx %4,%2 + xor %1,etab_%3(%4) +%endmacro + +%macro nr_mov 4 + movzx %4,%2 + mov %1,etab_%3(%4) +%endmacro + +; Basic MOV and XOR Operations for last round + +%ifdef LAST_ROUND_TABLES + + %macro lr_xor 4 + movzx %4,%2 + xor %1,eltab_%3(%4) + %endmacro + + %macro lr_mov 4 + movzx %4,%2 + mov %1,eltab_%3(%4) + %endmacro + +%else + + %macro lr_xor 4 + movzx %4,%2 + movzx %4,etab_b(%4) + %if %3 != 0 + shl %4,8*%3 + %endif + xor %1,%4 + %endmacro + + %macro lr_mov 4 + movzx %4,%2 + movzx %1,etab_b(%4) + %if %3 != 0 + shl %1,8*%3 + %endif + %endmacro + +%endif + +%macro enc_round 0 + + add ebp,16 + save 0,ebp + mov esi,[ebp+8] + mov edi,[ebp+12] + + rnd_fun nr_xor, nr_mov + + mov eax,ebp + mov ecx,esi + mov edx,edi + restore ebp,0 + xor eax,[ebp] + xor ebx,[ebp+4] + +%endmacro + +%macro enc_last_round 0 + + add ebp,16 + save 0,ebp + mov esi,[ebp+8] + mov edi,[ebp+12] + + rnd_fun lr_xor, lr_mov + + mov eax,ebp + restore ebp,0 + xor eax,[ebp] + xor ebx,[ebp+4] + +%endmacro + + section .text align=32 + +; AES Encryption Subroutine + + do_name aes_encrypt + + sub esp,stk_spc + mov [esp+16],ebp + mov [esp+12],ebx + mov [esp+ 8],esi + mov [esp+ 4],edi + + mov esi,[esp+in_blk+stk_spc] ; input pointer + mov eax,[esi ] + mov ebx,[esi+ 4] + mov ecx,[esi+ 8] + mov edx,[esi+12] + + mov ebp,[esp+ctx+stk_spc] ; key pointer + movzx edi,byte [ebp+4*KS_LENGTH] + xor eax,[ebp ] + xor ebx,[ebp+ 4] + xor ecx,[ebp+ 8] + xor edx,[ebp+12] + +; determine the number of rounds + + cmp edi,10*16 + je .3 + cmp edi,12*16 + je .2 + cmp edi,14*16 + je .1 + mov eax,-1 + jmp .5 + +.1: enc_round + enc_round +.2: enc_round + enc_round +.3: enc_round + enc_round + enc_round + enc_round + enc_round + enc_round + enc_round + enc_round + enc_round + enc_last_round + + mov edx,[esp+out_blk+stk_spc] + mov [edx],eax + mov [edx+4],ebx + mov [edx+8],esi + mov [edx+12],edi + xor eax,eax + +.5: mov ebp,[esp+16] + mov ebx,[esp+12] + mov esi,[esp+ 8] + mov edi,[esp+ 4] + add esp,stk_spc + do_exit + +%endif + +%ifdef DECRYPTION + + extern t_in + +%define dtab_0(x) [t_in+4*x] +%define dtab_1(x) [t_in+1024+4*x] +%define dtab_2(x) [t_in+2048+4*x] +%define dtab_3(x) [t_in+3072+4*x] + +%ifdef LAST_ROUND_TABLES + + extern t_il + +%define dltab_0(x) [t_il+4*x] +%define dltab_1(x) [t_il+1024+4*x] +%define dltab_2(x) [t_il+2048+4*x] +%define dltab_3(x) [t_il+3072+4*x] + +%else + + extern _t_ibox + +%define dtab_x(x) byte [_t_ibox+x] + +%endif + +%macro irn_fun 2 + + rol eax,16 + %1 esi, cl, 0, ebp + %1 esi, bh, 1, ebp + %1 esi, al, 2, ebp + %1 edi, dl, 0, ebp + %1 edi, ch, 1, ebp + %1 edi, ah, 3, ebp + %2 ebp, bl, 0, ebp + shr eax,16 + and ebx,0xffff0000 + or ebx,eax + shr ecx,16 + %1 ebp, bh, 1, eax + %1 ebp, ch, 3, eax + %2 eax, cl, 2, ecx + %1 eax, bl, 0, ecx + %1 eax, dh, 1, ecx + shr ebx,16 + shr edx,16 + %1 esi, dh, 3, ecx + %1 ebp, dl, 2, ecx + %1 eax, bh, 3, ecx + %1 edi, bl, 2, ecx + +%endmacro + +; Basic MOV and XOR Operations for normal rounds + +%macro ni_xor 4 + movzx %4,%2 + xor %1,dtab_%3(%4) +%endmacro + +%macro ni_mov 4 + movzx %4,%2 + mov %1,dtab_%3(%4) +%endmacro + +; Basic MOV and XOR Operations for last round + +%ifdef LAST_ROUND_TABLES + +%macro li_xor 4 + movzx %4,%2 + xor %1,dltab_%3(%4) +%endmacro + +%macro li_mov 4 + movzx %4,%2 + mov %1,dltab_%3(%4) +%endmacro + +%else + + %macro li_xor 4 + movzx %4,%2 + movzx %4,dtab_x(%4) + %if %3 != 0 + shl %4,8*%3 + %endif + xor %1,%4 + %endmacro + + %macro li_mov 4 + movzx %4,%2 + movzx %1,dtab_x(%4) + %if %3 != 0 + shl %1,8*%3 + %endif + %endmacro + +%endif + +%macro dec_round 0 + +%ifdef AES_REV_DKS + add ebp,16 +%else + sub ebp,16 +%endif + save 0,ebp + mov esi,[ebp+8] + mov edi,[ebp+12] + + irn_fun ni_xor, ni_mov + + mov ebx,ebp + mov ecx,esi + mov edx,edi + restore ebp,0 + xor eax,[ebp] + xor ebx,[ebp+4] + +%endmacro + +%macro dec_last_round 0 + +%ifdef AES_REV_DKS + add ebp,16 +%else + sub ebp,16 +%endif + save 0,ebp + mov esi,[ebp+8] + mov edi,[ebp+12] + + irn_fun li_xor, li_mov + + mov ebx,ebp + restore ebp,0 + xor eax,[ebp] + xor ebx,[ebp+4] + +%endmacro + + section .text + +; AES Decryption Subroutine + + do_name aes_decrypt + + sub esp,stk_spc + mov [esp+16],ebp + mov [esp+12],ebx + mov [esp+ 8],esi + mov [esp+ 4],edi + +; input four columns and xor in first round key + + mov esi,[esp+in_blk+stk_spc] ; input pointer + mov eax,[esi ] + mov ebx,[esi+ 4] + mov ecx,[esi+ 8] + mov edx,[esi+12] + lea esi,[esi+16] + + mov ebp,[esp+ctx+stk_spc] ; key pointer + movzx edi,byte[ebp+4*KS_LENGTH] +%ifndef AES_REV_DKS ; if decryption key schedule is not reversed + lea ebp,[ebp+edi] ; we have to access it from the top down +%endif + xor eax,[ebp ] ; key schedule + xor ebx,[ebp+ 4] + xor ecx,[ebp+ 8] + xor edx,[ebp+12] + +; determine the number of rounds + + cmp edi,10*16 + je .3 + cmp edi,12*16 + je .2 + cmp edi,14*16 + je .1 + mov eax,-1 + jmp .5 + +.1: dec_round + dec_round +.2: dec_round + dec_round +.3: dec_round + dec_round + dec_round + dec_round + dec_round + dec_round + dec_round + dec_round + dec_round + dec_last_round + +; move final values to the output array. + + mov ebp,[esp+out_blk+stk_spc] + mov [ebp],eax + mov [ebp+4],ebx + mov [ebp+8],esi + mov [ebp+12],edi + xor eax,eax + +.5: mov ebp,[esp+16] + mov ebx,[esp+12] + mov esi,[esp+ 8] + mov edi,[esp+ 4] + add esp,stk_spc + do_exit + +%endif diff --git a/Crypto/Aescrypt.c b/Crypto/Aescrypt.c new file mode 100644 index 0000000..4617598 --- /dev/null +++ b/Crypto/Aescrypt.c @@ -0,0 +1,311 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software is allowed (with or without + changes) provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue Date: 20/12/2007 +*/ + +#include "Aesopt.h" +#include "Aestab.h" + +#if defined(__cplusplus) +extern "C" +{ +#endif + +#define si(y,x,k,c) (s(y,c) = word_in(x, c) ^ (k)[c]) +#define so(y,x,c) word_out(y, c, s(x,c)) + +#if defined(ARRAYS) +#define locals(y,x) x[4],y[4] +#else +#define locals(y,x) x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3 +#endif + +#define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \ + s(y,2) = s(x,2); s(y,3) = s(x,3); +#define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3) +#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3) +#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3) + +#if ( FUNCS_IN_C & ENCRYPTION_IN_C ) + +/* Visual C++ .Net v7.1 provides the fastest encryption code when using + Pentium optimiation with small code but this is poor for decryption + so we need to control this with the following VC++ pragmas +*/ + +#if defined( _MSC_VER ) && !defined( _WIN64 ) +#pragma optimize( "s", on ) +#endif + +/* Given the column (c) of the output state variable, the following + macros give the input state variables which are needed in its + computation for each row (r) of the state. All the alternative + macros give the same end values but expand into different ways + of calculating these values. In particular the complex macro + used for dynamically variable block sizes is designed to expand + to a compile time constant whenever possible but will expand to + conditional clauses on some branches (I am grateful to Frank + Yellin for this construction) +*/ + +#define fwd_var(x,r,c)\ + ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\ + : r == 1 ? ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))\ + : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\ + : ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2))) + +#if defined(FT4_SET) +#undef dec_fmvars +#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,n),fwd_var,rf1,c)) +#elif defined(FT1_SET) +#undef dec_fmvars +#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,upr,t_use(f,n),fwd_var,rf1,c)) +#else +#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ fwd_mcol(no_table(x,t_use(s,box),fwd_var,rf1,c))) +#endif + +#if defined(FL4_SET) +#define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,l),fwd_var,rf1,c)) +#elif defined(FL1_SET) +#define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,ups,t_use(f,l),fwd_var,rf1,c)) +#else +#define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ no_table(x,t_use(s,box),fwd_var,rf1,c)) +#endif + +AES_RETURN aes_encrypt(const unsigned char *in, unsigned char *out, const aes_encrypt_ctx cx[1]) +{ uint_32t locals(b0, b1); + const uint_32t *kp; +#if defined( dec_fmvars ) + dec_fmvars; /* declare variables for fwd_mcol() if needed */ +#endif + +#if defined( AES_ERR_CHK ) + if( cx->inf.b[0] != 10 * 16 && cx->inf.b[0] != 12 * 16 && cx->inf.b[0] != 14 * 16 ) + return EXIT_FAILURE; +#endif + + kp = cx->ks; + state_in(b0, in, kp); + +#if (ENC_UNROLL == FULL) + + switch(cx->inf.b[0]) + { + case 14 * 16: + round(fwd_rnd, b1, b0, kp + 1 * N_COLS); + round(fwd_rnd, b0, b1, kp + 2 * N_COLS); + kp += 2 * N_COLS; + case 12 * 16: + round(fwd_rnd, b1, b0, kp + 1 * N_COLS); + round(fwd_rnd, b0, b1, kp + 2 * N_COLS); + kp += 2 * N_COLS; + case 10 * 16: + round(fwd_rnd, b1, b0, kp + 1 * N_COLS); + round(fwd_rnd, b0, b1, kp + 2 * N_COLS); + round(fwd_rnd, b1, b0, kp + 3 * N_COLS); + round(fwd_rnd, b0, b1, kp + 4 * N_COLS); + round(fwd_rnd, b1, b0, kp + 5 * N_COLS); + round(fwd_rnd, b0, b1, kp + 6 * N_COLS); + round(fwd_rnd, b1, b0, kp + 7 * N_COLS); + round(fwd_rnd, b0, b1, kp + 8 * N_COLS); + round(fwd_rnd, b1, b0, kp + 9 * N_COLS); + round(fwd_lrnd, b0, b1, kp +10 * N_COLS); + } + +#else + +#if (ENC_UNROLL == PARTIAL) + { uint_32t rnd; + for(rnd = 0; rnd < (cx->inf.b[0] >> 5) - 1; ++rnd) + { + kp += N_COLS; + round(fwd_rnd, b1, b0, kp); + kp += N_COLS; + round(fwd_rnd, b0, b1, kp); + } + kp += N_COLS; + round(fwd_rnd, b1, b0, kp); +#else + { uint_32t rnd; + for(rnd = 0; rnd < (cx->inf.b[0] >> 4) - 1; ++rnd) + { + kp += N_COLS; + round(fwd_rnd, b1, b0, kp); + l_copy(b0, b1); + } +#endif + kp += N_COLS; + round(fwd_lrnd, b0, b1, kp); + } +#endif + + state_out(out, b0); + +#if defined( AES_ERR_CHK ) + return EXIT_SUCCESS; +#endif +} + +#endif + +#if ( FUNCS_IN_C & DECRYPTION_IN_C) + +/* Visual C++ .Net v7.1 provides the fastest encryption code when using + Pentium optimiation with small code but this is poor for decryption + so we need to control this with the following VC++ pragmas +*/ + +#if defined( _MSC_VER ) && !defined( _WIN64 ) +#pragma optimize( "t", on ) +#endif + +/* Given the column (c) of the output state variable, the following + macros give the input state variables which are needed in its + computation for each row (r) of the state. All the alternative + macros give the same end values but expand into different ways + of calculating these values. In particular the complex macro + used for dynamically variable block sizes is designed to expand + to a compile time constant whenever possible but will expand to + conditional clauses on some branches (I am grateful to Frank + Yellin for this construction) +*/ + +#define inv_var(x,r,c)\ + ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\ + : r == 1 ? ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2))\ + : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\ + : ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))) + +#if defined(IT4_SET) +#undef dec_imvars +#define inv_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(i,n),inv_var,rf1,c)) +#elif defined(IT1_SET) +#undef dec_imvars +#define inv_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,upr,t_use(i,n),inv_var,rf1,c)) +#else +#define inv_rnd(y,x,k,c) (s(y,c) = inv_mcol((k)[c] ^ no_table(x,t_use(i,box),inv_var,rf1,c))) +#endif + +#if defined(IL4_SET) +#define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(i,l),inv_var,rf1,c)) +#elif defined(IL1_SET) +#define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,ups,t_use(i,l),inv_var,rf1,c)) +#else +#define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ no_table(x,t_use(i,box),inv_var,rf1,c)) +#endif + +/* This code can work with the decryption key schedule in the */ +/* order that is used for encrytpion (where the 1st decryption */ +/* round key is at the high end ot the schedule) or with a key */ +/* schedule that has been reversed to put the 1st decryption */ +/* round key at the low end of the schedule in memory (when */ +/* AES_REV_DKS is defined) */ + +#ifdef AES_REV_DKS +#define key_ofs 0 +#define rnd_key(n) (kp + n * N_COLS) +#else +#define key_ofs 1 +#define rnd_key(n) (kp - n * N_COLS) +#endif + +AES_RETURN aes_decrypt(const unsigned char *in, unsigned char *out, const aes_decrypt_ctx cx[1]) +{ uint_32t locals(b0, b1); +#if defined( dec_imvars ) + dec_imvars; /* declare variables for inv_mcol() if needed */ +#endif + const uint_32t *kp; + +#if defined( AES_ERR_CHK ) + if( cx->inf.b[0] != 10 * 16 && cx->inf.b[0] != 12 * 16 && cx->inf.b[0] != 14 * 16 ) + return EXIT_FAILURE; +#endif + + kp = cx->ks + (key_ofs ? (cx->inf.b[0] >> 2) : 0); + state_in(b0, in, kp); + +#if (DEC_UNROLL == FULL) + + kp = cx->ks + (key_ofs ? 0 : (cx->inf.b[0] >> 2)); + switch(cx->inf.b[0]) + { + case 14 * 16: + round(inv_rnd, b1, b0, rnd_key(-13)); + round(inv_rnd, b0, b1, rnd_key(-12)); + case 12 * 16: + round(inv_rnd, b1, b0, rnd_key(-11)); + round(inv_rnd, b0, b1, rnd_key(-10)); + case 10 * 16: + round(inv_rnd, b1, b0, rnd_key(-9)); + round(inv_rnd, b0, b1, rnd_key(-8)); + round(inv_rnd, b1, b0, rnd_key(-7)); + round(inv_rnd, b0, b1, rnd_key(-6)); + round(inv_rnd, b1, b0, rnd_key(-5)); + round(inv_rnd, b0, b1, rnd_key(-4)); + round(inv_rnd, b1, b0, rnd_key(-3)); + round(inv_rnd, b0, b1, rnd_key(-2)); + round(inv_rnd, b1, b0, rnd_key(-1)); + round(inv_lrnd, b0, b1, rnd_key( 0)); + } + +#else + +#if (DEC_UNROLL == PARTIAL) + { uint_32t rnd; + for(rnd = 0; rnd < (cx->inf.b[0] >> 5) - 1; ++rnd) + { + kp = rnd_key(1); + round(inv_rnd, b1, b0, kp); + kp = rnd_key(1); + round(inv_rnd, b0, b1, kp); + } + kp = rnd_key(1); + round(inv_rnd, b1, b0, kp); +#else + { uint_32t rnd; + for(rnd = 0; rnd < (cx->inf.b[0] >> 4) - 1; ++rnd) + { + kp = rnd_key(1); + round(inv_rnd, b1, b0, kp); + l_copy(b0, b1); + } +#endif + kp = rnd_key(1); + round(inv_lrnd, b0, b1, kp); + } +#endif + + state_out(out, b0); + +#if defined( AES_ERR_CHK ) + return EXIT_SUCCESS; +#endif +} + +#endif + +#if defined(__cplusplus) +} +#endif diff --git a/Crypto/Aeskey.c b/Crypto/Aeskey.c new file mode 100644 index 0000000..96933e2 --- /dev/null +++ b/Crypto/Aeskey.c @@ -0,0 +1,573 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software is allowed (with or without + changes) provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue Date: 20/12/2007 +*/ + +#include "Aesopt.h" +#include "Aestab.h" + +#ifdef USE_VIA_ACE_IF_PRESENT +# include "aes_via_ace.h" +#endif + +#if defined(__cplusplus) +extern "C" +{ +#endif + +/* Initialise the key schedule from the user supplied key. The key + length can be specified in bytes, with legal values of 16, 24 + and 32, or in bits, with legal values of 128, 192 and 256. These + values correspond with Nk values of 4, 6 and 8 respectively. + + The following macros implement a single cycle in the key + schedule generation process. The number of cycles needed + for each cx->n_col and nk value is: + + nk = 4 5 6 7 8 + ------------------------------ + cx->n_col = 4 10 9 8 7 7 + cx->n_col = 5 14 11 10 9 9 + cx->n_col = 6 19 15 12 11 11 + cx->n_col = 7 21 19 16 13 14 + cx->n_col = 8 29 23 19 17 14 +*/ + +#if (FUNCS_IN_C & ENC_KEYING_IN_C) + +#if defined(AES_128) || defined(AES_VAR) + +#define ke4(k,i) \ +{ k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; \ + k[4*(i)+5] = ss[1] ^= ss[0]; \ + k[4*(i)+6] = ss[2] ^= ss[1]; \ + k[4*(i)+7] = ss[3] ^= ss[2]; \ +} + +AES_RETURN aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]) +{ uint_32t ss[4]; + + cx->ks[0] = ss[0] = word_in(key, 0); + cx->ks[1] = ss[1] = word_in(key, 1); + cx->ks[2] = ss[2] = word_in(key, 2); + cx->ks[3] = ss[3] = word_in(key, 3); + +#if ENC_UNROLL == NONE + { uint_32t i; + for(i = 0; i < 9; ++i) + ke4(cx->ks, i); + } +#else + ke4(cx->ks, 0); ke4(cx->ks, 1); + ke4(cx->ks, 2); ke4(cx->ks, 3); + ke4(cx->ks, 4); ke4(cx->ks, 5); + ke4(cx->ks, 6); ke4(cx->ks, 7); + ke4(cx->ks, 8); +#endif + ke4(cx->ks, 9); + cx->inf.l = 0; + cx->inf.b[0] = 10 * 16; + +#ifdef USE_VIA_ACE_IF_PRESENT + if(VIA_ACE_AVAILABLE) + cx->inf.b[1] = 0xff; +#endif + +#if defined( AES_ERR_CHK ) + return EXIT_SUCCESS; +#endif +} + +#endif + +#if defined(AES_192) || defined(AES_VAR) + +#define kef6(k,i) \ +{ k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; \ + k[6*(i)+ 7] = ss[1] ^= ss[0]; \ + k[6*(i)+ 8] = ss[2] ^= ss[1]; \ + k[6*(i)+ 9] = ss[3] ^= ss[2]; \ +} + +#define ke6(k,i) \ +{ kef6(k,i); \ + k[6*(i)+10] = ss[4] ^= ss[3]; \ + k[6*(i)+11] = ss[5] ^= ss[4]; \ +} + +AES_RETURN aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]) +{ uint_32t ss[6]; + + cx->ks[0] = ss[0] = word_in(key, 0); + cx->ks[1] = ss[1] = word_in(key, 1); + cx->ks[2] = ss[2] = word_in(key, 2); + cx->ks[3] = ss[3] = word_in(key, 3); + cx->ks[4] = ss[4] = word_in(key, 4); + cx->ks[5] = ss[5] = word_in(key, 5); + +#if ENC_UNROLL == NONE + { uint_32t i; + for(i = 0; i < 7; ++i) + ke6(cx->ks, i); + } +#else + ke6(cx->ks, 0); ke6(cx->ks, 1); + ke6(cx->ks, 2); ke6(cx->ks, 3); + ke6(cx->ks, 4); ke6(cx->ks, 5); + ke6(cx->ks, 6); +#endif + kef6(cx->ks, 7); + cx->inf.l = 0; + cx->inf.b[0] = 12 * 16; + +#ifdef USE_VIA_ACE_IF_PRESENT + if(VIA_ACE_AVAILABLE) + cx->inf.b[1] = 0xff; +#endif + +#if defined( AES_ERR_CHK ) + return EXIT_SUCCESS; +#endif +} + +#endif + +#if defined(AES_256) || defined(AES_VAR) + +#define kef8(k,i) \ +{ k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; \ + k[8*(i)+ 9] = ss[1] ^= ss[0]; \ + k[8*(i)+10] = ss[2] ^= ss[1]; \ + k[8*(i)+11] = ss[3] ^= ss[2]; \ +} + +#define ke8(k,i) \ +{ kef8(k,i); \ + k[8*(i)+12] = ss[4] ^= ls_box(ss[3],0); \ + k[8*(i)+13] = ss[5] ^= ss[4]; \ + k[8*(i)+14] = ss[6] ^= ss[5]; \ + k[8*(i)+15] = ss[7] ^= ss[6]; \ +} + +AES_RETURN aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]) +{ uint_32t ss[8]; + + cx->ks[0] = ss[0] = word_in(key, 0); + cx->ks[1] = ss[1] = word_in(key, 1); + cx->ks[2] = ss[2] = word_in(key, 2); + cx->ks[3] = ss[3] = word_in(key, 3); + cx->ks[4] = ss[4] = word_in(key, 4); + cx->ks[5] = ss[5] = word_in(key, 5); + cx->ks[6] = ss[6] = word_in(key, 6); + cx->ks[7] = ss[7] = word_in(key, 7); + +#if ENC_UNROLL == NONE + { uint_32t i; + for(i = 0; i < 6; ++i) + ke8(cx->ks, i); + } +#else + ke8(cx->ks, 0); ke8(cx->ks, 1); + ke8(cx->ks, 2); ke8(cx->ks, 3); + ke8(cx->ks, 4); ke8(cx->ks, 5); +#endif + kef8(cx->ks, 6); + cx->inf.l = 0; + cx->inf.b[0] = 14 * 16; + +#ifdef USE_VIA_ACE_IF_PRESENT + if(VIA_ACE_AVAILABLE) + cx->inf.b[1] = 0xff; +#endif + +#if defined( AES_ERR_CHK ) + return EXIT_SUCCESS; +#endif +} + +#endif + +#if defined(AES_VAR) + +AES_RETURN aes_encrypt_key(const unsigned char *key, int key_len, aes_encrypt_ctx cx[1]) +{ + switch(key_len) + { +#if defined( AES_ERR_CHK ) + case 16: case 128: return aes_encrypt_key128(key, cx); + case 24: case 192: return aes_encrypt_key192(key, cx); + case 32: case 256: return aes_encrypt_key256(key, cx); + default: return EXIT_FAILURE; +#else + case 16: case 128: aes_encrypt_key128(key, cx); return; + case 24: case 192: aes_encrypt_key192(key, cx); return; + case 32: case 256: aes_encrypt_key256(key, cx); return; +#endif + } +} + +#endif + +#endif + +#if (FUNCS_IN_C & DEC_KEYING_IN_C) + +/* this is used to store the decryption round keys */ +/* in forward or reverse order */ + +#ifdef AES_REV_DKS +#define v(n,i) ((n) - (i) + 2 * ((i) & 3)) +#else +#define v(n,i) (i) +#endif + +#if DEC_ROUND == NO_TABLES +#define ff(x) (x) +#else +#define ff(x) inv_mcol(x) +#if defined( dec_imvars ) +#define d_vars dec_imvars +#endif +#endif + +#if defined(AES_128) || defined(AES_VAR) + +#define k4e(k,i) \ +{ k[v(40,(4*(i))+4)] = ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; \ + k[v(40,(4*(i))+5)] = ss[1] ^= ss[0]; \ + k[v(40,(4*(i))+6)] = ss[2] ^= ss[1]; \ + k[v(40,(4*(i))+7)] = ss[3] ^= ss[2]; \ +} + +#if 1 + +#define kdf4(k,i) \ +{ ss[0] = ss[0] ^ ss[2] ^ ss[1] ^ ss[3]; \ + ss[1] = ss[1] ^ ss[3]; \ + ss[2] = ss[2] ^ ss[3]; \ + ss[4] = ls_box(ss[(i+3) % 4], 3) ^ t_use(r,c)[i]; \ + ss[i % 4] ^= ss[4]; \ + ss[4] ^= k[v(40,(4*(i)))]; k[v(40,(4*(i))+4)] = ff(ss[4]); \ + ss[4] ^= k[v(40,(4*(i))+1)]; k[v(40,(4*(i))+5)] = ff(ss[4]); \ + ss[4] ^= k[v(40,(4*(i))+2)]; k[v(40,(4*(i))+6)] = ff(ss[4]); \ + ss[4] ^= k[v(40,(4*(i))+3)]; k[v(40,(4*(i))+7)] = ff(ss[4]); \ +} + +#define kd4(k,i) \ +{ ss[4] = ls_box(ss[(i+3) % 4], 3) ^ t_use(r,c)[i]; \ + ss[i % 4] ^= ss[4]; ss[4] = ff(ss[4]); \ + k[v(40,(4*(i))+4)] = ss[4] ^= k[v(40,(4*(i)))]; \ + k[v(40,(4*(i))+5)] = ss[4] ^= k[v(40,(4*(i))+1)]; \ + k[v(40,(4*(i))+6)] = ss[4] ^= k[v(40,(4*(i))+2)]; \ + k[v(40,(4*(i))+7)] = ss[4] ^= k[v(40,(4*(i))+3)]; \ +} + +#define kdl4(k,i) \ +{ ss[4] = ls_box(ss[(i+3) % 4], 3) ^ t_use(r,c)[i]; ss[i % 4] ^= ss[4]; \ + k[v(40,(4*(i))+4)] = (ss[0] ^= ss[1]) ^ ss[2] ^ ss[3]; \ + k[v(40,(4*(i))+5)] = ss[1] ^ ss[3]; \ + k[v(40,(4*(i))+6)] = ss[0]; \ + k[v(40,(4*(i))+7)] = ss[1]; \ +} + +#else + +#define kdf4(k,i) \ +{ ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; k[v(40,(4*(i))+ 4)] = ff(ss[0]); \ + ss[1] ^= ss[0]; k[v(40,(4*(i))+ 5)] = ff(ss[1]); \ + ss[2] ^= ss[1]; k[v(40,(4*(i))+ 6)] = ff(ss[2]); \ + ss[3] ^= ss[2]; k[v(40,(4*(i))+ 7)] = ff(ss[3]); \ +} + +#define kd4(k,i) \ +{ ss[4] = ls_box(ss[3],3) ^ t_use(r,c)[i]; \ + ss[0] ^= ss[4]; ss[4] = ff(ss[4]); k[v(40,(4*(i))+ 4)] = ss[4] ^= k[v(40,(4*(i)))]; \ + ss[1] ^= ss[0]; k[v(40,(4*(i))+ 5)] = ss[4] ^= k[v(40,(4*(i))+ 1)]; \ + ss[2] ^= ss[1]; k[v(40,(4*(i))+ 6)] = ss[4] ^= k[v(40,(4*(i))+ 2)]; \ + ss[3] ^= ss[2]; k[v(40,(4*(i))+ 7)] = ss[4] ^= k[v(40,(4*(i))+ 3)]; \ +} + +#define kdl4(k,i) \ +{ ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; k[v(40,(4*(i))+ 4)] = ss[0]; \ + ss[1] ^= ss[0]; k[v(40,(4*(i))+ 5)] = ss[1]; \ + ss[2] ^= ss[1]; k[v(40,(4*(i))+ 6)] = ss[2]; \ + ss[3] ^= ss[2]; k[v(40,(4*(i))+ 7)] = ss[3]; \ +} + +#endif + +AES_RETURN aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]) +{ uint_32t ss[5]; +#if defined( d_vars ) + d_vars; +#endif + cx->ks[v(40,(0))] = ss[0] = word_in(key, 0); + cx->ks[v(40,(1))] = ss[1] = word_in(key, 1); + cx->ks[v(40,(2))] = ss[2] = word_in(key, 2); + cx->ks[v(40,(3))] = ss[3] = word_in(key, 3); + +#if DEC_UNROLL == NONE + { uint_32t i; + for(i = 0; i < 10; ++i) + k4e(cx->ks, i); +#if !(DEC_ROUND == NO_TABLES) + for(i = N_COLS; i < 10 * N_COLS; ++i) + cx->ks[i] = inv_mcol(cx->ks[i]); +#endif + } +#else + kdf4(cx->ks, 0); kd4(cx->ks, 1); + kd4(cx->ks, 2); kd4(cx->ks, 3); + kd4(cx->ks, 4); kd4(cx->ks, 5); + kd4(cx->ks, 6); kd4(cx->ks, 7); + kd4(cx->ks, 8); kdl4(cx->ks, 9); +#endif + cx->inf.l = 0; + cx->inf.b[0] = 10 * 16; + +#ifdef USE_VIA_ACE_IF_PRESENT + if(VIA_ACE_AVAILABLE) + cx->inf.b[1] = 0xff; +#endif + +#if defined( AES_ERR_CHK ) + return EXIT_SUCCESS; +#endif +} + +#endif + +#if defined(AES_192) || defined(AES_VAR) + +#define k6ef(k,i) \ +{ k[v(48,(6*(i))+ 6)] = ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; \ + k[v(48,(6*(i))+ 7)] = ss[1] ^= ss[0]; \ + k[v(48,(6*(i))+ 8)] = ss[2] ^= ss[1]; \ + k[v(48,(6*(i))+ 9)] = ss[3] ^= ss[2]; \ +} + +#define k6e(k,i) \ +{ k6ef(k,i); \ + k[v(48,(6*(i))+10)] = ss[4] ^= ss[3]; \ + k[v(48,(6*(i))+11)] = ss[5] ^= ss[4]; \ +} + +#define kdf6(k,i) \ +{ ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; k[v(48,(6*(i))+ 6)] = ff(ss[0]); \ + ss[1] ^= ss[0]; k[v(48,(6*(i))+ 7)] = ff(ss[1]); \ + ss[2] ^= ss[1]; k[v(48,(6*(i))+ 8)] = ff(ss[2]); \ + ss[3] ^= ss[2]; k[v(48,(6*(i))+ 9)] = ff(ss[3]); \ + ss[4] ^= ss[3]; k[v(48,(6*(i))+10)] = ff(ss[4]); \ + ss[5] ^= ss[4]; k[v(48,(6*(i))+11)] = ff(ss[5]); \ +} + +#define kd6(k,i) \ +{ ss[6] = ls_box(ss[5],3) ^ t_use(r,c)[i]; \ + ss[0] ^= ss[6]; ss[6] = ff(ss[6]); k[v(48,(6*(i))+ 6)] = ss[6] ^= k[v(48,(6*(i)))]; \ + ss[1] ^= ss[0]; k[v(48,(6*(i))+ 7)] = ss[6] ^= k[v(48,(6*(i))+ 1)]; \ + ss[2] ^= ss[1]; k[v(48,(6*(i))+ 8)] = ss[6] ^= k[v(48,(6*(i))+ 2)]; \ + ss[3] ^= ss[2]; k[v(48,(6*(i))+ 9)] = ss[6] ^= k[v(48,(6*(i))+ 3)]; \ + ss[4] ^= ss[3]; k[v(48,(6*(i))+10)] = ss[6] ^= k[v(48,(6*(i))+ 4)]; \ + ss[5] ^= ss[4]; k[v(48,(6*(i))+11)] = ss[6] ^= k[v(48,(6*(i))+ 5)]; \ +} + +#define kdl6(k,i) \ +{ ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; k[v(48,(6*(i))+ 6)] = ss[0]; \ + ss[1] ^= ss[0]; k[v(48,(6*(i))+ 7)] = ss[1]; \ + ss[2] ^= ss[1]; k[v(48,(6*(i))+ 8)] = ss[2]; \ + ss[3] ^= ss[2]; k[v(48,(6*(i))+ 9)] = ss[3]; \ +} + +AES_RETURN aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]) +{ uint_32t ss[7]; +#if defined( d_vars ) + d_vars; +#endif + cx->ks[v(48,(0))] = ss[0] = word_in(key, 0); + cx->ks[v(48,(1))] = ss[1] = word_in(key, 1); + cx->ks[v(48,(2))] = ss[2] = word_in(key, 2); + cx->ks[v(48,(3))] = ss[3] = word_in(key, 3); + +#if DEC_UNROLL == NONE + cx->ks[v(48,(4))] = ss[4] = word_in(key, 4); + cx->ks[v(48,(5))] = ss[5] = word_in(key, 5); + { uint_32t i; + + for(i = 0; i < 7; ++i) + k6e(cx->ks, i); + k6ef(cx->ks, 7); +#if !(DEC_ROUND == NO_TABLES) + for(i = N_COLS; i < 12 * N_COLS; ++i) + cx->ks[i] = inv_mcol(cx->ks[i]); +#endif + } +#else + cx->ks[v(48,(4))] = ff(ss[4] = word_in(key, 4)); + cx->ks[v(48,(5))] = ff(ss[5] = word_in(key, 5)); + kdf6(cx->ks, 0); kd6(cx->ks, 1); + kd6(cx->ks, 2); kd6(cx->ks, 3); + kd6(cx->ks, 4); kd6(cx->ks, 5); + kd6(cx->ks, 6); kdl6(cx->ks, 7); +#endif + cx->inf.l = 0; + cx->inf.b[0] = 12 * 16; + +#ifdef USE_VIA_ACE_IF_PRESENT + if(VIA_ACE_AVAILABLE) + cx->inf.b[1] = 0xff; +#endif + +#if defined( AES_ERR_CHK ) + return EXIT_SUCCESS; +#endif +} + +#endif + +#if defined(AES_256) || defined(AES_VAR) + +#define k8ef(k,i) \ +{ k[v(56,(8*(i))+ 8)] = ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; \ + k[v(56,(8*(i))+ 9)] = ss[1] ^= ss[0]; \ + k[v(56,(8*(i))+10)] = ss[2] ^= ss[1]; \ + k[v(56,(8*(i))+11)] = ss[3] ^= ss[2]; \ +} + +#define k8e(k,i) \ +{ k8ef(k,i); \ + k[v(56,(8*(i))+12)] = ss[4] ^= ls_box(ss[3],0); \ + k[v(56,(8*(i))+13)] = ss[5] ^= ss[4]; \ + k[v(56,(8*(i))+14)] = ss[6] ^= ss[5]; \ + k[v(56,(8*(i))+15)] = ss[7] ^= ss[6]; \ +} + +#define kdf8(k,i) \ +{ ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; k[v(56,(8*(i))+ 8)] = ff(ss[0]); \ + ss[1] ^= ss[0]; k[v(56,(8*(i))+ 9)] = ff(ss[1]); \ + ss[2] ^= ss[1]; k[v(56,(8*(i))+10)] = ff(ss[2]); \ + ss[3] ^= ss[2]; k[v(56,(8*(i))+11)] = ff(ss[3]); \ + ss[4] ^= ls_box(ss[3],0); k[v(56,(8*(i))+12)] = ff(ss[4]); \ + ss[5] ^= ss[4]; k[v(56,(8*(i))+13)] = ff(ss[5]); \ + ss[6] ^= ss[5]; k[v(56,(8*(i))+14)] = ff(ss[6]); \ + ss[7] ^= ss[6]; k[v(56,(8*(i))+15)] = ff(ss[7]); \ +} + +#define kd8(k,i) \ +{ ss[8] = ls_box(ss[7],3) ^ t_use(r,c)[i]; \ + ss[0] ^= ss[8]; ss[8] = ff(ss[8]); k[v(56,(8*(i))+ 8)] = ss[8] ^= k[v(56,(8*(i)))]; \ + ss[1] ^= ss[0]; k[v(56,(8*(i))+ 9)] = ss[8] ^= k[v(56,(8*(i))+ 1)]; \ + ss[2] ^= ss[1]; k[v(56,(8*(i))+10)] = ss[8] ^= k[v(56,(8*(i))+ 2)]; \ + ss[3] ^= ss[2]; k[v(56,(8*(i))+11)] = ss[8] ^= k[v(56,(8*(i))+ 3)]; \ + ss[8] = ls_box(ss[3],0); \ + ss[4] ^= ss[8]; ss[8] = ff(ss[8]); k[v(56,(8*(i))+12)] = ss[8] ^= k[v(56,(8*(i))+ 4)]; \ + ss[5] ^= ss[4]; k[v(56,(8*(i))+13)] = ss[8] ^= k[v(56,(8*(i))+ 5)]; \ + ss[6] ^= ss[5]; k[v(56,(8*(i))+14)] = ss[8] ^= k[v(56,(8*(i))+ 6)]; \ + ss[7] ^= ss[6]; k[v(56,(8*(i))+15)] = ss[8] ^= k[v(56,(8*(i))+ 7)]; \ +} + +#define kdl8(k,i) \ +{ ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; k[v(56,(8*(i))+ 8)] = ss[0]; \ + ss[1] ^= ss[0]; k[v(56,(8*(i))+ 9)] = ss[1]; \ + ss[2] ^= ss[1]; k[v(56,(8*(i))+10)] = ss[2]; \ + ss[3] ^= ss[2]; k[v(56,(8*(i))+11)] = ss[3]; \ +} + +AES_RETURN aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]) +{ uint_32t ss[9]; +#if defined( d_vars ) + d_vars; +#endif + cx->ks[v(56,(0))] = ss[0] = word_in(key, 0); + cx->ks[v(56,(1))] = ss[1] = word_in(key, 1); + cx->ks[v(56,(2))] = ss[2] = word_in(key, 2); + cx->ks[v(56,(3))] = ss[3] = word_in(key, 3); + +#if DEC_UNROLL == NONE + cx->ks[v(56,(4))] = ss[4] = word_in(key, 4); + cx->ks[v(56,(5))] = ss[5] = word_in(key, 5); + cx->ks[v(56,(6))] = ss[6] = word_in(key, 6); + cx->ks[v(56,(7))] = ss[7] = word_in(key, 7); + { uint_32t i; + + for(i = 0; i < 6; ++i) + k8e(cx->ks, i); + k8ef(cx->ks, 6); +#if !(DEC_ROUND == NO_TABLES) + for(i = N_COLS; i < 14 * N_COLS; ++i) + cx->ks[i] = inv_mcol(cx->ks[i]); + +#endif + } +#else + cx->ks[v(56,(4))] = ff(ss[4] = word_in(key, 4)); + cx->ks[v(56,(5))] = ff(ss[5] = word_in(key, 5)); + cx->ks[v(56,(6))] = ff(ss[6] = word_in(key, 6)); + cx->ks[v(56,(7))] = ff(ss[7] = word_in(key, 7)); + kdf8(cx->ks, 0); kd8(cx->ks, 1); + kd8(cx->ks, 2); kd8(cx->ks, 3); + kd8(cx->ks, 4); kd8(cx->ks, 5); + kdl8(cx->ks, 6); +#endif + cx->inf.l = 0; + cx->inf.b[0] = 14 * 16; + +#ifdef USE_VIA_ACE_IF_PRESENT + if(VIA_ACE_AVAILABLE) + cx->inf.b[1] = 0xff; +#endif + +#if defined( AES_ERR_CHK ) + return EXIT_SUCCESS; +#endif +} + +#endif + +#if defined(AES_VAR) + +AES_RETURN aes_decrypt_key(const unsigned char *key, int key_len, aes_decrypt_ctx cx[1]) +{ + switch(key_len) + { +#if defined( AES_ERR_CHK ) + case 16: case 128: return aes_decrypt_key128(key, cx); + case 24: case 192: return aes_decrypt_key192(key, cx); + case 32: case 256: return aes_decrypt_key256(key, cx); + default: return EXIT_FAILURE; +#else + case 16: case 128: aes_decrypt_key128(key, cx); return; + case 24: case 192: aes_decrypt_key192(key, cx); return; + case 32: case 256: aes_decrypt_key256(key, cx); return; +#endif + } +} + +#endif + +#endif + +#if defined(__cplusplus) +} +#endif diff --git a/Crypto/Aesopt.h b/Crypto/Aesopt.h new file mode 100644 index 0000000..cf7edbe --- /dev/null +++ b/Crypto/Aesopt.h @@ -0,0 +1,734 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software is allowed (with or without + changes) provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue Date: 20/12/2007 + + This file contains the compilation options for AES (Rijndael) and code + that is common across encryption, key scheduling and table generation. + + OPERATION + + These source code files implement the AES algorithm Rijndael designed by + Joan Daemen and Vincent Rijmen. This version is designed for the standard + block size of 16 bytes and for key sizes of 128, 192 and 256 bits (16, 24 + and 32 bytes). + + This version is designed for flexibility and speed using operations on + 32-bit words rather than operations on bytes. It can be compiled with + either big or little endian internal byte order but is faster when the + native byte order for the processor is used. + + THE CIPHER INTERFACE + + The cipher interface is implemented as an array of bytes in which lower + AES bit sequence indexes map to higher numeric significance within bytes. + + uint_8t (an unsigned 8-bit type) + uint_32t (an unsigned 32-bit type) + struct aes_encrypt_ctx (structure for the cipher encryption context) + struct aes_decrypt_ctx (structure for the cipher decryption context) + AES_RETURN the function return type + + C subroutine calls: + + AES_RETURN aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]); + AES_RETURN aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]); + AES_RETURN aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]); + AES_RETURN aes_encrypt(const unsigned char *in, unsigned char *out, + const aes_encrypt_ctx cx[1]); + + AES_RETURN aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]); + AES_RETURN aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]); + AES_RETURN aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]); + AES_RETURN aes_decrypt(const unsigned char *in, unsigned char *out, + const aes_decrypt_ctx cx[1]); + + IMPORTANT NOTE: If you are using this C interface with dynamic tables make sure that + you call aes_init() before AES is used so that the tables are initialised. + + C++ aes class subroutines: + + Class AESencrypt for encryption + + Construtors: + AESencrypt(void) + AESencrypt(const unsigned char *key) - 128 bit key + Members: + AES_RETURN key128(const unsigned char *key) + AES_RETURN key192(const unsigned char *key) + AES_RETURN key256(const unsigned char *key) + AES_RETURN encrypt(const unsigned char *in, unsigned char *out) const + + Class AESdecrypt for encryption + Construtors: + AESdecrypt(void) + AESdecrypt(const unsigned char *key) - 128 bit key + Members: + AES_RETURN key128(const unsigned char *key) + AES_RETURN key192(const unsigned char *key) + AES_RETURN key256(const unsigned char *key) + AES_RETURN decrypt(const unsigned char *in, unsigned char *out) const +*/ + +/* Adapted for TrueCrypt */ + +#if !defined( _AESOPT_H ) +#define _AESOPT_H + +#ifdef TC_WINDOWS_BOOT +#define ASM_X86_V2 +#endif + +#if defined( __cplusplus ) +#include "Aescpp.h" +#else +#include "Aes.h" +#endif + + +#include "Common/Endian.h" +#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */ +#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */ + +#if BYTE_ORDER == LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if BYTE_ORDER == BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#endif + + +/* CONFIGURATION - THE USE OF DEFINES + + Later in this section there are a number of defines that control the + operation of the code. In each section, the purpose of each define is + explained so that the relevant form can be included or excluded by + setting either 1's or 0's respectively on the branches of the related + #if clauses. The following local defines should not be changed. +*/ + +#define ENCRYPTION_IN_C 1 +#define DECRYPTION_IN_C 2 +#define ENC_KEYING_IN_C 4 +#define DEC_KEYING_IN_C 8 + +#define NO_TABLES 0 +#define ONE_TABLE 1 +#define FOUR_TABLES 4 +#define NONE 0 +#define PARTIAL 1 +#define FULL 2 + +/* --- START OF USER CONFIGURED OPTIONS --- */ + +/* 1. BYTE ORDER WITHIN 32 BIT WORDS + + The fundamental data processing units in Rijndael are 8-bit bytes. The + input, output and key input are all enumerated arrays of bytes in which + bytes are numbered starting at zero and increasing to one less than the + number of bytes in the array in question. This enumeration is only used + for naming bytes and does not imply any adjacency or order relationship + from one byte to another. When these inputs and outputs are considered + as bit sequences, bits 8*n to 8*n+7 of the bit sequence are mapped to + byte[n] with bit 8n+i in the sequence mapped to bit 7-i within the byte. + In this implementation bits are numbered from 0 to 7 starting at the + numerically least significant end of each byte (bit n represents 2^n). + + However, Rijndael can be implemented more efficiently using 32-bit + words by packing bytes into words so that bytes 4*n to 4*n+3 are placed + into word[n]. While in principle these bytes can be assembled into words + in any positions, this implementation only supports the two formats in + which bytes in adjacent positions within words also have adjacent byte + numbers. This order is called big-endian if the lowest numbered bytes + in words have the highest numeric significance and little-endian if the + opposite applies. + + This code can work in either order irrespective of the order used by the + machine on which it runs. Normally the internal byte order will be set + to the order of the processor on which the code is to be run but this + define can be used to reverse this in special situations + + WARNING: Assembler code versions rely on PLATFORM_BYTE_ORDER being set. + This define will hence be redefined later (in section 4) if necessary +*/ + +#if 1 +#define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER +#elif 0 +#define ALGORITHM_BYTE_ORDER IS_LITTLE_ENDIAN +#elif 0 +#define ALGORITHM_BYTE_ORDER IS_BIG_ENDIAN +#else +#error The algorithm byte order is not defined +#endif + +/* 2. VIA ACE SUPPORT + + Define this option if support for the VIA ACE is required. This uses + inline assembler instructions and is only implemented for the Microsoft, + Intel and GCC compilers. If VIA ACE is known to be present, then defining + ASSUME_VIA_ACE_PRESENT will remove the ordinary encryption/decryption + code. If USE_VIA_ACE_IF_PRESENT is defined then VIA ACE will be used if + it is detected (both present and enabled) but the normal AES code will + also be present. + + When VIA ACE is to be used, all AES encryption contexts MUST be 16 byte + aligned; other input/output buffers do not need to be 16 byte aligned + but there are very large performance gains if this can be arranged. + VIA ACE also requires the decryption key schedule to be in reverse + order (which later checks below ensure). +*/ + +#if 0 && !defined( USE_VIA_ACE_IF_PRESENT ) +# define USE_VIA_ACE_IF_PRESENT +#endif + +#if 0 && !defined( ASSUME_VIA_ACE_PRESENT ) +# define ASSUME_VIA_ACE_PRESENT +# endif + +#if defined ( _WIN64 ) || defined( _WIN32_WCE ) || \ + defined( _MSC_VER ) && ( _MSC_VER <= 800 ) +# if defined( USE_VIA_ACE_IF_PRESENT ) +# undef USE_VIA_ACE_IF_PRESENT +# endif +# if defined( ASSUME_VIA_ACE_PRESENT ) +# undef ASSUME_VIA_ACE_PRESENT +# endif +#endif + +/* 3. ASSEMBLER SUPPORT + + This define (which can be on the command line) enables the use of the + assembler code routines for encryption, decryption and key scheduling + as follows: + + ASM_X86_V1C uses the assembler (aes_x86_v1.asm) with large tables for + encryption and decryption and but with key scheduling in C + ASM_X86_V2 uses assembler (aes_x86_v2.asm) with compressed tables for + encryption, decryption and key scheduling + ASM_X86_V2C uses assembler (aes_x86_v2.asm) with compressed tables for + encryption and decryption and but with key scheduling in C + ASM_AMD64_C uses assembler (aes_amd64.asm) with compressed tables for + encryption and decryption and but with key scheduling in C + + Change one 'if 0' below to 'if 1' to select the version or define + as a compilation option. +*/ + +#if 0 && !defined( ASM_X86_V1C ) +# define ASM_X86_V1C +#elif 0 && !defined( ASM_X86_V2 ) +# define ASM_X86_V2 +#elif 0 && !defined( ASM_X86_V2C ) +# define ASM_X86_V2C +#elif 0 && !defined( ASM_AMD64_C ) +# define ASM_AMD64_C +#endif + +#if (defined ( ASM_X86_V1C ) || defined( ASM_X86_V2 ) || defined( ASM_X86_V2C )) \ + && !defined( _M_IX86 ) || defined( ASM_AMD64_C ) && !defined( _M_X64 ) +//# error Assembler code is only available for x86 and AMD64 systems +#endif + +/* 4. FAST INPUT/OUTPUT OPERATIONS. + + On some machines it is possible to improve speed by transferring the + bytes in the input and output arrays to and from the internal 32-bit + variables by addressing these arrays as if they are arrays of 32-bit + words. On some machines this will always be possible but there may + be a large performance penalty if the byte arrays are not aligned on + the normal word boundaries. On other machines this technique will + lead to memory access errors when such 32-bit word accesses are not + properly aligned. The option SAFE_IO avoids such problems but will + often be slower on those machines that support misaligned access + (especially so if care is taken to align the input and output byte + arrays on 32-bit word boundaries). If SAFE_IO is not defined it is + assumed that access to byte arrays as if they are arrays of 32-bit + words will not cause problems when such accesses are misaligned. +*/ +#if 1 && !defined( _MSC_VER ) +#define SAFE_IO +#endif + +/* 5. LOOP UNROLLING + + The code for encryption and decrytpion cycles through a number of rounds + that can be implemented either in a loop or by expanding the code into a + long sequence of instructions, the latter producing a larger program but + one that will often be much faster. The latter is called loop unrolling. + There are also potential speed advantages in expanding two iterations in + a loop with half the number of iterations, which is called partial loop + unrolling. The following options allow partial or full loop unrolling + to be set independently for encryption and decryption +*/ +#if 1 +#define ENC_UNROLL FULL +#elif 0 +#define ENC_UNROLL PARTIAL +#else +#define ENC_UNROLL NONE +#endif + +#if 1 +#define DEC_UNROLL FULL +#elif 0 +#define DEC_UNROLL PARTIAL +#else +#define DEC_UNROLL NONE +#endif + +/* 6. FAST FINITE FIELD OPERATIONS + + If this section is included, tables are used to provide faster finite + field arithmetic (this has no effect if FIXED_TABLES is defined). +*/ +#if !defined (TC_WINDOWS_BOOT) +#define FF_TABLES +#endif + +/* 7. INTERNAL STATE VARIABLE FORMAT + + The internal state of Rijndael is stored in a number of local 32-bit + word varaibles which can be defined either as an array or as individual + names variables. Include this section if you want to store these local + varaibles in arrays. Otherwise individual local variables will be used. +*/ +#if 1 +#define ARRAYS +#endif + +/* 8. FIXED OR DYNAMIC TABLES + + When this section is included the tables used by the code are compiled + statically into the binary file. Otherwise the subroutine aes_init() + must be called to compute them before the code is first used. +*/ +#if !defined (TC_WINDOWS_BOOT) && !(defined( _MSC_VER ) && ( _MSC_VER <= 800 )) +#define FIXED_TABLES +#endif + +/* 9. TABLE ALIGNMENT + + On some sytsems speed will be improved by aligning the AES large lookup + tables on particular boundaries. This define should be set to a power of + two giving the desired alignment. It can be left undefined if alignment + is not needed. This option is specific to the Microsft VC++ compiler - + it seems to sometimes cause trouble for the VC++ version 6 compiler. +*/ + +#if 1 && defined( _MSC_VER ) && ( _MSC_VER >= 1300 ) +#define TABLE_ALIGN 32 +#endif + +/* 10. TABLE OPTIONS + + This cipher proceeds by repeating in a number of cycles known as 'rounds' + which are implemented by a round function which can optionally be speeded + up using tables. The basic tables are each 256 32-bit words, with either + one or four tables being required for each round function depending on + how much speed is required. The encryption and decryption round functions + are different and the last encryption and decrytpion round functions are + different again making four different round functions in all. + + This means that: + 1. Normal encryption and decryption rounds can each use either 0, 1 + or 4 tables and table spaces of 0, 1024 or 4096 bytes each. + 2. The last encryption and decryption rounds can also use either 0, 1 + or 4 tables and table spaces of 0, 1024 or 4096 bytes each. + + Include or exclude the appropriate definitions below to set the number + of tables used by this implementation. +*/ + +#if 1 /* set tables for the normal encryption round */ +#define ENC_ROUND FOUR_TABLES +#elif 0 +#define ENC_ROUND ONE_TABLE +#else +#define ENC_ROUND NO_TABLES +#endif + +#if 1 /* set tables for the last encryption round */ +#define LAST_ENC_ROUND FOUR_TABLES +#elif 0 +#define LAST_ENC_ROUND ONE_TABLE +#else +#define LAST_ENC_ROUND NO_TABLES +#endif + +#if 1 /* set tables for the normal decryption round */ +#define DEC_ROUND FOUR_TABLES +#elif 0 +#define DEC_ROUND ONE_TABLE +#else +#define DEC_ROUND NO_TABLES +#endif + +#if 1 /* set tables for the last decryption round */ +#define LAST_DEC_ROUND FOUR_TABLES +#elif 0 +#define LAST_DEC_ROUND ONE_TABLE +#else +#define LAST_DEC_ROUND NO_TABLES +#endif + +/* The decryption key schedule can be speeded up with tables in the same + way that the round functions can. Include or exclude the following + defines to set this requirement. +*/ +#if 1 +#define KEY_SCHED FOUR_TABLES +#elif 0 +#define KEY_SCHED ONE_TABLE +#else +#define KEY_SCHED NO_TABLES +#endif + +/* ---- END OF USER CONFIGURED OPTIONS ---- */ + +/* VIA ACE support is only available for VC++ and GCC */ + +#if !defined( _MSC_VER ) && !defined( __GNUC__ ) +# if defined( ASSUME_VIA_ACE_PRESENT ) +# undef ASSUME_VIA_ACE_PRESENT +# endif +# if defined( USE_VIA_ACE_IF_PRESENT ) +# undef USE_VIA_ACE_IF_PRESENT +# endif +#endif + +#if defined( ASSUME_VIA_ACE_PRESENT ) && !defined( USE_VIA_ACE_IF_PRESENT ) +#define USE_VIA_ACE_IF_PRESENT +#endif + +#if defined( USE_VIA_ACE_IF_PRESENT ) && !defined ( AES_REV_DKS ) +#define AES_REV_DKS +#endif + +/* Assembler support requires the use of platform byte order */ + +#if ( defined( ASM_X86_V1C ) || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C ) ) \ + && (ALGORITHM_BYTE_ORDER != PLATFORM_BYTE_ORDER) +#undef ALGORITHM_BYTE_ORDER +#define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER +#endif + +/* In this implementation the columns of the state array are each held in + 32-bit words. The state array can be held in various ways: in an array + of words, in a number of individual word variables or in a number of + processor registers. The following define maps a variable name x and + a column number c to the way the state array variable is to be held. + The first define below maps the state into an array x[c] whereas the + second form maps the state into a number of individual variables x0, + x1, etc. Another form could map individual state colums to machine + register names. +*/ + +#if defined( ARRAYS ) +#define s(x,c) x[c] +#else +#define s(x,c) x##c +#endif + +/* This implementation provides subroutines for encryption, decryption + and for setting the three key lengths (separately) for encryption + and decryption. Since not all functions are needed, masks are set + up here to determine which will be implemented in C +*/ + +#if !defined( AES_ENCRYPT ) +# define EFUNCS_IN_C 0 +#elif defined( ASSUME_VIA_ACE_PRESENT ) || defined( ASM_X86_V1C ) \ + || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C ) +# define EFUNCS_IN_C ENC_KEYING_IN_C +#elif !defined( ASM_X86_V2 ) +# define EFUNCS_IN_C ( ENCRYPTION_IN_C | ENC_KEYING_IN_C ) +#else +# define EFUNCS_IN_C 0 +#endif + +#if !defined( AES_DECRYPT ) +# define DFUNCS_IN_C 0 +#elif defined( ASSUME_VIA_ACE_PRESENT ) || defined( ASM_X86_V1C ) \ + || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C ) +# define DFUNCS_IN_C DEC_KEYING_IN_C +#elif !defined( ASM_X86_V2 ) +# define DFUNCS_IN_C ( DECRYPTION_IN_C | DEC_KEYING_IN_C ) +#else +# define DFUNCS_IN_C 0 +#endif + +#define FUNCS_IN_C ( EFUNCS_IN_C | DFUNCS_IN_C ) + +/* END OF CONFIGURATION OPTIONS */ + +#define RC_LENGTH (5 * (AES_BLOCK_SIZE / 4 - 2)) + +/* Disable or report errors on some combinations of options */ + +#if ENC_ROUND == NO_TABLES && LAST_ENC_ROUND != NO_TABLES +#undef LAST_ENC_ROUND +#define LAST_ENC_ROUND NO_TABLES +#elif ENC_ROUND == ONE_TABLE && LAST_ENC_ROUND == FOUR_TABLES +#undef LAST_ENC_ROUND +#define LAST_ENC_ROUND ONE_TABLE +#endif + +#if ENC_ROUND == NO_TABLES && ENC_UNROLL != NONE +#undef ENC_UNROLL +#define ENC_UNROLL NONE +#endif + +#if DEC_ROUND == NO_TABLES && LAST_DEC_ROUND != NO_TABLES +#undef LAST_DEC_ROUND +#define LAST_DEC_ROUND NO_TABLES +#elif DEC_ROUND == ONE_TABLE && LAST_DEC_ROUND == FOUR_TABLES +#undef LAST_DEC_ROUND +#define LAST_DEC_ROUND ONE_TABLE +#endif + +#if DEC_ROUND == NO_TABLES && DEC_UNROLL != NONE +#undef DEC_UNROLL +#define DEC_UNROLL NONE +#endif + +#if defined( bswap32 ) +#define aes_sw32 bswap32 +#elif defined( bswap_32 ) +#define aes_sw32 bswap_32 +#else +#define brot(x,n) (((uint_32t)(x) << n) | ((uint_32t)(x) >> (32 - n))) +#define aes_sw32(x) ((brot((x),8) & 0x00ff00ff) | (brot((x),24) & 0xff00ff00)) +#endif + +/* upr(x,n): rotates bytes within words by n positions, moving bytes to + higher index positions with wrap around into low positions + ups(x,n): moves bytes by n positions to higher index positions in + words but without wrap around + bval(x,n): extracts a byte from a word + + WARNING: The definitions given here are intended only for use with + unsigned variables and with shift counts that are compile + time constants +*/ + +#if ( ALGORITHM_BYTE_ORDER == IS_LITTLE_ENDIAN ) +#define upr(x,n) (((uint_32t)(x) << (8 * (n))) | ((uint_32t)(x) >> (32 - 8 * (n)))) +#define ups(x,n) ((uint_32t) (x) << (8 * (n))) +#define bval(x,n) ((uint_8t)((x) >> (8 * (n)))) +#define bytes2word(b0, b1, b2, b3) \ + (((uint_32t)(b3) << 24) | ((uint_32t)(b2) << 16) | ((uint_32t)(b1) << 8) | (b0)) +#endif + +#if ( ALGORITHM_BYTE_ORDER == IS_BIG_ENDIAN ) +#define upr(x,n) (((uint_32t)(x) >> (8 * (n))) | ((uint_32t)(x) << (32 - 8 * (n)))) +#define ups(x,n) ((uint_32t) (x) >> (8 * (n))) +#define bval(x,n) ((uint_8t)((x) >> (24 - 8 * (n)))) +#define bytes2word(b0, b1, b2, b3) \ + (((uint_32t)(b0) << 24) | ((uint_32t)(b1) << 16) | ((uint_32t)(b2) << 8) | (b3)) +#endif + +#if defined( SAFE_IO ) + +#define word_in(x,c) bytes2word(((const uint_8t*)(x)+4*c)[0], ((const uint_8t*)(x)+4*c)[1], \ + ((const uint_8t*)(x)+4*c)[2], ((const uint_8t*)(x)+4*c)[3]) +#define word_out(x,c,v) { ((uint_8t*)(x)+4*c)[0] = bval(v,0); ((uint_8t*)(x)+4*c)[1] = bval(v,1); \ + ((uint_8t*)(x)+4*c)[2] = bval(v,2); ((uint_8t*)(x)+4*c)[3] = bval(v,3); } + +#elif ( ALGORITHM_BYTE_ORDER == PLATFORM_BYTE_ORDER ) + +#define word_in(x,c) (*((uint_32t*)(x)+(c))) +#define word_out(x,c,v) (*((uint_32t*)(x)+(c)) = (v)) + +#else + +#define word_in(x,c) aes_sw32(*((uint_32t*)(x)+(c))) +#define word_out(x,c,v) (*((uint_32t*)(x)+(c)) = aes_sw32(v)) + +#endif + +/* the finite field modular polynomial and elements */ + +#define WPOLY 0x011b +#define BPOLY 0x1b + +/* multiply four bytes in GF(2^8) by 'x' {02} in parallel */ + +#define m1 0x80808080 +#define m2 0x7f7f7f7f +#define gf_mulx(x) ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * BPOLY)) + +/* The following defines provide alternative definitions of gf_mulx that might + give improved performance if a fast 32-bit multiply is not available. Note + that a temporary variable u needs to be defined where gf_mulx is used. + +#define gf_mulx(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ ((u >> 3) | (u >> 6)) +#define m4 (0x01010101 * BPOLY) +#define gf_mulx(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) & m4) +*/ + +/* Work out which tables are needed for the different options */ + +#if defined( ASM_X86_V1C ) +#if defined( ENC_ROUND ) +#undef ENC_ROUND +#endif +#define ENC_ROUND FOUR_TABLES +#if defined( LAST_ENC_ROUND ) +#undef LAST_ENC_ROUND +#endif +#define LAST_ENC_ROUND FOUR_TABLES +#if defined( DEC_ROUND ) +#undef DEC_ROUND +#endif +#define DEC_ROUND FOUR_TABLES +#if defined( LAST_DEC_ROUND ) +#undef LAST_DEC_ROUND +#endif +#define LAST_DEC_ROUND FOUR_TABLES +#if defined( KEY_SCHED ) +#undef KEY_SCHED +#define KEY_SCHED FOUR_TABLES +#endif +#endif + +#if ( FUNCS_IN_C & ENCRYPTION_IN_C ) || defined( ASM_X86_V1C ) +#if ENC_ROUND == ONE_TABLE +#define FT1_SET +#elif ENC_ROUND == FOUR_TABLES +#define FT4_SET +#else +#define SBX_SET +#endif +#if LAST_ENC_ROUND == ONE_TABLE +#define FL1_SET +#elif LAST_ENC_ROUND == FOUR_TABLES +#define FL4_SET +#elif !defined( SBX_SET ) +#define SBX_SET +#endif +#endif + +#if ( FUNCS_IN_C & DECRYPTION_IN_C ) || defined( ASM_X86_V1C ) +#if DEC_ROUND == ONE_TABLE +#define IT1_SET +#elif DEC_ROUND == FOUR_TABLES +#define IT4_SET +#else +#define ISB_SET +#endif +#if LAST_DEC_ROUND == ONE_TABLE +#define IL1_SET +#elif LAST_DEC_ROUND == FOUR_TABLES +#define IL4_SET +#elif !defined(ISB_SET) +#define ISB_SET +#endif +#endif + +#if (FUNCS_IN_C & ENC_KEYING_IN_C) || (FUNCS_IN_C & DEC_KEYING_IN_C) +#if KEY_SCHED == ONE_TABLE +#define LS1_SET +#elif KEY_SCHED == FOUR_TABLES +#define LS4_SET +#elif !defined( SBX_SET ) +#define SBX_SET +#endif +#endif + +#if (FUNCS_IN_C & DEC_KEYING_IN_C) +#if KEY_SCHED == ONE_TABLE +#define IM1_SET +#elif KEY_SCHED == FOUR_TABLES +#define IM4_SET +#elif !defined( SBX_SET ) +#define SBX_SET +#endif +#endif + +/* generic definitions of Rijndael macros that use tables */ + +#define no_table(x,box,vf,rf,c) bytes2word( \ + box[bval(vf(x,0,c),rf(0,c))], \ + box[bval(vf(x,1,c),rf(1,c))], \ + box[bval(vf(x,2,c),rf(2,c))], \ + box[bval(vf(x,3,c),rf(3,c))]) + +#define one_table(x,op,tab,vf,rf,c) \ + ( tab[bval(vf(x,0,c),rf(0,c))] \ + ^ op(tab[bval(vf(x,1,c),rf(1,c))],1) \ + ^ op(tab[bval(vf(x,2,c),rf(2,c))],2) \ + ^ op(tab[bval(vf(x,3,c),rf(3,c))],3)) + +#define four_tables(x,tab,vf,rf,c) \ + ( tab[0][bval(vf(x,0,c),rf(0,c))] \ + ^ tab[1][bval(vf(x,1,c),rf(1,c))] \ + ^ tab[2][bval(vf(x,2,c),rf(2,c))] \ + ^ tab[3][bval(vf(x,3,c),rf(3,c))]) + +#define vf1(x,r,c) (x) +#define rf1(r,c) (r) +#define rf2(r,c) ((8+r-c)&3) + +/* perform forward and inverse column mix operation on four bytes in long word x in */ +/* parallel. NOTE: x must be a simple variable, NOT an expression in these macros. */ + +#if defined( FM4_SET ) /* not currently used */ +#define fwd_mcol(x) four_tables(x,t_use(f,m),vf1,rf1,0) +#elif defined( FM1_SET ) /* not currently used */ +#define fwd_mcol(x) one_table(x,upr,t_use(f,m),vf1,rf1,0) +#else +#define dec_fmvars uint_32t g2 +#define fwd_mcol(x) (g2 = gf_mulx(x), g2 ^ upr((x) ^ g2, 3) ^ upr((x), 2) ^ upr((x), 1)) +#endif + +#if defined( IM4_SET ) +#define inv_mcol(x) four_tables(x,t_use(i,m),vf1,rf1,0) +#elif defined( IM1_SET ) +#define inv_mcol(x) one_table(x,upr,t_use(i,m),vf1,rf1,0) +#else +#define dec_imvars uint_32t g2, g4, g9 +#define inv_mcol(x) (g2 = gf_mulx(x), g4 = gf_mulx(g2), g9 = (x) ^ gf_mulx(g4), g4 ^= g9, \ + (x) ^ g2 ^ g4 ^ upr(g2 ^ g9, 3) ^ upr(g4, 2) ^ upr(g9, 1)) +#endif + +#if defined( FL4_SET ) +#define ls_box(x,c) four_tables(x,t_use(f,l),vf1,rf2,c) +#elif defined( LS4_SET ) +#define ls_box(x,c) four_tables(x,t_use(l,s),vf1,rf2,c) +#elif defined( FL1_SET ) +#define ls_box(x,c) one_table(x,upr,t_use(f,l),vf1,rf2,c) +#elif defined( LS1_SET ) +#define ls_box(x,c) one_table(x,upr,t_use(l,s),vf1,rf2,c) +#else +#define ls_box(x,c) no_table(x,t_use(s,box),vf1,rf2,c) +#endif + +#if defined( ASM_X86_V1C ) && defined( AES_DECRYPT ) && !defined( ISB_SET ) +#define ISB_SET +#endif + +#endif diff --git a/Crypto/Aestab.c b/Crypto/Aestab.c new file mode 100644 index 0000000..1effb6f --- /dev/null +++ b/Crypto/Aestab.c @@ -0,0 +1,428 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software is allowed (with or without + changes) provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue Date: 20/12/2007 +*/ + +/* Adapted for TrueCrypt: + - Added run-time table generator for Aes_x86_v2.asm +*/ + +#define DO_TABLES + +#include "Aes.h" +#include "Aesopt.h" + +#if defined(FIXED_TABLES) + +#define sb_data(w) {\ + w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\ + w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\ + w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\ + w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\ + w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\ + w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\ + w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\ + w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\ + w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\ + w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\ + w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\ + w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\ + w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\ + w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\ + w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\ + w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\ + w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\ + w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\ + w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\ + w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\ + w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\ + w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\ + w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\ + w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\ + w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\ + w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\ + w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\ + w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\ + w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\ + w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\ + w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\ + w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) } + +#define isb_data(w) {\ + w(0x52), w(0x09), w(0x6a), w(0xd5), w(0x30), w(0x36), w(0xa5), w(0x38),\ + w(0xbf), w(0x40), w(0xa3), w(0x9e), w(0x81), w(0xf3), w(0xd7), w(0xfb),\ + w(0x7c), w(0xe3), w(0x39), w(0x82), w(0x9b), w(0x2f), w(0xff), w(0x87),\ + w(0x34), w(0x8e), w(0x43), w(0x44), w(0xc4), w(0xde), w(0xe9), w(0xcb),\ + w(0x54), w(0x7b), w(0x94), w(0x32), w(0xa6), w(0xc2), w(0x23), w(0x3d),\ + w(0xee), w(0x4c), w(0x95), w(0x0b), w(0x42), w(0xfa), w(0xc3), w(0x4e),\ + w(0x08), w(0x2e), w(0xa1), w(0x66), w(0x28), w(0xd9), w(0x24), w(0xb2),\ + w(0x76), w(0x5b), w(0xa2), w(0x49), w(0x6d), w(0x8b), w(0xd1), w(0x25),\ + w(0x72), w(0xf8), w(0xf6), w(0x64), w(0x86), w(0x68), w(0x98), w(0x16),\ + w(0xd4), w(0xa4), w(0x5c), w(0xcc), w(0x5d), w(0x65), w(0xb6), w(0x92),\ + w(0x6c), w(0x70), w(0x48), w(0x50), w(0xfd), w(0xed), w(0xb9), w(0xda),\ + w(0x5e), w(0x15), w(0x46), w(0x57), w(0xa7), w(0x8d), w(0x9d), w(0x84),\ + w(0x90), w(0xd8), w(0xab), w(0x00), w(0x8c), w(0xbc), w(0xd3), w(0x0a),\ + w(0xf7), w(0xe4), w(0x58), w(0x05), w(0xb8), w(0xb3), w(0x45), w(0x06),\ + w(0xd0), w(0x2c), w(0x1e), w(0x8f), w(0xca), w(0x3f), w(0x0f), w(0x02),\ + w(0xc1), w(0xaf), w(0xbd), w(0x03), w(0x01), w(0x13), w(0x8a), w(0x6b),\ + w(0x3a), w(0x91), w(0x11), w(0x41), w(0x4f), w(0x67), w(0xdc), w(0xea),\ + w(0x97), w(0xf2), w(0xcf), w(0xce), w(0xf0), w(0xb4), w(0xe6), w(0x73),\ + w(0x96), w(0xac), w(0x74), w(0x22), w(0xe7), w(0xad), w(0x35), w(0x85),\ + w(0xe2), w(0xf9), w(0x37), w(0xe8), w(0x1c), w(0x75), w(0xdf), w(0x6e),\ + w(0x47), w(0xf1), w(0x1a), w(0x71), w(0x1d), w(0x29), w(0xc5), w(0x89),\ + w(0x6f), w(0xb7), w(0x62), w(0x0e), w(0xaa), w(0x18), w(0xbe), w(0x1b),\ + w(0xfc), w(0x56), w(0x3e), w(0x4b), w(0xc6), w(0xd2), w(0x79), w(0x20),\ + w(0x9a), w(0xdb), w(0xc0), w(0xfe), w(0x78), w(0xcd), w(0x5a), w(0xf4),\ + w(0x1f), w(0xdd), w(0xa8), w(0x33), w(0x88), w(0x07), w(0xc7), w(0x31),\ + w(0xb1), w(0x12), w(0x10), w(0x59), w(0x27), w(0x80), w(0xec), w(0x5f),\ + w(0x60), w(0x51), w(0x7f), w(0xa9), w(0x19), w(0xb5), w(0x4a), w(0x0d),\ + w(0x2d), w(0xe5), w(0x7a), w(0x9f), w(0x93), w(0xc9), w(0x9c), w(0xef),\ + w(0xa0), w(0xe0), w(0x3b), w(0x4d), w(0xae), w(0x2a), w(0xf5), w(0xb0),\ + w(0xc8), w(0xeb), w(0xbb), w(0x3c), w(0x83), w(0x53), w(0x99), w(0x61),\ + w(0x17), w(0x2b), w(0x04), w(0x7e), w(0xba), w(0x77), w(0xd6), w(0x26),\ + w(0xe1), w(0x69), w(0x14), w(0x63), w(0x55), w(0x21), w(0x0c), w(0x7d) } + +#define mm_data(w) {\ + w(0x00), w(0x01), w(0x02), w(0x03), w(0x04), w(0x05), w(0x06), w(0x07),\ + w(0x08), w(0x09), w(0x0a), w(0x0b), w(0x0c), w(0x0d), w(0x0e), w(0x0f),\ + w(0x10), w(0x11), w(0x12), w(0x13), w(0x14), w(0x15), w(0x16), w(0x17),\ + w(0x18), w(0x19), w(0x1a), w(0x1b), w(0x1c), w(0x1d), w(0x1e), w(0x1f),\ + w(0x20), w(0x21), w(0x22), w(0x23), w(0x24), w(0x25), w(0x26), w(0x27),\ + w(0x28), w(0x29), w(0x2a), w(0x2b), w(0x2c), w(0x2d), w(0x2e), w(0x2f),\ + w(0x30), w(0x31), w(0x32), w(0x33), w(0x34), w(0x35), w(0x36), w(0x37),\ + w(0x38), w(0x39), w(0x3a), w(0x3b), w(0x3c), w(0x3d), w(0x3e), w(0x3f),\ + w(0x40), w(0x41), w(0x42), w(0x43), w(0x44), w(0x45), w(0x46), w(0x47),\ + w(0x48), w(0x49), w(0x4a), w(0x4b), w(0x4c), w(0x4d), w(0x4e), w(0x4f),\ + w(0x50), w(0x51), w(0x52), w(0x53), w(0x54), w(0x55), w(0x56), w(0x57),\ + w(0x58), w(0x59), w(0x5a), w(0x5b), w(0x5c), w(0x5d), w(0x5e), w(0x5f),\ + w(0x60), w(0x61), w(0x62), w(0x63), w(0x64), w(0x65), w(0x66), w(0x67),\ + w(0x68), w(0x69), w(0x6a), w(0x6b), w(0x6c), w(0x6d), w(0x6e), w(0x6f),\ + w(0x70), w(0x71), w(0x72), w(0x73), w(0x74), w(0x75), w(0x76), w(0x77),\ + w(0x78), w(0x79), w(0x7a), w(0x7b), w(0x7c), w(0x7d), w(0x7e), w(0x7f),\ + w(0x80), w(0x81), w(0x82), w(0x83), w(0x84), w(0x85), w(0x86), w(0x87),\ + w(0x88), w(0x89), w(0x8a), w(0x8b), w(0x8c), w(0x8d), w(0x8e), w(0x8f),\ + w(0x90), w(0x91), w(0x92), w(0x93), w(0x94), w(0x95), w(0x96), w(0x97),\ + w(0x98), w(0x99), w(0x9a), w(0x9b), w(0x9c), w(0x9d), w(0x9e), w(0x9f),\ + w(0xa0), w(0xa1), w(0xa2), w(0xa3), w(0xa4), w(0xa5), w(0xa6), w(0xa7),\ + w(0xa8), w(0xa9), w(0xaa), w(0xab), w(0xac), w(0xad), w(0xae), w(0xaf),\ + w(0xb0), w(0xb1), w(0xb2), w(0xb3), w(0xb4), w(0xb5), w(0xb6), w(0xb7),\ + w(0xb8), w(0xb9), w(0xba), w(0xbb), w(0xbc), w(0xbd), w(0xbe), w(0xbf),\ + w(0xc0), w(0xc1), w(0xc2), w(0xc3), w(0xc4), w(0xc5), w(0xc6), w(0xc7),\ + w(0xc8), w(0xc9), w(0xca), w(0xcb), w(0xcc), w(0xcd), w(0xce), w(0xcf),\ + w(0xd0), w(0xd1), w(0xd2), w(0xd3), w(0xd4), w(0xd5), w(0xd6), w(0xd7),\ + w(0xd8), w(0xd9), w(0xda), w(0xdb), w(0xdc), w(0xdd), w(0xde), w(0xdf),\ + w(0xe0), w(0xe1), w(0xe2), w(0xe3), w(0xe4), w(0xe5), w(0xe6), w(0xe7),\ + w(0xe8), w(0xe9), w(0xea), w(0xeb), w(0xec), w(0xed), w(0xee), w(0xef),\ + w(0xf0), w(0xf1), w(0xf2), w(0xf3), w(0xf4), w(0xf5), w(0xf6), w(0xf7),\ + w(0xf8), w(0xf9), w(0xfa), w(0xfb), w(0xfc), w(0xfd), w(0xfe), w(0xff) } + +#define rc_data(w) {\ + w(0x01), w(0x02), w(0x04), w(0x08), w(0x10),w(0x20), w(0x40), w(0x80),\ + w(0x1b), w(0x36) } + +#define h0(x) (x) + +#define w0(p) bytes2word(p, 0, 0, 0) +#define w1(p) bytes2word(0, p, 0, 0) +#define w2(p) bytes2word(0, 0, p, 0) +#define w3(p) bytes2word(0, 0, 0, p) + +#define u0(p) bytes2word(f2(p), p, p, f3(p)) +#define u1(p) bytes2word(f3(p), f2(p), p, p) +#define u2(p) bytes2word(p, f3(p), f2(p), p) +#define u3(p) bytes2word(p, p, f3(p), f2(p)) + +#define v0(p) bytes2word(fe(p), f9(p), fd(p), fb(p)) +#define v1(p) bytes2word(fb(p), fe(p), f9(p), fd(p)) +#define v2(p) bytes2word(fd(p), fb(p), fe(p), f9(p)) +#define v3(p) bytes2word(f9(p), fd(p), fb(p), fe(p)) + +#endif + +#if defined(FIXED_TABLES) || !defined(FF_TABLES) + +#define f2(x) ((x<<1) ^ (((x>>7) & 1) * WPOLY)) +#define f4(x) ((x<<2) ^ (((x>>6) & 1) * WPOLY) ^ (((x>>6) & 2) * WPOLY)) +#define f8(x) ((x<<3) ^ (((x>>5) & 1) * WPOLY) ^ (((x>>5) & 2) * WPOLY) \ + ^ (((x>>5) & 4) * WPOLY)) +#define f3(x) (f2(x) ^ x) +#define f9(x) (f8(x) ^ x) +#define fb(x) (f8(x) ^ f2(x) ^ x) +#define fd(x) (f8(x) ^ f4(x) ^ x) +#define fe(x) (f8(x) ^ f4(x) ^ f2(x)) + +#else + +#define f2(x) ((x) ? pow[log[x] + 0x19] : 0) +#define f3(x) ((x) ? pow[log[x] + 0x01] : 0) +#define f9(x) ((x) ? pow[log[x] + 0xc7] : 0) +#define fb(x) ((x) ? pow[log[x] + 0x68] : 0) +#define fd(x) ((x) ? pow[log[x] + 0xee] : 0) +#define fe(x) ((x) ? pow[log[x] + 0xdf] : 0) +#define fi(x) ((x) ? pow[ 255 - log[x]] : 0) + +#endif + +#include "Aestab.h" + +#if defined(__cplusplus) +extern "C" +{ +#endif + +#if defined(FIXED_TABLES) + +/* implemented in case of wrong call for fixed tables */ + +AES_RETURN aes_init(void) +{ + return EXIT_SUCCESS; +} + +#else /* dynamic table generation */ + +#if !defined(FF_TABLES) + +/* Generate the tables for the dynamic table option + + It will generally be sensible to use tables to compute finite + field multiplies and inverses but where memory is scarse this + code might sometimes be better. But it only has effect during + initialisation so its pretty unimportant in overall terms. +*/ + +/* return 2 ^ (n - 1) where n is the bit number of the highest bit + set in x with x in the range 1 < x < 0x00000200. This form is + used so that locals within fi can be bytes rather than words +*/ + +static uint_8t hibit(const uint_32t x) +{ uint_8t r = (uint_8t)((x >> 1) | (x >> 2)); + + r |= (r >> 2); + r |= (r >> 4); + return (r + 1) >> 1; +} + +/* return the inverse of the finite field element x */ + +static uint_8t fi(const uint_8t x) +{ uint_8t p1 = x, p2 = BPOLY, n1 = hibit(x), n2 = 0x80, v1 = 1, v2 = 0; + + if(x < 2) return x; + + for(;;) + { + if(!n1) return v1; + + while(n2 >= n1) + { + n2 /= n1; p2 ^= p1 * n2; v2 ^= v1 * n2; n2 = hibit(p2); + } + + if(!n2) return v2; + + while(n1 >= n2) + { + n1 /= n2; p1 ^= p2 * n1; v1 ^= v2 * n1; n1 = hibit(p1); + } + } +} + +#endif + +/* The forward and inverse affine transformations used in the S-box */ + +#define fwd_affine(x) \ + (w = (uint_32t)x, w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(uint_8t)(w^(w>>8))) + +#define inv_affine(x) \ + (w = (uint_32t)x, w = (w<<1)^(w<<3)^(w<<6), 0x05^(uint_8t)(w^(w>>8))) + +static int init = 0; + +#ifdef TC_WINDOWS_BOOT + +#pragma optimize ("l", on) +uint_8t aes_enc_tab[256][8]; +uint_8t aes_dec_tab[256][8]; + +#endif + +AES_RETURN aes_init(void) +{ uint_32t i, w; + +#ifdef TC_WINDOWS_BOOT + + if (init) + return EXIT_SUCCESS; + + for (i = 0; i < 256; ++i) + { + uint_8t x = fwd_affine(fi((uint_8t)i)); + aes_enc_tab[i][0] = 0; + aes_enc_tab[i][1] = x; + aes_enc_tab[i][2] = x; + aes_enc_tab[i][3] = f3(x); + aes_enc_tab[i][4] = f2(x); + aes_enc_tab[i][5] = x; + aes_enc_tab[i][6] = x; + aes_enc_tab[i][7] = f3(x); + + x = fi((uint_8t)inv_affine((uint_8t)i)); + aes_dec_tab[i][0] = fe(x); + aes_dec_tab[i][1] = f9(x); + aes_dec_tab[i][2] = fd(x); + aes_dec_tab[i][3] = fb(x); + aes_dec_tab[i][4] = fe(x); + aes_dec_tab[i][5] = f9(x); + aes_dec_tab[i][6] = fd(x); + aes_dec_tab[i][7] = x; + } + +#else // TC_WINDOWS_BOOT + +#if defined(FF_TABLES) + + uint_8t pow[512], log[256]; + + if(init) + return EXIT_SUCCESS; + /* log and power tables for GF(2^8) finite field with + WPOLY as modular polynomial - the simplest primitive + root is 0x03, used here to generate the tables + */ + + i = 0; w = 1; + do + { + pow[i] = (uint_8t)w; + pow[i + 255] = (uint_8t)w; + log[w] = (uint_8t)i++; + w ^= (w << 1) ^ (w & 0x80 ? WPOLY : 0); + } + while (w != 1); + +#else + if(init) + return EXIT_SUCCESS; +#endif + + for(i = 0, w = 1; i < RC_LENGTH; ++i) + { + t_set(r,c)[i] = bytes2word(w, 0, 0, 0); + w = f2(w); + } + + for(i = 0; i < 256; ++i) + { uint_8t b; + + b = fwd_affine(fi((uint_8t)i)); + w = bytes2word(f2(b), b, b, f3(b)); + +#if defined( SBX_SET ) + t_set(s,box)[i] = b; +#endif + +#if defined( FT1_SET ) /* tables for a normal encryption round */ + t_set(f,n)[i] = w; +#endif +#if defined( FT4_SET ) + t_set(f,n)[0][i] = w; + t_set(f,n)[1][i] = upr(w,1); + t_set(f,n)[2][i] = upr(w,2); + t_set(f,n)[3][i] = upr(w,3); +#endif + w = bytes2word(b, 0, 0, 0); + +#if defined( FL1_SET ) /* tables for last encryption round (may also */ + t_set(f,l)[i] = w; /* be used in the key schedule) */ +#endif +#if defined( FL4_SET ) + t_set(f,l)[0][i] = w; + t_set(f,l)[1][i] = upr(w,1); + t_set(f,l)[2][i] = upr(w,2); + t_set(f,l)[3][i] = upr(w,3); +#endif + +#if defined( LS1_SET ) /* table for key schedule if t_set(f,l) above is*/ + t_set(l,s)[i] = w; /* not of the required form */ +#endif +#if defined( LS4_SET ) + t_set(l,s)[0][i] = w; + t_set(l,s)[1][i] = upr(w,1); + t_set(l,s)[2][i] = upr(w,2); + t_set(l,s)[3][i] = upr(w,3); +#endif + + b = fi(inv_affine((uint_8t)i)); + w = bytes2word(fe(b), f9(b), fd(b), fb(b)); + +#if defined( IM1_SET ) /* tables for the inverse mix column operation */ + t_set(i,m)[b] = w; +#endif +#if defined( IM4_SET ) + t_set(i,m)[0][b] = w; + t_set(i,m)[1][b] = upr(w,1); + t_set(i,m)[2][b] = upr(w,2); + t_set(i,m)[3][b] = upr(w,3); +#endif + +#if defined( ISB_SET ) + t_set(i,box)[i] = b; +#endif +#if defined( IT1_SET ) /* tables for a normal decryption round */ + t_set(i,n)[i] = w; +#endif +#if defined( IT4_SET ) + t_set(i,n)[0][i] = w; + t_set(i,n)[1][i] = upr(w,1); + t_set(i,n)[2][i] = upr(w,2); + t_set(i,n)[3][i] = upr(w,3); +#endif + w = bytes2word(b, 0, 0, 0); +#if defined( IL1_SET ) /* tables for last decryption round */ + t_set(i,l)[i] = w; +#endif +#if defined( IL4_SET ) + t_set(i,l)[0][i] = w; + t_set(i,l)[1][i] = upr(w,1); + t_set(i,l)[2][i] = upr(w,2); + t_set(i,l)[3][i] = upr(w,3); +#endif + } + +#endif // TC_WINDOWS_BOOT + + init = 1; + return EXIT_SUCCESS; +} + +#endif + +#if defined(__cplusplus) +} +#endif + diff --git a/Crypto/Aestab.h b/Crypto/Aestab.h new file mode 100644 index 0000000..e52e005 --- /dev/null +++ b/Crypto/Aestab.h @@ -0,0 +1,174 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software is allowed (with or without + changes) provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue Date: 20/12/2007 + + This file contains the code for declaring the tables needed to implement + AES. The file aesopt.h is assumed to be included before this header file. + If there are no global variables, the definitions here can be used to put + the AES tables in a structure so that a pointer can then be added to the + AES context to pass them to the AES routines that need them. If this + facility is used, the calling program has to ensure that this pointer is + managed appropriately. In particular, the value of the t_dec(in,it) item + in the table structure must be set to zero in order to ensure that the + tables are initialised. In practice the three code sequences in aeskey.c + that control the calls to aes_init() and the aes_init() routine itself will + have to be changed for a specific implementation. If global variables are + available it will generally be preferable to use them with the precomputed + FIXED_TABLES option that uses static global tables. + + The following defines can be used to control the way the tables + are defined, initialised and used in embedded environments that + require special features for these purposes + + the 't_dec' construction is used to declare fixed table arrays + the 't_set' construction is used to set fixed table values + the 't_use' construction is used to access fixed table values + + 256 byte tables: + + t_xxx(s,box) => forward S box + t_xxx(i,box) => inverse S box + + 256 32-bit word OR 4 x 256 32-bit word tables: + + t_xxx(f,n) => forward normal round + t_xxx(f,l) => forward last round + t_xxx(i,n) => inverse normal round + t_xxx(i,l) => inverse last round + t_xxx(l,s) => key schedule table + t_xxx(i,m) => key schedule table + + Other variables and tables: + + t_xxx(r,c) => the rcon table +*/ + +#if !defined( _AESTAB_H ) +#define _AESTAB_H + +#define t_dec(m,n) t_##m##n +#define t_set(m,n) t_##m##n +#define t_use(m,n) t_##m##n + +#if defined(FIXED_TABLES) +# if !defined( __GNUC__ ) && (defined( __MSDOS__ ) || defined( __WIN16__ )) +/* make tables far data to avoid using too much DGROUP space (PG) */ +# define CONST const far +# else +# define CONST const +# endif +#else +# define CONST +#endif + +#if defined(__cplusplus) +# define EXTERN extern "C" +#elif defined(DO_TABLES) +# define EXTERN +#else +# define EXTERN extern +#endif + +#if defined(_MSC_VER) && defined(TABLE_ALIGN) +#define ALIGN __declspec(align(TABLE_ALIGN)) +#else +#define ALIGN +#endif + +#if defined( __WATCOMC__ ) && ( __WATCOMC__ >= 1100 ) +# define XP_DIR __cdecl +#else +# define XP_DIR +#endif + +#if defined(DO_TABLES) && defined(FIXED_TABLES) +#define d_1(t,n,b,e) EXTERN ALIGN CONST XP_DIR t n[256] = b(e) +#define d_4(t,n,b,e,f,g,h) EXTERN ALIGN CONST XP_DIR t n[4][256] = { b(e), b(f), b(g), b(h) } +EXTERN ALIGN CONST uint_32t t_dec(r,c)[RC_LENGTH] = rc_data(w0); +#else +#define d_1(t,n,b,e) EXTERN ALIGN CONST XP_DIR t n[256] +#define d_4(t,n,b,e,f,g,h) EXTERN ALIGN CONST XP_DIR t n[4][256] +EXTERN ALIGN CONST uint_32t t_dec(r,c)[RC_LENGTH]; +#endif + +#if defined( SBX_SET ) + d_1(uint_8t, t_dec(s,box), sb_data, h0); +#endif +#if defined( ISB_SET ) + d_1(uint_8t, t_dec(i,box), isb_data, h0); +#endif + +#if defined( FT1_SET ) + d_1(uint_32t, t_dec(f,n), sb_data, u0); +#endif +#if defined( FT4_SET ) + d_4(uint_32t, t_dec(f,n), sb_data, u0, u1, u2, u3); +#endif + +#if defined( FL1_SET ) + d_1(uint_32t, t_dec(f,l), sb_data, w0); +#endif +#if defined( FL4_SET ) + d_4(uint_32t, t_dec(f,l), sb_data, w0, w1, w2, w3); +#endif + +#if defined( IT1_SET ) + d_1(uint_32t, t_dec(i,n), isb_data, v0); +#endif +#if defined( IT4_SET ) + d_4(uint_32t, t_dec(i,n), isb_data, v0, v1, v2, v3); +#endif + +#if defined( IL1_SET ) + d_1(uint_32t, t_dec(i,l), isb_data, w0); +#endif +#if defined( IL4_SET ) + d_4(uint_32t, t_dec(i,l), isb_data, w0, w1, w2, w3); +#endif + +#if defined( LS1_SET ) +#if defined( FL1_SET ) +#undef LS1_SET +#else + d_1(uint_32t, t_dec(l,s), sb_data, w0); +#endif +#endif + +#if defined( LS4_SET ) +#if defined( FL4_SET ) +#undef LS4_SET +#else + d_4(uint_32t, t_dec(l,s), sb_data, w0, w1, w2, w3); +#endif +#endif + +#if defined( IM1_SET ) + d_1(uint_32t, t_dec(i,m), mm_data, v0); +#endif +#if defined( IM4_SET ) + d_4(uint_32t, t_dec(i,m), mm_data, v0, v1, v2, v3); +#endif + +#endif diff --git a/Crypto/Blowfish.c b/Crypto/Blowfish.c new file mode 100644 index 0000000..34977aa --- /dev/null +++ b/Crypto/Blowfish.c @@ -0,0 +1,382 @@ +/* Deprecated/legacy */ + + +// blowfish.cpp - written and placed in the public domain by Wei Dai + +/* Adapted for TrueCrypt */ + +#include +#include "Common/Tcdefs.h" +#include "Common/Endian.h" +#include "Blowfish.h" + +#define word32 unsigned __int32 +#define byte unsigned __int8 +#define GETBYTE(x, y) (unsigned int)(byte)((x)>>(8*(y))) +#define ROUNDS 16 + +static const unsigned __int32 p_init[16+2] = +{ + 608135816U, 2242054355U, 320440878U, 57701188U, + 2752067618U, 698298832U, 137296536U, 3964562569U, + 1160258022U, 953160567U, 3193202383U, 887688300U, + 3232508343U, 3380367581U, 1065670069U, 3041331479U, + 2450970073U, 2306472731U +} ; + +static const unsigned __int32 s_init[4*256] = { + 3509652390U, 2564797868U, 805139163U, 3491422135U, + 3101798381U, 1780907670U, 3128725573U, 4046225305U, + 614570311U, 3012652279U, 134345442U, 2240740374U, + 1667834072U, 1901547113U, 2757295779U, 4103290238U, + 227898511U, 1921955416U, 1904987480U, 2182433518U, + 2069144605U, 3260701109U, 2620446009U, 720527379U, + 3318853667U, 677414384U, 3393288472U, 3101374703U, + 2390351024U, 1614419982U, 1822297739U, 2954791486U, + 3608508353U, 3174124327U, 2024746970U, 1432378464U, + 3864339955U, 2857741204U, 1464375394U, 1676153920U, + 1439316330U, 715854006U, 3033291828U, 289532110U, + 2706671279U, 2087905683U, 3018724369U, 1668267050U, + 732546397U, 1947742710U, 3462151702U, 2609353502U, + 2950085171U, 1814351708U, 2050118529U, 680887927U, + 999245976U, 1800124847U, 3300911131U, 1713906067U, + 1641548236U, 4213287313U, 1216130144U, 1575780402U, + 4018429277U, 3917837745U, 3693486850U, 3949271944U, + 596196993U, 3549867205U, 258830323U, 2213823033U, + 772490370U, 2760122372U, 1774776394U, 2652871518U, + 566650946U, 4142492826U, 1728879713U, 2882767088U, + 1783734482U, 3629395816U, 2517608232U, 2874225571U, + 1861159788U, 326777828U, 3124490320U, 2130389656U, + 2716951837U, 967770486U, 1724537150U, 2185432712U, + 2364442137U, 1164943284U, 2105845187U, 998989502U, + 3765401048U, 2244026483U, 1075463327U, 1455516326U, + 1322494562U, 910128902U, 469688178U, 1117454909U, + 936433444U, 3490320968U, 3675253459U, 1240580251U, + 122909385U, 2157517691U, 634681816U, 4142456567U, + 3825094682U, 3061402683U, 2540495037U, 79693498U, + 3249098678U, 1084186820U, 1583128258U, 426386531U, + 1761308591U, 1047286709U, 322548459U, 995290223U, + 1845252383U, 2603652396U, 3431023940U, 2942221577U, + 3202600964U, 3727903485U, 1712269319U, 422464435U, + 3234572375U, 1170764815U, 3523960633U, 3117677531U, + 1434042557U, 442511882U, 3600875718U, 1076654713U, + 1738483198U, 4213154764U, 2393238008U, 3677496056U, + 1014306527U, 4251020053U, 793779912U, 2902807211U, + 842905082U, 4246964064U, 1395751752U, 1040244610U, + 2656851899U, 3396308128U, 445077038U, 3742853595U, + 3577915638U, 679411651U, 2892444358U, 2354009459U, + 1767581616U, 3150600392U, 3791627101U, 3102740896U, + 284835224U, 4246832056U, 1258075500U, 768725851U, + 2589189241U, 3069724005U, 3532540348U, 1274779536U, + 3789419226U, 2764799539U, 1660621633U, 3471099624U, + 4011903706U, 913787905U, 3497959166U, 737222580U, + 2514213453U, 2928710040U, 3937242737U, 1804850592U, + 3499020752U, 2949064160U, 2386320175U, 2390070455U, + 2415321851U, 4061277028U, 2290661394U, 2416832540U, + 1336762016U, 1754252060U, 3520065937U, 3014181293U, + 791618072U, 3188594551U, 3933548030U, 2332172193U, + 3852520463U, 3043980520U, 413987798U, 3465142937U, + 3030929376U, 4245938359U, 2093235073U, 3534596313U, + 375366246U, 2157278981U, 2479649556U, 555357303U, + 3870105701U, 2008414854U, 3344188149U, 4221384143U, + 3956125452U, 2067696032U, 3594591187U, 2921233993U, + 2428461U, 544322398U, 577241275U, 1471733935U, + 610547355U, 4027169054U, 1432588573U, 1507829418U, + 2025931657U, 3646575487U, 545086370U, 48609733U, + 2200306550U, 1653985193U, 298326376U, 1316178497U, + 3007786442U, 2064951626U, 458293330U, 2589141269U, + 3591329599U, 3164325604U, 727753846U, 2179363840U, + 146436021U, 1461446943U, 4069977195U, 705550613U, + 3059967265U, 3887724982U, 4281599278U, 3313849956U, + 1404054877U, 2845806497U, 146425753U, 1854211946U, + + 1266315497U, 3048417604U, 3681880366U, 3289982499U, + 2909710000U, 1235738493U, 2632868024U, 2414719590U, + 3970600049U, 1771706367U, 1449415276U, 3266420449U, + 422970021U, 1963543593U, 2690192192U, 3826793022U, + 1062508698U, 1531092325U, 1804592342U, 2583117782U, + 2714934279U, 4024971509U, 1294809318U, 4028980673U, + 1289560198U, 2221992742U, 1669523910U, 35572830U, + 157838143U, 1052438473U, 1016535060U, 1802137761U, + 1753167236U, 1386275462U, 3080475397U, 2857371447U, + 1040679964U, 2145300060U, 2390574316U, 1461121720U, + 2956646967U, 4031777805U, 4028374788U, 33600511U, + 2920084762U, 1018524850U, 629373528U, 3691585981U, + 3515945977U, 2091462646U, 2486323059U, 586499841U, + 988145025U, 935516892U, 3367335476U, 2599673255U, + 2839830854U, 265290510U, 3972581182U, 2759138881U, + 3795373465U, 1005194799U, 847297441U, 406762289U, + 1314163512U, 1332590856U, 1866599683U, 4127851711U, + 750260880U, 613907577U, 1450815602U, 3165620655U, + 3734664991U, 3650291728U, 3012275730U, 3704569646U, + 1427272223U, 778793252U, 1343938022U, 2676280711U, + 2052605720U, 1946737175U, 3164576444U, 3914038668U, + 3967478842U, 3682934266U, 1661551462U, 3294938066U, + 4011595847U, 840292616U, 3712170807U, 616741398U, + 312560963U, 711312465U, 1351876610U, 322626781U, + 1910503582U, 271666773U, 2175563734U, 1594956187U, + 70604529U, 3617834859U, 1007753275U, 1495573769U, + 4069517037U, 2549218298U, 2663038764U, 504708206U, + 2263041392U, 3941167025U, 2249088522U, 1514023603U, + 1998579484U, 1312622330U, 694541497U, 2582060303U, + 2151582166U, 1382467621U, 776784248U, 2618340202U, + 3323268794U, 2497899128U, 2784771155U, 503983604U, + 4076293799U, 907881277U, 423175695U, 432175456U, + 1378068232U, 4145222326U, 3954048622U, 3938656102U, + 3820766613U, 2793130115U, 2977904593U, 26017576U, + 3274890735U, 3194772133U, 1700274565U, 1756076034U, + 4006520079U, 3677328699U, 720338349U, 1533947780U, + 354530856U, 688349552U, 3973924725U, 1637815568U, + 332179504U, 3949051286U, 53804574U, 2852348879U, + 3044236432U, 1282449977U, 3583942155U, 3416972820U, + 4006381244U, 1617046695U, 2628476075U, 3002303598U, + 1686838959U, 431878346U, 2686675385U, 1700445008U, + 1080580658U, 1009431731U, 832498133U, 3223435511U, + 2605976345U, 2271191193U, 2516031870U, 1648197032U, + 4164389018U, 2548247927U, 300782431U, 375919233U, + 238389289U, 3353747414U, 2531188641U, 2019080857U, + 1475708069U, 455242339U, 2609103871U, 448939670U, + 3451063019U, 1395535956U, 2413381860U, 1841049896U, + 1491858159U, 885456874U, 4264095073U, 4001119347U, + 1565136089U, 3898914787U, 1108368660U, 540939232U, + 1173283510U, 2745871338U, 3681308437U, 4207628240U, + 3343053890U, 4016749493U, 1699691293U, 1103962373U, + 3625875870U, 2256883143U, 3830138730U, 1031889488U, + 3479347698U, 1535977030U, 4236805024U, 3251091107U, + 2132092099U, 1774941330U, 1199868427U, 1452454533U, + 157007616U, 2904115357U, 342012276U, 595725824U, + 1480756522U, 206960106U, 497939518U, 591360097U, + 863170706U, 2375253569U, 3596610801U, 1814182875U, + 2094937945U, 3421402208U, 1082520231U, 3463918190U, + 2785509508U, 435703966U, 3908032597U, 1641649973U, + 2842273706U, 3305899714U, 1510255612U, 2148256476U, + 2655287854U, 3276092548U, 4258621189U, 236887753U, + 3681803219U, 274041037U, 1734335097U, 3815195456U, + 3317970021U, 1899903192U, 1026095262U, 4050517792U, + 356393447U, 2410691914U, 3873677099U, 3682840055U, + + 3913112168U, 2491498743U, 4132185628U, 2489919796U, + 1091903735U, 1979897079U, 3170134830U, 3567386728U, + 3557303409U, 857797738U, 1136121015U, 1342202287U, + 507115054U, 2535736646U, 337727348U, 3213592640U, + 1301675037U, 2528481711U, 1895095763U, 1721773893U, + 3216771564U, 62756741U, 2142006736U, 835421444U, + 2531993523U, 1442658625U, 3659876326U, 2882144922U, + 676362277U, 1392781812U, 170690266U, 3921047035U, + 1759253602U, 3611846912U, 1745797284U, 664899054U, + 1329594018U, 3901205900U, 3045908486U, 2062866102U, + 2865634940U, 3543621612U, 3464012697U, 1080764994U, + 553557557U, 3656615353U, 3996768171U, 991055499U, + 499776247U, 1265440854U, 648242737U, 3940784050U, + 980351604U, 3713745714U, 1749149687U, 3396870395U, + 4211799374U, 3640570775U, 1161844396U, 3125318951U, + 1431517754U, 545492359U, 4268468663U, 3499529547U, + 1437099964U, 2702547544U, 3433638243U, 2581715763U, + 2787789398U, 1060185593U, 1593081372U, 2418618748U, + 4260947970U, 69676912U, 2159744348U, 86519011U, + 2512459080U, 3838209314U, 1220612927U, 3339683548U, + 133810670U, 1090789135U, 1078426020U, 1569222167U, + 845107691U, 3583754449U, 4072456591U, 1091646820U, + 628848692U, 1613405280U, 3757631651U, 526609435U, + 236106946U, 48312990U, 2942717905U, 3402727701U, + 1797494240U, 859738849U, 992217954U, 4005476642U, + 2243076622U, 3870952857U, 3732016268U, 765654824U, + 3490871365U, 2511836413U, 1685915746U, 3888969200U, + 1414112111U, 2273134842U, 3281911079U, 4080962846U, + 172450625U, 2569994100U, 980381355U, 4109958455U, + 2819808352U, 2716589560U, 2568741196U, 3681446669U, + 3329971472U, 1835478071U, 660984891U, 3704678404U, + 4045999559U, 3422617507U, 3040415634U, 1762651403U, + 1719377915U, 3470491036U, 2693910283U, 3642056355U, + 3138596744U, 1364962596U, 2073328063U, 1983633131U, + 926494387U, 3423689081U, 2150032023U, 4096667949U, + 1749200295U, 3328846651U, 309677260U, 2016342300U, + 1779581495U, 3079819751U, 111262694U, 1274766160U, + 443224088U, 298511866U, 1025883608U, 3806446537U, + 1145181785U, 168956806U, 3641502830U, 3584813610U, + 1689216846U, 3666258015U, 3200248200U, 1692713982U, + 2646376535U, 4042768518U, 1618508792U, 1610833997U, + 3523052358U, 4130873264U, 2001055236U, 3610705100U, + 2202168115U, 4028541809U, 2961195399U, 1006657119U, + 2006996926U, 3186142756U, 1430667929U, 3210227297U, + 1314452623U, 4074634658U, 4101304120U, 2273951170U, + 1399257539U, 3367210612U, 3027628629U, 1190975929U, + 2062231137U, 2333990788U, 2221543033U, 2438960610U, + 1181637006U, 548689776U, 2362791313U, 3372408396U, + 3104550113U, 3145860560U, 296247880U, 1970579870U, + 3078560182U, 3769228297U, 1714227617U, 3291629107U, + 3898220290U, 166772364U, 1251581989U, 493813264U, + 448347421U, 195405023U, 2709975567U, 677966185U, + 3703036547U, 1463355134U, 2715995803U, 1338867538U, + 1343315457U, 2802222074U, 2684532164U, 233230375U, + 2599980071U, 2000651841U, 3277868038U, 1638401717U, + 4028070440U, 3237316320U, 6314154U, 819756386U, + 300326615U, 590932579U, 1405279636U, 3267499572U, + 3150704214U, 2428286686U, 3959192993U, 3461946742U, + 1862657033U, 1266418056U, 963775037U, 2089974820U, + 2263052895U, 1917689273U, 448879540U, 3550394620U, + 3981727096U, 150775221U, 3627908307U, 1303187396U, + 508620638U, 2975983352U, 2726630617U, 1817252668U, + 1876281319U, 1457606340U, 908771278U, 3720792119U, + 3617206836U, 2455994898U, 1729034894U, 1080033504U, + + 976866871U, 3556439503U, 2881648439U, 1522871579U, + 1555064734U, 1336096578U, 3548522304U, 2579274686U, + 3574697629U, 3205460757U, 3593280638U, 3338716283U, + 3079412587U, 564236357U, 2993598910U, 1781952180U, + 1464380207U, 3163844217U, 3332601554U, 1699332808U, + 1393555694U, 1183702653U, 3581086237U, 1288719814U, + 691649499U, 2847557200U, 2895455976U, 3193889540U, + 2717570544U, 1781354906U, 1676643554U, 2592534050U, + 3230253752U, 1126444790U, 2770207658U, 2633158820U, + 2210423226U, 2615765581U, 2414155088U, 3127139286U, + 673620729U, 2805611233U, 1269405062U, 4015350505U, + 3341807571U, 4149409754U, 1057255273U, 2012875353U, + 2162469141U, 2276492801U, 2601117357U, 993977747U, + 3918593370U, 2654263191U, 753973209U, 36408145U, + 2530585658U, 25011837U, 3520020182U, 2088578344U, + 530523599U, 2918365339U, 1524020338U, 1518925132U, + 3760827505U, 3759777254U, 1202760957U, 3985898139U, + 3906192525U, 674977740U, 4174734889U, 2031300136U, + 2019492241U, 3983892565U, 4153806404U, 3822280332U, + 352677332U, 2297720250U, 60907813U, 90501309U, + 3286998549U, 1016092578U, 2535922412U, 2839152426U, + 457141659U, 509813237U, 4120667899U, 652014361U, + 1966332200U, 2975202805U, 55981186U, 2327461051U, + 676427537U, 3255491064U, 2882294119U, 3433927263U, + 1307055953U, 942726286U, 933058658U, 2468411793U, + 3933900994U, 4215176142U, 1361170020U, 2001714738U, + 2830558078U, 3274259782U, 1222529897U, 1679025792U, + 2729314320U, 3714953764U, 1770335741U, 151462246U, + 3013232138U, 1682292957U, 1483529935U, 471910574U, + 1539241949U, 458788160U, 3436315007U, 1807016891U, + 3718408830U, 978976581U, 1043663428U, 3165965781U, + 1927990952U, 4200891579U, 2372276910U, 3208408903U, + 3533431907U, 1412390302U, 2931980059U, 4132332400U, + 1947078029U, 3881505623U, 4168226417U, 2941484381U, + 1077988104U, 1320477388U, 886195818U, 18198404U, + 3786409000U, 2509781533U, 112762804U, 3463356488U, + 1866414978U, 891333506U, 18488651U, 661792760U, + 1628790961U, 3885187036U, 3141171499U, 876946877U, + 2693282273U, 1372485963U, 791857591U, 2686433993U, + 3759982718U, 3167212022U, 3472953795U, 2716379847U, + 445679433U, 3561995674U, 3504004811U, 3574258232U, + 54117162U, 3331405415U, 2381918588U, 3769707343U, + 4154350007U, 1140177722U, 4074052095U, 668550556U, + 3214352940U, 367459370U, 261225585U, 2610173221U, + 4209349473U, 3468074219U, 3265815641U, 314222801U, + 3066103646U, 3808782860U, 282218597U, 3406013506U, + 3773591054U, 379116347U, 1285071038U, 846784868U, + 2669647154U, 3771962079U, 3550491691U, 2305946142U, + 453669953U, 1268987020U, 3317592352U, 3279303384U, + 3744833421U, 2610507566U, 3859509063U, 266596637U, + 3847019092U, 517658769U, 3462560207U, 3443424879U, + 370717030U, 4247526661U, 2224018117U, 4143653529U, + 4112773975U, 2788324899U, 2477274417U, 1456262402U, + 2901442914U, 1517677493U, 1846949527U, 2295493580U, + 3734397586U, 2176403920U, 1280348187U, 1908823572U, + 3871786941U, 846861322U, 1172426758U, 3287448474U, + 3383383037U, 1655181056U, 3139813346U, 901632758U, + 1897031941U, 2986607138U, 3066810236U, 3447102507U, + 1393639104U, 373351379U, 950779232U, 625454576U, + 3124240540U, 4148612726U, 2007998917U, 544563296U, + 2244738638U, 2330496472U, 2058025392U, 1291430526U, + 424198748U, 50039436U, 29584100U, 3605783033U, + 2429876329U, 2791104160U, 1057563949U, 3255363231U, + 3075367218U, 3463963227U, 1469046755U, 985887462U +}; + + +// this version is only used to make pbox and sbox +static void crypt_block(BF_KEY *key, const word32 in[2], word32 out[2]) +{ + word32 left = in[0]; + word32 right = in[1]; + + const word32 *const s=key->sbox; + const word32 *p=key->pbox; + + unsigned i; + + left ^= p[0]; + + for (i=0; isbox; + word32 *pbox = key->pbox; + + memcpy(pbox, p_init, sizeof(p_init)); + memcpy(sbox, s_init, sizeof(s_init)); + + // Xor key string into encryption key vector + for (i=0 ; ipbox_dec[ROUNDS+1-i] = pbox[i]; +} + + +void BlowfishEncryptLE (unsigned char *inBlock, unsigned char *outBlock, BF_KEY *key, int encrypt) +{ + word32 left = LE32 (((word32 *) inBlock)[0]); + word32 right = LE32 (((word32 *) inBlock)[1]); + + const word32 *const s = key->sbox; + const word32 * p = encrypt ? key->pbox : key->pbox_dec; + + unsigned i; + + left ^= p[0]; + + for (i=0; i +#include "Common/Tcdefs.h" +#include "Common/Endian.h" +#include "Cast.h" + +#define word32 unsigned __int32 +#define byte unsigned __int8 +#define GETBYTE(x, y) (unsigned int)(byte)((x)>>(8*(y))) + +/* Macros to access 8-bit bytes out of a 32-bit word */ +#define U8a(x) GETBYTE(x,3) +#define U8b(x) GETBYTE(x,2) +#define U8c(x) GETBYTE(x,1) +#define U8d(x) GETBYTE(x,0) + +static word32 rotlVariable (word32 x, unsigned int y) +{ + return (word32)((x<>(sizeof(word32)*8-y))); +} + + +// CAST S-boxes + +static const word32 S[8][256] = { +{ + 0x30FB40D4UL, 0x9FA0FF0BUL, 0x6BECCD2FUL, 0x3F258C7AUL, + 0x1E213F2FUL, 0x9C004DD3UL, 0x6003E540UL, 0xCF9FC949UL, + 0xBFD4AF27UL, 0x88BBBDB5UL, 0xE2034090UL, 0x98D09675UL, + 0x6E63A0E0UL, 0x15C361D2UL, 0xC2E7661DUL, 0x22D4FF8EUL, + 0x28683B6FUL, 0xC07FD059UL, 0xFF2379C8UL, 0x775F50E2UL, + 0x43C340D3UL, 0xDF2F8656UL, 0x887CA41AUL, 0xA2D2BD2DUL, + 0xA1C9E0D6UL, 0x346C4819UL, 0x61B76D87UL, 0x22540F2FUL, + 0x2ABE32E1UL, 0xAA54166BUL, 0x22568E3AUL, 0xA2D341D0UL, + 0x66DB40C8UL, 0xA784392FUL, 0x004DFF2FUL, 0x2DB9D2DEUL, + 0x97943FACUL, 0x4A97C1D8UL, 0x527644B7UL, 0xB5F437A7UL, + 0xB82CBAEFUL, 0xD751D159UL, 0x6FF7F0EDUL, 0x5A097A1FUL, + 0x827B68D0UL, 0x90ECF52EUL, 0x22B0C054UL, 0xBC8E5935UL, + 0x4B6D2F7FUL, 0x50BB64A2UL, 0xD2664910UL, 0xBEE5812DUL, + 0xB7332290UL, 0xE93B159FUL, 0xB48EE411UL, 0x4BFF345DUL, + 0xFD45C240UL, 0xAD31973FUL, 0xC4F6D02EUL, 0x55FC8165UL, + 0xD5B1CAADUL, 0xA1AC2DAEUL, 0xA2D4B76DUL, 0xC19B0C50UL, + 0x882240F2UL, 0x0C6E4F38UL, 0xA4E4BFD7UL, 0x4F5BA272UL, + 0x564C1D2FUL, 0xC59C5319UL, 0xB949E354UL, 0xB04669FEUL, + 0xB1B6AB8AUL, 0xC71358DDUL, 0x6385C545UL, 0x110F935DUL, + 0x57538AD5UL, 0x6A390493UL, 0xE63D37E0UL, 0x2A54F6B3UL, + 0x3A787D5FUL, 0x6276A0B5UL, 0x19A6FCDFUL, 0x7A42206AUL, + 0x29F9D4D5UL, 0xF61B1891UL, 0xBB72275EUL, 0xAA508167UL, + 0x38901091UL, 0xC6B505EBUL, 0x84C7CB8CUL, 0x2AD75A0FUL, + 0x874A1427UL, 0xA2D1936BUL, 0x2AD286AFUL, 0xAA56D291UL, + 0xD7894360UL, 0x425C750DUL, 0x93B39E26UL, 0x187184C9UL, + 0x6C00B32DUL, 0x73E2BB14UL, 0xA0BEBC3CUL, 0x54623779UL, + 0x64459EABUL, 0x3F328B82UL, 0x7718CF82UL, 0x59A2CEA6UL, + 0x04EE002EUL, 0x89FE78E6UL, 0x3FAB0950UL, 0x325FF6C2UL, + 0x81383F05UL, 0x6963C5C8UL, 0x76CB5AD6UL, 0xD49974C9UL, + 0xCA180DCFUL, 0x380782D5UL, 0xC7FA5CF6UL, 0x8AC31511UL, + 0x35E79E13UL, 0x47DA91D0UL, 0xF40F9086UL, 0xA7E2419EUL, + 0x31366241UL, 0x051EF495UL, 0xAA573B04UL, 0x4A805D8DUL, + 0x548300D0UL, 0x00322A3CUL, 0xBF64CDDFUL, 0xBA57A68EUL, + 0x75C6372BUL, 0x50AFD341UL, 0xA7C13275UL, 0x915A0BF5UL, + 0x6B54BFABUL, 0x2B0B1426UL, 0xAB4CC9D7UL, 0x449CCD82UL, + 0xF7FBF265UL, 0xAB85C5F3UL, 0x1B55DB94UL, 0xAAD4E324UL, + 0xCFA4BD3FUL, 0x2DEAA3E2UL, 0x9E204D02UL, 0xC8BD25ACUL, + 0xEADF55B3UL, 0xD5BD9E98UL, 0xE31231B2UL, 0x2AD5AD6CUL, + 0x954329DEUL, 0xADBE4528UL, 0xD8710F69UL, 0xAA51C90FUL, + 0xAA786BF6UL, 0x22513F1EUL, 0xAA51A79BUL, 0x2AD344CCUL, + 0x7B5A41F0UL, 0xD37CFBADUL, 0x1B069505UL, 0x41ECE491UL, + 0xB4C332E6UL, 0x032268D4UL, 0xC9600ACCUL, 0xCE387E6DUL, + 0xBF6BB16CUL, 0x6A70FB78UL, 0x0D03D9C9UL, 0xD4DF39DEUL, + 0xE01063DAUL, 0x4736F464UL, 0x5AD328D8UL, 0xB347CC96UL, + 0x75BB0FC3UL, 0x98511BFBUL, 0x4FFBCC35UL, 0xB58BCF6AUL, + 0xE11F0ABCUL, 0xBFC5FE4AUL, 0xA70AEC10UL, 0xAC39570AUL, + 0x3F04442FUL, 0x6188B153UL, 0xE0397A2EUL, 0x5727CB79UL, + 0x9CEB418FUL, 0x1CACD68DUL, 0x2AD37C96UL, 0x0175CB9DUL, + 0xC69DFF09UL, 0xC75B65F0UL, 0xD9DB40D8UL, 0xEC0E7779UL, + 0x4744EAD4UL, 0xB11C3274UL, 0xDD24CB9EUL, 0x7E1C54BDUL, + 0xF01144F9UL, 0xD2240EB1UL, 0x9675B3FDUL, 0xA3AC3755UL, + 0xD47C27AFUL, 0x51C85F4DUL, 0x56907596UL, 0xA5BB15E6UL, + 0x580304F0UL, 0xCA042CF1UL, 0x011A37EAUL, 0x8DBFAADBUL, + 0x35BA3E4AUL, 0x3526FFA0UL, 0xC37B4D09UL, 0xBC306ED9UL, + 0x98A52666UL, 0x5648F725UL, 0xFF5E569DUL, 0x0CED63D0UL, + 0x7C63B2CFUL, 0x700B45E1UL, 0xD5EA50F1UL, 0x85A92872UL, + 0xAF1FBDA7UL, 0xD4234870UL, 0xA7870BF3UL, 0x2D3B4D79UL, + 0x42E04198UL, 0x0CD0EDE7UL, 0x26470DB8UL, 0xF881814CUL, + 0x474D6AD7UL, 0x7C0C5E5CUL, 0xD1231959UL, 0x381B7298UL, + 0xF5D2F4DBUL, 0xAB838653UL, 0x6E2F1E23UL, 0x83719C9EUL, + 0xBD91E046UL, 0x9A56456EUL, 0xDC39200CUL, 0x20C8C571UL, + 0x962BDA1CUL, 0xE1E696FFUL, 0xB141AB08UL, 0x7CCA89B9UL, + 0x1A69E783UL, 0x02CC4843UL, 0xA2F7C579UL, 0x429EF47DUL, + 0x427B169CUL, 0x5AC9F049UL, 0xDD8F0F00UL, 0x5C8165BFUL +}, + +{ + 0x1F201094UL, 0xEF0BA75BUL, 0x69E3CF7EUL, 0x393F4380UL, + 0xFE61CF7AUL, 0xEEC5207AUL, 0x55889C94UL, 0x72FC0651UL, + 0xADA7EF79UL, 0x4E1D7235UL, 0xD55A63CEUL, 0xDE0436BAUL, + 0x99C430EFUL, 0x5F0C0794UL, 0x18DCDB7DUL, 0xA1D6EFF3UL, + 0xA0B52F7BUL, 0x59E83605UL, 0xEE15B094UL, 0xE9FFD909UL, + 0xDC440086UL, 0xEF944459UL, 0xBA83CCB3UL, 0xE0C3CDFBUL, + 0xD1DA4181UL, 0x3B092AB1UL, 0xF997F1C1UL, 0xA5E6CF7BUL, + 0x01420DDBUL, 0xE4E7EF5BUL, 0x25A1FF41UL, 0xE180F806UL, + 0x1FC41080UL, 0x179BEE7AUL, 0xD37AC6A9UL, 0xFE5830A4UL, + 0x98DE8B7FUL, 0x77E83F4EUL, 0x79929269UL, 0x24FA9F7BUL, + 0xE113C85BUL, 0xACC40083UL, 0xD7503525UL, 0xF7EA615FUL, + 0x62143154UL, 0x0D554B63UL, 0x5D681121UL, 0xC866C359UL, + 0x3D63CF73UL, 0xCEE234C0UL, 0xD4D87E87UL, 0x5C672B21UL, + 0x071F6181UL, 0x39F7627FUL, 0x361E3084UL, 0xE4EB573BUL, + 0x602F64A4UL, 0xD63ACD9CUL, 0x1BBC4635UL, 0x9E81032DUL, + 0x2701F50CUL, 0x99847AB4UL, 0xA0E3DF79UL, 0xBA6CF38CUL, + 0x10843094UL, 0x2537A95EUL, 0xF46F6FFEUL, 0xA1FF3B1FUL, + 0x208CFB6AUL, 0x8F458C74UL, 0xD9E0A227UL, 0x4EC73A34UL, + 0xFC884F69UL, 0x3E4DE8DFUL, 0xEF0E0088UL, 0x3559648DUL, + 0x8A45388CUL, 0x1D804366UL, 0x721D9BFDUL, 0xA58684BBUL, + 0xE8256333UL, 0x844E8212UL, 0x128D8098UL, 0xFED33FB4UL, + 0xCE280AE1UL, 0x27E19BA5UL, 0xD5A6C252UL, 0xE49754BDUL, + 0xC5D655DDUL, 0xEB667064UL, 0x77840B4DUL, 0xA1B6A801UL, + 0x84DB26A9UL, 0xE0B56714UL, 0x21F043B7UL, 0xE5D05860UL, + 0x54F03084UL, 0x066FF472UL, 0xA31AA153UL, 0xDADC4755UL, + 0xB5625DBFUL, 0x68561BE6UL, 0x83CA6B94UL, 0x2D6ED23BUL, + 0xECCF01DBUL, 0xA6D3D0BAUL, 0xB6803D5CUL, 0xAF77A709UL, + 0x33B4A34CUL, 0x397BC8D6UL, 0x5EE22B95UL, 0x5F0E5304UL, + 0x81ED6F61UL, 0x20E74364UL, 0xB45E1378UL, 0xDE18639BUL, + 0x881CA122UL, 0xB96726D1UL, 0x8049A7E8UL, 0x22B7DA7BUL, + 0x5E552D25UL, 0x5272D237UL, 0x79D2951CUL, 0xC60D894CUL, + 0x488CB402UL, 0x1BA4FE5BUL, 0xA4B09F6BUL, 0x1CA815CFUL, + 0xA20C3005UL, 0x8871DF63UL, 0xB9DE2FCBUL, 0x0CC6C9E9UL, + 0x0BEEFF53UL, 0xE3214517UL, 0xB4542835UL, 0x9F63293CUL, + 0xEE41E729UL, 0x6E1D2D7CUL, 0x50045286UL, 0x1E6685F3UL, + 0xF33401C6UL, 0x30A22C95UL, 0x31A70850UL, 0x60930F13UL, + 0x73F98417UL, 0xA1269859UL, 0xEC645C44UL, 0x52C877A9UL, + 0xCDFF33A6UL, 0xA02B1741UL, 0x7CBAD9A2UL, 0x2180036FUL, + 0x50D99C08UL, 0xCB3F4861UL, 0xC26BD765UL, 0x64A3F6ABUL, + 0x80342676UL, 0x25A75E7BUL, 0xE4E6D1FCUL, 0x20C710E6UL, + 0xCDF0B680UL, 0x17844D3BUL, 0x31EEF84DUL, 0x7E0824E4UL, + 0x2CCB49EBUL, 0x846A3BAEUL, 0x8FF77888UL, 0xEE5D60F6UL, + 0x7AF75673UL, 0x2FDD5CDBUL, 0xA11631C1UL, 0x30F66F43UL, + 0xB3FAEC54UL, 0x157FD7FAUL, 0xEF8579CCUL, 0xD152DE58UL, + 0xDB2FFD5EUL, 0x8F32CE19UL, 0x306AF97AUL, 0x02F03EF8UL, + 0x99319AD5UL, 0xC242FA0FUL, 0xA7E3EBB0UL, 0xC68E4906UL, + 0xB8DA230CUL, 0x80823028UL, 0xDCDEF3C8UL, 0xD35FB171UL, + 0x088A1BC8UL, 0xBEC0C560UL, 0x61A3C9E8UL, 0xBCA8F54DUL, + 0xC72FEFFAUL, 0x22822E99UL, 0x82C570B4UL, 0xD8D94E89UL, + 0x8B1C34BCUL, 0x301E16E6UL, 0x273BE979UL, 0xB0FFEAA6UL, + 0x61D9B8C6UL, 0x00B24869UL, 0xB7FFCE3FUL, 0x08DC283BUL, + 0x43DAF65AUL, 0xF7E19798UL, 0x7619B72FUL, 0x8F1C9BA4UL, + 0xDC8637A0UL, 0x16A7D3B1UL, 0x9FC393B7UL, 0xA7136EEBUL, + 0xC6BCC63EUL, 0x1A513742UL, 0xEF6828BCUL, 0x520365D6UL, + 0x2D6A77ABUL, 0x3527ED4BUL, 0x821FD216UL, 0x095C6E2EUL, + 0xDB92F2FBUL, 0x5EEA29CBUL, 0x145892F5UL, 0x91584F7FUL, + 0x5483697BUL, 0x2667A8CCUL, 0x85196048UL, 0x8C4BACEAUL, + 0x833860D4UL, 0x0D23E0F9UL, 0x6C387E8AUL, 0x0AE6D249UL, + 0xB284600CUL, 0xD835731DUL, 0xDCB1C647UL, 0xAC4C56EAUL, + 0x3EBD81B3UL, 0x230EABB0UL, 0x6438BC87UL, 0xF0B5B1FAUL, + 0x8F5EA2B3UL, 0xFC184642UL, 0x0A036B7AUL, 0x4FB089BDUL, + 0x649DA589UL, 0xA345415EUL, 0x5C038323UL, 0x3E5D3BB9UL, + 0x43D79572UL, 0x7E6DD07CUL, 0x06DFDF1EUL, 0x6C6CC4EFUL, + 0x7160A539UL, 0x73BFBE70UL, 0x83877605UL, 0x4523ECF1UL +}, + +{ + 0x8DEFC240UL, 0x25FA5D9FUL, 0xEB903DBFUL, 0xE810C907UL, + 0x47607FFFUL, 0x369FE44BUL, 0x8C1FC644UL, 0xAECECA90UL, + 0xBEB1F9BFUL, 0xEEFBCAEAUL, 0xE8CF1950UL, 0x51DF07AEUL, + 0x920E8806UL, 0xF0AD0548UL, 0xE13C8D83UL, 0x927010D5UL, + 0x11107D9FUL, 0x07647DB9UL, 0xB2E3E4D4UL, 0x3D4F285EUL, + 0xB9AFA820UL, 0xFADE82E0UL, 0xA067268BUL, 0x8272792EUL, + 0x553FB2C0UL, 0x489AE22BUL, 0xD4EF9794UL, 0x125E3FBCUL, + 0x21FFFCEEUL, 0x825B1BFDUL, 0x9255C5EDUL, 0x1257A240UL, + 0x4E1A8302UL, 0xBAE07FFFUL, 0x528246E7UL, 0x8E57140EUL, + 0x3373F7BFUL, 0x8C9F8188UL, 0xA6FC4EE8UL, 0xC982B5A5UL, + 0xA8C01DB7UL, 0x579FC264UL, 0x67094F31UL, 0xF2BD3F5FUL, + 0x40FFF7C1UL, 0x1FB78DFCUL, 0x8E6BD2C1UL, 0x437BE59BUL, + 0x99B03DBFUL, 0xB5DBC64BUL, 0x638DC0E6UL, 0x55819D99UL, + 0xA197C81CUL, 0x4A012D6EUL, 0xC5884A28UL, 0xCCC36F71UL, + 0xB843C213UL, 0x6C0743F1UL, 0x8309893CUL, 0x0FEDDD5FUL, + 0x2F7FE850UL, 0xD7C07F7EUL, 0x02507FBFUL, 0x5AFB9A04UL, + 0xA747D2D0UL, 0x1651192EUL, 0xAF70BF3EUL, 0x58C31380UL, + 0x5F98302EUL, 0x727CC3C4UL, 0x0A0FB402UL, 0x0F7FEF82UL, + 0x8C96FDADUL, 0x5D2C2AAEUL, 0x8EE99A49UL, 0x50DA88B8UL, + 0x8427F4A0UL, 0x1EAC5790UL, 0x796FB449UL, 0x8252DC15UL, + 0xEFBD7D9BUL, 0xA672597DUL, 0xADA840D8UL, 0x45F54504UL, + 0xFA5D7403UL, 0xE83EC305UL, 0x4F91751AUL, 0x925669C2UL, + 0x23EFE941UL, 0xA903F12EUL, 0x60270DF2UL, 0x0276E4B6UL, + 0x94FD6574UL, 0x927985B2UL, 0x8276DBCBUL, 0x02778176UL, + 0xF8AF918DUL, 0x4E48F79EUL, 0x8F616DDFUL, 0xE29D840EUL, + 0x842F7D83UL, 0x340CE5C8UL, 0x96BBB682UL, 0x93B4B148UL, + 0xEF303CABUL, 0x984FAF28UL, 0x779FAF9BUL, 0x92DC560DUL, + 0x224D1E20UL, 0x8437AA88UL, 0x7D29DC96UL, 0x2756D3DCUL, + 0x8B907CEEUL, 0xB51FD240UL, 0xE7C07CE3UL, 0xE566B4A1UL, + 0xC3E9615EUL, 0x3CF8209DUL, 0x6094D1E3UL, 0xCD9CA341UL, + 0x5C76460EUL, 0x00EA983BUL, 0xD4D67881UL, 0xFD47572CUL, + 0xF76CEDD9UL, 0xBDA8229CUL, 0x127DADAAUL, 0x438A074EUL, + 0x1F97C090UL, 0x081BDB8AUL, 0x93A07EBEUL, 0xB938CA15UL, + 0x97B03CFFUL, 0x3DC2C0F8UL, 0x8D1AB2ECUL, 0x64380E51UL, + 0x68CC7BFBUL, 0xD90F2788UL, 0x12490181UL, 0x5DE5FFD4UL, + 0xDD7EF86AUL, 0x76A2E214UL, 0xB9A40368UL, 0x925D958FUL, + 0x4B39FFFAUL, 0xBA39AEE9UL, 0xA4FFD30BUL, 0xFAF7933BUL, + 0x6D498623UL, 0x193CBCFAUL, 0x27627545UL, 0x825CF47AUL, + 0x61BD8BA0UL, 0xD11E42D1UL, 0xCEAD04F4UL, 0x127EA392UL, + 0x10428DB7UL, 0x8272A972UL, 0x9270C4A8UL, 0x127DE50BUL, + 0x285BA1C8UL, 0x3C62F44FUL, 0x35C0EAA5UL, 0xE805D231UL, + 0x428929FBUL, 0xB4FCDF82UL, 0x4FB66A53UL, 0x0E7DC15BUL, + 0x1F081FABUL, 0x108618AEUL, 0xFCFD086DUL, 0xF9FF2889UL, + 0x694BCC11UL, 0x236A5CAEUL, 0x12DECA4DUL, 0x2C3F8CC5UL, + 0xD2D02DFEUL, 0xF8EF5896UL, 0xE4CF52DAUL, 0x95155B67UL, + 0x494A488CUL, 0xB9B6A80CUL, 0x5C8F82BCUL, 0x89D36B45UL, + 0x3A609437UL, 0xEC00C9A9UL, 0x44715253UL, 0x0A874B49UL, + 0xD773BC40UL, 0x7C34671CUL, 0x02717EF6UL, 0x4FEB5536UL, + 0xA2D02FFFUL, 0xD2BF60C4UL, 0xD43F03C0UL, 0x50B4EF6DUL, + 0x07478CD1UL, 0x006E1888UL, 0xA2E53F55UL, 0xB9E6D4BCUL, + 0xA2048016UL, 0x97573833UL, 0xD7207D67UL, 0xDE0F8F3DUL, + 0x72F87B33UL, 0xABCC4F33UL, 0x7688C55DUL, 0x7B00A6B0UL, + 0x947B0001UL, 0x570075D2UL, 0xF9BB88F8UL, 0x8942019EUL, + 0x4264A5FFUL, 0x856302E0UL, 0x72DBD92BUL, 0xEE971B69UL, + 0x6EA22FDEUL, 0x5F08AE2BUL, 0xAF7A616DUL, 0xE5C98767UL, + 0xCF1FEBD2UL, 0x61EFC8C2UL, 0xF1AC2571UL, 0xCC8239C2UL, + 0x67214CB8UL, 0xB1E583D1UL, 0xB7DC3E62UL, 0x7F10BDCEUL, + 0xF90A5C38UL, 0x0FF0443DUL, 0x606E6DC6UL, 0x60543A49UL, + 0x5727C148UL, 0x2BE98A1DUL, 0x8AB41738UL, 0x20E1BE24UL, + 0xAF96DA0FUL, 0x68458425UL, 0x99833BE5UL, 0x600D457DUL, + 0x282F9350UL, 0x8334B362UL, 0xD91D1120UL, 0x2B6D8DA0UL, + 0x642B1E31UL, 0x9C305A00UL, 0x52BCE688UL, 0x1B03588AUL, + 0xF7BAEFD5UL, 0x4142ED9CUL, 0xA4315C11UL, 0x83323EC5UL, + 0xDFEF4636UL, 0xA133C501UL, 0xE9D3531CUL, 0xEE353783UL +}, + +{ + 0x9DB30420UL, 0x1FB6E9DEUL, 0xA7BE7BEFUL, 0xD273A298UL, + 0x4A4F7BDBUL, 0x64AD8C57UL, 0x85510443UL, 0xFA020ED1UL, + 0x7E287AFFUL, 0xE60FB663UL, 0x095F35A1UL, 0x79EBF120UL, + 0xFD059D43UL, 0x6497B7B1UL, 0xF3641F63UL, 0x241E4ADFUL, + 0x28147F5FUL, 0x4FA2B8CDUL, 0xC9430040UL, 0x0CC32220UL, + 0xFDD30B30UL, 0xC0A5374FUL, 0x1D2D00D9UL, 0x24147B15UL, + 0xEE4D111AUL, 0x0FCA5167UL, 0x71FF904CUL, 0x2D195FFEUL, + 0x1A05645FUL, 0x0C13FEFEUL, 0x081B08CAUL, 0x05170121UL, + 0x80530100UL, 0xE83E5EFEUL, 0xAC9AF4F8UL, 0x7FE72701UL, + 0xD2B8EE5FUL, 0x06DF4261UL, 0xBB9E9B8AUL, 0x7293EA25UL, + 0xCE84FFDFUL, 0xF5718801UL, 0x3DD64B04UL, 0xA26F263BUL, + 0x7ED48400UL, 0x547EEBE6UL, 0x446D4CA0UL, 0x6CF3D6F5UL, + 0x2649ABDFUL, 0xAEA0C7F5UL, 0x36338CC1UL, 0x503F7E93UL, + 0xD3772061UL, 0x11B638E1UL, 0x72500E03UL, 0xF80EB2BBUL, + 0xABE0502EUL, 0xEC8D77DEUL, 0x57971E81UL, 0xE14F6746UL, + 0xC9335400UL, 0x6920318FUL, 0x081DBB99UL, 0xFFC304A5UL, + 0x4D351805UL, 0x7F3D5CE3UL, 0xA6C866C6UL, 0x5D5BCCA9UL, + 0xDAEC6FEAUL, 0x9F926F91UL, 0x9F46222FUL, 0x3991467DUL, + 0xA5BF6D8EUL, 0x1143C44FUL, 0x43958302UL, 0xD0214EEBUL, + 0x022083B8UL, 0x3FB6180CUL, 0x18F8931EUL, 0x281658E6UL, + 0x26486E3EUL, 0x8BD78A70UL, 0x7477E4C1UL, 0xB506E07CUL, + 0xF32D0A25UL, 0x79098B02UL, 0xE4EABB81UL, 0x28123B23UL, + 0x69DEAD38UL, 0x1574CA16UL, 0xDF871B62UL, 0x211C40B7UL, + 0xA51A9EF9UL, 0x0014377BUL, 0x041E8AC8UL, 0x09114003UL, + 0xBD59E4D2UL, 0xE3D156D5UL, 0x4FE876D5UL, 0x2F91A340UL, + 0x557BE8DEUL, 0x00EAE4A7UL, 0x0CE5C2ECUL, 0x4DB4BBA6UL, + 0xE756BDFFUL, 0xDD3369ACUL, 0xEC17B035UL, 0x06572327UL, + 0x99AFC8B0UL, 0x56C8C391UL, 0x6B65811CUL, 0x5E146119UL, + 0x6E85CB75UL, 0xBE07C002UL, 0xC2325577UL, 0x893FF4ECUL, + 0x5BBFC92DUL, 0xD0EC3B25UL, 0xB7801AB7UL, 0x8D6D3B24UL, + 0x20C763EFUL, 0xC366A5FCUL, 0x9C382880UL, 0x0ACE3205UL, + 0xAAC9548AUL, 0xECA1D7C7UL, 0x041AFA32UL, 0x1D16625AUL, + 0x6701902CUL, 0x9B757A54UL, 0x31D477F7UL, 0x9126B031UL, + 0x36CC6FDBUL, 0xC70B8B46UL, 0xD9E66A48UL, 0x56E55A79UL, + 0x026A4CEBUL, 0x52437EFFUL, 0x2F8F76B4UL, 0x0DF980A5UL, + 0x8674CDE3UL, 0xEDDA04EBUL, 0x17A9BE04UL, 0x2C18F4DFUL, + 0xB7747F9DUL, 0xAB2AF7B4UL, 0xEFC34D20UL, 0x2E096B7CUL, + 0x1741A254UL, 0xE5B6A035UL, 0x213D42F6UL, 0x2C1C7C26UL, + 0x61C2F50FUL, 0x6552DAF9UL, 0xD2C231F8UL, 0x25130F69UL, + 0xD8167FA2UL, 0x0418F2C8UL, 0x001A96A6UL, 0x0D1526ABUL, + 0x63315C21UL, 0x5E0A72ECUL, 0x49BAFEFDUL, 0x187908D9UL, + 0x8D0DBD86UL, 0x311170A7UL, 0x3E9B640CUL, 0xCC3E10D7UL, + 0xD5CAD3B6UL, 0x0CAEC388UL, 0xF73001E1UL, 0x6C728AFFUL, + 0x71EAE2A1UL, 0x1F9AF36EUL, 0xCFCBD12FUL, 0xC1DE8417UL, + 0xAC07BE6BUL, 0xCB44A1D8UL, 0x8B9B0F56UL, 0x013988C3UL, + 0xB1C52FCAUL, 0xB4BE31CDUL, 0xD8782806UL, 0x12A3A4E2UL, + 0x6F7DE532UL, 0x58FD7EB6UL, 0xD01EE900UL, 0x24ADFFC2UL, + 0xF4990FC5UL, 0x9711AAC5UL, 0x001D7B95UL, 0x82E5E7D2UL, + 0x109873F6UL, 0x00613096UL, 0xC32D9521UL, 0xADA121FFUL, + 0x29908415UL, 0x7FBB977FUL, 0xAF9EB3DBUL, 0x29C9ED2AUL, + 0x5CE2A465UL, 0xA730F32CUL, 0xD0AA3FE8UL, 0x8A5CC091UL, + 0xD49E2CE7UL, 0x0CE454A9UL, 0xD60ACD86UL, 0x015F1919UL, + 0x77079103UL, 0xDEA03AF6UL, 0x78A8565EUL, 0xDEE356DFUL, + 0x21F05CBEUL, 0x8B75E387UL, 0xB3C50651UL, 0xB8A5C3EFUL, + 0xD8EEB6D2UL, 0xE523BE77UL, 0xC2154529UL, 0x2F69EFDFUL, + 0xAFE67AFBUL, 0xF470C4B2UL, 0xF3E0EB5BUL, 0xD6CC9876UL, + 0x39E4460CUL, 0x1FDA8538UL, 0x1987832FUL, 0xCA007367UL, + 0xA99144F8UL, 0x296B299EUL, 0x492FC295UL, 0x9266BEABUL, + 0xB5676E69UL, 0x9BD3DDDAUL, 0xDF7E052FUL, 0xDB25701CUL, + 0x1B5E51EEUL, 0xF65324E6UL, 0x6AFCE36CUL, 0x0316CC04UL, + 0x8644213EUL, 0xB7DC59D0UL, 0x7965291FUL, 0xCCD6FD43UL, + 0x41823979UL, 0x932BCDF6UL, 0xB657C34DUL, 0x4EDFD282UL, + 0x7AE5290CUL, 0x3CB9536BUL, 0x851E20FEUL, 0x9833557EUL, + 0x13ECF0B0UL, 0xD3FFB372UL, 0x3F85C5C1UL, 0x0AEF7ED2UL +}, + +{ + 0x7EC90C04UL, 0x2C6E74B9UL, 0x9B0E66DFUL, 0xA6337911UL, + 0xB86A7FFFUL, 0x1DD358F5UL, 0x44DD9D44UL, 0x1731167FUL, + 0x08FBF1FAUL, 0xE7F511CCUL, 0xD2051B00UL, 0x735ABA00UL, + 0x2AB722D8UL, 0x386381CBUL, 0xACF6243AUL, 0x69BEFD7AUL, + 0xE6A2E77FUL, 0xF0C720CDUL, 0xC4494816UL, 0xCCF5C180UL, + 0x38851640UL, 0x15B0A848UL, 0xE68B18CBUL, 0x4CAADEFFUL, + 0x5F480A01UL, 0x0412B2AAUL, 0x259814FCUL, 0x41D0EFE2UL, + 0x4E40B48DUL, 0x248EB6FBUL, 0x8DBA1CFEUL, 0x41A99B02UL, + 0x1A550A04UL, 0xBA8F65CBUL, 0x7251F4E7UL, 0x95A51725UL, + 0xC106ECD7UL, 0x97A5980AUL, 0xC539B9AAUL, 0x4D79FE6AUL, + 0xF2F3F763UL, 0x68AF8040UL, 0xED0C9E56UL, 0x11B4958BUL, + 0xE1EB5A88UL, 0x8709E6B0UL, 0xD7E07156UL, 0x4E29FEA7UL, + 0x6366E52DUL, 0x02D1C000UL, 0xC4AC8E05UL, 0x9377F571UL, + 0x0C05372AUL, 0x578535F2UL, 0x2261BE02UL, 0xD642A0C9UL, + 0xDF13A280UL, 0x74B55BD2UL, 0x682199C0UL, 0xD421E5ECUL, + 0x53FB3CE8UL, 0xC8ADEDB3UL, 0x28A87FC9UL, 0x3D959981UL, + 0x5C1FF900UL, 0xFE38D399UL, 0x0C4EFF0BUL, 0x062407EAUL, + 0xAA2F4FB1UL, 0x4FB96976UL, 0x90C79505UL, 0xB0A8A774UL, + 0xEF55A1FFUL, 0xE59CA2C2UL, 0xA6B62D27UL, 0xE66A4263UL, + 0xDF65001FUL, 0x0EC50966UL, 0xDFDD55BCUL, 0x29DE0655UL, + 0x911E739AUL, 0x17AF8975UL, 0x32C7911CUL, 0x89F89468UL, + 0x0D01E980UL, 0x524755F4UL, 0x03B63CC9UL, 0x0CC844B2UL, + 0xBCF3F0AAUL, 0x87AC36E9UL, 0xE53A7426UL, 0x01B3D82BUL, + 0x1A9E7449UL, 0x64EE2D7EUL, 0xCDDBB1DAUL, 0x01C94910UL, + 0xB868BF80UL, 0x0D26F3FDUL, 0x9342EDE7UL, 0x04A5C284UL, + 0x636737B6UL, 0x50F5B616UL, 0xF24766E3UL, 0x8ECA36C1UL, + 0x136E05DBUL, 0xFEF18391UL, 0xFB887A37UL, 0xD6E7F7D4UL, + 0xC7FB7DC9UL, 0x3063FCDFUL, 0xB6F589DEUL, 0xEC2941DAUL, + 0x26E46695UL, 0xB7566419UL, 0xF654EFC5UL, 0xD08D58B7UL, + 0x48925401UL, 0xC1BACB7FUL, 0xE5FF550FUL, 0xB6083049UL, + 0x5BB5D0E8UL, 0x87D72E5AUL, 0xAB6A6EE1UL, 0x223A66CEUL, + 0xC62BF3CDUL, 0x9E0885F9UL, 0x68CB3E47UL, 0x086C010FUL, + 0xA21DE820UL, 0xD18B69DEUL, 0xF3F65777UL, 0xFA02C3F6UL, + 0x407EDAC3UL, 0xCBB3D550UL, 0x1793084DUL, 0xB0D70EBAUL, + 0x0AB378D5UL, 0xD951FB0CUL, 0xDED7DA56UL, 0x4124BBE4UL, + 0x94CA0B56UL, 0x0F5755D1UL, 0xE0E1E56EUL, 0x6184B5BEUL, + 0x580A249FUL, 0x94F74BC0UL, 0xE327888EUL, 0x9F7B5561UL, + 0xC3DC0280UL, 0x05687715UL, 0x646C6BD7UL, 0x44904DB3UL, + 0x66B4F0A3UL, 0xC0F1648AUL, 0x697ED5AFUL, 0x49E92FF6UL, + 0x309E374FUL, 0x2CB6356AUL, 0x85808573UL, 0x4991F840UL, + 0x76F0AE02UL, 0x083BE84DUL, 0x28421C9AUL, 0x44489406UL, + 0x736E4CB8UL, 0xC1092910UL, 0x8BC95FC6UL, 0x7D869CF4UL, + 0x134F616FUL, 0x2E77118DUL, 0xB31B2BE1UL, 0xAA90B472UL, + 0x3CA5D717UL, 0x7D161BBAUL, 0x9CAD9010UL, 0xAF462BA2UL, + 0x9FE459D2UL, 0x45D34559UL, 0xD9F2DA13UL, 0xDBC65487UL, + 0xF3E4F94EUL, 0x176D486FUL, 0x097C13EAUL, 0x631DA5C7UL, + 0x445F7382UL, 0x175683F4UL, 0xCDC66A97UL, 0x70BE0288UL, + 0xB3CDCF72UL, 0x6E5DD2F3UL, 0x20936079UL, 0x459B80A5UL, + 0xBE60E2DBUL, 0xA9C23101UL, 0xEBA5315CUL, 0x224E42F2UL, + 0x1C5C1572UL, 0xF6721B2CUL, 0x1AD2FFF3UL, 0x8C25404EUL, + 0x324ED72FUL, 0x4067B7FDUL, 0x0523138EUL, 0x5CA3BC78UL, + 0xDC0FD66EUL, 0x75922283UL, 0x784D6B17UL, 0x58EBB16EUL, + 0x44094F85UL, 0x3F481D87UL, 0xFCFEAE7BUL, 0x77B5FF76UL, + 0x8C2302BFUL, 0xAAF47556UL, 0x5F46B02AUL, 0x2B092801UL, + 0x3D38F5F7UL, 0x0CA81F36UL, 0x52AF4A8AUL, 0x66D5E7C0UL, + 0xDF3B0874UL, 0x95055110UL, 0x1B5AD7A8UL, 0xF61ED5ADUL, + 0x6CF6E479UL, 0x20758184UL, 0xD0CEFA65UL, 0x88F7BE58UL, + 0x4A046826UL, 0x0FF6F8F3UL, 0xA09C7F70UL, 0x5346ABA0UL, + 0x5CE96C28UL, 0xE176EDA3UL, 0x6BAC307FUL, 0x376829D2UL, + 0x85360FA9UL, 0x17E3FE2AUL, 0x24B79767UL, 0xF5A96B20UL, + 0xD6CD2595UL, 0x68FF1EBFUL, 0x7555442CUL, 0xF19F06BEUL, + 0xF9E0659AUL, 0xEEB9491DUL, 0x34010718UL, 0xBB30CAB8UL, + 0xE822FE15UL, 0x88570983UL, 0x750E6249UL, 0xDA627E55UL, + 0x5E76FFA8UL, 0xB1534546UL, 0x6D47DE08UL, 0xEFE9E7D4UL +}, + +{ + 0xF6FA8F9DUL, 0x2CAC6CE1UL, 0x4CA34867UL, 0xE2337F7CUL, + 0x95DB08E7UL, 0x016843B4UL, 0xECED5CBCUL, 0x325553ACUL, + 0xBF9F0960UL, 0xDFA1E2EDUL, 0x83F0579DUL, 0x63ED86B9UL, + 0x1AB6A6B8UL, 0xDE5EBE39UL, 0xF38FF732UL, 0x8989B138UL, + 0x33F14961UL, 0xC01937BDUL, 0xF506C6DAUL, 0xE4625E7EUL, + 0xA308EA99UL, 0x4E23E33CUL, 0x79CBD7CCUL, 0x48A14367UL, + 0xA3149619UL, 0xFEC94BD5UL, 0xA114174AUL, 0xEAA01866UL, + 0xA084DB2DUL, 0x09A8486FUL, 0xA888614AUL, 0x2900AF98UL, + 0x01665991UL, 0xE1992863UL, 0xC8F30C60UL, 0x2E78EF3CUL, + 0xD0D51932UL, 0xCF0FEC14UL, 0xF7CA07D2UL, 0xD0A82072UL, + 0xFD41197EUL, 0x9305A6B0UL, 0xE86BE3DAUL, 0x74BED3CDUL, + 0x372DA53CUL, 0x4C7F4448UL, 0xDAB5D440UL, 0x6DBA0EC3UL, + 0x083919A7UL, 0x9FBAEED9UL, 0x49DBCFB0UL, 0x4E670C53UL, + 0x5C3D9C01UL, 0x64BDB941UL, 0x2C0E636AUL, 0xBA7DD9CDUL, + 0xEA6F7388UL, 0xE70BC762UL, 0x35F29ADBUL, 0x5C4CDD8DUL, + 0xF0D48D8CUL, 0xB88153E2UL, 0x08A19866UL, 0x1AE2EAC8UL, + 0x284CAF89UL, 0xAA928223UL, 0x9334BE53UL, 0x3B3A21BFUL, + 0x16434BE3UL, 0x9AEA3906UL, 0xEFE8C36EUL, 0xF890CDD9UL, + 0x80226DAEUL, 0xC340A4A3UL, 0xDF7E9C09UL, 0xA694A807UL, + 0x5B7C5ECCUL, 0x221DB3A6UL, 0x9A69A02FUL, 0x68818A54UL, + 0xCEB2296FUL, 0x53C0843AUL, 0xFE893655UL, 0x25BFE68AUL, + 0xB4628ABCUL, 0xCF222EBFUL, 0x25AC6F48UL, 0xA9A99387UL, + 0x53BDDB65UL, 0xE76FFBE7UL, 0xE967FD78UL, 0x0BA93563UL, + 0x8E342BC1UL, 0xE8A11BE9UL, 0x4980740DUL, 0xC8087DFCUL, + 0x8DE4BF99UL, 0xA11101A0UL, 0x7FD37975UL, 0xDA5A26C0UL, + 0xE81F994FUL, 0x9528CD89UL, 0xFD339FEDUL, 0xB87834BFUL, + 0x5F04456DUL, 0x22258698UL, 0xC9C4C83BUL, 0x2DC156BEUL, + 0x4F628DAAUL, 0x57F55EC5UL, 0xE2220ABEUL, 0xD2916EBFUL, + 0x4EC75B95UL, 0x24F2C3C0UL, 0x42D15D99UL, 0xCD0D7FA0UL, + 0x7B6E27FFUL, 0xA8DC8AF0UL, 0x7345C106UL, 0xF41E232FUL, + 0x35162386UL, 0xE6EA8926UL, 0x3333B094UL, 0x157EC6F2UL, + 0x372B74AFUL, 0x692573E4UL, 0xE9A9D848UL, 0xF3160289UL, + 0x3A62EF1DUL, 0xA787E238UL, 0xF3A5F676UL, 0x74364853UL, + 0x20951063UL, 0x4576698DUL, 0xB6FAD407UL, 0x592AF950UL, + 0x36F73523UL, 0x4CFB6E87UL, 0x7DA4CEC0UL, 0x6C152DAAUL, + 0xCB0396A8UL, 0xC50DFE5DUL, 0xFCD707ABUL, 0x0921C42FUL, + 0x89DFF0BBUL, 0x5FE2BE78UL, 0x448F4F33UL, 0x754613C9UL, + 0x2B05D08DUL, 0x48B9D585UL, 0xDC049441UL, 0xC8098F9BUL, + 0x7DEDE786UL, 0xC39A3373UL, 0x42410005UL, 0x6A091751UL, + 0x0EF3C8A6UL, 0x890072D6UL, 0x28207682UL, 0xA9A9F7BEUL, + 0xBF32679DUL, 0xD45B5B75UL, 0xB353FD00UL, 0xCBB0E358UL, + 0x830F220AUL, 0x1F8FB214UL, 0xD372CF08UL, 0xCC3C4A13UL, + 0x8CF63166UL, 0x061C87BEUL, 0x88C98F88UL, 0x6062E397UL, + 0x47CF8E7AUL, 0xB6C85283UL, 0x3CC2ACFBUL, 0x3FC06976UL, + 0x4E8F0252UL, 0x64D8314DUL, 0xDA3870E3UL, 0x1E665459UL, + 0xC10908F0UL, 0x513021A5UL, 0x6C5B68B7UL, 0x822F8AA0UL, + 0x3007CD3EUL, 0x74719EEFUL, 0xDC872681UL, 0x073340D4UL, + 0x7E432FD9UL, 0x0C5EC241UL, 0x8809286CUL, 0xF592D891UL, + 0x08A930F6UL, 0x957EF305UL, 0xB7FBFFBDUL, 0xC266E96FUL, + 0x6FE4AC98UL, 0xB173ECC0UL, 0xBC60B42AUL, 0x953498DAUL, + 0xFBA1AE12UL, 0x2D4BD736UL, 0x0F25FAABUL, 0xA4F3FCEBUL, + 0xE2969123UL, 0x257F0C3DUL, 0x9348AF49UL, 0x361400BCUL, + 0xE8816F4AUL, 0x3814F200UL, 0xA3F94043UL, 0x9C7A54C2UL, + 0xBC704F57UL, 0xDA41E7F9UL, 0xC25AD33AUL, 0x54F4A084UL, + 0xB17F5505UL, 0x59357CBEUL, 0xEDBD15C8UL, 0x7F97C5ABUL, + 0xBA5AC7B5UL, 0xB6F6DEAFUL, 0x3A479C3AUL, 0x5302DA25UL, + 0x653D7E6AUL, 0x54268D49UL, 0x51A477EAUL, 0x5017D55BUL, + 0xD7D25D88UL, 0x44136C76UL, 0x0404A8C8UL, 0xB8E5A121UL, + 0xB81A928AUL, 0x60ED5869UL, 0x97C55B96UL, 0xEAEC991BUL, + 0x29935913UL, 0x01FDB7F1UL, 0x088E8DFAUL, 0x9AB6F6F5UL, + 0x3B4CBF9FUL, 0x4A5DE3ABUL, 0xE6051D35UL, 0xA0E1D855UL, + 0xD36B4CF1UL, 0xF544EDEBUL, 0xB0E93524UL, 0xBEBB8FBDUL, + 0xA2D762CFUL, 0x49C92F54UL, 0x38B5F331UL, 0x7128A454UL, + 0x48392905UL, 0xA65B1DB8UL, 0x851C97BDUL, 0xD675CF2FUL +}, + +{ + 0x85E04019UL, 0x332BF567UL, 0x662DBFFFUL, 0xCFC65693UL, + 0x2A8D7F6FUL, 0xAB9BC912UL, 0xDE6008A1UL, 0x2028DA1FUL, + 0x0227BCE7UL, 0x4D642916UL, 0x18FAC300UL, 0x50F18B82UL, + 0x2CB2CB11UL, 0xB232E75CUL, 0x4B3695F2UL, 0xB28707DEUL, + 0xA05FBCF6UL, 0xCD4181E9UL, 0xE150210CUL, 0xE24EF1BDUL, + 0xB168C381UL, 0xFDE4E789UL, 0x5C79B0D8UL, 0x1E8BFD43UL, + 0x4D495001UL, 0x38BE4341UL, 0x913CEE1DUL, 0x92A79C3FUL, + 0x089766BEUL, 0xBAEEADF4UL, 0x1286BECFUL, 0xB6EACB19UL, + 0x2660C200UL, 0x7565BDE4UL, 0x64241F7AUL, 0x8248DCA9UL, + 0xC3B3AD66UL, 0x28136086UL, 0x0BD8DFA8UL, 0x356D1CF2UL, + 0x107789BEUL, 0xB3B2E9CEUL, 0x0502AA8FUL, 0x0BC0351EUL, + 0x166BF52AUL, 0xEB12FF82UL, 0xE3486911UL, 0xD34D7516UL, + 0x4E7B3AFFUL, 0x5F43671BUL, 0x9CF6E037UL, 0x4981AC83UL, + 0x334266CEUL, 0x8C9341B7UL, 0xD0D854C0UL, 0xCB3A6C88UL, + 0x47BC2829UL, 0x4725BA37UL, 0xA66AD22BUL, 0x7AD61F1EUL, + 0x0C5CBAFAUL, 0x4437F107UL, 0xB6E79962UL, 0x42D2D816UL, + 0x0A961288UL, 0xE1A5C06EUL, 0x13749E67UL, 0x72FC081AUL, + 0xB1D139F7UL, 0xF9583745UL, 0xCF19DF58UL, 0xBEC3F756UL, + 0xC06EBA30UL, 0x07211B24UL, 0x45C28829UL, 0xC95E317FUL, + 0xBC8EC511UL, 0x38BC46E9UL, 0xC6E6FA14UL, 0xBAE8584AUL, + 0xAD4EBC46UL, 0x468F508BUL, 0x7829435FUL, 0xF124183BUL, + 0x821DBA9FUL, 0xAFF60FF4UL, 0xEA2C4E6DUL, 0x16E39264UL, + 0x92544A8BUL, 0x009B4FC3UL, 0xABA68CEDUL, 0x9AC96F78UL, + 0x06A5B79AUL, 0xB2856E6EUL, 0x1AEC3CA9UL, 0xBE838688UL, + 0x0E0804E9UL, 0x55F1BE56UL, 0xE7E5363BUL, 0xB3A1F25DUL, + 0xF7DEBB85UL, 0x61FE033CUL, 0x16746233UL, 0x3C034C28UL, + 0xDA6D0C74UL, 0x79AAC56CUL, 0x3CE4E1ADUL, 0x51F0C802UL, + 0x98F8F35AUL, 0x1626A49FUL, 0xEED82B29UL, 0x1D382FE3UL, + 0x0C4FB99AUL, 0xBB325778UL, 0x3EC6D97BUL, 0x6E77A6A9UL, + 0xCB658B5CUL, 0xD45230C7UL, 0x2BD1408BUL, 0x60C03EB7UL, + 0xB9068D78UL, 0xA33754F4UL, 0xF430C87DUL, 0xC8A71302UL, + 0xB96D8C32UL, 0xEBD4E7BEUL, 0xBE8B9D2DUL, 0x7979FB06UL, + 0xE7225308UL, 0x8B75CF77UL, 0x11EF8DA4UL, 0xE083C858UL, + 0x8D6B786FUL, 0x5A6317A6UL, 0xFA5CF7A0UL, 0x5DDA0033UL, + 0xF28EBFB0UL, 0xF5B9C310UL, 0xA0EAC280UL, 0x08B9767AUL, + 0xA3D9D2B0UL, 0x79D34217UL, 0x021A718DUL, 0x9AC6336AUL, + 0x2711FD60UL, 0x438050E3UL, 0x069908A8UL, 0x3D7FEDC4UL, + 0x826D2BEFUL, 0x4EEB8476UL, 0x488DCF25UL, 0x36C9D566UL, + 0x28E74E41UL, 0xC2610ACAUL, 0x3D49A9CFUL, 0xBAE3B9DFUL, + 0xB65F8DE6UL, 0x92AEAF64UL, 0x3AC7D5E6UL, 0x9EA80509UL, + 0xF22B017DUL, 0xA4173F70UL, 0xDD1E16C3UL, 0x15E0D7F9UL, + 0x50B1B887UL, 0x2B9F4FD5UL, 0x625ABA82UL, 0x6A017962UL, + 0x2EC01B9CUL, 0x15488AA9UL, 0xD716E740UL, 0x40055A2CUL, + 0x93D29A22UL, 0xE32DBF9AUL, 0x058745B9UL, 0x3453DC1EUL, + 0xD699296EUL, 0x496CFF6FUL, 0x1C9F4986UL, 0xDFE2ED07UL, + 0xB87242D1UL, 0x19DE7EAEUL, 0x053E561AUL, 0x15AD6F8CUL, + 0x66626C1CUL, 0x7154C24CUL, 0xEA082B2AUL, 0x93EB2939UL, + 0x17DCB0F0UL, 0x58D4F2AEUL, 0x9EA294FBUL, 0x52CF564CUL, + 0x9883FE66UL, 0x2EC40581UL, 0x763953C3UL, 0x01D6692EUL, + 0xD3A0C108UL, 0xA1E7160EUL, 0xE4F2DFA6UL, 0x693ED285UL, + 0x74904698UL, 0x4C2B0EDDUL, 0x4F757656UL, 0x5D393378UL, + 0xA132234FUL, 0x3D321C5DUL, 0xC3F5E194UL, 0x4B269301UL, + 0xC79F022FUL, 0x3C997E7EUL, 0x5E4F9504UL, 0x3FFAFBBDUL, + 0x76F7AD0EUL, 0x296693F4UL, 0x3D1FCE6FUL, 0xC61E45BEUL, + 0xD3B5AB34UL, 0xF72BF9B7UL, 0x1B0434C0UL, 0x4E72B567UL, + 0x5592A33DUL, 0xB5229301UL, 0xCFD2A87FUL, 0x60AEB767UL, + 0x1814386BUL, 0x30BCC33DUL, 0x38A0C07DUL, 0xFD1606F2UL, + 0xC363519BUL, 0x589DD390UL, 0x5479F8E6UL, 0x1CB8D647UL, + 0x97FD61A9UL, 0xEA7759F4UL, 0x2D57539DUL, 0x569A58CFUL, + 0xE84E63ADUL, 0x462E1B78UL, 0x6580F87EUL, 0xF3817914UL, + 0x91DA55F4UL, 0x40A230F3UL, 0xD1988F35UL, 0xB6E318D2UL, + 0x3FFA50BCUL, 0x3D40F021UL, 0xC3C0BDAEUL, 0x4958C24CUL, + 0x518F36B2UL, 0x84B1D370UL, 0x0FEDCE83UL, 0x878DDADAUL, + 0xF2A279C7UL, 0x94E01BE8UL, 0x90716F4BUL, 0x954B8AA3UL +}, + +{ + 0xE216300DUL, 0xBBDDFFFCUL, 0xA7EBDABDUL, 0x35648095UL, + 0x7789F8B7UL, 0xE6C1121BUL, 0x0E241600UL, 0x052CE8B5UL, + 0x11A9CFB0UL, 0xE5952F11UL, 0xECE7990AUL, 0x9386D174UL, + 0x2A42931CUL, 0x76E38111UL, 0xB12DEF3AUL, 0x37DDDDFCUL, + 0xDE9ADEB1UL, 0x0A0CC32CUL, 0xBE197029UL, 0x84A00940UL, + 0xBB243A0FUL, 0xB4D137CFUL, 0xB44E79F0UL, 0x049EEDFDUL, + 0x0B15A15DUL, 0x480D3168UL, 0x8BBBDE5AUL, 0x669DED42UL, + 0xC7ECE831UL, 0x3F8F95E7UL, 0x72DF191BUL, 0x7580330DUL, + 0x94074251UL, 0x5C7DCDFAUL, 0xABBE6D63UL, 0xAA402164UL, + 0xB301D40AUL, 0x02E7D1CAUL, 0x53571DAEUL, 0x7A3182A2UL, + 0x12A8DDECUL, 0xFDAA335DUL, 0x176F43E8UL, 0x71FB46D4UL, + 0x38129022UL, 0xCE949AD4UL, 0xB84769ADUL, 0x965BD862UL, + 0x82F3D055UL, 0x66FB9767UL, 0x15B80B4EUL, 0x1D5B47A0UL, + 0x4CFDE06FUL, 0xC28EC4B8UL, 0x57E8726EUL, 0x647A78FCUL, + 0x99865D44UL, 0x608BD593UL, 0x6C200E03UL, 0x39DC5FF6UL, + 0x5D0B00A3UL, 0xAE63AFF2UL, 0x7E8BD632UL, 0x70108C0CUL, + 0xBBD35049UL, 0x2998DF04UL, 0x980CF42AUL, 0x9B6DF491UL, + 0x9E7EDD53UL, 0x06918548UL, 0x58CB7E07UL, 0x3B74EF2EUL, + 0x522FFFB1UL, 0xD24708CCUL, 0x1C7E27CDUL, 0xA4EB215BUL, + 0x3CF1D2E2UL, 0x19B47A38UL, 0x424F7618UL, 0x35856039UL, + 0x9D17DEE7UL, 0x27EB35E6UL, 0xC9AFF67BUL, 0x36BAF5B8UL, + 0x09C467CDUL, 0xC18910B1UL, 0xE11DBF7BUL, 0x06CD1AF8UL, + 0x7170C608UL, 0x2D5E3354UL, 0xD4DE495AUL, 0x64C6D006UL, + 0xBCC0C62CUL, 0x3DD00DB3UL, 0x708F8F34UL, 0x77D51B42UL, + 0x264F620FUL, 0x24B8D2BFUL, 0x15C1B79EUL, 0x46A52564UL, + 0xF8D7E54EUL, 0x3E378160UL, 0x7895CDA5UL, 0x859C15A5UL, + 0xE6459788UL, 0xC37BC75FUL, 0xDB07BA0CUL, 0x0676A3ABUL, + 0x7F229B1EUL, 0x31842E7BUL, 0x24259FD7UL, 0xF8BEF472UL, + 0x835FFCB8UL, 0x6DF4C1F2UL, 0x96F5B195UL, 0xFD0AF0FCUL, + 0xB0FE134CUL, 0xE2506D3DUL, 0x4F9B12EAUL, 0xF215F225UL, + 0xA223736FUL, 0x9FB4C428UL, 0x25D04979UL, 0x34C713F8UL, + 0xC4618187UL, 0xEA7A6E98UL, 0x7CD16EFCUL, 0x1436876CUL, + 0xF1544107UL, 0xBEDEEE14UL, 0x56E9AF27UL, 0xA04AA441UL, + 0x3CF7C899UL, 0x92ECBAE6UL, 0xDD67016DUL, 0x151682EBUL, + 0xA842EEDFUL, 0xFDBA60B4UL, 0xF1907B75UL, 0x20E3030FUL, + 0x24D8C29EUL, 0xE139673BUL, 0xEFA63FB8UL, 0x71873054UL, + 0xB6F2CF3BUL, 0x9F326442UL, 0xCB15A4CCUL, 0xB01A4504UL, + 0xF1E47D8DUL, 0x844A1BE5UL, 0xBAE7DFDCUL, 0x42CBDA70UL, + 0xCD7DAE0AUL, 0x57E85B7AUL, 0xD53F5AF6UL, 0x20CF4D8CUL, + 0xCEA4D428UL, 0x79D130A4UL, 0x3486EBFBUL, 0x33D3CDDCUL, + 0x77853B53UL, 0x37EFFCB5UL, 0xC5068778UL, 0xE580B3E6UL, + 0x4E68B8F4UL, 0xC5C8B37EUL, 0x0D809EA2UL, 0x398FEB7CUL, + 0x132A4F94UL, 0x43B7950EUL, 0x2FEE7D1CUL, 0x223613BDUL, + 0xDD06CAA2UL, 0x37DF932BUL, 0xC4248289UL, 0xACF3EBC3UL, + 0x5715F6B7UL, 0xEF3478DDUL, 0xF267616FUL, 0xC148CBE4UL, + 0x9052815EUL, 0x5E410FABUL, 0xB48A2465UL, 0x2EDA7FA4UL, + 0xE87B40E4UL, 0xE98EA084UL, 0x5889E9E1UL, 0xEFD390FCUL, + 0xDD07D35BUL, 0xDB485694UL, 0x38D7E5B2UL, 0x57720101UL, + 0x730EDEBCUL, 0x5B643113UL, 0x94917E4FUL, 0x503C2FBAUL, + 0x646F1282UL, 0x7523D24AUL, 0xE0779695UL, 0xF9C17A8FUL, + 0x7A5B2121UL, 0xD187B896UL, 0x29263A4DUL, 0xBA510CDFUL, + 0x81F47C9FUL, 0xAD1163EDUL, 0xEA7B5965UL, 0x1A00726EUL, + 0x11403092UL, 0x00DA6D77UL, 0x4A0CDD61UL, 0xAD1F4603UL, + 0x605BDFB0UL, 0x9EEDC364UL, 0x22EBE6A8UL, 0xCEE7D28AUL, + 0xA0E736A0UL, 0x5564A6B9UL, 0x10853209UL, 0xC7EB8F37UL, + 0x2DE705CAUL, 0x8951570FUL, 0xDF09822BUL, 0xBD691A6CUL, + 0xAA12E4F2UL, 0x87451C0FUL, 0xE0F6A27AUL, 0x3ADA4819UL, + 0x4CF1764FUL, 0x0D771C2BUL, 0x67CDB156UL, 0x350D8384UL, + 0x5938FA0FUL, 0x42399EF3UL, 0x36997B07UL, 0x0E84093DUL, + 0x4AA93E61UL, 0x8360D87BUL, 0x1FA98B0CUL, 0x1149382CUL, + 0xE97625A5UL, 0x0614D1B7UL, 0x0E25244BUL, 0x0C768347UL, + 0x589E8D82UL, 0x0D2059D1UL, 0xA466BB1EUL, 0xF8DA0A82UL, + 0x04F19130UL, 0xBA6E4EC0UL, 0x99265164UL, 0x1EE7230DUL, + 0x50B2AD80UL, 0xEAEE6801UL, 0x8DB2A283UL, 0xEA8BF59EUL +}}; + +/* CAST uses three different round functions */ +#define f1(l, r, km, kr) \ + t = rotlVariable(km + r, kr); \ + l ^= ((S[0][U8a(t)] ^ S[1][U8b(t)]) - \ + S[2][U8c(t)]) + S[3][U8d(t)]; +#define f2(l, r, km, kr) \ + t = rotlVariable(km ^ r, kr); \ + l ^= ((S[0][U8a(t)] - S[1][U8b(t)]) + \ + S[2][U8c(t)]) ^ S[3][U8d(t)]; +#define f3(l, r, km, kr) \ + t = rotlVariable(km - r, kr); \ + l ^= ((S[0][U8a(t)] + S[1][U8b(t)]) ^ \ + S[2][U8c(t)]) - S[3][U8d(t)]; + +#define F1(l, r, i, j) f1(l, r, K[i], K[i+j]) +#define F2(l, r, i, j) f2(l, r, K[i], K[i+j]) +#define F3(l, r, i, j) f3(l, r, K[i], K[i+j]) + + +void Cast5Encrypt (const byte *inBlock, byte *outBlock, CAST_KEY *key) +{ + word32 l = BE32 (((word32 *)inBlock)[0]); + word32 r = BE32 (((word32 *)inBlock)[1]); + word32 *K = key->K; + word32 t; + + /* Do the work */ + F1(l, r, 0, 16); + F2(r, l, 1, 16); + F3(l, r, 2, 16); + F1(r, l, 3, 16); + F2(l, r, 4, 16); + F3(r, l, 5, 16); + F1(l, r, 6, 16); + F2(r, l, 7, 16); + F3(l, r, 8, 16); + F1(r, l, 9, 16); + F2(l, r, 10, 16); + F3(r, l, 11, 16); + F1(l, r, 12, 16); + F2(r, l, 13, 16); + F3(l, r, 14, 16); + F1(r, l, 15, 16); + + /* Put l,r into outblock */ + ((word32 *)outBlock)[0] = BE32 (r); + ((word32 *)outBlock)[1] = BE32 (l); +} + + +void Cast5Decrypt (const byte *inBlock, byte *outBlock, CAST_KEY *key) +{ + word32 r = BE32 (((word32 *)inBlock)[0]); + word32 l = BE32 (((word32 *)inBlock)[1]); + word32 *K = key->K; + word32 t; + + /* Only do full 16 rounds if key length > 80 bits */ + F1(r, l, 15, 16); + F3(l, r, 14, 16); + F2(r, l, 13, 16); + F1(l, r, 12, 16); + F3(r, l, 11, 16); + F2(l, r, 10, 16); + F1(r, l, 9, 16); + F3(l, r, 8, 16); + F2(r, l, 7, 16); + F1(l, r, 6, 16); + F3(r, l, 5, 16); + F2(l, r, 4, 16); + F1(r, l, 3, 16); + F3(l, r, 2, 16); + F2(r, l, 1, 16); + F1(l, r, 0, 16); + /* Put l,r into outblock */ + ((word32 *)outBlock)[0] = BE32 (l); + ((word32 *)outBlock)[1] = BE32 (r); + /* Wipe clean */ + t = l = r = 0; +} + +void Cast5SetKey (CAST_KEY *key, unsigned int keylength, const byte *userKey) +{ + unsigned int i; + word32 *K = key->K; + word32 X[4], Z[4]; + + X[0] = BE32 (((word32 *)userKey)[0]); + X[1] = BE32 (((word32 *)userKey)[1]); + X[2] = BE32 (((word32 *)userKey)[2]); + X[3] = BE32 (((word32 *)userKey)[3]); + +#define x(i) GETBYTE(X[i/4], 3-i%4) +#define z(i) GETBYTE(Z[i/4], 3-i%4) + + for (i=0; i<=16; i+=16) + { + // this part is copied directly from RFC 2144 (with some search and replace) by Wei Dai + Z[0] = X[0] ^ S[4][x(0xD)] ^ S[5][x(0xF)] ^ S[6][x(0xC)] ^ S[7][x(0xE)] ^ S[6][x(0x8)]; + Z[1] = X[2] ^ S[4][z(0x0)] ^ S[5][z(0x2)] ^ S[6][z(0x1)] ^ S[7][z(0x3)] ^ S[7][x(0xA)]; + Z[2] = X[3] ^ S[4][z(0x7)] ^ S[5][z(0x6)] ^ S[6][z(0x5)] ^ S[7][z(0x4)] ^ S[4][x(0x9)]; + Z[3] = X[1] ^ S[4][z(0xA)] ^ S[5][z(0x9)] ^ S[6][z(0xB)] ^ S[7][z(0x8)] ^ S[5][x(0xB)]; + K[i+0] = S[4][z(0x8)] ^ S[5][z(0x9)] ^ S[6][z(0x7)] ^ S[7][z(0x6)] ^ S[4][z(0x2)]; + K[i+1] = S[4][z(0xA)] ^ S[5][z(0xB)] ^ S[6][z(0x5)] ^ S[7][z(0x4)] ^ S[5][z(0x6)]; + K[i+2] = S[4][z(0xC)] ^ S[5][z(0xD)] ^ S[6][z(0x3)] ^ S[7][z(0x2)] ^ S[6][z(0x9)]; + K[i+3] = S[4][z(0xE)] ^ S[5][z(0xF)] ^ S[6][z(0x1)] ^ S[7][z(0x0)] ^ S[7][z(0xC)]; + X[0] = Z[2] ^ S[4][z(0x5)] ^ S[5][z(0x7)] ^ S[6][z(0x4)] ^ S[7][z(0x6)] ^ S[6][z(0x0)]; + X[1] = Z[0] ^ S[4][x(0x0)] ^ S[5][x(0x2)] ^ S[6][x(0x1)] ^ S[7][x(0x3)] ^ S[7][z(0x2)]; + X[2] = Z[1] ^ S[4][x(0x7)] ^ S[5][x(0x6)] ^ S[6][x(0x5)] ^ S[7][x(0x4)] ^ S[4][z(0x1)]; + X[3] = Z[3] ^ S[4][x(0xA)] ^ S[5][x(0x9)] ^ S[6][x(0xB)] ^ S[7][x(0x8)] ^ S[5][z(0x3)]; + K[i+4] = S[4][x(0x3)] ^ S[5][x(0x2)] ^ S[6][x(0xC)] ^ S[7][x(0xD)] ^ S[4][x(0x8)]; + K[i+5] = S[4][x(0x1)] ^ S[5][x(0x0)] ^ S[6][x(0xE)] ^ S[7][x(0xF)] ^ S[5][x(0xD)]; + K[i+6] = S[4][x(0x7)] ^ S[5][x(0x6)] ^ S[6][x(0x8)] ^ S[7][x(0x9)] ^ S[6][x(0x3)]; + K[i+7] = S[4][x(0x5)] ^ S[5][x(0x4)] ^ S[6][x(0xA)] ^ S[7][x(0xB)] ^ S[7][x(0x7)]; + Z[0] = X[0] ^ S[4][x(0xD)] ^ S[5][x(0xF)] ^ S[6][x(0xC)] ^ S[7][x(0xE)] ^ S[6][x(0x8)]; + Z[1] = X[2] ^ S[4][z(0x0)] ^ S[5][z(0x2)] ^ S[6][z(0x1)] ^ S[7][z(0x3)] ^ S[7][x(0xA)]; + Z[2] = X[3] ^ S[4][z(0x7)] ^ S[5][z(0x6)] ^ S[6][z(0x5)] ^ S[7][z(0x4)] ^ S[4][x(0x9)]; + Z[3] = X[1] ^ S[4][z(0xA)] ^ S[5][z(0x9)] ^ S[6][z(0xB)] ^ S[7][z(0x8)] ^ S[5][x(0xB)]; + K[i+8] = S[4][z(0x3)] ^ S[5][z(0x2)] ^ S[6][z(0xC)] ^ S[7][z(0xD)] ^ S[4][z(0x9)]; + K[i+9] = S[4][z(0x1)] ^ S[5][z(0x0)] ^ S[6][z(0xE)] ^ S[7][z(0xF)] ^ S[5][z(0xC)]; + K[i+10] = S[4][z(0x7)] ^ S[5][z(0x6)] ^ S[6][z(0x8)] ^ S[7][z(0x9)] ^ S[6][z(0x2)]; + K[i+11] = S[4][z(0x5)] ^ S[5][z(0x4)] ^ S[6][z(0xA)] ^ S[7][z(0xB)] ^ S[7][z(0x6)]; + X[0] = Z[2] ^ S[4][z(0x5)] ^ S[5][z(0x7)] ^ S[6][z(0x4)] ^ S[7][z(0x6)] ^ S[6][z(0x0)]; + X[1] = Z[0] ^ S[4][x(0x0)] ^ S[5][x(0x2)] ^ S[6][x(0x1)] ^ S[7][x(0x3)] ^ S[7][z(0x2)]; + X[2] = Z[1] ^ S[4][x(0x7)] ^ S[5][x(0x6)] ^ S[6][x(0x5)] ^ S[7][x(0x4)] ^ S[4][z(0x1)]; + X[3] = Z[3] ^ S[4][x(0xA)] ^ S[5][x(0x9)] ^ S[6][x(0xB)] ^ S[7][x(0x8)] ^ S[5][z(0x3)]; + K[i+12] = S[4][x(0x8)] ^ S[5][x(0x9)] ^ S[6][x(0x7)] ^ S[7][x(0x6)] ^ S[4][x(0x3)]; + K[i+13] = S[4][x(0xA)] ^ S[5][x(0xB)] ^ S[6][x(0x5)] ^ S[7][x(0x4)] ^ S[5][x(0x7)]; + K[i+14] = S[4][x(0xC)] ^ S[5][x(0xD)] ^ S[6][x(0x3)] ^ S[7][x(0x2)] ^ S[6][x(0x8)]; + K[i+15] = S[4][x(0xE)] ^ S[5][x(0xF)] ^ S[6][x(0x1)] ^ S[7][x(0x0)] ^ S[7][x(0xD)]; + } + + for (i=16; i<32; i++) + K[i] &= 0x1f; +} diff --git a/Crypto/Cast.h b/Crypto/Cast.h new file mode 100644 index 0000000..57c1355 --- /dev/null +++ b/Crypto/Cast.h @@ -0,0 +1,24 @@ +/* Deprecated/legacy */ + + +#ifndef HEADER_CAST_H +#define HEADER_CAST_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct CAST_KEY_STRUCT +{ + unsigned __int32 K[32]; +} CAST_KEY; + +void Cast5Decrypt (const byte *inBlock, byte *outBlock, CAST_KEY *key); +void Cast5Encrypt (const byte *inBlock, byte *outBlock, CAST_KEY *key); +void Cast5SetKey (CAST_KEY *key, unsigned int keylength, const byte *userKey); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/Crypto/Crypto.vcproj b/Crypto/Crypto.vcproj new file mode 100644 index 0000000..e1d2e41 --- /dev/null +++ b/Crypto/Crypto.vcproj @@ -0,0 +1,318 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Crypto/Des.c b/Crypto/Des.c new file mode 100644 index 0000000..8a3673e --- /dev/null +++ b/Crypto/Des.c @@ -0,0 +1,406 @@ +/* Deprecated/legacy */ + + +// des.cpp - modified by Wei Dai from Phil Karn's des.c +// The original code and all modifications are in the public domain. + +/* + * This is a major rewrite of my old public domain DES code written + * circa 1987, which in turn borrowed heavily from Jim Gillogly's 1977 + * public domain code. I pretty much kept my key scheduling code, but + * the actual encrypt/decrypt routines are taken from from Richard + * Outerbridge's DES code as printed in Schneier's "Applied Cryptography." + * + * This code is in the public domain. I would appreciate bug reports and + * enhancements. + * + * Phil Karn KA9Q, karn@unix.ka9q.ampr.org, August 1994. + */ + +/* Adapted for TrueCrypt */ + +#include +#include "Common/Tcdefs.h" +#include "Common/Endian.h" +#include "Des.h" + +#define word32 unsigned __int32 +#define byte unsigned __int8 + +static word32 rotlFixed (word32 x, unsigned int y) +{ + return (word32)((x<>(sizeof(word32)*8-y))); +} + +static word32 rotrFixed (word32 x, unsigned int y) +{ + return (word32)((x>>y) | (x<<(sizeof(word32)*8-y))); +} + + +/* Tables defined in the Data Encryption Standard documents + * Three of these tables, the initial permutation, the final + * permutation and the expansion operator, are regular enough that + * for speed, we hard-code them. They're here for reference only. + * Also, the S and P boxes are used by a separate program, gensp.c, + * to build the combined SP box, Spbox[]. They're also here just + * for reference. + */ +#ifdef notdef +/* initial permutation IP */ +static byte ip[] = { + 58, 50, 42, 34, 26, 18, 10, 2, + 60, 52, 44, 36, 28, 20, 12, 4, + 62, 54, 46, 38, 30, 22, 14, 6, + 64, 56, 48, 40, 32, 24, 16, 8, + 57, 49, 41, 33, 25, 17, 9, 1, + 59, 51, 43, 35, 27, 19, 11, 3, + 61, 53, 45, 37, 29, 21, 13, 5, + 63, 55, 47, 39, 31, 23, 15, 7 +}; + +/* final permutation IP^-1 */ +static byte fp[] = { + 40, 8, 48, 16, 56, 24, 64, 32, + 39, 7, 47, 15, 55, 23, 63, 31, + 38, 6, 46, 14, 54, 22, 62, 30, + 37, 5, 45, 13, 53, 21, 61, 29, + 36, 4, 44, 12, 52, 20, 60, 28, + 35, 3, 43, 11, 51, 19, 59, 27, + 34, 2, 42, 10, 50, 18, 58, 26, + 33, 1, 41, 9, 49, 17, 57, 25 +}; +/* expansion operation matrix */ +static byte ei[] = { + 32, 1, 2, 3, 4, 5, + 4, 5, 6, 7, 8, 9, + 8, 9, 10, 11, 12, 13, + 12, 13, 14, 15, 16, 17, + 16, 17, 18, 19, 20, 21, + 20, 21, 22, 23, 24, 25, + 24, 25, 26, 27, 28, 29, + 28, 29, 30, 31, 32, 1 +}; +/* The (in)famous S-boxes */ +static byte sbox[8][64] = { + /* S1 */ + 14, 4, 13, 1, 2, 15, 11, 8, 3, 10, 6, 12, 5, 9, 0, 7, + 0, 15, 7, 4, 14, 2, 13, 1, 10, 6, 12, 11, 9, 5, 3, 8, + 4, 1, 14, 8, 13, 6, 2, 11, 15, 12, 9, 7, 3, 10, 5, 0, + 15, 12, 8, 2, 4, 9, 1, 7, 5, 11, 3, 14, 10, 0, 6, 13, + + /* S2 */ + 15, 1, 8, 14, 6, 11, 3, 4, 9, 7, 2, 13, 12, 0, 5, 10, + 3, 13, 4, 7, 15, 2, 8, 14, 12, 0, 1, 10, 6, 9, 11, 5, + 0, 14, 7, 11, 10, 4, 13, 1, 5, 8, 12, 6, 9, 3, 2, 15, + 13, 8, 10, 1, 3, 15, 4, 2, 11, 6, 7, 12, 0, 5, 14, 9, + + /* S3 */ + 10, 0, 9, 14, 6, 3, 15, 5, 1, 13, 12, 7, 11, 4, 2, 8, + 13, 7, 0, 9, 3, 4, 6, 10, 2, 8, 5, 14, 12, 11, 15, 1, + 13, 6, 4, 9, 8, 15, 3, 0, 11, 1, 2, 12, 5, 10, 14, 7, + 1, 10, 13, 0, 6, 9, 8, 7, 4, 15, 14, 3, 11, 5, 2, 12, + + /* S4 */ + 7, 13, 14, 3, 0, 6, 9, 10, 1, 2, 8, 5, 11, 12, 4, 15, + 13, 8, 11, 5, 6, 15, 0, 3, 4, 7, 2, 12, 1, 10, 14, 9, + 10, 6, 9, 0, 12, 11, 7, 13, 15, 1, 3, 14, 5, 2, 8, 4, + 3, 15, 0, 6, 10, 1, 13, 8, 9, 4, 5, 11, 12, 7, 2, 14, + + /* S5 */ + 2, 12, 4, 1, 7, 10, 11, 6, 8, 5, 3, 15, 13, 0, 14, 9, + 14, 11, 2, 12, 4, 7, 13, 1, 5, 0, 15, 10, 3, 9, 8, 6, + 4, 2, 1, 11, 10, 13, 7, 8, 15, 9, 12, 5, 6, 3, 0, 14, + 11, 8, 12, 7, 1, 14, 2, 13, 6, 15, 0, 9, 10, 4, 5, 3, + + /* S6 */ + 12, 1, 10, 15, 9, 2, 6, 8, 0, 13, 3, 4, 14, 7, 5, 11, + 10, 15, 4, 2, 7, 12, 9, 5, 6, 1, 13, 14, 0, 11, 3, 8, + 9, 14, 15, 5, 2, 8, 12, 3, 7, 0, 4, 10, 1, 13, 11, 6, + 4, 3, 2, 12, 9, 5, 15, 10, 11, 14, 1, 7, 6, 0, 8, 13, + + /* S7 */ + 4, 11, 2, 14, 15, 0, 8, 13, 3, 12, 9, 7, 5, 10, 6, 1, + 13, 0, 11, 7, 4, 9, 1, 10, 14, 3, 5, 12, 2, 15, 8, 6, + 1, 4, 11, 13, 12, 3, 7, 14, 10, 15, 6, 8, 0, 5, 9, 2, + 6, 11, 13, 8, 1, 4, 10, 7, 9, 5, 0, 15, 14, 2, 3, 12, + + /* S8 */ + 13, 2, 8, 4, 6, 15, 11, 1, 10, 9, 3, 14, 5, 0, 12, 7, + 1, 15, 13, 8, 10, 3, 7, 4, 12, 5, 6, 11, 0, 14, 9, 2, + 7, 11, 4, 1, 9, 12, 14, 2, 0, 6, 10, 13, 15, 3, 5, 8, + 2, 1, 14, 7, 4, 10, 8, 13, 15, 12, 9, 0, 3, 5, 6, 11 +}; + +/* 32-bit permutation function P used on the output of the S-boxes */ +static byte p32i[] = { + 16, 7, 20, 21, + 29, 12, 28, 17, + 1, 15, 23, 26, + 5, 18, 31, 10, + 2, 8, 24, 14, + 32, 27, 3, 9, + 19, 13, 30, 6, + 22, 11, 4, 25 +}; +#endif + +/* permuted choice table (key) */ +static const byte pc1[] = { + 57, 49, 41, 33, 25, 17, 9, + 1, 58, 50, 42, 34, 26, 18, + 10, 2, 59, 51, 43, 35, 27, + 19, 11, 3, 60, 52, 44, 36, + + 63, 55, 47, 39, 31, 23, 15, + 7, 62, 54, 46, 38, 30, 22, + 14, 6, 61, 53, 45, 37, 29, + 21, 13, 5, 28, 20, 12, 4 +}; + +/* number left rotations of pc1 */ +static const byte totrot[] = { + 1,2,4,6,8,10,12,14,15,17,19,21,23,25,27,28 +}; + +/* permuted choice key (table) */ +static const byte pc2[] = { + 14, 17, 11, 24, 1, 5, + 3, 28, 15, 6, 21, 10, + 23, 19, 12, 4, 26, 8, + 16, 7, 27, 20, 13, 2, + 41, 52, 31, 37, 47, 55, + 30, 40, 51, 45, 33, 48, + 44, 49, 39, 56, 34, 53, + 46, 42, 50, 36, 29, 32 +}; + +/* End of DES-defined tables */ + +/* bit 0 is left-most in byte */ +static const int bytebit[] = { + 0200,0100,040,020,010,04,02,01 +}; + +static const word32 Spbox[8][64] = { +{ +0x01010400,0x00000000,0x00010000,0x01010404, 0x01010004,0x00010404,0x00000004,0x00010000, +0x00000400,0x01010400,0x01010404,0x00000400, 0x01000404,0x01010004,0x01000000,0x00000004, +0x00000404,0x01000400,0x01000400,0x00010400, 0x00010400,0x01010000,0x01010000,0x01000404, +0x00010004,0x01000004,0x01000004,0x00010004, 0x00000000,0x00000404,0x00010404,0x01000000, +0x00010000,0x01010404,0x00000004,0x01010000, 0x01010400,0x01000000,0x01000000,0x00000400, +0x01010004,0x00010000,0x00010400,0x01000004, 0x00000400,0x00000004,0x01000404,0x00010404, +0x01010404,0x00010004,0x01010000,0x01000404, 0x01000004,0x00000404,0x00010404,0x01010400, +0x00000404,0x01000400,0x01000400,0x00000000, 0x00010004,0x00010400,0x00000000,0x01010004}, +{ +0x80108020,0x80008000,0x00008000,0x00108020, 0x00100000,0x00000020,0x80100020,0x80008020, +0x80000020,0x80108020,0x80108000,0x80000000, 0x80008000,0x00100000,0x00000020,0x80100020, +0x00108000,0x00100020,0x80008020,0x00000000, 0x80000000,0x00008000,0x00108020,0x80100000, +0x00100020,0x80000020,0x00000000,0x00108000, 0x00008020,0x80108000,0x80100000,0x00008020, +0x00000000,0x00108020,0x80100020,0x00100000, 0x80008020,0x80100000,0x80108000,0x00008000, +0x80100000,0x80008000,0x00000020,0x80108020, 0x00108020,0x00000020,0x00008000,0x80000000, +0x00008020,0x80108000,0x00100000,0x80000020, 0x00100020,0x80008020,0x80000020,0x00100020, +0x00108000,0x00000000,0x80008000,0x00008020, 0x80000000,0x80100020,0x80108020,0x00108000}, +{ +0x00000208,0x08020200,0x00000000,0x08020008, 0x08000200,0x00000000,0x00020208,0x08000200, +0x00020008,0x08000008,0x08000008,0x00020000, 0x08020208,0x00020008,0x08020000,0x00000208, +0x08000000,0x00000008,0x08020200,0x00000200, 0x00020200,0x08020000,0x08020008,0x00020208, +0x08000208,0x00020200,0x00020000,0x08000208, 0x00000008,0x08020208,0x00000200,0x08000000, +0x08020200,0x08000000,0x00020008,0x00000208, 0x00020000,0x08020200,0x08000200,0x00000000, +0x00000200,0x00020008,0x08020208,0x08000200, 0x08000008,0x00000200,0x00000000,0x08020008, +0x08000208,0x00020000,0x08000000,0x08020208, 0x00000008,0x00020208,0x00020200,0x08000008, +0x08020000,0x08000208,0x00000208,0x08020000, 0x00020208,0x00000008,0x08020008,0x00020200}, +{ +0x00802001,0x00002081,0x00002081,0x00000080, 0x00802080,0x00800081,0x00800001,0x00002001, +0x00000000,0x00802000,0x00802000,0x00802081, 0x00000081,0x00000000,0x00800080,0x00800001, +0x00000001,0x00002000,0x00800000,0x00802001, 0x00000080,0x00800000,0x00002001,0x00002080, +0x00800081,0x00000001,0x00002080,0x00800080, 0x00002000,0x00802080,0x00802081,0x00000081, +0x00800080,0x00800001,0x00802000,0x00802081, 0x00000081,0x00000000,0x00000000,0x00802000, +0x00002080,0x00800080,0x00800081,0x00000001, 0x00802001,0x00002081,0x00002081,0x00000080, +0x00802081,0x00000081,0x00000001,0x00002000, 0x00800001,0x00002001,0x00802080,0x00800081, +0x00002001,0x00002080,0x00800000,0x00802001, 0x00000080,0x00800000,0x00002000,0x00802080}, +{ +0x00000100,0x02080100,0x02080000,0x42000100, 0x00080000,0x00000100,0x40000000,0x02080000, +0x40080100,0x00080000,0x02000100,0x40080100, 0x42000100,0x42080000,0x00080100,0x40000000, +0x02000000,0x40080000,0x40080000,0x00000000, 0x40000100,0x42080100,0x42080100,0x02000100, +0x42080000,0x40000100,0x00000000,0x42000000, 0x02080100,0x02000000,0x42000000,0x00080100, +0x00080000,0x42000100,0x00000100,0x02000000, 0x40000000,0x02080000,0x42000100,0x40080100, +0x02000100,0x40000000,0x42080000,0x02080100, 0x40080100,0x00000100,0x02000000,0x42080000, +0x42080100,0x00080100,0x42000000,0x42080100, 0x02080000,0x00000000,0x40080000,0x42000000, +0x00080100,0x02000100,0x40000100,0x00080000, 0x00000000,0x40080000,0x02080100,0x40000100}, +{ +0x20000010,0x20400000,0x00004000,0x20404010, 0x20400000,0x00000010,0x20404010,0x00400000, +0x20004000,0x00404010,0x00400000,0x20000010, 0x00400010,0x20004000,0x20000000,0x00004010, +0x00000000,0x00400010,0x20004010,0x00004000, 0x00404000,0x20004010,0x00000010,0x20400010, +0x20400010,0x00000000,0x00404010,0x20404000, 0x00004010,0x00404000,0x20404000,0x20000000, +0x20004000,0x00000010,0x20400010,0x00404000, 0x20404010,0x00400000,0x00004010,0x20000010, +0x00400000,0x20004000,0x20000000,0x00004010, 0x20000010,0x20404010,0x00404000,0x20400000, +0x00404010,0x20404000,0x00000000,0x20400010, 0x00000010,0x00004000,0x20400000,0x00404010, +0x00004000,0x00400010,0x20004010,0x00000000, 0x20404000,0x20000000,0x00400010,0x20004010}, +{ +0x00200000,0x04200002,0x04000802,0x00000000, 0x00000800,0x04000802,0x00200802,0x04200800, +0x04200802,0x00200000,0x00000000,0x04000002, 0x00000002,0x04000000,0x04200002,0x00000802, +0x04000800,0x00200802,0x00200002,0x04000800, 0x04000002,0x04200000,0x04200800,0x00200002, +0x04200000,0x00000800,0x00000802,0x04200802, 0x00200800,0x00000002,0x04000000,0x00200800, +0x04000000,0x00200800,0x00200000,0x04000802, 0x04000802,0x04200002,0x04200002,0x00000002, +0x00200002,0x04000000,0x04000800,0x00200000, 0x04200800,0x00000802,0x00200802,0x04200800, +0x00000802,0x04000002,0x04200802,0x04200000, 0x00200800,0x00000000,0x00000002,0x04200802, +0x00000000,0x00200802,0x04200000,0x00000800, 0x04000002,0x04000800,0x00000800,0x00200002}, +{ +0x10001040,0x00001000,0x00040000,0x10041040, 0x10000000,0x10001040,0x00000040,0x10000000, +0x00040040,0x10040000,0x10041040,0x00041000, 0x10041000,0x00041040,0x00001000,0x00000040, +0x10040000,0x10000040,0x10001000,0x00001040, 0x00041000,0x00040040,0x10040040,0x10041000, +0x00001040,0x00000000,0x00000000,0x10040040, 0x10000040,0x10001000,0x00041040,0x00040000, +0x00041040,0x00040000,0x10041000,0x00001000, 0x00000040,0x10040040,0x00001000,0x00041040, +0x10001000,0x00000040,0x10000040,0x10040000, 0x10040040,0x10000000,0x00040000,0x10001040, +0x00000000,0x10041040,0x00040040,0x10000040, 0x10040000,0x10001000,0x10001040,0x00000000, +0x10041040,0x00041000,0x00041000,0x00001040, 0x00001040,0x00040040,0x10000000,0x10041000} +}; + +/* Set key (initialize key schedule array) */ +static void RawSetKey (int encryption, const byte *key, word32 *scheduledKey) +{ + byte buffer[56+56+8]; + byte *const pc1m=buffer; /* place to modify pc1 into */ + byte *const pcr=pc1m+56; /* place to rotate pc1 into */ + byte *const ks=pcr+56; + register int i,j,l; + int m; + + for (j=0; j<56; j++) { /* convert pc1 to bits of key */ + l=pc1[j]-1; /* integer bit location */ + m = l & 07; /* find bit */ + pc1m[j]=(key[l>>3] & /* find which key byte l is in */ + bytebit[m]) /* and which bit of that byte */ + ? 1 : 0; /* and store 1-bit result */ + } + for (i=0; i<16; i++) { /* key chunk for each iteration */ + memset(ks,0,8); /* Clear key schedule */ + for (j=0; j<56; j++) /* rotate pc1 the right amount */ + pcr[j] = pc1m[(l=j+totrot[i])<(j<28? 28 : 56) ? l: l-28]; + /* rotate left and right halves independently */ + for (j=0; j<48; j++){ /* select bits individually */ + /* check bit that goes to ks[j] */ + if (pcr[pc2[j]-1]){ + /* mask it in if it's there */ + l= j % 6; + ks[j/6] |= bytebit[l] >> 2; + } + } + /* Now convert to odd/even interleaved form for use in F */ + scheduledKey[2*i] = ((word32)ks[0] << 24) + | ((word32)ks[2] << 16) + | ((word32)ks[4] << 8) + | ((word32)ks[6]); + scheduledKey[2*i+1] = ((word32)ks[1] << 24) + | ((word32)ks[3] << 16) + | ((word32)ks[5] << 8) + | ((word32)ks[7]); + } + + if (!encryption) // reverse key schedule order + for (i=0; i<16; i+=2) + { + word32 b = scheduledKey[i]; + scheduledKey[i] = scheduledKey[32-2-i]; + scheduledKey[32-2-i] = b; + + b = scheduledKey[i+1]; + scheduledKey[i+1] = scheduledKey[32-1-i]; + scheduledKey[32-1-i] = b; + } + + burn (buffer, sizeof (buffer)); +} + +static void RawProcessBlock(word32 *l_, word32 *r_, const word32 *k) +{ + word32 l = *l_, r = *r_; + const word32 *kptr=k; + unsigned i; + + for (i=0; i<8; i++) + { + word32 work = rotrFixed(r, 4U) ^ kptr[4*i+0]; + l ^= Spbox[6][(work) & 0x3f] + ^ Spbox[4][(work >> 8) & 0x3f] + ^ Spbox[2][(work >> 16) & 0x3f] + ^ Spbox[0][(work >> 24) & 0x3f]; + work = r ^ kptr[4*i+1]; + l ^= Spbox[7][(work) & 0x3f] + ^ Spbox[5][(work >> 8) & 0x3f] + ^ Spbox[3][(work >> 16) & 0x3f] + ^ Spbox[1][(work >> 24) & 0x3f]; + + work = rotrFixed(l, 4U) ^ kptr[4*i+2]; + r ^= Spbox[6][(work) & 0x3f] + ^ Spbox[4][(work >> 8) & 0x3f] + ^ Spbox[2][(work >> 16) & 0x3f] + ^ Spbox[0][(work >> 24) & 0x3f]; + work = l ^ kptr[4*i+3]; + r ^= Spbox[7][(work) & 0x3f] + ^ Spbox[5][(work >> 8) & 0x3f] + ^ Spbox[3][(work >> 16) & 0x3f] + ^ Spbox[1][(work >> 24) & 0x3f]; + } + + *l_ = l; *r_ = r; +} + +void TripleDesSetKey (const byte *userKey, unsigned int length, TDES_KEY *ks) +{ + RawSetKey (1, userKey + 0, ks->k1); + RawSetKey (1, userKey + 8, ks->k2); + RawSetKey (1, userKey + 16, ks->k3); + RawSetKey (0, userKey + 16, ks->k1d); + RawSetKey (0, userKey + 8, ks->k2d); + RawSetKey (0, userKey + 0, ks->k3d); +} + +void TripleDesEncrypt (byte *inBlock, byte *outBlock, TDES_KEY *key, int encrypt) +{ + word32 left = BE32 (((word32 *)inBlock)[0]); + word32 right = BE32 (((word32 *)inBlock)[1]); + word32 work; + + right = rotlFixed(right, 4U); + work = (left ^ right) & 0xf0f0f0f0; + left ^= work; + right = rotrFixed(right^work, 20U); + work = (left ^ right) & 0xffff0000; + left ^= work; + right = rotrFixed(right^work, 18U); + work = (left ^ right) & 0x33333333; + left ^= work; + right = rotrFixed(right^work, 6U); + work = (left ^ right) & 0x00ff00ff; + left ^= work; + right = rotlFixed(right^work, 9U); + work = (left ^ right) & 0xaaaaaaaa; + left = rotlFixed(left^work, 1U); + right ^= work; + + RawProcessBlock (&left, &right, encrypt ? key->k1 : key->k1d); + RawProcessBlock (&right, &left, !encrypt ? key->k2 : key->k2d); + RawProcessBlock (&left, &right, encrypt ? key->k3 : key->k3d); + + right = rotrFixed(right, 1U); + work = (left ^ right) & 0xaaaaaaaa; + right ^= work; + left = rotrFixed(left^work, 9U); + work = (left ^ right) & 0x00ff00ff; + right ^= work; + left = rotlFixed(left^work, 6U); + work = (left ^ right) & 0x33333333; + right ^= work; + left = rotlFixed(left^work, 18U); + work = (left ^ right) & 0xffff0000; + right ^= work; + left = rotlFixed(left^work, 20U); + work = (left ^ right) & 0xf0f0f0f0; + right ^= work; + left = rotrFixed(left^work, 4U); + + ((word32 *)outBlock)[0] = BE32 (right); + ((word32 *)outBlock)[1] = BE32 (left); +} diff --git a/Crypto/Des.h b/Crypto/Des.h new file mode 100644 index 0000000..f3d61e6 --- /dev/null +++ b/Crypto/Des.h @@ -0,0 +1,28 @@ +/* Deprecated/legacy */ + + +#ifndef HEADER_Crypto_DES +#define HEADER_Crypto_DES + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct TRIPLE_DES_KEY_STRUCT +{ + unsigned __int32 k1[32]; + unsigned __int32 k2[32]; + unsigned __int32 k3[32]; + unsigned __int32 k1d[32]; + unsigned __int32 k2d[32]; + unsigned __int32 k3d[32]; +} TDES_KEY; + +void TripleDesEncrypt (byte *inBlock, byte *outBlock, TDES_KEY *key, int encrypt); +void TripleDesSetKey (const byte *userKey, unsigned int length, TDES_KEY *ks); + +#ifdef __cplusplus +} +#endif + +#endif // HEADER_Crypto_DES diff --git a/Crypto/Makefile b/Crypto/Makefile new file mode 100644 index 0000000..5acbbd2 --- /dev/null +++ b/Crypto/Makefile @@ -0,0 +1 @@ +!INCLUDE $(NTMAKEENV)\makefile.def diff --git a/Crypto/Makefile.inc b/Crypto/Makefile.inc new file mode 100644 index 0000000..955f2a7 --- /dev/null +++ b/Crypto/Makefile.inc @@ -0,0 +1,15 @@ +TC_ASFLAGS = -Xvc -Ox + +!if "$(TC_ARCH)" == "x86" +TC_ASFLAGS = $(TC_ASFLAGS) -f win32 --prefix _ -D MS_STDCALL -D DLL_EXPORT +!else +TC_ASFLAGS = $(TC_ASFLAGS) -f win64 +!endif + +TC_ASM_ERR_LOG = ..\Driver\build_errors_asm.log + +"$(OBJ_PATH)\$(O)\Aes_$(TC_ARCH).obj": Aes_$(TC_ARCH).asm + nasm.exe $(TC_ASFLAGS) -o "$@" -l "$(OBJ_PATH)\$(O)\Aes_$(TC_ARCH).lst" Aes_$(TC_ARCH).asm 2>$(TC_ASM_ERR_LOG) + +"$(OBJ_PATH)\$(O)\Aes_hw_cpu.obj": Aes_hw_cpu.asm + nasm.exe $(TC_ASFLAGS) -o "$@" -l "$(OBJ_PATH)\$(O)\Aes_hw_cpu.lst" Aes_hw_cpu.asm 2>$(TC_ASM_ERR_LOG) diff --git a/Crypto/Rmd160.c b/Crypto/Rmd160.c new file mode 100644 index 0000000..aa398ed --- /dev/null +++ b/Crypto/Rmd160.c @@ -0,0 +1,490 @@ +// RIPEMD-160 written and placed in the public domain by Wei Dai + +/* + * This code implements the MD4 message-digest algorithm. + * The algorithm is due to Ron Rivest. This code was + * written by Colin Plumb in 1993, no copyright is claimed. + * This code is in the public domain; do with it what you wish. + */ + +/* Adapted for TrueCrypt */ + +#include +#include "Common/Tcdefs.h" +#include "Common/Endian.h" +#include "Rmd160.h" + +#define F(x, y, z) (x ^ y ^ z) +#define G(x, y, z) (z ^ (x & (y^z))) +#define H(x, y, z) (z ^ (x | ~y)) +#define I(x, y, z) (y ^ (z & (x^y))) +#define J(x, y, z) (x ^ (y | ~z)) + +#define PUT_64BIT_LE(cp, value) do { \ + (cp)[7] = (byte) ((value) >> 56); \ + (cp)[6] = (byte) ((value) >> 48); \ + (cp)[5] = (byte) ((value) >> 40); \ + (cp)[4] = (byte) ((value) >> 32); \ + (cp)[3] = (byte) ((value) >> 24); \ + (cp)[2] = (byte) ((value) >> 16); \ + (cp)[1] = (byte) ((value) >> 8); \ + (cp)[0] = (byte) (value); } while (0) + +#define PUT_32BIT_LE(cp, value) do { \ + (cp)[3] = (byte) ((value) >> 24); \ + (cp)[2] = (byte) ((value) >> 16); \ + (cp)[1] = (byte) ((value) >> 8); \ + (cp)[0] = (byte) (value); } while (0) + +#ifndef TC_MINIMIZE_CODE_SIZE + +static byte PADDING[64] = { + 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +#else + +static byte PADDING[64]; + +#endif + +void RMD160Init (RMD160_CTX *ctx) +{ + ctx->count = 0; + ctx->state[0] = 0x67452301; + ctx->state[1] = 0xefcdab89; + ctx->state[2] = 0x98badcfe; + ctx->state[3] = 0x10325476; + ctx->state[4] = 0xc3d2e1f0; + PADDING[0] = 0x80; +} + +/* +* Update context to reflect the concatenation of another buffer full +* of bytes. +*/ +void RMD160Update (RMD160_CTX *ctx, const unsigned char *input, unsigned __int32 lenArg) +{ +#ifndef TC_WINDOWS_BOOT + uint64 len = lenArg, have, need; +#else + uint16 len = (uint16) lenArg, have, need; +#endif + + /* Check how many bytes we already have and how many more we need. */ + have = ((ctx->count >> 3) & (RIPEMD160_BLOCK_LENGTH - 1)); + need = RIPEMD160_BLOCK_LENGTH - have; + + /* Update bitcount */ + ctx->count += len << 3; + + if (len >= need) { + if (have != 0) { + memcpy (ctx->buffer + have, input, (size_t) need); + RMD160Transform ((uint32 *) ctx->state, (const uint32 *) ctx->buffer); + input += need; + len -= need; + have = 0; + } + + /* Process data in RIPEMD160_BLOCK_LENGTH-byte chunks. */ + while (len >= RIPEMD160_BLOCK_LENGTH) { + RMD160Transform ((uint32 *) ctx->state, (const uint32 *) input); + input += RIPEMD160_BLOCK_LENGTH; + len -= RIPEMD160_BLOCK_LENGTH; + } + } + + /* Handle any remaining bytes of data. */ + if (len != 0) + memcpy (ctx->buffer + have, input, (size_t) len); +} + +/* +* Pad pad to 64-byte boundary with the bit pattern +* 1 0* (64-bit count of bits processed, MSB-first) +*/ +static void RMD160Pad(RMD160_CTX *ctx) +{ + byte count[8]; + uint32 padlen; + + /* Convert count to 8 bytes in little endian order. */ + +#ifndef TC_WINDOWS_BOOT + PUT_64BIT_LE(count, ctx->count); +#else + *(uint32 *) (count + 4) = 0; + *(uint16 *) (count + 2) = 0; + *(uint16 *) (count + 0) = ctx->count; +#endif + + /* Pad out to 56 mod 64. */ + padlen = RIPEMD160_BLOCK_LENGTH - + (uint32)((ctx->count >> 3) & (RIPEMD160_BLOCK_LENGTH - 1)); + if (padlen < 1 + 8) + padlen += RIPEMD160_BLOCK_LENGTH; + RMD160Update(ctx, PADDING, padlen - 8); /* padlen - 8 <= 64 */ + RMD160Update(ctx, count, 8); +} + +/* +* Final wrapup--call RMD160Pad, fill in digest and zero out ctx. +*/ +void RMD160Final(unsigned char *digest, RMD160_CTX *ctx) +{ + int i; + + RMD160Pad(ctx); + if (digest) { + for (i = 0; i < 5; i++) + PUT_32BIT_LE(digest + i * 4, ctx->state[i]); + memset (ctx, 0, sizeof(*ctx)); + } +} + + +#ifndef TC_MINIMIZE_CODE_SIZE + +#define word32 unsigned __int32 + +#define k0 0 +#define k1 0x5a827999UL +#define k2 0x6ed9eba1UL +#define k3 0x8f1bbcdcUL +#define k4 0xa953fd4eUL +#define k5 0x50a28be6UL +#define k6 0x5c4dd124UL +#define k7 0x6d703ef3UL +#define k8 0x7a6d76e9UL +#define k9 0 + +static word32 rotlFixed (word32 x, unsigned int y) +{ + return (word32)((x<>(sizeof(word32)*8-y))); +} + +#define Subround(f, a, b, c, d, e, x, s, k) \ + a += f(b, c, d) + x + k;\ + a = rotlFixed((word32)a, s) + e;\ + c = rotlFixed((word32)c, 10U) + +void RMD160Transform (unsigned __int32 *digest, const unsigned __int32 *data) +{ +#if BYTE_ORDER == LITTLE_ENDIAN + const word32 *X = data; +#else + word32 X[16]; + int i; +#endif + + word32 a1, b1, c1, d1, e1, a2, b2, c2, d2, e2; + a1 = a2 = digest[0]; + b1 = b2 = digest[1]; + c1 = c2 = digest[2]; + d1 = d2 = digest[3]; + e1 = e2 = digest[4]; + +#if BYTE_ORDER == BIG_ENDIAN + for (i = 0; i < 16; i++) + { + X[i] = LE32 (data[i]); + } +#endif + + Subround(F, a1, b1, c1, d1, e1, X[ 0], 11, k0); + Subround(F, e1, a1, b1, c1, d1, X[ 1], 14, k0); + Subround(F, d1, e1, a1, b1, c1, X[ 2], 15, k0); + Subround(F, c1, d1, e1, a1, b1, X[ 3], 12, k0); + Subround(F, b1, c1, d1, e1, a1, X[ 4], 5, k0); + Subround(F, a1, b1, c1, d1, e1, X[ 5], 8, k0); + Subround(F, e1, a1, b1, c1, d1, X[ 6], 7, k0); + Subround(F, d1, e1, a1, b1, c1, X[ 7], 9, k0); + Subround(F, c1, d1, e1, a1, b1, X[ 8], 11, k0); + Subround(F, b1, c1, d1, e1, a1, X[ 9], 13, k0); + Subround(F, a1, b1, c1, d1, e1, X[10], 14, k0); + Subround(F, e1, a1, b1, c1, d1, X[11], 15, k0); + Subround(F, d1, e1, a1, b1, c1, X[12], 6, k0); + Subround(F, c1, d1, e1, a1, b1, X[13], 7, k0); + Subround(F, b1, c1, d1, e1, a1, X[14], 9, k0); + Subround(F, a1, b1, c1, d1, e1, X[15], 8, k0); + + Subround(G, e1, a1, b1, c1, d1, X[ 7], 7, k1); + Subround(G, d1, e1, a1, b1, c1, X[ 4], 6, k1); + Subround(G, c1, d1, e1, a1, b1, X[13], 8, k1); + Subround(G, b1, c1, d1, e1, a1, X[ 1], 13, k1); + Subround(G, a1, b1, c1, d1, e1, X[10], 11, k1); + Subround(G, e1, a1, b1, c1, d1, X[ 6], 9, k1); + Subround(G, d1, e1, a1, b1, c1, X[15], 7, k1); + Subround(G, c1, d1, e1, a1, b1, X[ 3], 15, k1); + Subround(G, b1, c1, d1, e1, a1, X[12], 7, k1); + Subround(G, a1, b1, c1, d1, e1, X[ 0], 12, k1); + Subround(G, e1, a1, b1, c1, d1, X[ 9], 15, k1); + Subround(G, d1, e1, a1, b1, c1, X[ 5], 9, k1); + Subround(G, c1, d1, e1, a1, b1, X[ 2], 11, k1); + Subround(G, b1, c1, d1, e1, a1, X[14], 7, k1); + Subround(G, a1, b1, c1, d1, e1, X[11], 13, k1); + Subround(G, e1, a1, b1, c1, d1, X[ 8], 12, k1); + + Subround(H, d1, e1, a1, b1, c1, X[ 3], 11, k2); + Subround(H, c1, d1, e1, a1, b1, X[10], 13, k2); + Subround(H, b1, c1, d1, e1, a1, X[14], 6, k2); + Subround(H, a1, b1, c1, d1, e1, X[ 4], 7, k2); + Subround(H, e1, a1, b1, c1, d1, X[ 9], 14, k2); + Subround(H, d1, e1, a1, b1, c1, X[15], 9, k2); + Subround(H, c1, d1, e1, a1, b1, X[ 8], 13, k2); + Subround(H, b1, c1, d1, e1, a1, X[ 1], 15, k2); + Subround(H, a1, b1, c1, d1, e1, X[ 2], 14, k2); + Subround(H, e1, a1, b1, c1, d1, X[ 7], 8, k2); + Subround(H, d1, e1, a1, b1, c1, X[ 0], 13, k2); + Subround(H, c1, d1, e1, a1, b1, X[ 6], 6, k2); + Subround(H, b1, c1, d1, e1, a1, X[13], 5, k2); + Subround(H, a1, b1, c1, d1, e1, X[11], 12, k2); + Subround(H, e1, a1, b1, c1, d1, X[ 5], 7, k2); + Subround(H, d1, e1, a1, b1, c1, X[12], 5, k2); + + Subround(I, c1, d1, e1, a1, b1, X[ 1], 11, k3); + Subround(I, b1, c1, d1, e1, a1, X[ 9], 12, k3); + Subround(I, a1, b1, c1, d1, e1, X[11], 14, k3); + Subround(I, e1, a1, b1, c1, d1, X[10], 15, k3); + Subround(I, d1, e1, a1, b1, c1, X[ 0], 14, k3); + Subround(I, c1, d1, e1, a1, b1, X[ 8], 15, k3); + Subround(I, b1, c1, d1, e1, a1, X[12], 9, k3); + Subround(I, a1, b1, c1, d1, e1, X[ 4], 8, k3); + Subround(I, e1, a1, b1, c1, d1, X[13], 9, k3); + Subround(I, d1, e1, a1, b1, c1, X[ 3], 14, k3); + Subround(I, c1, d1, e1, a1, b1, X[ 7], 5, k3); + Subround(I, b1, c1, d1, e1, a1, X[15], 6, k3); + Subround(I, a1, b1, c1, d1, e1, X[14], 8, k3); + Subround(I, e1, a1, b1, c1, d1, X[ 5], 6, k3); + Subround(I, d1, e1, a1, b1, c1, X[ 6], 5, k3); + Subround(I, c1, d1, e1, a1, b1, X[ 2], 12, k3); + + Subround(J, b1, c1, d1, e1, a1, X[ 4], 9, k4); + Subround(J, a1, b1, c1, d1, e1, X[ 0], 15, k4); + Subround(J, e1, a1, b1, c1, d1, X[ 5], 5, k4); + Subround(J, d1, e1, a1, b1, c1, X[ 9], 11, k4); + Subround(J, c1, d1, e1, a1, b1, X[ 7], 6, k4); + Subround(J, b1, c1, d1, e1, a1, X[12], 8, k4); + Subround(J, a1, b1, c1, d1, e1, X[ 2], 13, k4); + Subround(J, e1, a1, b1, c1, d1, X[10], 12, k4); + Subround(J, d1, e1, a1, b1, c1, X[14], 5, k4); + Subround(J, c1, d1, e1, a1, b1, X[ 1], 12, k4); + Subround(J, b1, c1, d1, e1, a1, X[ 3], 13, k4); + Subround(J, a1, b1, c1, d1, e1, X[ 8], 14, k4); + Subround(J, e1, a1, b1, c1, d1, X[11], 11, k4); + Subround(J, d1, e1, a1, b1, c1, X[ 6], 8, k4); + Subround(J, c1, d1, e1, a1, b1, X[15], 5, k4); + Subround(J, b1, c1, d1, e1, a1, X[13], 6, k4); + + Subround(J, a2, b2, c2, d2, e2, X[ 5], 8, k5); + Subround(J, e2, a2, b2, c2, d2, X[14], 9, k5); + Subround(J, d2, e2, a2, b2, c2, X[ 7], 9, k5); + Subround(J, c2, d2, e2, a2, b2, X[ 0], 11, k5); + Subround(J, b2, c2, d2, e2, a2, X[ 9], 13, k5); + Subround(J, a2, b2, c2, d2, e2, X[ 2], 15, k5); + Subround(J, e2, a2, b2, c2, d2, X[11], 15, k5); + Subround(J, d2, e2, a2, b2, c2, X[ 4], 5, k5); + Subround(J, c2, d2, e2, a2, b2, X[13], 7, k5); + Subround(J, b2, c2, d2, e2, a2, X[ 6], 7, k5); + Subround(J, a2, b2, c2, d2, e2, X[15], 8, k5); + Subround(J, e2, a2, b2, c2, d2, X[ 8], 11, k5); + Subround(J, d2, e2, a2, b2, c2, X[ 1], 14, k5); + Subround(J, c2, d2, e2, a2, b2, X[10], 14, k5); + Subround(J, b2, c2, d2, e2, a2, X[ 3], 12, k5); + Subround(J, a2, b2, c2, d2, e2, X[12], 6, k5); + + Subround(I, e2, a2, b2, c2, d2, X[ 6], 9, k6); + Subround(I, d2, e2, a2, b2, c2, X[11], 13, k6); + Subround(I, c2, d2, e2, a2, b2, X[ 3], 15, k6); + Subround(I, b2, c2, d2, e2, a2, X[ 7], 7, k6); + Subround(I, a2, b2, c2, d2, e2, X[ 0], 12, k6); + Subround(I, e2, a2, b2, c2, d2, X[13], 8, k6); + Subround(I, d2, e2, a2, b2, c2, X[ 5], 9, k6); + Subround(I, c2, d2, e2, a2, b2, X[10], 11, k6); + Subround(I, b2, c2, d2, e2, a2, X[14], 7, k6); + Subround(I, a2, b2, c2, d2, e2, X[15], 7, k6); + Subround(I, e2, a2, b2, c2, d2, X[ 8], 12, k6); + Subround(I, d2, e2, a2, b2, c2, X[12], 7, k6); + Subround(I, c2, d2, e2, a2, b2, X[ 4], 6, k6); + Subround(I, b2, c2, d2, e2, a2, X[ 9], 15, k6); + Subround(I, a2, b2, c2, d2, e2, X[ 1], 13, k6); + Subround(I, e2, a2, b2, c2, d2, X[ 2], 11, k6); + + Subround(H, d2, e2, a2, b2, c2, X[15], 9, k7); + Subround(H, c2, d2, e2, a2, b2, X[ 5], 7, k7); + Subround(H, b2, c2, d2, e2, a2, X[ 1], 15, k7); + Subround(H, a2, b2, c2, d2, e2, X[ 3], 11, k7); + Subround(H, e2, a2, b2, c2, d2, X[ 7], 8, k7); + Subround(H, d2, e2, a2, b2, c2, X[14], 6, k7); + Subround(H, c2, d2, e2, a2, b2, X[ 6], 6, k7); + Subround(H, b2, c2, d2, e2, a2, X[ 9], 14, k7); + Subround(H, a2, b2, c2, d2, e2, X[11], 12, k7); + Subround(H, e2, a2, b2, c2, d2, X[ 8], 13, k7); + Subround(H, d2, e2, a2, b2, c2, X[12], 5, k7); + Subround(H, c2, d2, e2, a2, b2, X[ 2], 14, k7); + Subround(H, b2, c2, d2, e2, a2, X[10], 13, k7); + Subround(H, a2, b2, c2, d2, e2, X[ 0], 13, k7); + Subround(H, e2, a2, b2, c2, d2, X[ 4], 7, k7); + Subround(H, d2, e2, a2, b2, c2, X[13], 5, k7); + + Subround(G, c2, d2, e2, a2, b2, X[ 8], 15, k8); + Subround(G, b2, c2, d2, e2, a2, X[ 6], 5, k8); + Subround(G, a2, b2, c2, d2, e2, X[ 4], 8, k8); + Subround(G, e2, a2, b2, c2, d2, X[ 1], 11, k8); + Subround(G, d2, e2, a2, b2, c2, X[ 3], 14, k8); + Subround(G, c2, d2, e2, a2, b2, X[11], 14, k8); + Subround(G, b2, c2, d2, e2, a2, X[15], 6, k8); + Subround(G, a2, b2, c2, d2, e2, X[ 0], 14, k8); + Subround(G, e2, a2, b2, c2, d2, X[ 5], 6, k8); + Subround(G, d2, e2, a2, b2, c2, X[12], 9, k8); + Subround(G, c2, d2, e2, a2, b2, X[ 2], 12, k8); + Subround(G, b2, c2, d2, e2, a2, X[13], 9, k8); + Subround(G, a2, b2, c2, d2, e2, X[ 9], 12, k8); + Subround(G, e2, a2, b2, c2, d2, X[ 7], 5, k8); + Subround(G, d2, e2, a2, b2, c2, X[10], 15, k8); + Subround(G, c2, d2, e2, a2, b2, X[14], 8, k8); + + Subround(F, b2, c2, d2, e2, a2, X[12], 8, k9); + Subround(F, a2, b2, c2, d2, e2, X[15], 5, k9); + Subround(F, e2, a2, b2, c2, d2, X[10], 12, k9); + Subround(F, d2, e2, a2, b2, c2, X[ 4], 9, k9); + Subround(F, c2, d2, e2, a2, b2, X[ 1], 12, k9); + Subround(F, b2, c2, d2, e2, a2, X[ 5], 5, k9); + Subround(F, a2, b2, c2, d2, e2, X[ 8], 14, k9); + Subround(F, e2, a2, b2, c2, d2, X[ 7], 6, k9); + Subround(F, d2, e2, a2, b2, c2, X[ 6], 8, k9); + Subround(F, c2, d2, e2, a2, b2, X[ 2], 13, k9); + Subround(F, b2, c2, d2, e2, a2, X[13], 6, k9); + Subround(F, a2, b2, c2, d2, e2, X[14], 5, k9); + Subround(F, e2, a2, b2, c2, d2, X[ 0], 15, k9); + Subround(F, d2, e2, a2, b2, c2, X[ 3], 13, k9); + Subround(F, c2, d2, e2, a2, b2, X[ 9], 11, k9); + Subround(F, b2, c2, d2, e2, a2, X[11], 11, k9); + + c1 = digest[1] + c1 + d2; + digest[1] = digest[2] + d1 + e2; + digest[2] = digest[3] + e1 + a2; + digest[3] = digest[4] + a1 + b2; + digest[4] = digest[0] + b1 + c2; + digest[0] = c1; +} + +#else // TC_MINIMIZE_CODE_SIZE + +/* + Copyright (c) 2008 TrueCrypt Developers Association. All rights reserved. + + Governed by the TrueCrypt License 3.0 the full text of which is contained in + the file License.txt included in TrueCrypt binary and source code distribution + packages. +*/ + +#pragma optimize ("tl", on) + +typedef unsigned __int32 uint32; +typedef unsigned __int8 byte; + +#include +#pragma intrinsic (_lrotl) + +static const byte OrderTab[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 7, 4, 13, 1, 10, 6, 15, 3, 12, 0, 9, 5, 2, 14, 11, 8, + 3, 10, 14, 4, 9, 15, 8, 1, 2, 7, 0, 6, 13, 11, 5, 12, + 1, 9, 11, 10, 0, 8, 12, 4, 13, 3, 7, 15, 14, 5, 6, 2, + 4, 0, 5, 9, 7, 12, 2, 10, 14, 1, 3, 8, 11, 6, 15, 13, + 5, 14, 7, 0, 9, 2, 11, 4, 13, 6, 15, 8, 1, 10, 3, 12, + 6, 11, 3, 7, 0, 13, 5, 10, 14, 15, 8, 12, 4, 9, 1, 2, + 15, 5, 1, 3, 7, 14, 6, 9, 11, 8, 12, 2, 10, 0, 4, 13, + 8, 6, 4, 1, 3, 11, 15, 0, 5, 12, 2, 13, 9, 7, 10, 14, + 12, 15, 10, 4, 1, 5, 8, 7, 6, 2, 13, 14, 0, 3, 9, 11 +}; + +static const byte RolTab[] = { + 11, 14, 15, 12, 5, 8, 7, 9, 11, 13, 14, 15, 6, 7, 9, 8, + 7, 6, 8, 13, 11, 9, 7, 15, 7, 12, 15, 9, 11, 7, 13, 12, + 11, 13, 6, 7, 14, 9, 13, 15, 14, 8, 13, 6, 5, 12, 7, 5, + 11, 12, 14, 15, 14, 15, 9, 8, 9, 14, 5, 6, 8, 6, 5, 12, + 9, 15, 5, 11, 6, 8, 13, 12, 5, 12, 13, 14, 11, 8, 5, 6, + 8, 9, 9, 11, 13, 15, 15, 5, 7, 7, 8, 11, 14, 14, 12, 6, + 9, 13, 15, 7, 12, 8, 9, 11, 7, 7, 12, 7, 6, 15, 13, 11, + 9, 7, 15, 11, 8, 6, 6, 14, 12, 13, 5, 14, 13, 13, 7, 5, + 15, 5, 8, 11, 14, 14, 6, 14, 6, 9, 12, 9, 12, 5, 15, 8, + 8, 5, 12, 9, 12, 5, 14, 6, 8, 13, 6, 5, 15, 13, 11, 11 +}; + +static const uint32 KTab[] = { + 0x00000000UL, + 0x5A827999UL, + 0x6ED9EBA1UL, + 0x8F1BBCDCUL, + 0xA953FD4EUL, + 0x50A28BE6UL, + 0x5C4DD124UL, + 0x6D703EF3UL, + 0x7A6D76E9UL, + 0x00000000UL +}; + + +void RMD160Transform (unsigned __int32 *state, const unsigned __int32 *data) +{ + uint32 a, b, c, d, e; + uint32 a2, b2, c2, d2, e2; + byte pos; + uint32 tmp; + + a = state[0]; + b = state[1]; + c = state[2]; + d = state[3]; + e = state[4]; + + for (pos = 0; pos < 160; ++pos) + { + tmp = a + data[OrderTab[pos]] + KTab[pos >> 4]; + + switch (pos >> 4) + { + case 0: case 9: tmp += F (b, c, d); break; + case 1: case 8: tmp += G (b, c, d); break; + case 2: case 7: tmp += H (b, c, d); break; + case 3: case 6: tmp += I (b, c, d); break; + case 4: case 5: tmp += J (b, c, d); break; + } + + tmp = _lrotl (tmp, RolTab[pos]) + e; + a = e; + e = d; + d = _lrotl (c, 10); + c = b; + b = tmp; + + if (pos == 79) + { + a2 = a; + b2 = b; + c2 = c; + d2 = d; + e2 = e; + + a = state[0]; + b = state[1]; + c = state[2]; + d = state[3]; + e = state[4]; + } + } + + tmp = state[1] + c2 + d; + state[1] = state[2] + d2 + e; + state[2] = state[3] + e2 + a; + state[3] = state[4] + a2 + b; + state[4] = state[0] + b2 + c; + state[0] = tmp; +} + +#endif // TC_MINIMIZE_CODE_SIZE diff --git a/Crypto/Rmd160.h b/Crypto/Rmd160.h new file mode 100644 index 0000000..8517465 --- /dev/null +++ b/Crypto/Rmd160.h @@ -0,0 +1,33 @@ +#ifndef TC_HEADER_Crypto_Ripemd160 +#define TC_HEADER_Crypto_Ripemd160 + +#include "Common/Tcdefs.h" + +#if defined(__cplusplus) +extern "C" +{ +#endif + +#define RIPEMD160_BLOCK_LENGTH 64 + +typedef struct RMD160Context +{ + unsigned __int32 state[5]; +#ifndef TC_WINDOWS_BOOT + uint64 count; +#else + uint16 count; +#endif + unsigned char buffer[RIPEMD160_BLOCK_LENGTH]; +} RMD160_CTX; + +void RMD160Init (RMD160_CTX *ctx); +void RMD160Transform (unsigned __int32 *state, const unsigned __int32 *data); +void RMD160Update (RMD160_CTX *ctx, const unsigned char *input, unsigned __int32 len); +void RMD160Final (unsigned char *digest, RMD160_CTX *ctx); + +#if defined(__cplusplus) +} +#endif + +#endif // TC_HEADER_Crypto_Ripemd160 diff --git a/Crypto/Serpent.c b/Crypto/Serpent.c new file mode 100644 index 0000000..7085b2b --- /dev/null +++ b/Crypto/Serpent.c @@ -0,0 +1,943 @@ +// serpent.cpp - written and placed in the public domain by Wei Dai + +/* Adapted for TrueCrypt */ + +#ifdef TC_WINDOWS_BOOT +#pragma optimize ("t", on) +#endif + +#include "Serpent.h" +#include "Common/Endian.h" + +#include + +#if defined(_WIN32) && !defined(_DEBUG) +#include +#define rotlFixed _rotl +#define rotrFixed _rotr +#else +#define rotlFixed(x,n) (((x) << (n)) | ((x) >> (32 - (n)))) +#define rotrFixed(x,n) (((x) >> (n)) | ((x) << (32 - (n)))) +#endif + +// linear transformation +#define LT(i,a,b,c,d,e) {\ + a = rotlFixed(a, 13); \ + c = rotlFixed(c, 3); \ + d = rotlFixed(d ^ c ^ (a << 3), 7); \ + b = rotlFixed(b ^ a ^ c, 1); \ + a = rotlFixed(a ^ b ^ d, 5); \ + c = rotlFixed(c ^ d ^ (b << 7), 22);} + +// inverse linear transformation +#define ILT(i,a,b,c,d,e) {\ + c = rotrFixed(c, 22); \ + a = rotrFixed(a, 5); \ + c ^= d ^ (b << 7); \ + a ^= b ^ d; \ + b = rotrFixed(b, 1); \ + d = rotrFixed(d, 7) ^ c ^ (a << 3); \ + b ^= a ^ c; \ + c = rotrFixed(c, 3); \ + a = rotrFixed(a, 13);} + +// order of output from S-box functions +#define beforeS0(f) f(0,a,b,c,d,e) +#define afterS0(f) f(1,b,e,c,a,d) +#define afterS1(f) f(2,c,b,a,e,d) +#define afterS2(f) f(3,a,e,b,d,c) +#define afterS3(f) f(4,e,b,d,c,a) +#define afterS4(f) f(5,b,a,e,c,d) +#define afterS5(f) f(6,a,c,b,e,d) +#define afterS6(f) f(7,a,c,d,b,e) +#define afterS7(f) f(8,d,e,b,a,c) + +// order of output from inverse S-box functions +#define beforeI7(f) f(8,a,b,c,d,e) +#define afterI7(f) f(7,d,a,b,e,c) +#define afterI6(f) f(6,a,b,c,e,d) +#define afterI5(f) f(5,b,d,e,c,a) +#define afterI4(f) f(4,b,c,e,a,d) +#define afterI3(f) f(3,a,b,e,c,d) +#define afterI2(f) f(2,b,d,e,c,a) +#define afterI1(f) f(1,a,b,c,e,d) +#define afterI0(f) f(0,a,d,b,e,c) + +// The instruction sequences for the S-box functions +// come from Dag Arne Osvik's paper "Speeding up Serpent". + +#define S0(i, r0, r1, r2, r3, r4) \ + { \ + r3 ^= r0; \ + r4 = r1; \ + r1 &= r3; \ + r4 ^= r2; \ + r1 ^= r0; \ + r0 |= r3; \ + r0 ^= r4; \ + r4 ^= r3; \ + r3 ^= r2; \ + r2 |= r1; \ + r2 ^= r4; \ + r4 = ~r4; \ + r4 |= r1; \ + r1 ^= r3; \ + r1 ^= r4; \ + r3 |= r0; \ + r1 ^= r3; \ + r4 ^= r3; \ + } + +#define I0(i, r0, r1, r2, r3, r4) \ + { \ + r2 = ~r2; \ + r4 = r1; \ + r1 |= r0; \ + r4 = ~r4; \ + r1 ^= r2; \ + r2 |= r4; \ + r1 ^= r3; \ + r0 ^= r4; \ + r2 ^= r0; \ + r0 &= r3; \ + r4 ^= r0; \ + r0 |= r1; \ + r0 ^= r2; \ + r3 ^= r4; \ + r2 ^= r1; \ + r3 ^= r0; \ + r3 ^= r1; \ + r2 &= r3; \ + r4 ^= r2; \ + } + +#define S1(i, r0, r1, r2, r3, r4) \ + { \ + r0 = ~r0; \ + r2 = ~r2; \ + r4 = r0; \ + r0 &= r1; \ + r2 ^= r0; \ + r0 |= r3; \ + r3 ^= r2; \ + r1 ^= r0; \ + r0 ^= r4; \ + r4 |= r1; \ + r1 ^= r3; \ + r2 |= r0; \ + r2 &= r4; \ + r0 ^= r1; \ + r1 &= r2; \ + r1 ^= r0; \ + r0 &= r2; \ + r0 ^= r4; \ + } + +#define I1(i, r0, r1, r2, r3, r4) \ + { \ + r4 = r1; \ + r1 ^= r3; \ + r3 &= r1; \ + r4 ^= r2; \ + r3 ^= r0; \ + r0 |= r1; \ + r2 ^= r3; \ + r0 ^= r4; \ + r0 |= r2; \ + r1 ^= r3; \ + r0 ^= r1; \ + r1 |= r3; \ + r1 ^= r0; \ + r4 = ~r4; \ + r4 ^= r1; \ + r1 |= r0; \ + r1 ^= r0; \ + r1 |= r4; \ + r3 ^= r1; \ + } + +#define S2(i, r0, r1, r2, r3, r4) \ + { \ + r4 = r0; \ + r0 &= r2; \ + r0 ^= r3; \ + r2 ^= r1; \ + r2 ^= r0; \ + r3 |= r4; \ + r3 ^= r1; \ + r4 ^= r2; \ + r1 = r3; \ + r3 |= r4; \ + r3 ^= r0; \ + r0 &= r1; \ + r4 ^= r0; \ + r1 ^= r3; \ + r1 ^= r4; \ + r4 = ~r4; \ + } + +#define I2(i, r0, r1, r2, r3, r4) \ + { \ + r2 ^= r3; \ + r3 ^= r0; \ + r4 = r3; \ + r3 &= r2; \ + r3 ^= r1; \ + r1 |= r2; \ + r1 ^= r4; \ + r4 &= r3; \ + r2 ^= r3; \ + r4 &= r0; \ + r4 ^= r2; \ + r2 &= r1; \ + r2 |= r0; \ + r3 = ~r3; \ + r2 ^= r3; \ + r0 ^= r3; \ + r0 &= r1; \ + r3 ^= r4; \ + r3 ^= r0; \ + } + +#define S3(i, r0, r1, r2, r3, r4) \ + { \ + r4 = r0; \ + r0 |= r3; \ + r3 ^= r1; \ + r1 &= r4; \ + r4 ^= r2; \ + r2 ^= r3; \ + r3 &= r0; \ + r4 |= r1; \ + r3 ^= r4; \ + r0 ^= r1; \ + r4 &= r0; \ + r1 ^= r3; \ + r4 ^= r2; \ + r1 |= r0; \ + r1 ^= r2; \ + r0 ^= r3; \ + r2 = r1; \ + r1 |= r3; \ + r1 ^= r0; \ + } + +#define I3(i, r0, r1, r2, r3, r4) \ + { \ + r4 = r2; \ + r2 ^= r1; \ + r1 &= r2; \ + r1 ^= r0; \ + r0 &= r4; \ + r4 ^= r3; \ + r3 |= r1; \ + r3 ^= r2; \ + r0 ^= r4; \ + r2 ^= r0; \ + r0 |= r3; \ + r0 ^= r1; \ + r4 ^= r2; \ + r2 &= r3; \ + r1 |= r3; \ + r1 ^= r2; \ + r4 ^= r0; \ + r2 ^= r4; \ + } + +#define S4(i, r0, r1, r2, r3, r4) \ + { \ + r1 ^= r3; \ + r3 = ~r3; \ + r2 ^= r3; \ + r3 ^= r0; \ + r4 = r1; \ + r1 &= r3; \ + r1 ^= r2; \ + r4 ^= r3; \ + r0 ^= r4; \ + r2 &= r4; \ + r2 ^= r0; \ + r0 &= r1; \ + r3 ^= r0; \ + r4 |= r1; \ + r4 ^= r0; \ + r0 |= r3; \ + r0 ^= r2; \ + r2 &= r3; \ + r0 = ~r0; \ + r4 ^= r2; \ + } + +#define I4(i, r0, r1, r2, r3, r4) \ + { \ + r4 = r2; \ + r2 &= r3; \ + r2 ^= r1; \ + r1 |= r3; \ + r1 &= r0; \ + r4 ^= r2; \ + r4 ^= r1; \ + r1 &= r2; \ + r0 = ~r0; \ + r3 ^= r4; \ + r1 ^= r3; \ + r3 &= r0; \ + r3 ^= r2; \ + r0 ^= r1; \ + r2 &= r0; \ + r3 ^= r0; \ + r2 ^= r4; \ + r2 |= r3; \ + r3 ^= r0; \ + r2 ^= r1; \ + } + +#define S5(i, r0, r1, r2, r3, r4) \ + { \ + r0 ^= r1; \ + r1 ^= r3; \ + r3 = ~r3; \ + r4 = r1; \ + r1 &= r0; \ + r2 ^= r3; \ + r1 ^= r2; \ + r2 |= r4; \ + r4 ^= r3; \ + r3 &= r1; \ + r3 ^= r0; \ + r4 ^= r1; \ + r4 ^= r2; \ + r2 ^= r0; \ + r0 &= r3; \ + r2 = ~r2; \ + r0 ^= r4; \ + r4 |= r3; \ + r2 ^= r4; \ + } + +#define I5(i, r0, r1, r2, r3, r4) \ + { \ + r1 = ~r1; \ + r4 = r3; \ + r2 ^= r1; \ + r3 |= r0; \ + r3 ^= r2; \ + r2 |= r1; \ + r2 &= r0; \ + r4 ^= r3; \ + r2 ^= r4; \ + r4 |= r0; \ + r4 ^= r1; \ + r1 &= r2; \ + r1 ^= r3; \ + r4 ^= r2; \ + r3 &= r4; \ + r4 ^= r1; \ + r3 ^= r0; \ + r3 ^= r4; \ + r4 = ~r4; \ + } + +#define S6(i, r0, r1, r2, r3, r4) \ + { \ + r2 = ~r2; \ + r4 = r3; \ + r3 &= r0; \ + r0 ^= r4; \ + r3 ^= r2; \ + r2 |= r4; \ + r1 ^= r3; \ + r2 ^= r0; \ + r0 |= r1; \ + r2 ^= r1; \ + r4 ^= r0; \ + r0 |= r3; \ + r0 ^= r2; \ + r4 ^= r3; \ + r4 ^= r0; \ + r3 = ~r3; \ + r2 &= r4; \ + r2 ^= r3; \ + } + +#define I6(i, r0, r1, r2, r3, r4) \ + { \ + r0 ^= r2; \ + r4 = r2; \ + r2 &= r0; \ + r4 ^= r3; \ + r2 = ~r2; \ + r3 ^= r1; \ + r2 ^= r3; \ + r4 |= r0; \ + r0 ^= r2; \ + r3 ^= r4; \ + r4 ^= r1; \ + r1 &= r3; \ + r1 ^= r0; \ + r0 ^= r3; \ + r0 |= r2; \ + r3 ^= r1; \ + r4 ^= r0; \ + } + +#define S7(i, r0, r1, r2, r3, r4) \ + { \ + r4 = r2; \ + r2 &= r1; \ + r2 ^= r3; \ + r3 &= r1; \ + r4 ^= r2; \ + r2 ^= r1; \ + r1 ^= r0; \ + r0 |= r4; \ + r0 ^= r2; \ + r3 ^= r1; \ + r2 ^= r3; \ + r3 &= r0; \ + r3 ^= r4; \ + r4 ^= r2; \ + r2 &= r0; \ + r4 = ~r4; \ + r2 ^= r4; \ + r4 &= r0; \ + r1 ^= r3; \ + r4 ^= r1; \ + } + +#define I7(i, r0, r1, r2, r3, r4) \ + { \ + r4 = r2; \ + r2 ^= r0; \ + r0 &= r3; \ + r2 = ~r2; \ + r4 |= r3; \ + r3 ^= r1; \ + r1 |= r0; \ + r0 ^= r2; \ + r2 &= r4; \ + r1 ^= r2; \ + r2 ^= r0; \ + r0 |= r2; \ + r3 &= r4; \ + r0 ^= r3; \ + r4 ^= r1; \ + r3 ^= r4; \ + r4 |= r0; \ + r3 ^= r2; \ + r4 ^= r2; \ + } + +// key xor +#define KX(r, a, b, c, d, e) {\ + a ^= k[4 * r + 0]; \ + b ^= k[4 * r + 1]; \ + c ^= k[4 * r + 2]; \ + d ^= k[4 * r + 3];} + + +#ifdef TC_MINIMIZE_CODE_SIZE + +static void S0f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4) +{ + *r3 ^= *r0; + *r4 = *r1; + *r1 &= *r3; + *r4 ^= *r2; + *r1 ^= *r0; + *r0 |= *r3; + *r0 ^= *r4; + *r4 ^= *r3; + *r3 ^= *r2; + *r2 |= *r1; + *r2 ^= *r4; + *r4 = ~*r4; + *r4 |= *r1; + *r1 ^= *r3; + *r1 ^= *r4; + *r3 |= *r0; + *r1 ^= *r3; + *r4 ^= *r3; +} + +static void S1f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4) +{ + *r0 = ~*r0; + *r2 = ~*r2; + *r4 = *r0; + *r0 &= *r1; + *r2 ^= *r0; + *r0 |= *r3; + *r3 ^= *r2; + *r1 ^= *r0; + *r0 ^= *r4; + *r4 |= *r1; + *r1 ^= *r3; + *r2 |= *r0; + *r2 &= *r4; + *r0 ^= *r1; + *r1 &= *r2; + *r1 ^= *r0; + *r0 &= *r2; + *r0 ^= *r4; +} + +static void S2f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4) +{ + *r4 = *r0; + *r0 &= *r2; + *r0 ^= *r3; + *r2 ^= *r1; + *r2 ^= *r0; + *r3 |= *r4; + *r3 ^= *r1; + *r4 ^= *r2; + *r1 = *r3; + *r3 |= *r4; + *r3 ^= *r0; + *r0 &= *r1; + *r4 ^= *r0; + *r1 ^= *r3; + *r1 ^= *r4; + *r4 = ~*r4; +} + +static void S3f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4) +{ + *r4 = *r0; + *r0 |= *r3; + *r3 ^= *r1; + *r1 &= *r4; + *r4 ^= *r2; + *r2 ^= *r3; + *r3 &= *r0; + *r4 |= *r1; + *r3 ^= *r4; + *r0 ^= *r1; + *r4 &= *r0; + *r1 ^= *r3; + *r4 ^= *r2; + *r1 |= *r0; + *r1 ^= *r2; + *r0 ^= *r3; + *r2 = *r1; + *r1 |= *r3; + *r1 ^= *r0; +} + +static void S4f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4) +{ + *r1 ^= *r3; + *r3 = ~*r3; + *r2 ^= *r3; + *r3 ^= *r0; + *r4 = *r1; + *r1 &= *r3; + *r1 ^= *r2; + *r4 ^= *r3; + *r0 ^= *r4; + *r2 &= *r4; + *r2 ^= *r0; + *r0 &= *r1; + *r3 ^= *r0; + *r4 |= *r1; + *r4 ^= *r0; + *r0 |= *r3; + *r0 ^= *r2; + *r2 &= *r3; + *r0 = ~*r0; + *r4 ^= *r2; +} + +static void S5f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4) +{ + *r0 ^= *r1; + *r1 ^= *r3; + *r3 = ~*r3; + *r4 = *r1; + *r1 &= *r0; + *r2 ^= *r3; + *r1 ^= *r2; + *r2 |= *r4; + *r4 ^= *r3; + *r3 &= *r1; + *r3 ^= *r0; + *r4 ^= *r1; + *r4 ^= *r2; + *r2 ^= *r0; + *r0 &= *r3; + *r2 = ~*r2; + *r0 ^= *r4; + *r4 |= *r3; + *r2 ^= *r4; +} + +static void S6f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4) +{ + *r2 = ~*r2; + *r4 = *r3; + *r3 &= *r0; + *r0 ^= *r4; + *r3 ^= *r2; + *r2 |= *r4; + *r1 ^= *r3; + *r2 ^= *r0; + *r0 |= *r1; + *r2 ^= *r1; + *r4 ^= *r0; + *r0 |= *r3; + *r0 ^= *r2; + *r4 ^= *r3; + *r4 ^= *r0; + *r3 = ~*r3; + *r2 &= *r4; + *r2 ^= *r3; +} + +static void S7f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4) +{ + *r4 = *r2; + *r2 &= *r1; + *r2 ^= *r3; + *r3 &= *r1; + *r4 ^= *r2; + *r2 ^= *r1; + *r1 ^= *r0; + *r0 |= *r4; + *r0 ^= *r2; + *r3 ^= *r1; + *r2 ^= *r3; + *r3 &= *r0; + *r3 ^= *r4; + *r4 ^= *r2; + *r2 &= *r0; + *r4 = ~*r4; + *r2 ^= *r4; + *r4 &= *r0; + *r1 ^= *r3; + *r4 ^= *r1; +} + +static void KXf (const unsigned __int32 *k, unsigned int r, unsigned __int32 *a, unsigned __int32 *b, unsigned __int32 *c, unsigned __int32 *d) +{ + *a ^= k[r]; + *b ^= k[r + 1]; + *c ^= k[r + 2]; + *d ^= k[r + 3]; +} + +#endif // TC_MINIMIZE_CODE_SIZE + +#ifndef TC_MINIMIZE_CODE_SIZE + +void serpent_set_key(const unsigned __int8 userKey[], int keylen, unsigned __int8 *ks) +{ + unsigned __int32 a,b,c,d,e; + unsigned __int32 *k = (unsigned __int32 *)ks; + unsigned __int32 t; + int i; + + for (i = 0; i < keylen / (int)sizeof(__int32); i++) + k[i] = LE32(((unsigned __int32*)userKey)[i]); + + if (keylen < 32) + k[keylen/4] |= (unsigned __int32)1 << ((keylen%4)*8); + + k += 8; + t = k[-1]; + for (i = 0; i < 132; ++i) + k[i] = t = rotlFixed(k[i-8] ^ k[i-5] ^ k[i-3] ^ t ^ 0x9e3779b9 ^ i, 11); + k -= 20; + +#define LK(r, a, b, c, d, e) {\ + a = k[(8-r)*4 + 0]; \ + b = k[(8-r)*4 + 1]; \ + c = k[(8-r)*4 + 2]; \ + d = k[(8-r)*4 + 3];} + +#define SK(r, a, b, c, d, e) {\ + k[(8-r)*4 + 4] = a; \ + k[(8-r)*4 + 5] = b; \ + k[(8-r)*4 + 6] = c; \ + k[(8-r)*4 + 7] = d;} \ + + for (i=0; i<4; i++) + { + afterS2(LK); afterS2(S3); afterS3(SK); + afterS1(LK); afterS1(S2); afterS2(SK); + afterS0(LK); afterS0(S1); afterS1(SK); + beforeS0(LK); beforeS0(S0); afterS0(SK); + k += 8*4; + afterS6(LK); afterS6(S7); afterS7(SK); + afterS5(LK); afterS5(S6); afterS6(SK); + afterS4(LK); afterS4(S5); afterS5(SK); + afterS3(LK); afterS3(S4); afterS4(SK); + } + afterS2(LK); afterS2(S3); afterS3(SK); +} + +#else // TC_MINIMIZE_CODE_SIZE + +static void LKf (unsigned __int32 *k, unsigned int r, unsigned __int32 *a, unsigned __int32 *b, unsigned __int32 *c, unsigned __int32 *d) +{ + *a = k[r]; + *b = k[r + 1]; + *c = k[r + 2]; + *d = k[r + 3]; +} + +static void SKf (unsigned __int32 *k, unsigned int r, unsigned __int32 *a, unsigned __int32 *b, unsigned __int32 *c, unsigned __int32 *d) +{ + k[r + 4] = *a; + k[r + 5] = *b; + k[r + 6] = *c; + k[r + 7] = *d; +} + +void serpent_set_key(const unsigned __int8 userKey[], int keylen, unsigned __int8 *ks) +{ + unsigned __int32 a,b,c,d,e; + unsigned __int32 *k = (unsigned __int32 *)ks; + unsigned __int32 t; + int i; + + for (i = 0; i < keylen / (int)sizeof(__int32); i++) + k[i] = LE32(((unsigned __int32*)userKey)[i]); + + if (keylen < 32) + k[keylen/4] |= (unsigned __int32)1 << ((keylen%4)*8); + + k += 8; + t = k[-1]; + for (i = 0; i < 132; ++i) + k[i] = t = rotlFixed(k[i-8] ^ k[i-5] ^ k[i-3] ^ t ^ 0x9e3779b9 ^ i, 11); + k -= 20; + + for (i=0; i<4; i++) + { + LKf (k, 20, &a, &e, &b, &d); S3f (&a, &e, &b, &d, &c); SKf (k, 16, &e, &b, &d, &c); + LKf (k, 24, &c, &b, &a, &e); S2f (&c, &b, &a, &e, &d); SKf (k, 20, &a, &e, &b, &d); + LKf (k, 28, &b, &e, &c, &a); S1f (&b, &e, &c, &a, &d); SKf (k, 24, &c, &b, &a, &e); + LKf (k, 32, &a, &b, &c, &d); S0f (&a, &b, &c, &d, &e); SKf (k, 28, &b, &e, &c, &a); + k += 8*4; + LKf (k, 4, &a, &c, &d, &b); S7f (&a, &c, &d, &b, &e); SKf (k, 0, &d, &e, &b, &a); + LKf (k, 8, &a, &c, &b, &e); S6f (&a, &c, &b, &e, &d); SKf (k, 4, &a, &c, &d, &b); + LKf (k, 12, &b, &a, &e, &c); S5f (&b, &a, &e, &c, &d); SKf (k, 8, &a, &c, &b, &e); + LKf (k, 16, &e, &b, &d, &c); S4f (&e, &b, &d, &c, &a); SKf (k, 12, &b, &a, &e, &c); + } + LKf (k, 20, &a, &e, &b, &d); S3f (&a, &e, &b, &d, &c); SKf (k, 16, &e, &b, &d, &c); +} + +#endif // TC_MINIMIZE_CODE_SIZE + + +#ifndef TC_MINIMIZE_CODE_SIZE + +void serpent_encrypt(const unsigned __int8 *inBlock, unsigned __int8 *outBlock, unsigned __int8 *ks) +{ + unsigned __int32 a, b, c, d, e; + unsigned int i=1; + const unsigned __int32 *k = (unsigned __int32 *)ks + 8; + unsigned __int32 *in = (unsigned __int32 *) inBlock; + unsigned __int32 *out = (unsigned __int32 *) outBlock; + + a = LE32(in[0]); + b = LE32(in[1]); + c = LE32(in[2]); + d = LE32(in[3]); + + do + { + beforeS0(KX); beforeS0(S0); afterS0(LT); + afterS0(KX); afterS0(S1); afterS1(LT); + afterS1(KX); afterS1(S2); afterS2(LT); + afterS2(KX); afterS2(S3); afterS3(LT); + afterS3(KX); afterS3(S4); afterS4(LT); + afterS4(KX); afterS4(S5); afterS5(LT); + afterS5(KX); afterS5(S6); afterS6(LT); + afterS6(KX); afterS6(S7); + + if (i == 4) + break; + + ++i; + c = b; + b = e; + e = d; + d = a; + a = e; + k += 32; + beforeS0(LT); + } + while (1); + + afterS7(KX); + + out[0] = LE32(d); + out[1] = LE32(e); + out[2] = LE32(b); + out[3] = LE32(a); +} + +#else // TC_MINIMIZE_CODE_SIZE + +typedef unsigned __int32 uint32; + +static void LTf (uint32 *a, uint32 *b, uint32 *c, uint32 *d) +{ + *a = rotlFixed(*a, 13); + *c = rotlFixed(*c, 3); + *d = rotlFixed(*d ^ *c ^ (*a << 3), 7); + *b = rotlFixed(*b ^ *a ^ *c, 1); + *a = rotlFixed(*a ^ *b ^ *d, 5); + *c = rotlFixed(*c ^ *d ^ (*b << 7), 22); +} + +void serpent_encrypt(const unsigned __int8 *inBlock, unsigned __int8 *outBlock, unsigned __int8 *ks) +{ + unsigned __int32 a, b, c, d, e; + unsigned int i=1; + const unsigned __int32 *k = (unsigned __int32 *)ks + 8; + unsigned __int32 *in = (unsigned __int32 *) inBlock; + unsigned __int32 *out = (unsigned __int32 *) outBlock; + + a = LE32(in[0]); + b = LE32(in[1]); + c = LE32(in[2]); + d = LE32(in[3]); + + do + { + KXf (k, 0, &a, &b, &c, &d); S0f (&a, &b, &c, &d, &e); LTf (&b, &e, &c, &a); + KXf (k, 4, &b, &e, &c, &a); S1f (&b, &e, &c, &a, &d); LTf (&c, &b, &a, &e); + KXf (k, 8, &c, &b, &a, &e); S2f (&c, &b, &a, &e, &d); LTf (&a, &e, &b, &d); + KXf (k, 12, &a, &e, &b, &d); S3f (&a, &e, &b, &d, &c); LTf (&e, &b, &d, &c); + KXf (k, 16, &e, &b, &d, &c); S4f (&e, &b, &d, &c, &a); LTf (&b, &a, &e, &c); + KXf (k, 20, &b, &a, &e, &c); S5f (&b, &a, &e, &c, &d); LTf (&a, &c, &b, &e); + KXf (k, 24, &a, &c, &b, &e); S6f (&a, &c, &b, &e, &d); LTf (&a, &c, &d, &b); + KXf (k, 28, &a, &c, &d, &b); S7f (&a, &c, &d, &b, &e); + + if (i == 4) + break; + + ++i; + c = b; + b = e; + e = d; + d = a; + a = e; + k += 32; + LTf (&a,&b,&c,&d); + } + while (1); + + KXf (k, 32, &d, &e, &b, &a); + + out[0] = LE32(d); + out[1] = LE32(e); + out[2] = LE32(b); + out[3] = LE32(a); +} + +#endif // TC_MINIMIZE_CODE_SIZE + +#if !defined (TC_MINIMIZE_CODE_SIZE) || defined (TC_WINDOWS_BOOT_SERPENT) + +void serpent_decrypt(const unsigned __int8 *inBlock, unsigned __int8 *outBlock, unsigned __int8 *ks) +{ + unsigned __int32 a, b, c, d, e; + const unsigned __int32 *k = (unsigned __int32 *)ks + 104; + unsigned int i=4; + unsigned __int32 *in = (unsigned __int32 *) inBlock; + unsigned __int32 *out = (unsigned __int32 *) outBlock; + + a = LE32(in[0]); + b = LE32(in[1]); + c = LE32(in[2]); + d = LE32(in[3]); + + beforeI7(KX); + goto start; + + do + { + c = b; + b = d; + d = e; + k -= 32; + beforeI7(ILT); +start: + beforeI7(I7); afterI7(KX); + afterI7(ILT); afterI7(I6); afterI6(KX); + afterI6(ILT); afterI6(I5); afterI5(KX); + afterI5(ILT); afterI5(I4); afterI4(KX); + afterI4(ILT); afterI4(I3); afterI3(KX); + afterI3(ILT); afterI3(I2); afterI2(KX); + afterI2(ILT); afterI2(I1); afterI1(KX); + afterI1(ILT); afterI1(I0); afterI0(KX); + } + while (--i != 0); + + out[0] = LE32(a); + out[1] = LE32(d); + out[2] = LE32(b); + out[3] = LE32(e); +} + +#else // TC_MINIMIZE_CODE_SIZE && !TC_WINDOWS_BOOT_SERPENT + +static void ILTf (uint32 *a, uint32 *b, uint32 *c, uint32 *d) +{ + *c = rotrFixed(*c, 22); + *a = rotrFixed(*a, 5); + *c ^= *d ^ (*b << 7); + *a ^= *b ^ *d; + *b = rotrFixed(*b, 1); + *d = rotrFixed(*d, 7) ^ *c ^ (*a << 3); + *b ^= *a ^ *c; + *c = rotrFixed(*c, 3); + *a = rotrFixed(*a, 13); +} + +void serpent_decrypt(const unsigned __int8 *inBlock, unsigned __int8 *outBlock, unsigned __int8 *ks) +{ + unsigned __int32 a, b, c, d, e; + const unsigned __int32 *k = (unsigned __int32 *)ks + 104; + unsigned int i=4; + unsigned __int32 *in = (unsigned __int32 *) inBlock; + unsigned __int32 *out = (unsigned __int32 *) outBlock; + + a = LE32(in[0]); + b = LE32(in[1]); + c = LE32(in[2]); + d = LE32(in[3]); + + KXf (k, 32, &a, &b, &c, &d); + goto start; + + do + { + c = b; + b = d; + d = e; + k -= 32; + beforeI7(ILT); +start: + beforeI7(I7); KXf (k, 28, &d, &a, &b, &e); + ILTf (&d, &a, &b, &e); afterI7(I6); KXf (k, 24, &a, &b, &c, &e); + ILTf (&a, &b, &c, &e); afterI6(I5); KXf (k, 20, &b, &d, &e, &c); + ILTf (&b, &d, &e, &c); afterI5(I4); KXf (k, 16, &b, &c, &e, &a); + ILTf (&b, &c, &e, &a); afterI4(I3); KXf (k, 12, &a, &b, &e, &c); + ILTf (&a, &b, &e, &c); afterI3(I2); KXf (k, 8, &b, &d, &e, &c); + ILTf (&b, &d, &e, &c); afterI2(I1); KXf (k, 4, &a, &b, &c, &e); + ILTf (&a, &b, &c, &e); afterI1(I0); KXf (k, 0, &a, &d, &b, &e); + } + while (--i != 0); + + out[0] = LE32(a); + out[1] = LE32(d); + out[2] = LE32(b); + out[3] = LE32(e); +} + +#endif // TC_MINIMIZE_CODE_SIZE && !TC_WINDOWS_BOOT_SERPENT diff --git a/Crypto/Serpent.h b/Crypto/Serpent.h new file mode 100644 index 0000000..47d35f8 --- /dev/null +++ b/Crypto/Serpent.h @@ -0,0 +1,19 @@ +#ifndef HEADER_Crypto_Serpent +#define HEADER_Crypto_Serpent + +#include "Common/Tcdefs.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + +void serpent_set_key(const unsigned __int8 userKey[], int keylen, unsigned __int8 *ks); +void serpent_encrypt(const unsigned __int8 *inBlock, unsigned __int8 *outBlock, unsigned __int8 *ks); +void serpent_decrypt(const unsigned __int8 *inBlock, unsigned __int8 *outBlock, unsigned __int8 *ks); + +#ifdef __cplusplus +} +#endif + +#endif // HEADER_Crypto_Serpent diff --git a/Crypto/Sha1.c b/Crypto/Sha1.c new file mode 100644 index 0000000..a0eca9a --- /dev/null +++ b/Crypto/Sha1.c @@ -0,0 +1,282 @@ +/* Deprecated/legacy */ + +/* + --------------------------------------------------------------------------- + Copyright (c) 2002, Dr Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software is allowed (with or without + changes) provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue Date: 18/06/2004 + + This is a byte oriented version of SHA1 that operates on arrays of bytes + stored in memory. +*/ + +/* Adapted for TrueCrypt */ + +#include /* for memcpy() etc. */ +#include /* for _lrotl with VC++ */ + +#include "Sha1.h" + +#if defined(__cplusplus) +extern "C" +{ +#endif + +/* + To obtain the highest speed on processors with 32-bit words, this code + needs to determine the order in which bytes are packed into such words. + The following block of code is an attempt to capture the most obvious + ways in which various environemnts specify their endian definitions. + It may well fail, in which case the definitions will need to be set by + editing at the points marked **** EDIT HERE IF NECESSARY **** below. +*/ + +/* PLATFORM SPECIFIC INCLUDES */ + +/* Original byte order detection removed */ +#include "../Common/Endian.h" + +#define BRG_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */ +#define BRG_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */ + +#if BYTE_ORDER == LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN +#endif + +#if BYTE_ORDER == BIG_ENDIAN +# define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN +#endif + +#ifdef _MSC_VER +#pragma intrinsic(memcpy) +#endif + +#if 1 && defined(_MSC_VER) && !defined(_DEBUG) +#define rotl32 _rotl +#define rotr32 _rotr +#else +#define rotl32(x,n) (((x) << n) | ((x) >> (32 - n))) +#define rotr32(x,n) (((x) >> n) | ((x) << (32 - n))) +#endif + +#if !defined(bswap_32) +#define bswap_32(x) ((rotr32((x), 24) & 0x00ff00ff) | (rotr32((x), 8) & 0xff00ff00)) +#endif + +#if (PLATFORM_BYTE_ORDER == BRG_LITTLE_ENDIAN) +#define SWAP_BYTES +#else +#undef SWAP_BYTES +#endif + +#if defined(SWAP_BYTES) +#define bsw_32(p,n) \ + { int _i = (n); while(_i--) ((sha1_32t*)p)[_i] = bswap_32(((sha1_32t*)p)[_i]); } +#else +#define bsw_32(p,n) +#endif + +#define SHA1_MASK (SHA1_BLOCK_SIZE - 1) + +#if 0 + +#define ch(x,y,z) (((x) & (y)) ^ (~(x) & (z))) +#define parity(x,y,z) ((x) ^ (y) ^ (z)) +#define maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) + +#else /* Discovered by Rich Schroeppel and Colin Plumb */ + +#define ch(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) +#define parity(x,y,z) ((x) ^ (y) ^ (z)) +#define maj(x,y,z) (((x) & (y)) | ((z) & ((x) ^ (y)))) + +#endif + +/* Compile 64 bytes of hash data into SHA1 context. Note */ +/* that this routine assumes that the byte order in the */ +/* ctx->wbuf[] at this point is in such an order that low */ +/* address bytes in the ORIGINAL byte stream will go in */ +/* this buffer to the high end of 32-bit words on BOTH big */ +/* and little endian systems */ + +#ifdef ARRAY +#define q(v,n) v[n] +#else +#define q(v,n) v##n +#endif + +#define one_cycle(v,a,b,c,d,e,f,k,h) \ + q(v,e) += rotr32(q(v,a),27) + \ + f(q(v,b),q(v,c),q(v,d)) + k + h; \ + q(v,b) = rotr32(q(v,b), 2) + +#define five_cycle(v,f,k,i) \ + one_cycle(v, 0,1,2,3,4, f,k,hf(i )); \ + one_cycle(v, 4,0,1,2,3, f,k,hf(i+1)); \ + one_cycle(v, 3,4,0,1,2, f,k,hf(i+2)); \ + one_cycle(v, 2,3,4,0,1, f,k,hf(i+3)); \ + one_cycle(v, 1,2,3,4,0, f,k,hf(i+4)) + +void sha1_compile(sha1_ctx ctx[1]) +{ sha1_32t *w = ctx->wbuf; + +#ifdef ARRAY + sha1_32t v[5]; + memcpy(v, ctx->hash, 5 * sizeof(sha1_32t)); +#else + sha1_32t v0, v1, v2, v3, v4; + v0 = ctx->hash[0]; v1 = ctx->hash[1]; + v2 = ctx->hash[2]; v3 = ctx->hash[3]; + v4 = ctx->hash[4]; +#endif + +#define hf(i) w[i] + + five_cycle(v, ch, 0x5a827999, 0); + five_cycle(v, ch, 0x5a827999, 5); + five_cycle(v, ch, 0x5a827999, 10); + one_cycle(v,0,1,2,3,4, ch, 0x5a827999, hf(15)); \ + +#undef hf +#define hf(i) (w[(i) & 15] = rotl32( \ + w[((i) + 13) & 15] ^ w[((i) + 8) & 15] \ + ^ w[((i) + 2) & 15] ^ w[(i) & 15], 1)) + + one_cycle(v,4,0,1,2,3, ch, 0x5a827999, hf(16)); + one_cycle(v,3,4,0,1,2, ch, 0x5a827999, hf(17)); + one_cycle(v,2,3,4,0,1, ch, 0x5a827999, hf(18)); + one_cycle(v,1,2,3,4,0, ch, 0x5a827999, hf(19)); + + five_cycle(v, parity, 0x6ed9eba1, 20); + five_cycle(v, parity, 0x6ed9eba1, 25); + five_cycle(v, parity, 0x6ed9eba1, 30); + five_cycle(v, parity, 0x6ed9eba1, 35); + + five_cycle(v, maj, 0x8f1bbcdc, 40); + five_cycle(v, maj, 0x8f1bbcdc, 45); + five_cycle(v, maj, 0x8f1bbcdc, 50); + five_cycle(v, maj, 0x8f1bbcdc, 55); + + five_cycle(v, parity, 0xca62c1d6, 60); + five_cycle(v, parity, 0xca62c1d6, 65); + five_cycle(v, parity, 0xca62c1d6, 70); + five_cycle(v, parity, 0xca62c1d6, 75); + +#ifdef ARRAY + ctx->hash[0] += v[0]; ctx->hash[1] += v[1]; + ctx->hash[2] += v[2]; ctx->hash[3] += v[3]; + ctx->hash[4] += v[4]; +#else + ctx->hash[0] += v0; ctx->hash[1] += v1; + ctx->hash[2] += v2; ctx->hash[3] += v3; + ctx->hash[4] += v4; +#endif +} + +void sha1_begin(sha1_ctx ctx[1]) +{ + ctx->count[0] = ctx->count[1] = 0; + ctx->hash[0] = 0x67452301; + ctx->hash[1] = 0xefcdab89; + ctx->hash[2] = 0x98badcfe; + ctx->hash[3] = 0x10325476; + ctx->hash[4] = 0xc3d2e1f0; +} + +/* SHA1 hash data in an array of bytes into hash buffer and */ +/* call the hash_compile function as required. */ + +void sha1_hash(const unsigned char data[], unsigned __int32 len, sha1_ctx ctx[1]) +{ sha1_32t pos = (sha1_32t)(ctx->count[0] & SHA1_MASK), + space = SHA1_BLOCK_SIZE - pos; + const unsigned char *sp = data; + + if((ctx->count[0] += len) < len) + ++(ctx->count[1]); + + while(len >= space) /* tranfer whole blocks if possible */ + { + memcpy(((unsigned char*)ctx->wbuf) + pos, sp, space); + sp += space; len -= space; space = SHA1_BLOCK_SIZE; pos = 0; + bsw_32(ctx->wbuf, SHA1_BLOCK_SIZE >> 2); + sha1_compile(ctx); + } + + memcpy(((unsigned char*)ctx->wbuf) + pos, sp, len); +} + +/* SHA1 final padding and digest calculation */ + +void sha1_end(unsigned char hval[], sha1_ctx ctx[1]) +{ sha1_32t i = (sha1_32t)(ctx->count[0] & SHA1_MASK); + + /* put bytes in the buffer in an order in which references to */ + /* 32-bit words will put bytes with lower addresses into the */ + /* top of 32 bit words on BOTH big and little endian machines */ + bsw_32(ctx->wbuf, (i + 3) >> 2); + + /* we now need to mask valid bytes and add the padding which is */ + /* a single 1 bit and as many zero bits as necessary. Note that */ + /* we can always add the first padding byte here because the */ + /* buffer always has at least one empty slot */ + ctx->wbuf[i >> 2] &= 0xffffff80 << 8 * (~i & 3); + ctx->wbuf[i >> 2] |= 0x00000080 << 8 * (~i & 3); + + /* we need 9 or more empty positions, one for the padding byte */ + /* (above) and eight for the length count. If there is not */ + /* enough space, pad and empty the buffer */ + if(i > SHA1_BLOCK_SIZE - 9) + { + if(i < 60) ctx->wbuf[15] = 0; + sha1_compile(ctx); + i = 0; + } + else /* compute a word index for the empty buffer positions */ + i = (i >> 2) + 1; + + while(i < 14) /* and zero pad all but last two positions */ + ctx->wbuf[i++] = 0; + + /* the following 32-bit length fields are assembled in the */ + /* wrong byte order on little endian machines but this is */ + /* corrected later since they are only ever used as 32-bit */ + /* word values. */ + ctx->wbuf[14] = (ctx->count[1] << 3) | (ctx->count[0] >> 29); + ctx->wbuf[15] = ctx->count[0] << 3; + sha1_compile(ctx); + + /* extract the hash value as bytes in case the hash buffer is */ + /* misaligned for 32-bit words */ + for(i = 0; i < SHA1_DIGEST_SIZE; ++i) + hval[i] = (unsigned char)(ctx->hash[i >> 2] >> (8 * (~i & 3))); +} + +void sha1(unsigned char hval[], const unsigned char data[], unsigned __int32 len) +{ sha1_ctx cx[1]; + + sha1_begin(cx); sha1_hash(data, len, cx); sha1_end(hval, cx); +} + +#if defined(__cplusplus) +} +#endif diff --git a/Crypto/Sha1.h b/Crypto/Sha1.h new file mode 100644 index 0000000..34a839c --- /dev/null +++ b/Crypto/Sha1.h @@ -0,0 +1,80 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 2002, Dr Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software is allowed (with or without + changes) provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue Date: 26/08/2003 +*/ + +#ifndef _SHA1_H +#define _SHA1_H + +#include +#include "Common/Tcdefs.h" + +#define SHA1_BLOCK_SIZE 64 +#define SHA1_DIGEST_SIZE 20 + +#if defined(__cplusplus) +extern "C" +{ +#endif + +/* define an unsigned 32-bit type */ + +#if defined(_MSC_VER) + typedef unsigned __int32 sha1_32t; +#elif defined(ULONG_MAX) && ULONG_MAX == 0xfffffffful + typedef unsigned __int32 sha1_32t; +#elif defined(UINT_MAX) && UINT_MAX == 0xffffffff + typedef unsigned int sha1_32t; +#else +# error Please define sha1_32t as an unsigned 32 bit type in sha1.h +#endif + +/* type to hold the SHA256 context */ + +typedef struct +{ sha1_32t count[2]; + sha1_32t hash[5]; + sha1_32t wbuf[16]; +} sha1_ctx; + +/* Note that these prototypes are the same for both bit and */ +/* byte oriented implementations. However the length fields */ +/* are in bytes or bits as appropriate for the version used */ +/* and bit sequences are input as arrays of bytes in which */ +/* bit sequences run from the most to the least significant */ +/* end of each byte */ + +void sha1_compile(sha1_ctx ctx[1]); + +void sha1_begin(sha1_ctx ctx[1]); +void sha1_hash(const unsigned char data[], unsigned __int32 len, sha1_ctx ctx[1]); +void sha1_end(unsigned char hval[], sha1_ctx ctx[1]); +void sha1(unsigned char hval[], const unsigned char data[], unsigned __int32 len); + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/Crypto/Sha2.c b/Crypto/Sha2.c new file mode 100644 index 0000000..9ae8c1c --- /dev/null +++ b/Crypto/Sha2.c @@ -0,0 +1,770 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 2002, Dr Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software is allowed (with or without + changes) provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue Date: 01/08/2005 + + This is a byte oriented version of SHA2 that operates on arrays of bytes + stored in memory. This code implements sha256, sha384 and sha512 but the + latter two functions rely on efficient 64-bit integer operations that + may not be very efficient on 32-bit machines + + The sha256 functions use a type 'sha256_ctx' to hold details of the + current hash state and uses the following three calls: + + void sha256_begin(sha256_ctx ctx[1]) + void sha256_hash(const unsigned char data[], + unsigned long len, sha256_ctx ctx[1]) + void sha_end1(unsigned char hval[], sha256_ctx ctx[1]) + + The first subroutine initialises a hash computation by setting up the + context in the sha256_ctx context. The second subroutine hashes 8-bit + bytes from array data[] into the hash state withinh sha256_ctx context, + the number of bytes to be hashed being given by the the unsigned long + integer len. The third subroutine completes the hash calculation and + places the resulting digest value in the array of 8-bit bytes hval[]. + + The sha384 and sha512 functions are similar and use the interfaces: + + void sha384_begin(sha384_ctx ctx[1]); + void sha384_hash(const unsigned char data[], + unsigned long len, sha384_ctx ctx[1]); + void sha384_end(unsigned char hval[], sha384_ctx ctx[1]); + + void sha512_begin(sha512_ctx ctx[1]); + void sha512_hash(const unsigned char data[], + unsigned long len, sha512_ctx ctx[1]); + void sha512_end(unsigned char hval[], sha512_ctx ctx[1]); + + In addition there is a function sha2 that can be used to call all these + functions using a call with a hash length parameter as follows: + + int sha2_begin(unsigned long len, sha2_ctx ctx[1]); + void sha2_hash(const unsigned char data[], + unsigned long len, sha2_ctx ctx[1]); + void sha2_end(unsigned char hval[], sha2_ctx ctx[1]); + + My thanks to Erik Andersen for testing this code + on big-endian systems and for his assistance with corrections +*/ + +#include "Common/Endian.h" +#define PLATFORM_BYTE_ORDER BYTE_ORDER +#define IS_LITTLE_ENDIAN LITTLE_ENDIAN + +#if 0 +#define UNROLL_SHA2 /* for SHA2 loop unroll */ +#endif + +#include /* for memcpy() etc. */ + +#include "Sha2.h" + +#if defined(__cplusplus) +extern "C" +{ +#endif + +#if defined( _MSC_VER ) && ( _MSC_VER > 800 ) +#pragma intrinsic(memcpy) +#endif + +#if 0 && defined(_MSC_VER) +#define rotl32 _lrotl +#define rotr32 _lrotr +#else +#define rotl32(x,n) (((x) << n) | ((x) >> (32 - n))) +#define rotr32(x,n) (((x) >> n) | ((x) << (32 - n))) +#endif + +#if !defined(bswap_32) +#define bswap_32(x) ((rotr32((x), 24) & 0x00ff00ff) | (rotr32((x), 8) & 0xff00ff00)) +#endif + +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) +#define SWAP_BYTES +#else +#undef SWAP_BYTES +#endif + +#if 0 + +#define ch(x,y,z) (((x) & (y)) ^ (~(x) & (z))) +#define maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) + +#else /* Thanks to Rich Schroeppel and Colin Plumb for the following */ + +#define ch(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) +#define maj(x,y,z) (((x) & (y)) | ((z) & ((x) ^ (y)))) + +#endif + +/* round transforms for SHA256 and SHA512 compression functions */ + +#define vf(n,i) v[(n - i) & 7] + +#define hf(i) (p[i & 15] += \ + g_1(p[(i + 14) & 15]) + p[(i + 9) & 15] + g_0(p[(i + 1) & 15])) + +#define v_cycle(i,j) \ + vf(7,i) += (j ? hf(i) : p[i]) + k_0[i+j] \ + + s_1(vf(4,i)) + ch(vf(4,i),vf(5,i),vf(6,i)); \ + vf(3,i) += vf(7,i); \ + vf(7,i) += s_0(vf(0,i))+ maj(vf(0,i),vf(1,i),vf(2,i)) + +#if defined(SHA_224) || defined(SHA_256) + +#define SHA256_MASK (SHA256_BLOCK_SIZE - 1) + +#if defined(SWAP_BYTES) +#define bsw_32(p,n) \ + { int _i = (n); while(_i--) ((uint_32t*)p)[_i] = bswap_32(((uint_32t*)p)[_i]); } +#else +#define bsw_32(p,n) +#endif + +#define s_0(x) (rotr32((x), 2) ^ rotr32((x), 13) ^ rotr32((x), 22)) +#define s_1(x) (rotr32((x), 6) ^ rotr32((x), 11) ^ rotr32((x), 25)) +#define g_0(x) (rotr32((x), 7) ^ rotr32((x), 18) ^ ((x) >> 3)) +#define g_1(x) (rotr32((x), 17) ^ rotr32((x), 19) ^ ((x) >> 10)) +#define k_0 k256 + +/* rotated SHA256 round definition. Rather than swapping variables as in */ +/* FIPS-180, different variables are 'rotated' on each round, returning */ +/* to their starting positions every eight rounds */ + +#define q(n) v##n + +#define one_cycle(a,b,c,d,e,f,g,h,k,w) \ + q(h) += s_1(q(e)) + ch(q(e), q(f), q(g)) + k + w; \ + q(d) += q(h); q(h) += s_0(q(a)) + maj(q(a), q(b), q(c)) + +/* SHA256 mixing data */ + +const uint_32t k256[64] = +{ 0x428a2f98ul, 0x71374491ul, 0xb5c0fbcful, 0xe9b5dba5ul, + 0x3956c25bul, 0x59f111f1ul, 0x923f82a4ul, 0xab1c5ed5ul, + 0xd807aa98ul, 0x12835b01ul, 0x243185beul, 0x550c7dc3ul, + 0x72be5d74ul, 0x80deb1feul, 0x9bdc06a7ul, 0xc19bf174ul, + 0xe49b69c1ul, 0xefbe4786ul, 0x0fc19dc6ul, 0x240ca1ccul, + 0x2de92c6ful, 0x4a7484aaul, 0x5cb0a9dcul, 0x76f988daul, + 0x983e5152ul, 0xa831c66dul, 0xb00327c8ul, 0xbf597fc7ul, + 0xc6e00bf3ul, 0xd5a79147ul, 0x06ca6351ul, 0x14292967ul, + 0x27b70a85ul, 0x2e1b2138ul, 0x4d2c6dfcul, 0x53380d13ul, + 0x650a7354ul, 0x766a0abbul, 0x81c2c92eul, 0x92722c85ul, + 0xa2bfe8a1ul, 0xa81a664bul, 0xc24b8b70ul, 0xc76c51a3ul, + 0xd192e819ul, 0xd6990624ul, 0xf40e3585ul, 0x106aa070ul, + 0x19a4c116ul, 0x1e376c08ul, 0x2748774cul, 0x34b0bcb5ul, + 0x391c0cb3ul, 0x4ed8aa4aul, 0x5b9cca4ful, 0x682e6ff3ul, + 0x748f82eeul, 0x78a5636ful, 0x84c87814ul, 0x8cc70208ul, + 0x90befffaul, 0xa4506cebul, 0xbef9a3f7ul, 0xc67178f2ul, +}; + +/* Compile 64 bytes of hash data into SHA256 digest value */ +/* NOTE: this routine assumes that the byte order in the */ +/* ctx->wbuf[] at this point is such that low address bytes */ +/* in the ORIGINAL byte stream will go into the high end of */ +/* words on BOTH big and little endian systems */ + +VOID_RETURN sha256_compile(sha256_ctx ctx[1]) +{ +#if !defined(UNROLL_SHA2) + + uint_32t j, *p = ctx->wbuf, v[8]; + + memcpy(v, ctx->hash, 8 * sizeof(uint_32t)); + + for(j = 0; j < 64; j += 16) + { + v_cycle( 0, j); v_cycle( 1, j); + v_cycle( 2, j); v_cycle( 3, j); + v_cycle( 4, j); v_cycle( 5, j); + v_cycle( 6, j); v_cycle( 7, j); + v_cycle( 8, j); v_cycle( 9, j); + v_cycle(10, j); v_cycle(11, j); + v_cycle(12, j); v_cycle(13, j); + v_cycle(14, j); v_cycle(15, j); + } + + ctx->hash[0] += v[0]; ctx->hash[1] += v[1]; + ctx->hash[2] += v[2]; ctx->hash[3] += v[3]; + ctx->hash[4] += v[4]; ctx->hash[5] += v[5]; + ctx->hash[6] += v[6]; ctx->hash[7] += v[7]; + +#else + + uint_32t *p = ctx->wbuf,v0,v1,v2,v3,v4,v5,v6,v7; + + v0 = ctx->hash[0]; v1 = ctx->hash[1]; + v2 = ctx->hash[2]; v3 = ctx->hash[3]; + v4 = ctx->hash[4]; v5 = ctx->hash[5]; + v6 = ctx->hash[6]; v7 = ctx->hash[7]; + + one_cycle(0,1,2,3,4,5,6,7,k256[ 0],p[ 0]); + one_cycle(7,0,1,2,3,4,5,6,k256[ 1],p[ 1]); + one_cycle(6,7,0,1,2,3,4,5,k256[ 2],p[ 2]); + one_cycle(5,6,7,0,1,2,3,4,k256[ 3],p[ 3]); + one_cycle(4,5,6,7,0,1,2,3,k256[ 4],p[ 4]); + one_cycle(3,4,5,6,7,0,1,2,k256[ 5],p[ 5]); + one_cycle(2,3,4,5,6,7,0,1,k256[ 6],p[ 6]); + one_cycle(1,2,3,4,5,6,7,0,k256[ 7],p[ 7]); + one_cycle(0,1,2,3,4,5,6,7,k256[ 8],p[ 8]); + one_cycle(7,0,1,2,3,4,5,6,k256[ 9],p[ 9]); + one_cycle(6,7,0,1,2,3,4,5,k256[10],p[10]); + one_cycle(5,6,7,0,1,2,3,4,k256[11],p[11]); + one_cycle(4,5,6,7,0,1,2,3,k256[12],p[12]); + one_cycle(3,4,5,6,7,0,1,2,k256[13],p[13]); + one_cycle(2,3,4,5,6,7,0,1,k256[14],p[14]); + one_cycle(1,2,3,4,5,6,7,0,k256[15],p[15]); + + one_cycle(0,1,2,3,4,5,6,7,k256[16],hf( 0)); + one_cycle(7,0,1,2,3,4,5,6,k256[17],hf( 1)); + one_cycle(6,7,0,1,2,3,4,5,k256[18],hf( 2)); + one_cycle(5,6,7,0,1,2,3,4,k256[19],hf( 3)); + one_cycle(4,5,6,7,0,1,2,3,k256[20],hf( 4)); + one_cycle(3,4,5,6,7,0,1,2,k256[21],hf( 5)); + one_cycle(2,3,4,5,6,7,0,1,k256[22],hf( 6)); + one_cycle(1,2,3,4,5,6,7,0,k256[23],hf( 7)); + one_cycle(0,1,2,3,4,5,6,7,k256[24],hf( 8)); + one_cycle(7,0,1,2,3,4,5,6,k256[25],hf( 9)); + one_cycle(6,7,0,1,2,3,4,5,k256[26],hf(10)); + one_cycle(5,6,7,0,1,2,3,4,k256[27],hf(11)); + one_cycle(4,5,6,7,0,1,2,3,k256[28],hf(12)); + one_cycle(3,4,5,6,7,0,1,2,k256[29],hf(13)); + one_cycle(2,3,4,5,6,7,0,1,k256[30],hf(14)); + one_cycle(1,2,3,4,5,6,7,0,k256[31],hf(15)); + + one_cycle(0,1,2,3,4,5,6,7,k256[32],hf( 0)); + one_cycle(7,0,1,2,3,4,5,6,k256[33],hf( 1)); + one_cycle(6,7,0,1,2,3,4,5,k256[34],hf( 2)); + one_cycle(5,6,7,0,1,2,3,4,k256[35],hf( 3)); + one_cycle(4,5,6,7,0,1,2,3,k256[36],hf( 4)); + one_cycle(3,4,5,6,7,0,1,2,k256[37],hf( 5)); + one_cycle(2,3,4,5,6,7,0,1,k256[38],hf( 6)); + one_cycle(1,2,3,4,5,6,7,0,k256[39],hf( 7)); + one_cycle(0,1,2,3,4,5,6,7,k256[40],hf( 8)); + one_cycle(7,0,1,2,3,4,5,6,k256[41],hf( 9)); + one_cycle(6,7,0,1,2,3,4,5,k256[42],hf(10)); + one_cycle(5,6,7,0,1,2,3,4,k256[43],hf(11)); + one_cycle(4,5,6,7,0,1,2,3,k256[44],hf(12)); + one_cycle(3,4,5,6,7,0,1,2,k256[45],hf(13)); + one_cycle(2,3,4,5,6,7,0,1,k256[46],hf(14)); + one_cycle(1,2,3,4,5,6,7,0,k256[47],hf(15)); + + one_cycle(0,1,2,3,4,5,6,7,k256[48],hf( 0)); + one_cycle(7,0,1,2,3,4,5,6,k256[49],hf( 1)); + one_cycle(6,7,0,1,2,3,4,5,k256[50],hf( 2)); + one_cycle(5,6,7,0,1,2,3,4,k256[51],hf( 3)); + one_cycle(4,5,6,7,0,1,2,3,k256[52],hf( 4)); + one_cycle(3,4,5,6,7,0,1,2,k256[53],hf( 5)); + one_cycle(2,3,4,5,6,7,0,1,k256[54],hf( 6)); + one_cycle(1,2,3,4,5,6,7,0,k256[55],hf( 7)); + one_cycle(0,1,2,3,4,5,6,7,k256[56],hf( 8)); + one_cycle(7,0,1,2,3,4,5,6,k256[57],hf( 9)); + one_cycle(6,7,0,1,2,3,4,5,k256[58],hf(10)); + one_cycle(5,6,7,0,1,2,3,4,k256[59],hf(11)); + one_cycle(4,5,6,7,0,1,2,3,k256[60],hf(12)); + one_cycle(3,4,5,6,7,0,1,2,k256[61],hf(13)); + one_cycle(2,3,4,5,6,7,0,1,k256[62],hf(14)); + one_cycle(1,2,3,4,5,6,7,0,k256[63],hf(15)); + + ctx->hash[0] += v0; ctx->hash[1] += v1; + ctx->hash[2] += v2; ctx->hash[3] += v3; + ctx->hash[4] += v4; ctx->hash[5] += v5; + ctx->hash[6] += v6; ctx->hash[7] += v7; +#endif +} + +/* SHA256 hash data in an array of bytes into hash buffer */ +/* and call the hash_compile function as required. */ + +VOID_RETURN sha256_hash(const unsigned char data[], unsigned long len, sha256_ctx ctx[1]) +{ uint_32t pos = (uint_32t)(ctx->count[0] & SHA256_MASK), + space = SHA256_BLOCK_SIZE - pos; + const unsigned char *sp = data; + + if((ctx->count[0] += len) < len) + ++(ctx->count[1]); + + while(len >= space) /* tranfer whole blocks while possible */ + { + memcpy(((unsigned char*)ctx->wbuf) + pos, sp, space); + sp += space; len -= space; space = SHA256_BLOCK_SIZE; pos = 0; + bsw_32(ctx->wbuf, SHA256_BLOCK_SIZE >> 2) + sha256_compile(ctx); + } + + memcpy(((unsigned char*)ctx->wbuf) + pos, sp, len); +} + +/* SHA256 Final padding and digest calculation */ + +static void sha_end1(unsigned char hval[], sha256_ctx ctx[1], const unsigned int hlen) +{ uint_32t i = (uint_32t)(ctx->count[0] & SHA256_MASK); + + /* put bytes in the buffer in an order in which references to */ + /* 32-bit words will put bytes with lower addresses into the */ + /* top of 32 bit words on BOTH big and little endian machines */ + bsw_32(ctx->wbuf, (i + 3) >> 2) + + /* we now need to mask valid bytes and add the padding which is */ + /* a single 1 bit and as many zero bits as necessary. Note that */ + /* we can always add the first padding byte here because the */ + /* buffer always has at least one empty slot */ + ctx->wbuf[i >> 2] &= 0xffffff80 << 8 * (~i & 3); + ctx->wbuf[i >> 2] |= 0x00000080 << 8 * (~i & 3); + + /* we need 9 or more empty positions, one for the padding byte */ + /* (above) and eight for the length count. If there is not */ + /* enough space pad and empty the buffer */ + if(i > SHA256_BLOCK_SIZE - 9) + { + if(i < 60) ctx->wbuf[15] = 0; + sha256_compile(ctx); + i = 0; + } + else /* compute a word index for the empty buffer positions */ + i = (i >> 2) + 1; + + while(i < 14) /* and zero pad all but last two positions */ + ctx->wbuf[i++] = 0; + + /* the following 32-bit length fields are assembled in the */ + /* wrong byte order on little endian machines but this is */ + /* corrected later since they are only ever used as 32-bit */ + /* word values. */ + ctx->wbuf[14] = (ctx->count[1] << 3) | (ctx->count[0] >> 29); + ctx->wbuf[15] = ctx->count[0] << 3; + sha256_compile(ctx); + + /* extract the hash value as bytes in case the hash buffer is */ + /* mislaigned for 32-bit words */ + for(i = 0; i < hlen; ++i) + hval[i] = (unsigned char)(ctx->hash[i >> 2] >> (8 * (~i & 3))); +} + +#endif + +#if defined(SHA_224) + +const uint_32t i224[8] = +{ + 0xc1059ed8ul, 0x367cd507ul, 0x3070dd17ul, 0xf70e5939ul, + 0xffc00b31ul, 0x68581511ul, 0x64f98fa7ul, 0xbefa4fa4ul +}; + +VOID_RETURN sha224_begin(sha224_ctx ctx[1]) +{ + ctx->count[0] = ctx->count[1] = 0; + memcpy(ctx->hash, i224, 8 * sizeof(uint_32t)); +} + +VOID_RETURN sha224_end(unsigned char hval[], sha224_ctx ctx[1]) +{ + sha_end1(hval, ctx, SHA224_DIGEST_SIZE); +} + +VOID_RETURN sha224(unsigned char hval[], const unsigned char data[], unsigned long len) +{ sha224_ctx cx[1]; + + sha224_begin(cx); + sha224_hash(data, len, cx); + sha_end1(hval, cx, SHA224_DIGEST_SIZE); +} + +#endif + +#if defined(SHA_256) + +const uint_32t i256[8] = +{ + 0x6a09e667ul, 0xbb67ae85ul, 0x3c6ef372ul, 0xa54ff53aul, + 0x510e527ful, 0x9b05688cul, 0x1f83d9abul, 0x5be0cd19ul +}; + +VOID_RETURN sha256_begin(sha256_ctx ctx[1]) +{ + ctx->count[0] = ctx->count[1] = 0; + memcpy(ctx->hash, i256, 8 * sizeof(uint_32t)); +} + +VOID_RETURN sha256_end(unsigned char hval[], sha256_ctx ctx[1]) +{ + sha_end1(hval, ctx, SHA256_DIGEST_SIZE); +} + +VOID_RETURN sha256(unsigned char hval[], const unsigned char data[], unsigned long len) +{ sha256_ctx cx[1]; + + sha256_begin(cx); + sha256_hash(data, len, cx); + sha_end1(hval, cx, SHA256_DIGEST_SIZE); +} + +#endif + +#if defined(SHA_384) || defined(SHA_512) + +#define SHA512_MASK (SHA512_BLOCK_SIZE - 1) + +#define rotr64(x,n) (((x) >> n) | ((x) << (64 - n))) + +#if !defined(bswap_64) +#define bswap_64(x) (((uint_64t)(bswap_32((uint_32t)(x)))) << 32 | bswap_32((uint_32t)((x) >> 32))) +#endif + +#if defined(SWAP_BYTES) +#define bsw_64(p,n) \ + { int _i = (n); while(_i--) ((uint_64t*)p)[_i] = bswap_64(((uint_64t*)p)[_i]); } +#else +#define bsw_64(p,n) +#endif + +/* SHA512 mixing function definitions */ + +#ifdef s_0 +# undef s_0 +# undef s_1 +# undef g_0 +# undef g_1 +# undef k_0 +#endif + +#define s_0(x) (rotr64((x), 28) ^ rotr64((x), 34) ^ rotr64((x), 39)) +#define s_1(x) (rotr64((x), 14) ^ rotr64((x), 18) ^ rotr64((x), 41)) +#define g_0(x) (rotr64((x), 1) ^ rotr64((x), 8) ^ ((x) >> 7)) +#define g_1(x) (rotr64((x), 19) ^ rotr64((x), 61) ^ ((x) >> 6)) +#define k_0 k512 + +/* SHA384/SHA512 mixing data */ + +const uint_64t k512[80] = +{ + li_64(428a2f98d728ae22), li_64(7137449123ef65cd), + li_64(b5c0fbcfec4d3b2f), li_64(e9b5dba58189dbbc), + li_64(3956c25bf348b538), li_64(59f111f1b605d019), + li_64(923f82a4af194f9b), li_64(ab1c5ed5da6d8118), + li_64(d807aa98a3030242), li_64(12835b0145706fbe), + li_64(243185be4ee4b28c), li_64(550c7dc3d5ffb4e2), + li_64(72be5d74f27b896f), li_64(80deb1fe3b1696b1), + li_64(9bdc06a725c71235), li_64(c19bf174cf692694), + li_64(e49b69c19ef14ad2), li_64(efbe4786384f25e3), + li_64(0fc19dc68b8cd5b5), li_64(240ca1cc77ac9c65), + li_64(2de92c6f592b0275), li_64(4a7484aa6ea6e483), + li_64(5cb0a9dcbd41fbd4), li_64(76f988da831153b5), + li_64(983e5152ee66dfab), li_64(a831c66d2db43210), + li_64(b00327c898fb213f), li_64(bf597fc7beef0ee4), + li_64(c6e00bf33da88fc2), li_64(d5a79147930aa725), + li_64(06ca6351e003826f), li_64(142929670a0e6e70), + li_64(27b70a8546d22ffc), li_64(2e1b21385c26c926), + li_64(4d2c6dfc5ac42aed), li_64(53380d139d95b3df), + li_64(650a73548baf63de), li_64(766a0abb3c77b2a8), + li_64(81c2c92e47edaee6), li_64(92722c851482353b), + li_64(a2bfe8a14cf10364), li_64(a81a664bbc423001), + li_64(c24b8b70d0f89791), li_64(c76c51a30654be30), + li_64(d192e819d6ef5218), li_64(d69906245565a910), + li_64(f40e35855771202a), li_64(106aa07032bbd1b8), + li_64(19a4c116b8d2d0c8), li_64(1e376c085141ab53), + li_64(2748774cdf8eeb99), li_64(34b0bcb5e19b48a8), + li_64(391c0cb3c5c95a63), li_64(4ed8aa4ae3418acb), + li_64(5b9cca4f7763e373), li_64(682e6ff3d6b2b8a3), + li_64(748f82ee5defb2fc), li_64(78a5636f43172f60), + li_64(84c87814a1f0ab72), li_64(8cc702081a6439ec), + li_64(90befffa23631e28), li_64(a4506cebde82bde9), + li_64(bef9a3f7b2c67915), li_64(c67178f2e372532b), + li_64(ca273eceea26619c), li_64(d186b8c721c0c207), + li_64(eada7dd6cde0eb1e), li_64(f57d4f7fee6ed178), + li_64(06f067aa72176fba), li_64(0a637dc5a2c898a6), + li_64(113f9804bef90dae), li_64(1b710b35131c471b), + li_64(28db77f523047d84), li_64(32caab7b40c72493), + li_64(3c9ebe0a15c9bebc), li_64(431d67c49c100d4c), + li_64(4cc5d4becb3e42b6), li_64(597f299cfc657e2a), + li_64(5fcb6fab3ad6faec), li_64(6c44198c4a475817) +}; + +/* Compile 128 bytes of hash data into SHA384/512 digest */ +/* NOTE: this routine assumes that the byte order in the */ +/* ctx->wbuf[] at this point is such that low address bytes */ +/* in the ORIGINAL byte stream will go into the high end of */ +/* words on BOTH big and little endian systems */ + +VOID_RETURN sha512_compile(sha512_ctx ctx[1]) +{ uint_64t v[8], *p = ctx->wbuf; + uint_32t j; + + memcpy(v, ctx->hash, 8 * sizeof(uint_64t)); + + for(j = 0; j < 80; j += 16) + { + v_cycle( 0, j); v_cycle( 1, j); + v_cycle( 2, j); v_cycle( 3, j); + v_cycle( 4, j); v_cycle( 5, j); + v_cycle( 6, j); v_cycle( 7, j); + v_cycle( 8, j); v_cycle( 9, j); + v_cycle(10, j); v_cycle(11, j); + v_cycle(12, j); v_cycle(13, j); + v_cycle(14, j); v_cycle(15, j); + } + + ctx->hash[0] += v[0]; ctx->hash[1] += v[1]; + ctx->hash[2] += v[2]; ctx->hash[3] += v[3]; + ctx->hash[4] += v[4]; ctx->hash[5] += v[5]; + ctx->hash[6] += v[6]; ctx->hash[7] += v[7]; +} + +/* Compile 128 bytes of hash data into SHA256 digest value */ +/* NOTE: this routine assumes that the byte order in the */ +/* ctx->wbuf[] at this point is in such an order that low */ +/* address bytes in the ORIGINAL byte stream placed in this */ +/* buffer will now go to the high end of words on BOTH big */ +/* and little endian systems */ + +VOID_RETURN sha512_hash(const unsigned char data[], unsigned long len, sha512_ctx ctx[1]) +{ uint_32t pos = (uint_32t)(ctx->count[0] & SHA512_MASK), + space = SHA512_BLOCK_SIZE - pos; + const unsigned char *sp = data; + + if((ctx->count[0] += len) < len) + ++(ctx->count[1]); + + while(len >= space) /* tranfer whole blocks while possible */ + { + memcpy(((unsigned char*)ctx->wbuf) + pos, sp, space); + sp += space; len -= space; space = SHA512_BLOCK_SIZE; pos = 0; + bsw_64(ctx->wbuf, SHA512_BLOCK_SIZE >> 3); + sha512_compile(ctx); + } + + memcpy(((unsigned char*)ctx->wbuf) + pos, sp, len); +} + +/* SHA384/512 Final padding and digest calculation */ + +static void sha_end2(unsigned char hval[], sha512_ctx ctx[1], const unsigned int hlen) +{ uint_32t i = (uint_32t)(ctx->count[0] & SHA512_MASK); + + /* put bytes in the buffer in an order in which references to */ + /* 32-bit words will put bytes with lower addresses into the */ + /* top of 32 bit words on BOTH big and little endian machines */ + bsw_64(ctx->wbuf, (i + 7) >> 3); + + /* we now need to mask valid bytes and add the padding which is */ + /* a single 1 bit and as many zero bits as necessary. Note that */ + /* we can always add the first padding byte here because the */ + /* buffer always has at least one empty slot */ + ctx->wbuf[i >> 3] &= li_64(ffffffffffffff00) << 8 * (~i & 7); + ctx->wbuf[i >> 3] |= li_64(0000000000000080) << 8 * (~i & 7); + + /* we need 17 or more empty byte positions, one for the padding */ + /* byte (above) and sixteen for the length count. If there is */ + /* not enough space pad and empty the buffer */ + if(i > SHA512_BLOCK_SIZE - 17) + { + if(i < 120) ctx->wbuf[15] = 0; + sha512_compile(ctx); + i = 0; + } + else + i = (i >> 3) + 1; + + while(i < 14) + ctx->wbuf[i++] = 0; + + /* the following 64-bit length fields are assembled in the */ + /* wrong byte order on little endian machines but this is */ + /* corrected later since they are only ever used as 64-bit */ + /* word values. */ + ctx->wbuf[14] = (ctx->count[1] << 3) | (ctx->count[0] >> 61); + ctx->wbuf[15] = ctx->count[0] << 3; + sha512_compile(ctx); + + /* extract the hash value as bytes in case the hash buffer is */ + /* misaligned for 32-bit words */ + for(i = 0; i < hlen; ++i) + hval[i] = (unsigned char)(ctx->hash[i >> 3] >> (8 * (~i & 7))); +} + +#endif + +#if defined(SHA_384) + +/* SHA384 initialisation data */ + +const uint_64t i384[80] = +{ + li_64(cbbb9d5dc1059ed8), li_64(629a292a367cd507), + li_64(9159015a3070dd17), li_64(152fecd8f70e5939), + li_64(67332667ffc00b31), li_64(8eb44a8768581511), + li_64(db0c2e0d64f98fa7), li_64(47b5481dbefa4fa4) +}; + +VOID_RETURN sha384_begin(sha384_ctx ctx[1]) +{ + ctx->count[0] = ctx->count[1] = 0; + memcpy(ctx->hash, i384, 8 * sizeof(uint_64t)); +} + +VOID_RETURN sha384_end(unsigned char hval[], sha384_ctx ctx[1]) +{ + sha_end2(hval, ctx, SHA384_DIGEST_SIZE); +} + +VOID_RETURN sha384(unsigned char hval[], const unsigned char data[], unsigned long len) +{ sha384_ctx cx[1]; + + sha384_begin(cx); + sha384_hash(data, len, cx); + sha_end2(hval, cx, SHA384_DIGEST_SIZE); +} + +#endif + +#if defined(SHA_512) + +/* SHA512 initialisation data */ + +const uint_64t i512[80] = +{ + li_64(6a09e667f3bcc908), li_64(bb67ae8584caa73b), + li_64(3c6ef372fe94f82b), li_64(a54ff53a5f1d36f1), + li_64(510e527fade682d1), li_64(9b05688c2b3e6c1f), + li_64(1f83d9abfb41bd6b), li_64(5be0cd19137e2179) +}; + +VOID_RETURN sha512_begin(sha512_ctx ctx[1]) +{ + ctx->count[0] = ctx->count[1] = 0; + memcpy(ctx->hash, i512, 8 * sizeof(uint_64t)); +} + +VOID_RETURN sha512_end(unsigned char hval[], sha512_ctx ctx[1]) +{ + sha_end2(hval, ctx, SHA512_DIGEST_SIZE); +} + +VOID_RETURN sha512(unsigned char hval[], const unsigned char data[], unsigned long len) +{ sha512_ctx cx[1]; + + sha512_begin(cx); + sha512_hash(data, len, cx); + sha_end2(hval, cx, SHA512_DIGEST_SIZE); +} + +#endif + +#if defined(SHA_2) + +#define CTX_224(x) ((x)->uu->ctx256) +#define CTX_256(x) ((x)->uu->ctx256) +#define CTX_384(x) ((x)->uu->ctx512) +#define CTX_512(x) ((x)->uu->ctx512) + +/* SHA2 initialisation */ + +INT_RETURN sha2_begin(unsigned long len, sha2_ctx ctx[1]) +{ + switch(len) + { +#if defined(SHA_224) + case 224: + case 28: CTX_256(ctx)->count[0] = CTX_256(ctx)->count[1] = 0; + memcpy(CTX_256(ctx)->hash, i224, 32); + ctx->sha2_len = 28; return EXIT_SUCCESS; +#endif +#if defined(SHA_256) + case 256: + case 32: CTX_256(ctx)->count[0] = CTX_256(ctx)->count[1] = 0; + memcpy(CTX_256(ctx)->hash, i256, 32); + ctx->sha2_len = 32; return EXIT_SUCCESS; +#endif +#if defined(SHA_384) + case 384: + case 48: CTX_384(ctx)->count[0] = CTX_384(ctx)->count[1] = 0; + memcpy(CTX_384(ctx)->hash, i384, 64); + ctx->sha2_len = 48; return EXIT_SUCCESS; +#endif +#if defined(SHA_512) + case 512: + case 64: CTX_512(ctx)->count[0] = CTX_512(ctx)->count[1] = 0; + memcpy(CTX_512(ctx)->hash, i512, 64); + ctx->sha2_len = 64; return EXIT_SUCCESS; +#endif + default: return EXIT_FAILURE; + } +} + +VOID_RETURN sha2_hash(const unsigned char data[], unsigned long len, sha2_ctx ctx[1]) +{ + switch(ctx->sha2_len) + { +#if defined(SHA_224) + case 28: sha224_hash(data, len, CTX_224(ctx)); return; +#endif +#if defined(SHA_256) + case 32: sha256_hash(data, len, CTX_256(ctx)); return; +#endif +#if defined(SHA_384) + case 48: sha384_hash(data, len, CTX_384(ctx)); return; +#endif +#if defined(SHA_512) + case 64: sha512_hash(data, len, CTX_512(ctx)); return; +#endif + } +} + +VOID_RETURN sha2_end(unsigned char hval[], sha2_ctx ctx[1]) +{ + switch(ctx->sha2_len) + { +#if defined(SHA_224) + case 28: sha_end1(hval, CTX_224(ctx), SHA224_DIGEST_SIZE); return; +#endif +#if defined(SHA_256) + case 32: sha_end1(hval, CTX_256(ctx), SHA256_DIGEST_SIZE); return; +#endif +#if defined(SHA_384) + case 48: sha_end2(hval, CTX_384(ctx), SHA384_DIGEST_SIZE); return; +#endif +#if defined(SHA_512) + case 64: sha_end2(hval, CTX_512(ctx), SHA512_DIGEST_SIZE); return; +#endif + } +} + +INT_RETURN sha2(unsigned char hval[], unsigned long size, + const unsigned char data[], unsigned long len) +{ sha2_ctx cx[1]; + + if(sha2_begin(size, cx) == EXIT_SUCCESS) + { + sha2_hash(data, len, cx); sha2_end(hval, cx); return EXIT_SUCCESS; + } + else + return EXIT_FAILURE; +} + +#endif + +#if defined(__cplusplus) +} +#endif diff --git a/Crypto/Sha2.h b/Crypto/Sha2.h new file mode 100644 index 0000000..6d0aeb0 --- /dev/null +++ b/Crypto/Sha2.h @@ -0,0 +1,155 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 2002, Dr Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software is allowed (with or without + changes) provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue Date: 01/08/2005 +*/ + +#ifndef _SHA2_H +#define _SHA2_H + +#include "Common/Tcdefs.h" +#include "Common/Endian.h" + +#define SHA_64BIT + +/* define the hash functions that you need */ +#define SHA_2 /* for dynamic hash length */ +#define SHA_224 +#define SHA_256 +#ifdef SHA_64BIT +# define SHA_384 +# define SHA_512 +# define NEED_UINT_64T +#endif + +#ifndef EXIT_SUCCESS +#define EXIT_SUCCESS 0 +#define EXIT_FAILURE 1 +#endif + +#define li_64(h) 0x##h##ull + +#define VOID_RETURN void +#define INT_RETURN int + +#if defined(__cplusplus) +extern "C" +{ +#endif + +/* Note that the following function prototypes are the same */ +/* for both the bit and byte oriented implementations. But */ +/* the length fields are in bytes or bits as is appropriate */ +/* for the version used. Bit sequences are arrays of bytes */ +/* in which bit sequence indexes increase from the most to */ +/* the least significant end of each byte */ + +#define SHA224_DIGEST_SIZE 28 +#define SHA224_BLOCK_SIZE 64 +#define SHA256_DIGEST_SIZE 32 +#define SHA256_BLOCK_SIZE 64 + +/* type to hold the SHA256 (and SHA224) context */ + +typedef struct +{ uint_32t count[2]; + uint_32t hash[8]; + uint_32t wbuf[16]; +} sha256_ctx; + +typedef sha256_ctx sha224_ctx; + +VOID_RETURN sha256_compile(sha256_ctx ctx[1]); + +VOID_RETURN sha224_begin(sha224_ctx ctx[1]); +#define sha224_hash sha256_hash +VOID_RETURN sha224_end(unsigned char hval[], sha224_ctx ctx[1]); +VOID_RETURN sha224(unsigned char hval[], const unsigned char data[], unsigned long len); + +VOID_RETURN sha256_begin(sha256_ctx ctx[1]); +VOID_RETURN sha256_hash(const unsigned char data[], unsigned long len, sha256_ctx ctx[1]); +VOID_RETURN sha256_end(unsigned char hval[], sha256_ctx ctx[1]); +VOID_RETURN sha256(unsigned char hval[], const unsigned char data[], unsigned long len); + +#ifndef SHA_64BIT + +typedef struct +{ union + { sha256_ctx ctx256[1]; + } uu[1]; + uint_32t sha2_len; +} sha2_ctx; + +#define SHA2_MAX_DIGEST_SIZE SHA256_DIGEST_SIZE + +#else + +#define SHA384_DIGEST_SIZE 48 +#define SHA384_BLOCK_SIZE 128 +#define SHA512_DIGEST_SIZE 64 +#define SHA512_BLOCK_SIZE 128 +#define SHA2_MAX_DIGEST_SIZE SHA512_DIGEST_SIZE + +/* type to hold the SHA384 (and SHA512) context */ + +typedef struct +{ uint_64t count[2]; + uint_64t hash[8]; + uint_64t wbuf[16]; +} sha512_ctx; + +typedef sha512_ctx sha384_ctx; + +typedef struct +{ union + { sha256_ctx ctx256[1]; + sha512_ctx ctx512[1]; + } uu[1]; + uint_32t sha2_len; +} sha2_ctx; + +VOID_RETURN sha512_compile(sha512_ctx ctx[1]); + +VOID_RETURN sha384_begin(sha384_ctx ctx[1]); +#define sha384_hash sha512_hash +VOID_RETURN sha384_end(unsigned char hval[], sha384_ctx ctx[1]); +VOID_RETURN sha384(unsigned char hval[], const unsigned char data[], unsigned long len); + +VOID_RETURN sha512_begin(sha512_ctx ctx[1]); +VOID_RETURN sha512_hash(const unsigned char data[], unsigned long len, sha512_ctx ctx[1]); +VOID_RETURN sha512_end(unsigned char hval[], sha512_ctx ctx[1]); +VOID_RETURN sha512(unsigned char hval[], const unsigned char data[], unsigned long len); + +INT_RETURN sha2_begin(unsigned long size, sha2_ctx ctx[1]); +VOID_RETURN sha2_hash(const unsigned char data[], unsigned long len, sha2_ctx ctx[1]); +VOID_RETURN sha2_end(unsigned char hval[], sha2_ctx ctx[1]); +INT_RETURN sha2(unsigned char hval[], unsigned long size, const unsigned char data[], unsigned long len); + +#endif + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/Crypto/Sources b/Crypto/Sources new file mode 100644 index 0000000..f38c268 --- /dev/null +++ b/Crypto/Sources @@ -0,0 +1,23 @@ +TARGETNAME=Crypto +TARGETTYPE=DRIVER_LIBRARY + +INCLUDES = .. + +NTTARGETFILES = \ + "$(OBJ_PATH)\$(O)\Aes_$(TC_ARCH).obj" \ + "$(OBJ_PATH)\$(O)\Aes_hw_cpu.obj" + +SOURCES = \ + Aes_$(TC_ARCH).asm \ + Aes_hw_cpu.asm \ + Aeskey.c \ + Aestab.c \ + Blowfish.c \ + Cast.c \ + Des.c \ + Rmd160.c \ + Serpent.c \ + Sha1.c \ + Sha2.c \ + Twofish.c \ + Whirlpool.c diff --git a/Crypto/Twofish.c b/Crypto/Twofish.c new file mode 100644 index 0000000..6bdb469 --- /dev/null +++ b/Crypto/Twofish.c @@ -0,0 +1,548 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 1999, Dr Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software is allowed (with or without + changes) provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + + My thanks to Doug Whiting and Niels Ferguson for comments that led + to improvements in this implementation. + + Issue Date: 14th January 1999 +*/ + +/* Adapted for TrueCrypt */ + + +#ifdef TC_WINDOWS_BOOT +#pragma optimize ("tl", on) +#endif + +#include "Twofish.h" +#include "Common/Endian.h" + +#define Q_TABLES +#define M_TABLE + +#if !defined (TC_MINIMIZE_CODE_SIZE) || defined (TC_WINDOWS_BOOT_TWOFISH) +# define MK_TABLE +# define ONE_STEP +#endif + +/* finite field arithmetic for GF(2**8) with the modular */ +/* polynomial x^8 + x^6 + x^5 + x^3 + 1 (0x169) */ + +#define G_M 0x0169 + +static u1byte tab_5b[4] = { 0, G_M >> 2, G_M >> 1, (G_M >> 1) ^ (G_M >> 2) }; +static u1byte tab_ef[4] = { 0, (G_M >> 1) ^ (G_M >> 2), G_M >> 1, G_M >> 2 }; + +#define ffm_01(x) (x) +#define ffm_5b(x) ((x) ^ ((x) >> 2) ^ tab_5b[(x) & 3]) +#define ffm_ef(x) ((x) ^ ((x) >> 1) ^ ((x) >> 2) ^ tab_ef[(x) & 3]) + +static u1byte ror4[16] = { 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15 }; +static u1byte ashx[16] = { 0, 9, 2, 11, 4, 13, 6, 15, 8, 1, 10, 3, 12, 5, 14, 7 }; + +static u1byte qt0[2][16] = +{ { 8, 1, 7, 13, 6, 15, 3, 2, 0, 11, 5, 9, 14, 12, 10, 4 }, + { 2, 8, 11, 13, 15, 7, 6, 14, 3, 1, 9, 4, 0, 10, 12, 5 } +}; + +static u1byte qt1[2][16] = +{ { 14, 12, 11, 8, 1, 2, 3, 5, 15, 4, 10, 6, 7, 0, 9, 13 }, + { 1, 14, 2, 11, 4, 12, 3, 7, 6, 13, 10, 5, 15, 9, 0, 8 } +}; + +static u1byte qt2[2][16] = +{ { 11, 10, 5, 14, 6, 13, 9, 0, 12, 8, 15, 3, 2, 4, 7, 1 }, + { 4, 12, 7, 5, 1, 6, 9, 10, 0, 14, 13, 8, 2, 11, 3, 15 } +}; + +static u1byte qt3[2][16] = +{ { 13, 7, 15, 4, 1, 2, 6, 14, 9, 11, 3, 0, 8, 5, 12, 10 }, + { 11, 9, 5, 1, 12, 3, 13, 14, 6, 4, 7, 15, 2, 0, 8, 10 } +}; + +static u1byte qp(const u4byte n, const u1byte x) +{ u1byte a0, a1, a2, a3, a4, b0, b1, b2, b3, b4; + + a0 = x >> 4; b0 = x & 15; + a1 = a0 ^ b0; b1 = ror4[b0] ^ ashx[a0]; + a2 = qt0[n][a1]; b2 = qt1[n][b1]; + a3 = a2 ^ b2; b3 = ror4[b2] ^ ashx[a2]; + a4 = qt2[n][a3]; b4 = qt3[n][b3]; + return (b4 << 4) | a4; +}; + +#ifdef Q_TABLES + +static u4byte qt_gen = 0; +static u1byte q_tab[2][256]; + +#define q(n,x) q_tab[n][x] + +static void gen_qtab(void) +{ u4byte i; + + for(i = 0; i < 256; ++i) + { + q(0,i) = qp(0, (u1byte)i); + q(1,i) = qp(1, (u1byte)i); + } +}; + +#else + +#define q(n,x) qp(n, x) + +#endif + +#ifdef M_TABLE + +static u4byte mt_gen = 0; +static u4byte m_tab[4][256]; + +static void gen_mtab(void) +{ u4byte i, f01, f5b, fef; + + for(i = 0; i < 256; ++i) + { + f01 = q(1,i); f5b = ffm_5b(f01); fef = ffm_ef(f01); + m_tab[0][i] = f01 + (f5b << 8) + (fef << 16) + (fef << 24); + m_tab[2][i] = f5b + (fef << 8) + (f01 << 16) + (fef << 24); + + f01 = q(0,i); f5b = ffm_5b(f01); fef = ffm_ef(f01); + m_tab[1][i] = fef + (fef << 8) + (f5b << 16) + (f01 << 24); + m_tab[3][i] = f5b + (f01 << 8) + (fef << 16) + (f5b << 24); + } +}; + +#define mds(n,x) m_tab[n][x] + +#else + +#define fm_00 ffm_01 +#define fm_10 ffm_5b +#define fm_20 ffm_ef +#define fm_30 ffm_ef +#define q_0(x) q(1,x) + +#define fm_01 ffm_ef +#define fm_11 ffm_ef +#define fm_21 ffm_5b +#define fm_31 ffm_01 +#define q_1(x) q(0,x) + +#define fm_02 ffm_5b +#define fm_12 ffm_ef +#define fm_22 ffm_01 +#define fm_32 ffm_ef +#define q_2(x) q(1,x) + +#define fm_03 ffm_5b +#define fm_13 ffm_01 +#define fm_23 ffm_ef +#define fm_33 ffm_5b +#define q_3(x) q(0,x) + +#define f_0(n,x) ((u4byte)fm_0##n(x)) +#define f_1(n,x) ((u4byte)fm_1##n(x) << 8) +#define f_2(n,x) ((u4byte)fm_2##n(x) << 16) +#define f_3(n,x) ((u4byte)fm_3##n(x) << 24) + +#define mds(n,x) f_0(n,q_##n(x)) ^ f_1(n,q_##n(x)) ^ f_2(n,q_##n(x)) ^ f_3(n,q_##n(x)) + +#endif + +static u4byte h_fun(TwofishInstance *instance, const u4byte x, const u4byte key[]) +{ u4byte b0, b1, b2, b3; + +#ifndef M_TABLE + u4byte m5b_b0, m5b_b1, m5b_b2, m5b_b3; + u4byte mef_b0, mef_b1, mef_b2, mef_b3; +#endif + + b0 = extract_byte(x, 0); b1 = extract_byte(x, 1); b2 = extract_byte(x, 2); b3 = extract_byte(x, 3); + + switch(instance->k_len) + { + case 4: b0 = q(1, (u1byte) b0) ^ extract_byte(key[3],0); + b1 = q(0, (u1byte) b1) ^ extract_byte(key[3],1); + b2 = q(0, (u1byte) b2) ^ extract_byte(key[3],2); + b3 = q(1, (u1byte) b3) ^ extract_byte(key[3],3); + case 3: b0 = q(1, (u1byte) b0) ^ extract_byte(key[2],0); + b1 = q(1, (u1byte) b1) ^ extract_byte(key[2],1); + b2 = q(0, (u1byte) b2) ^ extract_byte(key[2],2); + b3 = q(0, (u1byte) b3) ^ extract_byte(key[2],3); + case 2: b0 = q(0, (u1byte) (q(0, (u1byte) b0) ^ extract_byte(key[1],0))) ^ extract_byte(key[0],0); + b1 = q(0, (u1byte) (q(1, (u1byte) b1) ^ extract_byte(key[1],1))) ^ extract_byte(key[0],1); + b2 = q(1, (u1byte) (q(0, (u1byte) b2) ^ extract_byte(key[1],2))) ^ extract_byte(key[0],2); + b3 = q(1, (u1byte) (q(1, (u1byte) b3) ^ extract_byte(key[1],3))) ^ extract_byte(key[0],3); + } +#ifdef M_TABLE + + return mds(0, b0) ^ mds(1, b1) ^ mds(2, b2) ^ mds(3, b3); + +#else + + b0 = q(1, (u1byte) b0); b1 = q(0, (u1byte) b1); b2 = q(1, (u1byte) b2); b3 = q(0, (u1byte) b3); + m5b_b0 = ffm_5b(b0); m5b_b1 = ffm_5b(b1); m5b_b2 = ffm_5b(b2); m5b_b3 = ffm_5b(b3); + mef_b0 = ffm_ef(b0); mef_b1 = ffm_ef(b1); mef_b2 = ffm_ef(b2); mef_b3 = ffm_ef(b3); + b0 ^= mef_b1 ^ m5b_b2 ^ m5b_b3; b3 ^= m5b_b0 ^ mef_b1 ^ mef_b2; + b2 ^= mef_b0 ^ m5b_b1 ^ mef_b3; b1 ^= mef_b0 ^ mef_b2 ^ m5b_b3; + + return b0 | (b3 << 8) | (b2 << 16) | (b1 << 24); + +#endif +}; + +#ifdef MK_TABLE + +#ifdef ONE_STEP +//u4byte mk_tab[4][256]; +#else +static u1byte sb[4][256]; +#endif + +#define q20(x) q(0,q(0,x) ^ extract_byte(key[1],0)) ^ extract_byte(key[0],0) +#define q21(x) q(0,q(1,x) ^ extract_byte(key[1],1)) ^ extract_byte(key[0],1) +#define q22(x) q(1,q(0,x) ^ extract_byte(key[1],2)) ^ extract_byte(key[0],2) +#define q23(x) q(1,q(1,x) ^ extract_byte(key[1],3)) ^ extract_byte(key[0],3) + +#define q30(x) q(0,q(0,q(1, x) ^ extract_byte(key[2],0)) ^ extract_byte(key[1],0)) ^ extract_byte(key[0],0) +#define q31(x) q(0,q(1,q(1, x) ^ extract_byte(key[2],1)) ^ extract_byte(key[1],1)) ^ extract_byte(key[0],1) +#define q32(x) q(1,q(0,q(0, x) ^ extract_byte(key[2],2)) ^ extract_byte(key[1],2)) ^ extract_byte(key[0],2) +#define q33(x) q(1,q(1,q(0, x) ^ extract_byte(key[2],3)) ^ extract_byte(key[1],3)) ^ extract_byte(key[0],3) + +#define q40(x) q(0,q(0,q(1, q(1, x) ^ extract_byte(key[3],0)) ^ extract_byte(key[2],0)) ^ extract_byte(key[1],0)) ^ extract_byte(key[0],0) +#define q41(x) q(0,q(1,q(1, q(0, x) ^ extract_byte(key[3],1)) ^ extract_byte(key[2],1)) ^ extract_byte(key[1],1)) ^ extract_byte(key[0],1) +#define q42(x) q(1,q(0,q(0, q(0, x) ^ extract_byte(key[3],2)) ^ extract_byte(key[2],2)) ^ extract_byte(key[1],2)) ^ extract_byte(key[0],2) +#define q43(x) q(1,q(1,q(0, q(1, x) ^ extract_byte(key[3],3)) ^ extract_byte(key[2],3)) ^ extract_byte(key[1],3)) ^ extract_byte(key[0],3) + +static void gen_mk_tab(TwofishInstance *instance, u4byte key[]) +{ u4byte i; + u1byte by; + + u4byte *mk_tab = instance->mk_tab; + + switch(instance->k_len) + { + case 2: for(i = 0; i < 256; ++i) + { + by = (u1byte)i; +#ifdef ONE_STEP + mk_tab[0 + 4*i] = mds(0, q20(by)); mk_tab[1 + 4*i] = mds(1, q21(by)); + mk_tab[2 + 4*i] = mds(2, q22(by)); mk_tab[3 + 4*i] = mds(3, q23(by)); +#else + sb[0][i] = q20(by); sb[1][i] = q21(by); + sb[2][i] = q22(by); sb[3][i] = q23(by); +#endif + } + break; + + case 3: for(i = 0; i < 256; ++i) + { + by = (u1byte)i; +#ifdef ONE_STEP + mk_tab[0 + 4*i] = mds(0, q30(by)); mk_tab[1 + 4*i] = mds(1, q31(by)); + mk_tab[2 + 4*i] = mds(2, q32(by)); mk_tab[3 + 4*i] = mds(3, q33(by)); +#else + sb[0][i] = q30(by); sb[1][i] = q31(by); + sb[2][i] = q32(by); sb[3][i] = q33(by); +#endif + } + break; + + case 4: for(i = 0; i < 256; ++i) + { + by = (u1byte)i; +#ifdef ONE_STEP + mk_tab[0 + 4*i] = mds(0, q40(by)); mk_tab[1 + 4*i] = mds(1, q41(by)); + mk_tab[2 + 4*i] = mds(2, q42(by)); mk_tab[3 + 4*i] = mds(3, q43(by)); +#else + sb[0][i] = q40(by); sb[1][i] = q41(by); + sb[2][i] = q42(by); sb[3][i] = q43(by); +#endif + } + } +}; + +# ifdef ONE_STEP +# define g0_fun(x) ( mk_tab[0 + 4*extract_byte(x,0)] ^ mk_tab[1 + 4*extract_byte(x,1)] \ + ^ mk_tab[2 + 4*extract_byte(x,2)] ^ mk_tab[3 + 4*extract_byte(x,3)] ) +# define g1_fun(x) ( mk_tab[0 + 4*extract_byte(x,3)] ^ mk_tab[1 + 4*extract_byte(x,0)] \ + ^ mk_tab[2 + 4*extract_byte(x,1)] ^ mk_tab[3 + 4*extract_byte(x,2)] ) + + +# else +# define g0_fun(x) ( mds(0, sb[0][extract_byte(x,0)]) ^ mds(1, sb[1][extract_byte(x,1)]) \ + ^ mds(2, sb[2][extract_byte(x,2)]) ^ mds(3, sb[3][extract_byte(x,3)]) ) +# define g1_fun(x) ( mds(0, sb[0][extract_byte(x,3)]) ^ mds(1, sb[1][extract_byte(x,0)]) \ + ^ mds(2, sb[2][extract_byte(x,1)]) ^ mds(3, sb[3][extract_byte(x,2)]) ) +# endif + +#else + +#define g0_fun(x) h_fun(instance, x, instance->s_key) +#define g1_fun(x) h_fun(instance, rotl(x,8), instance->s_key) + +#endif + +/* The (12,8) Reed Soloman code has the generator polynomial + + g(x) = x^4 + (a + 1/a) * x^3 + a * x^2 + (a + 1/a) * x + 1 + +where the coefficients are in the finite field GF(2^8) with a +modular polynomial a^8 + a^6 + a^3 + a^2 + 1. To generate the +remainder we have to start with a 12th order polynomial with our +eight input bytes as the coefficients of the 4th to 11th terms. +That is: + + m[7] * x^11 + m[6] * x^10 ... + m[0] * x^4 + 0 * x^3 +... + 0 + +We then multiply the generator polynomial by m[7] * x^7 and subtract +it - xor in GF(2^8) - from the above to eliminate the x^7 term (the +artihmetic on the coefficients is done in GF(2^8). We then multiply +the generator polynomial by x^6 * coeff(x^10) and use this to remove +the x^10 term. We carry on in this way until the x^4 term is removed +so that we are left with: + + r[3] * x^3 + r[2] * x^2 + r[1] 8 x^1 + r[0] + +which give the resulting 4 bytes of the remainder. This is equivalent +to the matrix multiplication in the Twofish description but much faster +to implement. + +*/ + +#define G_MOD 0x0000014d + +static u4byte mds_rem(u4byte p0, u4byte p1) +{ u4byte i, t, u; + + for(i = 0; i < 8; ++i) + { + t = p1 >> 24; // get most significant coefficient + + p1 = (p1 << 8) | (p0 >> 24); p0 <<= 8; // shift others up + + // multiply t by a (the primitive element - i.e. left shift) + + u = (t << 1); + + if(t & 0x80) // subtract modular polynomial on overflow + + u ^= G_MOD; + + p1 ^= t ^ (u << 16); // remove t * (a * x^2 + 1) + + u ^= (t >> 1); // form u = a * t + t / a = t * (a + 1 / a); + + if(t & 0x01) // add the modular polynomial on underflow + + u ^= G_MOD >> 1; + + p1 ^= (u << 24) | (u << 8); // remove t * (a + 1/a) * (x^3 + x) + } + + return p1; +}; + +/* initialise the key schedule from the user supplied key */ + +u4byte *twofish_set_key(TwofishInstance *instance, const u4byte in_key[], const u4byte key_len) +{ u4byte i, a, b, me_key[4], mo_key[4]; + u4byte *l_key, *s_key; + + l_key = instance->l_key; + s_key = instance->s_key; + +#ifdef Q_TABLES + if(!qt_gen) + { + gen_qtab(); qt_gen = 1; + } +#endif + +#ifdef M_TABLE + if(!mt_gen) + { + gen_mtab(); mt_gen = 1; + } +#endif + + instance->k_len = key_len / 64; /* 2, 3 or 4 */ + + for(i = 0; i < instance->k_len; ++i) + { + a = LE32(in_key[i + i]); me_key[i] = a; + b = LE32(in_key[i + i + 1]); mo_key[i] = b; + s_key[instance->k_len - i - 1] = mds_rem(a, b); + } + + for(i = 0; i < 40; i += 2) + { + a = 0x01010101 * i; b = a + 0x01010101; + a = h_fun(instance, a, me_key); + b = rotl(h_fun(instance, b, mo_key), 8); + l_key[i] = a + b; + l_key[i + 1] = rotl(a + 2 * b, 9); + } + +#ifdef MK_TABLE + gen_mk_tab(instance, s_key); +#endif + + return l_key; +}; + +/* encrypt a block of text */ + +#ifndef TC_MINIMIZE_CODE_SIZE + +#define f_rnd(i) \ + t1 = g1_fun(blk[1]); t0 = g0_fun(blk[0]); \ + blk[2] = rotr(blk[2] ^ (t0 + t1 + l_key[4 * (i) + 8]), 1); \ + blk[3] = rotl(blk[3], 1) ^ (t0 + 2 * t1 + l_key[4 * (i) + 9]); \ + t1 = g1_fun(blk[3]); t0 = g0_fun(blk[2]); \ + blk[0] = rotr(blk[0] ^ (t0 + t1 + l_key[4 * (i) + 10]), 1); \ + blk[1] = rotl(blk[1], 1) ^ (t0 + 2 * t1 + l_key[4 * (i) + 11]) + +void twofish_encrypt(TwofishInstance *instance, const u4byte in_blk[4], u4byte out_blk[]) +{ u4byte t0, t1, blk[4]; + + u4byte *l_key = instance->l_key; + u4byte *mk_tab = instance->mk_tab; + + blk[0] = LE32(in_blk[0]) ^ l_key[0]; + blk[1] = LE32(in_blk[1]) ^ l_key[1]; + blk[2] = LE32(in_blk[2]) ^ l_key[2]; + blk[3] = LE32(in_blk[3]) ^ l_key[3]; + + f_rnd(0); f_rnd(1); f_rnd(2); f_rnd(3); + f_rnd(4); f_rnd(5); f_rnd(6); f_rnd(7); + + out_blk[0] = LE32(blk[2] ^ l_key[4]); + out_blk[1] = LE32(blk[3] ^ l_key[5]); + out_blk[2] = LE32(blk[0] ^ l_key[6]); + out_blk[3] = LE32(blk[1] ^ l_key[7]); +}; + +#else // TC_MINIMIZE_CODE_SIZE + +void twofish_encrypt(TwofishInstance *instance, const u4byte in_blk[4], u4byte out_blk[]) +{ u4byte t0, t1, blk[4]; + + u4byte *l_key = instance->l_key; +#ifdef TC_WINDOWS_BOOT_TWOFISH + u4byte *mk_tab = instance->mk_tab; +#endif + int i; + + blk[0] = LE32(in_blk[0]) ^ l_key[0]; + blk[1] = LE32(in_blk[1]) ^ l_key[1]; + blk[2] = LE32(in_blk[2]) ^ l_key[2]; + blk[3] = LE32(in_blk[3]) ^ l_key[3]; + + for (i = 0; i <= 7; ++i) + { + t1 = g1_fun(blk[1]); t0 = g0_fun(blk[0]); + blk[2] = rotr(blk[2] ^ (t0 + t1 + l_key[4 * (i) + 8]), 1); + blk[3] = rotl(blk[3], 1) ^ (t0 + 2 * t1 + l_key[4 * (i) + 9]); + t1 = g1_fun(blk[3]); t0 = g0_fun(blk[2]); + blk[0] = rotr(blk[0] ^ (t0 + t1 + l_key[4 * (i) + 10]), 1); + blk[1] = rotl(blk[1], 1) ^ (t0 + 2 * t1 + l_key[4 * (i) + 11]); + } + + out_blk[0] = LE32(blk[2] ^ l_key[4]); + out_blk[1] = LE32(blk[3] ^ l_key[5]); + out_blk[2] = LE32(blk[0] ^ l_key[6]); + out_blk[3] = LE32(blk[1] ^ l_key[7]); +}; + +#endif // TC_MINIMIZE_CODE_SIZE + +/* decrypt a block of text */ + +#ifndef TC_MINIMIZE_CODE_SIZE + +#define i_rnd(i) \ + t1 = g1_fun(blk[1]); t0 = g0_fun(blk[0]); \ + blk[2] = rotl(blk[2], 1) ^ (t0 + t1 + l_key[4 * (i) + 10]); \ + blk[3] = rotr(blk[3] ^ (t0 + 2 * t1 + l_key[4 * (i) + 11]), 1); \ + t1 = g1_fun(blk[3]); t0 = g0_fun(blk[2]); \ + blk[0] = rotl(blk[0], 1) ^ (t0 + t1 + l_key[4 * (i) + 8]); \ + blk[1] = rotr(blk[1] ^ (t0 + 2 * t1 + l_key[4 * (i) + 9]), 1) + +void twofish_decrypt(TwofishInstance *instance, const u4byte in_blk[4], u4byte out_blk[4]) +{ u4byte t0, t1, blk[4]; + + u4byte *l_key = instance->l_key; + u4byte *mk_tab = instance->mk_tab; + + blk[0] = LE32(in_blk[0]) ^ l_key[4]; + blk[1] = LE32(in_blk[1]) ^ l_key[5]; + blk[2] = LE32(in_blk[2]) ^ l_key[6]; + blk[3] = LE32(in_blk[3]) ^ l_key[7]; + + i_rnd(7); i_rnd(6); i_rnd(5); i_rnd(4); + i_rnd(3); i_rnd(2); i_rnd(1); i_rnd(0); + + out_blk[0] = LE32(blk[2] ^ l_key[0]); + out_blk[1] = LE32(blk[3] ^ l_key[1]); + out_blk[2] = LE32(blk[0] ^ l_key[2]); + out_blk[3] = LE32(blk[1] ^ l_key[3]); +}; + +#else // TC_MINIMIZE_CODE_SIZE + +void twofish_decrypt(TwofishInstance *instance, const u4byte in_blk[4], u4byte out_blk[4]) +{ u4byte t0, t1, blk[4]; + + u4byte *l_key = instance->l_key; +#ifdef TC_WINDOWS_BOOT_TWOFISH + u4byte *mk_tab = instance->mk_tab; +#endif + int i; + + blk[0] = LE32(in_blk[0]) ^ l_key[4]; + blk[1] = LE32(in_blk[1]) ^ l_key[5]; + blk[2] = LE32(in_blk[2]) ^ l_key[6]; + blk[3] = LE32(in_blk[3]) ^ l_key[7]; + + for (i = 7; i >= 0; --i) + { + t1 = g1_fun(blk[1]); t0 = g0_fun(blk[0]); + blk[2] = rotl(blk[2], 1) ^ (t0 + t1 + l_key[4 * (i) + 10]); + blk[3] = rotr(blk[3] ^ (t0 + 2 * t1 + l_key[4 * (i) + 11]), 1); + t1 = g1_fun(blk[3]); t0 = g0_fun(blk[2]); + blk[0] = rotl(blk[0], 1) ^ (t0 + t1 + l_key[4 * (i) + 8]); + blk[1] = rotr(blk[1] ^ (t0 + 2 * t1 + l_key[4 * (i) + 9]), 1); + } + + out_blk[0] = LE32(blk[2] ^ l_key[0]); + out_blk[1] = LE32(blk[3] ^ l_key[1]); + out_blk[2] = LE32(blk[0] ^ l_key[2]); + out_blk[3] = LE32(blk[1] ^ l_key[3]); +}; + +#endif // TC_MINIMIZE_CODE_SIZE diff --git a/Crypto/Twofish.h b/Crypto/Twofish.h new file mode 100644 index 0000000..90cdd7d --- /dev/null +++ b/Crypto/Twofish.h @@ -0,0 +1,55 @@ +#ifndef TWOFISH_H +#define TWOFISH_H + +#include "Common/Tcdefs.h" + +#if defined(__cplusplus) +extern "C" +{ +#endif + +#ifndef u4byte +#define u4byte unsigned __int32 +#endif +#ifndef u1byte +#define u1byte unsigned char +#endif + +#ifndef extract_byte +#define extract_byte(x,n) ((u1byte)((x) >> (8 * n))) +#endif + +#ifndef rotl + +#ifdef _WIN32 +#include +#pragma intrinsic(_lrotr,_lrotl) +#define rotr(x,n) _lrotr(x,n) +#define rotl(x,n) _lrotl(x,n) +#else +#define rotr(x,n) (((x)>>(n))|((x)<<(32-(n)))) +#define rotl(x,n) (((x)<<(n))|((x)>>(32-(n)))) +#endif + +#endif +typedef struct +{ + u4byte l_key[40]; + u4byte s_key[4]; +#if !defined (TC_MINIMIZE_CODE_SIZE) || defined (TC_WINDOWS_BOOT_TWOFISH) + u4byte mk_tab[4 * 256]; +#endif + u4byte k_len; +} TwofishInstance; + +#define TWOFISH_KS sizeof(TwofishInstance) + +u4byte * twofish_set_key(TwofishInstance *instance, const u4byte in_key[], const u4byte key_len); +void twofish_encrypt(TwofishInstance *instance, const u4byte in_blk[4], u4byte out_blk[]); +void twofish_decrypt(TwofishInstance *instance, const u4byte in_blk[4], u4byte out_blk[4]); + +#if defined(__cplusplus) +} +#endif + +#endif // TWOFISH_H diff --git a/Crypto/Whirlpool.c b/Crypto/Whirlpool.c new file mode 100644 index 0000000..42ce87a --- /dev/null +++ b/Crypto/Whirlpool.c @@ -0,0 +1,1058 @@ +/** + * The Whirlpool hashing function. + * + *

+ * References + * + *

+ * The Whirlpool algorithm was developed by + * Paulo S. L. M. Barreto and + * Vincent Rijmen. + * + * See + * P.S.L.M. Barreto, V. Rijmen, + * ``The Whirlpool hashing function,'' + * NESSIE submission, 2000 (tweaked version, 2001), + * + * + * @author Paulo S.L.M. Barreto + * @author Vincent Rijmen. + * Adapted for TrueCrypt. + * + * @version 3.0 (2003.03.12) + * + * ============================================================================= + * + * Differences from version 2.1: + * + * - Suboptimal diffusion matrix replaced by cir(1, 1, 4, 1, 8, 5, 2, 9). + * + * ============================================================================= + * + * Differences from version 2.0: + * + * - Generation of ISO/IEC 10118-3 test vectors. + * - Bug fix: nonzero carry was ignored when tallying the data length + * (this bug apparently only manifested itself when feeding data + * in pieces rather than in a single chunk at once). + * - Support for MS Visual C++ 64-bit integer arithmetic. + * + * Differences from version 1.0: + * + * - Original S-box replaced by the tweaked, hardware-efficient version. + * + * ============================================================================= + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + /* The code contained in this file (Whirlpool.c) is in the public domain. */ + +#include +#include +#include +#include + +#include "Whirlpool.h" + +/* #define TRACE_INTERMEDIATE_VALUES */ + +/* + * The number of rounds of the internal dedicated block cipher. + */ +#define R 10 + +/* + * Though Whirlpool is endianness-neutral, the encryption tables are listed + * in BIG-ENDIAN format, which is adopted throughout this implementation + * (but little-endian notation would be equally suitable if consistently + * employed). + */ + +static const u64 C0[256] = { + LL(0x18186018c07830d8), LL(0x23238c2305af4626), LL(0xc6c63fc67ef991b8), LL(0xe8e887e8136fcdfb), + LL(0x878726874ca113cb), LL(0xb8b8dab8a9626d11), LL(0x0101040108050209), LL(0x4f4f214f426e9e0d), + LL(0x3636d836adee6c9b), LL(0xa6a6a2a6590451ff), LL(0xd2d26fd2debdb90c), LL(0xf5f5f3f5fb06f70e), + LL(0x7979f979ef80f296), LL(0x6f6fa16f5fcede30), LL(0x91917e91fcef3f6d), LL(0x52525552aa07a4f8), + LL(0x60609d6027fdc047), LL(0xbcbccabc89766535), LL(0x9b9b569baccd2b37), LL(0x8e8e028e048c018a), + LL(0xa3a3b6a371155bd2), LL(0x0c0c300c603c186c), LL(0x7b7bf17bff8af684), LL(0x3535d435b5e16a80), + LL(0x1d1d741de8693af5), LL(0xe0e0a7e05347ddb3), LL(0xd7d77bd7f6acb321), LL(0xc2c22fc25eed999c), + LL(0x2e2eb82e6d965c43), LL(0x4b4b314b627a9629), LL(0xfefedffea321e15d), LL(0x575741578216aed5), + LL(0x15155415a8412abd), LL(0x7777c1779fb6eee8), LL(0x3737dc37a5eb6e92), LL(0xe5e5b3e57b56d79e), + LL(0x9f9f469f8cd92313), LL(0xf0f0e7f0d317fd23), LL(0x4a4a354a6a7f9420), LL(0xdada4fda9e95a944), + LL(0x58587d58fa25b0a2), LL(0xc9c903c906ca8fcf), LL(0x2929a429558d527c), LL(0x0a0a280a5022145a), + LL(0xb1b1feb1e14f7f50), LL(0xa0a0baa0691a5dc9), LL(0x6b6bb16b7fdad614), LL(0x85852e855cab17d9), + LL(0xbdbdcebd8173673c), LL(0x5d5d695dd234ba8f), LL(0x1010401080502090), LL(0xf4f4f7f4f303f507), + LL(0xcbcb0bcb16c08bdd), LL(0x3e3ef83eedc67cd3), LL(0x0505140528110a2d), LL(0x676781671fe6ce78), + LL(0xe4e4b7e47353d597), LL(0x27279c2725bb4e02), LL(0x4141194132588273), LL(0x8b8b168b2c9d0ba7), + LL(0xa7a7a6a7510153f6), LL(0x7d7de97dcf94fab2), LL(0x95956e95dcfb3749), LL(0xd8d847d88e9fad56), + LL(0xfbfbcbfb8b30eb70), LL(0xeeee9fee2371c1cd), LL(0x7c7ced7cc791f8bb), LL(0x6666856617e3cc71), + LL(0xdddd53dda68ea77b), LL(0x17175c17b84b2eaf), LL(0x4747014702468e45), LL(0x9e9e429e84dc211a), + LL(0xcaca0fca1ec589d4), LL(0x2d2db42d75995a58), LL(0xbfbfc6bf9179632e), LL(0x07071c07381b0e3f), + LL(0xadad8ead012347ac), LL(0x5a5a755aea2fb4b0), LL(0x838336836cb51bef), LL(0x3333cc3385ff66b6), + LL(0x636391633ff2c65c), LL(0x02020802100a0412), LL(0xaaaa92aa39384993), LL(0x7171d971afa8e2de), + LL(0xc8c807c80ecf8dc6), LL(0x19196419c87d32d1), LL(0x494939497270923b), LL(0xd9d943d9869aaf5f), + LL(0xf2f2eff2c31df931), LL(0xe3e3abe34b48dba8), LL(0x5b5b715be22ab6b9), LL(0x88881a8834920dbc), + LL(0x9a9a529aa4c8293e), LL(0x262698262dbe4c0b), LL(0x3232c8328dfa64bf), LL(0xb0b0fab0e94a7d59), + LL(0xe9e983e91b6acff2), LL(0x0f0f3c0f78331e77), LL(0xd5d573d5e6a6b733), LL(0x80803a8074ba1df4), + LL(0xbebec2be997c6127), LL(0xcdcd13cd26de87eb), LL(0x3434d034bde46889), LL(0x48483d487a759032), + LL(0xffffdbffab24e354), LL(0x7a7af57af78ff48d), LL(0x90907a90f4ea3d64), LL(0x5f5f615fc23ebe9d), + LL(0x202080201da0403d), LL(0x6868bd6867d5d00f), LL(0x1a1a681ad07234ca), LL(0xaeae82ae192c41b7), + LL(0xb4b4eab4c95e757d), LL(0x54544d549a19a8ce), LL(0x93937693ece53b7f), LL(0x222288220daa442f), + LL(0x64648d6407e9c863), LL(0xf1f1e3f1db12ff2a), LL(0x7373d173bfa2e6cc), LL(0x12124812905a2482), + LL(0x40401d403a5d807a), LL(0x0808200840281048), LL(0xc3c32bc356e89b95), LL(0xecec97ec337bc5df), + LL(0xdbdb4bdb9690ab4d), LL(0xa1a1bea1611f5fc0), LL(0x8d8d0e8d1c830791), LL(0x3d3df43df5c97ac8), + LL(0x97976697ccf1335b), LL(0x0000000000000000), LL(0xcfcf1bcf36d483f9), LL(0x2b2bac2b4587566e), + LL(0x7676c57697b3ece1), LL(0x8282328264b019e6), LL(0xd6d67fd6fea9b128), LL(0x1b1b6c1bd87736c3), + LL(0xb5b5eeb5c15b7774), LL(0xafaf86af112943be), LL(0x6a6ab56a77dfd41d), LL(0x50505d50ba0da0ea), + LL(0x45450945124c8a57), LL(0xf3f3ebf3cb18fb38), LL(0x3030c0309df060ad), LL(0xefef9bef2b74c3c4), + LL(0x3f3ffc3fe5c37eda), LL(0x55554955921caac7), LL(0xa2a2b2a2791059db), LL(0xeaea8fea0365c9e9), + LL(0x656589650fecca6a), LL(0xbabad2bab9686903), LL(0x2f2fbc2f65935e4a), LL(0xc0c027c04ee79d8e), + LL(0xdede5fdebe81a160), LL(0x1c1c701ce06c38fc), LL(0xfdfdd3fdbb2ee746), LL(0x4d4d294d52649a1f), + LL(0x92927292e4e03976), LL(0x7575c9758fbceafa), LL(0x06061806301e0c36), LL(0x8a8a128a249809ae), + LL(0xb2b2f2b2f940794b), LL(0xe6e6bfe66359d185), LL(0x0e0e380e70361c7e), LL(0x1f1f7c1ff8633ee7), + LL(0x6262956237f7c455), LL(0xd4d477d4eea3b53a), LL(0xa8a89aa829324d81), LL(0x96966296c4f43152), + LL(0xf9f9c3f99b3aef62), LL(0xc5c533c566f697a3), LL(0x2525942535b14a10), LL(0x59597959f220b2ab), + LL(0x84842a8454ae15d0), LL(0x7272d572b7a7e4c5), LL(0x3939e439d5dd72ec), LL(0x4c4c2d4c5a619816), + LL(0x5e5e655eca3bbc94), LL(0x7878fd78e785f09f), LL(0x3838e038ddd870e5), LL(0x8c8c0a8c14860598), + LL(0xd1d163d1c6b2bf17), LL(0xa5a5aea5410b57e4), LL(0xe2e2afe2434dd9a1), LL(0x616199612ff8c24e), + LL(0xb3b3f6b3f1457b42), LL(0x2121842115a54234), LL(0x9c9c4a9c94d62508), LL(0x1e1e781ef0663cee), + LL(0x4343114322528661), LL(0xc7c73bc776fc93b1), LL(0xfcfcd7fcb32be54f), LL(0x0404100420140824), + LL(0x51515951b208a2e3), LL(0x99995e99bcc72f25), LL(0x6d6da96d4fc4da22), LL(0x0d0d340d68391a65), + LL(0xfafacffa8335e979), LL(0xdfdf5bdfb684a369), LL(0x7e7ee57ed79bfca9), LL(0x242490243db44819), + LL(0x3b3bec3bc5d776fe), LL(0xabab96ab313d4b9a), LL(0xcece1fce3ed181f0), LL(0x1111441188552299), + LL(0x8f8f068f0c890383), LL(0x4e4e254e4a6b9c04), LL(0xb7b7e6b7d1517366), LL(0xebeb8beb0b60cbe0), + LL(0x3c3cf03cfdcc78c1), LL(0x81813e817cbf1ffd), LL(0x94946a94d4fe3540), LL(0xf7f7fbf7eb0cf31c), + LL(0xb9b9deb9a1676f18), LL(0x13134c13985f268b), LL(0x2c2cb02c7d9c5851), LL(0xd3d36bd3d6b8bb05), + LL(0xe7e7bbe76b5cd38c), LL(0x6e6ea56e57cbdc39), LL(0xc4c437c46ef395aa), LL(0x03030c03180f061b), + LL(0x565645568a13acdc), LL(0x44440d441a49885e), LL(0x7f7fe17fdf9efea0), LL(0xa9a99ea921374f88), + LL(0x2a2aa82a4d825467), LL(0xbbbbd6bbb16d6b0a), LL(0xc1c123c146e29f87), LL(0x53535153a202a6f1), + LL(0xdcdc57dcae8ba572), LL(0x0b0b2c0b58271653), LL(0x9d9d4e9d9cd32701), LL(0x6c6cad6c47c1d82b), + LL(0x3131c43195f562a4), LL(0x7474cd7487b9e8f3), LL(0xf6f6fff6e309f115), LL(0x464605460a438c4c), + LL(0xacac8aac092645a5), LL(0x89891e893c970fb5), LL(0x14145014a04428b4), LL(0xe1e1a3e15b42dfba), + LL(0x16165816b04e2ca6), LL(0x3a3ae83acdd274f7), LL(0x6969b9696fd0d206), LL(0x09092409482d1241), + LL(0x7070dd70a7ade0d7), LL(0xb6b6e2b6d954716f), LL(0xd0d067d0ceb7bd1e), LL(0xeded93ed3b7ec7d6), + LL(0xcccc17cc2edb85e2), LL(0x424215422a578468), LL(0x98985a98b4c22d2c), LL(0xa4a4aaa4490e55ed), + LL(0x2828a0285d885075), LL(0x5c5c6d5cda31b886), LL(0xf8f8c7f8933fed6b), LL(0x8686228644a411c2), +}; + +static const u64 C1[256] = { + LL(0xd818186018c07830), LL(0x2623238c2305af46), LL(0xb8c6c63fc67ef991), LL(0xfbe8e887e8136fcd), + LL(0xcb878726874ca113), LL(0x11b8b8dab8a9626d), LL(0x0901010401080502), LL(0x0d4f4f214f426e9e), + LL(0x9b3636d836adee6c), LL(0xffa6a6a2a6590451), LL(0x0cd2d26fd2debdb9), LL(0x0ef5f5f3f5fb06f7), + LL(0x967979f979ef80f2), LL(0x306f6fa16f5fcede), LL(0x6d91917e91fcef3f), LL(0xf852525552aa07a4), + LL(0x4760609d6027fdc0), LL(0x35bcbccabc897665), LL(0x379b9b569baccd2b), LL(0x8a8e8e028e048c01), + LL(0xd2a3a3b6a371155b), LL(0x6c0c0c300c603c18), LL(0x847b7bf17bff8af6), LL(0x803535d435b5e16a), + LL(0xf51d1d741de8693a), LL(0xb3e0e0a7e05347dd), LL(0x21d7d77bd7f6acb3), LL(0x9cc2c22fc25eed99), + LL(0x432e2eb82e6d965c), LL(0x294b4b314b627a96), LL(0x5dfefedffea321e1), LL(0xd5575741578216ae), + LL(0xbd15155415a8412a), LL(0xe87777c1779fb6ee), LL(0x923737dc37a5eb6e), LL(0x9ee5e5b3e57b56d7), + LL(0x139f9f469f8cd923), LL(0x23f0f0e7f0d317fd), LL(0x204a4a354a6a7f94), LL(0x44dada4fda9e95a9), + LL(0xa258587d58fa25b0), LL(0xcfc9c903c906ca8f), LL(0x7c2929a429558d52), LL(0x5a0a0a280a502214), + LL(0x50b1b1feb1e14f7f), LL(0xc9a0a0baa0691a5d), LL(0x146b6bb16b7fdad6), LL(0xd985852e855cab17), + LL(0x3cbdbdcebd817367), LL(0x8f5d5d695dd234ba), LL(0x9010104010805020), LL(0x07f4f4f7f4f303f5), + LL(0xddcbcb0bcb16c08b), LL(0xd33e3ef83eedc67c), LL(0x2d0505140528110a), LL(0x78676781671fe6ce), + LL(0x97e4e4b7e47353d5), LL(0x0227279c2725bb4e), LL(0x7341411941325882), LL(0xa78b8b168b2c9d0b), + LL(0xf6a7a7a6a7510153), LL(0xb27d7de97dcf94fa), LL(0x4995956e95dcfb37), LL(0x56d8d847d88e9fad), + LL(0x70fbfbcbfb8b30eb), LL(0xcdeeee9fee2371c1), LL(0xbb7c7ced7cc791f8), LL(0x716666856617e3cc), + LL(0x7bdddd53dda68ea7), LL(0xaf17175c17b84b2e), LL(0x454747014702468e), LL(0x1a9e9e429e84dc21), + LL(0xd4caca0fca1ec589), LL(0x582d2db42d75995a), LL(0x2ebfbfc6bf917963), LL(0x3f07071c07381b0e), + LL(0xacadad8ead012347), LL(0xb05a5a755aea2fb4), LL(0xef838336836cb51b), LL(0xb63333cc3385ff66), + LL(0x5c636391633ff2c6), LL(0x1202020802100a04), LL(0x93aaaa92aa393849), LL(0xde7171d971afa8e2), + LL(0xc6c8c807c80ecf8d), LL(0xd119196419c87d32), LL(0x3b49493949727092), LL(0x5fd9d943d9869aaf), + LL(0x31f2f2eff2c31df9), LL(0xa8e3e3abe34b48db), LL(0xb95b5b715be22ab6), LL(0xbc88881a8834920d), + LL(0x3e9a9a529aa4c829), LL(0x0b262698262dbe4c), LL(0xbf3232c8328dfa64), LL(0x59b0b0fab0e94a7d), + LL(0xf2e9e983e91b6acf), LL(0x770f0f3c0f78331e), LL(0x33d5d573d5e6a6b7), LL(0xf480803a8074ba1d), + LL(0x27bebec2be997c61), LL(0xebcdcd13cd26de87), LL(0x893434d034bde468), LL(0x3248483d487a7590), + LL(0x54ffffdbffab24e3), LL(0x8d7a7af57af78ff4), LL(0x6490907a90f4ea3d), LL(0x9d5f5f615fc23ebe), + LL(0x3d202080201da040), LL(0x0f6868bd6867d5d0), LL(0xca1a1a681ad07234), LL(0xb7aeae82ae192c41), + LL(0x7db4b4eab4c95e75), LL(0xce54544d549a19a8), LL(0x7f93937693ece53b), LL(0x2f222288220daa44), + LL(0x6364648d6407e9c8), LL(0x2af1f1e3f1db12ff), LL(0xcc7373d173bfa2e6), LL(0x8212124812905a24), + LL(0x7a40401d403a5d80), LL(0x4808082008402810), LL(0x95c3c32bc356e89b), LL(0xdfecec97ec337bc5), + LL(0x4ddbdb4bdb9690ab), LL(0xc0a1a1bea1611f5f), LL(0x918d8d0e8d1c8307), LL(0xc83d3df43df5c97a), + LL(0x5b97976697ccf133), LL(0x0000000000000000), LL(0xf9cfcf1bcf36d483), LL(0x6e2b2bac2b458756), + LL(0xe17676c57697b3ec), LL(0xe68282328264b019), LL(0x28d6d67fd6fea9b1), LL(0xc31b1b6c1bd87736), + LL(0x74b5b5eeb5c15b77), LL(0xbeafaf86af112943), LL(0x1d6a6ab56a77dfd4), LL(0xea50505d50ba0da0), + LL(0x5745450945124c8a), LL(0x38f3f3ebf3cb18fb), LL(0xad3030c0309df060), LL(0xc4efef9bef2b74c3), + LL(0xda3f3ffc3fe5c37e), LL(0xc755554955921caa), LL(0xdba2a2b2a2791059), LL(0xe9eaea8fea0365c9), + LL(0x6a656589650fecca), LL(0x03babad2bab96869), LL(0x4a2f2fbc2f65935e), LL(0x8ec0c027c04ee79d), + LL(0x60dede5fdebe81a1), LL(0xfc1c1c701ce06c38), LL(0x46fdfdd3fdbb2ee7), LL(0x1f4d4d294d52649a), + LL(0x7692927292e4e039), LL(0xfa7575c9758fbcea), LL(0x3606061806301e0c), LL(0xae8a8a128a249809), + LL(0x4bb2b2f2b2f94079), LL(0x85e6e6bfe66359d1), LL(0x7e0e0e380e70361c), LL(0xe71f1f7c1ff8633e), + LL(0x556262956237f7c4), LL(0x3ad4d477d4eea3b5), LL(0x81a8a89aa829324d), LL(0x5296966296c4f431), + LL(0x62f9f9c3f99b3aef), LL(0xa3c5c533c566f697), LL(0x102525942535b14a), LL(0xab59597959f220b2), + LL(0xd084842a8454ae15), LL(0xc57272d572b7a7e4), LL(0xec3939e439d5dd72), LL(0x164c4c2d4c5a6198), + LL(0x945e5e655eca3bbc), LL(0x9f7878fd78e785f0), LL(0xe53838e038ddd870), LL(0x988c8c0a8c148605), + LL(0x17d1d163d1c6b2bf), LL(0xe4a5a5aea5410b57), LL(0xa1e2e2afe2434dd9), LL(0x4e616199612ff8c2), + LL(0x42b3b3f6b3f1457b), LL(0x342121842115a542), LL(0x089c9c4a9c94d625), LL(0xee1e1e781ef0663c), + LL(0x6143431143225286), LL(0xb1c7c73bc776fc93), LL(0x4ffcfcd7fcb32be5), LL(0x2404041004201408), + LL(0xe351515951b208a2), LL(0x2599995e99bcc72f), LL(0x226d6da96d4fc4da), LL(0x650d0d340d68391a), + LL(0x79fafacffa8335e9), LL(0x69dfdf5bdfb684a3), LL(0xa97e7ee57ed79bfc), LL(0x19242490243db448), + LL(0xfe3b3bec3bc5d776), LL(0x9aabab96ab313d4b), LL(0xf0cece1fce3ed181), LL(0x9911114411885522), + LL(0x838f8f068f0c8903), LL(0x044e4e254e4a6b9c), LL(0x66b7b7e6b7d15173), LL(0xe0ebeb8beb0b60cb), + LL(0xc13c3cf03cfdcc78), LL(0xfd81813e817cbf1f), LL(0x4094946a94d4fe35), LL(0x1cf7f7fbf7eb0cf3), + LL(0x18b9b9deb9a1676f), LL(0x8b13134c13985f26), LL(0x512c2cb02c7d9c58), LL(0x05d3d36bd3d6b8bb), + LL(0x8ce7e7bbe76b5cd3), LL(0x396e6ea56e57cbdc), LL(0xaac4c437c46ef395), LL(0x1b03030c03180f06), + LL(0xdc565645568a13ac), LL(0x5e44440d441a4988), LL(0xa07f7fe17fdf9efe), LL(0x88a9a99ea921374f), + LL(0x672a2aa82a4d8254), LL(0x0abbbbd6bbb16d6b), LL(0x87c1c123c146e29f), LL(0xf153535153a202a6), + LL(0x72dcdc57dcae8ba5), LL(0x530b0b2c0b582716), LL(0x019d9d4e9d9cd327), LL(0x2b6c6cad6c47c1d8), + LL(0xa43131c43195f562), LL(0xf37474cd7487b9e8), LL(0x15f6f6fff6e309f1), LL(0x4c464605460a438c), + LL(0xa5acac8aac092645), LL(0xb589891e893c970f), LL(0xb414145014a04428), LL(0xbae1e1a3e15b42df), + LL(0xa616165816b04e2c), LL(0xf73a3ae83acdd274), LL(0x066969b9696fd0d2), LL(0x4109092409482d12), + LL(0xd77070dd70a7ade0), LL(0x6fb6b6e2b6d95471), LL(0x1ed0d067d0ceb7bd), LL(0xd6eded93ed3b7ec7), + LL(0xe2cccc17cc2edb85), LL(0x68424215422a5784), LL(0x2c98985a98b4c22d), LL(0xeda4a4aaa4490e55), + LL(0x752828a0285d8850), LL(0x865c5c6d5cda31b8), LL(0x6bf8f8c7f8933fed), LL(0xc28686228644a411), +}; + +static const u64 C2[256] = { + LL(0x30d818186018c078), LL(0x462623238c2305af), LL(0x91b8c6c63fc67ef9), LL(0xcdfbe8e887e8136f), + LL(0x13cb878726874ca1), LL(0x6d11b8b8dab8a962), LL(0x0209010104010805), LL(0x9e0d4f4f214f426e), + LL(0x6c9b3636d836adee), LL(0x51ffa6a6a2a65904), LL(0xb90cd2d26fd2debd), LL(0xf70ef5f5f3f5fb06), + LL(0xf2967979f979ef80), LL(0xde306f6fa16f5fce), LL(0x3f6d91917e91fcef), LL(0xa4f852525552aa07), + LL(0xc04760609d6027fd), LL(0x6535bcbccabc8976), LL(0x2b379b9b569baccd), LL(0x018a8e8e028e048c), + LL(0x5bd2a3a3b6a37115), LL(0x186c0c0c300c603c), LL(0xf6847b7bf17bff8a), LL(0x6a803535d435b5e1), + LL(0x3af51d1d741de869), LL(0xddb3e0e0a7e05347), LL(0xb321d7d77bd7f6ac), LL(0x999cc2c22fc25eed), + LL(0x5c432e2eb82e6d96), LL(0x96294b4b314b627a), LL(0xe15dfefedffea321), LL(0xaed5575741578216), + LL(0x2abd15155415a841), LL(0xeee87777c1779fb6), LL(0x6e923737dc37a5eb), LL(0xd79ee5e5b3e57b56), + LL(0x23139f9f469f8cd9), LL(0xfd23f0f0e7f0d317), LL(0x94204a4a354a6a7f), LL(0xa944dada4fda9e95), + LL(0xb0a258587d58fa25), LL(0x8fcfc9c903c906ca), LL(0x527c2929a429558d), LL(0x145a0a0a280a5022), + LL(0x7f50b1b1feb1e14f), LL(0x5dc9a0a0baa0691a), LL(0xd6146b6bb16b7fda), LL(0x17d985852e855cab), + LL(0x673cbdbdcebd8173), LL(0xba8f5d5d695dd234), LL(0x2090101040108050), LL(0xf507f4f4f7f4f303), + LL(0x8bddcbcb0bcb16c0), LL(0x7cd33e3ef83eedc6), LL(0x0a2d050514052811), LL(0xce78676781671fe6), + LL(0xd597e4e4b7e47353), LL(0x4e0227279c2725bb), LL(0x8273414119413258), LL(0x0ba78b8b168b2c9d), + LL(0x53f6a7a7a6a75101), LL(0xfab27d7de97dcf94), LL(0x374995956e95dcfb), LL(0xad56d8d847d88e9f), + LL(0xeb70fbfbcbfb8b30), LL(0xc1cdeeee9fee2371), LL(0xf8bb7c7ced7cc791), LL(0xcc716666856617e3), + LL(0xa77bdddd53dda68e), LL(0x2eaf17175c17b84b), LL(0x8e45474701470246), LL(0x211a9e9e429e84dc), + LL(0x89d4caca0fca1ec5), LL(0x5a582d2db42d7599), LL(0x632ebfbfc6bf9179), LL(0x0e3f07071c07381b), + LL(0x47acadad8ead0123), LL(0xb4b05a5a755aea2f), LL(0x1bef838336836cb5), LL(0x66b63333cc3385ff), + LL(0xc65c636391633ff2), LL(0x041202020802100a), LL(0x4993aaaa92aa3938), LL(0xe2de7171d971afa8), + LL(0x8dc6c8c807c80ecf), LL(0x32d119196419c87d), LL(0x923b494939497270), LL(0xaf5fd9d943d9869a), + LL(0xf931f2f2eff2c31d), LL(0xdba8e3e3abe34b48), LL(0xb6b95b5b715be22a), LL(0x0dbc88881a883492), + LL(0x293e9a9a529aa4c8), LL(0x4c0b262698262dbe), LL(0x64bf3232c8328dfa), LL(0x7d59b0b0fab0e94a), + LL(0xcff2e9e983e91b6a), LL(0x1e770f0f3c0f7833), LL(0xb733d5d573d5e6a6), LL(0x1df480803a8074ba), + LL(0x6127bebec2be997c), LL(0x87ebcdcd13cd26de), LL(0x68893434d034bde4), LL(0x903248483d487a75), + LL(0xe354ffffdbffab24), LL(0xf48d7a7af57af78f), LL(0x3d6490907a90f4ea), LL(0xbe9d5f5f615fc23e), + LL(0x403d202080201da0), LL(0xd00f6868bd6867d5), LL(0x34ca1a1a681ad072), LL(0x41b7aeae82ae192c), + LL(0x757db4b4eab4c95e), LL(0xa8ce54544d549a19), LL(0x3b7f93937693ece5), LL(0x442f222288220daa), + LL(0xc86364648d6407e9), LL(0xff2af1f1e3f1db12), LL(0xe6cc7373d173bfa2), LL(0x248212124812905a), + LL(0x807a40401d403a5d), LL(0x1048080820084028), LL(0x9b95c3c32bc356e8), LL(0xc5dfecec97ec337b), + LL(0xab4ddbdb4bdb9690), LL(0x5fc0a1a1bea1611f), LL(0x07918d8d0e8d1c83), LL(0x7ac83d3df43df5c9), + LL(0x335b97976697ccf1), LL(0x0000000000000000), LL(0x83f9cfcf1bcf36d4), LL(0x566e2b2bac2b4587), + LL(0xece17676c57697b3), LL(0x19e68282328264b0), LL(0xb128d6d67fd6fea9), LL(0x36c31b1b6c1bd877), + LL(0x7774b5b5eeb5c15b), LL(0x43beafaf86af1129), LL(0xd41d6a6ab56a77df), LL(0xa0ea50505d50ba0d), + LL(0x8a5745450945124c), LL(0xfb38f3f3ebf3cb18), LL(0x60ad3030c0309df0), LL(0xc3c4efef9bef2b74), + LL(0x7eda3f3ffc3fe5c3), LL(0xaac755554955921c), LL(0x59dba2a2b2a27910), LL(0xc9e9eaea8fea0365), + LL(0xca6a656589650fec), LL(0x6903babad2bab968), LL(0x5e4a2f2fbc2f6593), LL(0x9d8ec0c027c04ee7), + LL(0xa160dede5fdebe81), LL(0x38fc1c1c701ce06c), LL(0xe746fdfdd3fdbb2e), LL(0x9a1f4d4d294d5264), + LL(0x397692927292e4e0), LL(0xeafa7575c9758fbc), LL(0x0c3606061806301e), LL(0x09ae8a8a128a2498), + LL(0x794bb2b2f2b2f940), LL(0xd185e6e6bfe66359), LL(0x1c7e0e0e380e7036), LL(0x3ee71f1f7c1ff863), + LL(0xc4556262956237f7), LL(0xb53ad4d477d4eea3), LL(0x4d81a8a89aa82932), LL(0x315296966296c4f4), + LL(0xef62f9f9c3f99b3a), LL(0x97a3c5c533c566f6), LL(0x4a102525942535b1), LL(0xb2ab59597959f220), + LL(0x15d084842a8454ae), LL(0xe4c57272d572b7a7), LL(0x72ec3939e439d5dd), LL(0x98164c4c2d4c5a61), + LL(0xbc945e5e655eca3b), LL(0xf09f7878fd78e785), LL(0x70e53838e038ddd8), LL(0x05988c8c0a8c1486), + LL(0xbf17d1d163d1c6b2), LL(0x57e4a5a5aea5410b), LL(0xd9a1e2e2afe2434d), LL(0xc24e616199612ff8), + LL(0x7b42b3b3f6b3f145), LL(0x42342121842115a5), LL(0x25089c9c4a9c94d6), LL(0x3cee1e1e781ef066), + LL(0x8661434311432252), LL(0x93b1c7c73bc776fc), LL(0xe54ffcfcd7fcb32b), LL(0x0824040410042014), + LL(0xa2e351515951b208), LL(0x2f2599995e99bcc7), LL(0xda226d6da96d4fc4), LL(0x1a650d0d340d6839), + LL(0xe979fafacffa8335), LL(0xa369dfdf5bdfb684), LL(0xfca97e7ee57ed79b), LL(0x4819242490243db4), + LL(0x76fe3b3bec3bc5d7), LL(0x4b9aabab96ab313d), LL(0x81f0cece1fce3ed1), LL(0x2299111144118855), + LL(0x03838f8f068f0c89), LL(0x9c044e4e254e4a6b), LL(0x7366b7b7e6b7d151), LL(0xcbe0ebeb8beb0b60), + LL(0x78c13c3cf03cfdcc), LL(0x1ffd81813e817cbf), LL(0x354094946a94d4fe), LL(0xf31cf7f7fbf7eb0c), + LL(0x6f18b9b9deb9a167), LL(0x268b13134c13985f), LL(0x58512c2cb02c7d9c), LL(0xbb05d3d36bd3d6b8), + LL(0xd38ce7e7bbe76b5c), LL(0xdc396e6ea56e57cb), LL(0x95aac4c437c46ef3), LL(0x061b03030c03180f), + LL(0xacdc565645568a13), LL(0x885e44440d441a49), LL(0xfea07f7fe17fdf9e), LL(0x4f88a9a99ea92137), + LL(0x54672a2aa82a4d82), LL(0x6b0abbbbd6bbb16d), LL(0x9f87c1c123c146e2), LL(0xa6f153535153a202), + LL(0xa572dcdc57dcae8b), LL(0x16530b0b2c0b5827), LL(0x27019d9d4e9d9cd3), LL(0xd82b6c6cad6c47c1), + LL(0x62a43131c43195f5), LL(0xe8f37474cd7487b9), LL(0xf115f6f6fff6e309), LL(0x8c4c464605460a43), + LL(0x45a5acac8aac0926), LL(0x0fb589891e893c97), LL(0x28b414145014a044), LL(0xdfbae1e1a3e15b42), + LL(0x2ca616165816b04e), LL(0x74f73a3ae83acdd2), LL(0xd2066969b9696fd0), LL(0x124109092409482d), + LL(0xe0d77070dd70a7ad), LL(0x716fb6b6e2b6d954), LL(0xbd1ed0d067d0ceb7), LL(0xc7d6eded93ed3b7e), + LL(0x85e2cccc17cc2edb), LL(0x8468424215422a57), LL(0x2d2c98985a98b4c2), LL(0x55eda4a4aaa4490e), + LL(0x50752828a0285d88), LL(0xb8865c5c6d5cda31), LL(0xed6bf8f8c7f8933f), LL(0x11c28686228644a4), +}; + +static const u64 C3[256] = { + LL(0x7830d818186018c0), LL(0xaf462623238c2305), LL(0xf991b8c6c63fc67e), LL(0x6fcdfbe8e887e813), + LL(0xa113cb878726874c), LL(0x626d11b8b8dab8a9), LL(0x0502090101040108), LL(0x6e9e0d4f4f214f42), + LL(0xee6c9b3636d836ad), LL(0x0451ffa6a6a2a659), LL(0xbdb90cd2d26fd2de), LL(0x06f70ef5f5f3f5fb), + LL(0x80f2967979f979ef), LL(0xcede306f6fa16f5f), LL(0xef3f6d91917e91fc), LL(0x07a4f852525552aa), + LL(0xfdc04760609d6027), LL(0x766535bcbccabc89), LL(0xcd2b379b9b569bac), LL(0x8c018a8e8e028e04), + LL(0x155bd2a3a3b6a371), LL(0x3c186c0c0c300c60), LL(0x8af6847b7bf17bff), LL(0xe16a803535d435b5), + LL(0x693af51d1d741de8), LL(0x47ddb3e0e0a7e053), LL(0xacb321d7d77bd7f6), LL(0xed999cc2c22fc25e), + LL(0x965c432e2eb82e6d), LL(0x7a96294b4b314b62), LL(0x21e15dfefedffea3), LL(0x16aed55757415782), + LL(0x412abd15155415a8), LL(0xb6eee87777c1779f), LL(0xeb6e923737dc37a5), LL(0x56d79ee5e5b3e57b), + LL(0xd923139f9f469f8c), LL(0x17fd23f0f0e7f0d3), LL(0x7f94204a4a354a6a), LL(0x95a944dada4fda9e), + LL(0x25b0a258587d58fa), LL(0xca8fcfc9c903c906), LL(0x8d527c2929a42955), LL(0x22145a0a0a280a50), + LL(0x4f7f50b1b1feb1e1), LL(0x1a5dc9a0a0baa069), LL(0xdad6146b6bb16b7f), LL(0xab17d985852e855c), + LL(0x73673cbdbdcebd81), LL(0x34ba8f5d5d695dd2), LL(0x5020901010401080), LL(0x03f507f4f4f7f4f3), + LL(0xc08bddcbcb0bcb16), LL(0xc67cd33e3ef83eed), LL(0x110a2d0505140528), LL(0xe6ce78676781671f), + LL(0x53d597e4e4b7e473), LL(0xbb4e0227279c2725), LL(0x5882734141194132), LL(0x9d0ba78b8b168b2c), + LL(0x0153f6a7a7a6a751), LL(0x94fab27d7de97dcf), LL(0xfb374995956e95dc), LL(0x9fad56d8d847d88e), + LL(0x30eb70fbfbcbfb8b), LL(0x71c1cdeeee9fee23), LL(0x91f8bb7c7ced7cc7), LL(0xe3cc716666856617), + LL(0x8ea77bdddd53dda6), LL(0x4b2eaf17175c17b8), LL(0x468e454747014702), LL(0xdc211a9e9e429e84), + LL(0xc589d4caca0fca1e), LL(0x995a582d2db42d75), LL(0x79632ebfbfc6bf91), LL(0x1b0e3f07071c0738), + LL(0x2347acadad8ead01), LL(0x2fb4b05a5a755aea), LL(0xb51bef838336836c), LL(0xff66b63333cc3385), + LL(0xf2c65c636391633f), LL(0x0a04120202080210), LL(0x384993aaaa92aa39), LL(0xa8e2de7171d971af), + LL(0xcf8dc6c8c807c80e), LL(0x7d32d119196419c8), LL(0x70923b4949394972), LL(0x9aaf5fd9d943d986), + LL(0x1df931f2f2eff2c3), LL(0x48dba8e3e3abe34b), LL(0x2ab6b95b5b715be2), LL(0x920dbc88881a8834), + LL(0xc8293e9a9a529aa4), LL(0xbe4c0b262698262d), LL(0xfa64bf3232c8328d), LL(0x4a7d59b0b0fab0e9), + LL(0x6acff2e9e983e91b), LL(0x331e770f0f3c0f78), LL(0xa6b733d5d573d5e6), LL(0xba1df480803a8074), + LL(0x7c6127bebec2be99), LL(0xde87ebcdcd13cd26), LL(0xe468893434d034bd), LL(0x75903248483d487a), + LL(0x24e354ffffdbffab), LL(0x8ff48d7a7af57af7), LL(0xea3d6490907a90f4), LL(0x3ebe9d5f5f615fc2), + LL(0xa0403d202080201d), LL(0xd5d00f6868bd6867), LL(0x7234ca1a1a681ad0), LL(0x2c41b7aeae82ae19), + LL(0x5e757db4b4eab4c9), LL(0x19a8ce54544d549a), LL(0xe53b7f93937693ec), LL(0xaa442f222288220d), + LL(0xe9c86364648d6407), LL(0x12ff2af1f1e3f1db), LL(0xa2e6cc7373d173bf), LL(0x5a24821212481290), + LL(0x5d807a40401d403a), LL(0x2810480808200840), LL(0xe89b95c3c32bc356), LL(0x7bc5dfecec97ec33), + LL(0x90ab4ddbdb4bdb96), LL(0x1f5fc0a1a1bea161), LL(0x8307918d8d0e8d1c), LL(0xc97ac83d3df43df5), + LL(0xf1335b97976697cc), LL(0x0000000000000000), LL(0xd483f9cfcf1bcf36), LL(0x87566e2b2bac2b45), + LL(0xb3ece17676c57697), LL(0xb019e68282328264), LL(0xa9b128d6d67fd6fe), LL(0x7736c31b1b6c1bd8), + LL(0x5b7774b5b5eeb5c1), LL(0x2943beafaf86af11), LL(0xdfd41d6a6ab56a77), LL(0x0da0ea50505d50ba), + LL(0x4c8a574545094512), LL(0x18fb38f3f3ebf3cb), LL(0xf060ad3030c0309d), LL(0x74c3c4efef9bef2b), + LL(0xc37eda3f3ffc3fe5), LL(0x1caac75555495592), LL(0x1059dba2a2b2a279), LL(0x65c9e9eaea8fea03), + LL(0xecca6a656589650f), LL(0x686903babad2bab9), LL(0x935e4a2f2fbc2f65), LL(0xe79d8ec0c027c04e), + LL(0x81a160dede5fdebe), LL(0x6c38fc1c1c701ce0), LL(0x2ee746fdfdd3fdbb), LL(0x649a1f4d4d294d52), + LL(0xe0397692927292e4), LL(0xbceafa7575c9758f), LL(0x1e0c360606180630), LL(0x9809ae8a8a128a24), + LL(0x40794bb2b2f2b2f9), LL(0x59d185e6e6bfe663), LL(0x361c7e0e0e380e70), LL(0x633ee71f1f7c1ff8), + LL(0xf7c4556262956237), LL(0xa3b53ad4d477d4ee), LL(0x324d81a8a89aa829), LL(0xf4315296966296c4), + LL(0x3aef62f9f9c3f99b), LL(0xf697a3c5c533c566), LL(0xb14a102525942535), LL(0x20b2ab59597959f2), + LL(0xae15d084842a8454), LL(0xa7e4c57272d572b7), LL(0xdd72ec3939e439d5), LL(0x6198164c4c2d4c5a), + LL(0x3bbc945e5e655eca), LL(0x85f09f7878fd78e7), LL(0xd870e53838e038dd), LL(0x8605988c8c0a8c14), + LL(0xb2bf17d1d163d1c6), LL(0x0b57e4a5a5aea541), LL(0x4dd9a1e2e2afe243), LL(0xf8c24e616199612f), + LL(0x457b42b3b3f6b3f1), LL(0xa542342121842115), LL(0xd625089c9c4a9c94), LL(0x663cee1e1e781ef0), + LL(0x5286614343114322), LL(0xfc93b1c7c73bc776), LL(0x2be54ffcfcd7fcb3), LL(0x1408240404100420), + LL(0x08a2e351515951b2), LL(0xc72f2599995e99bc), LL(0xc4da226d6da96d4f), LL(0x391a650d0d340d68), + LL(0x35e979fafacffa83), LL(0x84a369dfdf5bdfb6), LL(0x9bfca97e7ee57ed7), LL(0xb44819242490243d), + LL(0xd776fe3b3bec3bc5), LL(0x3d4b9aabab96ab31), LL(0xd181f0cece1fce3e), LL(0x5522991111441188), + LL(0x8903838f8f068f0c), LL(0x6b9c044e4e254e4a), LL(0x517366b7b7e6b7d1), LL(0x60cbe0ebeb8beb0b), + LL(0xcc78c13c3cf03cfd), LL(0xbf1ffd81813e817c), LL(0xfe354094946a94d4), LL(0x0cf31cf7f7fbf7eb), + LL(0x676f18b9b9deb9a1), LL(0x5f268b13134c1398), LL(0x9c58512c2cb02c7d), LL(0xb8bb05d3d36bd3d6), + LL(0x5cd38ce7e7bbe76b), LL(0xcbdc396e6ea56e57), LL(0xf395aac4c437c46e), LL(0x0f061b03030c0318), + LL(0x13acdc565645568a), LL(0x49885e44440d441a), LL(0x9efea07f7fe17fdf), LL(0x374f88a9a99ea921), + LL(0x8254672a2aa82a4d), LL(0x6d6b0abbbbd6bbb1), LL(0xe29f87c1c123c146), LL(0x02a6f153535153a2), + LL(0x8ba572dcdc57dcae), LL(0x2716530b0b2c0b58), LL(0xd327019d9d4e9d9c), LL(0xc1d82b6c6cad6c47), + LL(0xf562a43131c43195), LL(0xb9e8f37474cd7487), LL(0x09f115f6f6fff6e3), LL(0x438c4c464605460a), + LL(0x2645a5acac8aac09), LL(0x970fb589891e893c), LL(0x4428b414145014a0), LL(0x42dfbae1e1a3e15b), + LL(0x4e2ca616165816b0), LL(0xd274f73a3ae83acd), LL(0xd0d2066969b9696f), LL(0x2d12410909240948), + LL(0xade0d77070dd70a7), LL(0x54716fb6b6e2b6d9), LL(0xb7bd1ed0d067d0ce), LL(0x7ec7d6eded93ed3b), + LL(0xdb85e2cccc17cc2e), LL(0x578468424215422a), LL(0xc22d2c98985a98b4), LL(0x0e55eda4a4aaa449), + LL(0x8850752828a0285d), LL(0x31b8865c5c6d5cda), LL(0x3fed6bf8f8c7f893), LL(0xa411c28686228644), +}; + +static const u64 C4[256] = { + LL(0xc07830d818186018), LL(0x05af462623238c23), LL(0x7ef991b8c6c63fc6), LL(0x136fcdfbe8e887e8), + LL(0x4ca113cb87872687), LL(0xa9626d11b8b8dab8), LL(0x0805020901010401), LL(0x426e9e0d4f4f214f), + LL(0xadee6c9b3636d836), LL(0x590451ffa6a6a2a6), LL(0xdebdb90cd2d26fd2), LL(0xfb06f70ef5f5f3f5), + LL(0xef80f2967979f979), LL(0x5fcede306f6fa16f), LL(0xfcef3f6d91917e91), LL(0xaa07a4f852525552), + LL(0x27fdc04760609d60), LL(0x89766535bcbccabc), LL(0xaccd2b379b9b569b), LL(0x048c018a8e8e028e), + LL(0x71155bd2a3a3b6a3), LL(0x603c186c0c0c300c), LL(0xff8af6847b7bf17b), LL(0xb5e16a803535d435), + LL(0xe8693af51d1d741d), LL(0x5347ddb3e0e0a7e0), LL(0xf6acb321d7d77bd7), LL(0x5eed999cc2c22fc2), + LL(0x6d965c432e2eb82e), LL(0x627a96294b4b314b), LL(0xa321e15dfefedffe), LL(0x8216aed557574157), + LL(0xa8412abd15155415), LL(0x9fb6eee87777c177), LL(0xa5eb6e923737dc37), LL(0x7b56d79ee5e5b3e5), + LL(0x8cd923139f9f469f), LL(0xd317fd23f0f0e7f0), LL(0x6a7f94204a4a354a), LL(0x9e95a944dada4fda), + LL(0xfa25b0a258587d58), LL(0x06ca8fcfc9c903c9), LL(0x558d527c2929a429), LL(0x5022145a0a0a280a), + LL(0xe14f7f50b1b1feb1), LL(0x691a5dc9a0a0baa0), LL(0x7fdad6146b6bb16b), LL(0x5cab17d985852e85), + LL(0x8173673cbdbdcebd), LL(0xd234ba8f5d5d695d), LL(0x8050209010104010), LL(0xf303f507f4f4f7f4), + LL(0x16c08bddcbcb0bcb), LL(0xedc67cd33e3ef83e), LL(0x28110a2d05051405), LL(0x1fe6ce7867678167), + LL(0x7353d597e4e4b7e4), LL(0x25bb4e0227279c27), LL(0x3258827341411941), LL(0x2c9d0ba78b8b168b), + LL(0x510153f6a7a7a6a7), LL(0xcf94fab27d7de97d), LL(0xdcfb374995956e95), LL(0x8e9fad56d8d847d8), + LL(0x8b30eb70fbfbcbfb), LL(0x2371c1cdeeee9fee), LL(0xc791f8bb7c7ced7c), LL(0x17e3cc7166668566), + LL(0xa68ea77bdddd53dd), LL(0xb84b2eaf17175c17), LL(0x02468e4547470147), LL(0x84dc211a9e9e429e), + LL(0x1ec589d4caca0fca), LL(0x75995a582d2db42d), LL(0x9179632ebfbfc6bf), LL(0x381b0e3f07071c07), + LL(0x012347acadad8ead), LL(0xea2fb4b05a5a755a), LL(0x6cb51bef83833683), LL(0x85ff66b63333cc33), + LL(0x3ff2c65c63639163), LL(0x100a041202020802), LL(0x39384993aaaa92aa), LL(0xafa8e2de7171d971), + LL(0x0ecf8dc6c8c807c8), LL(0xc87d32d119196419), LL(0x7270923b49493949), LL(0x869aaf5fd9d943d9), + LL(0xc31df931f2f2eff2), LL(0x4b48dba8e3e3abe3), LL(0xe22ab6b95b5b715b), LL(0x34920dbc88881a88), + LL(0xa4c8293e9a9a529a), LL(0x2dbe4c0b26269826), LL(0x8dfa64bf3232c832), LL(0xe94a7d59b0b0fab0), + LL(0x1b6acff2e9e983e9), LL(0x78331e770f0f3c0f), LL(0xe6a6b733d5d573d5), LL(0x74ba1df480803a80), + LL(0x997c6127bebec2be), LL(0x26de87ebcdcd13cd), LL(0xbde468893434d034), LL(0x7a75903248483d48), + LL(0xab24e354ffffdbff), LL(0xf78ff48d7a7af57a), LL(0xf4ea3d6490907a90), LL(0xc23ebe9d5f5f615f), + LL(0x1da0403d20208020), LL(0x67d5d00f6868bd68), LL(0xd07234ca1a1a681a), LL(0x192c41b7aeae82ae), + LL(0xc95e757db4b4eab4), LL(0x9a19a8ce54544d54), LL(0xece53b7f93937693), LL(0x0daa442f22228822), + LL(0x07e9c86364648d64), LL(0xdb12ff2af1f1e3f1), LL(0xbfa2e6cc7373d173), LL(0x905a248212124812), + LL(0x3a5d807a40401d40), LL(0x4028104808082008), LL(0x56e89b95c3c32bc3), LL(0x337bc5dfecec97ec), + LL(0x9690ab4ddbdb4bdb), LL(0x611f5fc0a1a1bea1), LL(0x1c8307918d8d0e8d), LL(0xf5c97ac83d3df43d), + LL(0xccf1335b97976697), LL(0x0000000000000000), LL(0x36d483f9cfcf1bcf), LL(0x4587566e2b2bac2b), + LL(0x97b3ece17676c576), LL(0x64b019e682823282), LL(0xfea9b128d6d67fd6), LL(0xd87736c31b1b6c1b), + LL(0xc15b7774b5b5eeb5), LL(0x112943beafaf86af), LL(0x77dfd41d6a6ab56a), LL(0xba0da0ea50505d50), + LL(0x124c8a5745450945), LL(0xcb18fb38f3f3ebf3), LL(0x9df060ad3030c030), LL(0x2b74c3c4efef9bef), + LL(0xe5c37eda3f3ffc3f), LL(0x921caac755554955), LL(0x791059dba2a2b2a2), LL(0x0365c9e9eaea8fea), + LL(0x0fecca6a65658965), LL(0xb9686903babad2ba), LL(0x65935e4a2f2fbc2f), LL(0x4ee79d8ec0c027c0), + LL(0xbe81a160dede5fde), LL(0xe06c38fc1c1c701c), LL(0xbb2ee746fdfdd3fd), LL(0x52649a1f4d4d294d), + LL(0xe4e0397692927292), LL(0x8fbceafa7575c975), LL(0x301e0c3606061806), LL(0x249809ae8a8a128a), + LL(0xf940794bb2b2f2b2), LL(0x6359d185e6e6bfe6), LL(0x70361c7e0e0e380e), LL(0xf8633ee71f1f7c1f), + LL(0x37f7c45562629562), LL(0xeea3b53ad4d477d4), LL(0x29324d81a8a89aa8), LL(0xc4f4315296966296), + LL(0x9b3aef62f9f9c3f9), LL(0x66f697a3c5c533c5), LL(0x35b14a1025259425), LL(0xf220b2ab59597959), + LL(0x54ae15d084842a84), LL(0xb7a7e4c57272d572), LL(0xd5dd72ec3939e439), LL(0x5a6198164c4c2d4c), + LL(0xca3bbc945e5e655e), LL(0xe785f09f7878fd78), LL(0xddd870e53838e038), LL(0x148605988c8c0a8c), + LL(0xc6b2bf17d1d163d1), LL(0x410b57e4a5a5aea5), LL(0x434dd9a1e2e2afe2), LL(0x2ff8c24e61619961), + LL(0xf1457b42b3b3f6b3), LL(0x15a5423421218421), LL(0x94d625089c9c4a9c), LL(0xf0663cee1e1e781e), + LL(0x2252866143431143), LL(0x76fc93b1c7c73bc7), LL(0xb32be54ffcfcd7fc), LL(0x2014082404041004), + LL(0xb208a2e351515951), LL(0xbcc72f2599995e99), LL(0x4fc4da226d6da96d), LL(0x68391a650d0d340d), + LL(0x8335e979fafacffa), LL(0xb684a369dfdf5bdf), LL(0xd79bfca97e7ee57e), LL(0x3db4481924249024), + LL(0xc5d776fe3b3bec3b), LL(0x313d4b9aabab96ab), LL(0x3ed181f0cece1fce), LL(0x8855229911114411), + LL(0x0c8903838f8f068f), LL(0x4a6b9c044e4e254e), LL(0xd1517366b7b7e6b7), LL(0x0b60cbe0ebeb8beb), + LL(0xfdcc78c13c3cf03c), LL(0x7cbf1ffd81813e81), LL(0xd4fe354094946a94), LL(0xeb0cf31cf7f7fbf7), + LL(0xa1676f18b9b9deb9), LL(0x985f268b13134c13), LL(0x7d9c58512c2cb02c), LL(0xd6b8bb05d3d36bd3), + LL(0x6b5cd38ce7e7bbe7), LL(0x57cbdc396e6ea56e), LL(0x6ef395aac4c437c4), LL(0x180f061b03030c03), + LL(0x8a13acdc56564556), LL(0x1a49885e44440d44), LL(0xdf9efea07f7fe17f), LL(0x21374f88a9a99ea9), + LL(0x4d8254672a2aa82a), LL(0xb16d6b0abbbbd6bb), LL(0x46e29f87c1c123c1), LL(0xa202a6f153535153), + LL(0xae8ba572dcdc57dc), LL(0x582716530b0b2c0b), LL(0x9cd327019d9d4e9d), LL(0x47c1d82b6c6cad6c), + LL(0x95f562a43131c431), LL(0x87b9e8f37474cd74), LL(0xe309f115f6f6fff6), LL(0x0a438c4c46460546), + LL(0x092645a5acac8aac), LL(0x3c970fb589891e89), LL(0xa04428b414145014), LL(0x5b42dfbae1e1a3e1), + LL(0xb04e2ca616165816), LL(0xcdd274f73a3ae83a), LL(0x6fd0d2066969b969), LL(0x482d124109092409), + LL(0xa7ade0d77070dd70), LL(0xd954716fb6b6e2b6), LL(0xceb7bd1ed0d067d0), LL(0x3b7ec7d6eded93ed), + LL(0x2edb85e2cccc17cc), LL(0x2a57846842421542), LL(0xb4c22d2c98985a98), LL(0x490e55eda4a4aaa4), + LL(0x5d8850752828a028), LL(0xda31b8865c5c6d5c), LL(0x933fed6bf8f8c7f8), LL(0x44a411c286862286), +}; + +static const u64 C5[256] = { + LL(0x18c07830d8181860), LL(0x2305af462623238c), LL(0xc67ef991b8c6c63f), LL(0xe8136fcdfbe8e887), + LL(0x874ca113cb878726), LL(0xb8a9626d11b8b8da), LL(0x0108050209010104), LL(0x4f426e9e0d4f4f21), + LL(0x36adee6c9b3636d8), LL(0xa6590451ffa6a6a2), LL(0xd2debdb90cd2d26f), LL(0xf5fb06f70ef5f5f3), + LL(0x79ef80f2967979f9), LL(0x6f5fcede306f6fa1), LL(0x91fcef3f6d91917e), LL(0x52aa07a4f8525255), + LL(0x6027fdc04760609d), LL(0xbc89766535bcbcca), LL(0x9baccd2b379b9b56), LL(0x8e048c018a8e8e02), + LL(0xa371155bd2a3a3b6), LL(0x0c603c186c0c0c30), LL(0x7bff8af6847b7bf1), LL(0x35b5e16a803535d4), + LL(0x1de8693af51d1d74), LL(0xe05347ddb3e0e0a7), LL(0xd7f6acb321d7d77b), LL(0xc25eed999cc2c22f), + LL(0x2e6d965c432e2eb8), LL(0x4b627a96294b4b31), LL(0xfea321e15dfefedf), LL(0x578216aed5575741), + LL(0x15a8412abd151554), LL(0x779fb6eee87777c1), LL(0x37a5eb6e923737dc), LL(0xe57b56d79ee5e5b3), + LL(0x9f8cd923139f9f46), LL(0xf0d317fd23f0f0e7), LL(0x4a6a7f94204a4a35), LL(0xda9e95a944dada4f), + LL(0x58fa25b0a258587d), LL(0xc906ca8fcfc9c903), LL(0x29558d527c2929a4), LL(0x0a5022145a0a0a28), + LL(0xb1e14f7f50b1b1fe), LL(0xa0691a5dc9a0a0ba), LL(0x6b7fdad6146b6bb1), LL(0x855cab17d985852e), + LL(0xbd8173673cbdbdce), LL(0x5dd234ba8f5d5d69), LL(0x1080502090101040), LL(0xf4f303f507f4f4f7), + LL(0xcb16c08bddcbcb0b), LL(0x3eedc67cd33e3ef8), LL(0x0528110a2d050514), LL(0x671fe6ce78676781), + LL(0xe47353d597e4e4b7), LL(0x2725bb4e0227279c), LL(0x4132588273414119), LL(0x8b2c9d0ba78b8b16), + LL(0xa7510153f6a7a7a6), LL(0x7dcf94fab27d7de9), LL(0x95dcfb374995956e), LL(0xd88e9fad56d8d847), + LL(0xfb8b30eb70fbfbcb), LL(0xee2371c1cdeeee9f), LL(0x7cc791f8bb7c7ced), LL(0x6617e3cc71666685), + LL(0xdda68ea77bdddd53), LL(0x17b84b2eaf17175c), LL(0x4702468e45474701), LL(0x9e84dc211a9e9e42), + LL(0xca1ec589d4caca0f), LL(0x2d75995a582d2db4), LL(0xbf9179632ebfbfc6), LL(0x07381b0e3f07071c), + LL(0xad012347acadad8e), LL(0x5aea2fb4b05a5a75), LL(0x836cb51bef838336), LL(0x3385ff66b63333cc), + LL(0x633ff2c65c636391), LL(0x02100a0412020208), LL(0xaa39384993aaaa92), LL(0x71afa8e2de7171d9), + LL(0xc80ecf8dc6c8c807), LL(0x19c87d32d1191964), LL(0x497270923b494939), LL(0xd9869aaf5fd9d943), + LL(0xf2c31df931f2f2ef), LL(0xe34b48dba8e3e3ab), LL(0x5be22ab6b95b5b71), LL(0x8834920dbc88881a), + LL(0x9aa4c8293e9a9a52), LL(0x262dbe4c0b262698), LL(0x328dfa64bf3232c8), LL(0xb0e94a7d59b0b0fa), + LL(0xe91b6acff2e9e983), LL(0x0f78331e770f0f3c), LL(0xd5e6a6b733d5d573), LL(0x8074ba1df480803a), + LL(0xbe997c6127bebec2), LL(0xcd26de87ebcdcd13), LL(0x34bde468893434d0), LL(0x487a75903248483d), + LL(0xffab24e354ffffdb), LL(0x7af78ff48d7a7af5), LL(0x90f4ea3d6490907a), LL(0x5fc23ebe9d5f5f61), + LL(0x201da0403d202080), LL(0x6867d5d00f6868bd), LL(0x1ad07234ca1a1a68), LL(0xae192c41b7aeae82), + LL(0xb4c95e757db4b4ea), LL(0x549a19a8ce54544d), LL(0x93ece53b7f939376), LL(0x220daa442f222288), + LL(0x6407e9c86364648d), LL(0xf1db12ff2af1f1e3), LL(0x73bfa2e6cc7373d1), LL(0x12905a2482121248), + LL(0x403a5d807a40401d), LL(0x0840281048080820), LL(0xc356e89b95c3c32b), LL(0xec337bc5dfecec97), + LL(0xdb9690ab4ddbdb4b), LL(0xa1611f5fc0a1a1be), LL(0x8d1c8307918d8d0e), LL(0x3df5c97ac83d3df4), + LL(0x97ccf1335b979766), LL(0x0000000000000000), LL(0xcf36d483f9cfcf1b), LL(0x2b4587566e2b2bac), + LL(0x7697b3ece17676c5), LL(0x8264b019e6828232), LL(0xd6fea9b128d6d67f), LL(0x1bd87736c31b1b6c), + LL(0xb5c15b7774b5b5ee), LL(0xaf112943beafaf86), LL(0x6a77dfd41d6a6ab5), LL(0x50ba0da0ea50505d), + LL(0x45124c8a57454509), LL(0xf3cb18fb38f3f3eb), LL(0x309df060ad3030c0), LL(0xef2b74c3c4efef9b), + LL(0x3fe5c37eda3f3ffc), LL(0x55921caac7555549), LL(0xa2791059dba2a2b2), LL(0xea0365c9e9eaea8f), + LL(0x650fecca6a656589), LL(0xbab9686903babad2), LL(0x2f65935e4a2f2fbc), LL(0xc04ee79d8ec0c027), + LL(0xdebe81a160dede5f), LL(0x1ce06c38fc1c1c70), LL(0xfdbb2ee746fdfdd3), LL(0x4d52649a1f4d4d29), + LL(0x92e4e03976929272), LL(0x758fbceafa7575c9), LL(0x06301e0c36060618), LL(0x8a249809ae8a8a12), + LL(0xb2f940794bb2b2f2), LL(0xe66359d185e6e6bf), LL(0x0e70361c7e0e0e38), LL(0x1ff8633ee71f1f7c), + LL(0x6237f7c455626295), LL(0xd4eea3b53ad4d477), LL(0xa829324d81a8a89a), LL(0x96c4f43152969662), + LL(0xf99b3aef62f9f9c3), LL(0xc566f697a3c5c533), LL(0x2535b14a10252594), LL(0x59f220b2ab595979), + LL(0x8454ae15d084842a), LL(0x72b7a7e4c57272d5), LL(0x39d5dd72ec3939e4), LL(0x4c5a6198164c4c2d), + LL(0x5eca3bbc945e5e65), LL(0x78e785f09f7878fd), LL(0x38ddd870e53838e0), LL(0x8c148605988c8c0a), + LL(0xd1c6b2bf17d1d163), LL(0xa5410b57e4a5a5ae), LL(0xe2434dd9a1e2e2af), LL(0x612ff8c24e616199), + LL(0xb3f1457b42b3b3f6), LL(0x2115a54234212184), LL(0x9c94d625089c9c4a), LL(0x1ef0663cee1e1e78), + LL(0x4322528661434311), LL(0xc776fc93b1c7c73b), LL(0xfcb32be54ffcfcd7), LL(0x0420140824040410), + LL(0x51b208a2e3515159), LL(0x99bcc72f2599995e), LL(0x6d4fc4da226d6da9), LL(0x0d68391a650d0d34), + LL(0xfa8335e979fafacf), LL(0xdfb684a369dfdf5b), LL(0x7ed79bfca97e7ee5), LL(0x243db44819242490), + LL(0x3bc5d776fe3b3bec), LL(0xab313d4b9aabab96), LL(0xce3ed181f0cece1f), LL(0x1188552299111144), + LL(0x8f0c8903838f8f06), LL(0x4e4a6b9c044e4e25), LL(0xb7d1517366b7b7e6), LL(0xeb0b60cbe0ebeb8b), + LL(0x3cfdcc78c13c3cf0), LL(0x817cbf1ffd81813e), LL(0x94d4fe354094946a), LL(0xf7eb0cf31cf7f7fb), + LL(0xb9a1676f18b9b9de), LL(0x13985f268b13134c), LL(0x2c7d9c58512c2cb0), LL(0xd3d6b8bb05d3d36b), + LL(0xe76b5cd38ce7e7bb), LL(0x6e57cbdc396e6ea5), LL(0xc46ef395aac4c437), LL(0x03180f061b03030c), + LL(0x568a13acdc565645), LL(0x441a49885e44440d), LL(0x7fdf9efea07f7fe1), LL(0xa921374f88a9a99e), + LL(0x2a4d8254672a2aa8), LL(0xbbb16d6b0abbbbd6), LL(0xc146e29f87c1c123), LL(0x53a202a6f1535351), + LL(0xdcae8ba572dcdc57), LL(0x0b582716530b0b2c), LL(0x9d9cd327019d9d4e), LL(0x6c47c1d82b6c6cad), + LL(0x3195f562a43131c4), LL(0x7487b9e8f37474cd), LL(0xf6e309f115f6f6ff), LL(0x460a438c4c464605), + LL(0xac092645a5acac8a), LL(0x893c970fb589891e), LL(0x14a04428b4141450), LL(0xe15b42dfbae1e1a3), + LL(0x16b04e2ca6161658), LL(0x3acdd274f73a3ae8), LL(0x696fd0d2066969b9), LL(0x09482d1241090924), + LL(0x70a7ade0d77070dd), LL(0xb6d954716fb6b6e2), LL(0xd0ceb7bd1ed0d067), LL(0xed3b7ec7d6eded93), + LL(0xcc2edb85e2cccc17), LL(0x422a578468424215), LL(0x98b4c22d2c98985a), LL(0xa4490e55eda4a4aa), + LL(0x285d8850752828a0), LL(0x5cda31b8865c5c6d), LL(0xf8933fed6bf8f8c7), LL(0x8644a411c2868622), +}; + +static const u64 C6[256] = { + LL(0x6018c07830d81818), LL(0x8c2305af46262323), LL(0x3fc67ef991b8c6c6), LL(0x87e8136fcdfbe8e8), + LL(0x26874ca113cb8787), LL(0xdab8a9626d11b8b8), LL(0x0401080502090101), LL(0x214f426e9e0d4f4f), + LL(0xd836adee6c9b3636), LL(0xa2a6590451ffa6a6), LL(0x6fd2debdb90cd2d2), LL(0xf3f5fb06f70ef5f5), + LL(0xf979ef80f2967979), LL(0xa16f5fcede306f6f), LL(0x7e91fcef3f6d9191), LL(0x5552aa07a4f85252), + LL(0x9d6027fdc0476060), LL(0xcabc89766535bcbc), LL(0x569baccd2b379b9b), LL(0x028e048c018a8e8e), + LL(0xb6a371155bd2a3a3), LL(0x300c603c186c0c0c), LL(0xf17bff8af6847b7b), LL(0xd435b5e16a803535), + LL(0x741de8693af51d1d), LL(0xa7e05347ddb3e0e0), LL(0x7bd7f6acb321d7d7), LL(0x2fc25eed999cc2c2), + LL(0xb82e6d965c432e2e), LL(0x314b627a96294b4b), LL(0xdffea321e15dfefe), LL(0x41578216aed55757), + LL(0x5415a8412abd1515), LL(0xc1779fb6eee87777), LL(0xdc37a5eb6e923737), LL(0xb3e57b56d79ee5e5), + LL(0x469f8cd923139f9f), LL(0xe7f0d317fd23f0f0), LL(0x354a6a7f94204a4a), LL(0x4fda9e95a944dada), + LL(0x7d58fa25b0a25858), LL(0x03c906ca8fcfc9c9), LL(0xa429558d527c2929), LL(0x280a5022145a0a0a), + LL(0xfeb1e14f7f50b1b1), LL(0xbaa0691a5dc9a0a0), LL(0xb16b7fdad6146b6b), LL(0x2e855cab17d98585), + LL(0xcebd8173673cbdbd), LL(0x695dd234ba8f5d5d), LL(0x4010805020901010), LL(0xf7f4f303f507f4f4), + LL(0x0bcb16c08bddcbcb), LL(0xf83eedc67cd33e3e), LL(0x140528110a2d0505), LL(0x81671fe6ce786767), + LL(0xb7e47353d597e4e4), LL(0x9c2725bb4e022727), LL(0x1941325882734141), LL(0x168b2c9d0ba78b8b), + LL(0xa6a7510153f6a7a7), LL(0xe97dcf94fab27d7d), LL(0x6e95dcfb37499595), LL(0x47d88e9fad56d8d8), + LL(0xcbfb8b30eb70fbfb), LL(0x9fee2371c1cdeeee), LL(0xed7cc791f8bb7c7c), LL(0x856617e3cc716666), + LL(0x53dda68ea77bdddd), LL(0x5c17b84b2eaf1717), LL(0x014702468e454747), LL(0x429e84dc211a9e9e), + LL(0x0fca1ec589d4caca), LL(0xb42d75995a582d2d), LL(0xc6bf9179632ebfbf), LL(0x1c07381b0e3f0707), + LL(0x8ead012347acadad), LL(0x755aea2fb4b05a5a), LL(0x36836cb51bef8383), LL(0xcc3385ff66b63333), + LL(0x91633ff2c65c6363), LL(0x0802100a04120202), LL(0x92aa39384993aaaa), LL(0xd971afa8e2de7171), + LL(0x07c80ecf8dc6c8c8), LL(0x6419c87d32d11919), LL(0x39497270923b4949), LL(0x43d9869aaf5fd9d9), + LL(0xeff2c31df931f2f2), LL(0xabe34b48dba8e3e3), LL(0x715be22ab6b95b5b), LL(0x1a8834920dbc8888), + LL(0x529aa4c8293e9a9a), LL(0x98262dbe4c0b2626), LL(0xc8328dfa64bf3232), LL(0xfab0e94a7d59b0b0), + LL(0x83e91b6acff2e9e9), LL(0x3c0f78331e770f0f), LL(0x73d5e6a6b733d5d5), LL(0x3a8074ba1df48080), + LL(0xc2be997c6127bebe), LL(0x13cd26de87ebcdcd), LL(0xd034bde468893434), LL(0x3d487a7590324848), + LL(0xdbffab24e354ffff), LL(0xf57af78ff48d7a7a), LL(0x7a90f4ea3d649090), LL(0x615fc23ebe9d5f5f), + LL(0x80201da0403d2020), LL(0xbd6867d5d00f6868), LL(0x681ad07234ca1a1a), LL(0x82ae192c41b7aeae), + LL(0xeab4c95e757db4b4), LL(0x4d549a19a8ce5454), LL(0x7693ece53b7f9393), LL(0x88220daa442f2222), + LL(0x8d6407e9c8636464), LL(0xe3f1db12ff2af1f1), LL(0xd173bfa2e6cc7373), LL(0x4812905a24821212), + LL(0x1d403a5d807a4040), LL(0x2008402810480808), LL(0x2bc356e89b95c3c3), LL(0x97ec337bc5dfecec), + LL(0x4bdb9690ab4ddbdb), LL(0xbea1611f5fc0a1a1), LL(0x0e8d1c8307918d8d), LL(0xf43df5c97ac83d3d), + LL(0x6697ccf1335b9797), LL(0x0000000000000000), LL(0x1bcf36d483f9cfcf), LL(0xac2b4587566e2b2b), + LL(0xc57697b3ece17676), LL(0x328264b019e68282), LL(0x7fd6fea9b128d6d6), LL(0x6c1bd87736c31b1b), + LL(0xeeb5c15b7774b5b5), LL(0x86af112943beafaf), LL(0xb56a77dfd41d6a6a), LL(0x5d50ba0da0ea5050), + LL(0x0945124c8a574545), LL(0xebf3cb18fb38f3f3), LL(0xc0309df060ad3030), LL(0x9bef2b74c3c4efef), + LL(0xfc3fe5c37eda3f3f), LL(0x4955921caac75555), LL(0xb2a2791059dba2a2), LL(0x8fea0365c9e9eaea), + LL(0x89650fecca6a6565), LL(0xd2bab9686903baba), LL(0xbc2f65935e4a2f2f), LL(0x27c04ee79d8ec0c0), + LL(0x5fdebe81a160dede), LL(0x701ce06c38fc1c1c), LL(0xd3fdbb2ee746fdfd), LL(0x294d52649a1f4d4d), + LL(0x7292e4e039769292), LL(0xc9758fbceafa7575), LL(0x1806301e0c360606), LL(0x128a249809ae8a8a), + LL(0xf2b2f940794bb2b2), LL(0xbfe66359d185e6e6), LL(0x380e70361c7e0e0e), LL(0x7c1ff8633ee71f1f), + LL(0x956237f7c4556262), LL(0x77d4eea3b53ad4d4), LL(0x9aa829324d81a8a8), LL(0x6296c4f431529696), + LL(0xc3f99b3aef62f9f9), LL(0x33c566f697a3c5c5), LL(0x942535b14a102525), LL(0x7959f220b2ab5959), + LL(0x2a8454ae15d08484), LL(0xd572b7a7e4c57272), LL(0xe439d5dd72ec3939), LL(0x2d4c5a6198164c4c), + LL(0x655eca3bbc945e5e), LL(0xfd78e785f09f7878), LL(0xe038ddd870e53838), LL(0x0a8c148605988c8c), + LL(0x63d1c6b2bf17d1d1), LL(0xaea5410b57e4a5a5), LL(0xafe2434dd9a1e2e2), LL(0x99612ff8c24e6161), + LL(0xf6b3f1457b42b3b3), LL(0x842115a542342121), LL(0x4a9c94d625089c9c), LL(0x781ef0663cee1e1e), + LL(0x1143225286614343), LL(0x3bc776fc93b1c7c7), LL(0xd7fcb32be54ffcfc), LL(0x1004201408240404), + LL(0x5951b208a2e35151), LL(0x5e99bcc72f259999), LL(0xa96d4fc4da226d6d), LL(0x340d68391a650d0d), + LL(0xcffa8335e979fafa), LL(0x5bdfb684a369dfdf), LL(0xe57ed79bfca97e7e), LL(0x90243db448192424), + LL(0xec3bc5d776fe3b3b), LL(0x96ab313d4b9aabab), LL(0x1fce3ed181f0cece), LL(0x4411885522991111), + LL(0x068f0c8903838f8f), LL(0x254e4a6b9c044e4e), LL(0xe6b7d1517366b7b7), LL(0x8beb0b60cbe0ebeb), + LL(0xf03cfdcc78c13c3c), LL(0x3e817cbf1ffd8181), LL(0x6a94d4fe35409494), LL(0xfbf7eb0cf31cf7f7), + LL(0xdeb9a1676f18b9b9), LL(0x4c13985f268b1313), LL(0xb02c7d9c58512c2c), LL(0x6bd3d6b8bb05d3d3), + LL(0xbbe76b5cd38ce7e7), LL(0xa56e57cbdc396e6e), LL(0x37c46ef395aac4c4), LL(0x0c03180f061b0303), + LL(0x45568a13acdc5656), LL(0x0d441a49885e4444), LL(0xe17fdf9efea07f7f), LL(0x9ea921374f88a9a9), + LL(0xa82a4d8254672a2a), LL(0xd6bbb16d6b0abbbb), LL(0x23c146e29f87c1c1), LL(0x5153a202a6f15353), + LL(0x57dcae8ba572dcdc), LL(0x2c0b582716530b0b), LL(0x4e9d9cd327019d9d), LL(0xad6c47c1d82b6c6c), + LL(0xc43195f562a43131), LL(0xcd7487b9e8f37474), LL(0xfff6e309f115f6f6), LL(0x05460a438c4c4646), + LL(0x8aac092645a5acac), LL(0x1e893c970fb58989), LL(0x5014a04428b41414), LL(0xa3e15b42dfbae1e1), + LL(0x5816b04e2ca61616), LL(0xe83acdd274f73a3a), LL(0xb9696fd0d2066969), LL(0x2409482d12410909), + LL(0xdd70a7ade0d77070), LL(0xe2b6d954716fb6b6), LL(0x67d0ceb7bd1ed0d0), LL(0x93ed3b7ec7d6eded), + LL(0x17cc2edb85e2cccc), LL(0x15422a5784684242), LL(0x5a98b4c22d2c9898), LL(0xaaa4490e55eda4a4), + LL(0xa0285d8850752828), LL(0x6d5cda31b8865c5c), LL(0xc7f8933fed6bf8f8), LL(0x228644a411c28686), +}; + +static const u64 C7[256] = { + LL(0x186018c07830d818), LL(0x238c2305af462623), LL(0xc63fc67ef991b8c6), LL(0xe887e8136fcdfbe8), + LL(0x8726874ca113cb87), LL(0xb8dab8a9626d11b8), LL(0x0104010805020901), LL(0x4f214f426e9e0d4f), + LL(0x36d836adee6c9b36), LL(0xa6a2a6590451ffa6), LL(0xd26fd2debdb90cd2), LL(0xf5f3f5fb06f70ef5), + LL(0x79f979ef80f29679), LL(0x6fa16f5fcede306f), LL(0x917e91fcef3f6d91), LL(0x525552aa07a4f852), + LL(0x609d6027fdc04760), LL(0xbccabc89766535bc), LL(0x9b569baccd2b379b), LL(0x8e028e048c018a8e), + LL(0xa3b6a371155bd2a3), LL(0x0c300c603c186c0c), LL(0x7bf17bff8af6847b), LL(0x35d435b5e16a8035), + LL(0x1d741de8693af51d), LL(0xe0a7e05347ddb3e0), LL(0xd77bd7f6acb321d7), LL(0xc22fc25eed999cc2), + LL(0x2eb82e6d965c432e), LL(0x4b314b627a96294b), LL(0xfedffea321e15dfe), LL(0x5741578216aed557), + LL(0x155415a8412abd15), LL(0x77c1779fb6eee877), LL(0x37dc37a5eb6e9237), LL(0xe5b3e57b56d79ee5), + LL(0x9f469f8cd923139f), LL(0xf0e7f0d317fd23f0), LL(0x4a354a6a7f94204a), LL(0xda4fda9e95a944da), + LL(0x587d58fa25b0a258), LL(0xc903c906ca8fcfc9), LL(0x29a429558d527c29), LL(0x0a280a5022145a0a), + LL(0xb1feb1e14f7f50b1), LL(0xa0baa0691a5dc9a0), LL(0x6bb16b7fdad6146b), LL(0x852e855cab17d985), + LL(0xbdcebd8173673cbd), LL(0x5d695dd234ba8f5d), LL(0x1040108050209010), LL(0xf4f7f4f303f507f4), + LL(0xcb0bcb16c08bddcb), LL(0x3ef83eedc67cd33e), LL(0x05140528110a2d05), LL(0x6781671fe6ce7867), + LL(0xe4b7e47353d597e4), LL(0x279c2725bb4e0227), LL(0x4119413258827341), LL(0x8b168b2c9d0ba78b), + LL(0xa7a6a7510153f6a7), LL(0x7de97dcf94fab27d), LL(0x956e95dcfb374995), LL(0xd847d88e9fad56d8), + LL(0xfbcbfb8b30eb70fb), LL(0xee9fee2371c1cdee), LL(0x7ced7cc791f8bb7c), LL(0x66856617e3cc7166), + LL(0xdd53dda68ea77bdd), LL(0x175c17b84b2eaf17), LL(0x47014702468e4547), LL(0x9e429e84dc211a9e), + LL(0xca0fca1ec589d4ca), LL(0x2db42d75995a582d), LL(0xbfc6bf9179632ebf), LL(0x071c07381b0e3f07), + LL(0xad8ead012347acad), LL(0x5a755aea2fb4b05a), LL(0x8336836cb51bef83), LL(0x33cc3385ff66b633), + LL(0x6391633ff2c65c63), LL(0x020802100a041202), LL(0xaa92aa39384993aa), LL(0x71d971afa8e2de71), + LL(0xc807c80ecf8dc6c8), LL(0x196419c87d32d119), LL(0x4939497270923b49), LL(0xd943d9869aaf5fd9), + LL(0xf2eff2c31df931f2), LL(0xe3abe34b48dba8e3), LL(0x5b715be22ab6b95b), LL(0x881a8834920dbc88), + LL(0x9a529aa4c8293e9a), LL(0x2698262dbe4c0b26), LL(0x32c8328dfa64bf32), LL(0xb0fab0e94a7d59b0), + LL(0xe983e91b6acff2e9), LL(0x0f3c0f78331e770f), LL(0xd573d5e6a6b733d5), LL(0x803a8074ba1df480), + LL(0xbec2be997c6127be), LL(0xcd13cd26de87ebcd), LL(0x34d034bde4688934), LL(0x483d487a75903248), + LL(0xffdbffab24e354ff), LL(0x7af57af78ff48d7a), LL(0x907a90f4ea3d6490), LL(0x5f615fc23ebe9d5f), + LL(0x2080201da0403d20), LL(0x68bd6867d5d00f68), LL(0x1a681ad07234ca1a), LL(0xae82ae192c41b7ae), + LL(0xb4eab4c95e757db4), LL(0x544d549a19a8ce54), LL(0x937693ece53b7f93), LL(0x2288220daa442f22), + LL(0x648d6407e9c86364), LL(0xf1e3f1db12ff2af1), LL(0x73d173bfa2e6cc73), LL(0x124812905a248212), + LL(0x401d403a5d807a40), LL(0x0820084028104808), LL(0xc32bc356e89b95c3), LL(0xec97ec337bc5dfec), + LL(0xdb4bdb9690ab4ddb), LL(0xa1bea1611f5fc0a1), LL(0x8d0e8d1c8307918d), LL(0x3df43df5c97ac83d), + LL(0x976697ccf1335b97), LL(0x0000000000000000), LL(0xcf1bcf36d483f9cf), LL(0x2bac2b4587566e2b), + LL(0x76c57697b3ece176), LL(0x82328264b019e682), LL(0xd67fd6fea9b128d6), LL(0x1b6c1bd87736c31b), + LL(0xb5eeb5c15b7774b5), LL(0xaf86af112943beaf), LL(0x6ab56a77dfd41d6a), LL(0x505d50ba0da0ea50), + LL(0x450945124c8a5745), LL(0xf3ebf3cb18fb38f3), LL(0x30c0309df060ad30), LL(0xef9bef2b74c3c4ef), + LL(0x3ffc3fe5c37eda3f), LL(0x554955921caac755), LL(0xa2b2a2791059dba2), LL(0xea8fea0365c9e9ea), + LL(0x6589650fecca6a65), LL(0xbad2bab9686903ba), LL(0x2fbc2f65935e4a2f), LL(0xc027c04ee79d8ec0), + LL(0xde5fdebe81a160de), LL(0x1c701ce06c38fc1c), LL(0xfdd3fdbb2ee746fd), LL(0x4d294d52649a1f4d), + LL(0x927292e4e0397692), LL(0x75c9758fbceafa75), LL(0x061806301e0c3606), LL(0x8a128a249809ae8a), + LL(0xb2f2b2f940794bb2), LL(0xe6bfe66359d185e6), LL(0x0e380e70361c7e0e), LL(0x1f7c1ff8633ee71f), + LL(0x62956237f7c45562), LL(0xd477d4eea3b53ad4), LL(0xa89aa829324d81a8), LL(0x966296c4f4315296), + LL(0xf9c3f99b3aef62f9), LL(0xc533c566f697a3c5), LL(0x25942535b14a1025), LL(0x597959f220b2ab59), + LL(0x842a8454ae15d084), LL(0x72d572b7a7e4c572), LL(0x39e439d5dd72ec39), LL(0x4c2d4c5a6198164c), + LL(0x5e655eca3bbc945e), LL(0x78fd78e785f09f78), LL(0x38e038ddd870e538), LL(0x8c0a8c148605988c), + LL(0xd163d1c6b2bf17d1), LL(0xa5aea5410b57e4a5), LL(0xe2afe2434dd9a1e2), LL(0x6199612ff8c24e61), + LL(0xb3f6b3f1457b42b3), LL(0x21842115a5423421), LL(0x9c4a9c94d625089c), LL(0x1e781ef0663cee1e), + LL(0x4311432252866143), LL(0xc73bc776fc93b1c7), LL(0xfcd7fcb32be54ffc), LL(0x0410042014082404), + LL(0x515951b208a2e351), LL(0x995e99bcc72f2599), LL(0x6da96d4fc4da226d), LL(0x0d340d68391a650d), + LL(0xfacffa8335e979fa), LL(0xdf5bdfb684a369df), LL(0x7ee57ed79bfca97e), LL(0x2490243db4481924), + LL(0x3bec3bc5d776fe3b), LL(0xab96ab313d4b9aab), LL(0xce1fce3ed181f0ce), LL(0x1144118855229911), + LL(0x8f068f0c8903838f), LL(0x4e254e4a6b9c044e), LL(0xb7e6b7d1517366b7), LL(0xeb8beb0b60cbe0eb), + LL(0x3cf03cfdcc78c13c), LL(0x813e817cbf1ffd81), LL(0x946a94d4fe354094), LL(0xf7fbf7eb0cf31cf7), + LL(0xb9deb9a1676f18b9), LL(0x134c13985f268b13), LL(0x2cb02c7d9c58512c), LL(0xd36bd3d6b8bb05d3), + LL(0xe7bbe76b5cd38ce7), LL(0x6ea56e57cbdc396e), LL(0xc437c46ef395aac4), LL(0x030c03180f061b03), + LL(0x5645568a13acdc56), LL(0x440d441a49885e44), LL(0x7fe17fdf9efea07f), LL(0xa99ea921374f88a9), + LL(0x2aa82a4d8254672a), LL(0xbbd6bbb16d6b0abb), LL(0xc123c146e29f87c1), LL(0x535153a202a6f153), + LL(0xdc57dcae8ba572dc), LL(0x0b2c0b582716530b), LL(0x9d4e9d9cd327019d), LL(0x6cad6c47c1d82b6c), + LL(0x31c43195f562a431), LL(0x74cd7487b9e8f374), LL(0xf6fff6e309f115f6), LL(0x4605460a438c4c46), + LL(0xac8aac092645a5ac), LL(0x891e893c970fb589), LL(0x145014a04428b414), LL(0xe1a3e15b42dfbae1), + LL(0x165816b04e2ca616), LL(0x3ae83acdd274f73a), LL(0x69b9696fd0d20669), LL(0x092409482d124109), + LL(0x70dd70a7ade0d770), LL(0xb6e2b6d954716fb6), LL(0xd067d0ceb7bd1ed0), LL(0xed93ed3b7ec7d6ed), + LL(0xcc17cc2edb85e2cc), LL(0x4215422a57846842), LL(0x985a98b4c22d2c98), LL(0xa4aaa4490e55eda4), + LL(0x28a0285d88507528), LL(0x5c6d5cda31b8865c), LL(0xf8c7f8933fed6bf8), LL(0x86228644a411c286), +}; + +static const u64 rc[R + 1] = { + LL(0x0000000000000000), + LL(0x1823c6e887b8014f), + LL(0x36a6d2f5796f9152), + LL(0x60bc9b8ea30c7b35), + LL(0x1de0d7c22e4bfe57), + LL(0x157737e59ff04ada), + LL(0x58c9290ab1a06b85), + LL(0xbd5d10f4cb3e0567), + LL(0xe427418ba77d95d8), + LL(0xfbee7c66dd17479e), + LL(0xca2dbf07ad5a8333), +}; + +/** + * The core Whirlpool transform. + */ +static void processBuffer(struct NESSIEstruct * const structpointer) { + int i, r; + u64 K[8]; /* the round key */ + u64 block[8]; /* mu(buffer) */ + u64 state[8]; /* the cipher state */ + u64 L[8]; + u8 *buffer = structpointer->buffer; + /* + * map the buffer to a block: + */ + for (i = 0; i < 8; i++, buffer += 8) { + block[i] = + (((u64)buffer[0] ) << 56) ^ + (((u64)buffer[1] & 0xffL) << 48) ^ + (((u64)buffer[2] & 0xffL) << 40) ^ + (((u64)buffer[3] & 0xffL) << 32) ^ + (((u64)buffer[4] & 0xffL) << 24) ^ + (((u64)buffer[5] & 0xffL) << 16) ^ + (((u64)buffer[6] & 0xffL) << 8) ^ + (((u64)buffer[7] & 0xffL) ); + } + /* + * compute and apply K^0 to the cipher state: + */ + state[0] = block[0] ^ (K[0] = structpointer->hash[0]); + state[1] = block[1] ^ (K[1] = structpointer->hash[1]); + state[2] = block[2] ^ (K[2] = structpointer->hash[2]); + state[3] = block[3] ^ (K[3] = structpointer->hash[3]); + state[4] = block[4] ^ (K[4] = structpointer->hash[4]); + state[5] = block[5] ^ (K[5] = structpointer->hash[5]); + state[6] = block[6] ^ (K[6] = structpointer->hash[6]); + state[7] = block[7] ^ (K[7] = structpointer->hash[7]); + /* + * iterate over all rounds: + */ + for (r = 1; r <= R; r++) { + /* + * compute K^r from K^{r-1}: + */ + L[0] = + C0[(int)(K[0] >> 56) ] ^ + C1[(int)(K[7] >> 48) & 0xff] ^ + C2[(int)(K[6] >> 40) & 0xff] ^ + C3[(int)(K[5] >> 32) & 0xff] ^ + C4[(int)(K[4] >> 24) & 0xff] ^ + C5[(int)(K[3] >> 16) & 0xff] ^ + C6[(int)(K[2] >> 8) & 0xff] ^ + C7[(int)(K[1] ) & 0xff] ^ + rc[r]; + L[1] = + C0[(int)(K[1] >> 56) ] ^ + C1[(int)(K[0] >> 48) & 0xff] ^ + C2[(int)(K[7] >> 40) & 0xff] ^ + C3[(int)(K[6] >> 32) & 0xff] ^ + C4[(int)(K[5] >> 24) & 0xff] ^ + C5[(int)(K[4] >> 16) & 0xff] ^ + C6[(int)(K[3] >> 8) & 0xff] ^ + C7[(int)(K[2] ) & 0xff]; + L[2] = + C0[(int)(K[2] >> 56) ] ^ + C1[(int)(K[1] >> 48) & 0xff] ^ + C2[(int)(K[0] >> 40) & 0xff] ^ + C3[(int)(K[7] >> 32) & 0xff] ^ + C4[(int)(K[6] >> 24) & 0xff] ^ + C5[(int)(K[5] >> 16) & 0xff] ^ + C6[(int)(K[4] >> 8) & 0xff] ^ + C7[(int)(K[3] ) & 0xff]; + L[3] = + C0[(int)(K[3] >> 56) ] ^ + C1[(int)(K[2] >> 48) & 0xff] ^ + C2[(int)(K[1] >> 40) & 0xff] ^ + C3[(int)(K[0] >> 32) & 0xff] ^ + C4[(int)(K[7] >> 24) & 0xff] ^ + C5[(int)(K[6] >> 16) & 0xff] ^ + C6[(int)(K[5] >> 8) & 0xff] ^ + C7[(int)(K[4] ) & 0xff]; + L[4] = + C0[(int)(K[4] >> 56) ] ^ + C1[(int)(K[3] >> 48) & 0xff] ^ + C2[(int)(K[2] >> 40) & 0xff] ^ + C3[(int)(K[1] >> 32) & 0xff] ^ + C4[(int)(K[0] >> 24) & 0xff] ^ + C5[(int)(K[7] >> 16) & 0xff] ^ + C6[(int)(K[6] >> 8) & 0xff] ^ + C7[(int)(K[5] ) & 0xff]; + L[5] = + C0[(int)(K[5] >> 56) ] ^ + C1[(int)(K[4] >> 48) & 0xff] ^ + C2[(int)(K[3] >> 40) & 0xff] ^ + C3[(int)(K[2] >> 32) & 0xff] ^ + C4[(int)(K[1] >> 24) & 0xff] ^ + C5[(int)(K[0] >> 16) & 0xff] ^ + C6[(int)(K[7] >> 8) & 0xff] ^ + C7[(int)(K[6] ) & 0xff]; + L[6] = + C0[(int)(K[6] >> 56) ] ^ + C1[(int)(K[5] >> 48) & 0xff] ^ + C2[(int)(K[4] >> 40) & 0xff] ^ + C3[(int)(K[3] >> 32) & 0xff] ^ + C4[(int)(K[2] >> 24) & 0xff] ^ + C5[(int)(K[1] >> 16) & 0xff] ^ + C6[(int)(K[0] >> 8) & 0xff] ^ + C7[(int)(K[7] ) & 0xff]; + L[7] = + C0[(int)(K[7] >> 56) ] ^ + C1[(int)(K[6] >> 48) & 0xff] ^ + C2[(int)(K[5] >> 40) & 0xff] ^ + C3[(int)(K[4] >> 32) & 0xff] ^ + C4[(int)(K[3] >> 24) & 0xff] ^ + C5[(int)(K[2] >> 16) & 0xff] ^ + C6[(int)(K[1] >> 8) & 0xff] ^ + C7[(int)(K[0] ) & 0xff]; + K[0] = L[0]; + K[1] = L[1]; + K[2] = L[2]; + K[3] = L[3]; + K[4] = L[4]; + K[5] = L[5]; + K[6] = L[6]; + K[7] = L[7]; + /* + * apply the r-th round transformation: + */ + L[0] = + C0[(int)(state[0] >> 56) ] ^ + C1[(int)(state[7] >> 48) & 0xff] ^ + C2[(int)(state[6] >> 40) & 0xff] ^ + C3[(int)(state[5] >> 32) & 0xff] ^ + C4[(int)(state[4] >> 24) & 0xff] ^ + C5[(int)(state[3] >> 16) & 0xff] ^ + C6[(int)(state[2] >> 8) & 0xff] ^ + C7[(int)(state[1] ) & 0xff] ^ + K[0]; + L[1] = + C0[(int)(state[1] >> 56) ] ^ + C1[(int)(state[0] >> 48) & 0xff] ^ + C2[(int)(state[7] >> 40) & 0xff] ^ + C3[(int)(state[6] >> 32) & 0xff] ^ + C4[(int)(state[5] >> 24) & 0xff] ^ + C5[(int)(state[4] >> 16) & 0xff] ^ + C6[(int)(state[3] >> 8) & 0xff] ^ + C7[(int)(state[2] ) & 0xff] ^ + K[1]; + L[2] = + C0[(int)(state[2] >> 56) ] ^ + C1[(int)(state[1] >> 48) & 0xff] ^ + C2[(int)(state[0] >> 40) & 0xff] ^ + C3[(int)(state[7] >> 32) & 0xff] ^ + C4[(int)(state[6] >> 24) & 0xff] ^ + C5[(int)(state[5] >> 16) & 0xff] ^ + C6[(int)(state[4] >> 8) & 0xff] ^ + C7[(int)(state[3] ) & 0xff] ^ + K[2]; + L[3] = + C0[(int)(state[3] >> 56) ] ^ + C1[(int)(state[2] >> 48) & 0xff] ^ + C2[(int)(state[1] >> 40) & 0xff] ^ + C3[(int)(state[0] >> 32) & 0xff] ^ + C4[(int)(state[7] >> 24) & 0xff] ^ + C5[(int)(state[6] >> 16) & 0xff] ^ + C6[(int)(state[5] >> 8) & 0xff] ^ + C7[(int)(state[4] ) & 0xff] ^ + K[3]; + L[4] = + C0[(int)(state[4] >> 56) ] ^ + C1[(int)(state[3] >> 48) & 0xff] ^ + C2[(int)(state[2] >> 40) & 0xff] ^ + C3[(int)(state[1] >> 32) & 0xff] ^ + C4[(int)(state[0] >> 24) & 0xff] ^ + C5[(int)(state[7] >> 16) & 0xff] ^ + C6[(int)(state[6] >> 8) & 0xff] ^ + C7[(int)(state[5] ) & 0xff] ^ + K[4]; + L[5] = + C0[(int)(state[5] >> 56) ] ^ + C1[(int)(state[4] >> 48) & 0xff] ^ + C2[(int)(state[3] >> 40) & 0xff] ^ + C3[(int)(state[2] >> 32) & 0xff] ^ + C4[(int)(state[1] >> 24) & 0xff] ^ + C5[(int)(state[0] >> 16) & 0xff] ^ + C6[(int)(state[7] >> 8) & 0xff] ^ + C7[(int)(state[6] ) & 0xff] ^ + K[5]; + L[6] = + C0[(int)(state[6] >> 56) ] ^ + C1[(int)(state[5] >> 48) & 0xff] ^ + C2[(int)(state[4] >> 40) & 0xff] ^ + C3[(int)(state[3] >> 32) & 0xff] ^ + C4[(int)(state[2] >> 24) & 0xff] ^ + C5[(int)(state[1] >> 16) & 0xff] ^ + C6[(int)(state[0] >> 8) & 0xff] ^ + C7[(int)(state[7] ) & 0xff] ^ + K[6]; + L[7] = + C0[(int)(state[7] >> 56) ] ^ + C1[(int)(state[6] >> 48) & 0xff] ^ + C2[(int)(state[5] >> 40) & 0xff] ^ + C3[(int)(state[4] >> 32) & 0xff] ^ + C4[(int)(state[3] >> 24) & 0xff] ^ + C5[(int)(state[2] >> 16) & 0xff] ^ + C6[(int)(state[1] >> 8) & 0xff] ^ + C7[(int)(state[0] ) & 0xff] ^ + K[7]; + state[0] = L[0]; + state[1] = L[1]; + state[2] = L[2]; + state[3] = L[3]; + state[4] = L[4]; + state[5] = L[5]; + state[6] = L[6]; + state[7] = L[7]; + } + /* + * apply the Miyaguchi-Preneel compression function: + */ + structpointer->hash[0] ^= state[0] ^ block[0]; + structpointer->hash[1] ^= state[1] ^ block[1]; + structpointer->hash[2] ^= state[2] ^ block[2]; + structpointer->hash[3] ^= state[3] ^ block[3]; + structpointer->hash[4] ^= state[4] ^ block[4]; + structpointer->hash[5] ^= state[5] ^ block[5]; + structpointer->hash[6] ^= state[6] ^ block[6]; + structpointer->hash[7] ^= state[7] ^ block[7]; +} + +/** + * Initialize the hashing state. + */ +void WHIRLPOOL_init(struct NESSIEstruct * const structpointer) { + int i; + + memset(structpointer->bitLength, 0, 32); + structpointer->bufferBits = structpointer->bufferPos = 0; + structpointer->buffer[0] = 0; /* it's only necessary to cleanup buffer[bufferPos] */ + for (i = 0; i < 8; i++) { + structpointer->hash[i] = 0L; /* initial value */ + } +} + +/** + * Delivers input data to the hashing algorithm. + * + * @param source plaintext data to hash. + * @param sourceBits how many bits of plaintext to process. + * + * This method maintains the invariant: bufferBits < DIGESTBITS + */ +void WHIRLPOOL_add(const unsigned char * const source, + unsigned __int32 sourceBits, + struct NESSIEstruct * const structpointer) { + /* + sourcePos + | + +-------+-------+------- + ||||||||||||||||||||| source + +-------+-------+------- + +-------+-------+-------+-------+-------+------- + |||||||||||||||||||||| buffer + +-------+-------+-------+-------+-------+------- + | + bufferPos + */ + int sourcePos = 0; /* index of leftmost source u8 containing data (1 to 8 bits). */ + int sourceGap = (8 - ((int)sourceBits & 7)) & 7; /* space on source[sourcePos]. */ + int bufferRem = structpointer->bufferBits & 7; /* occupied bits on buffer[bufferPos]. */ + int i; + u32 b, carry; + u8 *buffer = structpointer->buffer; + u8 *bitLength = structpointer->bitLength; + int bufferBits = structpointer->bufferBits; + int bufferPos = structpointer->bufferPos; + + /* + * tally the length of the added data: + */ + u64 value = sourceBits; + for (i = 31, carry = 0; i >= 0 && (carry != 0 || value != LL(0)); i--) { + carry += bitLength[i] + ((u32)value & 0xff); + bitLength[i] = (u8)carry; + carry >>= 8; + value >>= 8; + } + /* + * process data in chunks of 8 bits (a more efficient approach would be to take whole-word chunks): + */ + while (sourceBits > 8) { + /* N.B. at least source[sourcePos] and source[sourcePos+1] contain data. */ + /* + * take a byte from the source: + */ + b = ((source[sourcePos] << sourceGap) & 0xff) | + ((source[sourcePos + 1] & 0xff) >> (8 - sourceGap)); + /* + * process this byte: + */ + buffer[bufferPos++] |= (u8)(b >> bufferRem); + bufferBits += 8 - bufferRem; /* bufferBits = 8*bufferPos; */ + if (bufferBits == DIGESTBITS) { + /* + * process data block: + */ + processBuffer(structpointer); + /* + * reset buffer: + */ + bufferBits = bufferPos = 0; + } + buffer[bufferPos] = (u8) (b << (8 - bufferRem)); + bufferBits += bufferRem; + /* + * proceed to remaining data: + */ + sourceBits -= 8; + sourcePos++; + } + /* now 0 <= sourceBits <= 8; + * furthermore, all data (if any is left) is in source[sourcePos]. + */ + if (sourceBits > 0) { + b = (source[sourcePos] << sourceGap) & 0xff; /* bits are left-justified on b. */ + /* + * process the remaining bits: + */ + buffer[bufferPos] |= b >> bufferRem; + } else { + b = 0; + } + if (bufferRem + sourceBits < 8) { + /* + * all remaining data fits on buffer[bufferPos], + * and there still remains some space. + */ + bufferBits += sourceBits; + } else { + /* + * buffer[bufferPos] is full: + */ + bufferPos++; + bufferBits += 8 - bufferRem; /* bufferBits = 8*bufferPos; */ + sourceBits -= 8 - bufferRem; + /* now 0 <= sourceBits < 8; + * furthermore, all data (if any is left) is in source[sourcePos]. + */ + if (bufferBits == DIGESTBITS) { + /* + * process data block: + */ + processBuffer(structpointer); + /* + * reset buffer: + */ + bufferBits = bufferPos = 0; + } + buffer[bufferPos] = (u8) (b << (8 - bufferRem)); + bufferBits += (int)sourceBits; + } + structpointer->bufferBits = bufferBits; + structpointer->bufferPos = bufferPos; +} + +/** + * Get the hash value from the hashing state. + * + * This method uses the invariant: bufferBits < DIGESTBITS + */ +void WHIRLPOOL_finalize(struct NESSIEstruct * const structpointer, + unsigned char * const result) { + int i; + u8 *buffer = structpointer->buffer; + u8 *bitLength = structpointer->bitLength; + int bufferBits = structpointer->bufferBits; + int bufferPos = structpointer->bufferPos; + u8 *digest = result; + + /* + * append a '1'-bit: + */ + buffer[bufferPos] |= 0x80U >> (bufferBits & 7); + bufferPos++; /* all remaining bits on the current u8 are set to zero. */ + /* + * pad with zero bits to complete (N*WBLOCKBITS - LENGTHBITS) bits: + */ + if (bufferPos > WBLOCKBYTES - LENGTHBYTES) { + if (bufferPos < WBLOCKBYTES) { + memset(&buffer[bufferPos], 0, WBLOCKBYTES - bufferPos); + } + /* + * process data block: + */ + processBuffer(structpointer); + /* + * reset buffer: + */ + bufferPos = 0; + } + if (bufferPos < WBLOCKBYTES - LENGTHBYTES) { + memset(&buffer[bufferPos], 0, (WBLOCKBYTES - LENGTHBYTES) - bufferPos); + } + bufferPos = WBLOCKBYTES - LENGTHBYTES; + /* + * append bit length of hashed data: + */ + memcpy(&buffer[WBLOCKBYTES - LENGTHBYTES], bitLength, LENGTHBYTES); + /* + * process data block: + */ + processBuffer(structpointer); + /* + * return the completed message digest: + */ + for (i = 0; i < DIGESTBYTES/8; i++) { + digest[0] = (u8)(structpointer->hash[i] >> 56); + digest[1] = (u8)(structpointer->hash[i] >> 48); + digest[2] = (u8)(structpointer->hash[i] >> 40); + digest[3] = (u8)(structpointer->hash[i] >> 32); + digest[4] = (u8)(structpointer->hash[i] >> 24); + digest[5] = (u8)(structpointer->hash[i] >> 16); + digest[6] = (u8)(structpointer->hash[i] >> 8); + digest[7] = (u8)(structpointer->hash[i] ); + digest += 8; + } + structpointer->bufferBits = bufferBits; + structpointer->bufferPos = bufferPos; +} diff --git a/Crypto/Whirlpool.h b/Crypto/Whirlpool.h new file mode 100644 index 0000000..d497e85 --- /dev/null +++ b/Crypto/Whirlpool.h @@ -0,0 +1,151 @@ +#ifndef WHIRLPOOL_H +#define WHIRLPOOL_H 1 + +#include "Common/Tcdefs.h" + +#if defined(__cplusplus) +extern "C" +{ +#endif + +#ifndef PORTABLE_C__ +#define PORTABLE_C__ + +#include + +/* Definition of minimum-width integer types + * + * u8 -> unsigned integer type, at least 8 bits, equivalent to unsigned char + * u16 -> unsigned integer type, at least 16 bits + * u32 -> unsigned integer type, at least 32 bits + * + * s8, s16, s32 -> signed counterparts of u8, u16, u32 + * + * Always use macro's T8(), T16() or T32() to obtain exact-width results, + * i.e., to specify the size of the result of each expression. + */ + +typedef signed char s8; +typedef unsigned char u8; + +#if UINT_MAX >= 4294967295UL + +typedef signed short s16; +typedef signed int s32; +typedef unsigned short u16; +typedef unsigned int u32; + +#define ONE32 0xffffffffU + +#else + +typedef signed int s16; +typedef signed long s32; +typedef unsigned int u16; +typedef unsigned __int32 u32; + +#define ONE32 0xffffffffUL + +#endif + +#define ONE8 0xffU +#define ONE16 0xffffU + +#define T8(x) ((x) & ONE8) +#define T16(x) ((x) & ONE16) +#define T32(x) ((x) & ONE32) + +#ifdef _MSC_VER +typedef unsigned __int64 u64; +typedef signed __int64 s64; +#define LL(v) (v##ui64) +#define ONE64 LL(0xffffffffffffffff) +#else /* !_MSC_VER */ +typedef unsigned long long u64; +typedef signed long long s64; +#define LL(v) (v##ULL) +#define ONE64 LL(0xffffffffffffffff) +#endif /* ?_MSC_VER */ +#define T64(x) ((x) & ONE64) +#define ROTR64(v, n) (((v) >> (n)) | T64((v) << (64 - (n)))) +/* + * Note: the test is used to detect native 64-bit architectures; + * if the unsigned long is strictly greater than 32-bit, it is + * assumed to be at least 64-bit. This will not work correctly + * on (old) 36-bit architectures (PDP-11 for instance). + * + * On non-64-bit architectures, "long long" is used. + */ + +/* + * U8TO32_BIG(c) returns the 32-bit value stored in big-endian convention + * in the unsigned char array pointed to by c. + */ +#define U8TO32_BIG(c) (((u32)T8(*(c)) << 24) | ((u32)T8(*((c) + 1)) << 16) | ((u32)T8(*((c) + 2)) << 8) | ((u32)T8(*((c) + 3)))) + +/* + * U8TO32_LITTLE(c) returns the 32-bit value stored in little-endian convention + * in the unsigned char array pointed to by c. + */ +#define U8TO32_LITTLE(c) (((u32)T8(*(c))) | ((u32)T8(*((c) + 1)) << 8) | (u32)T8(*((c) + 2)) << 16) | ((u32)T8(*((c) + 3)) << 24)) + +/* + * U8TO32_BIG(c, v) stores the 32-bit-value v in big-endian convention + * into the unsigned char array pointed to by c. + */ +#define U32TO8_BIG(c, v) do { u32 x = (v); u8 *d = (c); d[0] = T8(x >> 24); d[1] = T8(x >> 16); d[2] = T8(x >> 8); d[3] = T8(x); } while (0) + +/* + * U8TO32_LITTLE(c, v) stores the 32-bit-value v in little-endian convention + * into the unsigned char array pointed to by c. + */ +#define U32TO8_LITTLE(c, v) do { u32 x = (v); u8 *d = (c); d[0] = T8(x); d[1] = T8(x >> 8); d[2] = T8(x >> 16); d[3] = T8(x >> 24); } while (0) + +/* + * ROTL32(v, n) returns the value of the 32-bit unsigned value v after + * a rotation of n bits to the left. It might be replaced by the appropriate + * architecture-specific macro. + * + * It evaluates v and n twice. + * + * The compiler might emit a warning if n is the constant 0. The result + * is undefined if n is greater than 31. + */ +#define ROTL32(v, n) (T32((v) << (n)) | ((v) >> (32 - (n)))) + +/* + * Whirlpool-specific definitions. + */ + +#define DIGESTBYTES 64 +#define DIGESTBITS (8*DIGESTBYTES) /* 512 */ + +#define WBLOCKBYTES 64 +#define WBLOCKBITS (8*WBLOCKBYTES) /* 512 */ + +#define LENGTHBYTES 32 +#define LENGTHBITS (8*LENGTHBYTES) /* 256 */ + +typedef struct NESSIEstruct { + u8 bitLength[LENGTHBYTES]; /* global number of hashed bits (256-bit counter) */ + u8 buffer[WBLOCKBYTES]; /* buffer of data to hash */ + int bufferBits; /* current number of bits on the buffer */ + int bufferPos; /* current (possibly incomplete) byte slot on the buffer */ + u64 hash[DIGESTBYTES/8]; /* the hashing state */ +} NESSIEstruct; + +#endif /* PORTABLE_C__ */ + +// ------------- + +typedef NESSIEstruct WHIRLPOOL_CTX; + +void WHIRLPOOL_add(const unsigned char * const source, unsigned __int32 sourceBits, struct NESSIEstruct * const structpointer); +void WHIRLPOOL_finalize(struct NESSIEstruct * const structpointer, unsigned char * const result); +void WHIRLPOOL_init(struct NESSIEstruct * const structpointer); + +#if defined(__cplusplus) +} +#endif + +#endif /* WHIRLPOOL_H */ -- cgit