00001 #include <stdint.h>
00002
00035 #ifndef __AES_CORE__
00036 #define __AES_CORE__
00037
00038 #define AES_BLOCK_SIZE 16
00039
00040 static const uint32_t Te4[256] = {
00041 0x63636363U, 0x7c7c7c7cU, 0x77777777U, 0x7b7b7b7bU,
00042 0xf2f2f2f2U, 0x6b6b6b6bU, 0x6f6f6f6fU, 0xc5c5c5c5U,
00043 0x30303030U, 0x01010101U, 0x67676767U, 0x2b2b2b2bU,
00044 0xfefefefeU, 0xd7d7d7d7U, 0xababababU, 0x76767676U,
00045 0xcacacacaU, 0x82828282U, 0xc9c9c9c9U, 0x7d7d7d7dU,
00046 0xfafafafaU, 0x59595959U, 0x47474747U, 0xf0f0f0f0U,
00047 0xadadadadU, 0xd4d4d4d4U, 0xa2a2a2a2U, 0xafafafafU,
00048 0x9c9c9c9cU, 0xa4a4a4a4U, 0x72727272U, 0xc0c0c0c0U,
00049 0xb7b7b7b7U, 0xfdfdfdfdU, 0x93939393U, 0x26262626U,
00050 0x36363636U, 0x3f3f3f3fU, 0xf7f7f7f7U, 0xccccccccU,
00051 0x34343434U, 0xa5a5a5a5U, 0xe5e5e5e5U, 0xf1f1f1f1U,
00052 0x71717171U, 0xd8d8d8d8U, 0x31313131U, 0x15151515U,
00053 0x04040404U, 0xc7c7c7c7U, 0x23232323U, 0xc3c3c3c3U,
00054 0x18181818U, 0x96969696U, 0x05050505U, 0x9a9a9a9aU,
00055 0x07070707U, 0x12121212U, 0x80808080U, 0xe2e2e2e2U,
00056 0xebebebebU, 0x27272727U, 0xb2b2b2b2U, 0x75757575U,
00057 0x09090909U, 0x83838383U, 0x2c2c2c2cU, 0x1a1a1a1aU,
00058 0x1b1b1b1bU, 0x6e6e6e6eU, 0x5a5a5a5aU, 0xa0a0a0a0U,
00059 0x52525252U, 0x3b3b3b3bU, 0xd6d6d6d6U, 0xb3b3b3b3U,
00060 0x29292929U, 0xe3e3e3e3U, 0x2f2f2f2fU, 0x84848484U,
00061 0x53535353U, 0xd1d1d1d1U, 0x00000000U, 0xededededU,
00062 0x20202020U, 0xfcfcfcfcU, 0xb1b1b1b1U, 0x5b5b5b5bU,
00063 0x6a6a6a6aU, 0xcbcbcbcbU, 0xbebebebeU, 0x39393939U,
00064 0x4a4a4a4aU, 0x4c4c4c4cU, 0x58585858U, 0xcfcfcfcfU,
00065 0xd0d0d0d0U, 0xefefefefU, 0xaaaaaaaaU, 0xfbfbfbfbU,
00066 0x43434343U, 0x4d4d4d4dU, 0x33333333U, 0x85858585U,
00067 0x45454545U, 0xf9f9f9f9U, 0x02020202U, 0x7f7f7f7fU,
00068 0x50505050U, 0x3c3c3c3cU, 0x9f9f9f9fU, 0xa8a8a8a8U,
00069 0x51515151U, 0xa3a3a3a3U, 0x40404040U, 0x8f8f8f8fU,
00070 0x92929292U, 0x9d9d9d9dU, 0x38383838U, 0xf5f5f5f5U,
00071 0xbcbcbcbcU, 0xb6b6b6b6U, 0xdadadadaU, 0x21212121U,
00072 0x10101010U, 0xffffffffU, 0xf3f3f3f3U, 0xd2d2d2d2U,
00073 0xcdcdcdcdU, 0x0c0c0c0cU, 0x13131313U, 0xececececU,
00074 0x5f5f5f5fU, 0x97979797U, 0x44444444U, 0x17171717U,
00075 0xc4c4c4c4U, 0xa7a7a7a7U, 0x7e7e7e7eU, 0x3d3d3d3dU,
00076 0x64646464U, 0x5d5d5d5dU, 0x19191919U, 0x73737373U,
00077 0x60606060U, 0x81818181U, 0x4f4f4f4fU, 0xdcdcdcdcU,
00078 0x22222222U, 0x2a2a2a2aU, 0x90909090U, 0x88888888U,
00079 0x46464646U, 0xeeeeeeeeU, 0xb8b8b8b8U, 0x14141414U,
00080 0xdedededeU, 0x5e5e5e5eU, 0x0b0b0b0bU, 0xdbdbdbdbU,
00081 0xe0e0e0e0U, 0x32323232U, 0x3a3a3a3aU, 0x0a0a0a0aU,
00082 0x49494949U, 0x06060606U, 0x24242424U, 0x5c5c5c5cU,
00083 0xc2c2c2c2U, 0xd3d3d3d3U, 0xacacacacU, 0x62626262U,
00084 0x91919191U, 0x95959595U, 0xe4e4e4e4U, 0x79797979U,
00085 0xe7e7e7e7U, 0xc8c8c8c8U, 0x37373737U, 0x6d6d6d6dU,
00086 0x8d8d8d8dU, 0xd5d5d5d5U, 0x4e4e4e4eU, 0xa9a9a9a9U,
00087 0x6c6c6c6cU, 0x56565656U, 0xf4f4f4f4U, 0xeaeaeaeaU,
00088 0x65656565U, 0x7a7a7a7aU, 0xaeaeaeaeU, 0x08080808U,
00089 0xbabababaU, 0x78787878U, 0x25252525U, 0x2e2e2e2eU,
00090 0x1c1c1c1cU, 0xa6a6a6a6U, 0xb4b4b4b4U, 0xc6c6c6c6U,
00091 0xe8e8e8e8U, 0xddddddddU, 0x74747474U, 0x1f1f1f1fU,
00092 0x4b4b4b4bU, 0xbdbdbdbdU, 0x8b8b8b8bU, 0x8a8a8a8aU,
00093 0x70707070U, 0x3e3e3e3eU, 0xb5b5b5b5U, 0x66666666U,
00094 0x48484848U, 0x03030303U, 0xf6f6f6f6U, 0x0e0e0e0eU,
00095 0x61616161U, 0x35353535U, 0x57575757U, 0xb9b9b9b9U,
00096 0x86868686U, 0xc1c1c1c1U, 0x1d1d1d1dU, 0x9e9e9e9eU,
00097 0xe1e1e1e1U, 0xf8f8f8f8U, 0x98989898U, 0x11111111U,
00098 0x69696969U, 0xd9d9d9d9U, 0x8e8e8e8eU, 0x94949494U,
00099 0x9b9b9b9bU, 0x1e1e1e1eU, 0x87878787U, 0xe9e9e9e9U,
00100 0xcecececeU, 0x55555555U, 0x28282828U, 0xdfdfdfdfU,
00101 0x8c8c8c8cU, 0xa1a1a1a1U, 0x89898989U, 0x0d0d0d0dU,
00102 0xbfbfbfbfU, 0xe6e6e6e6U, 0x42424242U, 0x68686868U,
00103 0x41414141U, 0x99999999U, 0x2d2d2d2dU, 0x0f0f0f0fU,
00104 0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U,
00105 };
00106
00107 static const uint32_t rcon_host[10] = {
00108 0x01000000, 0x02000000, 0x04000000, 0x08000000,
00109 0x10000000, 0x20000000, 0x40000000, 0x80000000,
00110 0x1B000000, 0x36000000,
00111 };
00112
00113
00114
00115
00116
00117 __constant__ __device__ uint32_t rcon[10] = {
00118 0x01000000, 0x02000000, 0x04000000, 0x08000000,
00119 0x10000000, 0x20000000, 0x40000000, 0x80000000,
00120 0x1B000000, 0x36000000,
00121 };
00122
00123
00124 __constant__ __device__ uint32_t Te0_ConstMem[256] = {
00125 0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
00126 0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
00127 0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
00128 0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
00129 0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
00130 0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
00131 0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
00132 0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
00133 0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
00134 0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
00135 0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
00136 0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
00137 0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
00138 0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
00139 0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
00140 0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
00141 0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
00142 0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
00143 0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
00144 0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
00145 0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
00146 0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
00147 0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
00148 0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
00149 0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
00150 0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
00151 0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
00152 0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
00153 0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
00154 0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
00155 0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
00156 0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
00157 0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
00158 0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
00159 0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
00160 0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
00161 0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
00162 0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
00163 0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
00164 0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
00165 0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
00166 0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
00167 0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
00168 0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
00169 0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
00170 0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
00171 0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
00172 0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
00173 0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
00174 0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
00175 0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
00176 0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
00177 0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
00178 0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
00179 0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
00180 0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
00181 0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
00182 0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
00183 0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
00184 0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
00185 0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
00186 0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
00187 0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
00188 0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
00189 };
00190
00191 __constant__ __device__ uint32_t Te1_ConstMem[256] = {
00192 0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
00193 0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
00194 0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
00195 0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
00196 0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
00197 0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
00198 0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
00199 0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
00200 0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
00201 0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
00202 0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
00203 0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
00204 0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
00205 0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
00206 0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
00207 0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
00208 0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
00209 0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
00210 0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
00211 0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
00212 0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
00213 0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
00214 0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
00215 0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
00216 0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
00217 0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
00218 0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
00219 0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
00220 0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
00221 0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
00222 0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
00223 0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
00224 0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
00225 0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
00226 0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
00227 0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
00228 0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
00229 0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
00230 0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
00231 0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
00232 0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
00233 0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
00234 0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
00235 0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
00236 0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
00237 0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
00238 0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
00239 0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
00240 0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
00241 0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
00242 0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
00243 0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
00244 0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
00245 0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
00246 0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
00247 0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
00248 0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
00249 0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
00250 0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
00251 0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
00252 0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
00253 0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
00254 0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
00255 0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
00256 };
00257
00258 __constant__ __device__ uint32_t Te2_ConstMem[256] = {
00259 0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
00260 0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
00261 0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
00262 0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
00263 0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
00264 0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
00265 0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
00266 0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
00267 0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
00268 0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
00269 0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
00270 0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
00271 0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
00272 0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
00273 0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
00274 0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
00275 0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
00276 0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
00277 0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
00278 0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
00279 0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
00280 0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
00281 0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
00282 0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
00283 0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
00284 0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
00285 0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
00286 0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
00287 0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
00288 0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
00289 0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
00290 0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
00291 0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
00292 0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
00293 0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
00294 0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
00295 0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
00296 0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
00297 0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
00298 0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
00299 0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
00300 0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
00301 0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
00302 0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
00303 0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
00304 0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
00305 0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
00306 0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
00307 0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
00308 0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
00309 0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
00310 0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
00311 0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
00312 0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
00313 0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
00314 0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
00315 0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
00316 0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
00317 0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
00318 0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
00319 0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
00320 0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
00321 0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
00322 0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
00323 };
00324
00325 __constant__ __device__ uint32_t Te3_ConstMem[256] = {
00326 0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
00327 0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
00328 0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
00329 0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
00330 0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
00331 0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
00332 0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
00333 0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
00334 0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
00335 0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
00336 0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
00337 0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
00338 0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
00339 0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
00340 0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
00341 0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
00342 0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
00343 0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
00344 0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
00345 0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
00346 0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
00347 0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
00348 0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
00349 0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
00350 0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
00351 0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
00352 0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
00353 0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
00354 0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
00355 0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
00356 0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
00357 0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
00358 0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
00359 0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
00360 0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
00361 0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
00362 0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
00363 0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
00364 0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
00365 0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
00366 0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
00367 0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
00368 0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
00369 0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
00370 0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
00371 0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
00372 0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
00373 0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
00374 0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
00375 0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
00376 0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
00377 0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
00378 0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
00379 0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
00380 0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
00381 0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
00382 0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
00383 0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
00384 0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
00385 0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
00386 0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
00387 0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
00388 0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
00389 0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
00390 };
00391
00392 __constant__ __device__ uint32_t Td0_ConstMem[256] = {
00393 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
00394 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
00395 0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
00396 0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
00397 0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
00398 0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
00399 0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
00400 0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
00401 0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
00402 0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
00403 0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
00404 0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
00405 0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
00406 0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
00407 0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
00408 0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
00409 0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
00410 0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
00411 0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
00412 0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
00413 0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
00414 0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
00415 0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
00416 0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
00417 0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
00418 0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
00419 0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
00420 0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
00421 0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
00422 0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
00423 0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
00424 0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
00425 0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
00426 0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
00427 0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
00428 0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
00429 0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
00430 0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
00431 0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
00432 0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
00433 0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
00434 0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
00435 0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
00436 0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
00437 0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
00438 0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
00439 0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
00440 0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
00441 0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
00442 0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
00443 0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
00444 0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
00445 0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
00446 0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
00447 0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
00448 0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
00449 0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
00450 0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
00451 0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
00452 0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
00453 0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
00454 0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
00455 0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
00456 0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
00457 };
00458
00459 __constant__ __device__ uint32_t Td1_ConstMem[256] = {
00460 0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
00461 0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
00462 0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
00463 0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
00464 0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
00465 0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
00466 0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
00467 0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
00468 0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
00469 0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
00470 0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
00471 0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
00472 0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
00473 0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
00474 0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
00475 0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
00476 0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
00477 0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
00478 0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
00479 0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
00480 0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
00481 0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
00482 0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
00483 0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
00484 0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
00485 0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
00486 0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
00487 0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
00488 0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
00489 0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
00490 0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
00491 0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
00492 0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
00493 0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
00494 0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
00495 0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
00496 0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
00497 0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
00498 0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
00499 0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
00500 0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
00501 0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
00502 0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
00503 0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
00504 0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
00505 0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
00506 0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
00507 0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
00508 0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
00509 0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
00510 0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
00511 0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
00512 0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
00513 0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
00514 0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
00515 0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
00516 0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
00517 0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
00518 0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
00519 0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
00520 0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
00521 0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
00522 0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
00523 0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
00524 };
00525
00526 __constant__ __device__ uint32_t Td2_ConstMem[256] = {
00527 0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
00528 0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
00529 0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
00530 0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
00531 0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
00532 0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
00533 0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
00534 0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
00535 0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
00536 0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
00537 0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
00538 0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
00539 0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
00540 0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
00541 0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
00542 0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
00543 0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
00544 0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
00545 0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
00546 0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
00547 0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
00548 0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
00549 0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
00550 0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
00551 0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
00552 0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
00553 0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
00554 0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
00555 0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
00556 0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
00557 0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
00558 0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
00559 0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
00560 0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
00561 0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
00562 0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
00563 0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
00564 0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
00565 0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
00566 0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
00567 0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
00568 0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
00569 0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
00570 0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
00571 0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
00572 0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
00573 0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
00574 0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
00575 0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
00576 0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
00577 0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
00578 0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
00579 0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
00580 0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
00581 0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
00582 0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
00583 0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
00584 0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
00585 0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
00586 0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
00587 0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
00588 0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
00589 0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
00590 0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
00591 };
00592
00593 __constant__ __device__ uint32_t Td3_ConstMem[256] = {
00594 0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
00595 0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
00596 0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
00597 0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
00598 0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
00599 0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
00600 0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
00601 0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
00602 0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
00603 0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
00604 0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
00605 0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
00606 0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
00607 0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
00608 0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
00609 0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
00610 0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
00611 0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
00612 0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
00613 0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
00614 0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
00615 0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
00616 0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
00617 0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
00618 0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
00619 0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
00620 0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
00621 0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
00622 0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
00623 0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
00624 0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
00625 0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
00626 0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
00627 0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
00628 0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
00629 0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
00630 0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
00631 0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
00632 0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
00633 0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
00634 0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
00635 0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
00636 0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
00637 0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
00638 0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
00639 0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
00640 0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
00641 0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
00642 0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
00643 0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
00644 0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
00645 0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
00646 0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
00647 0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
00648 0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
00649 0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
00650 0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
00651 0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
00652 0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
00653 0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
00654 0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
00655 0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
00656 0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
00657 0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
00658 };
00659
00660 __constant__ __device__ uint8_t Td4_ConstMem[256] = {
00661 0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U,
00662 0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU,
00663 0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U,
00664 0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU,
00665 0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU,
00666 0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU,
00667 0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U,
00668 0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U,
00669 0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U,
00670 0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U,
00671 0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU,
00672 0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U,
00673 0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU,
00674 0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U,
00675 0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U,
00676 0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU,
00677 0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU,
00678 0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U,
00679 0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U,
00680 0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU,
00681 0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U,
00682 0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU,
00683 0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U,
00684 0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U,
00685 0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U,
00686 0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU,
00687 0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU,
00688 0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU,
00689 0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U,
00690 0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U,
00691 0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U,
00692 0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU,
00693 };
00694
00695
00696
00697
00698
00699 __host__ __device__ uint32_t GETU32(const uint8_t *pt)
00700 {
00701 uint32_t i = *((uint32_t*)pt);
00702 return ((i & 0x000000ffU) << 24) ^
00703 ((i & 0x0000ff00U) << 8) ^
00704 ((i & 0x00ff0000U) >> 8) ^
00705 ((i & 0xff000000U) >> 24);
00706 }
00707
00708 __device__ void PUTU32(uint8_t *ct, uint32_t st)
00709 {
00710 *((uint32_t*)ct) = ((st >> 24) ^
00711 (((st << 8) >> 24 ) << 8) ^
00712 (((st << 16) >> 24) << 16) ^
00713 (st << 24));
00714 }
00715
00716 __device__ void next_rk_128(uint32_t* rk,
00717 const int round,
00718 const uint32_t Te0[],
00719 const uint32_t Te1[],
00720 const uint32_t Te2[],
00721 const uint32_t Te3[],
00722 const uint32_t rcon[])
00723 {
00724 uint32_t temp;
00725 temp = rk[3];
00726 rk[0] = rk[0] ^
00727 (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
00728 (Te3[(temp >> 8) & 0xff] & 0x00ff0000) ^
00729 (Te0[(temp ) & 0xff] & 0x0000ff00) ^
00730 (Te1[(temp >> 24) ] & 0x000000ff) ^
00731 rcon[round];
00732 rk[1] = rk[1] ^ rk[0];
00733 rk[2] = rk[2] ^ rk[1];
00734 rk[3] = rk[3] ^ rk[2];
00735 }
00736
00737
00738 __device__ void next_rk_128_decrypt(uint32_t* rk,
00739 const int round,
00740 const uint32_t Te0[],
00741 const uint32_t Te1[],
00742 const uint32_t Te2[],
00743 const uint32_t Te3[],
00744 const uint32_t rcon[])
00745 {
00746 rk[3] = rk[2] ^ rk[3];
00747 rk[2] = rk[1] ^ rk[2];
00748 rk[1] = rk[0] ^ rk[1];
00749 rk[0] = rk[0] ^
00750 (Te2[(rk[3] >> 16) & 0xff] & 0xff000000) ^
00751 (Te3[(rk[3] >> 8) & 0xff] & 0x00ff0000) ^
00752 (Te0[(rk[3] ) & 0xff] & 0x0000ff00) ^
00753 (Te1[(rk[3] >> 24) ] & 0x000000ff) ^
00754 rcon[round];
00755 }
00756
00757
00758 __device__ void
00759 AES_128_encrypt(const uint8_t *in,
00760 uint8_t *out,
00761 const uint8_t *key,
00762 const uint32_t Te0[],
00763 const uint32_t Te1[],
00764 const uint32_t Te2[],
00765 const uint32_t Te3[],
00766 const uint32_t rcon[])
00767 {
00768 uint32_t s0, s1, s2, s3, t0, t1, t2, t3;
00769
00770
00771 uint32_t rk[4];
00772 rk[0] = GETU32(key);
00773 rk[1] = GETU32(key + 4);
00774 rk[2] = GETU32(key + 8);
00775 rk[3] = GETU32(key + 12);
00776
00777
00778
00779
00780
00781 s0 = GETU32(in ) ^ rk[0];
00782 s1 = GETU32(in + 4) ^ rk[1];
00783 s2 = GETU32(in + 8) ^ rk[2];
00784 s3 = GETU32(in + 12) ^ rk[3];
00785
00786
00787
00788
00789 next_rk_128(rk, 0, Te0, Te1, Te2, Te3, rcon);
00790 t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 0];
00791 t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 1];
00792 t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 2];
00793 t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 3];
00794
00795
00796 next_rk_128(rk, 1, Te0, Te1, Te2, Te3, rcon);
00797 s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 0];
00798 s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 1];
00799 s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[ 2];
00800 s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[ 3];
00801
00802
00803 next_rk_128(rk, 2, Te0, Te1, Te2, Te3, rcon);
00804 t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 0];
00805 t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 1];
00806 t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 2];
00807 t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 3];
00808
00809 next_rk_128(rk, 3, Te0, Te1, Te2, Te3, rcon);
00810 s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 0];
00811 s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 1];
00812 s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[ 2];
00813 s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[ 3];
00814
00815 next_rk_128(rk, 4, Te0, Te1, Te2, Te3, rcon);
00816 t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 0];
00817 t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 1];
00818 t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 2];
00819 t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 3];
00820
00821 next_rk_128(rk, 5, Te0, Te1, Te2, Te3, rcon);
00822 s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 0];
00823 s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 1];
00824 s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[ 2];
00825 s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[ 3];
00826
00827 next_rk_128(rk, 6, Te0, Te1, Te2, Te3, rcon);
00828 t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 0];
00829 t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 1];
00830 t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 2];
00831 t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 3];
00832
00833 next_rk_128(rk, 7, Te0, Te1, Te2, Te3, rcon);
00834 s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 0];
00835 s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 1];
00836 s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[ 2];
00837 s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[ 3];
00838
00839 next_rk_128(rk, 8, Te0, Te1, Te2, Te3, rcon);
00840 t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 0];
00841 t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 1];
00842 t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 2];
00843 t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 3];
00844
00845
00846
00847
00848
00849
00850 next_rk_128(rk, 9, Te0, Te1, Te2, Te3, rcon);
00851 s0 =
00852 (Te2[(t0 >> 24) ] & 0xff000000) ^
00853 (Te3[(t1 >> 16) & 0xff] & 0x00ff0000) ^
00854 (Te0[(t2 >> 8) & 0xff] & 0x0000ff00) ^
00855 (Te1[(t3 ) & 0xff] & 0x000000ff) ^
00856 rk[0];
00857 PUTU32(out , s0);
00858
00859 s1 =
00860 (Te2[(t1 >> 24) ] & 0xff000000) ^
00861 (Te3[(t2 >> 16) & 0xff] & 0x00ff0000) ^
00862 (Te0[(t3 >> 8) & 0xff] & 0x0000ff00) ^
00863 (Te1[(t0 ) & 0xff] & 0x000000ff) ^
00864 rk[1];
00865 PUTU32(out + 4, s1);
00866
00867 s2 =
00868 (Te2[(t2 >> 24) ] & 0xff000000) ^
00869 (Te3[(t3 >> 16) & 0xff] & 0x00ff0000) ^
00870 (Te0[(t0 >> 8) & 0xff] & 0x0000ff00) ^
00871 (Te1[(t1 ) & 0xff] & 0x000000ff) ^
00872 rk[2];
00873 PUTU32(out + 8, s2);
00874
00875 s3 =
00876 (Te2[(t3 >> 24) ] & 0xff000000) ^
00877 (Te3[(t0 >> 16) & 0xff] & 0x00ff0000) ^
00878 (Te0[(t1 >> 8) & 0xff] & 0x0000ff00) ^
00879 (Te1[(t2 ) & 0xff] & 0x000000ff) ^
00880 rk[3];
00881 PUTU32(out + 12, s3);
00882 }
00883
00884 __device__ void
00885 AES_128_decrypt(const uint8_t *in,
00886 uint8_t *out,
00887 uint32_t *rk,
00888 const uint32_t Td0[],
00889 const uint32_t Td1[],
00890 const uint32_t Td2[],
00891 const uint32_t Td3[],
00892 const uint8_t Td4[],
00893 const uint32_t Te0[],
00894 const uint32_t Te1[],
00895 const uint32_t Te2[],
00896 const uint32_t Te3[],
00897 const uint32_t rcon[])
00898 {
00899 uint32_t s0, s1, s2, s3, t0, t1, t2, t3;
00900
00901
00902
00903 uint32_t temp[4];
00904
00905
00906
00907
00908
00909 s0 = GETU32(in ) ^ rk[0];
00910 s1 = GETU32(in + 4) ^ rk[1];
00911 s2 = GETU32(in + 8) ^ rk[2];
00912 s3 = GETU32(in + 12) ^ rk[3];
00913
00914
00915 next_rk_128_decrypt(rk, 9, Te0, Te1, Te2, Te3, rcon);
00916 temp[0] =
00917 Td0[Te1[(rk[0] >> 24) ] & 0xff] ^
00918 Td1[Te1[(rk[0] >> 16) & 0xff] & 0xff] ^
00919 Td2[Te1[(rk[0] >> 8) & 0xff] & 0xff] ^
00920 Td3[Te1[(rk[0] ) & 0xff] & 0xff];
00921 temp[1] =
00922 Td0[Te1[(rk[1] >> 24) ] & 0xff] ^
00923 Td1[Te1[(rk[1] >> 16) & 0xff] & 0xff] ^
00924 Td2[Te1[(rk[1] >> 8) & 0xff] & 0xff] ^
00925 Td3[Te1[(rk[1] ) & 0xff] & 0xff];
00926 temp[2] =
00927 Td0[Te1[(rk[2] >> 24) ] & 0xff] ^
00928 Td1[Te1[(rk[2] >> 16) & 0xff] & 0xff] ^
00929 Td2[Te1[(rk[2] >> 8) & 0xff] & 0xff] ^
00930 Td3[Te1[(rk[2] ) & 0xff] & 0xff];
00931 temp[3] =
00932 Td0[Te1[(rk[3] >> 24) ] & 0xff] ^
00933 Td1[Te1[(rk[3] >> 16) & 0xff] & 0xff] ^
00934 Td2[Te1[(rk[3] >> 8) & 0xff] & 0xff] ^
00935 Td3[Te1[(rk[3] ) & 0xff] & 0xff];
00936
00937 t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ temp[0];
00938 t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ temp[1];
00939 t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ temp[2];
00940 t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ temp[3];
00941
00942 next_rk_128_decrypt(rk, 8, Te0, Te1, Te2, Te3, rcon);
00943 temp[0] =
00944 Td0[Te1[(rk[0] >> 24) ] & 0xff] ^
00945 Td1[Te1[(rk[0] >> 16) & 0xff] & 0xff] ^
00946 Td2[Te1[(rk[0] >> 8) & 0xff] & 0xff] ^
00947 Td3[Te1[(rk[0] ) & 0xff] & 0xff];
00948 temp[1] =
00949 Td0[Te1[(rk[1] >> 24) ] & 0xff] ^
00950 Td1[Te1[(rk[1] >> 16) & 0xff] & 0xff] ^
00951 Td2[Te1[(rk[1] >> 8) & 0xff] & 0xff] ^
00952 Td3[Te1[(rk[1] ) & 0xff] & 0xff];
00953 temp[2] =
00954 Td0[Te1[(rk[2] >> 24) ] & 0xff] ^
00955 Td1[Te1[(rk[2] >> 16) & 0xff] & 0xff] ^
00956 Td2[Te1[(rk[2] >> 8) & 0xff] & 0xff] ^
00957 Td3[Te1[(rk[2] ) & 0xff] & 0xff];
00958 temp[3] =
00959 Td0[Te1[(rk[3] >> 24) ] & 0xff] ^
00960 Td1[Te1[(rk[3] >> 16) & 0xff] & 0xff] ^
00961 Td2[Te1[(rk[3] >> 8) & 0xff] & 0xff] ^
00962 Td3[Te1[(rk[3] ) & 0xff] & 0xff];
00963 s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ temp[0];
00964 s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ temp[1];
00965 s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ temp[2];
00966 s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ temp[3];
00967
00968 next_rk_128_decrypt(rk, 7, Te0, Te1, Te2, Te3, rcon);
00969 temp[0] =
00970 Td0[Te1[(rk[0] >> 24) ] & 0xff] ^
00971 Td1[Te1[(rk[0] >> 16) & 0xff] & 0xff] ^
00972 Td2[Te1[(rk[0] >> 8) & 0xff] & 0xff] ^
00973 Td3[Te1[(rk[0] ) & 0xff] & 0xff];
00974 temp[1] =
00975 Td0[Te1[(rk[1] >> 24) ] & 0xff] ^
00976 Td1[Te1[(rk[1] >> 16) & 0xff] & 0xff] ^
00977 Td2[Te1[(rk[1] >> 8) & 0xff] & 0xff] ^
00978 Td3[Te1[(rk[1] ) & 0xff] & 0xff];
00979 temp[2] =
00980 Td0[Te1[(rk[2] >> 24) ] & 0xff] ^
00981 Td1[Te1[(rk[2] >> 16) & 0xff] & 0xff] ^
00982 Td2[Te1[(rk[2] >> 8) & 0xff] & 0xff] ^
00983 Td3[Te1[(rk[2] ) & 0xff] & 0xff];
00984 temp[3] =
00985 Td0[Te1[(rk[3] >> 24) ] & 0xff] ^
00986 Td1[Te1[(rk[3] >> 16) & 0xff] & 0xff] ^
00987 Td2[Te1[(rk[3] >> 8) & 0xff] & 0xff] ^
00988 Td3[Te1[(rk[3] ) & 0xff] & 0xff];
00989 t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ temp[0];
00990 t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ temp[1];
00991 t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ temp[2];
00992 t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ temp[3];
00993
00994 next_rk_128_decrypt(rk, 6, Te0, Te1, Te2, Te3, rcon);
00995 temp[0] =
00996 Td0[Te1[(rk[0] >> 24) ] & 0xff] ^
00997 Td1[Te1[(rk[0] >> 16) & 0xff] & 0xff] ^
00998 Td2[Te1[(rk[0] >> 8) & 0xff] & 0xff] ^
00999 Td3[Te1[(rk[0] ) & 0xff] & 0xff];
01000 temp[1] =
01001 Td0[Te1[(rk[1] >> 24) ] & 0xff] ^
01002 Td1[Te1[(rk[1] >> 16) & 0xff] & 0xff] ^
01003 Td2[Te1[(rk[1] >> 8) & 0xff] & 0xff] ^
01004 Td3[Te1[(rk[1] ) & 0xff] & 0xff];
01005 temp[2] =
01006 Td0[Te1[(rk[2] >> 24) ] & 0xff] ^
01007 Td1[Te1[(rk[2] >> 16) & 0xff] & 0xff] ^
01008 Td2[Te1[(rk[2] >> 8) & 0xff] & 0xff] ^
01009 Td3[Te1[(rk[2] ) & 0xff] & 0xff];
01010 temp[3] =
01011 Td0[Te1[(rk[3] >> 24) ] & 0xff] ^
01012 Td1[Te1[(rk[3] >> 16) & 0xff] & 0xff] ^
01013 Td2[Te1[(rk[3] >> 8) & 0xff] & 0xff] ^
01014 Td3[Te1[(rk[3] ) & 0xff] & 0xff];
01015 s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ temp[0];
01016 s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ temp[1];
01017 s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ temp[2];
01018 s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ temp[3];
01019
01020 next_rk_128_decrypt(rk, 5, Te0, Te1, Te2, Te3, rcon);
01021 temp[0] =
01022 Td0[Te1[(rk[0] >> 24) ] & 0xff] ^
01023 Td1[Te1[(rk[0] >> 16) & 0xff] & 0xff] ^
01024 Td2[Te1[(rk[0] >> 8) & 0xff] & 0xff] ^
01025 Td3[Te1[(rk[0] ) & 0xff] & 0xff];
01026 temp[1] =
01027 Td0[Te1[(rk[1] >> 24) ] & 0xff] ^
01028 Td1[Te1[(rk[1] >> 16) & 0xff] & 0xff] ^
01029 Td2[Te1[(rk[1] >> 8) & 0xff] & 0xff] ^
01030 Td3[Te1[(rk[1] ) & 0xff] & 0xff];
01031 temp[2] =
01032 Td0[Te1[(rk[2] >> 24) ] & 0xff] ^
01033 Td1[Te1[(rk[2] >> 16) & 0xff] & 0xff] ^
01034 Td2[Te1[(rk[2] >> 8) & 0xff] & 0xff] ^
01035 Td3[Te1[(rk[2] ) & 0xff] & 0xff];
01036 temp[3] =
01037 Td0[Te1[(rk[3] >> 24) ] & 0xff] ^
01038 Td1[Te1[(rk[3] >> 16) & 0xff] & 0xff] ^
01039 Td2[Te1[(rk[3] >> 8) & 0xff] & 0xff] ^
01040 Td3[Te1[(rk[3] ) & 0xff] & 0xff];
01041 t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ temp[0];
01042 t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ temp[1];
01043 t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ temp[2];
01044 t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ temp[3];
01045
01046 next_rk_128_decrypt(rk, 4, Te0, Te1, Te2, Te3, rcon);
01047 temp[0] =
01048 Td0[Te1[(rk[0] >> 24) ] & 0xff] ^
01049 Td1[Te1[(rk[0] >> 16) & 0xff] & 0xff] ^
01050 Td2[Te1[(rk[0] >> 8) & 0xff] & 0xff] ^
01051 Td3[Te1[(rk[0] ) & 0xff] & 0xff];
01052 temp[1] =
01053 Td0[Te1[(rk[1] >> 24) ] & 0xff] ^
01054 Td1[Te1[(rk[1] >> 16) & 0xff] & 0xff] ^
01055 Td2[Te1[(rk[1] >> 8) & 0xff] & 0xff] ^
01056 Td3[Te1[(rk[1] ) & 0xff] & 0xff];
01057 temp[2] =
01058 Td0[Te1[(rk[2] >> 24) ] & 0xff] ^
01059 Td1[Te1[(rk[2] >> 16) & 0xff] & 0xff] ^
01060 Td2[Te1[(rk[2] >> 8) & 0xff] & 0xff] ^
01061 Td3[Te1[(rk[2] ) & 0xff] & 0xff];
01062 temp[3] =
01063 Td0[Te1[(rk[3] >> 24) ] & 0xff] ^
01064 Td1[Te1[(rk[3] >> 16) & 0xff] & 0xff] ^
01065 Td2[Te1[(rk[3] >> 8) & 0xff] & 0xff] ^
01066 Td3[Te1[(rk[3] ) & 0xff] & 0xff];
01067 s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ temp[0];
01068 s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ temp[1];
01069 s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ temp[2];
01070 s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ temp[3];
01071
01072 next_rk_128_decrypt(rk, 3, Te0, Te1, Te2, Te3, rcon);
01073 temp[0] =
01074 Td0[Te1[(rk[0] >> 24) ] & 0xff] ^
01075 Td1[Te1[(rk[0] >> 16) & 0xff] & 0xff] ^
01076 Td2[Te1[(rk[0] >> 8) & 0xff] & 0xff] ^
01077 Td3[Te1[(rk[0] ) & 0xff] & 0xff];
01078 temp[1] =
01079 Td0[Te1[(rk[1] >> 24) ] & 0xff] ^
01080 Td1[Te1[(rk[1] >> 16) & 0xff] & 0xff] ^
01081 Td2[Te1[(rk[1] >> 8) & 0xff] & 0xff] ^
01082 Td3[Te1[(rk[1] ) & 0xff] & 0xff];
01083 temp[2] =
01084 Td0[Te1[(rk[2] >> 24) ] & 0xff] ^
01085 Td1[Te1[(rk[2] >> 16) & 0xff] & 0xff] ^
01086 Td2[Te1[(rk[2] >> 8) & 0xff] & 0xff] ^
01087 Td3[Te1[(rk[2] ) & 0xff] & 0xff];
01088 temp[3] =
01089 Td0[Te1[(rk[3] >> 24) ] & 0xff] ^
01090 Td1[Te1[(rk[3] >> 16) & 0xff] & 0xff] ^
01091 Td2[Te1[(rk[3] >> 8) & 0xff] & 0xff] ^
01092 Td3[Te1[(rk[3] ) & 0xff] & 0xff];
01093 t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ temp[0];
01094 t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ temp[1];
01095 t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ temp[2];
01096 t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ temp[3];
01097
01098 next_rk_128_decrypt(rk, 2, Te0, Te1, Te2, Te3, rcon);
01099 temp[0] =
01100 Td0[Te1[(rk[0] >> 24) ] & 0xff] ^
01101 Td1[Te1[(rk[0] >> 16) & 0xff] & 0xff] ^
01102 Td2[Te1[(rk[0] >> 8) & 0xff] & 0xff] ^
01103 Td3[Te1[(rk[0] ) & 0xff] & 0xff];
01104 temp[1] =
01105 Td0[Te1[(rk[1] >> 24) ] & 0xff] ^
01106 Td1[Te1[(rk[1] >> 16) & 0xff] & 0xff] ^
01107 Td2[Te1[(rk[1] >> 8) & 0xff] & 0xff] ^
01108 Td3[Te1[(rk[1] ) & 0xff] & 0xff];
01109 temp[2] =
01110 Td0[Te1[(rk[2] >> 24) ] & 0xff] ^
01111 Td1[Te1[(rk[2] >> 16) & 0xff] & 0xff] ^
01112 Td2[Te1[(rk[2] >> 8) & 0xff] & 0xff] ^
01113 Td3[Te1[(rk[2] ) & 0xff] & 0xff];
01114 temp[3] =
01115 Td0[Te1[(rk[3] >> 24) ] & 0xff] ^
01116 Td1[Te1[(rk[3] >> 16) & 0xff] & 0xff] ^
01117 Td2[Te1[(rk[3] >> 8) & 0xff] & 0xff] ^
01118 Td3[Te1[(rk[3] ) & 0xff] & 0xff];
01119 s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ temp[0];
01120 s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ temp[1];
01121 s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ temp[2];
01122 s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ temp[3];
01123
01124 next_rk_128_decrypt(rk, 1, Te0, Te1, Te2, Te3, rcon);
01125 temp[0] =
01126 Td0[Te1[(rk[0] >> 24) ] & 0xff] ^
01127 Td1[Te1[(rk[0] >> 16) & 0xff] & 0xff] ^
01128 Td2[Te1[(rk[0] >> 8) & 0xff] & 0xff] ^
01129 Td3[Te1[(rk[0] ) & 0xff] & 0xff];
01130 temp[1] =
01131 Td0[Te1[(rk[1] >> 24) ] & 0xff] ^
01132 Td1[Te1[(rk[1] >> 16) & 0xff] & 0xff] ^
01133 Td2[Te1[(rk[1] >> 8) & 0xff] & 0xff] ^
01134 Td3[Te1[(rk[1] ) & 0xff] & 0xff];
01135 temp[2] =
01136 Td0[Te1[(rk[2] >> 24) ] & 0xff] ^
01137 Td1[Te1[(rk[2] >> 16) & 0xff] & 0xff] ^
01138 Td2[Te1[(rk[2] >> 8) & 0xff] & 0xff] ^
01139 Td3[Te1[(rk[2] ) & 0xff] & 0xff];
01140 temp[3] =
01141 Td0[Te1[(rk[3] >> 24) ] & 0xff] ^
01142 Td1[Te1[(rk[3] >> 16) & 0xff] & 0xff] ^
01143 Td2[Te1[(rk[3] >> 8) & 0xff] & 0xff] ^
01144 Td3[Te1[(rk[3] ) & 0xff] & 0xff];
01145 t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ temp[0];
01146 t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ temp[1];
01147 t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ temp[2];
01148 t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ temp[3];
01149
01150 next_rk_128_decrypt(rk, 0, Te0, Te1, Te2, Te3, rcon);
01151
01152
01153
01154
01155
01156 s0 =
01157 (Td4[(t0 >> 24) ] << 24) ^
01158 (Td4[(t3 >> 16) & 0xff] << 16) ^
01159 (Td4[(t2 >> 8) & 0xff] << 8) ^
01160 (Td4[(t1 ) & 0xff]) ^
01161 rk[0];
01162 PUTU32(out , s0);
01163 s1 =
01164 (Td4[(t1 >> 24) ] << 24) ^
01165 (Td4[(t0 >> 16) & 0xff] << 16) ^
01166 (Td4[(t3 >> 8) & 0xff] << 8) ^
01167 (Td4[(t2 ) & 0xff]) ^
01168 rk[1];
01169 PUTU32(out + 4, s1);
01170 s2 =
01171 (Td4[(t2 >> 24) ] << 24) ^
01172 (Td4[(t1 >> 16) & 0xff] << 16) ^
01173 (Td4[(t0 >> 8) & 0xff] << 8) ^
01174 (Td4[(t3 ) & 0xff]) ^
01175 rk[2];
01176 PUTU32(out + 8, s2);
01177 s3 =
01178 (Td4[(t3 >> 24) ] << 24) ^
01179 (Td4[(t2 >> 16) & 0xff] << 16) ^
01180 (Td4[(t1 >> 8) & 0xff] << 8) ^
01181 (Td4[(t0 ) & 0xff]) ^
01182 rk[3];
01183 PUTU32(out + 12, s3);
01184 }
01185
01186
01187
01188
01189
01190 __device__ void next_rk_256(uint32_t* rk,
01191 const int round,
01192 const uint32_t Te0[],
01193 const uint32_t Te1[],
01194 const uint32_t Te2[],
01195 const uint32_t Te3[],
01196 const uint32_t rcon[])
01197 {
01198 uint32_t temp;
01199 temp = rk[7];
01200 rk[0] = rk[0] ^
01201 (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
01202 (Te3[(temp >> 8) & 0xff] & 0x00ff0000) ^
01203 (Te0[(temp ) & 0xff] & 0x0000ff00) ^
01204 (Te1[(temp >> 24) ] & 0x000000ff) ^
01205 rcon[round];
01206 rk[1] = rk[1] ^ rk[0];
01207 rk[2] = rk[2] ^ rk[1];
01208 rk[3] = rk[3] ^ rk[2];
01209 temp = rk[3];
01210 rk[4] = rk[4] ^
01211 (Te2[(temp >> 24) ] & 0xff000000) ^
01212 (Te3[(temp >> 16) & 0xff] & 0x00ff0000) ^
01213 (Te0[(temp >> 8) & 0xff] & 0x0000ff00) ^
01214 (Te1[(temp ) & 0xff] & 0x000000ff);
01215 rk[5] = rk[5] ^ rk[4];
01216 rk[6] = rk[6] ^ rk[5];
01217 rk[7] = rk[7] ^ rk[6];
01218 }
01219
01220 __device__ void
01221 AES_256_encrypt(const uint8_t *in,
01222 uint8_t *out,
01223 const uint8_t *key,
01224 const uint32_t Te0[],
01225 const uint32_t Te1[],
01226 const uint32_t Te2[],
01227 const uint32_t Te3[],
01228 const uint32_t rcon[])
01229 {
01230 uint32_t s0, s1, s2, s3, t0, t1, t2, t3;
01231
01232 uint32_t rk[8];
01233 rk[0] = GETU32(key);
01234 rk[1] = GETU32(key + 4);
01235 rk[2] = GETU32(key + 8);
01236 rk[3] = GETU32(key + 12);
01237 rk[4] = GETU32(key + 16);
01238 rk[5] = GETU32(key + 20);
01239 rk[6] = GETU32(key + 24);
01240 rk[7] = GETU32(key + 28);
01241
01242
01243
01244
01245
01246 s0 = GETU32(in ) ^ rk[0];
01247 s1 = GETU32(in + 4) ^ rk[1];
01248 s2 = GETU32(in + 8) ^ rk[2];
01249 s3 = GETU32(in + 12) ^ rk[3];
01250
01251
01252
01253
01254 t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4];
01255 t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5];
01256 t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6];
01257 t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7];
01258
01259 next_rk_256(rk, 0, Te0, Te1, Te2, Te3, rcon);
01260
01261
01262 s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 0];
01263 s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 1];
01264 s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[ 2];
01265 s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[ 3];
01266
01267
01268 t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4];
01269 t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5];
01270 t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6];
01271 t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7];
01272
01273
01274 next_rk_256(rk, 1, Te0, Te1, Te2, Te3, rcon);
01275 s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 0];
01276 s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 1];
01277 s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[ 2];
01278 s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[ 3];
01279
01280
01281 t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4];
01282 t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5];
01283 t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6];
01284 t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7];
01285
01286
01287 next_rk_256(rk, 2, Te0, Te1, Te2, Te3, rcon);
01288 s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 0];
01289 s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 1];
01290 s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[ 2];
01291 s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[ 3];
01292
01293
01294 t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4];
01295 t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5];
01296 t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6];
01297 t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7];
01298
01299
01300 next_rk_256(rk, 3, Te0, Te1, Te2, Te3, rcon);
01301 s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 0];
01302 s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 1];
01303 s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[ 2];
01304 s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[ 3];
01305
01306 t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4];
01307 t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5];
01308 t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6];
01309 t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7];
01310
01311 next_rk_256(rk, 4, Te0, Te1, Te2, Te3, rcon);
01312
01313 s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 0];
01314 s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 1];
01315 s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[ 2];
01316 s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[ 3];
01317
01318 t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4];
01319 t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5];
01320 t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6];
01321 t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7];
01322 next_rk_256(rk, 5, Te0, Te1, Te2, Te3, rcon);
01323
01324 s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 0];
01325 s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 1];
01326 s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[ 2];
01327 s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[ 3];
01328
01329
01330 t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4];
01331 t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5];
01332 t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6];
01333 t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7];
01334
01335 next_rk_256(rk, 6, Te0, Te1, Te2, Te3, rcon);
01336
01337 s0 =
01338 (Te2[(t0 >> 24) ] & 0xff000000) ^
01339 (Te3[(t1 >> 16) & 0xff] & 0x00ff0000) ^
01340 (Te0[(t2 >> 8) & 0xff] & 0x0000ff00) ^
01341 (Te1[(t3 ) & 0xff] & 0x000000ff) ^
01342 rk[0];
01343 PUTU32(out , s0);
01344
01345 s1 =
01346 (Te2[(t1 >> 24) ] & 0xff000000) ^
01347 (Te3[(t2 >> 16) & 0xff] & 0x00ff0000) ^
01348 (Te0[(t3 >> 8) & 0xff] & 0x0000ff00) ^
01349 (Te1[(t0 ) & 0xff] & 0x000000ff) ^
01350 rk[1];
01351 PUTU32(out + 4, s1);
01352
01353 s2 =
01354 (Te2[(t2 >> 24) ] & 0xff000000) ^
01355 (Te3[(t3 >> 16) & 0xff] & 0x00ff0000) ^
01356 (Te0[(t0 >> 8) & 0xff] & 0x0000ff00) ^
01357 (Te1[(t1 ) & 0xff] & 0x000000ff) ^
01358 rk[2];
01359 PUTU32(out + 8, s2);
01360
01361 s3 =
01362 (Te2[(t3 >> 24) ] & 0xff000000) ^
01363 (Te3[(t0 >> 16) & 0xff] & 0x00ff0000) ^
01364 (Te0[(t1 >> 8) & 0xff] & 0x0000ff00) ^
01365 (Te1[(t2 ) & 0xff] & 0x000000ff) ^
01366 rk[3];
01367 PUTU32(out + 12, s3);
01368 }
01369
01370
01371 #endif