sha_context Class Reference

#include <sha_context.hh>

Collaboration diagram for sha_context:

Public Member Functions
	sha_context (device_context *dev_ctx)
void	hmac_sha1 (const void *memory_start, const unsigned long in_pos, const unsigned long keys_pos, const unsigned long pkt_offset_pos, const unsigned long lengths_pos, const unsigned long data_size, unsigned char *out, const unsigned long num_flows, unsigned int stream_id)
bool	sync (const unsigned int stream_id, const bool block=true, const bool copy_result=true)

Detailed Description

class sha_context

Interface for HMAC-SHA1 in GPU.

Constructor & Destructor Documentation

sha_context::sha_context ( device_context * dev_ctx )

Constructor.

Parameters:

dev_ctx

Device context pointer. Device context must be initialized before calling this function.

00009 {
00010         for (unsigned i = 0; i <MAX_STREAM; i++) {
00011                 streams[i].out = 0;
00012                 streams[i].out_d = 0;
00013                 streams[i].out_len = 0;
00014         }
00015         dev_ctx_ = dev_ctx;
00016 }

Member Function Documentation

void sha_context::hmac_sha1	(	const void *	memory_start,
		const unsigned long	in_pos,
		const unsigned long	keys_pos,
		const unsigned long	pkt_offset_pos,
		const unsigned long	lengths_pos,
		const unsigned long	data_size,
		unsigned char *	out,
		const unsigned long	num_flows,
		unsigned int	stream_id
	)

Executes HMAC-SHA1 on the GPU. If streams are enabled, it runs in non-blocking mode; otherwise it runs in blocking mode and the result is written back to out before the function returns. This function takes one or more input messages and computes the HMAC-SHA1 value for each of them.

Parameters:

	memory_start	Starting point of input data. All input data should be packed into a single contiguous region before calling this function.
	in_pos	Offset of plain texts.
	keys_pos	Offset of region that stores HMAC keys.
	pkt_offset_pos	Offset of region that stores position of each plain text.
	lengths_pos	Offset of region that stores length of each plain text.
	data_size	Total amount of input data.
	out	Buffer to store output.
	num_flows	Number of plain texts to be hashed.
	stream_id	Stream index.

00031 {
00032         assert(dev_ctx_->get_state(stream_id) == READY);
00033         dev_ctx_->set_state(stream_id, WAIT_KERNEL);
00034         cuda_mem_pool *pool = dev_ctx_->get_cuda_mem_pool(stream_id);
00035         void *memory_d = pool->alloc(data_size);;
00036 
00037         //copy input data
00038         cudaMemcpyAsync(memory_d,
00039                         memory_start,
00040                         data_size,
00041                         cudaMemcpyHostToDevice,
00042                         dev_ctx_->get_stream(stream_id));
00043 
00044         //variables need for kernel launch
00045         int threads_per_blk = SHA1_THREADS_PER_BLK;
00046         int num_blks = (num_flows+threads_per_blk-1)/threads_per_blk;
00047 
00048         //allocate buffer for output
00049         uint32_t *out_d = (uint32_t *)pool->alloc(20 * num_flows);
00050 
00051         //initialize input memory offset in device memory
00052         char *in_d = (char *)memory_d + in_pos;
00053         char *keys_d = (char *)memory_d + keys_pos;
00054         uint32_t *pkt_offset_d = (uint32_t *)((uint8_t *)memory_d + offsets_pos);
00055         uint16_t *lengths_d = (uint16_t *)((uint8_t *)memory_d + lengths_pos);
00056 
00057         //clear checkbits before kernel execution
00058         dev_ctx_->clear_checkbits(stream_id, num_blks);
00059 
00060         if (dev_ctx_->use_stream() && stream_id > 0) { //with stream
00061                 hmac_sha1_gpu(in_d,
00062                               keys_d,
00063                               pkt_offset_d,
00064                               lengths_d,
00065                               out_d,
00066                               num_flows,
00067                               dev_ctx_->get_dev_checkbits(stream_id),
00068                               threads_per_blk,
00069                               dev_ctx_->get_stream(stream_id));
00070         } else if (!dev_ctx_->use_stream() && stream_id == 0) {//w/o stream
00071                 hmac_sha1_gpu(in_d,
00072                               keys_d,
00073                               pkt_offset_d,
00074                               lengths_d,
00075                               out_d,
00076                               num_flows,
00077                               dev_ctx_->get_dev_checkbits(stream_id),
00078                               SHA1_THREADS_PER_BLK);
00079         } else {
00080                 assert(0);
00081         }
00082 
00083         assert(cudaGetLastError() == cudaSuccess);
00084 
00085         streams[stream_id].out_d = (uint8_t*)out_d;
00086         streams[stream_id].out = out;
00087         streams[stream_id].out_len = 20 * num_flows;
00088 
00089         //if stream is not used then sync (assuming blocking mode)
00090         if (dev_ctx_->use_stream() && stream_id == 0) {
00091                 sync(stream_id);
00092         }
00093 }

Here is the call graph for this function:

bool sha_context::sync	(	const unsigned int	stream_id,
		const bool	block = `true`,
		const bool	copy_result = `true`
	)

Synchronize/query the execution on the stream. This function can be used either to check whether the current execution on the stream has finished or to wait until the execution finishes.

Parameters:

	stream_id	Stream index.
	block	Wait for the execution to finish or not. true by default.
	copy_result	If false, it will not copy result back to CPU.

Returns: true if the current operation on the stream is finished, otherwise false.

00098 {
00099         if (block) {
00100                 dev_ctx_->sync(stream_id, true);
00101                 if (copy_result && dev_ctx_->get_state(stream_id) == WAIT_KERNEL) {
00102                         cutilSafeCall(cudaMemcpyAsync(streams[stream_id].out,
00103                                                       streams[stream_id].out_d,
00104                                                       streams[stream_id].out_len,
00105                                                       cudaMemcpyDeviceToHost,
00106                                                       dev_ctx_->get_stream(stream_id)));
00107                         dev_ctx_->set_state(stream_id, WAIT_COPY);
00108                         dev_ctx_->sync(stream_id, true);
00109                 }
00110                 if (dev_ctx_->get_state(stream_id) == WAIT_COPY) {
00111                         dev_ctx_->sync(stream_id, true);
00112                         dev_ctx_->set_state(stream_id, READY);
00113                 }
00114                 return true;
00115         } else {
00116                 if (!dev_ctx_->sync(stream_id, false))
00117                         return false;
00118 
00119                 if (dev_ctx_->get_state(stream_id) == WAIT_KERNEL) {
00120                         //if no need for data copy
00121                         if (!copy_result) {
00122                                 dev_ctx_->set_state(stream_id, READY);
00123                                 return true;
00124                         }
00125 
00126                         cutilSafeCall(cudaMemcpyAsync(streams[stream_id].out,
00127                                                       streams[stream_id].out_d,
00128                                                       streams[stream_id].out_len,
00129                                                       cudaMemcpyDeviceToHost,
00130                                                       dev_ctx_->get_stream(stream_id)));
00131                         dev_ctx_->set_state(stream_id, WAIT_COPY);
00132 
00133                 } else if (dev_ctx_->get_state(stream_id) == WAIT_COPY) {
00134                         dev_ctx_->set_state(stream_id, READY);
00135                         return true;
00136 
00137                 } else if (dev_ctx_->get_state(stream_id) == READY) {
00138                         return true;
00139 
00140                 } else {
00141                         assert(0);
00142                 }
00143         }
00144         return false;
00145 }

Here is the call graph for this function:

Here is the caller graph for this function:

sha_context Class Reference

Public Member Functions

Detailed Description

Constructor & Destructor Documentation

Member Function Documentation