00001 #ifndef __DEVICE_CONTEXT__
00002 #define __DEVICE_CONTEXT__
00003
00004 #include "pinned_mem_pool.hh"
00005 #include "cuda_mem_pool.hh"
00006
00007 #include <cuda_runtime.h>
00008 #include <assert.h>
00009
/* Upper bound on the number of streams; device_context sizes its
 * per-stream context array as MAX_STREAM + 1 entries. */
#define MAX_STREAM 16
/* NOTE(review): not referenced in this header; presumably the upper bound on
 * the number of blocks tracked by the per-stream checkbits -- confirm
 * against the implementation. */
#define MAX_BLOCKS 8192
00012
/**
 * Per-stream state tag stored in stream_context::state and exchanged through
 * device_context::set_state() / get_state().
 *
 * NOTE(review): judging by the names, WAIT_KERNEL / WAIT_COPY mark a stream
 * that is waiting for a kernel or a copy to finish -- confirm against the
 * code that drives the state machine.
 */
enum STATE {
	READY       = 0,
	WAIT_KERNEL = 1,
	WAIT_COPY   = 2
};
00021
00022
00023 struct stream_context{
00024 cuda_mem_pool pool;
00025 cudaStream_t stream;
00026
00027 STATE state;
00028 bool finished;
00029
00030 uint8_t *checkbits;
00031 uint8_t *checkbits_d;
00032 unsigned int num_blks;
00033
00034 uint64_t begin_usec;
00035 uint64_t end_usec;
00036 };
00037
00043 class device_context{
00044 public:
00045 device_context();
00046 ~device_context();
00047
00058 bool init(const unsigned long size, const unsigned nstream);
00059
00068 bool sync(const unsigned stream_id, const bool block=true);
00069
00076 void set_state(const unsigned stream_id, const STATE state);
00077
00084 STATE get_state(const unsigned stream_id);
00085
00102 uint8_t *get_dev_checkbits(const unsigned stream_id);
00103
00110 void clear_checkbits(const unsigned stream_id, const unsigned num_blks);
00111
00112
00119 cudaStream_t get_stream(const unsigned stream_id);
00120
00126 bool use_stream() { return (nstream_ != 0); };
00127
00135 class cuda_mem_pool *get_cuda_mem_pool(const unsigned stream_id);
00136
00146 uint64_t get_elapsed_time(const unsigned stream_id)
00147 {
00148 assert(0 <= stream_id && stream_id <= nstream_);
00149 assert((stream_id == 0) ^ (nstream_ > 0));
00150 return stream_ctx_[stream_id].end_usec - stream_ctx_[stream_id].begin_usec;
00151 }
00152 private:
00153 struct stream_context stream_ctx_[MAX_STREAM + 1];
00154 unsigned int nstream_;
00155 bool init_;
00156 };
00157
00158
00159 #endif