Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion 3rdparty/cub/cub/util_allocator.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,11 @@ struct CachingDeviceAllocator
while ( (block_itr != cached_blocks.end())
&& (block_itr->device == device)
&& (block_itr->bin == search_key.bin)) {

// use special rule for the last ("exact size") bin: set max memory overuse to 1/8th
if (search_key.bin == (unsigned int) -1 && (block_itr->bytes - search_key.bytes)*8UL > search_key.bytes)
break;

cudaStream_t prev_stream = block_itr->associated_stream;
if ((active_stream == prev_stream)
|| (cudaEventQuery(block_itr->ready_event) != cudaErrorNotReady)) {
Expand Down Expand Up @@ -662,7 +667,7 @@ struct CachingDeviceAllocator
cached_blocks.erase(begin);

if (debug) CubLog("\tdevice %d freed %lld bytes.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks (%lld bytes) outstanding.\n",
current_device, (long long) begin->bytes, (long long) cached_blocks.size(), (long long) cached_bytes[current_device].free, (long long) live_blocks.size(), (long long) cached_bytes[current_device].free);
current_device, (long long) begin->bytes, (long long) cached_blocks.size(), (long long) cached_bytes[current_device].free, (long long) live_blocks.size(), (long long) cached_bytes[current_device].busy);
}

Unlock(&spin_lock);
Expand Down
14 changes: 10 additions & 4 deletions include/caffe/util/gpu_memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,17 @@ class gpu_memory {
static void init(const std::vector<int>&, PoolMode, bool);
static void destroy();

static bool initialized_;
static PoolMode mode_;
static size_t poolsize_;
static bool debug_;
static bool initialized_;
static PoolMode mode_;
static bool debug_;

#ifndef CPU_ONLY
struct MemInfo {
size_t free;
size_t total;
};

static vector<MemInfo> dev_info_;

public:
typedef void* pointer;
Expand Down
49 changes: 25 additions & 24 deletions src/caffe/util/gpu_memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ namespace caffe {

#ifndef CPU_ONLY // CPU-only Caffe.
static cub::CachingDeviceAllocator* cubAlloc = 0;
vector<gpu_memory::MemInfo> gpu_memory::dev_info_;
#endif

gpu_memory::PoolMode gpu_memory::mode_ = gpu_memory::NoPool;
size_t gpu_memory::poolsize_ = 0;
bool gpu_memory::debug_ = false;

#ifdef CPU_ONLY // CPU-only Caffe.
Expand Down Expand Up @@ -43,8 +43,7 @@ namespace caffe {
}
if (debug)
std::cout << "gpu_memory initialized with "
<< getPoolName() << ". Poolsize = "
<< (1.0*poolsize_)/(1024.0*1024.0*1024.0) << " G." << std::endl;
<< getPoolName() << std::endl;
}

void gpu_memory::destroy() {
Expand Down Expand Up @@ -101,42 +100,42 @@ namespace caffe {
CUDA_CHECK(cudaGetDevice(&initial_device));

for (int i = 0; i < gpus.size(); i++) {
int cur_device = gpus[i];
if (cur_device+1 > dev_info_.size())
dev_info_.resize(cur_device+1);

CUDA_CHECK(cudaSetDevice(gpus[i]));
size_t free_mem, total_mem;
cudaDeviceProp props;
CUDA_CHECK(cudaGetDeviceProperties(&props, gpus[i]));
CUDA_CHECK(cudaMemGetInfo(&free_mem, &total_mem));
CUDA_CHECK(cudaGetDeviceProperties(&props, cur_device));
CUDA_CHECK(cudaMemGetInfo(&dev_info_[cur_device].free,
&dev_info_[cur_device].total));

if (debug_) {
std::cout << "cudaGetDeviceProperties: Mem = "
<< props.totalGlobalMem <<std:: endl;
std::cout << "cudaMemGetInfo: Free= " << free_mem
<< " Total= " << total_mem << std::endl;
<< props.totalGlobalMem <<std::endl;
std::cout << "cudaMemGetInfo_[" << cur_device
<<": Free= " << dev_info_[cur_device].free
<< " Total= " << dev_info_[cur_device].total << std::endl;
}

// make sure we don't ask for more than total device memory
free_mem = std::min(total_mem, free_mem);
free_mem = size_t(0.95*std::min(props.totalGlobalMem, free_mem));
// find out the smallest GPU size
if (poolsize_ == 0 || poolsize_ > free_mem)
poolsize_ = free_mem;
dev_info_[i].free = std::min(dev_info_[cur_device].total,
dev_info_[cur_device].free);
dev_info_[i].free = std::min(props.totalGlobalMem,
dev_info_[cur_device].free);
}


switch ( mode_ ) {
case CubPool:
try {
// if you are paranoid, that doesn't mean they are not after you :)
// just in case someone installed 'no cleanup' arena before
delete cubAlloc;

cubAlloc = new cub::CachingDeviceAllocator( 2, // defaults
cubAlloc = new cub::CachingDeviceAllocator( 2,
6,
32, // largest
// cached
// allocation
// becomes
// 2^32 here
poolsize_,
16,
(size_t)-1,
false,
debug_);
}
Expand Down Expand Up @@ -164,13 +163,15 @@ namespace caffe {
case CubPool:
int cur_device;
CUDA_CHECK(cudaGetDevice(&cur_device));
*total_mem = poolsize_;
*total_mem = dev_info_[cur_device].total;
// Free memory is initial free memory minus outstanding allocations.
// Assuming we only allocate via gpu_memory since its construction.
*free_mem = poolsize_ - cubAlloc->cached_bytes[cur_device].busy;
*free_mem = dev_info_[cur_device].free -
cubAlloc->cached_bytes[cur_device].busy;
break;
default:
CUDA_CHECK(cudaMemGetInfo(free_mem, total_mem));
break;
}
}
#endif // CPU_ONLY
Expand Down
2 changes: 1 addition & 1 deletion tools/caffe.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ int main(int argc, char** argv) {
// initialize gpu memory arena
vector<int> gpus;
get_gpus(&gpus);
caffe::gpu_memory::arena arena(gpus);
caffe::gpu_memory::arena arena(gpus, caffe::gpu_memory::DefaultPool, false);

if (argc == 2) {
#ifdef WITH_PYTHON_LAYER
Expand Down