From b52ab92e59e9efea5e8b58548f70558c1b26e41b Mon Sep 17 00:00:00 2001 From: thatguymike Date: Wed, 2 Dec 2015 20:27:18 -0800 Subject: [PATCH] Cross merge fixes from Boris --- 3rdparty/cub/cub/util_allocator.cuh | 7 ++++- include/caffe/util/gpu_memory.hpp | 14 ++++++--- src/caffe/util/gpu_memory.cpp | 49 +++++++++++++++-------------- tools/caffe.cpp | 2 +- 4 files changed, 42 insertions(+), 30 deletions(-) diff --git a/3rdparty/cub/cub/util_allocator.cuh b/3rdparty/cub/cub/util_allocator.cuh index 7f6bebe7f74..8de32cdbc47 100644 --- a/3rdparty/cub/cub/util_allocator.cuh +++ b/3rdparty/cub/cub/util_allocator.cuh @@ -407,6 +407,11 @@ struct CachingDeviceAllocator while ( (block_itr != cached_blocks.end()) && (block_itr->device == device) && (block_itr->bin == search_key.bin)) { + + // use special rule for the last ("exact size") bin: set max memory overuse to 1/8th + if (search_key.bin == (unsigned int) -1 && (block_itr->bytes - search_key.bytes)*8UL > search_key.bytes) + break; + cudaStream_t prev_stream = block_itr->associated_stream; if ((active_stream == prev_stream) || (cudaEventQuery(block_itr->ready_event) != cudaErrorNotReady)) { @@ -662,7 +667,7 @@ struct CachingDeviceAllocator cached_blocks.erase(begin); if (debug) CubLog("\tdevice %d freed %lld bytes.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks (%lld bytes) outstanding.\n", - current_device, (long long) begin->bytes, (long long) cached_blocks.size(), (long long) cached_bytes[current_device].free, (long long) live_blocks.size(), (long long) cached_bytes[current_device].free); + current_device, (long long) begin->bytes, (long long) cached_blocks.size(), (long long) cached_bytes[current_device].free, (long long) live_blocks.size(), (long long) cached_bytes[current_device].busy); } Unlock(&spin_lock); diff --git a/include/caffe/util/gpu_memory.hpp b/include/caffe/util/gpu_memory.hpp index b1a6c919cf7..c57d98a78be 100644 --- a/include/caffe/util/gpu_memory.hpp +++ b/include/caffe/util/gpu_memory.hpp @@ -83,11 +83,17 @@ class gpu_memory { static void init(const std::vector&, PoolMode, bool); static void destroy(); - static bool initialized_; - static PoolMode mode_; - static size_t poolsize_; - static bool debug_; + static bool initialized_; + static PoolMode mode_; + static bool debug_; + #ifndef CPU_ONLY + struct MemInfo { + size_t free; + size_t total; + }; + + static vector dev_info_; public: typedef void* pointer; diff --git a/src/caffe/util/gpu_memory.cpp b/src/caffe/util/gpu_memory.cpp index 65e9b1cc19c..8db4178530e 100644 --- a/src/caffe/util/gpu_memory.cpp +++ b/src/caffe/util/gpu_memory.cpp @@ -12,10 +12,10 @@ namespace caffe { #ifndef CPU_ONLY // CPU-only Caffe. static cub::CachingDeviceAllocator* cubAlloc = 0; + vector gpu_memory::dev_info_; #endif gpu_memory::PoolMode gpu_memory::mode_ = gpu_memory::NoPool; - size_t gpu_memory::poolsize_ = 0; bool gpu_memory::debug_ = false; #ifdef CPU_ONLY // CPU-only Caffe. @@ -43,8 +43,7 @@ namespace caffe { } if (debug) std::cout << "gpu_memory initialized with " - << getPoolName() << ". Poolsize = " - << (1.0*poolsize_)/(1024.0*1024.0*1024.0) << " G." << std::endl; + << getPoolName() << std::endl; } void gpu_memory::destroy() { @@ -101,42 +100,42 @@ namespace caffe { CUDA_CHECK(cudaGetDevice(&initial_device)); for (int i = 0; i < gpus.size(); i++) { + int cur_device = gpus[i]; + if (cur_device+1 > dev_info_.size()) + dev_info_.resize(cur_device+1); + CUDA_CHECK(cudaSetDevice(gpus[i])); - size_t free_mem, total_mem; cudaDeviceProp props; - CUDA_CHECK(cudaGetDeviceProperties(&props, gpus[i])); - CUDA_CHECK(cudaMemGetInfo(&free_mem, &total_mem)); + CUDA_CHECK(cudaGetDeviceProperties(&props, cur_device)); + CUDA_CHECK(cudaMemGetInfo(&dev_info_[cur_device].free, + &dev_info_[cur_device].total)); if (debug_) { std::cout << "cudaGetDeviceProperties: Mem = " - << props.totalGlobalMem <