From 83522fb9068c64c1120f26fffe7150472a170804 Mon Sep 17 00:00:00 2001 From: Cyprien Noel Date: Fri, 30 Sep 2016 14:01:44 -0700 Subject: [PATCH] Make sure memory is allocated to only one device, enforce if debug --- include/caffe/solver.hpp | 1 + include/caffe/syncedmem.hpp | 15 ++++----- src/caffe/solver.cpp | 15 +++++++++ src/caffe/syncedmem.cpp | 62 ++++++++++++++++++++++++++++--------- 4 files changed, 69 insertions(+), 24 deletions(-) diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp index eafcee32904..ae9eca503c1 100644 --- a/include/caffe/solver.hpp +++ b/include/caffe/solver.hpp @@ -142,6 +142,7 @@ class WorkerSolver : public Solver { explicit WorkerSolver(const SolverParameter& param, const Solver* root_solver = NULL) : Solver(param, root_solver) {} + virtual ~WorkerSolver(); protected: void ApplyUpdate() {} diff --git a/include/caffe/syncedmem.hpp b/include/caffe/syncedmem.hpp index 38ee4664028..9969bc7e4d2 100644 --- a/include/caffe/syncedmem.hpp +++ b/include/caffe/syncedmem.hpp @@ -44,14 +44,8 @@ inline void CaffeFreeHost(void* ptr, bool use_cuda) { */ class SyncedMemory { public: - SyncedMemory() - : cpu_ptr_(NULL), gpu_ptr_(NULL), size_(0), head_(UNINITIALIZED), - own_cpu_data_(false), cpu_malloc_use_cuda_(false), own_gpu_data_(false), - gpu_device_(-1) {} - explicit SyncedMemory(size_t size) - : cpu_ptr_(NULL), gpu_ptr_(NULL), size_(size), head_(UNINITIALIZED), - own_cpu_data_(false), cpu_malloc_use_cuda_(false), own_gpu_data_(false), - gpu_device_(-1) {} + SyncedMemory(); + explicit SyncedMemory(size_t size); ~SyncedMemory(); const void* cpu_data(); void set_cpu_data(void* data); @@ -68,6 +62,9 @@ class SyncedMemory { #endif private: + void check_device(); + void check_device(void* data); + void to_cpu(); void to_gpu(); void* cpu_ptr_; @@ -77,7 +74,7 @@ class SyncedMemory { bool own_cpu_data_; bool cpu_malloc_use_cuda_; bool own_gpu_data_; - int gpu_device_; + int device_; DISABLE_COPY_AND_ASSIGN(SyncedMemory); }; // class SyncedMemory diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index ece3913e88a..be05e1e64d4 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -492,6 +492,21 @@ void Solver::UpdateSmoothedLoss(Dtype loss, int start_iter, } } +template +WorkerSolver::~WorkerSolver() { +#ifndef CPU_ONLY + int device; + CUDA_CHECK(cudaGetDevice(&device)); + CUDA_CHECK(cudaSetDevice(this->param_.device_id())); +#endif + this->net_.reset(); + this->test_nets_.resize(0); +#ifndef CPU_ONLY + CUDA_CHECK(cudaSetDevice(device)); +#endif +} + INSTANTIATE_CLASS(Solver); +INSTANTIATE_CLASS(WorkerSolver); } // namespace caffe diff --git a/src/caffe/syncedmem.cpp b/src/caffe/syncedmem.cpp index 4d3564172ab..b66c9861615 100644 --- a/src/caffe/syncedmem.cpp +++ b/src/caffe/syncedmem.cpp @@ -3,26 +3,41 @@ #include "caffe/util/math_functions.hpp" namespace caffe { +SyncedMemory::SyncedMemory() + : cpu_ptr_(NULL), gpu_ptr_(NULL), size_(0), head_(UNINITIALIZED), + own_cpu_data_(false), cpu_malloc_use_cuda_(false), own_gpu_data_(false) { +#ifndef CPU_ONLY +#ifdef DEBUG + CUDA_CHECK(cudaGetDevice(&device_)); +#endif +#endif +} + +SyncedMemory::SyncedMemory(size_t size) + : cpu_ptr_(NULL), gpu_ptr_(NULL), size_(size), head_(UNINITIALIZED), + own_cpu_data_(false), cpu_malloc_use_cuda_(false), own_gpu_data_(false) { +#ifndef CPU_ONLY +#ifdef DEBUG + CUDA_CHECK(cudaGetDevice(&device_)); +#endif +#endif +} SyncedMemory::~SyncedMemory() { + check_device(); if (cpu_ptr_ && own_cpu_data_) { CaffeFreeHost(cpu_ptr_, cpu_malloc_use_cuda_); } #ifndef CPU_ONLY if (gpu_ptr_ && own_gpu_data_) { - int initial_device; - cudaGetDevice(&initial_device); - if (gpu_device_ != -1) { - CUDA_CHECK(cudaSetDevice(gpu_device_)); - } CUDA_CHECK(cudaFree(gpu_ptr_)); - cudaSetDevice(initial_device); } #endif // CPU_ONLY } inline void SyncedMemory::to_cpu() { + check_device(); switch (head_) { case UNINITIALIZED: CaffeMallocHost(&cpu_ptr_, size_, &cpu_malloc_use_cuda_); @@ -49,10 +64,10 @@ inline void SyncedMemory::to_cpu() { } inline void SyncedMemory::to_gpu() { + check_device(); #ifndef CPU_ONLY switch (head_) { case UNINITIALIZED: - CUDA_CHECK(cudaGetDevice(&gpu_device_)); CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_)); caffe_gpu_memset(size_, 0, gpu_ptr_); head_ = HEAD_AT_GPU; @@ -60,7 +75,6 @@ inline void SyncedMemory::to_gpu() { break; case HEAD_AT_CPU: if (gpu_ptr_ == NULL) { - CUDA_CHECK(cudaGetDevice(&gpu_device_)); CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_)); own_gpu_data_ = true; } @@ -77,11 +91,13 @@ inline void SyncedMemory::to_gpu() { } const void* SyncedMemory::cpu_data() { + check_device(); to_cpu(); return (const void*)cpu_ptr_; } void SyncedMemory::set_cpu_data(void* data) { + check_device(); CHECK(data); if (own_cpu_data_) { CaffeFreeHost(cpu_ptr_, cpu_malloc_use_cuda_); @@ -92,6 +108,7 @@ void SyncedMemory::set_cpu_data(void* data) { } const void* SyncedMemory::gpu_data() { + check_device(); #ifndef CPU_ONLY to_gpu(); return (const void*)gpu_ptr_; @@ -102,16 +119,11 @@ const void* SyncedMemory::gpu_data() { } void SyncedMemory::set_gpu_data(void* data) { + check_device(data); #ifndef CPU_ONLY CHECK(data); if (own_gpu_data_) { - int initial_device; - cudaGetDevice(&initial_device); - if (gpu_device_ != -1) { - CUDA_CHECK(cudaSetDevice(gpu_device_)); - } CUDA_CHECK(cudaFree(gpu_ptr_)); - cudaSetDevice(initial_device); } gpu_ptr_ = data; head_ = HEAD_AT_GPU; @@ -122,12 +134,14 @@ void SyncedMemory::set_gpu_data(void* data) { } void* SyncedMemory::mutable_cpu_data() { + check_device(); to_cpu(); head_ = HEAD_AT_CPU; return cpu_ptr_; } void* SyncedMemory::mutable_gpu_data() { + check_device(); #ifndef CPU_ONLY to_gpu(); head_ = HEAD_AT_GPU; @@ -140,9 +154,9 @@ void* SyncedMemory::mutable_gpu_data() { #ifndef CPU_ONLY void SyncedMemory::async_gpu_push(const cudaStream_t& stream) { + check_device(); CHECK(head_ == HEAD_AT_CPU); if (gpu_ptr_ == NULL) { - CUDA_CHECK(cudaGetDevice(&gpu_device_)); CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_)); own_gpu_data_ = true; } @@ -153,5 +167,23 @@ void SyncedMemory::async_gpu_push(const cudaStream_t& stream) { } #endif +void SyncedMemory::check_device() { + if (gpu_ptr_ && own_gpu_data_) { + check_device(gpu_ptr_); + } +} +void SyncedMemory::check_device(void* data) { +#ifndef CPU_ONLY +#ifdef DEBUG + int device; + cudaGetDevice(&device); + CHECK(device == device_); + cudaPointerAttributes attributes; + CUDA_CHECK(cudaPointerGetAttributes(&attributes, data)); + CHECK(attributes.device == device_); +#endif +#endif +} + } // namespace caffe