diff --git a/src/operator/rnn_impl.h b/src/operator/rnn_impl.h
index e1b4a2b79c0a..652478f43950 100644
--- a/src/operator/rnn_impl.h
+++ b/src/operator/rnn_impl.h
@@ -34,6 +34,7 @@
 #include <vector>
 #include <string>
 #include <utility>
+#include <random>
 #include "./math.h"
 #include "./math_functions-inl.h"
 #include "./operator_common.h"
@@ -149,7 +150,6 @@ void LstmForwardTraining(DType* ws,
   const int r_size = D * T * N * H * 6;
   const int y_offset = T * N * H * 5;
   const int cell_size = N * H;
-  unsigned int seed_ = 17 + rand() % 4096;  // NOLINT(runtime/threadsafe_fn)
   int idx = 0;  // state & cell state's idx;
   const int omp_threads = mxnet::engine::OpenMP::Get()->GetRecommendedOMPThreadCount();
   for (int i = 0; i < L; ++i) {
@@ -176,13 +176,17 @@ void LstmForwardTraining(DType* ws,
       if (dropout > 0.0f) {
         #pragma omp parallel for num_threads(omp_threads)
         for (int j = 0; j < T * N * H * D; j++) {
-          int rand_data = rand_r(&seed_);
+          // Per-thread RNG state, seeded once per thread from std::random_device.
+          static thread_local std::random_device device;
+          static thread_local std::default_random_engine generator(device());
+          static thread_local std::uniform_int_distribution<int> distribution;
+          int rand_data = distribution(generator);
           if (static_cast<float>(rand_data % 1000) < static_cast<float>(1000 * dropout)) {
             dropout_random[i * T * N * H * D + j] = 0;
             y.dptr_[j] = 0;
           } else {
             dropout_random[i * T * N * H * D + j] = 1.0f - dropout;
-            y.dptr_[j] =  y.dptr_[j] / (1.0f - dropout);
+            y.dptr_[j] = y.dptr_[j] / (1.0f - dropout);
           }
         }
       }
@@ -994,7 +998,6 @@ void GruForwardTraining(DType* ws,
   DType* bx_l = bx;
   DType* bh_l = bh;
   DType* y_tmp = x_ptr;
-  unsigned int seed_ = 17 + rand() % 4096;  // NOLINT(runtime/threadsafe_fn)
   for (int l = 0; l < L; l++) {
     if (l != 0) {
       y_tmp = y_l;
@@ -1004,7 +1007,11 @@ void GruForwardTraining(DType* ws,
       const int omp_threads = mxnet::engine::OpenMP::Get()->GetRecommendedOMPThreadCount();
       #pragma omp parallel for num_threads(omp_threads)
       for (int i = 0; i < T * N * I; i++) {
-        int rand_data = rand_r(&seed_);
+        // Per-thread RNG state, seeded once per thread from std::random_device.
+        static thread_local std::random_device device;
+        static thread_local std::default_random_engine generator(device());
+        static thread_local std::uniform_int_distribution<int> distribution;
+        int rand_data = distribution(generator);
         if (static_cast<float>(rand_data % 1000) < static_cast<float>(1000 * dropout)) {
           dropout_random[(l - 1) * T * N * I + i] = 0;
           y_tmp[i] = 0;
@@ -1881,7 +1888,6 @@ void VanillaRNNForwardTraining(DType* ws,
   DType* bh_l = bh;
   DType* y_tmp = x_ptr;
   const int omp_threads = mxnet::engine::OpenMP::Get()->GetRecommendedOMPThreadCount();
-  unsigned int seed_ = 17 + rand() % 4096;  // NOLINT(runtime/threadsafe_fn)
   for (int l = 0; l < L; l++) {
     if (l != 0) {
       y_tmp = y_l;
@@ -1890,7 +1896,11 @@ void VanillaRNNForwardTraining(DType* ws,
     if (dropout > 0.0f && l > 0) {
       #pragma omp parallel for num_threads(omp_threads)
       for (int i = 0; i < T * N * I; i++) {
-        int rand_data = rand_r(&seed_);
+        // Per-thread RNG state, seeded once per thread from std::random_device.
+        static thread_local std::random_device device;
+        static thread_local std::default_random_engine generator(device());
+        static thread_local std::uniform_int_distribution<int> distribution;
+        int rand_data = distribution(generator);
         if (static_cast<float>(rand_data % 1000) < static_cast<float>(1000 * dropout)) {
           dropout_random[(l - 1) * T * N * I + i] = 0;
           y_tmp[i] = 0;
diff --git a/tests/cpp/engine/threaded_engine_test.cc b/tests/cpp/engine/threaded_engine_test.cc
index 6d669c19bcaa..5ae1ae9f1f1e 100644
--- a/tests/cpp/engine/threaded_engine_test.cc
+++ b/tests/cpp/engine/threaded_engine_test.cc
@@ -33,6 +33,7 @@
 #include <thread>
 #include <chrono>
 #include <vector>
+#include <random>
 
 #include "../src/engine/engine_impl.h"
 #include "../include/test_util.h"
@@ -58,17 +59,20 @@ void GenerateWorkload(int num_workloads, int num_var,
                       int min_read, int max_read,
                       int min_time, int max_time,
                       std::vector<Workload>* workloads) {
+  // Per-thread engine, seeded once from seed_; each draw is a non-negative int.
+  static thread_local std::default_random_engine generator(seed_);
+  static thread_local std::uniform_int_distribution<int> distribution;
   workloads->clear();
   workloads->resize(num_workloads);
   for (int i = 0; i < num_workloads; ++i) {
     auto& wl = workloads->at(i);
-    wl.write = rand_r(&seed_) % num_var;
-    int r = rand_r(&seed_);
+    wl.write = distribution(generator) % num_var;
+    int r = distribution(generator);
     int num_read = min_read + (r % (max_read - min_read));
     for (int j = 0; j < num_read; ++j) {
-      wl.reads.push_back(rand_r(&seed_) % num_var);
+      wl.reads.push_back(distribution(generator) % num_var);
     }
-    wl.time = min_time + rand_r(&seed_) % (max_time - min_time);
+    wl.time = min_time + distribution(generator) % (max_time - min_time);
   }
 }
 
diff --git a/tests/cpp/include/test_ndarray_utils.h b/tests/cpp/include/test_ndarray_utils.h
index f5ab96794ada..e62b7fe29aef 100644
--- a/tests/cpp/include/test_ndarray_utils.h
+++ b/tests/cpp/include/test_ndarray_utils.h
@@ -29,6 +29,7 @@
 #include <cstdlib>
 #include <string>
 #include <map>
+#include <random>
 #include "test_util.h"
 #include "test_op.h"
 
@@ -54,9 +55,11 @@ inline unsigned gen_rand_seed() {
 }
 
 inline float RandFloat() {
-  static unsigned seed = gen_rand_seed();
-  double v = rand_r(&seed) * 1.0 / RAND_MAX;
-  return static_cast<float>(v);
+  // Uniform float in [0, 1), drawn from a per-thread engine seeded once via random_device.
+  static thread_local std::random_device device;
+  static thread_local std::default_random_engine generator(device());
+  static thread_local std::uniform_real_distribution<float> distribution;
+  return distribution(generator);
 }
 
 // Get an NDArray with provided indices, prepared for a RowSparse NDArray.