diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh
index 935406bb16a8..12bc428dbab1 100755
--- a/ci/docker/runtime_functions.sh
+++ b/ci/docker/runtime_functions.sh
@@ -1603,8 +1603,11 @@ nightly_scala_demo_test_cpu() {
     bash bin/run_im.sh
 }
 
-nightly_estimator() {
+nightly_python() {
     set -ex
+    cd /work/mxnet/tests/nightly/
+    export PYTHONPATH=/work/mxnet/python/
+    nosetests test_fixed_seed.py
     cd /work/mxnet/tests/nightly/estimator
     export PYTHONPATH=/work/mxnet/python/
     nosetests test_estimator_cnn.py
diff --git a/include/mxnet/resource.h b/include/mxnet/resource.h
index f8ee6364807c..80fcb189d700 100644
--- a/include/mxnet/resource.h
+++ b/include/mxnet/resource.h
@@ -173,8 +173,7 @@ struct Resource {
   void get_cudnn_dropout_desc(
       cudnnDropoutDescriptor_t* dropout_desc,
       mshadow::Stream<gpu> *stream,
-      const float dropout,
-      uint64_t seed) const;
+      const float dropout) const;
 #endif  // MXNET_USE_CUDNN == 1
 
   /*!
diff --git a/src/operator/nn/dropout-inl.h b/src/operator/nn/dropout-inl.h
index 6387dff96eb7..580994d4da34 100644
--- a/src/operator/nn/dropout-inl.h
+++ b/src/operator/nn/dropout-inl.h
@@ -253,9 +253,8 @@ class DropoutOp {
                     const TBlob &mask,
                     const TBlob &out) {
     Stream<gpu> *s = ctx.get_stream<gpu>();
 
-    // set dropout state.
-    ctx.requested[0].get_cudnn_dropout_desc(&dropout_desc_, s, 1.0f - this->pkeep_, seed_);
+    ctx.requested[0].get_cudnn_dropout_desc(&dropout_desc_, s, 1.0f - this->pkeep_);
 
     // describe input/output tensor
     int dim[4], stride[4];
@@ -494,7 +493,6 @@ class DropoutOp {
   Context ctx_;
   cudnnDataType_t dtype_;
   cudnnDropoutDescriptor_t dropout_desc_;
-  uint64_t seed_ = 17 + rand() % 4096;  // NOLINT(runtime/threadsafe_fn)
   size_t dropout_reserve_byte_;
   cudnnTensorDescriptor_t x_desc_, y_desc_, dx_desc_, dy_desc_;
 #endif  // MXNET_USE_CUDNN_DROPOUT
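Why these first two changes matter: `DropoutOp` used to seed the cuDNN dropout descriptor from a member initialized once with `rand()` at operator construction, so `mx.random.seed()` could never influence the GPU dropout mask. The sketch below is illustrative only — `GlobalRng`, `OldStyleDropout`, and `NewStyleDropout` are stand-ins, not MXNet classes — but it shows the failure mode and the fix: draw the descriptor seed from one central, re-seedable engine instead of freezing it at construction.

```cpp
// Minimal sketch (stand-in types, not MXNet's): construction-time rand()
// seeding vs. drawing from a central, re-seedable engine.
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <random>

// Stand-in for the global RNG that mx.random.seed() re-seeds.
std::mt19937& GlobalRng() {
  static std::mt19937 rng(5489u);
  return rng;
}

void GlobalSeed(uint32_t s) { GlobalRng().seed(s); }

struct OldStyleDropout {
  // Frozen at construction from rand(); re-seeding the global engine
  // later cannot change it. This is the bug being fixed.
  uint64_t seed_ = 17 + std::rand() % 4096;
};

struct NewStyleDropout {
  // Drawn at descriptor-initialization time, so a preceding GlobalSeed(s)
  // makes the draw deterministic.
  uint64_t DescriptorSeed() const { return 17 + GlobalRng()() % 4096; }
};

int main() {
  GlobalSeed(42);
  OldStyleDropout o1;
  GlobalSeed(42);
  OldStyleDropout o2;
  std::cout << (o1.seed_ == o2.seed_) << "\n";  // 0: the global seed is ignored

  NewStyleDropout d;
  GlobalSeed(42);
  uint64_t a = d.DescriptorSeed();
  GlobalSeed(42);
  uint64_t b = d.DescriptorSeed();
  std::cout << (a == b) << "\n";  // 1: same seed, same dropout state
}
```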
diff --git a/src/operator/rnn-inl.h b/src/operator/rnn-inl.h
index ead7501a48b0..3b493d24d740 100644
--- a/src/operator/rnn-inl.h
+++ b/src/operator/rnn-inl.h
@@ -253,6 +253,7 @@ inline size_t GetNumInputArguments(RNNParam param_) {
  *        cy_ptr: Only used in lstm mode. pointer of tensor cy containing the cell state
  *        for t=seq_length. cy' shape is [num_layers, batch_size, state_size]
  *        dropout: should be 0 <= dropout < 1
+ *        seed: random seed for dropout
  *        mode: Specifies the type of RNN to compute.
 */
 template<typename DType>
@@ -274,23 +275,24 @@ void RNNForwardTraining(DType* ws,
                         DType* hy_ptr,
                         DType* cy_ptr,
                         const float dropout,
+                        unsigned seed,
                         int mode) {
   switch (mode) {
     case rnn_enum::kLstm:
       LstmForwardTraining<DType>(ws, rs, state_outputs, num_layers, direction, seq_length,
                                  batch_size, input_size, state_size, x_ptr, hx_ptr, cx_ptr,
-                                 w_ptr, b_ptr, y_ptr, hy_ptr, cy_ptr, dropout);
+                                 w_ptr, b_ptr, y_ptr, hy_ptr, cy_ptr, dropout, seed);
       break;
     case rnn_enum::kGru:
       GruForwardTraining<DType>(ws, rs, state_outputs, num_layers, direction, seq_length,
                                 batch_size, input_size, state_size, x_ptr, hx_ptr,
-                                w_ptr, y_ptr, hy_ptr, dropout);
+                                w_ptr, y_ptr, hy_ptr, dropout, seed);
       break;
     case rnn_enum::kRnnTanh:
     case rnn_enum::kRnnRelu:
       VanillaRNNForwardTraining<DType>(ws, rs, state_outputs, num_layers, direction, seq_length,
                                        batch_size, input_size, state_size, x_ptr, hx_ptr,
-                                       w_ptr, y_ptr, hy_ptr, dropout, mode);
+                                       w_ptr, y_ptr, hy_ptr, dropout, seed, mode);
       break;
     default:
       LOG(FATAL) << "unknown RNN mode " << mode;
@@ -866,6 +868,11 @@ class RNNOp {
         }
         DType* reserve_space_ptr = static_cast<DType*>(reserve_cpu_space_.data().dptr_);
 
+        // get a random seed from the CPU random resource
+        Stream<cpu> *s = NewStream<cpu>(0);
+        Random<cpu, unsigned> *prnd = ctx.requested[0].get_random<cpu, unsigned>(s);
+        unsigned seed = prnd->GetRandInt();
+        seed = 17 + seed % 4096;
         RNNForwardTraining<DType>(work_cpu_space,
                                   reserve_space_ptr,
                                   param_.state_outputs,
@@ -884,6 +891,7 @@ class RNNOp {
                                   hy_ptr,
                                   cy_ptr,
                                   param_.p,
+                                  seed,
                                   param_.mode);
       } else {
 #if MXNET_USE_MKLDNN == 1
@@ -1360,15 +1368,16 @@ class RNNOp {
       // Create Dropout descriptors
       if (param_.p > 0) {
         ctx.requested[rnn_enum::kCuDNNDropoutDescSpace].get_cudnn_dropout_desc
-          (&dropout_desc_, s, 1.0f - param_.p, seed_);
+          (&dropout_desc_, s, 1.0f - param_.p);
       }
       // Only update the probability by passing in a null dropout_states ptr
       DType* dropout_states = NULL;
       size_t dropout_bytes = 0;
+      // the seed is unused when dropout_states is null, so pass a dummy 0
       CUDNN_CALL(cudnnSetDropoutDescriptor(dropout_desc_, s->dnn_handle_,
                                            param_.p,  // discard probability
                                            dropout_states, dropout_bytes,
-                                           seed_));
+                                           0));
 
       // RNN descriptors
       cudnnDataType_t dtype_with_fallback_;
@@ -1520,7 +1529,6 @@ class RNNOp {
   cudnnRNNInputMode_t input_mode_;
   cudnnDropoutDescriptor_t dropout_desc_;
   Storage::Handle reserve_space_;
-  uint64_t seed_ = 17 + rand() % 4096;  // NOLINT(runtime/threadsafe_fn)
   size_t workspace_byte_, reserve_space_byte_;
   int workspace_size_;
   std::vector<cudnnTensorDescriptor_t> x_desc_vec_, y_desc_vec_, dx_desc_vec_, dy_desc_vec_;
diff --git a/src/operator/rnn.cc b/src/operator/rnn.cc
index b2ac2f0cb615..8bc0416a8614 100644
--- a/src/operator/rnn.cc
+++ b/src/operator/rnn.cc
@@ -180,6 +180,9 @@ static std::vector<ResourceRequest> RNNResourceEx(const NodeAttrs& attrs, const
     }
 #endif
   }
+  if (dev_mask == kCPU) {
+    request.emplace_back(ResourceRequest::kRandom);
+  }
   return request;
 }
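The RNN changes above follow one pattern: the per-object `seed_` member is gone, and a `seed` parameter is drawn fresh from the newly requested `kRandom` resource on each training forward, then threaded through `RNNForwardTraining` into the LSTM/GRU/vanilla kernels. (`rnn.cc` requests `kRandom` only for CPU because the GPU path gets its seed inside `resource.cc` instead.) A minimal sketch of that plumbing, with hypothetical names (`RandomResource`, `ForwardTraining`, `OneTrainingStep`) standing in for the MXNet internals:

```cpp
// Hedged sketch of the RNN-side pattern: the seed becomes an explicit
// parameter drawn per forward pass from a requested random resource.
#include <iostream>
#include <random>

using Seed = unsigned;

// Stand-in for ctx.requested[...].get_random(...): a re-seedable engine.
struct RandomResource {
  std::mt19937 engine;
  void SeedWith(unsigned s) { engine.seed(s); }
  unsigned GetRandInt() { return static_cast<unsigned>(engine()); }
};

// Stand-in for LstmForwardTraining: the seed arrives as an argument
// instead of living in per-operator state.
void LstmForward(float /*dropout*/, Seed seed) {
  std::cout << "lstm dropout seed = " << seed << "\n";
}

enum class Mode { kLstm };

// Mirrors RNNForwardTraining's dispatch: the seed is plumbed, not re-derived.
void ForwardTraining(Mode mode, float dropout, Seed seed) {
  if (mode == Mode::kLstm) LstmForward(dropout, seed);
}

// Mirrors the patched forward path: a fresh seed each call, folded into
// the same 17 + x % 4096 range the patch keeps.
void OneTrainingStep(RandomResource* prnd) {
  Seed seed = 17 + prnd->GetRandInt() % 4096;
  ForwardTraining(Mode::kLstm, 0.5f, seed);
}

int main() {
  RandomResource rng;
  rng.SeedWith(42);
  OneTrainingStep(&rng);  // prints the same seed ...
  rng.SeedWith(42);
  OneTrainingStep(&rng);  // ... here, because the resource was re-seeded
}
```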
diff --git a/src/operator/rnn_impl.h b/src/operator/rnn_impl.h
index 425ea4a3c6ab..91998118ddd9 100644
--- a/src/operator/rnn_impl.h
+++ b/src/operator/rnn_impl.h
@@ -146,7 +146,8 @@ void LstmForwardTraining(DType* ws,
                          DType* y_ptr,
                          DType* hy_ptr,
                          DType* cy_ptr,
-                         const float dropout) {
+                         const float dropout,
+                         unsigned seed) {
   DType* dropout_random = rs;
   DType* rs2 = dropout_random + (L - 1) * D * T * N * H;
   const int total_layers = D * L;
@@ -156,7 +157,6 @@ void LstmForwardTraining(DType* ws,
   const int r_size = D * T * N * H * 6;
   const int y_offset = T * N * H * 5;
   const int cell_size = N * H;
-  unsigned int seed_ = 17 + rand() % 4096;  // NOLINT(runtime/threadsafe_fn)
   int idx = 0;  // state & cell state's idx;
   const int omp_threads = mxnet::engine::OpenMP::Get()->GetRecommendedOMPThreadCount();
   for (int i = 0; i < L; ++i) {
@@ -183,7 +183,7 @@ void LstmForwardTraining(DType* ws,
     if (dropout > 0.0f) {
       #pragma omp parallel for num_threads(omp_threads)
       for (int j = 0; j < T * N * H * D; j++) {
-        int rand_data = rand_r(&seed_);
+        int rand_data = rand_r(&seed);
         if (static_cast<float>(rand_data % 1000) < static_cast<float>(1000 * dropout)) {
           dropout_random[i * T * N * H * D + j] = 0;
           y.dptr_[j] = 0;
@@ -980,7 +980,8 @@ void GruForwardTraining(DType* ws,
                         DType* w_ptr,
                         DType* y_ptr,
                         DType* hy_ptr,
-                        const float dropout) {
+                        const float dropout,
+                        unsigned seed) {
   DType* wx = w_ptr;
   DType* wh = wx + I * H * 3;
   DType* bx = wh + H * H * 3 + (D - 1) * (H * H * 3 + I * H * 3)
@@ -1001,7 +1002,6 @@ void GruForwardTraining(DType* ws,
   DType* bx_l = bx;
   DType* bh_l = bh;
   DType* y_tmp = x_ptr;
-  unsigned int seed_ = 17 + rand() % 4096;  // NOLINT(runtime/threadsafe_fn)
   for (int l = 0; l < L; l++) {
     if (l != 0) {
       y_tmp = y_l;
@@ -1011,7 +1011,7 @@ void GruForwardTraining(DType* ws,
       const int omp_threads = mxnet::engine::OpenMP::Get()->GetRecommendedOMPThreadCount();
       #pragma omp parallel for num_threads(omp_threads)
       for (int i = 0; i < T * N * I; i++) {
-        int rand_data = rand_r(&seed_);
+        int rand_data = rand_r(&seed);
         if (static_cast<float>(rand_data % 1000) < static_cast<float>(1000 * dropout)) {
           dropout_random[(l - 1) * T * N * I + i] = 0;
           y_tmp[i] = 0;
@@ -1869,6 +1869,7 @@ void VanillaRNNForwardTraining(DType* ws,
                                DType* y_ptr,
                                DType* hy_ptr,
                                const float dropout,
+                               unsigned seed,
                                int mode) {
   DType* wx = w_ptr;
   DType* wh = wx + I * H;
@@ -1888,7 +1889,6 @@ void VanillaRNNForwardTraining(DType* ws,
   DType* bh_l = bh;
   DType* y_tmp = x_ptr;
   const int omp_threads = mxnet::engine::OpenMP::Get()->GetRecommendedOMPThreadCount();
-  unsigned int seed_ = 17 + rand() % 4096;  // NOLINT(runtime/threadsafe_fn)
   for (int l = 0; l < L; l++) {
     if (l != 0) {
       y_tmp = y_l;
@@ -1897,7 +1897,7 @@ void VanillaRNNForwardTraining(DType* ws,
     if (dropout > 0.0f && l > 0) {
       #pragma omp parallel for num_threads(omp_threads)
       for (int i = 0; i < T * N * I; i++) {
-        int rand_data = rand_r(&seed_);
+        int rand_data = rand_r(&seed);
         if (static_cast<float>(rand_data % 1000) < static_cast<float>(1000 * dropout)) {
           dropout_random[(l - 1) * T * N * I + i] = 0;
           y_tmp[i] = 0;
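On the CPU path the masks come from `rand_r`, whose entire state lives in the caller-supplied `unsigned`; replacing the function-local `17 + rand() % 4096` with a parameter is therefore what ties these masks to MXNet's seeded RNG. A self-contained sketch of the mask logic (`ApplyDropoutMask` is a hypothetical helper, and it is serial, unlike the OpenMP loops above):

```cpp
// Hedged sketch of the CPU dropout-mask loop in rnn_impl.h.
#include <cstdlib>   // rand_r (POSIX)
#include <iostream>
#include <vector>

// Zeroes out roughly a `dropout` fraction of `data`, as the patched loops
// do: rand_r keeps its state in the caller-owned `seed`, and the
// `x % 1000 < 1000 * dropout` test is a coarse Bernoulli(dropout) draw.
void ApplyDropoutMask(std::vector<float>* data, float dropout, unsigned seed) {
  for (std::size_t j = 0; j < data->size(); ++j) {
    int rand_data = rand_r(&seed);
    if (static_cast<float>(rand_data % 1000) < static_cast<float>(1000 * dropout)) {
      (*data)[j] = 0.0f;
    }
  }
}

int main() {
  std::vector<float> a(8, 1.0f), b(8, 1.0f);
  ApplyDropoutMask(&a, 0.5f, 2019);
  ApplyDropoutMask(&b, 0.5f, 2019);  // identical seed -> identical mask
  std::cout << (a == b) << "\n";     // prints 1
}
```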
diff --git a/src/resource.cc b/src/resource.cc
index 3f461243e499..616808cccb69 100644
--- a/src/resource.cc
+++ b/src/resource.cc
@@ -422,15 +422,20 @@ void* Resource::get_host_space_internal(size_t size) const {
 void Resource::get_cudnn_dropout_desc(
     cudnnDropoutDescriptor_t* dropout_desc,
     mshadow::Stream<gpu> *stream,
-    const float dropout,
-    uint64_t seed) const {
-
+    const float dropout) const {
+  using namespace mshadow;
   CHECK_EQ(req.type, ResourceRequest::kCuDNNDropoutDesc);
   auto state_space = static_cast<resource::SpaceAllocator*>(ptr_);
   CHECK_EQ(state_space->ctx.dev_id, stream->dev_id)
     << "The device id of cudnn dropout state space doesn't match that from stream.";
   if (!state_space->handle.size) {
     // not initialized yet.
+    // draw a random seed from MXNet's CPU random resource
+    Resource request = ResourceManager::Get()->Request(Context::CPU(), ResourceRequest::kRandom);
+    Stream<cpu> *s = NewStream<cpu>(0);
+    Random<cpu, unsigned> *prnd = request.get_random<cpu, unsigned>(s);
+    unsigned data = prnd->GetRandInt();
+    uint64_t seed = 17 + static_cast<uint64_t>(data) % 4096;
     size_t dropout_state_size;
     CUDNN_CALL(cudnnDropoutGetStatesSize(stream->dnn_handle_, &dropout_state_size));
     // reserve GPU space
@@ -444,11 +449,12 @@ void Resource::get_cudnn_dropout_desc(
   } else {
     // cudnnRestoreDropoutDescriptor() introduced with cuDNN v7
     STATIC_ASSERT_CUDNN_VERSION_GE(7000);
+    // already initialized; the seed has no effect here, so pass a dummy 0
     CUDNN_CALL(cudnnRestoreDropoutDescriptor(*dropout_desc, stream->dnn_handle_,
                                              dropout,
                                              state_space->handle.dptr,
                                              state_space->handle.size,
-                                             seed));
+                                             0));
   }
 }
 #endif  // MXNET_USE_CUDNN == 1
diff --git a/tests/nightly/JenkinsfileForBinaries b/tests/nightly/JenkinsfileForBinaries
index a66159d0075b..e2c7573df2d0 100755
--- a/tests/nightly/JenkinsfileForBinaries
+++ b/tests/nightly/JenkinsfileForBinaries
@@ -136,11 +136,11 @@ core_logic: {
         }
       }
     },
-    'Gluon estimator: GPU': {
+    'Nightly Python: GPU': {
      node(NODE_LINUX_GPU) {
-        ws('workspace/estimator-test-gpu') {
+        ws('workspace/nightly-python-test-gpu') {
          utils.unpack_and_init('gpu', mx_lib)
-          utils.docker_run('ubuntu_nightly_gpu', 'nightly_estimator', true)
+          utils.docker_run('ubuntu_nightly_gpu', 'nightly_python', true)
        }
      }
    }
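The `resource.cc` change above encodes the cuDNN seeding lifecycle: the dropout state buffer is seeded exactly once, when it is first allocated, and `cudnnRestoreDropoutDescriptor` afterwards only re-attaches the existing state, so the seed argument on that path is dead weight (hence the dummy `0`). A minimal sketch of this lazy-init pattern, with `DropoutState`, `SetState`, and `RestoreState` as illustrative stand-ins, not the cuDNN API:

```cpp
// Hedged sketch: the seed only matters on the first (initializing) call.
#include <cstdint>
#include <iostream>
#include <vector>

struct DropoutState {
  std::vector<uint8_t> buffer;  // stands in for the GPU dropout state space
  bool initialized() const { return !buffer.empty(); }
};

// Stand-in for cudnnSetDropoutDescriptor: consumes the seed to fill state.
void SetState(DropoutState* st, uint64_t seed) {
  st->buffer.assign(16, static_cast<uint8_t>(seed % 251));
}

// Stand-in for cudnnRestoreDropoutDescriptor: reuses state, ignores seed.
void RestoreState(DropoutState* /*st*/, uint64_t /*seed_ignored*/) {}

void GetDropoutDesc(DropoutState* st, uint64_t fresh_seed) {
  if (!st->initialized()) {
    SetState(st, fresh_seed);  // the only place the seed matters
  } else {
    RestoreState(st, 0);       // dummy seed, as in the patch
  }
}

int main() {
  DropoutState st;
  GetDropoutDesc(&st, 1234);  // seeds the state buffer
  GetDropoutDesc(&st, 9999);  // no effect on seeding: state already exists
  std::cout << static_cast<int>(st.buffer[0]) << "\n";  // still 1234 % 251
}
```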
diff --git a/tests/nightly/test_fixed_seed.py b/tests/nightly/test_fixed_seed.py
new file mode 100644
index 000000000000..859258c5d389
--- /dev/null
+++ b/tests/nightly/test_fixed_seed.py
@@ -0,0 +1,69 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import sys
+import os
+curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
+sys.path.insert(0, os.path.join(curr_path, '..', 'python', 'unittest'))
+from common import with_seed
+
+import mxnet as mx
+import numpy as np
+from mxnet.test_utils import assert_almost_equal
+
+@with_seed()
+def test_dropout_with_seed():
+    info = np.iinfo(np.int32)
+    seed = np.random.randint(info.min, info.max)
+    _test_dropout(seed, mx.cpu())
+    _test_dropout(seed, mx.gpu())
+
+def _test_dropout(seed, ctx):
+    data = mx.nd.ones((100, 100), ctx=ctx)
+    dropout = mx.gluon.nn.Dropout(0.5)
+
+    mx.random.seed(seed)
+    with mx.autograd.record():
+        result1 = dropout(data)
+
+    mx.random.seed(seed)
+    with mx.autograd.record():
+        result2 = dropout(data)
+    # with a fixed seed, dropout must produce the same mask on every run
+    assert_almost_equal(result1.asnumpy(), result2.asnumpy())
+
+
+@with_seed()
+def test_rnn_with_seed():
+    info = np.iinfo(np.int32)
+    seed = np.random.randint(info.min, info.max)
+    _test_rnn(seed, mx.cpu())
+    _test_rnn(seed, mx.gpu())
+
+def _test_rnn(seed, ctx):
+    data = mx.nd.ones((5, 3, 10), ctx=ctx)
+    rnn = mx.gluon.rnn.RNN(100, 3, dropout=0.5)
+    rnn.initialize(ctx=ctx)
+    mx.random.seed(seed)
+    with mx.autograd.record():
+        result1 = rnn(data)
+
+    mx.random.seed(seed)
+    with mx.autograd.record():
+        result2 = rnn(data)
+    # with a fixed seed, RNN dropout must produce the same output on every run
+    assert_almost_equal(result1.asnumpy(), result2.asnumpy())
\ No newline at end of file