diff --git a/example/cifar10/cifar10.py b/example/cifar10/cifar10.py index 152c6dc2350a..c428beffafca 100644 --- a/example/cifar10/cifar10.py +++ b/example/cifar10/cifar10.py @@ -3,7 +3,7 @@ import mxnet as mx import copy import sys -sys.path.append("../../tests/python") +sys.path.append("../../tests/python/common") import get_data import time diff --git a/example/mnist/mlp_gpu.py b/example/mnist/mlp_gpu.py index a801476decfe..ef8cd3b84cdc 100644 --- a/example/mnist/mlp_gpu.py +++ b/example/mnist/mlp_gpu.py @@ -5,7 +5,7 @@ import os, gzip import pickle as pickle import sys -sys.path.append("../../tests/python") +sys.path.append("../../tests/python/common/") import get_data @@ -48,7 +48,7 @@ def CalAcc(out, label): # bind executer # TODO(bing): think of a better bind interface -executor = softmax.bind(mx.Context('gpu'), arg_narrays, grad_narrays) +executor = softmax.bind(mx.gpu(), arg_narrays, grad_narrays) # create gradient NArray out_narray = executor.outputs[0] grad_narray = mx.nd.zeros(out_narray.shape, ctx=mx.gpu()) diff --git a/mshadow b/mshadow index 208a198213ea..cf879e7cda2b 160000 --- a/mshadow +++ b/mshadow @@ -1 +1 @@ -Subproject commit 208a198213ea011e42f91b128b14a7206cce62a5 +Subproject commit cf879e7cda2b248960a77d6e6973554bc7f3b812 diff --git a/src/operator/activation-inl.h b/src/operator/activation-inl.h index 2319f074cc73..98445b629b9e 100644 --- a/src/operator/activation-inl.h +++ b/src/operator/activation-inl.h @@ -30,10 +30,10 @@ struct ActivationParam : public dmlc::Parameter { int act_type; DMLC_DECLARE_PARAMETER(ActivationParam) { DMLC_DECLARE_FIELD(act_type).set_default(kReLU) - .add_enum("relu", kReLU) - .add_enum("sigmoid", kSigmoid) - .add_enum("tanh", kTanh) - .describe("Activation function to be applied."); + .add_enum("relu", kReLU) + .add_enum("sigmoid", kSigmoid) + .add_enum("tanh", kTanh) + .describe("Activation function to be applied."); } }; @@ -91,11 +91,11 @@ class ActivationProp : public OperatorProperty { } bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + std::vector *out_shape, + std::vector *aux_shape) const override { using namespace mshadow; CHECK_EQ(in_shape->size(), 1) << "Input:[data]"; - const TShape &dshape = in_shape->at(0); + const TShape &dshape = in_shape->at(kData); if (dshape.ndim() == 0) return false; out_shape->clear(); out_shape->push_back(dshape); @@ -114,27 +114,27 @@ class ActivationProp : public OperatorProperty { // decalre dependency and inplace optimization options std::vector DeclareBackwardDependency( - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data) const override { - #if MXNET_USE_CUDNN == 1 + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data) const override { +#if MXNET_USE_CUDNN == 1 return {out_grad[kOut], out_data[kOut], in_data[kData]}; - #else +#else return {out_grad[kOut], out_data[kOut]}; - #endif // MXNET_USE_CUDNN +#endif // MXNET_USE_CUDNN } std::vector > BackwardInplaceOption( - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data, - const std::vector &in_grad) const override { + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data, + const std::vector &in_grad) const override { return {{out_grad[kOut], in_grad[kData]}}; } std::vector > ForwardInplaceOption( - const std::vector &in_data, - const std::vector &out_data) const override { + const std::vector &in_data, + const std::vector &out_data) const override { return {{in_data[kData], out_data[kOut]}}; } diff --git a/src/operator/activation.cc b/src/operator/activation.cc index a3a246723171..04a8da24eed9 100644 --- a/src/operator/activation.cc +++ b/src/operator/activation.cc @@ -12,9 +12,12 @@ namespace op { template<> Operator *CreateOp(ActivationParam param) { switch (param.act_type) { - case kReLU: return new ActivationOp(); - case kSigmoid: return new ActivationOp(); - case kTanh: return new ActivationOp(); + case kReLU: + return new ActivationOp(); + case kSigmoid: + return new ActivationOp(); + case kTanh: + return new ActivationOp(); default: LOG(FATAL) << "unknown activation type"; return NULL; diff --git a/src/operator/activation.cu b/src/operator/activation.cu index 4325d4c53a46..2c9c29c04f45 100644 --- a/src/operator/activation.cu +++ b/src/operator/activation.cu @@ -14,18 +14,21 @@ namespace mxnet { namespace op { template<> Operator *CreateOp(ActivationParam param) { - #if MXNET_USE_CUDNN == 1 +#if MXNET_USE_CUDNN == 1 return new CuDNNActivationOp(param); - #else +#else switch(param.act_type) { - case kReLU: return new ActivationOp(); - case kSigmoid: return new ActivationOp(); - case kTanh: return new ActivationOp(); + case kReLU: + return new ActivationOp(); + case kSigmoid: + return new ActivationOp(); + case kTanh: + return new ActivationOp(); default: LOG(FATAL) << "unknown activation"; return NULL; } - #endif // MXNET_USE_CUDNN +#endif // MXNET_USE_CUDNN } } // op } // namespace mxnet diff --git a/src/operator/batch_norm-inl.h b/src/operator/batch_norm-inl.h index 3c70810fd52e..9ab15d4abe7a 100644 --- a/src/operator/batch_norm-inl.h +++ b/src/operator/batch_norm-inl.h @@ -29,9 +29,9 @@ struct BatchNormParam : public dmlc::Parameter { float momentum; DMLC_DECLARE_PARAMETER(BatchNormParam) { DMLC_DECLARE_FIELD(eps).set_default(1e-10f) - .describe("Epsilon to prevent div 0"); + .describe("Epsilon to prevent div 0"); DMLC_DECLARE_FIELD(momentum).set_default(0.1f) - .describe("Momentum for moving average"); + .describe("Momentum for moving average"); } }; @@ -90,18 +90,19 @@ class BatchNormOp : public Operator { Tensor var = out_data[kVar].get(s); Assign(mean, req[kMean], scale * sumall_except_dim<1>(data)); Assign(var, req[kVar], scale * sumall_except_dim<1>( - F(data - broadcast<1>(mean, data.shape_)))); + F(data - broadcast<1>(mean, data.shape_)))); Assign(out_no_affine, req[kOutNoAffine], (data - broadcast<1>(mean, data.shape_)) / - F(broadcast<1>(var + param_.eps, data.shape_))); + F(broadcast<1>(var + param_.eps, data.shape_))); Assign(out, req[kOut], out_no_affine * broadcast<1>(slope, out.shape_) + - broadcast<1>(bias, out.shape_)); + broadcast<1>(bias, out.shape_)); moving_mean = moving_mean * param_.momentum + mean * (1 - param_.momentum); moving_var = moving_var * param_.momentum + var * (1 - param_.momentum); } else { Assign(out, req[kOut], broadcast<1>(slope / - F(moving_var + param_.eps), data.shape_) * data + - broadcast<1>(bias - (slope * moving_mean) / - F(moving_var + param_.eps), data.shape_)); + F(moving_var + param_.eps), + data.shape_) * data + + broadcast<1>(bias - (slope * moving_mean) / + F(moving_var + param_.eps), data.shape_)); } } @@ -153,9 +154,10 @@ class BatchNormOp : public Operator { Tensor tmp = workspace[2]; // cal gvar = sumall_except_dim<1>((grad * broadcast<1>(slope, data.shape_)) * - (data - broadcast<1>(mean, data.shape_)) * - -0.5f * - F(broadcast<1>(var + param_.eps, data.shape_), -1.5f)); + (data - broadcast<1>(mean, data.shape_)) * + -0.5f * + F(broadcast<1>(var + param_.eps, data.shape_), + -1.5f)); gmean = sumall_except_dim<1>(grad * broadcast<1>(slope, data.shape_)); gmean *= -1.0f / F(var + param_.eps); tmp = scale * sumall_except_dim<1>(-2.0f * (data - broadcast<1>(mean, data.shape_))); @@ -165,9 +167,10 @@ class BatchNormOp : public Operator { Assign(gslope, req[kGamma], sumall_except_dim<1>(grad * out_no_affine)); Assign(gbias, req[kBeta], sumall_except_dim<1>(grad)); Assign(grad_in, req[kData], (grad * broadcast<1>(slope, data.shape_)) * - broadcast<1>(1.0f / F(var + param_.eps), data.shape_) + - broadcast<1>(gvar, data.shape_) * scale * 2.0f * (data - broadcast<1>(mean, data.shape_)) + - broadcast<1>(gmean, data.shape_) * scale); + broadcast<1>(1.0f / F(var + param_.eps), data.shape_) + + broadcast<1>(gvar, data.shape_) * scale * 2.0f * (data - broadcast<1>(mean, + data.shape_)) + + broadcast<1>(gmean, data.shape_) * scale); } private: @@ -186,8 +189,8 @@ class BatchNormProp : public OperatorProperty { } bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + std::vector *out_shape, + std::vector *aux_shape) const override { using namespace mshadow; CHECK_EQ(in_shape->size(), 3) << "Input:[data, gamma, beta]"; const TShape &dshape = in_shape->at(0); @@ -216,19 +219,20 @@ class BatchNormProp : public OperatorProperty { } std::vector DeclareBackwardDependency( - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data) const override { + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data) const override { return {out_grad[kOut], out_data[kOut], out_data[kOutNoAffine], out_data[kMean], out_data[kVar], - in_data[kData], in_data[kGamma], in_data[kBeta]}; + in_data[kData], in_data[kGamma], in_data[kBeta] + }; } std::vector > BackwardInplaceOption( - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data, - const std::vector &in_grad) const override { + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data, + const std::vector &in_grad) const override { return {{out_grad[kOut], in_grad[kData]}}; } @@ -269,3 +273,4 @@ class BatchNormProp : public OperatorProperty { } // namespace op } // namespace mxnet #endif // MXNET_OPERATOR_BATCH_NORM_INL_H_ + diff --git a/src/operator/concat-inl.h b/src/operator/concat-inl.h index 3eaf47845292..dc18da329bf4 100644 --- a/src/operator/concat-inl.h +++ b/src/operator/concat-inl.h @@ -26,7 +26,7 @@ struct ConcatParam : public dmlc::Parameter { int num_args; DMLC_DECLARE_PARAMETER(ConcatParam) { DMLC_DECLARE_FIELD(num_args).set_range(1, 6) - .describe("Number of inputs to be concated."); + .describe("Number of inputs to be concated."); } }; // struct ConcatParam @@ -178,8 +178,8 @@ class ConcatProp : public OperatorProperty { } bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + std::vector *out_shape, + std::vector *aux_shape) const override { using namespace mshadow; CHECK_EQ(in_shape->size(), static_cast(param_.num_args)); TShape dshape = in_shape->at(kData0); @@ -193,10 +193,10 @@ class ConcatProp : public OperatorProperty { dshape[1] += tmp[1]; } else { CHECK_EQ(dshape[j], tmp[j]) - << "Incorrect shape[" << i << "]: " - << tmp << ". " - << "(first input shape: " - << dshape << ")"; + << "Incorrect shape[" << i << "]: " + << tmp << ". " + << "(first input shape: " + << dshape << ")"; } } } @@ -216,9 +216,9 @@ class ConcatProp : public OperatorProperty { } std::vector DeclareBackwardDependency( - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data) const override { + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data) const override { return out_grad; } diff --git a/src/operator/convolution-inl.h b/src/operator/convolution-inl.h index b69c412c80fa..4ce017270a68 100644 --- a/src/operator/convolution-inl.h +++ b/src/operator/convolution-inl.h @@ -23,6 +23,7 @@ namespace op { enum ConvolutionOpInputs {kData, kWeight, kBias}; enum ConvolutionOpOutputs {kOut}; +enum ConvolutionOpResource {kTempSpace}; struct ConvolutionParam : public dmlc::Parameter { TShape kernel; @@ -36,18 +37,18 @@ struct ConvolutionParam : public dmlc::Parameter { int shape[] = {1, 1}; DMLC_DECLARE_FIELD(kernel).describe("convolution kernel size: (y, x)"); DMLC_DECLARE_FIELD(stride).set_default(TShape(shape, shape + 2)) - .describe("convolution stride: (y, x)"); + .describe("convolution stride: (y, x)"); shape[0] = shape[1] = 0; DMLC_DECLARE_FIELD(pad).set_default(TShape(shape, shape + 2)) - .describe("pad for convolution: (y, x)"); + .describe("pad for convolution: (y, x)"); DMLC_DECLARE_FIELD(num_filter).set_range(1, 100000) - .describe("convolution filter(channel) number"); + .describe("convolution filter(channel) number"); DMLC_DECLARE_FIELD(num_group).set_default(1) - .describe("number of groups partition"); - DMLC_DECLARE_FIELD(workspace).set_default(128).set_range(1, 10000) - .describe("Tmp workspace for convolution (MB)"); + .describe("number of groups partition"); + DMLC_DECLARE_FIELD(workspace).set_default(512).set_range(128, 4096) + .describe("Tmp workspace for convolution (MB)"); DMLC_DECLARE_FIELD(no_bias).set_default(false) - .describe("Whether to disable bias parameter."); + .describe("Whether to disable bias parameter."); } }; @@ -74,48 +75,52 @@ class ConvolutionOp : public Operator { Tensor data = in_data[kData].get(s); uint32_t ws[] = {param_.num_group, param_.num_filter / param_.num_group, - data.shape_[1] / param_.num_group * param_.kernel[0] * param_.kernel[1]}; + data.shape_[1] / param_.num_group * param_.kernel[0] * param_.kernel[1] + }; TShape wmat_shape(ws, ws + 3); Tensor wmat = in_data[kWeight].get_with_shape(wmat_shape, s); Tensor out = out_data[kOut].get(s); - #if defined(__CUDACC__) +#if defined(__CUDACC__) CHECK_EQ(s->blas_handle_ownership_, Stream::OwnHandle) - << "Must init CuBLAS handle in stream"; - #endif - this->InitTemp(ctx, data.shape_, out.shape_); + << "Must init CuBLAS handle in stream"; +#endif const index_t nbatch = data.size(0); + Tensor workspace = ctx.requested[kTempSpace].get_space( + Shape1(this->InitTemp(data.shape_, out.shape_)), s); for (index_t i = 0; i < nbatch; i += nstep_) { const index_t step = std::min(nstep_, nbatch - i); - temp_col_.Resize(mshadow::Shape2(shape_colunit_[0], - shape_colunit_[1] * step)); - temp_dst_.Resize(mshadow::Shape3(shape_dstunit_[0], - shape_dstunit_[1], - shape_dstunit_[2] * step)); + Tensor temp_col = Tensor(workspace.dptr_, + Shape2(shape_colunit_[0], + shape_colunit_[1] * step), s); + Tensor temp_dst = Tensor(workspace.dptr_ + temp_col.shape_.Size(), + Shape3(shape_dstunit_[0], + shape_dstunit_[1], + shape_dstunit_[2] * step), s); if (param_.pad[0] == 0 && param_.pad[1] == 0) { - temp_col_ = unpack_patch2col(data.Slice(i, i + step), - param_.kernel[0], - param_.kernel[1], - param_.stride[0]); + temp_col = unpack_patch2col(data.Slice(i, i + step), + param_.kernel[0], + param_.kernel[1], + param_.stride[0]); // TODO(bing): make mshadow support dual stride } else { - temp_col_ = unpack_patch2col(pad(data.Slice(i, i + step), - param_.pad[0], param_.pad[1]), - param_.kernel[0], - param_.kernel[1], - param_.stride[0]); + temp_col = unpack_patch2col(pad(data.Slice(i, i + step), + param_.pad[0], param_.pad[1]), + param_.kernel[0], + param_.kernel[1], + param_.stride[0]); // TODO(bing): make mshadow support dual stride } - const index_t gstride = temp_col_.size(0) / param_.num_group; + const index_t gstride = temp_col.size(0) / param_.num_group; for (uint32_t gid = 0; gid < param_.num_group; ++gid) { - mshadow::Tensor tmpc = temp_col_.Slice(gstride * gid, - gstride * (gid + 1)); - temp_dst_[gid] = dot(wmat[gid], tmpc); + mshadow::Tensor tmpc = temp_col.Slice(gstride * gid, + gstride * (gid + 1)); + temp_dst[gid] = dot(wmat[gid], tmpc); } - out.Slice(i, i + step) = swapaxis<1, 0>(reshape(temp_dst_, - mshadow::Shape4(param_.num_filter, - step, - out.size(2), - out.size(3)))); + out.Slice(i, i + step) = swapaxis<1, 0>(reshape(temp_dst, + mshadow::Shape4(param_.num_filter, + step, + out.size(2), + out.size(3)))); } if (!param_.no_bias) { // add bias, broadcast bias to dim 1: channel @@ -144,68 +149,73 @@ class ConvolutionOp : public Operator { Tensor data = in_data[kData].get(s); uint32_t ws[] = {param_.num_group, param_.num_filter / param_.num_group, - data.shape_[1] / param_.num_group * param_.kernel[0] * param_.kernel[1]}; + data.shape_[1] / param_.num_group * param_.kernel[0] * param_.kernel[1] + }; TShape wmat_shape(ws, ws + 3); Tensor wmat = in_data[kWeight].get_with_shape(wmat_shape, s); Tensor grad = out_grad[kOut].get(s); Tensor gdata = in_grad[kData].get(s); Tensor gwmat = in_grad[kWeight].get_with_shape(wmat_shape, s); - #if defined(__CUDACC__) +#if defined(__CUDACC__) CHECK_EQ(s->blas_handle_ownership_, Stream::OwnHandle) - << "Must init CuBLAS handle in stream"; - #endif - this->InitTemp(ctx, data.shape_, grad.shape_); + << "Must init CuBLAS handle in stream"; +#endif const index_t nbatch = data.size(0); + Tensor workspace = ctx.requested[kTempSpace].get_space( + Shape1(this->InitTemp(data.shape_, grad.shape_)), s); for (index_t i = 0; i < nbatch; i += nstep_) { const index_t step = std::min(nstep_, nbatch - i); - temp_col_.Resize(Shape2(shape_colunit_[0], - shape_colunit_[1] * step)); - temp_dst_.Resize(Shape3(shape_dstunit_[0], - shape_dstunit_[1], shape_dstunit_[2] * step)); - temp_dst_ = reshape(swapaxis<1, 0>(grad.Slice(i, i + step)), temp_dst_.shape_); + Tensor temp_col = Tensor(workspace.dptr_, + Shape2(shape_colunit_[0], + shape_colunit_[1] * step), s); + Tensor temp_dst = Tensor(workspace.dptr_ + temp_col.shape_.Size(), + Shape3(shape_dstunit_[0], + shape_dstunit_[1], + shape_dstunit_[2] * step), s); + temp_dst = reshape(swapaxis<1, 0>(grad.Slice(i, i + step)), temp_dst.shape_); if (param_.pad[0] == 0 && param_.pad[1] == 0) { // TODO(bing): dual stride - temp_col_ = unpack_patch2col(data.Slice(i, i + step), + temp_col = unpack_patch2col(data.Slice(i, i + step), param_.kernel[0], param_.kernel[1], param_.stride[0]); } else { // TODO(bing): dual stride - temp_col_ = unpack_patch2col(pad(data.Slice(i, i + step), param_.pad[0], param_.pad[1]), + temp_col = unpack_patch2col(pad(data.Slice(i, i + step), param_.pad[0], param_.pad[1]), param_.kernel[0], param_.kernel[1], param_.stride[0]); } - const index_t gstride = temp_col_.size(0) / param_.num_group; + const index_t gstride = temp_col.size(0) / param_.num_group; for (uint32_t gid = 0; gid < param_.num_group; ++gid) { - Tensor tmpc = temp_col_.Slice(gstride * gid, gstride * (gid + 1)); + Tensor tmpc = temp_col.Slice(gstride * gid, gstride * (gid + 1)); if (i == 0) { Tensor tmp_gwmat = gwmat[gid]; - Assign(tmp_gwmat, req[kWeight], dot(temp_dst_[gid], tmpc.T())); + Assign(tmp_gwmat, req[kWeight], dot(temp_dst[gid], tmpc.T())); } else { - gwmat[gid] += dot(temp_dst_[gid], tmpc.T()); + gwmat[gid] += dot(temp_dst[gid], tmpc.T()); } } if (req[kData] == kWriteTo || req[kData] == kWriteInplace) { for (uint32_t gid = 0; gid < param_.num_group; ++gid) { - Tensor tmpc = temp_col_.Slice(gstride * gid, gstride * (gid + 1)); - tmpc = dot(wmat[gid].T(), temp_dst_[gid]); + Tensor tmpc = temp_col.Slice(gstride * gid, gstride * (gid + 1)); + tmpc = dot(wmat[gid].T(), temp_dst[gid]); } if (param_.pad[0] == 0 && param_.pad[1] == 0) { - gdata.Slice(i, i + step) = pack_col2patch(temp_col_, - data.Slice(i, i + step).shape_, - param_.kernel[0], - param_.kernel[1], - param_.stride[0]); + gdata.Slice(i, i + step) = pack_col2patch(temp_col, + data.Slice(i, i + step).shape_, + param_.kernel[0], + param_.kernel[1], + param_.stride[0]); } else { Shape<4> pshape = data.Slice(i, i + step).shape_; pshape[2] += 2 * param_.pad[0]; pshape[3] += 2 * param_.pad[1]; - gdata.Slice(i, i + step) = crop(pack_col2patch(temp_col_, - pshape, - param_.kernel[0], - param_.kernel[1], - param_.stride[0]), + gdata.Slice(i, i + step) = crop(pack_col2patch(temp_col, + pshape, + param_.kernel[0], + param_.kernel[1], + param_.stride[0]), gdata[i][0].shape_); } } @@ -217,10 +227,8 @@ class ConvolutionOp : public Operator { } private: - // TODO(bing): use global resource allocator - inline void InitTemp(const OpContext &ctx, - const mshadow::Shape<4> &ishape, - const mshadow::Shape<4> &oshape) { + inline index_t InitTemp(const mshadow::Shape<4> &ishape, + const mshadow::Shape<4> &oshape) { const int ksize_y = param_.kernel[0]; const int ksize_x = param_.kernel[1]; shape_colunit_ = mshadow::Shape2(ishape[1] * ksize_y * ksize_x, @@ -228,25 +236,23 @@ class ConvolutionOp : public Operator { shape_dstunit_ = mshadow::Shape3(param_.num_group, param_.num_filter / param_.num_group, oshape[2] * oshape[3]); - const uint64_t workspace_size = param_.workspace << 20; + const uint64_t workspace_size = param_.workspace; nstep_ = std::max(std::min(static_cast(workspace_size / shape_colunit_.Size()), - ishape[0]), 1U); + ishape[0]), 1U); int nop = (ishape[0] + nstep_ - 1) / nstep_; nstep_ = (ishape[0] + nop - 1) / nop; - mshadow::Stream *s = ctx.get_stream(); - temp_col_.set_stream(s); - temp_dst_.set_stream(s); - temp_col_.Resize(mshadow::Shape2(shape_colunit_[0], - shape_colunit_[1] * nstep_)); - temp_dst_.Resize(mshadow::Shape3(shape_dstunit_[0], - shape_dstunit_[1], - shape_dstunit_[2] * nstep_)); + mshadow::Shape<2> scol = mshadow::Shape2(shape_colunit_[0], + shape_colunit_[1] * nstep_); + mshadow::Shape<3> sdst = mshadow::Shape3(shape_dstunit_[0], + shape_dstunit_[1], + shape_dstunit_[2] * nstep_); + CHECK_GE(param_.workspace, scol.Size() + sdst.Size()) + << "\nMinimum workspace size: " << scol.Size() + sdst.Size() << "\n" + << "Given: " << param_.workspace; + return scol.Size() + sdst.Size(); } ConvolutionParam param_; - // TODO(bing): use global resource allocator - mshadow::TensorContainer temp_col_; - mshadow::TensorContainer temp_dst_; mshadow::Shape<2> shape_colunit_; mshadow::Shape<3> shape_dstunit_; index_t nstep_; @@ -268,11 +274,13 @@ class ConvolutionProp : public OperatorProperty { void Init(const std::vector >& kwargs) override { param_.Init(kwargs); + // convert MB to words + param_.workspace = (param_.workspace << 20) / sizeof(real_t); } bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + std::vector *out_shape, + std::vector *aux_shape) const override { using namespace mshadow; if (!param_.no_bias) { CHECK_EQ(in_shape->size(), 3) << "Input:[data, weight, bias]"; @@ -282,7 +290,7 @@ class ConvolutionProp : public OperatorProperty { const TShape &dshape = (*in_shape)[kData]; if (dshape.ndim() == 0) return false; CHECK_EQ(dshape.ndim(), 4) \ - << "Input data should be 4D in batch-num_filter-y-x"; + << "Input data should be 4D in batch-num_filter-y-x"; SHAPE_ASSIGN_CHECK(*in_shape, kWeight, Shape4(param_.num_filter, dshape[1], param_.kernel[0], param_.kernel[1])); @@ -296,17 +304,17 @@ class ConvolutionProp : public OperatorProperty { const index_t kstride = static_cast(param_.stride[0]); // TODO(bing) : support dual stride CHECK_EQ(param_.stride[0], param_.stride[1]) - << "Only support same stride now"; + << "Only support same stride now"; CHECK_EQ(dshape[1] % param_.num_group, 0) \ - << "input num_filter must divide group size"; + << "input num_filter must divide group size"; CHECK_EQ(param_.num_filter % param_.num_group, 0) \ - << "output num_filter must divide group size"; + << "output num_filter must divide group size"; CHECK_GE(param_.kernel.Size(), 0) \ - << "incorrect kernel size: " << param_.kernel; + << "incorrect kernel size: " << param_.kernel; CHECK_GE(param_.stride.Size(), 0) \ - << "incorrect stride size: " << param_.stride; + << "incorrect stride size: " << param_.stride; CHECK(ksize_x <= dshape[3] && ksize_y <= dshape[2]) - << "kernel size exceed input"; + << "kernel size exceed input"; (*out_shape)[kOut][1] = param_.num_filter; (*out_shape)[kOut][2] = (dshape[2] + 2 * param_.pad[0] - ksize_y) / kstride + 1; (*out_shape)[kOut][3] = (dshape[3] + 2 * param_.pad[1] - ksize_x) / kstride + 1; @@ -324,20 +332,30 @@ class ConvolutionProp : public OperatorProperty { } std::vector DeclareBackwardDependency( - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data) const override { + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data) const override { return {out_grad[kOut], in_data[kData], in_data[kWeight]}; } std::vector > BackwardInplaceOption( - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data, - const std::vector &in_grad) const override { + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data, + const std::vector &in_grad) const override { return {{in_data[kData], in_grad[kData]}}; } + virtual std::vector ForwardResource( + const std::vector &in_shape) const { + return {{ResourceRequest::kTempSpace, param_.workspace}}; + } + + virtual std::vector BackwardResource( + const std::vector &in_shape) const { + return {{ResourceRequest::kTempSpace, param_.workspace}}; + } + Operator* CreateOperator(Context ctx) const; private: diff --git a/src/operator/convolution.cu b/src/operator/convolution.cu index 8127cec43fd6..8e8ae9994a8d 100644 --- a/src/operator/convolution.cu +++ b/src/operator/convolution.cu @@ -14,11 +14,11 @@ namespace mxnet { namespace op { template<> Operator* CreateOp(ConvolutionParam param) { - #if MXNET_USE_CUDNN == 1 +#if MXNET_USE_CUDNN == 1 return new CuDNNConvolutionOp(param); - #else +#else return new ConvolutionOp(param); - #endif // MXNET_USE_CUDNN +#endif // MXNET_USE_CUDNN } } // namespace op diff --git a/src/operator/cudnn_activation-inl.h b/src/operator/cudnn_activation-inl.h index 1158a1324128..99bbfe93e871 100644 --- a/src/operator/cudnn_activation-inl.h +++ b/src/operator/cudnn_activation-inl.h @@ -20,21 +20,25 @@ class CuDNNActivationOp : public Operator { init_cudnn_ = false; dtype_ = CUDNN_DATA_FLOAT; switch (param_.act_type) { - case kReLU: - mode_ = CUDNN_ACTIVATION_RELU; - break; - case kSigmoid: - mode_ = CUDNN_ACTIVATION_SIGMOID; - break; - case kTanh: - mode_ = CUDNN_ACTIVATION_TANH; - break; - default: - LOG(FATAL) << "Not implmented"; - break; + case kReLU: + mode_ = CUDNN_ACTIVATION_RELU; + break; + case kSigmoid: + mode_ = CUDNN_ACTIVATION_SIGMOID; + break; + case kTanh: + mode_ = CUDNN_ACTIVATION_TANH; + break; + default: + LOG(FATAL) << "Not implmented"; + break; } } + ~CuDNNActivationOp() { + CHECK_EQ(cudnnDestroyTensorDescriptor(shape_desc_), CUDNN_STATUS_SUCCESS); + } + virtual void Forward(const OpContext &ctx, const std::vector &in_data, const std::vector &req, @@ -45,13 +49,30 @@ class CuDNNActivationOp : public Operator { CHECK_EQ(in_data.size(), 1); CHECK_EQ(out_data.size(), 1); Stream *s = ctx.get_stream(); - Tensor data = in_data[kData].get(s); - Tensor out = out_data[kOut].get(s); + Tensor data; + Tensor out; + if (in_data[kData].ndim() == 2) { + uint32_t ds[] = {in_data[kData].shape_[0], in_data[kData].shape_[1], 1, 1}; + TShape dshape(ds, ds + 4); + data = in_data[kData].get_with_shape(dshape, s); + out = out_data[kOut].get_with_shape(dshape, s); + } else { + data = in_data[kData].get(s); + out = out_data[kOut].get(s); + } float alpha = 1.0f; float beta = 0.0f; CHECK_EQ(s->dnn_handle_ownership_, mshadow::Stream::OwnHandle); if (!init_cudnn_) { - this->Init(s, in_data, out_data); + init_cudnn_ = true; + CHECK_EQ(cudnnCreateTensorDescriptor(&shape_desc_), CUDNN_STATUS_SUCCESS); + CHECK_EQ(cudnnSetTensor4dDescriptor(shape_desc_, + CUDNN_TENSOR_NCHW, + dtype_, + data.shape_[0], + data.shape_[1], + data.shape_[2], + data.shape_[3]), CUDNN_STATUS_SUCCESS); } CHECK_EQ(cudnnActivationForward(s->dnn_handle_, mode_, @@ -80,10 +101,23 @@ class CuDNNActivationOp : public Operator { float alpha = 1.0f; float beta = 0.0f; Stream *s = ctx.get_stream(); - Tensor grad = out_grad[kOut].get(s); - Tensor data = in_data[kData].get(s); - Tensor output_data = out_data[kOut].get(s); - Tensor input_grad = in_grad[kData].get(s); + Tensor grad; + Tensor data; + Tensor output_data; + Tensor input_grad; + if (in_data[kData].ndim() == 2) { + uint32_t ds[] = {in_data[kData].shape_[0], in_data[kData].shape_[1], 1, 1}; + TShape dshape(ds, ds + 4); + data = in_data[kData].get_with_shape(dshape, s); + grad = out_grad[kOut].get_with_shape(dshape, s); + output_data = out_data[kOut].get_with_shape(dshape, s); + input_grad = in_grad[kData].get_with_shape(dshape, s); + } else { + data = in_data[kData].get(s); + output_data = out_data[kOut].get(s); + grad = out_grad[kOut].get(s); + input_grad = in_grad[kData].get(s); + } CHECK_EQ(s->dnn_handle_ownership_, mshadow::Stream::OwnHandle); CHECK_EQ(cudnnActivationBackward(s->dnn_handle_, mode_, @@ -100,27 +134,6 @@ class CuDNNActivationOp : public Operator { } private: - inline void Init(mshadow::Stream *s, - const std::vector &in_data, - const std::vector &out_data) { - using namespace mshadow; - CHECK_EQ(in_data.size(), 1); - CHECK_EQ(out_data.size(), 1); - if (!init_cudnn_) { - init_cudnn_ = true; - Tensor data = in_data[kData].get(s); - Tensor out = out_data[kOut].get(s); - CHECK_EQ(data.shape_, out.shape_); - CHECK_EQ(cudnnCreateTensorDescriptor(&shape_desc_), CUDNN_STATUS_SUCCESS); - CHECK_EQ(cudnnSetTensor4dDescriptor(shape_desc_, - CUDNN_TENSOR_NCHW, - dtype_, - data.shape_[0], - data.shape_[1], - data.shape_[2], - data.shape_[3]), CUDNN_STATUS_SUCCESS); - } - } bool init_cudnn_; cudnnDataType_t dtype_; cudnnActivationMode_t mode_; diff --git a/src/operator/cudnn_convolution-inl.h b/src/operator/cudnn_convolution-inl.h index 38397a931096..ad0324a811eb 100644 --- a/src/operator/cudnn_convolution-inl.h +++ b/src/operator/cudnn_convolution-inl.h @@ -54,6 +54,8 @@ class CuDNNConvolutionOp : public Operator { if (!init_cudnn_) { Init(s, in_data, out_data); } + Tensor workspace = ctx.requested[kTempSpace].get_space( + mshadow::Shape1(workspace_), s); CHECK_EQ(cudnnConvolutionForward(s->dnn_handle_, &alpha, in_desc_, @@ -62,8 +64,8 @@ class CuDNNConvolutionOp : public Operator { wmat.dptr_, conv_desc_, algo_, - temp_.dptr_, - param_.workspace, + workspace.dptr_, + workspace_size_, &beta, out_desc_, out.dptr_), CUDNN_STATUS_SUCCESS); @@ -103,6 +105,8 @@ class CuDNNConvolutionOp : public Operator { Tensor gwmat = in_grad[kWeight].get(s); Tensor data = in_data[kData].get(s); Tensor gdata = in_grad[kData].get(s); + Tensor workspace = ctx.requested[kTempSpace].get_space( + mshadow::Shape1(workspace_), s); if (!param_.no_bias) { Tensor gbias = in_grad[kBias].get(s); CHECK_EQ(cudnnConvolutionBackwardBias(s->dnn_handle_, @@ -114,31 +118,31 @@ class CuDNNConvolutionOp : public Operator { gbias.dptr_), CUDNN_STATUS_SUCCESS); } CHECK_EQ(cudnnConvolutionBackwardFilter_v3(s->dnn_handle_, - &alpha, - in_desc_, - data.dptr_, - out_desc_, - grad.dptr_, - conv_desc_, - back_algo_w_, - temp_.dptr_, - param_.workspace, - &beta, - filter_desc_, - gwmat.dptr_), CUDNN_STATUS_SUCCESS); + &alpha, + in_desc_, + data.dptr_, + out_desc_, + grad.dptr_, + conv_desc_, + back_algo_w_, + workspace.dptr_, + workspace_size_, + &beta, + filter_desc_, + gwmat.dptr_), CUDNN_STATUS_SUCCESS); CHECK_EQ(cudnnConvolutionBackwardData_v3(s->dnn_handle_, - &alpha, - filter_desc_, - wmat.dptr_, - out_desc_, - grad.dptr_, - conv_desc_, - back_algo_, - temp_.dptr_, - param_.workspace, - &beta, - in_desc_, - gdata.dptr_), CUDNN_STATUS_SUCCESS); + &alpha, + filter_desc_, + wmat.dptr_, + out_desc_, + grad.dptr_, + conv_desc_, + back_algo_, + workspace.dptr_, + workspace_size_, + &beta, + in_desc_, + gdata.dptr_), CUDNN_STATUS_SUCCESS); } private: @@ -149,10 +153,9 @@ class CuDNNConvolutionOp : public Operator { size_t expected = param_.no_bias ? 2 : 3; CHECK_EQ(in_data.size(), expected); CHECK_EQ(out_data.size(), 1); - temp_.set_stream(s); if (!init_cudnn_) { init_cudnn_ = true; - size_t workspace = static_cast(param_.workspace); + size_t workspace = static_cast(param_.workspace * sizeof(real_t)); size_t back_size = 0; size_t back_size_w = 0; Tensor data = in_data[kData].get(s); @@ -169,13 +172,13 @@ class CuDNNConvolutionOp : public Operator { param_.kernel[0], param_.kernel[1]), CUDNN_STATUS_SUCCESS); CHECK_EQ(cudnnSetConvolution2dDescriptor(conv_desc_, - param_.pad[0], - param_.pad[1], - param_.stride[0], - param_.stride[1], - 1, - 1, - CUDNN_CROSS_CORRELATION), CUDNN_STATUS_SUCCESS); + param_.pad[0], + param_.pad[1], + param_.stride[0], + param_.stride[1], + 1, + 1, + CUDNN_CROSS_CORRELATION), CUDNN_STATUS_SUCCESS); CHECK_EQ(cudnnSetTensor4dDescriptor(in_desc_, CUDNN_TENSOR_NCHW, dtype_, @@ -202,59 +205,63 @@ class CuDNNConvolutionOp : public Operator { } CHECK_EQ(s->dnn_handle_ownership_, mshadow::Stream::OwnHandle); CHECK_EQ(cudnnGetConvolutionForwardAlgorithm(s->dnn_handle_, - in_desc_, - filter_desc_, - conv_desc_, - out_desc_, - CUDNN_CONVOLUTION_FWD_PREFER_FASTEST, - param_.workspace, - &algo_), CUDNN_STATUS_SUCCESS); + in_desc_, + filter_desc_, + conv_desc_, + out_desc_, + CUDNN_CONVOLUTION_FWD_PREFER_FASTEST, + workspace, + &algo_), CUDNN_STATUS_SUCCESS); CHECK_EQ(cudnnGetConvolutionBackwardFilterAlgorithm(s->dnn_handle_, - in_desc_, - out_desc_, - conv_desc_, - filter_desc_, - CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST, - param_.workspace, - &back_algo_w_), CUDNN_STATUS_SUCCESS); + in_desc_, + out_desc_, + conv_desc_, + filter_desc_, + CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST, + workspace, + &back_algo_w_), CUDNN_STATUS_SUCCESS); CHECK_EQ(cudnnGetConvolutionBackwardDataAlgorithm(s->dnn_handle_, - filter_desc_, - out_desc_, - conv_desc_, - in_desc_, - CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST, - param_.workspace, - &back_algo_), CUDNN_STATUS_SUCCESS); + filter_desc_, + out_desc_, + conv_desc_, + in_desc_, + CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST, + workspace, + &back_algo_), CUDNN_STATUS_SUCCESS); CHECK_EQ(cudnnGetConvolutionBackwardDataWorkspaceSize(s->dnn_handle_, - filter_desc_, - out_desc_, - conv_desc_, - in_desc_, - back_algo_, - &back_size), CUDNN_STATUS_SUCCESS); + filter_desc_, + out_desc_, + conv_desc_, + in_desc_, + back_algo_, + &back_size), CUDNN_STATUS_SUCCESS); CHECK_EQ(cudnnGetConvolutionBackwardFilterWorkspaceSize(s->dnn_handle_, - in_desc_, - out_desc_, - conv_desc_, - filter_desc_, - back_algo_w_, - &back_size_w), CUDNN_STATUS_SUCCESS); + in_desc_, + out_desc_, + conv_desc_, + filter_desc_, + back_algo_w_, + &back_size_w), CUDNN_STATUS_SUCCESS); back_size = std::max(back_size, back_size_w); CHECK_EQ(cudnnGetConvolutionForwardWorkspaceSize(s->dnn_handle_, - in_desc_, - filter_desc_, - conv_desc_, - out_desc_, - algo_, - &workspace), CUDNN_STATUS_SUCCESS); + in_desc_, + filter_desc_, + conv_desc_, + out_desc_, + algo_, + &workspace), CUDNN_STATUS_SUCCESS); workspace = std::max(workspace, back_size); - param_.workspace = workspace; - // TODO(bing): wait resource allocation - temp_.Resize(mshadow::Shape1(workspace / sizeof(real_t) + 1), 0.0f); + CHECK_GE(param_.workspace * sizeof(real_t), workspace + sizeof(real_t)) + << "\nMinimum workspace: " << workspace << "\n" + << "Given: " << param_.workspace * sizeof(real_t); + workspace_ = workspace / sizeof(real_t) + 1; + workspace_size_ = workspace_ * sizeof(real_t); } } bool init_cudnn_; + size_t workspace_; + size_t workspace_size_; cudnnDataType_t dtype_; cudnnTensorDescriptor_t in_desc_; cudnnTensorDescriptor_t out_desc_; @@ -265,8 +272,6 @@ class CuDNNConvolutionOp : public Operator { cudnnConvolutionBwdDataAlgo_t back_algo_; cudnnConvolutionBwdFilterAlgo_t back_algo_w_; ConvolutionParam param_; - // TODO(bing): remove when we have resource manager - mshadow::TensorContainer temp_; }; #endif // __CUDACC__ && CUDNN } // namespace op diff --git a/src/operator/cudnn_lrn-inl.h b/src/operator/cudnn_lrn-inl.h new file mode 100644 index 000000000000..eb520b2fbe68 --- /dev/null +++ b/src/operator/cudnn_lrn-inl.h @@ -0,0 +1,135 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file cudnn_lrn-inl.h + * \brief + * \author Bing Xu +*/ + +#ifndef MXNET_OPERATOR_CUDNN_LRN_INL_H_ +#define MXNET_OPERATOR_CUDNN_LRN_INL_H_ +#include +#include "./lrn-inl.h" + +namespace mxnet { +namespace op { +class CuDNNLocalResponseNormOp : public Operator { + public: + explicit CuDNNLocalResponseNormOp(LRNParam param) { + param_ = param; + init_cudnn_ = false; + // TODO(xxx): fp16 + dtype_ = CUDNN_DATA_FLOAT; + } + + ~CuDNNLocalResponseNormOp() { + CHECK_EQ(cudnnDestroyLRNDescriptor(lrn_desc_), CUDNN_STATUS_SUCCESS); + CHECK_EQ(cudnnDestroyTensorDescriptor(shape_desc_), CUDNN_STATUS_SUCCESS); + } + + virtual void Forward(const OpContext &ctx, + const std::vector &in_data, + const std::vector &req, + const std::vector &out_data, + const std::vector &aux_args) { + using namespace mshadow; + using namespace mshadow::expr; + CHECK_EQ(in_data.size(), 1); + CHECK_EQ(out_data.size(), 1); + float alpha = 1.0f; + float beta = 0.0f; + Stream *s = ctx.get_stream(); + Tensor data = in_data[kData].get(s); + Tensor out = out_data[kOut].get(s); + if (!init_cudnn_) { + this->Init(s, in_data, out_data); + } + CHECK_EQ(s->dnn_handle_ownership_, mshadow::Stream::OwnHandle); + CHECK_EQ(cudnnLRNCrossChannelForward(s->dnn_handle_, + lrn_desc_, + CUDNN_LRN_CROSS_CHANNEL_DIM1, + &alpha, + shape_desc_, + data.dptr_, + &beta, + shape_desc_, + out.dptr_), CUDNN_STATUS_SUCCESS); + } + + virtual void Backward(const OpContext &ctx, + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data, + const std::vector &req, + const std::vector &in_grad, + const std::vector &aux_args) { + using namespace mshadow; + using namespace mshadow::expr; + CHECK_EQ(out_grad.size(), 1); + CHECK_EQ(in_data.size(), 1); + CHECK_EQ(out_data.size(), 1); + CHECK_EQ(req.size(), 1); + CHECK_EQ(in_grad.size(), 1); + float alpha = 1.0f; + float beta = 0.0f; + Stream *s = ctx.get_stream(); + Tensor grad = out_grad[kOut].get(s); + Tensor data = in_data[kData].get(s); + Tensor output_data = out_data[kOut].get(s); + Tensor input_grad = in_grad[kData].get(s); + CHECK_EQ(s->dnn_handle_ownership_, mshadow::Stream::OwnHandle); + CHECK_EQ(cudnnLRNCrossChannelBackward(s->dnn_handle_, + lrn_desc_, + CUDNN_LRN_CROSS_CHANNEL_DIM1, + &alpha, + shape_desc_, + output_data.dptr_, + shape_desc_, + grad.dptr_, + shape_desc_, + data.dptr_, + &beta, + shape_desc_, + input_grad.dptr_), CUDNN_STATUS_SUCCESS); + } + + private: + inline void Init(mshadow::Stream *s, + const std::vector &in_data, + const std::vector &out_data) { + using namespace mshadow; + CHECK_EQ(in_data.size(), 1); + CHECK_EQ(out_data.size(), 1); + if (!init_cudnn_) { + init_cudnn_ = true; + Tensor data = in_data[kData].get(s); + Tensor out = out_data[kOut].get(s); + unsigned lrn_n = param_.nsize; + double alpha = param_.alpha; + double beta = param_.beta; + double lrn_k = param_.knorm; + CHECK_EQ(data.shape_, out.shape_); + CHECK_EQ(cudnnCreateLRNDescriptor(&lrn_desc_), CUDNN_STATUS_SUCCESS); + CHECK_EQ(cudnnSetLRNDescriptor(lrn_desc_, + lrn_n, + alpha, + beta, + lrn_k), CUDNN_STATUS_SUCCESS); + CHECK_EQ(cudnnCreateTensorDescriptor(&shape_desc_), CUDNN_STATUS_SUCCESS); + CHECK_EQ(cudnnSetTensor4dDescriptor(shape_desc_, + CUDNN_TENSOR_NCHW, + dtype_, + data.shape_[0], + data.shape_[1], + data.shape_[2], + data.shape_[3]), CUDNN_STATUS_SUCCESS); + } + } + bool init_cudnn_; + LRNParam param_; + cudnnDataType_t dtype_; + cudnnLRNDescriptor_t lrn_desc_; + cudnnTensorDescriptor_t shape_desc_; +}; // class CuDNNLocalResponseNormOp +} // namespace op +} // namespace mxnet +#endif // MXNET_OPERATOR_CUDNN_LRN_INL_H_ diff --git a/src/operator/cudnn_pooling-inl.h b/src/operator/cudnn_pooling-inl.h index 83faeee70435..67958ed46f26 100644 --- a/src/operator/cudnn_pooling-inl.h +++ b/src/operator/cudnn_pooling-inl.h @@ -33,6 +33,12 @@ class CuDNNPoolingOp : public Operator { } } + ~CuDNNPoolingOp() { + CHECK_EQ(cudnnDestroyTensorDescriptor(in_desc_), CUDNN_STATUS_SUCCESS); + CHECK_EQ(cudnnDestroyTensorDescriptor(out_desc_), CUDNN_STATUS_SUCCESS); + CHECK_EQ(cudnnDestroyPoolingDescriptor(pooling_desc_), CUDNN_STATUS_SUCCESS); + } + virtual void Forward(const OpContext &ctx, const std::vector &in_data, const std::vector &req, diff --git a/src/operator/elementwise_binary_op-inl.h b/src/operator/elementwise_binary_op-inl.h index b3ae8adc3de1..f8136af7b156 100644 --- a/src/operator/elementwise_binary_op-inl.h +++ b/src/operator/elementwise_binary_op-inl.h @@ -138,10 +138,14 @@ class ElementWiseBinaryOp : public Operator { template inline Operator* CreateElementWiseBinaryOp_(ElementWiseBinaryOpType type) { switch (type) { - case kPlus: return new ElementWiseBinaryOp(); - case kMinus: return new ElementWiseBinaryOp(); - case kMul: return new ElementWiseBinaryOp(); - case kDiv: return new ElementWiseBinaryOp(); + case kPlus: + return new ElementWiseBinaryOp(); + case kMinus: + return new ElementWiseBinaryOp(); + case kMul: + return new ElementWiseBinaryOp(); + case kDiv: + return new ElementWiseBinaryOp(); } LOG(FATAL) << "uknown op type"; return NULL; @@ -192,37 +196,41 @@ class ElementWiseBinaryOpProp : public OperatorProperty { // decalre dependency and inplace optimization options std::vector DeclareBackwardDependency( - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data) const override { + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data) const override { switch (GetOpType()) { case kPlus: - case kMinus: return {out_grad[kOut]}; + case kMinus: + return {out_grad[kOut]}; case kMul: - case kDiv: return {out_grad[kOut], in_data[kLhs], in_data[kRhs]}; + case kDiv: + return {out_grad[kOut], in_data[kLhs], in_data[kRhs]}; } LOG(FATAL) << "not reached"; return {}; } std::vector > BackwardInplaceOption( - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data, - const std::vector &in_grad) const override { + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data, + const std::vector &in_grad) const override { switch (GetOpType()) { case kPlus: - case kMinus: return {}; + case kMinus: + return {}; case kMul: - case kDiv: return {{out_grad[kOut], in_grad[kLhs]}}; + case kDiv: + return {{out_grad[kOut], in_grad[kLhs]}}; } LOG(FATAL) << "not reached"; return {}; } std::vector > ForwardInplaceOption( - const std::vector &in_data, - const std::vector &out_data) const override { + const std::vector &in_data, + const std::vector &out_data) const override { return {{in_data[kLhs], out_data[kOut]}}; } diff --git a/src/operator/elementwise_sum-inl.h b/src/operator/elementwise_sum-inl.h index c2a890b2e976..4e73d7e77efd 100644 --- a/src/operator/elementwise_sum-inl.h +++ b/src/operator/elementwise_sum-inl.h @@ -28,7 +28,7 @@ struct ElementWiseSumParam : public dmlc::Parameter { int num_args; DMLC_DECLARE_PARAMETER(ElementWiseSumParam) { DMLC_DECLARE_FIELD(num_args).set_range(1, 100) - .describe("Number of inputs to be sumed."); + .describe("Number of inputs to be sumed."); } }; @@ -36,7 +36,7 @@ template class ElementWiseSumOp : public Operator { public: explicit ElementWiseSumOp(ElementWiseSumParam param) - : size_(param.num_args) {} + : size_(param.num_args) {} virtual void Forward(const OpContext &ctx, const std::vector &in_data, @@ -125,7 +125,8 @@ class ElementWiseSumProp : public OperatorProperty { int sidx = -1; for (int i = 0; i < param_.num_args; ++i) { if (in_shape->at(i).ndim() != 0) { - sidx = i; break; + sidx = i; + break; } } if (sidx == -1) return false; @@ -158,23 +159,23 @@ class ElementWiseSumProp : public OperatorProperty { } std::vector DeclareBackwardDependency( - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data) const override { + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data) const override { return out_grad; } std::vector > BackwardInplaceOption( - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data, - const std::vector &in_grad) const override { + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data, + const std::vector &in_grad) const override { return {{out_grad[0], in_grad[0]}}; } std::vector > ForwardInplaceOption( - const std::vector &in_data, - const std::vector &out_data) const override { + const std::vector &in_data, + const std::vector &out_data) const override { return {{in_data[0], out_data[0]}}; } diff --git a/src/operator/fully_connected-inl.h b/src/operator/fully_connected-inl.h index 35cb035d16f8..bde719d4ed4e 100644 --- a/src/operator/fully_connected-inl.h +++ b/src/operator/fully_connected-inl.h @@ -31,9 +31,9 @@ struct FullyConnectedParam : public dmlc::Parameter { // TODO(bing) change to only set lower bound // add support for boolean DMLC_DECLARE_FIELD(num_hidden).set_range(1, 100000) - .describe("Number of hidden nodes of the output."); + .describe("Number of hidden nodes of the output."); DMLC_DECLARE_FIELD(no_bias).set_default(false) - .describe("Whether to disable bias parameter."); + .describe("Whether to disable bias parameter."); } }; @@ -63,10 +63,10 @@ class FullyConnectedOp : public Operator { // maybe need blas handle from context // TODO(bing): judge shape to remove flatten op Stream *s = ctx.get_stream(); - #if defined(__CUDACC__) +#if defined(__CUDACC__) CHECK_EQ(s->blas_handle_ownership_, Stream::OwnHandle) - << "Must init CuBLAS handle in stream"; - #endif // __CUDACC__ + << "Must init CuBLAS handle in stream"; +#endif // __CUDACC__ Tensor data = in_data[kData].FlatTo2D(s); Tensor wmat = in_data[kWeight].get(s); Tensor out = out_data[kOut].FlatTo2D(s); @@ -96,10 +96,10 @@ class FullyConnectedOp : public Operator { Tensor data = in_data[kData].FlatTo2D(s); Tensor wmat = in_data[kWeight].get(s); Tensor grad = out_grad[kOut].FlatTo2D(s); - #if defined(__CUDACC__) +#if defined(__CUDACC__) CHECK_EQ(s->blas_handle_ownership_, Stream::OwnHandle) - << "Must init CuBLAS handle in stream"; - #endif + << "Must init CuBLAS handle in stream"; +#endif // backprop CHECK_NE(req[kWeight], kWriteInplace) << "cannot write weight inplace"; // gradient of weight @@ -139,8 +139,8 @@ class FullyConnectedProp : public OperatorProperty { } bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + std::vector *out_shape, + std::vector *aux_shape) const override { using namespace mshadow; if (!param_.no_bias) { CHECK_EQ(in_shape->size(), 3) << "Input:[data, weight, bias]"; @@ -174,17 +174,17 @@ class FullyConnectedProp : public OperatorProperty { } // decalre dependency and inplace optimization options std::vector DeclareBackwardDependency( - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data) const override { + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data) const override { return {out_grad[kOut], in_data[kData], in_data[kWeight]}; } std::vector > BackwardInplaceOption( - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data, - const std::vector &in_grad) const override { + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data, + const std::vector &in_grad) const override { return {{in_data[kData], in_grad[kData]}}; } diff --git a/src/operator/leaky_relu-inl.h b/src/operator/leaky_relu-inl.h new file mode 100644 index 000000000000..9a635d86e722 --- /dev/null +++ b/src/operator/leaky_relu-inl.h @@ -0,0 +1,241 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file leaky_relu-inl.h + * \brief leaky relu family operator + * \author Bing Xu +*/ +#ifndef MXNET_OPERATOR_LEAKY_RELU_INL_H_ +#define MXNET_OPERATOR_LEAKY_RELU_INL_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "./operator_common.h" +#include "./mshadow_op.h" + +namespace mxnet { +namespace op { + +enum LeakyReLUOpInputs {kData, kGamma}; +enum LeakyReLUOpOutputs {kOut}; +enum LeakyReLUOpType {kLeakyReLU, kPReLU, kRReLU}; + +struct LeakyReLUParam : public dmlc::Parameter { + // use int for enumeration + int act_type; + float slope; + float lower_bound; + float upper_bound; + DMLC_DECLARE_PARAMETER(LeakyReLUParam) { + DMLC_DECLARE_FIELD(act_type).set_default(kLeakyReLU) + .add_enum("rrelu", kRReLU) + .add_enum("leaky", kLeakyReLU) + .add_enum("prelu", kPReLU) + .describe("Activation function to be applied."); + DMLC_DECLARE_FIELD(slope).set_default(0.25f) + .describe("Init slope for the activation. (For leaky only)"); + DMLC_DECLARE_FIELD(lower_bound).set_default(0.125f) + .describe("Lower bound of random slope. (For rrelu only)"); + DMLC_DECLARE_FIELD(upper_bound).set_default(0.334f) + .describe("Upper bound of random slope. (For rrelu only)"); + } +}; + +struct prelu_grad { + MSHADOW_XINLINE static real_t Map(real_t a) { + return a > 0.0f ? 0.0f : a; + } +}; + +template +class LeakyReLUOp : public Operator { + public: + explicit LeakyReLUOp(LeakyReLUParam param) { + param_ = param; + } + + virtual void Forward(const OpContext &ctx, + const std::vector &in_data, + const std::vector &req, + const std::vector &out_data, + const std::vector &aux_args) { + using namespace mshadow; + using namespace mshadow::expr; + size_t expected = param_.act_type == kPReLU ? 2 : 1; + CHECK_EQ(in_data.size(), expected); + CHECK_EQ(out_data.size(), 1); + Stream *s = ctx.get_stream(); + Tensor data; + Tensor out; + Tensor weight; + if (in_data[kData].ndim() == 2) { + uint32_t ds[] = {in_data[kData].shape_[0], in_data[kData].shape_[1], 1, 1}; + TShape dshape(ds, ds + 4); + data = in_data[kData].get_with_shape(dshape, s); + out = out_data[kOut].get_with_shape(dshape, s); + } else { + data = in_data[kData].get(s); + out = out_data[kOut].get(s); + } + switch (param_.act_type) { + case kLeakyReLU: { + Assign(out, req[kOut], F(data, param_.slope)); + break; + } + case kPReLU: { + weight = in_data[kGamma].get(s); + Assign(out, req[kOut], F(data, broadcast<1>(weight, out.shape_))); + break; + } + case kRReLU: { + LOG(FATAL) << "Not implmented"; + break; + } + default: + LOG(FATAL) << "Not implmented"; + } + } + + virtual void Backward(const OpContext & ctx, + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data, + const std::vector &req, + const std::vector &in_grad, + const std::vector &aux_args) { + using namespace mshadow; + using namespace mshadow::expr; + size_t expected = param_.act_type == kPReLU ? 2 : 1; + CHECK_EQ(out_grad.size(), 1); + CHECK_EQ(req.size(), expected); + CHECK_EQ(in_data.size(), expected); + Stream *s = ctx.get_stream(); + Tensor data, gdata; + Tensor grad; + Tensor weight; + Tensor grad_weight; + if (in_data[kData].ndim() == 2) { + uint32_t ds[] = {in_data[kData].shape_[0], in_data[kData].shape_[1], 1, 1}; + TShape dshape(ds, ds + 4); + data = in_data[kData].get_with_shape(dshape, s); + grad = out_grad[kOut].get_with_shape(dshape, s); + gdata = in_grad[kData].get_with_shape(dshape, s); + } else { + data = in_data[kData].get(s); + grad = out_grad[kOut].get(s); + gdata = in_grad[kData].get(s); + } + switch (param_.act_type) { + case kLeakyReLU: { + Assign(gdata, req[kData], F(data, param_.slope) * grad); + break; + } + case kPReLU: { + weight = in_data[kGamma].get(s); + grad_weight = in_grad[kGamma].get(s); + grad_weight = sumall_except_dim<1>(F(data) * grad); + gdata = F(data, broadcast<1>(weight, data.shape_)) * grad; + break; + } + case kRReLU: { + LOG(FATAL) << "Not implmented"; + break; + } + default: + LOG(FATAL) << "Not implmented"; + } + } + + private: + LeakyReLUParam param_; +}; // class LeakyReLUOp + +template +Operator* CreateOp(LeakyReLUParam type); + +#if DMLC_USE_CXX11 +class LeakyReLUProp : public OperatorProperty { + public: + void Init(const std::vector >& kwargs) override { + param_.Init(kwargs); + } + + bool InferShape(std::vector *in_shape, + std::vector *out_shape, + std::vector *aux_shape) const override { + using namespace mshadow; + if (param_.act_type == kPReLU) { + CHECK_EQ(in_shape->size(), 2) << "Input:[data, gamma]"; + } else { + CHECK_EQ(in_shape->size(), 1) << "Input:[data]"; + } + const TShape &dshape = in_shape->at(kData); + if (dshape.ndim() == 0) return false; + if (param_.act_type == kPReLU) { + in_shape->at(kGamma) = TShape(Shape1(dshape[1])); + } + out_shape->clear(); + out_shape->push_back(dshape); + return true; + } + + OperatorProperty* Copy() const override { + auto ptr = new LeakyReLUProp(); + ptr->param_ = param_; + return ptr; + } + + std::string TypeString() const override { + return "LeakyReLU"; + } + + // decalre dependency and inplace optimization options + std::vector DeclareBackwardDependency( + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data) const override { + if (param_.act_type == kPReLU) { + return {out_grad[kOut], in_data[kData], in_data[kGamma]}; + } else { + return {out_grad[kOut], in_data[kData]}; + } + } + + std::vector > BackwardInplaceOption( + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data, + const std::vector &in_grad) const override { + return {{out_grad[kOut], in_grad[kData]}}; + } + + std::vector > ForwardInplaceOption( + const std::vector &in_data, + const std::vector &out_data) const override { + return {}; + } + + std::vector ListArguments() const override { + if (param_.act_type == kPReLU) { + return {"data", "gamma"}; + } else { + return {"data"}; + } + } + + Operator* CreateOperator(Context ctx) const; + + private: + LeakyReLUParam param_; +}; +#endif // DMLC_USE_CXX11 +} // namespace op +} // namespace mxnet + +#endif // MXNET_OPERATOR_LEAKY_RELU_INL_H_ + diff --git a/src/operator/leaky_relu.cc b/src/operator/leaky_relu.cc new file mode 100644 index 000000000000..af154f7a3c24 --- /dev/null +++ b/src/operator/leaky_relu.cc @@ -0,0 +1,30 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file leaky_relu.cc + * \brief + * \author Bing Xu +*/ + +#include "./leaky_relu-inl.h" + +namespace mxnet { +namespace op { +template<> +Operator *CreateOp(LeakyReLUParam param) { + return new LeakyReLUOp(param); +} + +Operator *LeakyReLUProp::CreateOperator(Context ctx) const { + DO_BIND_DISPATCH(CreateOp, param_); +} + +DMLC_REGISTER_PARAMETER(LeakyReLUParam); + +MXNET_REGISTER_OP_PROPERTY(LeakyReLU, LeakyReLUProp) +.describe("Apply activation function to input.") +.add_argument("data", "Symbol", "Input data to activation function.") +.add_arguments(LeakyReLUParam::__FIELDS__()); + +} // namespace op +} // namespace mxnet + diff --git a/src/operator/leaky_relu.cu b/src/operator/leaky_relu.cu new file mode 100644 index 000000000000..c9af119a96ed --- /dev/null +++ b/src/operator/leaky_relu.cu @@ -0,0 +1,19 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file leaky_relu.cc + * \brief + * \author Bing Xu +*/ + +#include "./leaky_relu-inl.h" + +namespace mxnet { +namespace op { +template<> +Operator *CreateOp(LeakyReLUParam param) { + return new LeakyReLUOp(param); +} + +} // namespace op +} // namespace mxnet + diff --git a/src/operator/lrn-inl.h b/src/operator/lrn-inl.h new file mode 100644 index 000000000000..06476a4ce4ee --- /dev/null +++ b/src/operator/lrn-inl.h @@ -0,0 +1,184 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file lrn-inl.h + * \brief + * \author Bing Xu +*/ +#ifndef MXNET_OPERATOR_LRN_INL_H_ +#define MXNET_OPERATOR_LRN_INL_H_ +#include +#include +#include +#include +#include +#include +#include +#include "./operator_common.h" +#include "./mshadow_op.h" + +namespace mxnet { +namespace op { +enum LRNInputs {kData}; +enum LRNOutputs {kOut, kTmpNorm}; + +struct LRNParam : public dmlc::Parameter { + float alpha; + float beta; + float knorm; + uint32_t nsize; + DMLC_DECLARE_PARAMETER(LRNParam) { + DMLC_DECLARE_FIELD(alpha).set_default(1e-4f) + .describe("value of the alpha variance scaling parameter in the normalization formula"); + DMLC_DECLARE_FIELD(beta).set_default(0.75f) + .describe("value of the beta power parameter in the normalization formula"); + DMLC_DECLARE_FIELD(knorm).set_default(2.0f) + .describe("value of the k parameter in normalization formula"); + DMLC_DECLARE_FIELD(nsize) + .describe("normalization window width in elements."); + } +}; // struct LRNParam + +template +class LocalResponseNormOp : public Operator { + public: + explicit LocalResponseNormOp(LRNParam param) { + param_ = param; + } + virtual void Forward(const OpContext &ctx, + const std::vector &in_data, + const std::vector &req, + const std::vector &out_data, + const std::vector &aux_states) { + using namespace mshadow; + using namespace mshadow::expr; + // TODO(xxx): Test with gradient chceker + CHECK_EQ(in_data.size(), 1); + CHECK_EQ(out_data.size(), 2); + // CHECK_EQ(req.size(), 2); + CHECK_EQ(param_.nsize % 2, 1) << "LRN only supports odd values for local_size"; + const real_t salpha = param_.alpha / param_.nsize; + Stream *s = ctx.get_stream(); + Tensor data = in_data[kData].get(s); + Tensor out = out_data[kOut].get(s); + Tensor tmp_norm = out_data[kTmpNorm].get(s); + tmp_norm = chpool(F(data) , param_.nsize) * salpha + param_.knorm; + Assign(out, req[kOut], data * F(tmp_norm, -param_.beta)); + } + + virtual void Backward(const OpContext &ctx, + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data, + const std::vector &req, + const std::vector &in_grad, + const std::vector &aux_states) { + using namespace mshadow; + using namespace mshadow::expr; + CHECK_EQ(out_grad.size(), 1); + CHECK_EQ(in_data.size(), 1); + CHECK_EQ(out_data.size(), 2); + const real_t salpha = param_.alpha / param_.nsize; + Stream *s = ctx.get_stream(); + Tensor grad = out_grad[kOut].get(s); + Tensor tmp_norm = out_data[kTmpNorm].get(s); + Tensor data = in_data[kData].get(s); + Tensor grad_in = in_grad[kData].get(s); + grad_in = grad * F(tmp_norm, -param_.beta); + grad_in += (- 2.0f * param_.beta * salpha) * + chpool(grad * data * + F(tmp_norm, -param_.beta - 1.0f), + param_.nsize) * data; + } + + private: + LRNParam param_; +}; // class LocalResponseNormOp + +template +Operator *CreateOp(LRNParam param); + +#if DMLC_USE_CXX11 +class LocalResponseNormProp : public OperatorProperty { + public: + void Init(const std::vector >& kwargs) override { + param_.Init(kwargs); + } + + bool InferShape(std::vector *in_shape, + std::vector *out_shape, + std::vector *aux_shape) const override { + using namespace mshadow; + CHECK_EQ(in_shape->size(), 1) << "Input:[data]"; + const TShape &dshape = in_shape->at(0); + if (dshape.ndim() == 0) return false; + out_shape->clear(); + out_shape->push_back(dshape); +#if MXNET_USE_CUDNN != 1 + out_shape->push_back(dshape); +#endif + return true; + } + + OperatorProperty* Copy() const override { + auto ptr = new LocalResponseNormProp(); + ptr->param_ = param_; + return ptr; + } + + std::string TypeString() const override { + return "LRN"; + } + + std::vector DeclareBackwardDependency( + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data) const override { +#if MXNET_USE_CUDNN == 1 + return {out_grad[kOut], in_data[kData], out_data[kOut]}; +#else + return {out_grad[kOut], in_data[kData], out_data[kTmpNorm]}; +#endif + } + + std::vector > BackwardInplaceOption( + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data, + const std::vector &in_grad) const override { +#if MXNET_USE_CUDNN == 1 + return {}; +#else + return {{out_grad[kOut], in_grad[kData]}}; +#endif + } + + int NumVisibleOutputs() const override { + return 1; + } + + int NumOutputs() const override { + return MXNET_USE_CUDNN == 1 ? 1 : 2; + } + + std::vector ListArguments() const override { + return {"data"}; + } + + std::vector ListOutputs() const override { +#if MXNET_USE_CUDNN == 1 + return {"output"}; +#else + return {"output", "tmp_norm"}; +#endif + } + + Operator* CreateOperator(Context ctx) const; + + private: + LRNParam param_; +}; // LocalResponseNormProp +#endif // DMLC_USE_CXX11 +} // namespace op +} // namespace mxnet +#endif // MXNET_OPERATOR_LRN_INL_H_ + diff --git a/src/operator/lrn.cc b/src/operator/lrn.cc new file mode 100644 index 000000000000..abaee01a7605 --- /dev/null +++ b/src/operator/lrn.cc @@ -0,0 +1,32 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file lrn.cc + * \brief + * \author Bing Xu +*/ + +#include "./lrn-inl.h" +#if MXNET_USE_CUDNN == 1 +#include "./cudnn_lrn-inl.h" +#endif + +namespace mxnet { +namespace op { +template<> +Operator* CreateOp(LRNParam param) { + return new LocalResponseNormOp(param); +} + +Operator* LocalResponseNormProp::CreateOperator(Context ctx) const { + DO_BIND_DISPATCH(CreateOp, param_); +} + +DMLC_REGISTER_PARAMETER(LRNParam); + +MXNET_REGISTER_OP_PROPERTY(LRN, LocalResponseNormProp) +.add_argument("data", "Symbol", "Input data to the ConvolutionOp.") +.add_arguments(LRNParam::__FIELDS__()) +.describe("Apply convolution to input then add a bias."); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/lrn.cu b/src/operator/lrn.cu new file mode 100644 index 000000000000..1a68ea7b3da8 --- /dev/null +++ b/src/operator/lrn.cu @@ -0,0 +1,27 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file lrn.cu + * \brief + * \author Bing Xu +*/ + +#include "./lrn-inl.h" +#if MXNET_USE_CUDNN == 1 +#include "./cudnn_lrn-inl.h" +#endif + +namespace mxnet { +namespace op { +template<> +Operator* CreateOp(LRNParam param) { +#if MXNET_USE_CUDNN == 1 + return new CuDNNLocalResponseNormOp(param); +#else + return new LocalResponseNormOp(param); +#endif // MXNET_USE_CUDNN +} + +} // namespace op +} // namespace mxnet + + diff --git a/src/operator/mshadow_op.h b/src/operator/mshadow_op.h index 1994006b2205..9238ee049c0b 100644 --- a/src/operator/mshadow_op.h +++ b/src/operator/mshadow_op.h @@ -58,13 +58,13 @@ struct relu_grad { /*! \brief Leaky ReLU Operation */ struct xelu { MSHADOW_XINLINE static real_t Map(real_t a, real_t b) { - return a > 0 ? a : a / b; + return a > 0.0f ? a : a * b; } }; struct xelu_grad { MSHADOW_XINLINE static real_t Map(real_t a, real_t b) { - return a > 0 ? 1 : 1.0f / b; + return a > 0.0f ? 1.0f : b; } }; diff --git a/src/operator/operator_common.h b/src/operator/operator_common.h index eea731c8fbe6..8c341bada778 100644 --- a/src/operator/operator_common.h +++ b/src/operator/operator_common.h @@ -28,11 +28,17 @@ inline void Assign(OType &out, // NOLINT(*) OpReqType req, const Exp &exp) { switch (req) { - case kNullOp: break; + case kNullOp: + break; case kWriteTo: - case kWriteInplace: out = exp; break; - case kAddTo: out += exp; break; - default: LOG(FATAL) << "not reached"; + case kWriteInplace: + out = exp; + break; + case kAddTo: + out += exp; + break; + default: + LOG(FATAL) << "not reached"; } } @@ -44,7 +50,7 @@ struct InferShapeError { int index; // constructor InferShapeError(std::string msg, int index) - : msg(msg), index(index) {} + : msg(msg), index(index) {} }; /*! diff --git a/src/operator/pooling-inl.h b/src/operator/pooling-inl.h index 5748325d5835..c13e1f70b6a6 100644 --- a/src/operator/pooling-inl.h +++ b/src/operator/pooling-inl.h @@ -32,24 +32,24 @@ struct PoolingParam : public dmlc::Parameter { DMLC_DECLARE_PARAMETER(PoolingParam) { // TODO(bing) change to only set lower bound DMLC_DECLARE_FIELD(kernel) - .set_expect_ndim(2).enforce_nonzero() - .describe("pooling kernel size: (y, x)"); + .set_expect_ndim(2).enforce_nonzero() + .describe("pooling kernel size: (y, x)"); DMLC_DECLARE_FIELD(pool_type).set_default(kMaxPooling) - .add_enum("max", kMaxPooling) - .add_enum("avg", kAvgPooling) - .add_enum("sum", kSumPooling) - .describe("Pooling type to be applied."); + .add_enum("max", kMaxPooling) + .add_enum("avg", kAvgPooling) + .add_enum("sum", kSumPooling) + .describe("Pooling type to be applied."); int stride_shape[] = {1, 1}; DMLC_DECLARE_FIELD(stride).set_default(TShape(stride_shape, stride_shape + 2)) - .set_expect_ndim(2).enforce_nonzero() - .describe("stride: for pooling (y, x)"); + .set_expect_ndim(2).enforce_nonzero() + .describe("stride: for pooling (y, x)"); int pad_shape[] = {0, 0}; DMLC_DECLARE_FIELD(pad).set_default(TShape(pad_shape, pad_shape + 2)) - .set_expect_ndim(2) - .describe("pad for pooling: (y, x)"); + .set_expect_ndim(2) + .describe("pad for pooling: (y, x)"); } }; @@ -75,24 +75,24 @@ class PoolingOp : public Operator { mshadow::Shape<2> out_shape = Shape2(out.shape_[2], out.shape_[3]); // TODO(bing): dual stride in mshadow CHECK_EQ(param_.stride[0], param_.stride[1]) - << "Only same stride is supported now"; + << "Only same stride is supported now"; if (param_.pool_type == kMaxPooling || param_.pool_type == kSumPooling) { Assign(out, req[kOut], - pool(pad(data, param_.pad[0], param_.pad[1]), - out_shape, - param_.kernel[0], - param_.kernel[1], - param_.stride[0])); + pool(pad(data, param_.pad[0], param_.pad[1]), + out_shape, + param_.kernel[0], + param_.kernel[1], + param_.stride[0])); } else if (param_.pool_type == kAvgPooling) { Assign(out, req[kOut], (1.0f / (param_.kernel[0] * param_.kernel[1])) * \ pool(pad(data, param_.pad[0], param_.pad[1]), - out_shape, - param_.kernel[0], - param_.kernel[1], - param_.stride[0])); + out_shape, + param_.kernel[0], + param_.kernel[1], + param_.stride[0])); } } @@ -161,12 +161,12 @@ class PoolingProp : public OperatorProperty { } bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + std::vector *out_shape, + std::vector *aux_shape) const override { CHECK_EQ(in_shape->size(), 1); const TShape &dshape = (*in_shape)[0]; CHECK_EQ(dshape.ndim(), 4) << \ - "Pooling: Input data should be 4D in (batch, channel, y, x)"; + "Pooling: Input data should be 4D in (batch, channel, y, x)"; TShape oshape = dshape; if (dshape.ndim() == 0) return false; oshape[2] = std::min(dshape[2] + 2 * param_.pad[0] - param_.kernel[0] + param_.stride[0] - 1, @@ -190,22 +190,22 @@ class PoolingProp : public OperatorProperty { } std::vector DeclareBackwardDependency( - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data) const override { + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data) const override { return {out_grad[kOut], in_data[kData], out_data[kOut]}; } std::vector > BackwardInplaceOption( - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data, - const std::vector &in_grad) const override { - #if MXNET_USE_CUDNN == 1 + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data, + const std::vector &in_grad) const override { +#if MXNET_USE_CUDNN == 1 return {}; - #else +#else return {{in_data[kData], in_grad[kData]}}; - #endif +#endif } Operator* CreateOperator(Context ctx) const; @@ -218,3 +218,4 @@ class PoolingProp : public OperatorProperty { } // namespace mxnet #endif // MXNET_OPERATOR_POOLING_INL_H_ + diff --git a/src/operator/pooling.cc b/src/operator/pooling.cc index cf7e965a802f..44f80f854468 100644 --- a/src/operator/pooling.cc +++ b/src/operator/pooling.cc @@ -11,9 +11,12 @@ namespace op { template<> Operator *CreateOp(PoolingParam param) { switch (param.pool_type) { - case kMaxPooling: return new PoolingOp(param); - case kAvgPooling: return new PoolingOp(param); - case kSumPooling: return new PoolingOp(param); + case kMaxPooling: + return new PoolingOp(param); + case kAvgPooling: + return new PoolingOp(param); + case kSumPooling: + return new PoolingOp(param); default: LOG(FATAL) << "unknown activation type"; return NULL; diff --git a/src/operator/pooling.cu b/src/operator/pooling.cu index df9547bf4a1e..9b59ffaba6ad 100644 --- a/src/operator/pooling.cu +++ b/src/operator/pooling.cu @@ -14,18 +14,21 @@ namespace mxnet { namespace op { template<> Operator *CreateOp(PoolingParam param) { - #if MXNET_USE_CUDNN == 1 - return new CuDNNPoolingOp(param); - #else +#if MXNET_USE_CUDNN == 1 + return new CuDNNPoolingOp(param); +#else switch (param.pool_type) { - case kMaxPooling: return new PoolingOp(param); - case kAvgPooling: return new PoolingOp(param); - case kSumPooling: return new PoolingOp(param); + case kMaxPooling: + return new PoolingOp(param); + case kAvgPooling: + return new PoolingOp(param); + case kSumPooling: + return new PoolingOp(param); default: LOG(FATAL) << "unknown activation type"; return NULL; } - #endif // MXNET_USE_CUDNN +#endif // MXNET_USE_CUDNN } } // namespace op diff --git a/src/operator/reshape-inl.h b/src/operator/reshape-inl.h index 8bd95c49927e..d992c21effcb 100644 --- a/src/operator/reshape-inl.h +++ b/src/operator/reshape-inl.h @@ -98,15 +98,15 @@ class ReshapeProp : public OperatorProperty { } bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + std::vector *out_shape, + std::vector *aux_shape) const override { CHECK_EQ(in_shape->size(), 1) << "Input: [data]"; const TShape &dshape = in_shape->at(kData); if (dshape.ndim() == 0) return false; CHECK(param_.target_shape.Size() == dshape.Size()) - << "Target shape size is different to source. " - << "Target: " << param_.target_shape.Size() - << "\nSource: " << dshape.Size(); + << "Target shape size is different to source. " + << "Target: " << param_.target_shape.Size() + << "\nSource: " << dshape.Size(); out_shape->clear(); out_shape->push_back(param_.target_shape); return true; @@ -119,23 +119,23 @@ class ReshapeProp : public OperatorProperty { } std::vector DeclareBackwardDependency( - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data) const override { + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data) const override { return {out_grad[kOut]}; } std::vector > ForwardInplaceOption( - const std::vector &in_data, - const std::vector &out_data) const override { + const std::vector &in_data, + const std::vector &out_data) const override { return {{in_data[kData], out_data[kOut]}}; } std::vector > BackwardInplaceOption( - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data, - const std::vector &in_grad) const override { + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data, + const std::vector &in_grad) const override { return {{out_grad[kOut], in_grad[kData]}}; } @@ -154,8 +154,8 @@ class FlattenProp : public ReshapeProp { } bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + std::vector *out_shape, + std::vector *aux_shape) const override { CHECK_EQ(in_shape->size(), 1) << "Input: [data]"; const TShape &dshape = in_shape->at(kData); if (dshape.ndim() == 0) return false; diff --git a/src/operator/reshape.cu b/src/operator/reshape.cu index 34aa5e1754dd..b810862f3c73 100644 --- a/src/operator/reshape.cu +++ b/src/operator/reshape.cu @@ -11,7 +11,7 @@ namespace mxnet { namespace op { template<> - Operator *CreateOp() { +Operator *CreateOp() { return new ReshapeOp(); } diff --git a/src/operator/softmax-inl.h b/src/operator/softmax-inl.h index cf4e2671d719..ea0114217cac 100644 --- a/src/operator/softmax-inl.h +++ b/src/operator/softmax-inl.h @@ -27,7 +27,7 @@ struct SoftmaxParam : public dmlc::Parameter { float grad_scale; DMLC_DECLARE_PARAMETER(SoftmaxParam) { DMLC_DECLARE_FIELD(grad_scale).set_default(1.0f) - .describe("Scale the gradient by a float factor"); + .describe("Scale the gradient by a float factor"); }; }; @@ -94,8 +94,8 @@ class SoftmaxProp : public OperatorProperty { } bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + std::vector *out_shape, + std::vector *aux_shape) const override { using namespace mshadow; CHECK_EQ(in_shape->size(), 2) << "Input:[data, label]"; const TShape &dshape = in_shape->at(0); @@ -117,23 +117,23 @@ class SoftmaxProp : public OperatorProperty { } std::vector DeclareBackwardDependency( - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data) const override { + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data) const override { return {in_data[kLabel], out_data[kOut]}; } std::vector > BackwardInplaceOption( - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data, - const std::vector &in_grad) const override { + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data, + const std::vector &in_grad) const override { return {{out_data[kOut], in_grad[kData]}}; } std::vector > ForwardInplaceOption( - const std::vector &in_data, - const std::vector &out_data) const override { + const std::vector &in_data, + const std::vector &out_data) const override { return {{in_data[kData], out_data[kOut]}}; }