Skip to content
This repository was archived by the owner on Nov 17, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion python/mxnet/ndarray/sparse_ndarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,7 @@ class RowSparseNDArray(SparseNDArray):
``dense[rsp.indices[i], :, :, :, ...] = rsp.values[i, :, :, :, ...]``

RowSparseNDArray is used principally in the definition of gradients for operations
that have sparse gradients (e.g. SparseEmbedding).
that have sparse gradients (e.g. dot with sparse inputs).

Examples
--------
Expand Down
43 changes: 0 additions & 43 deletions src/operator/tensor/indexing_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -86,49 +86,6 @@ NNVM_REGISTER_OP(_backward_Embedding)
.set_attr<nnvm::TIsBackward>("TIsBackward", true)
.set_attr<FCompute>("FCompute<cpu>", EmbeddingOpBackward<cpu>);

// Forward operator registration for SparseEmbedding: an embedding lookup
// whose input is a one-hot csr matrix and whose weight is row-sparse.
NNVM_REGISTER_OP(SparseEmbedding)
.describe(R"doc(Represents words or other sparse inputs by dense continuous vectors.
It assumes that the input is in one-hot form. E.g., for a vocabulary size of 10,000,
each input vector is expected to have dimension 10,000.
The index of the non-zero entry is the index of the word or item it represents.

The corresponding embedding vectors are stored as rows of a matrix.
Hence, mapping an input word to its embedding is implemented as a matrix product.

The gradient of an embedding matrix has the form of gradient vectors that are only
non-zero for words seen in a minibatch.
)doc" ADD_FILELINE)
// Two inputs (data, weight), one dense output.
.set_num_inputs(2)
.set_num_outputs(1)
.set_attr_parser(ParamParser<EmbeddingParam>)
.set_attr<nnvm::FListInputNames>("FListInputNames",
[](const NodeAttrs& attrs) {
return std::vector<std::string>{"data", "weight"};
})
// Shape/type/storage inference: data must be 2-D csr, weight is forced to
// row-sparse, output is dense (see SparseEmbeddingShape /
// SparseEmbeddingForwardStorageType in indexing_op.h).
.set_attr<nnvm::FInferShape>("FInferShape", SparseEmbeddingShape)
.set_attr<nnvm::FInferType>("FInferType", EmbeddingOpType)
.set_attr<FInferStorageType>("FInferStorageType", SparseEmbeddingForwardStorageType)
// The sparse dot kernel used by the forward pass needs scratch space.
.set_attr<FResourceRequest>("FResourceRequest",
[](const NodeAttrs& attrs) {
return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
})
.set_attr<FComputeEx>("FComputeEx<cpu>", SparseEmbeddingForwardEx<cpu>)
// Backward node receives the output gradients plus the forward data input
// (n->inputs[0]); the weight itself is not needed to form its gradient.
.set_attr<nnvm::FGradient>("FGradient",
[](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
return MakeNonlossGradNode("_backward_SparseEmbedding", n, ograds,
{n->inputs[0]}, n->attrs.dict);
})
.add_argument("data", "NDArray-or-Symbol",
"The input array to the sparse embedding operator.")
.add_argument("weight", "NDArray-or-Symbol", "The embedding weight matrix.")
.add_arguments(EmbeddingParam::__FIELDS__());

// Backward operator: 2 inputs (ograd, data) as wired by the FGradient lambda
// above, 2 outputs (gradients w.r.t. data and weight).
NNVM_REGISTER_OP(_backward_SparseEmbedding)
.set_num_inputs(2)
.set_num_outputs(2)
.set_attr<nnvm::TIsBackward>("TIsBackward", true)
.set_attr<FComputeEx>("FComputeEx<cpu>", SparseEmbeddingBackwardEx<cpu>);

NNVM_REGISTER_OP(take)
.describe(R"code(Takes elements from an input array along the given axis.

Expand Down
98 changes: 0 additions & 98 deletions src/operator/tensor/indexing_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -204,79 +204,6 @@ void EmbeddingOpForward(const nnvm::NodeAttrs& attrs,
});
}

// Forward pass of SparseEmbedding for csr `data` with row-sparse `weight`.
// Since the csr input is one-hot, the embedding lookup is exactly the matrix
// product data * weight, so we delegate to the sparse dot kernel.
template<typename xpu>
void SparseEmbeddingForwardRspImpl(const nnvm::NodeAttrs& attrs,
                                   const OpContext& ctx,
                                   const NDArray& data,
                                   const NDArray& weight,
                                   const OpReqType req,
                                   NDArray *out) {
  // The dot kernel treats the rsp weight as a plain dense matrix, which is
  // only valid when every row is materialized (storage_shape == shape).
  CHECK_RSP_ALL_ROWS_NON_ZERO(weight, "SparseEmbedding", "weight");
  TBlob out_data = out->data();
  // trans_lhs = false: out = data * weight.
  DotCsrRspDnsImpl<xpu>(ctx.get_stream<xpu>(), data, weight, req,
                        false, &out_data);
}

// Storage-type dispatching entry point for the SparseEmbedding forward pass.
// Supports only data = csr, weight = row_sparse, out = dense; any other
// combination is a fatal error.
template<typename xpu>
void SparseEmbeddingForwardEx(const nnvm::NodeAttrs& attrs,
                              const OpContext& ctx,
                              const std::vector<NDArray>& inputs,
                              const std::vector<OpReqType>& req,
                              const std::vector<NDArray>& outputs) {
  // Validate arity BEFORE indexing into req/inputs/outputs (the original
  // code read req[embedding::kOut] ahead of the size check).
  CHECK_EQ(inputs.size(), 2U);
  CHECK_EQ(outputs.size(), 1U);
  CHECK_EQ(req.size(), 1U);
  // Only kWriteTo is implemented by the underlying dot kernel path.
  CHECK_EQ(req[embedding::kOut], kWriteTo);

  const auto data_stype = inputs[embedding::kData].storage_type();
  const auto weight_stype = inputs[embedding::kWeight].storage_type();
  const auto out_stype = outputs[embedding::kOut].storage_type();
  if (data_stype == kCSRStorage && weight_stype == kRowSparseStorage &&
      out_stype == kDefaultStorage) {
    // NDArray is a shallow handle; `ret` aliases the real output storage.
    NDArray ret = outputs[embedding::kOut];
    SparseEmbeddingForwardRspImpl<xpu>(attrs, ctx, inputs[embedding::kData],
                                       inputs[embedding::kWeight],
                                       req[embedding::kOut], &ret);
  } else {
    LOG(FATAL) << "Not supported SparseEmbedding operation for data.storage_type = "
               << data_stype << ", weight.storage_type = " << weight_stype
               << ", out.storage_type = " << out_stype;
  }
}

// Storage-type inference for SparseEmbedding: data is csr, output is dense,
// and the weight is unconditionally forced to row-sparse.
inline bool SparseEmbeddingForwardStorageType(const nnvm::NodeAttrs& attrs,
                                              const Context& ctx,
                                              std::vector<int> *in_attrs,
                                              std::vector<int> *out_attrs) {
  CHECK_EQ(in_attrs->size(), 2U);
  CHECK_EQ(out_attrs->size(), 1U);
  STORAGE_TYPE_ASSIGN_CHECK(*in_attrs, embedding::kData, kCSRStorage);
  STORAGE_TYPE_ASSIGN_CHECK(*out_attrs, embedding::kOut, kDefaultStorage);
  // Overwrite (not assign-check) the weight: this overrides whatever default
  // storage type nnvm inferred for it.
  (*in_attrs)[embedding::kWeight] = kRowSparseStorage;
  return true;
}

// Shape inference for SparseEmbedding.
// data: (batch, input_dim) one-hot csr; weight: (input_dim, output_dim);
// output: (batch, output_dim).
inline bool SparseEmbeddingShape(const nnvm::NodeAttrs& attrs,
                                 std::vector<TShape> *in_attrs,
                                 std::vector<TShape> *out_attrs) {
  using namespace mshadow;
  const EmbeddingParam& param = nnvm::get<EmbeddingParam>(attrs.parsed);
  const TShape &dshape = (*in_attrs)[embedding::kData];
  CHECK_EQ(dshape.ndim(), 2)
      << "SparseEmbedding shape error: data is expected to be 2D.";
  SHAPE_ASSIGN_CHECK(*in_attrs, embedding::kWeight,
                     Shape2(param.input_dim, param.output_dim));
  // The output shape follows directly from data rows x embedding width.
  out_attrs->clear();
  out_attrs->push_back(Shape2(dshape[0], param.output_dim));
  return true;
}

// Returns integer log2(a) rounded up
inline int ilog2(unsigned int a) {
int k = 1;
Expand Down Expand Up @@ -389,31 +316,6 @@ void EmbeddingOpBackward(const nnvm::NodeAttrs& attrs,
});
}

// Backward pass for SparseEmbedding.
// inputs:  [0] = output gradient (dense), [1] = forward data (csr).
// outputs: [0] = grad w.r.t. data (never written), [1] = grad w.r.t. weight.
// grad_weight = data^T * ograd, computed by the transposed csr-dns dot kernel.
template<typename xpu>
void SparseEmbeddingBackwardEx(const nnvm::NodeAttrs& attrs,
                               const OpContext& ctx,
                               const std::vector<NDArray>& inputs,
                               const std::vector<OpReqType>& req,
                               const std::vector<NDArray>& outputs) {
  CHECK_EQ(inputs.size(), 2U);
  CHECK_EQ(outputs.size(), 2U);
  CHECK_EQ(req.size(), 2U);
  // NOTE(review): the data gradient (outputs[0]) is never produced here, so
  // callers must request 'null' for it. These guards were disabled in the
  // original code — re-enable once all callers comply:
  // CHECK_EQ(req[embedding::kData], kNullOp)
  //     << "Embedding layer doesn't support calculate data gradient";
  // CHECK_NE(req[1], kWriteInplace) << "DotBackwardEx does not support WriteInplace";

  auto data_stype = inputs[1].storage_type();    // forward csr data
  auto grad_stype = inputs[0].storage_type();    // incoming output gradient
  auto output_stype = outputs[1].storage_type();  // weight gradient
  if (data_stype == kCSRStorage && grad_stype == kDefaultStorage &&
      output_stype == kDefaultStorage) {
    TBlob ret = outputs[1].data();
    // trans_lhs = true: ret = data^T * ograd.
    DotCsrDnsDnsImpl(ctx.get_stream<xpu>(), inputs[1], inputs[0].data(), req[1], true, &ret);
  } else {
    // Fixed copy-pasted message that referred to "dot backward".
    LOG(FATAL) << "Not supported SparseEmbedding backward for data.storage_type = "
               << data_stype << ", grad.storage_type = " << grad_stype
               << ", grad_weight.storage_type = " << output_stype;
  }
}

namespace take_ { // to avoid name conflict
enum TakeOpInputs {kArr, kIdx};
enum TakeOpOutputs {kOut};
Expand Down
30 changes: 0 additions & 30 deletions tests/python/unittest/test_sparse_operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,36 +142,6 @@ def test_dot_csr(lhs_shape, rhs_shape, rhs_stype, trans_lhs, density=1):
test_dot_csr(lhs_shape, (lhs_shape[0], rnd.randint(1, 10)), 'row_sparse', True, 0.05)


def test_sparse_embedding():
    # Check SparseEmbedding forward/backward against plain dense numpy dots:
    # forward output == one_hot . weight, weight grad == one_hot^T . out_grad.
    vocab_size = 10
    embed_dim = 4
    num_samples = 24

    csr_data = mx.sym.Variable("data", stype='csr')
    sym = mx.sym.SparseEmbedding(data=csr_data, input_dim=vocab_size,
                                 output_dim=embed_dim, name="embed")
    executor = sym.simple_bind(default_context(),
                               grad_req={'data': 'null', 'embed_weight': 'write'},
                               data=(num_samples, vocab_size))

    args = dict(zip(sym.list_arguments(), executor.arg_arrays))
    grads = dict(zip(sym.list_arguments(), executor.grad_arrays))
    word_ids = np.random.randint(low=0, high=vocab_size, size=num_samples)
    dense_weight = np.random.uniform(-0.01, 0.01, args["embed_weight"].shape)
    one_hot = np.zeros((num_samples, vocab_size))
    one_hot[np.arange(num_samples), word_ids] = 1.0
    sparse_one_hot = mx.nd.array(one_hot)._to_csr()
    # forward
    args["data"][:] = sparse_one_hot
    args["embed_weight"][:] = dense_weight
    executor.forward(is_train=True)
    assert_almost_equal(executor.outputs[0].asnumpy(), np.dot(one_hot, dense_weight))
    # backward
    out_grad = np.random.uniform(-1, 1, executor.outputs[0].shape)
    grad_nd = mx.nd.zeros(out_grad.shape)
    grad_nd[:] = out_grad
    executor.backward([grad_nd])
    assert_almost_equal(grads["embed_weight"].asnumpy(), np.dot(one_hot.T, out_grad), atol=1e-5)


def test_sparse_slice():
def check_csr_slice(shape, slice_input):
storage_type = 'csr'
Expand Down