diff --git a/benchmark/python/ffi/benchmark_ffi.py b/benchmark/python/ffi/benchmark_ffi.py index 67701020b205..6da4cd39d2a9 100644 --- a/benchmark/python/ffi/benchmark_ffi.py +++ b/benchmark/python/ffi/benchmark_ffi.py @@ -65,6 +65,7 @@ def prepare_workloads(): OpArgMngr.add_workload("linalg.eigh", pool['3x3']) OpArgMngr.add_workload("linalg.det", pool['3x3']) OpArgMngr.add_workload("linalg.slogdet", pool['3x3']) + OpArgMngr.add_workload("linalg.matrix_rank", pool['3x3'], pool['1'], hermitian=False) OpArgMngr.add_workload("linalg.svd", pool['3x3']) OpArgMngr.add_workload("linalg.cholesky", pool['1x1']) OpArgMngr.add_workload("linalg.eigvals", pool['1x1']) @@ -124,10 +125,10 @@ def prepare_workloads(): out=dnp.array([False, False], dtype=bool), keepdims=False) OpArgMngr.add_workload("roll", pool["2x2"], 1, axis=0) OpArgMngr.add_workload("rot90", pool["2x2"], 2) - OpArgMngr.add_workload("array_split", pool['2X2'], 2, axis=1) - OpArgMngr.add_workload("vsplit", pool['2X2'], 2) - OpArgMngr.add_workload("hsplit", pool['2X2'], 2) - OpArgMngr.add_workload("dsplit", pool['2X2x2'], 2) + OpArgMngr.add_workload("array_split", pool['2x2'], 2, axis=1) + OpArgMngr.add_workload("vsplit", pool['2x2'], 2) + OpArgMngr.add_workload("hsplit", pool['2x2'], 2) + OpArgMngr.add_workload("dsplit", pool['2x2x2'], 2) OpArgMngr.add_workload("arange", 10) OpArgMngr.add_workload("concatenate", (pool['1x2'], pool['1x2'], pool['1x2']), axis=0) OpArgMngr.add_workload("append", pool['2x2'], pool['1x2'], axis=0) diff --git a/python/mxnet/ndarray/numpy/linalg.py b/python/mxnet/ndarray/numpy/linalg.py index 7d7d6fc064c7..86bf11a00b02 100644 --- a/python/mxnet/ndarray/numpy/linalg.py +++ b/python/mxnet/ndarray/numpy/linalg.py @@ -23,7 +23,52 @@ from . import _api_internal __all__ = ['norm', 'svd', 'cholesky', 'qr', 'inv', 'det', 'slogdet', 'solve', 'tensorinv', 'tensorsolve', - 'pinv', 'eigvals', 'eig', 'eigvalsh', 'eigh', 'lstsq'] + 'pinv', 'eigvals', 'eig', 'eigvalsh', 'eigh', 'lstsq', 'matrix_rank'] + + +def matrix_rank(M, tol=None, hermitian=False): + """ + Return matrix rank of array using SVD method + + Rank of the array is the number of singular values of the array that are + greater than `tol`. + + Parameters + M : {(M,), (..., M, N)} ndarray + Input vector or stack of matrices. + tol : (...) ndarray, float, optional + Threshold below which SVD values are considered zero. If `tol` is + None, and ``S`` is an array with singular values for `M`, and + ``eps`` is the epsilon value for datatype of ``S``, then `tol` is + set to ``S.max() * max(M.shape) * eps``. + hermitian : bool, optional + If True, `M` is assumed to be Hermitian (symmetric if real-valued), + enabling a more efficient method for finding singular values. + Defaults to False. + + Returns + ------- + rank : (...) ndarray + Rank of M. + + Examples + -------- + >>> from mxnet import np + >>> np.matrix_rank(np.eye(4)) # Full rank matrix + 4 + >>> I=np.eye(4); I[-1,-1] = 0. # rank deficient matrix + >>> np.matrix_rank(I) + 3 + >>> np.matrix_rank(np.ones((4,))) # 1 dimension - rank 1 unless all 0 + 1 + >>> np.matrix_rank(np.zeros((4,))) + 0 + """ + finfo_eps_32 = _np.finfo(_np.float32).eps + finfo_eps_64 = _np.finfo(_np.float64).eps + if hermitian is True: + raise NotImplementedError("hermitian is not supported yet...") + return _api_internal.matrix_rank(M, tol, hermitian, finfo_eps_32, finfo_eps_64) def lstsq(a, b, rcond='warn'): diff --git a/python/mxnet/numpy/fallback_linalg.py b/python/mxnet/numpy/fallback_linalg.py index 5e06ff94a4ce..79d6b83062ec 100644 --- a/python/mxnet/numpy/fallback_linalg.py +++ b/python/mxnet/numpy/fallback_linalg.py @@ -24,11 +24,9 @@ __all__ = [ 'cond', 'matrix_power', - 'matrix_rank', 'multi_dot' ] cond = onp.linalg.cond matrix_power = onp.linalg.matrix_power -matrix_rank = onp.linalg.matrix_rank multi_dot = onp.linalg.multi_dot diff --git a/python/mxnet/numpy/linalg.py b/python/mxnet/numpy/linalg.py index 445adfdeaeae..d2756d531b7c 100644 --- a/python/mxnet/numpy/linalg.py +++ b/python/mxnet/numpy/linalg.py @@ -22,10 +22,51 @@ from . import fallback_linalg __all__ = ['norm', 'svd', 'cholesky', 'qr', 'inv', 'det', 'slogdet', 'solve', 'tensorinv', 'tensorsolve', - 'pinv', 'eigvals', 'eig', 'eigvalsh', 'eigh', 'lstsq'] + 'pinv', 'eigvals', 'eig', 'eigvalsh', 'eigh', 'lstsq', 'matrix_rank'] __all__ += fallback_linalg.__all__ +def matrix_rank(M, tol=None, hermitian=False): + """ + Return matrix rank of array using SVD method + + Rank of the array is the number of singular values of the array that are + greater than `tol`. + + Parameters + M : {(M,), (..., M, N)} ndarray + Input vector or stack of matrices. + tol : (...) ndarray, float, optional + Threshold below which SVD values are considered zero. If `tol` is + None, and ``S`` is an array with singular values for `M`, and + ``eps`` is the epsilon value for datatype of ``S``, then `tol` is + set to ``S.max() * max(M.shape) * eps``. + hermitian : bool, optional + If True, `M` is assumed to be Hermitian (symmetric if real-valued), + enabling a more efficient method for finding singular values. + Defaults to False. + + Returns + ------- + rank : (...) ndarray + Rank of M. + + Examples + -------- + >>> from mxnet import np + >>> np.matrix_rank(np.eye(4)) # Full rank matrix + 4 + >>> I=np.eye(4); I[-1,-1] = 0. # rank deficient matrix + >>> np.matrix_rank(I) + 3 + >>> np.matrix_rank(np.ones((4,))) # 1 dimension - rank 1 unless all 0 + 1 + >>> np.matrix_rank(np.zeros((4,))) + 0 + """ + return _mx_nd_np.linalg.matrix_rank(M, tol, hermitian) + + def lstsq(a, b, rcond='warn'): r""" Return the least-squares solution to a linear matrix equation. diff --git a/python/mxnet/numpy_dispatch_protocol.py b/python/mxnet/numpy_dispatch_protocol.py index 9d973e492f04..e693a00ea1a5 100644 --- a/python/mxnet/numpy_dispatch_protocol.py +++ b/python/mxnet/numpy_dispatch_protocol.py @@ -166,6 +166,7 @@ def _run_with_array_ufunc_proto(*args, **kwargs): 'linalg.eigvalsh', 'linalg.eigh', 'linalg.qr', + 'linalg.matrix_rank', 'shape', 'trace', 'tril', diff --git a/python/mxnet/symbol/numpy/linalg.py b/python/mxnet/symbol/numpy/linalg.py index 81740dd0ac2e..da7095520674 100644 --- a/python/mxnet/symbol/numpy/linalg.py +++ b/python/mxnet/symbol/numpy/linalg.py @@ -23,7 +23,40 @@ from . import _internal as _npi __all__ = ['norm', 'svd', 'cholesky', 'qr', 'inv', 'det', 'slogdet', 'solve', 'tensorinv', 'tensorsolve', - 'pinv', 'eigvals', 'eig', 'eigvalsh', 'eigh', 'lstsq'] + 'pinv', 'eigvals', 'eig', 'eigvalsh', 'eigh', 'lstsq', 'matrix_rank'] + + +def matrix_rank(M, tol=None, hermitian=False): + """ + Return matrix rank of array using SVD method + + Rank of the array is the number of singular values of the array that are + greater than `tol`. + + Parameters + M : {(M,), (..., M, N)} _Symbol + Input vector or stack of matrices. + tol : (...) _Symbol, float, optional + Threshold below which SVD values are considered zero. If `tol` is + None, and ``S`` is an array with singular values for `M`, and + ``eps`` is the epsilon value for datatype of ``S``, then `tol` is + set to ``S.max() * max(M.shape) * eps``. + hermitian : bool, optional + If True, `M` is assumed to be Hermitian (symmetric if real-valued), + enabling a more efficient method for finding singular values. + Defaults to False. + + Returns + ------- + rank : (...) _Symbol + Rank of M. + """ + finfo_eps_32 = _np.finfo(_np.float32).eps + finfo_eps_64 = _np.finfo(_np.float64).eps + if tol is None: + return _npi.matrix_rank_none_tol(M, finfo_eps_32, finfo_eps_64, hermitian) + else: + return _npi.matrix_rank(M, tol, hermitian) def lstsq(a, b, rcond='warn'): diff --git a/src/api/operator/numpy/linalg/np_matrix_rank.cc b/src/api/operator/numpy/linalg/np_matrix_rank.cc new file mode 100644 index 000000000000..4bfe66664ef8 --- /dev/null +++ b/src/api/operator/numpy/linalg/np_matrix_rank.cc @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file np_pinv.cc + * \brief Implementation of the API of functions in src/operator/numpy/linalg/np_matrix_rank.cc + */ +#include +#include +#include "../../utils.h" +#include "../../../../operator/numpy/linalg/np_matrix_rank-inl.h" + +namespace mxnet { + +inline static void _npi_matrix_rank_none_tol(runtime::MXNetArgs args, + runtime::MXNetRetValue* ret) { + using namespace runtime; + const nnvm::Op* op = Op::Get("_npi_matrix_rank_none_tol"); + op::MatrixRankNoneTolParam param; + nnvm::NodeAttrs attrs; + param.hermitian = args[2].operator bool(); + param.finfoEps32 = args[3].operator double(); + param.finfoEps64 = args[4].operator double(); + attrs.parsed = param; + attrs.op = op; + SetAttrDict(&attrs); + int num_inputs = 1; + int num_outputs = 0; + NDArray* inputs[] = {args[0].operator mxnet::NDArray*()}; + auto ndoutputs = Invoke(op, &attrs, num_inputs, inputs, &num_outputs, nullptr); + *ret = reinterpret_cast(ndoutputs[0]); +} + +inline static void _npi_matrix_rank(runtime::MXNetArgs args, + runtime::MXNetRetValue* ret) { + using namespace runtime; + const nnvm::Op* op = Op::Get("_npi_matrix_rank"); + op::MatrixRankParam param; + nnvm::NodeAttrs attrs; + param.hermitian = args[2].operator bool(); + attrs.parsed = param; + attrs.op = op; + SetAttrDict(&attrs); + int num_inputs = 2; + int num_outputs = 0; + NDArray* inputs[] = {args[0].operator mxnet::NDArray*(), args[1].operator mxnet::NDArray*()}; + auto ndoutputs = Invoke(op, &attrs, num_inputs, inputs, &num_outputs, nullptr); + *ret = reinterpret_cast(ndoutputs[0]); +} + +MXNET_REGISTER_API("_npi.matrix_rank") +.set_body([](runtime::MXNetArgs args, runtime::MXNetRetValue* ret) { + if (args[1].type_code() == kNull) { + _npi_matrix_rank_none_tol(args, ret); + } else { + _npi_matrix_rank(args, ret); + } +}); + +} // namespace mxnet diff --git a/src/operator/numpy/linalg/np_matrix_rank-inl.h b/src/operator/numpy/linalg/np_matrix_rank-inl.h new file mode 100644 index 000000000000..8ccecb57db11 --- /dev/null +++ b/src/operator/numpy/linalg/np_matrix_rank-inl.h @@ -0,0 +1,449 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2020 by Contributors + * \file np_matrix_rank-inl.h + * \brief Placeholder for matrix_rank + */ +#ifndef MXNET_OPERATOR_NUMPY_LINALG_NP_MATRIX_RANK_INL_H_ +#define MXNET_OPERATOR_NUMPY_LINALG_NP_MATRIX_RANK_INL_H_ + +#include +#include +#include +#include +#include +#include "../../operator_common.h" +#include "../../mshadow_op.h" +#include "./np_pinv-inl.h" + +namespace mxnet { +namespace op { + +using namespace mshadow; + +struct MatrixRankNoneTolParam : public dmlc::Parameter { + float finfoEps32; + double finfoEps64; + bool hermitian; + DMLC_DECLARE_PARAMETER(MatrixRankNoneTolParam) { + DMLC_DECLARE_FIELD(finfoEps32) + .set_default(0) + .describe("Machine limits for float32 type"); + DMLC_DECLARE_FIELD(finfoEps64) + .set_default(0) + .describe("Machine limits for float64 type"); + DMLC_DECLARE_FIELD(hermitian) + .set_default(false) + .describe("If True, M is assumed to be Hermitian (symmetric if real-valued)."); + } + void SetAttrDict(std::unordered_map* dict) { + std::ostringstream finfoEps32_s, finfoEps64_s, hermitian_s; + finfoEps32_s << finfoEps32; + finfoEps64_s << finfoEps64; + hermitian_s << hermitian; + (*dict)["finfoEps32"] = finfoEps32_s.str(); + (*dict)["finfoEps64"] = finfoEps64_s.str(); + (*dict)["hermitian"] = hermitian_s.str(); + } +}; + +struct MatrixRankParam : public dmlc::Parameter { + bool hermitian; + DMLC_DECLARE_PARAMETER(MatrixRankParam) { + DMLC_DECLARE_FIELD(hermitian) + .set_default(false) + .describe("If True, M is assumed to be Hermitian (symmetric if real-valued)."); + } + void SetAttrDict(std::unordered_map* dict) { + std::ostringstream hermitian_s; + hermitian_s << hermitian; + (*dict)["hermitian"] = hermitian_s.str(); + } +}; + +template +struct VectorRankKernel { + template + MSHADOW_XINLINE static void Map(int i, const DType *in_data, + int64_t *out_data, const int& data_size) { + bool all_nozero = true; + for (int j = 0; j < data_size; ++j) { + if (!((in_data[j] > 0 ? in_data[j] : -in_data[j]) > 0)) { + all_nozero = false; + break; + } + } + KERNEL_ASSIGN(*out_data, req, static_cast(all_nozero ? 1 : 0)); + } +}; + +template +struct MatrixRankNoneTolKernel { + template + MSHADOW_XINLINE static void Map(int i, const DType *in_data, int64_t *out_data, + const int& nrow, const int& ncol, const double& finfoEps, + const int& data_size, const int& batch_size) { + if (i < batch_size) { + DType max_singular_value = 0; + for (int j = 0; j < data_size; ++j) { + DType sv = in_data[j + i * data_size]; + max_singular_value = sv > max_singular_value ? sv : max_singular_value; + } + double tol = (nrow > ncol ? nrow : ncol) * static_cast(max_singular_value) * finfoEps; + int64_t rank_num = 0; + for (int j = 0; j < data_size; ++j) { + rank_num += in_data[j + i * data_size] > tol ? 1 : 0; + } + KERNEL_ASSIGN(out_data[i], req, rank_num); + } + } +}; + +template +struct MatrixRankKernel { + template + MSHADOW_XINLINE static void Map(int i, const DType *in_data, int64_t *out_data, + const int& data_size, const int& batch_size) { + if (i < batch_size) { + int64_t rank_num = 0; + for (int j = 0; j < data_size; ++j) { + rank_num += in_data[j + i * data_size] > 0 ? 1 : 0; + } + KERNEL_ASSIGN(out_data[i], req, rank_num); + } + } +}; + +struct SVDWrapper { + template + static void op(const TBlob& a, const TBlob& s, + const TBlob& u, const mxnet::TShape& ut_shape, + const TBlob& v, const mxnet::TShape& vt_shape, + const TBlob& work, const OpContext& ctx) { + Stream *s_xpu = ctx.get_stream(); + const mxnet::TShape& a_shape = a.shape_; + const mxnet::TShape& ut_axis = GetTransAxis(u.shape_); + const int a_ndim = a.ndim(); + const int nrow = a_shape[a_ndim - 2]; + const int ncol = a_shape[a_ndim - 1]; + if (nrow > ncol) { + const_cast(u) = u.reshape(ut_shape); + const_cast(v) = v.reshape(vt_shape); + mxnet::op::TransposeImpl(ctx.run_ctx, a, u, ut_axis); + BatchSVDImpl(ncol, nrow, + v.FlatToKD(s_xpu), + s.FlatToKD(s_xpu), + u.FlatToKD(s_xpu), + work.FlatToKD(s_xpu), s_xpu); + } else { + if (a.dptr() != v.dptr()) { + Copy(v.FlatToKD(s_xpu), a.FlatToKD(s_xpu), s_xpu); + } + BatchSVDImpl(nrow, ncol, + u.FlatToKD(s_xpu), + s.FlatToKD(s_xpu), + v.FlatToKD(s_xpu), + work.FlatToKD(s_xpu), s_xpu); + } + } +}; + +inline void GetOrCheckBroadcastShape(const nnvm::NodeAttrs& attrs, + const mxnet::TShape& a_shape, + const mxnet::TShape& tol_shape, + mxnet::TShape *broadcast_shape = nullptr, + mxnet::TShape *new_tol_shape = nullptr) { + CHECK_GE(a_shape.ndim(), 2); + const int a_ndim = a_shape.ndim(); + const int tol_ndim = tol_shape.ndim(); + const int nrow = a_shape[a_ndim - 2]; + const int ncol = a_shape[a_ndim - 1]; + // Get new tol shape. + mxnet::TShape temp_new_tol_shape(tol_ndim + 1, 1); + for (int i = 0; i < tol_ndim; ++i) { temp_new_tol_shape[i] = tol_shape[i]; } + // Get singular value shape. + mxnet::TShape temp_s_shape(a_ndim - 1, 0); + for (int i = 0; i < a_ndim - 2; ++i) { + temp_s_shape[i] = a_shape[i]; + } + temp_s_shape[a_ndim - 2] = std::min(nrow, ncol); + // Check binary broadcast shape. + mxnet::ShapeVector in_shape_vec({ temp_s_shape, temp_new_tol_shape }); + mxnet::ShapeVector out_shape_vec(1, mxnet::TShape()); + mxnet::op::BinaryBroadcastShape(attrs, &in_shape_vec, &out_shape_vec); + // Assign shape. + if (broadcast_shape) { + *broadcast_shape = out_shape_vec[0]; + } + if (new_tol_shape) { + *new_tol_shape = temp_new_tol_shape; + } +} + +template +struct WSQ { + static size_t SVDWorkspaceSizeQuery(const TBlob& a, + const mxnet::TShape& u_shape, + const mxnet::TShape& s_shape, + const mxnet::TShape& v_shape, + const OpContext& ctx) { + size_t workspace_size = 0; + Stream *s = ctx.get_stream(); + const int a_ndim = a.shape_.ndim(); + const int u_ndim = u_shape.ndim(); + const int s_ndim = s_shape.ndim(); + const int v_ndim = v_shape.ndim(); + mxnet::TShape u_shape2 = Shape2(u_shape[u_ndim - 2], u_shape[u_ndim - 1]); + mxnet::TShape s_shape1 = Shape1(s_shape[s_ndim - 1]); + mxnet::TShape v_shape2 = Shape2(v_shape[v_ndim - 2], v_shape[v_ndim - 1]); + if (xpu::kDevCPU) { + std::vector u_vec(u_shape2.Size(), 0); + std::vector s_vec(s_shape1.Size(), 0); + std::vector v_vec(v_shape2.Size(), 0); + // Get workspace size in linalg_gesdd. + workspace_size += linalg_gesdd_workspace_query( + a.shape_[a_ndim - 2], a.shape_[a_ndim - 1], + TBlob(u_vec.data(), u_shape2, a.dev_mask(), a.dev_id()).get(s), + TBlob(s_vec.data(), s_shape1, a.dev_mask(), a.dev_id()).get(s), + TBlob(v_vec.data(), v_shape2, a.dev_mask(), a.dev_id()).get(s), s); + } else { + Storage::Handle u_handle = + Storage::Get()->Alloc(sizeof(DType) * u_shape2.Size(), Context::GPU()); + Storage::Handle s_handle = + Storage::Get()->Alloc(sizeof(DType) * s_shape1.Size(), Context::GPU()); + Storage::Handle v_handle = + Storage::Get()->Alloc(sizeof(DType) * v_shape2.Size(), Context::GPU()); + TBlob u_data(static_cast(u_handle.dptr), u_shape2, a.dev_mask(), a.dev_id()); + TBlob s_data(static_cast(s_handle.dptr), s_shape1, a.dev_mask(), a.dev_id()); + TBlob v_data(static_cast(v_handle.dptr), v_shape2, a.dev_mask(), a.dev_id()); + // Get workspace size in linalg_gesvd. + if (a.shape_[a_ndim - 2] >= a.shape_[a_ndim - 1]) { + workspace_size += linalg_gesvd_workspace_query(v_data.get(s), + s_data.get(s), + u_data.get(s), s); + } else { + workspace_size += linalg_gesvd_workspace_query(u_data.get(s), + s_data.get(s), + v_data.get(s), s); + } + Storage::Get()->Free(u_handle); + Storage::Get()->Free(s_handle); + Storage::Get()->Free(v_handle); + } + return workspace_size; + } + + static size_t MatrixRankNoneTolForwardWSQ(size_t *svd_workspace_size, + const TBlob& a, + const OpContext& ctx) { + size_t workspace_size = 0; + mxnet::TShape u_shape, s_shape, v_shape; + GetPinvShape(a.shape_, &u_shape, &s_shape, &v_shape); + *svd_workspace_size = SVDWorkspaceSizeQuery(a, u_shape, s_shape, v_shape, ctx); + workspace_size += *svd_workspace_size; // For #gesdd_ or #gesvd work space. + workspace_size += u_shape.Size(); // For UT. + workspace_size += s_shape.Size(); // For S. + workspace_size += v_shape.Size(); // For V. + return workspace_size * sizeof(DType); + } + + static size_t MatrixRankForwardWSQ(size_t *svd_workspace_size, + const TBlob& a, + const TBlob& tol, + const nnvm::NodeAttrs& attrs, + const OpContext& ctx) { + const mxnet::TShape a_shape = a.shape_; + const mxnet::TShape tol_shape = tol.shape_; + size_t workspace_size = 0; + mxnet::TShape u_shape, s_shape, v_shape; + GetPinvShape(a.shape_, &u_shape, &s_shape, &v_shape); + mxnet::TShape broadcast_shape, new_tol_shape; + GetOrCheckBroadcastShape(attrs, a_shape, tol_shape, &broadcast_shape, &new_tol_shape); + *svd_workspace_size = SVDWorkspaceSizeQuery(a, u_shape, s_shape, v_shape, ctx); + workspace_size += *svd_workspace_size; // For #gesdd_ or #gesvd work space. + workspace_size += u_shape.Size(); // For UT. + workspace_size += s_shape.Size(); // For S. + workspace_size += v_shape.Size(); // For V. + workspace_size += new_tol_shape.Size(); // For tol with newaxis. + workspace_size += broadcast_shape.Size(); // For binary broadcast shape. + return workspace_size * sizeof(DType); + } +}; + +template +void MatrixRankNoneTolForwardImpl(const TBlob& a, + const TBlob& rank, + const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& req) { + Stream *s = ctx.get_stream(); + const mxnet::TShape& a_shape = a.shape_; + const int a_ndim = a.ndim(); + MSHADOW_SGL_DBL_TYPE_SWITCH(a.type_flag_, DType, { + MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, { + if (a_ndim < 2) { + mxnet_op::Kernel, xpu>::Launch( + s, 1, a.dptr(), rank.dptr(), a.Size()); + return; + } + // a_ndim >= 2 + const int nrow = a_shape[a_ndim - 2]; + const int ncol = a_shape[a_ndim - 1]; + const MatrixRankNoneTolParam& param = nnvm::get(attrs.parsed); + CHECK_EQ(param.hermitian, false) + << "matrix_rank not support param.hermitian = true at present."; + double finfoEps = a.type_flag_ == mshadow::kFloat32 ? param.finfoEps32 : param.finfoEps64; + // Step1: Calculate workspace size. + size_t svd_workspace_size = 0; + size_t workspace_size = + WSQ::MatrixRankNoneTolForwardWSQ(&svd_workspace_size, a, ctx); + Tensor workspace = + ctx.requested[0].get_space_typed(Shape1(workspace_size), s); + // Step2: Allocate memory. + mxnet::TShape s_shape, u_shape, v_shape, ut_shape, vt_shape; + GetPinvShape(a_shape, &u_shape, &s_shape, &v_shape, &ut_shape, &vt_shape); + DType *s_ptr = reinterpret_cast(workspace.dptr_); + DType *u_ptr = s_ptr + s_shape.Size(); + DType *v_ptr = u_ptr + u_shape.Size(); + DType *work_ptr = v_ptr + v_shape.Size(); + TBlob s_data(s_ptr, s_shape, a.dev_mask(), a.dev_id()); + TBlob u_data(u_ptr, u_shape, a.dev_mask(), a.dev_id()); + TBlob v_data(v_ptr, v_shape, a.dev_mask(), a.dev_id()); + TBlob work_data(work_ptr, Shape1(svd_workspace_size), a.dev_mask(), a.dev_id()); + // Step3: SVD. + SVDWrapper::op(a, s_data, u_data, ut_shape, v_data, vt_shape, work_data, ctx); + // Step4: Calculate rank. + const int data_size = s_data.size(s_data.ndim() - 1); + const int batch_size = a_ndim == 2 ? 1 : s_shape.ProdShape(0, s_shape.ndim() - 1); + mxnet_op::Kernel, xpu>::Launch(s, batch_size, + s_data.dptr(), + rank.dptr(), + nrow, ncol, finfoEps, + data_size, batch_size); + }); + }); +} + +template +void MatrixRankNoneTolForward(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + CHECK_EQ(inputs.size(), 1U); + CHECK_EQ(outputs.size(), 1U); + CHECK_EQ(req.size(), 1U); + if (kNullOp == req[0]) { return; } + CHECK(req[0] == kWriteTo || req[0] == kWriteInplace); + + const TBlob& a = inputs[0]; + const TBlob& rank = outputs[0]; + MatrixRankNoneTolForwardImpl(a, rank, attrs, ctx, req); +} + +template +void MatrixRankForwardImpl(const TBlob& a, + const TBlob& tol, + const TBlob& rank, + const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& req) { + Stream *s = ctx.get_stream(); + const mxnet::TShape& a_shape = a.shape_; + const mxnet::TShape& tol_shape = tol.shape_; + const int a_ndim = a.ndim(); + MSHADOW_SGL_DBL_TYPE_SWITCH(a.type_flag_, DType, { + MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, { + if (a_ndim < 2) { + mxnet_op::Kernel, xpu>::Launch( + s, 1, a.dptr(), rank.dptr(), a.Size()); + return; + } + // a_ndim >= 2 + const MatrixRankParam& param = nnvm::get(attrs.parsed); + CHECK_EQ(param.hermitian, false) + << "matrix_rank not support param.hermitian = true at present."; + mxnet::TShape s_shape, u_shape, v_shape, ut_shape, vt_shape; + GetPinvShape(a_shape, &u_shape, &s_shape, &v_shape, &ut_shape, &vt_shape); + mxnet::TShape broadcast_shape, new_tol_shape; + GetOrCheckBroadcastShape(attrs, a_shape, tol_shape, &broadcast_shape, &new_tol_shape); + // Step1: Calculate workspace size. + size_t svd_workspace_size = 0; + size_t workspace_size = + WSQ::MatrixRankForwardWSQ(&svd_workspace_size, a, tol, attrs, ctx); + Tensor workspace = + ctx.requested[0].get_space_typed(Shape1(workspace_size), s); + // Step2: Allocate memory. + DType *s_ptr = reinterpret_cast(workspace.dptr_); + DType *u_ptr = s_ptr + s_shape.Size(); + DType *v_ptr = u_ptr + u_shape.Size(); + DType *work_ptr = v_ptr + v_shape.Size(); + DType *new_tol_ptr = work_ptr + svd_workspace_size; + DType *broadcast_ptr = new_tol_ptr + new_tol_shape.Size(); + TBlob s_data(s_ptr, s_shape, a.dev_mask(), a.dev_id()); + TBlob u_data(u_ptr, u_shape, a.dev_mask(), a.dev_id()); + TBlob v_data(v_ptr, v_shape, a.dev_mask(), a.dev_id()); + TBlob work_data(work_ptr, Shape1(svd_workspace_size), a.dev_mask(), a.dev_id()); + TBlob new_tol_data(new_tol_ptr, new_tol_shape, a.dev_mask(), a.dev_id()); + TBlob broadcast_data(broadcast_ptr, broadcast_shape, a.dev_mask(), a.dev_id()); + // Step3: SVD. + SVDWrapper::op(a, s_data, u_data, ut_shape, v_data, vt_shape, work_data, ctx); + // Step4: Calculate broadcast data. + if (new_tol_data.dptr() != tol.dptr()) { + Copy(new_tol_data.FlatTo1D(s), tol.FlatTo1D(s), s); + } + mxnet::op::BinaryBroadcastCompute(attrs, ctx, + {s_data, new_tol_data}, + {kWriteTo}, {broadcast_data}); + // Step5: Calculate rank. + const int b_ndim = broadcast_shape.ndim(); + const int data_size = broadcast_data.size(b_ndim - 1); + const int batch_size = b_ndim == 1 ? 1 : broadcast_shape.ProdShape(0, b_ndim - 1); + mxnet_op::Kernel, xpu>::Launch(s, batch_size, + broadcast_data.dptr(), + rank.dptr(), + data_size, batch_size); + }); + }); +} + +template +void MatrixRankForward(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + CHECK_EQ(inputs.size(), 2U); + CHECK_EQ(outputs.size(), 1U); + CHECK_EQ(req.size(), 1U); + if (kNullOp == req[0]) { return; } + CHECK(req[0] == kWriteTo || req[0] == kWriteInplace); + + const TBlob& a = inputs[0]; + const TBlob& tol = inputs[1]; + const TBlob& rank = outputs[0]; + MatrixRankForwardImpl(a, tol, rank, attrs, ctx, req); +} + +} // namespace op +} // namespace mxnet + +#endif // MXNET_OPERATOR_NUMPY_LINALG_NP_MATRIX_RANK_INL_H_ diff --git a/src/operator/numpy/linalg/np_matrix_rank.cc b/src/operator/numpy/linalg/np_matrix_rank.cc new file mode 100644 index 000000000000..d3794a1de0e9 --- /dev/null +++ b/src/operator/numpy/linalg/np_matrix_rank.cc @@ -0,0 +1,165 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2020 by Contributors + * \file np_matrix_rank.cc + * \brief CPU implementation of the matrix_rank Operator + */ +#include "./np_matrix_rank-inl.h" + +namespace mxnet { +namespace op { + +inline bool MatrixRankNoneTolShape(const nnvm::NodeAttrs& attrs, + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + const mxnet::TShape& a_shape = (*in_attrs)[0]; + const int a_ndim = a_shape.ndim(); + + if (shape_is_known(a_shape)) { + CHECK_GT(a_shape.Size(), 0U) + << "Not support zero-size input array which has no identity"; + if (a_ndim < 2) { + SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape(0, 0)); + } else { + mxnet::TShape rank_shape(a_ndim - 2, 0); + for (int i = 0; i < a_ndim - 2; ++i) { rank_shape[i] = a_shape[i]; } + SHAPE_ASSIGN_CHECK(*out_attrs, 0, rank_shape); + } + } + return shape_is_known(*in_attrs) && shape_is_known(*out_attrs); +} + +inline bool MatrixRankNoneTolType(const nnvm::NodeAttrs& attrs, + std::vector* in_attrs, + std::vector* out_attrs) { + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + int a_type = in_attrs->at(0); + + CHECK_NE(a_type, mshadow::kFloat16) + << "array type float16 is unsupported in linalg."; + CHECK(a_type == mshadow::kFloat32 || a_type == mshadow::kFloat64) + << "array type should be float32 or float64."; + TYPE_ASSIGN_CHECK(*out_attrs, 0, mshadow::kInt64); + return out_attrs->at(0) != -1; +} + +DMLC_REGISTER_PARAMETER(MatrixRankNoneTolParam); + +NNVM_REGISTER_OP(_npi_matrix_rank_none_tol) +.describe(R"code()code" ADD_FILELINE) +.set_attr_parser(mxnet::op::ParamParser) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr("FListInputNames", [](const NodeAttrs& attrs){ + return std::vector{"M"}; +}) +.set_attr("FInferShape", MatrixRankNoneTolShape) +.set_attr("FInferType", MatrixRankNoneTolType) +.set_attr("FResourceRequest", [](const NodeAttrs& attrs){ + return std::vector{ResourceRequest::kTempSpace}; +}) +.set_attr("FCompute", MatrixRankNoneTolForward) +.set_attr("FGradient", MakeZeroGradNodes) +.add_argument("M", "NDArray-or-Symbol", "Tensor of matrix") +.add_arguments(MatrixRankNoneTolParam::__FIELDS__()); + +inline bool MatrixRankShape(const nnvm::NodeAttrs& attrs, + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { + CHECK_EQ(in_attrs->size(), 2U); + CHECK_EQ(out_attrs->size(), 1U); + const mxnet::TShape& a_shape = (*in_attrs)[0]; + const mxnet::TShape& tol_shape = (*in_attrs)[1]; + const int a_ndim = a_shape.ndim(); + const int tol_ndim = tol_shape.ndim(); + + if (shape_is_known(a_shape) && shape_is_known(tol_shape)) { + CHECK_GT(a_shape.Size(), 0U) + << "Not support zero-size input array which has no identity"; + if (a_ndim < 2) { + SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape(0, 0)); + } else { + mxnet::TShape broadcast_shape; + GetOrCheckBroadcastShape(attrs, a_shape, tol_shape, &broadcast_shape); + if (broadcast_shape.ndim() == 1) { + if (tol_ndim == 0) { + SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape(0, 0)); + } else { + SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape(1, 1)); + } + } else { + mxnet::TShape rank_shape(broadcast_shape.ndim() - 1, 0); + for (int i = 0; i < broadcast_shape.ndim() - 1; ++i) { + rank_shape[i] = broadcast_shape[i]; + } + SHAPE_ASSIGN_CHECK(*out_attrs, 0, rank_shape); + } + } + } + return shape_is_known(*in_attrs) && shape_is_known(*out_attrs); +} + +inline bool MatrixRankType(const nnvm::NodeAttrs& attrs, + std::vector* in_attrs, + std::vector* out_attrs) { + CHECK_EQ(in_attrs->size(), 2U); + CHECK_EQ(out_attrs->size(), 1U); + int a_type = in_attrs->at(0); + int tol_type = in_attrs->at(1); + + CHECK_NE(a_type, mshadow::kFloat16) + << "array type float16 is unsupported in linalg."; + CHECK(a_type == mshadow::kFloat32 || a_type == mshadow::kFloat64) + << "array type should be float32 or float64."; + CHECK(tol_type == mshadow::kFloat32 || tol_type == mshadow::kFloat64) + << "tol type should be float32 or float64."; + CHECK_EQ(a_type, tol_type) + << "array type and tol type should be the same."; + TYPE_ASSIGN_CHECK(*out_attrs, 0, mshadow::kInt64); + return out_attrs->at(0) != -1; +} + +DMLC_REGISTER_PARAMETER(MatrixRankParam); + +NNVM_REGISTER_OP(_npi_matrix_rank) +.describe(R"code()code" ADD_FILELINE) +.set_attr_parser(mxnet::op::ParamParser) +.set_num_inputs(2) +.set_num_outputs(1) +.set_attr("FListInputNames", [](const NodeAttrs& attrs){ + return std::vector{"M", "tol"}; +}) +.set_attr("FInferShape", MatrixRankShape) +.set_attr("FInferType", MatrixRankType) +.set_attr("FResourceRequest", [](const NodeAttrs& attrs){ + return std::vector{ResourceRequest::kTempSpace}; +}) +.set_attr("FCompute", MatrixRankForward) +.set_attr("FGradient", MakeZeroGradNodes) +.add_argument("M", "NDArray-or-Symbol", "Tensor of matrix") +.add_argument("tol", "NDArray-or-Symbol", "Tensor of matrix") +.add_arguments(MatrixRankParam::__FIELDS__()); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/numpy/linalg/np_matrix_rank.cu b/src/operator/numpy/linalg/np_matrix_rank.cu new file mode 100644 index 000000000000..9528f698d35c --- /dev/null +++ b/src/operator/numpy/linalg/np_matrix_rank.cu @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2020 by Contributors + * \file np_matrix_rank.cu + * \brief GPU implementation of the matrix_rank Operator + */ +#include +#include "./np_matrix_rank-inl.h" + +namespace mxnet { +namespace op { + +NNVM_REGISTER_OP(_npi_matrix_rank_none_tol) +.set_attr("FCompute", MatrixRankNoneTolForward); + +NNVM_REGISTER_OP(_npi_matrix_rank) +.set_attr("FCompute", MatrixRankForward); + +} // namespace op +} // namespace mxnet diff --git a/tests/python/unittest/test_numpy_interoperability.py b/tests/python/unittest/test_numpy_interoperability.py index 1f795721b820..79a45f5e46f6 100644 --- a/tests/python/unittest/test_numpy_interoperability.py +++ b/tests/python/unittest/test_numpy_interoperability.py @@ -2113,12 +2113,29 @@ def _add_workload_linalg_matrix_power(): def _add_workload_linalg_matrix_rank(): - a = np.eye(4) - b = a; b[-1,-1] = 0 - c = np.ones((4,)) - OpArgMngr.add_workload('linalg.matrix_rank', a) - OpArgMngr.add_workload('linalg.matrix_rank', b) - OpArgMngr.add_workload('linalg.matrix_rank', c) + shapes = [ + ((4, 3), ()), + ((4, 3), (1,)), + ((4, 3), (2, 3,)), + ((2, 1, 1), (1,)), + ((2, 3, 3), (2,)), + ((2, 3, 1, 1), ()), + ((2, 3, 4, 4), (1, 3)), + ((2, 3, 4, 5), (2, 3)), + ((2, 3, 5, 4), (2, 3)), + ] + dtypes = (np.float32, np.float64) + for dtype in dtypes: + for a_shape, tol_shape in shapes: + for tol_is_none in [True, False]: + a_np = _np.asarray(_np.random.uniform(-10., 10., a_shape)) + a = np.array(a_np, dtype=dtype) + if tol_is_none: + OpArgMngr.add_workload('linalg.matrix_rank', a, None, False) + else: + tol_np = _np.random.uniform(10., 20., tol_shape) + tol = np.array(tol_np, dtype=dtype) + OpArgMngr.add_workload('linalg.matrix_rank', a, tol, False) def _add_workload_linalg_multi_dot(): diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py index 063c3b7a58a6..111f0282283e 100644 --- a/tests/python/unittest/test_numpy_op.py +++ b/tests/python/unittest/test_numpy_op.py @@ -5625,6 +5625,83 @@ def check_lstsq(a_np, b_np, rcond_np, x, residuals, rank, s): check_lstsq(a_np, b_np, rcond, x, residuals, rank, s) +@with_seed() +@use_np +def test_np_linalg_matrix_rank(): + class TestMatrixRank(HybridBlock): + def __init__(self, hermitian): + super(TestMatrixRank, self).__init__() + self._hermitian = hermitian + + def hybrid_forward(self, F, M, tol=None): + return F.np.linalg.matrix_rank(M, tol, hermitian=self._hermitian) + + def check_matrix_rank(rank, a_np, tol, hermitian): + try: + rank_expected = _np.linalg.matrix_rank(a_np, tol=tol, hermitian=hermitian) + except Exception as e: + print("a:", a_np) + print("a shape:", a_np.shape) + print(e) + else: + if a_np.ndim < 2: + assert rank.shape == _np.asarray(rank_expected).shape + else: + assert rank.shape == rank_expected.shape + assert_almost_equal(rank.asnumpy(), rank_expected, rtol=rtol, atol=atol) + + shapes = [ + ((), ()), + ((1,), (1,)), + ((3,), (1,)), + ((1, 1), ()), + ((1, 1), (1,)), + ((3, 3), (1,)), + ((3, 4), (1,)), + ((4, 3), ()), + ((4, 3), (1,)), + ((4, 3), (2,)), + ((4, 3), (2, 3,)), + ((2, 1, 1), ()), + ((2, 1, 1), (1,)), + ((2, 3, 3), (2,)), + ((2, 3, 4), (1,)), + ((2, 4, 3), (2,)), + ((2, 3, 1, 1), ()), + ((2, 3, 1, 1), (1, 1)), + ((2, 3, 1, 1), (2, 1)), + ((2, 3, 4, 4), (1, 3)), + ((2, 3, 4, 5), (2, 1)), + ((2, 3, 5, 4), (1, 3)), + ((2, 3, 1, 1), (2, 3)), + ((2, 3, 4, 4), (2, 3)), + ((2, 3, 4, 5), (2, 3)), + ((2, 3, 5, 4), (2, 3)), + ] + dtypes = ['float32', 'float64'] + for dtype in dtypes: + for a_shape, tol_shape in shapes: + for tol_is_none, hybridize in itertools.product([True, False], [True, False]): + rtol = 1e-3 + atol = 1e-5 + test_matrix_rank = TestMatrixRank(hermitian=False) + if hybridize: + test_matrix_rank.hybridize() + + a_np = _np.asarray(_np.random.uniform(-10., 10., a_shape)) + a = np.array(a_np, dtype=dtype) + if tol_is_none: + rank = test_matrix_rank(a) + # check matrix_rank validity + check_matrix_rank(rank, a.asnumpy(), tol=None, hermitian=False) + else: + tol_np = _np.random.uniform(10., 20., tol_shape) + tol = np.array(tol_np, dtype=dtype) + rank = test_matrix_rank(a, tol) + # check matrix_rank validity + check_matrix_rank(rank, a.asnumpy(), tol.asnumpy(), hermitian=False) + + @with_seed() @use_np def test_np_linalg_pinv():