diff --git a/Makefile b/Makefile
index d758c443241e..74c71a9cbc07 100644
--- a/Makefile
+++ b/Makefile
@@ -13,10 +13,15 @@ ifndef DMLC_CORE
 endif
+ifneq ($(USE_OPENMP_ITER), 1)
+	export NO_OPENMP = 1
+endif
+
 # use customized config file
 include $(config)
 include mshadow/make/mshadow.mk
 include $(DMLC_CORE)/make/dmlc.mk
+unexport NO_OPENMP
 # all tge possible warning tread
 WARNFLAGS= -Wall
@@ -39,10 +44,21 @@ endif
 # setup opencv
 ifeq ($(USE_OPENCV),1)
-	CFLAGS+= -DCXXNET_USE_OPENCV=1
+	CFLAGS+= -DMXNET_USE_OPENCV=1
 	LDFLAGS+= `pkg-config --libs opencv`
 else
-	CFLAGS+= -DCXXNET_USE_OPENCV=0
+	CFLAGS+= -DMXNET_USE_OPENCV=0
+endif
+
+# setup opencv decoder
+ifeq ($(USE_OPENCV_DECODER),1)
+	CFLAGS+= -DMXNET_USE_OPENCV_DECODER=1
+else
+	CFLAGS+= -DMXNET_USE_OPENCV_DECODER=0
+endif
+
+ifeq ($(USE_OPENMP_ITER), 1)
+	CFLAGS += -fopenmp
 endif
 ifeq ($(USE_CUDNN), 1)
@@ -62,7 +78,7 @@ endif
 ENGINE=naive_engine.o
 BIN = tests/test_simple_engine
 OBJ = narray_function_cpu.o
-OBJCXX11 = narray.o c_api.o operator.o symbol.o storage.o static_graph.o graph_executor.o io.o iter_mnist.o $(ENGINE)
+OBJCXX11 = narray.o c_api.o operator.o symbol.o storage.o static_graph.o graph_executor.o io.o iter_mnist.o iter_image_recordio.o $(ENGINE)
 CUOBJ = narray_function_gpu.o
 SLIB = lib/libmxnet.so
 ALIB = lib/libmxnet.a
@@ -92,6 +108,7 @@ operator.o: src/operator/operator.cc
 c_api.o: src/c_api.cc
 io.o: src/io/io.cc
 iter_mnist.o: src/io/iter_mnist.cc src/io/*.h
+iter_image_recordio.o: src/io/iter_image_recordio.cc src/io/*.h
 # Rules for operators
 OPERATOR_HDR=$(wildcard src/operator/*-inl.h)
diff --git a/dmlc-core b/dmlc-core
index 75f1950d386d..7d3c78428819 160000
--- a/dmlc-core
+++ b/dmlc-core
@@ -1 +1 @@
-Subproject commit 75f1950d386d033b0b64919017515d27e698962a
+Subproject commit 7d3c78428819dc84c4da8ae1f302ba6c6a235a5d
diff --git a/example/cifar10/cifar10.py b/example/cifar10/cifar10.py
index 20694b7064da..14d9bd1b8971 100644
--- 
a/example/cifar10/cifar10.py
+++ b/example/cifar10/cifar10.py
@@ -144,8 +144,109 @@ def RandomInit(narray):
 flatten = mx.symbol.Flatten(data=pool, name="flatten1")
 fc = mx.symbol.FullyConnected(data=flatten, num_hidden=10, name="fc1")
 loss = mx.symbol.Softmax(data=fc, name="softmax")
+args_list = loss.list_arguments()
 data_shape = (128, 3, 28, 28)
 arg_shapes, out_shapes, aux_shapes = loss.infer_shape(data=data_shape)
+arg_narrays = [mx.narray.create(shape, ctx=mx.Context("gpu")) for shape in arg_shapes]
+grad_narrays = [mx.narray.create(shape, ctx=mx.Context("gpu")) for shape in arg_shapes]
+inputs = dict(zip(args_list, arg_narrays))
+
+name2shape = dict(zip(args_list, arg_shapes))
+pred = mx.narray.create(out_shapes[0])
+
+np.random.seed(0)
+# set random weight
+for name, narray in inputs.items():
+    if "weight" in name:
+        tmp = mx.narray.create(name2shape[name])
+        tmp.numpy[:] = np.random.uniform(-0.07, 0.07, name2shape[name])
+        tmp.copyto(narray)
+    if "bias" in name:
+        narray[:] = 0.0
+
+# bind executer
+# TODO(bing): think of a better bind interface
+executor = loss.bind(mx.Context('gpu'), arg_narrays, grad_narrays)
+# update
+
+out_narray = executor.heads()[0]
+grad_narray = mx.narray.create(out_narray.shape)
+
+epoch = 9
+lr = 0.1
+wd = 0.0004
+
+def Update(grad, weight):
+    """Apply one in-place SGD step to weight using the batch-averaged grad."""
+    weight[:] -= lr * grad / batch_size
+
+block = list(zip(grad_narrays, arg_narrays))
+
+#check data
+get_data.GetCifar10()
+train_dataiter = mx.io.ImageRecordIter(
+        path_imgrec="data/cifar/train.rec",
+        mean_img="data/cifar/cifar_mean.bin",
+        rand_crop=True,
+        rand_mirror=True,
+        input_shape=(3,28,28),
+        batch_size=128,
+        nthread=1)
+val_dataiter = mx.io.ImageRecordIter(
+        path_imgrec="data/cifar/test.rec",
+        mean_img="data/cifar/cifar_mean.bin",
+        rand_crop=True,
+        rand_mirror=True,
+        input_shape=(3,28,28),
+        batch_size=128,  # must match the batch dimension of data_shape bound above
+        nthread=1)
+
+tmp_label = mx.narray.create(name2shape["sm_label"])
+
+def test_cifar():
+    """Train for `epoch` passes, evaluating on the validation set after each pass."""
+    acc_train = 0.
+    acc_val = 0.
+    for i in range(epoch):
+        # train
+        print("Epoch %d" % i)
+        train_acc = 0.0
+        val_acc = 0.0
+        train_nbatch = 0
+        val_nbatch = 0
+        for data, label in train_dataiter:
+            tmp_label.numpy[:] = label.numpy.reshape(tmp_label.shape)
+            data.copyto(inputs["data"])
+            tmp_label.copyto(inputs["sm_label"])
+            executor.forward()
+            out_narray.copyto(pred)
+            train_acc += CalAcc(pred.numpy, label.numpy.flatten())
+            train_nbatch += 1
+            out_narray.copyto(grad_narray)
+            executor.backward([grad_narray])
+
+            for grad, weight in block:
+                Update(grad, weight)
+
+        # evaluate
+        for data, label in val_dataiter:
+            label = label.numpy.flatten()
+            data.copyto(inputs["data"])
+            executor.forward()
+            out_narray.copyto(pred)
+            val_acc += CalAcc(pred.numpy, label)
+            val_nbatch += 1
+        acc_train = train_acc / train_nbatch
+        acc_val = val_acc / val_nbatch
+        print("Train Acc: ", train_acc / train_nbatch)
+        print("Valid Acc: ", val_acc / val_nbatch)
+        train_dataiter.reset()
+        val_dataiter.reset()
+    assert(acc_train > 0.98)
+    assert(acc_val > 0.97)
+
+if __name__ == "__main__":
+    test_cifar()
diff --git a/include/mxnet/io.h b/include/mxnet/io.h
index 47a59eec54fe..7bb86f4eece3 100644
--- a/include/mxnet/io.h
+++ b/include/mxnet/io.h
@@ -109,5 +109,21 @@ struct DataIteratorReg
   } \
   DMLC_REGISTRY_REGISTER(::mxnet::DataIteratorReg, DataIteratorReg, name) \
   .set_body(__create__ ## DataIteratorType ## __)
+/*!
+ * \brief Macro to register chained Iterators
+ *
+ * \code
+ * // example of registering a imagerec iterator
+ * MXNET_REGISTER_IO_CHAINED_ITER(ImageRec, ImageRecordIter, BatchIter)
+ * .describe("batched image record data iterator");
+ *
+ * \endcode
+ */
+#define MXNET_REGISTER_IO_CHAINED_ITER(name, ChainedDataIterType, HoldingDataIterType) \
+  static ::mxnet::IIterator* __create__ ## ChainedDataIterType ## __() { \
+    return new HoldingDataIterType(new ChainedDataIterType); \
+  } \
+  DMLC_REGISTRY_REGISTER(::mxnet::DataIteratorReg, DataIteratorReg, name) \
+  .set_body(__create__ ## ChainedDataIterType ## __)
 }  // namespace mxnet
 #endif  // MXNET_IO_H_
diff --git a/make/config.mk b/make/config.mk
index cd04b146180c..3e93e240e493 100644
--- a/make/config.mk
+++ b/make/config.mk
@@ -27,8 +27,8 @@ USE_CUDA_PATH = NONE
 # whether use opencv during compilation
 # you can disable it, however, you will not able to use
 # imbin iterator
-USE_OPENCV = 0
-USE_OPENCV_DECODER = 0
+USE_OPENCV = 1
+USE_OPENCV_DECODER = 1
 # whether use CUDNN R3 library
 USE_CUDNN = 0
 # add the path to CUDNN libary to link and compile flag
diff --git a/src/common/utils.h b/src/common/utils.h
index cf1fd2f1bb36..29cb9f0e2f2a 100644
--- a/src/common/utils.h
+++ b/src/common/utils.h
@@ -21,6 +21,11 @@ namespace common {
  * \brief Random Engine
  */
 typedef std::mt19937 RANDOM_ENGINE;
+// Get a random double; prnd is a pointer to a RANDOM_ENGINE
+#define NextDouble(prnd) std::generate_canonical(*(prnd))
+// Get a random int in [0, range); range is parenthesized so expression arguments expand safely
+#define NextUInt32(range, prnd) static_cast \
+(floor(std::generate_canonical(*(prnd)) * (range)))
 /*!
  * \brief Helper functions.
diff --git a/src/io/image_augmenter.h b/src/io/image_augmenter.h
new file mode 100644
index 000000000000..a4b77f5a41df
--- /dev/null
+++ b/src/io/image_augmenter.h
@@ -0,0 +1,410 @@
+/*!
+ * Copyright (c) 2015 by Contributors + * \file image_augmenter_opencv.hpp + * \brief threaded version of page iterator + * \author Naiyan Wang, Tianqi Chen, Tianjun Xiao + */ +#ifndef MXNET_IO_IMAGE_AUGMENTER_H_ +#define MXNET_IO_IMAGE_AUGMENTER_H_ + +#include +#include +#include +#include +#include +#include "../common/utils.h" + +namespace mxnet { +namespace io { +/*! \brief image augmentation parameters*/ +struct ImageAugmentParam : public dmlc::Parameter { + /*! \brief whether we do random cropping */ + bool rand_crop; + /*! \brief whether we do nonrandom croping */ + int crop_y_start; + /*! \brief whether we do nonrandom croping */ + int crop_x_start; + /*! \brief [-max_rotate_angle, max_rotate_angle] */ + int max_rotate_angle; + /*! \brief max aspect ratio */ + float max_aspect_ratio; + /*! \brief random shear the image [-max_shear_ratio, max_shear_ratio] */ + float max_shear_ratio; + /*! \brief max crop size */ + int max_crop_size; + /*! \brief min crop size */ + int min_crop_size; + /*! \brief max scale ratio */ + float max_random_scale; + /*! \brief min scale_ratio */ + float min_random_scale; + /*! \brief min image size */ + float min_img_size; + /*! \brief max image size */ + float max_img_size; + /*! \brief rotate angle */ + int rotate; + /*! \brief filled color while padding */ + int fill_value; + // The following are params for tensor process + /*! \brief whether to mirror the image */ + bool mirror; + /*! \brief whether to perform rand mirror the image */ + bool rand_mirror; + /*! \brief mean file string*/ + std::string mean_img; + /*! \brief mean value for r channel */ + float mean_r; + /*! \brief mean value for g channel */ + float mean_g; + /*! \brief mean value for b channel */ + float mean_b; + /*! \brief shape of the image data*/ + TShape input_shape; + /*! \brief scale on color space */ + float scale; + /*! \brief maximum ratio of contrast variation */ + float max_random_contrast; + /*! 
\brief maximum value of illumination variation */ + float max_random_illumination; + /*! \brief whether to print augment info */ + bool silent; + // declare parameters + DMLC_DECLARE_PARAMETER(ImageAugmentParam) { + DMLC_DECLARE_FIELD(rand_crop).set_default(true) + .describe("Whether we de random cropping"); + DMLC_DECLARE_FIELD(crop_y_start).set_default(-1) + .describe("Where to nonrandom crop on y"); + DMLC_DECLARE_FIELD(crop_x_start).set_default(-1) + .describe("Where to nonrandom crop on x"); + DMLC_DECLARE_FIELD(max_rotate_angle).set_default(0.0f) + .describe("Rotate can be [-max_rotate_angle, max_rotate_angle]"); + DMLC_DECLARE_FIELD(max_aspect_ratio).set_default(0.0f) + .describe("Max aspect ratio"); + DMLC_DECLARE_FIELD(max_shear_ratio).set_default(0.0f) + .describe("Shear rotate can be made between [-max_shear_ratio_, max_shear_ratio_]"); + DMLC_DECLARE_FIELD(max_crop_size).set_default(-1) + .describe("Maximum crop size"); + DMLC_DECLARE_FIELD(min_crop_size).set_default(-1) + .describe("Minimum crop size"); + DMLC_DECLARE_FIELD(max_random_scale).set_default(1.0f) + .describe("Maxmum scale ratio"); + DMLC_DECLARE_FIELD(min_random_scale).set_default(1.0f) + .describe("Minimum scale ratio"); + DMLC_DECLARE_FIELD(max_img_size).set_default(1e10f) + .describe("Maxmum image size"); + DMLC_DECLARE_FIELD(min_img_size).set_default(0.0f) + .describe("Minimum image size"); + DMLC_DECLARE_FIELD(rotate).set_default(-1.0f) + .describe("Rotate angle"); + DMLC_DECLARE_FIELD(fill_value).set_default(255) + .describe("Filled value while padding"); + DMLC_DECLARE_FIELD(mirror).set_default(false) + .describe("Whether to mirror the image"); + DMLC_DECLARE_FIELD(rand_mirror).set_default(false) + .describe("Whether to mirror the image randomly"); + DMLC_DECLARE_FIELD(mean_img).set_default("") + .describe("Mean Image to be subtracted"); + DMLC_DECLARE_FIELD(mean_r).set_default(0.0f) + .describe("Mean value on R channel"); + DMLC_DECLARE_FIELD(mean_g).set_default(0.0f) + 
.describe("Mean value on G channel"); + DMLC_DECLARE_FIELD(mean_b).set_default(0.0f) + .describe("Mean value on B channel"); + index_t input_shape_default[] = {3, 224, 224}; + DMLC_DECLARE_FIELD(input_shape) + .set_default(TShape(input_shape_default, input_shape_default + 3)) + .set_expect_ndim(3).enforce_nonzero() + .describe("Input shape of the neural net"); + DMLC_DECLARE_FIELD(scale).set_default(1.0f) + .describe("Scale in color space"); + DMLC_DECLARE_FIELD(max_random_contrast).set_default(0.0f) + .describe("Maximum ratio of contrast variation"); + DMLC_DECLARE_FIELD(max_random_illumination).set_default(0.0f) + .describe("Maximum value of illumination variation"); + } +}; + +/*! \brief helper class to do image augmentation */ +class ImageAugmenter { + public: + // contructor + ImageAugmenter(void) + : tmpres_(false), + rotateM_(2, 3, CV_32F) { + } + virtual ~ImageAugmenter() { + } + virtual void Init(const std::vector >& kwargs) { + std::vector > kwargs_left; + kwargs_left = param_.InitAllowUnknown(kwargs); + for (size_t i = 0; i < kwargs_left.size(); i++) { + if (!strcmp(kwargs_left[i].first.c_str(), "rotate_list")) { + const char* val = kwargs_left[i].second.c_str(); + const char *end = val + strlen(val); + char buf[128]; + while (val < end) { + sscanf(val, "%[^,]", buf); + val += strlen(buf) + 1; + rotate_list_.push_back(atoi(buf)); + } + } + } + if (param_.mean_img.length() != 0) { + dmlc::Stream *fi = dmlc::Stream::Create(param_.mean_img.c_str(), "r", true); + if (fi == NULL) { + meanfile_ready_ = false; + } else { + if (param_.silent == 0) { + printf("loading mean image from %s\n", param_.mean_img.c_str()); + } + meanimg_.LoadBinary(*fi); + delete fi; + meanfile_ready_ = true; + } + } + } + /*! 
+ * \brief augment src image, store result into dst + * this function is not thread safe, and will only be called by one thread + * however, it will tries to re-use memory space as much as possible + * \param src the source image + * \param source of random number + * \param dst the pointer to the place where we want to store the result + */ + virtual cv::Mat OpencvProcess(const cv::Mat &src, + common::RANDOM_ENGINE *prnd) { + // shear + float s = NextDouble(prnd) * param_.max_shear_ratio * 2 - param_.max_shear_ratio; + // rotate + int angle = NextUInt32(param_.max_rotate_angle * 2, prnd) - param_.max_rotate_angle; + if (param_.rotate > 0) angle = param_.rotate; + if (rotate_list_.size() > 0) { + angle = rotate_list_[NextUInt32(rotate_list_.size() - 1, prnd)]; + } + float a = cos(angle / 180.0 * M_PI); + float b = sin(angle / 180.0 * M_PI); + // scale + float scale = NextDouble(prnd) * \ + (param_.max_random_scale - param_.min_random_scale) + param_.min_random_scale; + // aspect ratio + float ratio = NextDouble(prnd) * \ + param_.max_aspect_ratio * 2 - param_.max_aspect_ratio + 1; + float hs = 2 * scale / (1 + ratio); + float ws = ratio * hs; + // new width and height + float new_width = std::max(param_.min_img_size, \ + std::min(param_.max_img_size, scale * src.cols)); + float new_height = std::max(param_.min_img_size, \ + std::min(param_.max_img_size, scale * src.rows)); + cv::Mat M(2, 3, CV_32F); + M.at(0, 0) = hs * a - s * b * ws; + M.at(1, 0) = -b * ws; + M.at(0, 1) = hs * b + s * a * ws; + M.at(1, 1) = a * ws; + float ori_center_width = M.at(0, 0) * src.cols + M.at(0, 1) * src.rows; + float ori_center_height = M.at(1, 0) * src.cols + M.at(1, 1) * src.rows; + M.at(0, 2) = (new_width - ori_center_width) / 2; + M.at(1, 2) = (new_height - ori_center_height) / 2; + cv::warpAffine(src, temp_, M, cv::Size(new_width, new_height), + cv::INTER_LINEAR, + cv::BORDER_CONSTANT, + cv::Scalar(param_.fill_value, param_.fill_value, param_.fill_value)); + cv::Mat res = temp_; + 
// crop + if (param_.max_crop_size != -1 || param_.min_crop_size != -1) { + CHECK(res.cols >= param_.max_crop_size && res.rows >= \ + param_.max_crop_size && param_.max_crop_size >= param_.min_crop_size) + << "input image size smaller than max_crop_size"; + mshadow::index_t rand_crop_size = NextUInt32(param_.max_crop_size \ + - param_.min_crop_size+1, prnd)+ param_.min_crop_size; + mshadow::index_t y = res.rows - rand_crop_size; + mshadow::index_t x = res.cols - rand_crop_size; + if (param_.rand_crop != 0) { + y = NextUInt32(y + 1, prnd); + x = NextUInt32(x + 1, prnd); + } else { + y /= 2; x /= 2; + } + cv::Rect roi(x, y, rand_crop_size, rand_crop_size); + cv::resize(res(roi), res, cv::Size(param_.input_shape[1], param_.input_shape[2])); + } else { + CHECK(static_cast(res.cols) >= param_.input_shape[1] \ + && static_cast(res.rows) >= param_.input_shape[2]) + << "input image size smaller than input shape"; + mshadow::index_t y = res.rows - param_.input_shape[2]; + mshadow::index_t x = res.cols - param_.input_shape[1]; + if (param_.rand_crop != 0) { + y = NextUInt32(y + 1, prnd); + x = NextUInt32(x + 1, prnd); + } else { + y /= 2; x /= 2; + } + cv::Rect roi(x, y, param_.input_shape[1], param_.input_shape[2]); + res = res(roi); + } + return res; + } + /*! 
+ * \brief augment src image, store result into dst + * this function is not thread safe, and will only be called by one thread + * however, it will tries to re-use memory space as much as possible + * \param src the source image + * \param source of random number + * \param dst the pointer to the place where we want to store the result + */ + virtual mshadow::Tensor OpencvProcess(mshadow::Tensor data, + common::RANDOM_ENGINE *prnd) { + if (!NeedOpencvProcess()) return data; + cv::Mat res(data.size(1), data.size(2), CV_8UC3); + for (index_t i = 0; i < data.size(1); ++i) { + for (index_t j = 0; j < data.size(2); ++j) { + res.at(i, j)[0] = data[2][i][j]; + res.at(i, j)[1] = data[1][i][j]; + res.at(i, j)[2] = data[0][i][j]; + } + } + res = this->OpencvProcess(res, prnd); + tmpres_.Resize(mshadow::Shape3(3, res.rows, res.cols)); + for (index_t i = 0; i < tmpres_.size(1); ++i) { + for (index_t j = 0; j < tmpres_.size(2); ++j) { + cv::Vec3b bgr = res.at(i, j); + tmpres_[0][i][j] = bgr[2]; + tmpres_[1][i][j] = bgr[1]; + tmpres_[2][i][j] = bgr[0]; + } + } + return tmpres_; + } + + void TensorProcess(mshadow::TensorContainer *p_data, + common::RANDOM_ENGINE *prnd) { + // Check Newly Created mean image + if (meanfile_ready_ == false && param_.mean_img.length() != 0) { + dmlc::Stream *fi = dmlc::Stream::Create(param_.mean_img.c_str(), "r", true); + if (fi != NULL) { + if (param_.silent == 0) { + printf("loading mean image from %s\n", param_.mean_img.c_str()); + } + meanimg_.LoadBinary(*fi); + delete fi; + meanfile_ready_ = true; + } + } + img_.Resize(mshadow::Shape3((*p_data).shape_[0], param_.input_shape[1], param_.input_shape[2])); + if (param_.input_shape[1] == 1) { + img_ = (*p_data) * param_.scale; + } else { + CHECK(p_data->size(1) >= param_.input_shape[1] && p_data->size(2) >= param_.input_shape[2]) + << "Data size must be bigger than the input size to net."; + mshadow::index_t yy = p_data->size(1) - param_.input_shape[1]; + mshadow::index_t xx = p_data->size(2) - 
param_.input_shape[2]; + if (param_.rand_crop != 0 && (yy != 0 || xx != 0)) { + yy = NextUInt32(yy + 1, prnd); + xx = NextUInt32(xx + 1, prnd); + } else { + yy /= 2; xx /= 2; + } + if (p_data->size(1) != param_.input_shape[1] && param_.crop_y_start != -1) { + yy = param_.crop_y_start; + } + if (p_data->size(2) != param_.input_shape[2] && param_.crop_x_start != -1) { + xx = param_.crop_x_start; + } + float contrast = NextDouble(prnd) * param_.max_random_contrast \ + * 2 - param_.max_random_contrast + 1; + float illumination = NextDouble(prnd) * param_.max_random_illumination \ + * 2 - param_.max_random_illumination; + if (param_.mean_r > 0.0f || param_.mean_g > 0.0f || param_.mean_b > 0.0f) { + // substract mean value + (*p_data)[0] -= param_.mean_b; + (*p_data)[1] -= param_.mean_g; + (*p_data)[2] -= param_.mean_r; + if ((param_.rand_mirror != 0 && NextDouble(prnd) < 0.5f) || param_.mirror == 1) { + img_ = mirror(crop((*p_data) * contrast + illumination, \ + img_[0].shape_, yy, xx)) * param_.scale; + } else { + img_ = crop((*p_data) * contrast + illumination, \ + img_[0].shape_, yy, xx) * param_.scale; + } + } else if (!meanfile_ready_ || param_.mean_img.length() == 0) { + // do not substract anything + if ((param_.rand_mirror != 0 && NextDouble(prnd) < 0.5f) || param_.mirror == 1) { + img_ = mirror(crop((*p_data), img_[0].shape_, yy, xx)) * param_.scale; + } else { + img_ = crop((*p_data), img_[0].shape_, yy, xx) * param_.scale; + } + } else { + // substract mean image + if ((param_.rand_mirror != 0 && NextDouble(prnd) < 0.5f) || param_.mirror == 1) { + if (p_data->shape_ == meanimg_.shape_) { + img_ = mirror(crop(((*p_data) - meanimg_) * contrast \ + + illumination, img_[0].shape_, yy, xx)) * param_.scale; + } else { + img_ = (mirror(crop((*p_data), img_[0].shape_, yy, xx) - meanimg_) \ + * contrast + illumination) * param_.scale; + } + } else { + if (p_data->shape_ == meanimg_.shape_) { + img_ = crop(((*p_data) - meanimg_) * contrast + illumination, \ + 
img_[0].shape_, yy, xx) * param_.scale; + } else { + img_ = ((crop((*p_data), img_[0].shape_, yy, xx) - meanimg_) * \ + contrast + illumination) * param_.scale; + } + } + } + } + (*p_data) = img_; + } + + virtual void Process(unsigned char *dptr, size_t sz, + mshadow::TensorContainer *p_data, + common::RANDOM_ENGINE *prnd) { + cv::Mat buf(1, sz, CV_8U, dptr); + cv::Mat res = cv::imdecode(buf, 1); + if (NeedOpencvProcess()) + res = this->OpencvProcess(res, prnd); + p_data->Resize(mshadow::Shape3(3, res.rows, res.cols)); + for (index_t i = 0; i < p_data->size(1); ++i) { + for (index_t j = 0; j < p_data->size(2); ++j) { + cv::Vec3b bgr = res.at(i, j); + (*p_data)[0][i][j] = bgr[2]; + (*p_data)[1][i][j] = bgr[1]; + (*p_data)[2][i][j] = bgr[0]; + } + } + res.release(); + this->TensorProcess(p_data, prnd); + } + + private: + // whether skip opencv processing + inline bool NeedOpencvProcess(void) const { + if (param_.max_rotate_angle > 0 || param_.max_shear_ratio > 0.0f + || param_.rotate > 0 || rotate_list_.size() > 0) return true; + if (param_.min_crop_size > 0 && param_.max_crop_size > 0) return true; + return false; + } + // temp input space + mshadow::TensorContainer tmpres_; + // mean image + mshadow::TensorContainer meanimg_; + /*! \brief temp space */ + mshadow::TensorContainer img_; + // temporal space + cv::Mat temp_; + // rotation param + cv::Mat rotateM_; + // whether the mean file is ready + bool meanfile_ready_; + // parameters + ImageAugmentParam param_; + /*! \brief input shape */ + mshadow::Shape<4> shape_; + /*! \brief list of possible rotate angle */ + std::vector rotate_list_; +}; +} // namespace io +} // namespace mxnet +#endif // MXNET_IO_IMAGE_AUGMENTER_H_ diff --git a/src/io/image_recordio.h b/src/io/image_recordio.h new file mode 100644 index 000000000000..3b4fa0302435 --- /dev/null +++ b/src/io/image_recordio.h @@ -0,0 +1,77 @@ +/*! 
+ * Copyright (c) 2015 by Contributors + * \file image_recordio.h + * \brief image recordio struct + */ +#ifndef MXNET_IO_IMAGE_RECORDIO_H_ +#define MXNET_IO_IMAGE_RECORDIO_H_ + +#include +#include +#include + +namespace mxnet { +namespace io { +/*! \brief image recordio struct */ +struct ImageRecordIO { + /*! \brief header in image recordio */ + struct Header { + /*! + * \brief flag of the header, + * used for future extension purposes + */ + uint32_t flag; + /*! + * \brief label field that returns label of images + * when image list was not presented, + * + * NOTE: user do not need to repack recordio just to + * change label field, just supply a list file that + * maps image id to new labels + */ + float label; + /*! + * \brief unique image index + * image_id[1] is always set to 0, + * reserved for future purposes for 128bit id + * image_id[0] is used to store image id + */ + uint64_t image_id[2]; + }; + /*! \brief header of image recordio */ + Header header; + /*! \brief pointer to data content */ + uint8_t *content; + /*! \brief size of the content */ + size_t content_size; + /*! \brief constructor */ + ImageRecordIO(void) + : content(NULL), content_size(0) { + memset(&header, 0, sizeof(header)); + } + /*! \brief get image id from record */ + inline uint64_t image_index(void) const { + return header.image_id[0]; + } + /*! + * \brief load header from a record content + * \param buf the head of record + * \param size the size of the entire record + */ + inline void Load(void *buf, size_t size) { + CHECK(size >= sizeof(header)); + std::memcpy(&header, buf, sizeof(header)); + content = reinterpret_cast(buf) + sizeof(header); + content_size = size - sizeof(header); + } + /*! 
+ * \brief save the record header + */ + inline void SaveHeader(std::string *blob) const { + blob->resize(sizeof(header)); + std::memcpy(dmlc::BeginPtr(*blob), &header, sizeof(header)); + } +}; +} // namespace io +} // namespace mxnet +#endif // MXNET_IO_IMAGE_RECORDIO_H_ diff --git a/src/io/inst_vector.h b/src/io/inst_vector.h index 1ae734631680..ed560fc2b5da 100644 --- a/src/io/inst_vector.h +++ b/src/io/inst_vector.h @@ -1,17 +1,21 @@ /*! * Copyright (c) 2015 by Contributors - * \inst_vector.h + * \file inst_vector.h * \brief holder of a sequence of DataInst in CPU * that are not necessarily of same shape */ + #ifndef MXNET_IO_INST_VECTOR_H_ #define MXNET_IO_INST_VECTOR_H_ + +#include +#include #include #include #include -#include -#include "./data.h" + namespace mxnet { +namespace io { /*! * \brief tensor vector that can store sequence of tensor * in a memory compact way, tensors do not have to be of same shape @@ -28,7 +32,7 @@ class TensorVector { CHECK(i + 1 < offset_.size()); CHECK(shape_[i].Size() == offset_[i + 1] - offset_[i]); return mshadow::Tensor - (reinterpret_cast(BeginPtr(content_)) + offset_[i], shape_[i]); + ((DType*)dmlc::BeginPtr(content_) + offset_[i], shape_[i]); // NOLINT(*) } inline mshadow::Tensor Back() const { return (*this)[Size() - 1]; @@ -59,35 +63,6 @@ class TensorVector { std::vector > shape_; }; -/*! 
- * \brief tblob vector that can store sequence of tblob - * in a memory compact way, tblobs do not have to be of same shape - */ -template -class TBlobVector { - public: - TBlobVector(void) { - this->Clear(); - } - // get i-th tblob - inline TBlob operator[](size_t i) const; - // get the last tblob - inline TBlob Back(); - // return the size of the vector - inline size_t Size(void) const; - // push a tensor of certain shape - // return the reference of the pushed tensor - inline void Push(TShape shape_); - inline void Clear(void); - private: - // offset of the data content - std::vector offset_; - // data content - std::vector content_; - // shape of data - std::vector shape_; -}; - /*! * \brief instance vector that can holds * non-uniform shape data instance in a shape efficient way @@ -98,20 +73,38 @@ class InstVector { return index_.size(); } // instance - inline DataInst operator[](size_t i) const; + inline DataInst operator[](size_t i) const { + DataInst inst; + inst.index = index_[i]; + inst.data.push_back(TBlob(data_[i])); + inst.data.push_back(TBlob(label_[i])); + return inst; + } // get back of instance vector - inline DataInst Back() const; - // clear the container - inline void Clear(void); - // push the newly coming instance - inline void Push(unsigned index, TBlob data_); + inline DataInst Back() const { + return (*this)[Size() - 1]; + } + inline void Clear(void) { + index_.clear(); + data_.Clear(); + label_.Clear(); + } + inline void Push(unsigned index, + mshadow::Shape<3> dshape, + mshadow::Shape<1> lshape) { + index_.push_back(index); + data_.Push(dshape); + label_.Push(lshape); + } private: /*! 
\brief index of the data */
   std::vector index_;
-  // data
-  std::vector > data_;
-  // extra data
-  std::vector extra_data_;
+  // data
+  TensorVector<3, real_t> data_;
+  // label
+  TensorVector<1, real_t> label_;
 };
+}  // namespace io
+}  // namespace mxnet
 #endif  // MXNET_IO_INST_VECTOR_H_
diff --git a/src/io/io.cc b/src/io/io.cc
index bd5b78dda643..8bfb5dbdd570 100644
--- a/src/io/io.cc
+++ b/src/io/io.cc
@@ -4,7 +4,18 @@
 #include
 #include
+#include "./image_augmenter.h"
+#include "./iter_batch.h"
+// Registers
 namespace dmlc {
 DMLC_REGISTRY_ENABLE(::mxnet::DataIteratorReg);
 }  // namespace dmlc
+
+namespace mxnet {
+namespace io {
+// Register parameters in header files
+DMLC_REGISTER_PARAMETER(BatchParam);
+DMLC_REGISTER_PARAMETER(ImageAugmentParam);
+}  // namespace io
+}  // namespace mxnet
diff --git a/src/io/iter_batch.h b/src/io/iter_batch.h
new file mode 100644
index 000000000000..b45dfd3328e1
--- /dev/null
+++ b/src/io/iter_batch.h
@@ -0,0 +1,172 @@
+/*!
+ * Copyright (c) 2015 by Contributors
+ * \file iter_batch.h
+ * \brief definition of preprocessing iterators that takes an iterator and do some preprocessing
+ * \author Tianqi Chen, Tianjun Xiao
+ */
+#ifndef MXNET_IO_ITER_BATCH_H_
+#define MXNET_IO_ITER_BATCH_H_
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace mxnet {
+namespace io {
+// Batch parameters
+struct BatchParam : public dmlc::Parameter {
+  /*! \brief batch size */
+  index_t batch_size;
+  /*! \brief input shape */
+  TShape input_shape;
+  /*! \brief label width */
+  index_t label_width;
+  /*! \brief use round robin to handle overflow batch */
+  bool round_batch;
+  /*! \brief skip read */
+  bool test_skipread;
+  /*! \brief silent */
+  bool silent;
+  // declare parameters
+  DMLC_DECLARE_PARAMETER(BatchParam) {
+    DMLC_DECLARE_FIELD(batch_size)
+        .describe("Batch size.");
+    index_t input_shape_default[] = {3, 224, 224};
+    DMLC_DECLARE_FIELD(input_shape)
+        .set_default(TShape(input_shape_default, input_shape_default + 3))
+        .set_expect_ndim(3).enforce_nonzero()
+        .describe("Input shape of the neural net");
+    DMLC_DECLARE_FIELD(label_width).set_default(1)
+        .describe("Label width.");
+    DMLC_DECLARE_FIELD(round_batch).set_default(true)
+        .describe("Use round robin to handle overflow batch.");
+    DMLC_DECLARE_FIELD(test_skipread).set_default(false)
+        .describe("Skip read for testing.");
+    DMLC_DECLARE_FIELD(silent).set_default(false)
+        .describe("Whether to print batch information.");
+  }
+};
+
+/*! \brief create a batch iterator from single instance iterator; takes ownership of base */
+class BatchAdaptIter: public IIterator {
+ public:
+  explicit BatchAdaptIter(IIterator *base): base_(base), head_(1), num_overflow_(0) {}
+  virtual ~BatchAdaptIter(void) {
+    delete base_;
+    FreeSpaceDense();
+  }
+  virtual void Init(const std::vector >& kwargs) {
+    std::vector > kwargs_left;
+    // init batch param; unknown keys are allowed because kwargs is shared with the base iterator
+    kwargs_left = param_.InitAllowUnknown(kwargs);
+    // init base iterator
+    base_->Init(kwargs);
+    data_shape_[1] = param_.input_shape[0];
+    data_shape_[2] = param_.input_shape[1];
+    data_shape_[3] = param_.input_shape[2];
+    data_shape_[0] = param_.batch_size;
+    AllocSpaceDense(false);
+  }
+  virtual void BeforeFirst(void) {
+    if (param_.round_batch == 0 || num_overflow_ == 0) {
+      // otherwise, the base iterator was already rewound while padding the last batch
+      base_->BeforeFirst();
+    } else {
+      num_overflow_ = 0;
+    }
+    head_ = 1;
+  }
+  virtual bool Next(void) {
+    out_.num_batch_padd = 0;
+
+    // skip read if in head version
+    if (param_.test_skipread != 0 && head_ == 0)
+      return true;
+    else
+      this->head_ = 0;
+
+    // if overflow from previous round, directly return false, until before first is called
+    if (num_overflow_ != 0) return false;
+    index_t top = 0;
+
+    while (base_->Next()) {
+      const DataInst& d = base_->Value();
+      mshadow::Copy(label[top], d.data[1].get());
+      out_.inst_index[top] = d.index;
+      mshadow::Copy(data[top], d.data[0].get());
+
+      if (++ top >= param_.batch_size) {
+        out_.data[0] = TBlob(data);
+        out_.data[1] = TBlob(label);
+        return true;
+      }
+    }
+    if (top != 0) {
+      if (param_.round_batch != 0) {
+        num_overflow_ = 0;
+        base_->BeforeFirst();
+        for (; top < param_.batch_size; ++top, ++num_overflow_) {
+          CHECK(base_->Next()) << "number of input must be bigger than batch size";
+          const DataInst& d = base_->Value();
+          mshadow::Copy(label[top], d.data[1].get());
+          out_.inst_index[top] = d.index;
+          mshadow::Copy(data[top], d.data[0].get());
+        }
+        out_.num_batch_padd = num_overflow_;
+      } else {
+        out_.num_batch_padd = param_.batch_size - top;
+      }
+      out_.data[0] = TBlob(data);
+      out_.data[1] = TBlob(label);
+      return true;
+    }
+    return false;
+  }
+  virtual const DataBatch &Value(void) const {
+    CHECK(head_ == 0) << "must call Next to get value";
+    return out_;
+  }
+
+ private:
+  /*! \brief batch parameters */
+  BatchParam param_;
+  /*! \brief base iterator */
+  IIterator *base_;
+  /*! \brief output data */
+  DataBatch out_;
+  /*! \brief 1 until Next is called after construction or BeforeFirst, then 0 */
+  int head_;
+  /*! \brief number of overflow instances that were read in round_batch mode */
+  int num_overflow_;
+  /*! \brief label information of the data*/
+  mshadow::Tensor label;
+  /*! \brief content of dense data, if this DataBatch is dense */
+  mshadow::Tensor data;
+  /*! \brief data shape */
+  mshadow::Shape<4> data_shape_;
+  // Functions that allocate and free tensor space
+  inline void AllocSpaceDense(bool pad = false) {
+    data = mshadow::NewTensor(data_shape_, 0.0f, pad);
+    mshadow::Shape<2> lshape = mshadow::Shape2(param_.batch_size, param_.label_width);
+    label = mshadow::NewTensor(lshape, 0.0f, pad);
+    out_.inst_index = new unsigned[param_.batch_size];
+    out_.batch_size = param_.batch_size;
+    out_.data.resize(2);
+  }
+  /*! \brief auxiliary function to free space, if needed, dense only */
+  inline void FreeSpaceDense(void) {
+    if (label.dptr_ != NULL) {
+      delete [] out_.inst_index;
+      mshadow::FreeSpace(&label);
+      mshadow::FreeSpace(&data);
+      label.dptr_ = NULL;
+    }
+  }
+};  // class BatchAdaptIter
+}  // namespace io
+}  // namespace mxnet
+#endif  // MXNET_IO_ITER_BATCH_H_
diff --git a/src/io/iter_image_recordio.cc b/src/io/iter_image_recordio.cc
new file mode 100644
index 000000000000..701c28deb4c9
--- /dev/null
+++ b/src/io/iter_image_recordio.cc
@@ -0,0 +1,422 @@
+/*!
+ * Copyright (c) 2015 by Contributors
+ * \file iter_image_recordio-inl.hpp
+ * \brief recordio data
+iterator
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "./inst_vector.h"
+#include "./image_recordio.h"
+#include "./image_augmenter.h"
+#include "./iter_batch.h"
+namespace mxnet {
+namespace io {
+/*! \brief data structure to hold labels for images */
+class ImageLabelMap {
+ public:
+  /*!
+ * \brief initialize the label list into memory + * \param path_imglist path to the image list + * \param label_width predefined label_width + */ + explicit ImageLabelMap(const char *path_imglist, + mshadow::index_t label_width, + bool silent) { + this->label_width = label_width; + image_index_.clear(); + label_.clear(); + idx2label_.clear(); + dmlc::InputSplit *fi = dmlc::InputSplit::Create + (path_imglist, 0, 1, "text"); + dmlc::InputSplit::Blob rec; + while (fi->NextRecord(&rec)) { + // quick manual parsing + char *p = reinterpret_cast(rec.dptr); + char *end = p + rec.size; + // skip space + while (isspace(*p) && p != end) ++p; + image_index_.push_back(static_cast(atol(p))); + for (size_t i = 0; i < label_width; ++i) { + // skip till space + while (!isspace(*p) && p != end) ++p; + // skip space + while (isspace(*p) && p != end) ++p; + CHECK(p != end) << "Bad ImageList format"; + label_.push_back(static_cast(atof(p))); + } + } + delete fi; + // be careful not to resize label_ afterwards + idx2label_.reserve(image_index_.size()); + for (size_t i = 0; i < image_index_.size(); ++i) { + idx2label_[image_index_[i]] = dmlc::BeginPtr(label_) + i * label_width; + } + if (!silent) { + LOG(INFO) << "Loaded ImageList from " << path_imglist << ' ' + << image_index_.size() << " Image records"; + } + } + /*! \brief find a label for corresponding index */ + inline mshadow::Tensor Find(size_t imid) const { + std::unordered_map::const_iterator it + = idx2label_.find(imid); + CHECK(it != idx2label_.end()) << "fail to find imagelabel for id " << imid; + return mshadow::Tensor(it->second, mshadow::Shape1(label_width)); + } + + private: + // label width + mshadow::index_t label_width; + // image index of each record + std::vector image_index_; + // real label content + std::vector label_; + // map index to label + std::unordered_map idx2label_; +}; + +// Define image record parser parameters +struct ImageRecParserParam : public dmlc::Parameter { + /*! 
\brief path to image list */ + std::string path_imglist; + /*! \brief path to image recordio */ + std::string path_imgrec; + /*! \brief number of threads */ + int nthread; + /*! \brief whether to remain silent */ + bool silent; + /*! \brief number of distributed worker */ + int dist_num_worker, dist_worker_rank; + /*! \brief label-width */ + int label_width; + /*! \brief input shape */ + TShape input_shape; + // declare parameters + DMLC_DECLARE_PARAMETER(ImageRecParserParam) { + DMLC_DECLARE_FIELD(path_imglist).set_default("") + .describe("Path to image list."); + DMLC_DECLARE_FIELD(path_imgrec).set_default("./data/imgrec.rec") + .describe("Path to image record file."); + DMLC_DECLARE_FIELD(nthread).set_lower_bound(1).set_default(4) + .describe("Number of thread to do parsing."); + DMLC_DECLARE_FIELD(label_width).set_lower_bound(1).set_default(1) + .describe("How many labels for an image."); + DMLC_DECLARE_FIELD(silent).set_default(false) + .describe("Whether to output parser information."); + DMLC_DECLARE_FIELD(dist_num_worker).set_lower_bound(1).set_default(1) + .describe("Dist worker number."); + DMLC_DECLARE_FIELD(dist_worker_rank).set_default(0) + .describe("Dist worker rank."); + index_t input_shape_default[] = {3, 224, 224}; + DMLC_DECLARE_FIELD(input_shape) + .set_default(TShape(input_shape_default, input_shape_default + 3)) + .set_expect_ndim(3).enforce_nonzero() + .describe("Input shape of the neural net"); + } +}; + +// parser to parse image recordio +class ImageRecordIOParser { + public: + ImageRecordIOParser(void) + : source_(NULL), + label_map_(NULL) { + } + ~ImageRecordIOParser(void) { + // can be NULL + delete label_map_; + delete source_; + for (size_t i = 0; i < augmenters_.size(); ++i) { + delete augmenters_[i]; + } + for (size_t i = 0; i < prnds_.size(); ++i) { + delete prnds_[i]; + } + } + // initialize the parser + inline void Init(const std::vector >& kwargs); + + // set record to the head + inline void BeforeFirst(void) { + return 
source_->BeforeFirst(); + } + // parse next set of records, return an array of + // instance vector to the user + inline bool ParseNext(std::vector *out); + + private: + // magic number to seed prng + static const int kRandMagic = 111; + /*! \brief parameters */ + ImageRecParserParam param_; + /*! \brief augmenters */ + std::vector augmenters_; + /*! \brief random samplers */ + std::vector prnds_; + /*! \brief data source */ + dmlc::InputSplit *source_; + /*! \brief label information, if any */ + ImageLabelMap *label_map_; + /*! \brief temp space */ + mshadow::TensorContainer img_; +}; + +inline void ImageRecordIOParser::Init( + const std::vector >& kwargs) { + // initialize parameter + std::vector > kwargs_left; + // init image rec param + kwargs_left = param_.InitAllowUnknown(kwargs); + int maxthread, threadget; + #pragma omp parallel + { + maxthread = std::max(omp_get_num_procs() / 2 - 1, 1); + } + param_.nthread = std::min(maxthread, param_.nthread); + #pragma omp parallel num_threads(param_.nthread) + { + threadget = omp_get_num_threads(); + } + param_.nthread = threadget; + // setup decoders + for (int i = 0; i < threadget; ++i) { + augmenters_.push_back(new ImageAugmenter()); + augmenters_[i]->Init(kwargs); + prnds_.push_back(new common::RANDOM_ENGINE((i + 1) * kRandMagic)); + } + + // handling for hadoop + const char *ps_rank = getenv("PS_RANK"); + if (ps_rank != NULL) { + param_.dist_worker_rank = atoi(ps_rank); + } + + if (param_.path_imglist.length() != 0) { + label_map_ = new ImageLabelMap(param_.path_imglist.c_str(), + param_.label_width, param_.silent != 0); + } else { + param_.label_width = 1; + } + CHECK(param_.path_imgrec.length() != 0) + << "ImageRecordIOIterator: must specify image_rec"; +#if MSHADOW_DIST_PS + param_.dist_num_worker = ::ps::RankSize(); + param_.dist_worker_rank = ::ps::MyRank(); + LOG(INFO) << "rank " << param_.dist_worker_rank + << " in " << param_.dist_num_worker; +#endif + source_ = dmlc::InputSplit::Create + 
(param_.path_imgrec.c_str(), param_.dist_worker_rank, + param_.dist_num_worker, "recordio"); + // request a chunk size of 8 << 20 (8 MiB if measured in bytes) when possible + source_->HintChunkSize(8 << 20UL); +} + +inline bool ImageRecordIOParser:: +ParseNext(std::vector *out_vec) { + CHECK(source_ != NULL); + dmlc::InputSplit::Blob chunk; + if (!source_->NextChunk(&chunk)) return false; + out_vec->resize(param_.nthread); + #pragma omp parallel num_threads(param_.nthread) + { + CHECK(omp_get_num_threads() == param_.nthread); + int tid = omp_get_thread_num(); + dmlc::RecordIOChunkReader reader(chunk, tid, param_.nthread); + ImageRecordIO rec; + dmlc::InputSplit::Blob blob; + // image data + InstVector &out = (*out_vec)[tid]; + out.Clear(); + while (reader.NextRecord(&blob)) { + rec.Load(blob.dptr, blob.size); + out.Push(static_cast(rec.image_index()), + mshadow::Shape3(param_.input_shape[0], param_.input_shape[1], param_.input_shape[2]), + mshadow::Shape1(param_.label_width)); + DataInst inst = out.Back(); + // turn datainst into tensor + mshadow::Tensor data = inst.data[0].get(); + mshadow::Tensor label = inst.data[1].get(); + augmenters_[tid]->Process(rec.content, rec.content_size, &img_, prnds_[tid]); + mshadow::Copy(data, img_); + if (label_map_ != NULL) { + mshadow::Copy(label, label_map_->Find(rec.image_index())); + } else { + label[0] = rec.header.label; + } + } + } + return true; +} + +// Define image record parameters +struct ImageRecordParam: public dmlc::Parameter { + /*! \brief whether to do shuffle */ + bool shuffle; + /*! \brief random seed */ + int seed; + /*! \brief mean file string*/ + std::string mean_img; + /*! 
\brief whether to remain silent */ + bool silent; + // declare parameters + DMLC_DECLARE_PARAMETER(ImageRecordParam) { + DMLC_DECLARE_FIELD(shuffle).set_default(true) + .describe("Whether to shuffle data."); + DMLC_DECLARE_FIELD(seed).set_default(0) + .describe("Random Seed."); + DMLC_DECLARE_FIELD(mean_img).set_default("./data/mean.bin") + .describe("Path to image mean file."); + DMLC_DECLARE_FIELD(silent).set_default(false) + .describe("Whether to output information."); + } +}; + +// iterator on image recordio +class ImageRecordIter : public IIterator { + public: + ImageRecordIter() + : data_(NULL) { + } + virtual ~ImageRecordIter(void) { + iter_.Destroy(); + // data can be NULL + delete data_; + } + virtual void Init(const std::vector >& kwargs) { + std::vector > kwargs_left; + // init image rec param + kwargs_left = param_.InitAllowUnknown(kwargs); + // use the kwarg to init parser + parser_.Init(kwargs); + // init thread iter + iter_.set_max_capacity(4); + iter_.Init([this](std::vector **dptr) { + if (*dptr == NULL) { + *dptr = new std::vector(); + } + return parser_.ParseNext(*dptr); + }, + [this]() { parser_.BeforeFirst(); }); + // Check Meanfile + if (param_.mean_img.length() != 0) { + dmlc::Stream *fi = dmlc::Stream::Create(param_.mean_img.c_str(), "r", true); + if (fi == NULL) { + this->CreateMeanImg(); + } else { + delete fi; + } + } + inst_ptr_ = 0; + } + virtual void BeforeFirst(void) { + iter_.BeforeFirst(); + inst_order_.clear(); + inst_ptr_ = 0; + } + virtual bool Next(void) { + while (true) { + if (inst_ptr_ < inst_order_.size()) { + std::pair p = inst_order_[inst_ptr_]; + out_ = (*data_)[p.first][p.second]; + ++inst_ptr_; + return true; + } else { + if (data_ != NULL) iter_.Recycle(&data_); + if (!iter_.Next(&data_)) return false; + inst_order_.clear(); + for (unsigned i = 0; i < data_->size(); ++i) { + const InstVector &tmp = (*data_)[i]; + for (unsigned j = 0; j < tmp.Size(); ++j) { + inst_order_.push_back(std::make_pair(i, j)); + } + } + // 
shuffle instance order if needed + if (shuffle_ != 0) { + std::shuffle(inst_order_.begin(), inst_order_.end(), \ + common::RANDOM_ENGINE(kRandMagic + param_.seed)); + } + inst_ptr_ = 0; + } + } + return false; + } + virtual const DataInst &Value(void) const { + return out_; + } + + private: + inline void CreateMeanImg(void) { + if (param_.silent == 0) { + printf("cannot find %s: create mean image, this will take some time...\n", + param_.mean_img.c_str()); + } + time_t start = time(NULL); + uint64_t elapsed = 0; + size_t imcnt = 1; + this->BeforeFirst(); + CHECK(this->Next()) << "input iterator failed."; + // Get the first data + mshadow::Tensor img_tensor = out_.data[0].get(); + meanimg_.Resize(img_tensor.shape_); + mshadow::Copy(meanimg_, img_tensor); + while (this->Next()) { + mshadow::Tensor img_tensor = out_.data[0].get(); + meanimg_ += img_tensor; imcnt += 1; + elapsed = (uint64_t)(time(NULL) - start); + if (imcnt % 1000 == 0 && param_.silent == 0) { + printf("\r \r"); + printf("[%8lu] images processed, %ld sec elapsed", imcnt, elapsed); + fflush(stdout); + } + } + meanimg_ *= (1.0f / imcnt); + + dmlc::Stream *fo = dmlc::Stream::Create(param_.mean_img.c_str(), "w"); + meanimg_.SaveBinary(*fo); + delete fo; + if (param_.silent == 0) { + printf("save mean image to %s..\n", param_.mean_img.c_str()); + } + } + + // random magic + static const int kRandMagic = 111; + // output instance + DataInst out_; + // whether shuffle data + int shuffle_; + // data ptr + size_t inst_ptr_; + // internal instance order + std::vector > inst_order_; + // data + std::vector *data_; + // internal parser + ImageRecordIOParser parser_; + // backend thread + dmlc::ThreadedIter > iter_; + // parameters + ImageRecordParam param_; + // mean image + mshadow::TensorContainer meanimg_; +}; +DMLC_REGISTER_PARAMETER(ImageRecParserParam); +DMLC_REGISTER_PARAMETER(ImageRecordParam); +MXNET_REGISTER_IO_CHAINED_ITER(ImageRecordIter, ImageRecordIter, BatchAdaptIter) + .describe("Create iterator 
for dataset packed in recordio.") + .add_arguments(ImageRecordParam::__FIELDS__()) + .add_arguments(ImageRecParserParam::__FIELDS__()) + .add_arguments(BatchParam::__FIELDS__()) + .add_arguments(ImageAugmentParam::__FIELDS__()); +} // namespace io +} // namespace mxnet diff --git a/src/io/iter_mnist.cc b/src/io/iter_mnist.cc index 93195061b278..77ac3a479f75 100644 --- a/src/io/iter_mnist.cc +++ b/src/io/iter_mnist.cc @@ -31,7 +31,7 @@ struct MNISTParam : public dmlc::Parameter { bool flat; /*! \brief random seed */ int seed; - // declare parameters in header file + // declare parameters DMLC_DECLARE_PARAMETER(MNISTParam) { DMLC_DECLARE_FIELD(image).set_default("./train-images-idx3-ubyte") .describe("Mnist image path."); @@ -155,7 +155,7 @@ class MNISTIter: public IIterator { delete stdlabel; } inline void Shuffle(void) { - std::shuffle(inst_.begin(), inst_.end(), common::RANDOM_ENGINE(kRandMagic+param_.seed)); + std::shuffle(inst_.begin(), inst_.end(), common::RANDOM_ENGINE(kRandMagic + param_.seed)); std::vector tmplabel(labels_.size()); mshadow::TensorContainer tmpimg(img_.shape_); for (size_t i = 0; i < inst_.size(); ++i) { diff --git a/tests/python/get_data.py b/tests/python/get_data.py index 82d25d9072fb..828809f3e757 100644 --- a/tests/python/get_data.py +++ b/tests/python/get_data.py @@ -27,3 +27,10 @@ def GetMNIST_ubyte(): os.system("wget http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz -P data/") os.system("gunzip data/t10k-labels-idx1-ubyte.gz") +# download cifar +def GetCifar10(): + if not os.path.isdir("data/"): + os.system("mkdir data/") + if not os.path.exists('data/cifar10.zip'): + os.system("wget http://webdocs.cs.ualberta.ca/~bx3/data/cifar10.zip -P data/") + os.system("unzip data/cifar10.zip") diff --git a/tests/python/test_io.py b/tests/python/test_io.py index dfeb3f67c293..1156782bdfef 100644 --- a/tests/python/test_io.py +++ b/tests/python/test_io.py @@ -5,28 +5,29 @@ import pickle as pickle import sys import get_data +#from PIL 
import Image -# prepare data -get_data.GetMNIST_ubyte() -batch_size = 100 -train_dataiter = mx.io.MNISTIter( - image="data/train-images-idx3-ubyte", - label="data/train-labels-idx1-ubyte", - batch_size=batch_size, shuffle=1, flat=1, silent=0, seed=10) -val_dataiter = mx.io.MNISTIter( - image="data/t10k-images-idx3-ubyte", - label="data/t10k-labels-idx1-ubyte", - batch_size=batch_size, shuffle=0, flat=1, silent=0) +def test_MNISTIter(): + # prepare data + get_data.GetMNIST_ubyte() -def test_MNISTIter_loop(): + batch_size = 100 + train_dataiter = mx.io.MNISTIter( + image="data/train-images-idx3-ubyte", + label="data/train-labels-idx1-ubyte", + batch_size=batch_size, shuffle=1, flat=1, silent=0, seed=10) + val_dataiter = mx.io.MNISTIter( + image="data/t10k-images-idx3-ubyte", + label="data/t10k-labels-idx1-ubyte", + batch_size=batch_size, shuffle=0, flat=1, silent=0) + # test_loop nbatch = 60000 / batch_size batch_count = 0 for data, label in train_dataiter: batch_count += 1 assert(nbatch == batch_count) - -def test_MNISTIter_reset(): + # test_reset train_dataiter.reset() train_dataiter.iter_next() label_0 = train_dataiter.getlabel().numpy.flatten() @@ -39,3 +40,65 @@ def test_MNISTIter_reset(): label_1 = train_dataiter.getlabel().numpy.flatten() assert(sum(label_0 - label_1) == 0) +''' +def test_ImageRecIter(): + dataiter = mx.io.ImageRecordIter( + path_imgrec="data/val_cxxnet.rec", + mean_img="data/smallset/image_net_mean.bin", + rand_crop=True, + mirror=True, + input_shape=(3,227,227), + batch_size=100, + nthread=1, + seed=10) + labelcount = [0 for i in range(1000)] + batchcount = 0 + for data, label in dataiter: + npdata = data.numpy + print npdata[0,:,:,:] + imgdata = np.zeros([227, 227, 3], dtype=np.uint8) + imgdata[:,:,0] = npdata[10,2,:,:] + imgdata[:,:,1] = npdata[10,1,:,:] + imgdata[:,:,2] = npdata[10,0,:,:] + img = Image.fromarray(imgdata) + imgpath = "data/smallset/test_3.jpg" + img.save(imgpath, format='JPEG') + exit(0) + print batchcount + 
sys.stdout.flush() + batchcount += 1 + nplabel = label.numpy + for i in range(nplabel.shape[0]): + labelcount[int(nplabel[i])] += 1 + +def test_Cifar10Rec(): + dataiter = mx.io.ImageRecordIter( + path_imgrec="data/cifar/test.rec", + mean_img="data/cifar/cifar10_mean.bin", + rand_crop=True, + rand_mirror=True, + input_shape=(3,28,28), + batch_size=100, + nthread=1) + labelcount = [0 for i in range(10)] + batchcount = 0 + for data, label in dataiter: + npdata = data.numpy + print npdata[0,:,:,:] + imgdata = np.zeros([28, 28, 3], dtype=np.uint8) + imgdata[:,:,0] = npdata[0,2,:,:] + imgdata[:,:,1] = npdata[0,1,:,:] + imgdata[:,:,2] = npdata[0,0,:,:] + img = Image.fromarray(imgdata) + imgpath = "data/cifar/test.jpg" + img.save(imgpath, format='JPEG') + exit(0) + print "Batch: ", batchcount + sys.stdout.flush() + batchcount += 1 + nplabel = label.numpy + for i in range(nplabel.shape[0]): + labelcount[int(nplabel[i])] += 1 + for i in range(10): + assert(labelcount[i] == 1000) +'''