最近在基于caffe做目标检测的问题,需要利用caffe来训练一个回归网络,用来预测object在图像中的位置(x1,y1,width,height)。但是现有的caffe版本(happynear版本)的数据集转换工具只支持每张图像对应单个整数label(即分类问题),所以需要修改caffe源码,使之也可以转换回归问题(每张图像对应多个浮点数label)的数据集。
主要是参照 http://blog.csdn.net/baobei0112/article/details/47606559 进行修改。但是这份博客使用的不是happynear的caffe版本,所以源码改动的地方差异较大。下面我会记录我改动的地方。
一.源码修改
1.修改caffe.proto,位于/src/caffe/proto
将第36行(Datum 消息中的 label 字段)改成 `repeated float label = 5;`,然后运行 extract_proto.bat 重新生成 protobuf 代码。
2.修改data_layers.hpp
- #ifndef CAFFE_DATA_LAYERS_HPP_
- #define CAFFE_DATA_LAYERS_HPP_
- #include <string>
- #include <utility>
- #include <vector>
- #include "hdf5/hdf5.h"
- #include "caffe/blob.hpp"
- #include "caffe/common.hpp"
- #include "caffe/data_reader.hpp"
- #include "caffe/data_transformer.hpp"
- #include "caffe/filler.hpp"
- #include "caffe/internal_thread.hpp"
- #include "caffe/layer.hpp"
- #include "caffe/proto/caffe.pb.h"
- #include "caffe/util/blocking_queue.hpp"
- #include "caffe/util/db.hpp"
- #define HDF5_DATA_DATASET_NAME "data"
- #define HDF5_DATA_LABEL_NAME "label"
- namespace caffe {
- /**
- * @brief Provides base for data layers that feed blobs to the Net.
- *
- * Declares the common LayerSetUp entry point and the DataLayerSetUp hook, and
- * holds the transformation parameter, the data transformer and the
- * output_labels_ flag used by derived layers. DataLayerSetUp, Reshape and
- * both Backward methods have empty bodies in this class.
- *
- * TODO(dox): thorough documentation for Forward and proto params.
- */
- template <typename Dtype>
- class BaseDataLayer : public Layer<Dtype> {
- public:
- explicit BaseDataLayer(const LayerParameter& param);
- // LayerSetUp: implements common data layer setup functionality, and calls
- // DataLayerSetUp to do special data layer setup for individual layer types.
- // This method may not be overridden except by the BasePrefetchingDataLayer.
- virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top);
- // Data layers should be shared by multiple solvers in parallel
- virtual inline bool ShareInParallel() const { return true; }
- virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top) {
- }  // empty default; derived layers override it to size their top blobs
- // Data layers have no bottoms, so reshaping is trivial.
- virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top) {
- }
- virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
- const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
- }  // intentionally empty
- virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
- const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
- }  // intentionally empty
- protected:
- TransformationParameter transform_param_;
- shared_ptr<DataTransformer<Dtype> > data_transformer_;
- bool output_labels_;  // DataLayer only sets up and fills label blobs when true
- };
- template <typename Dtype>
- class Batch {  // bundles one data blob with its label blob
- public:
- Blob<Dtype> data_, label_;
- };
- template <typename Dtype>
- class BasePrefetchingDataLayer :
- public BaseDataLayer<Dtype>, public InternalThread {
- public:
- explicit BasePrefetchingDataLayer(const LayerParameter& param);
- // LayerSetUp: implements common data layer setup functionality, and calls
- // DataLayerSetUp to do special data layer setup for individual layer types.
- // This method may not be overridden.
- void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top);
- virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top);
- virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top);
- // Prefetches batches (asynchronously if to GPU memory)
- static const int PREFETCH_COUNT = 3;  // number of Batch slots in prefetch_
- protected:
- virtual void InternalThreadEntry();
- virtual void load_batch(Batch<Dtype>* batch) = 0;  // each concrete layer fills one Batch
- Batch<Dtype> prefetch_[PREFETCH_COUNT];
- BlockingQueue<Batch<Dtype>*> prefetch_free_;  // presumably batches ready to be refilled - TODO confirm
- BlockingQueue<Batch<Dtype>*> prefetch_full_;  // presumably batches ready to be consumed - TODO confirm
- Blob<Dtype> transformed_data_;  // pointed at one item's slice of a batch during load_batch
- };
- template <typename Dtype>
- class DataLayer : public BasePrefetchingDataLayer<Dtype> {  // reads Datum records through a DataReader
- public:
- explicit DataLayer(const LayerParameter& param);
- virtual ~DataLayer();
- virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top);
- // DataLayer uses DataReader instead for sharing for parallelism
- virtual inline bool ShareInParallel() const { return false; }
- virtual inline const char* type() const { return "Data"; }
- virtual inline int ExactNumBottomBlobs() const { return 0; }
- virtual inline int MinTopBlobs() const { return 1; }  // data only
- virtual inline int MaxTopBlobs() const { return 2; }  // data plus label
- protected:
- virtual void load_batch(Batch<Dtype>* batch);
- DataReader reader_;  // constructed from the layer parameter in the constructor
- };
- /**
- * @brief Provides data to the Net generated by a Filler.
- *
- * Takes no bottom blobs and produces at least one top blob. Reshape and both
- * Backward methods have empty bodies.
- *
- * TODO(dox): thorough documentation for Forward and proto params.
- */
- template <typename Dtype>
- class DummyDataLayer : public Layer<Dtype> {
- public:
- explicit DummyDataLayer(const LayerParameter& param)
- : Layer<Dtype>(param) {}
- virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top);
- // Data layers should be shared by multiple solvers in parallel
- virtual inline bool ShareInParallel() const { return true; }
- // Data layers have no bottoms, so reshaping is trivial.
- virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top) {
- }
- virtual inline const char* type() const { return "DummyData"; }
- virtual inline int ExactNumBottomBlobs() const { return 0; }
- virtual inline int MinTopBlobs() const { return 1; }
- protected:
- virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top);
- virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
- const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
- }
- virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
- const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
- }
- vector<shared_ptr<Filler<Dtype> > > fillers_;
- vector<bool> refill_;  // presumably one flag per filler - TODO confirm in dummy_data_layer.cpp
- };
- /**
- * @brief Provides data to the Net from HDF5 files.
- *
- * Takes no bottom blobs and produces at least one top blob. Keeps the list of
- * HDF5 file names, the index of the current file and row, the loaded blobs,
- * and permutation vectors for files and data. Reshape and both Backward
- * methods have empty bodies.
- *
- * TODO(dox): thorough documentation for Forward and proto params.
- */
- template <typename Dtype>
- class HDF5DataLayer : public Layer<Dtype> {
- public:
- explicit HDF5DataLayer(const LayerParameter& param)
- : Layer<Dtype>(param) {}
- virtual ~HDF5DataLayer();
- virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top);
- // Data layers should be shared by multiple solvers in parallel
- virtual inline bool ShareInParallel() const { return true; }
- // Data layers have no bottoms, so reshaping is trivial.
- virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top) {
- }
- virtual inline const char* type() const { return "HDF5Data"; }
- virtual inline int ExactNumBottomBlobs() const { return 0; }
- virtual inline int MinTopBlobs() const { return 1; }
- protected:
- virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top);
- virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top);
- virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
- const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
- }
- virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
- const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
- }
- virtual void LoadHDF5FileData(const char* filename);
- std::vector<std::string> hdf_filenames_;
- unsigned int num_files_;
- unsigned int current_file_;
- hsize_t current_row_;
- std::vector<shared_ptr<Blob<Dtype> > > hdf_blobs_;
- std::vector<unsigned int> data_permutation_;
- std::vector<unsigned int> file_permutation_;
- };
- /**
- * @brief Write blobs to disk as HDF5 files.
- *
- * Requires exactly two bottom blobs and produces no top blobs. The
- * constructor starts with file_opened_ set to false.
- *
- * TODO(dox): thorough documentation for Forward and proto params.
- */
- template <typename Dtype>
- class HDF5OutputLayer : public Layer<Dtype> {
- public:
- explicit HDF5OutputLayer(const LayerParameter& param)
- : Layer<Dtype>(param), file_opened_(false) {}
- virtual ~HDF5OutputLayer();
- virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top);
- // Data layers should be shared by multiple solvers in parallel
- virtual inline bool ShareInParallel() const { return true; }
- // Data layers have no bottoms, so reshaping is trivial.
- virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top) {
- }
- virtual inline const char* type() const { return "HDF5Output"; }
- // TODO: no limit on the number of blobs
- virtual inline int ExactNumBottomBlobs() const { return 2; }
- virtual inline int ExactNumTopBlobs() const { return 0; }
- inline std::string file_name() const { return file_name_; }  // returns a copy of file_name_
- protected:
- virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top);
- virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top);
- virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
- const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
- virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
- const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
- virtual void SaveBlobs();
- bool file_opened_;
- std::string file_name_;
- hid_t file_id_;
- Blob<Dtype> data_blob_;
- Blob<Dtype> label_blob_;
- };
- /**
- * @brief Provides data to the Net from image files.
- *
- * Takes no bottom blobs and produces exactly two top blobs. Each entry of
- * lines_ pairs an image path with the float regression targets parsed for
- * that image (DataLayerSetUp reads four values per line: x1 y1 x2 y2).
- *
- * TODO(dox): thorough documentation for Forward and proto params.
- */
- template <typename Dtype>
- class ImageDataLayer : public BasePrefetchingDataLayer<Dtype> {
-  public:
-   explicit ImageDataLayer(const LayerParameter& param)
-       : BasePrefetchingDataLayer<Dtype>(param) {}
-   virtual ~ImageDataLayer();
-   virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
-       const vector<Blob<Dtype>*>& top);
-   virtual inline const char* type() const { return "ImageData"; }
-   virtual inline int ExactNumBottomBlobs() const { return 0; }
-   virtual inline int ExactNumTopBlobs() const { return 2; }
-   // NOTE(review): the members below are public in this listing, while the
-   // sibling layers in this header declare the equivalent members under
-   // "protected:". Access is left unchanged here; confirm whether the label was
-   // dropped by accident.
-   // (image path, regression targets) for every line of the list file.
-   // Written with "> >" like the rest of this header so it also parses
-   // without C++11 support.
-   vector<std::pair<std::string, std::vector<float> > > lines_;
-   shared_ptr<Caffe::RNG> prefetch_rng_;
-   virtual void ShuffleImages();
-   virtual void load_batch(Batch<Dtype>* batch);
-   int lines_id_;  // index into lines_ of the next image to load
- };
- /**
- * @brief Provides data to the Net from memory.
- *
- * Takes no bottom blobs and produces exactly two top blobs. Data is supplied
- * through AddDatumVector, AddMatVector (only when built with USE_OPENCV) or
- * Reset with raw pointers.
- *
- * NOTE(review): AddMatVector still takes one int label per image, whereas
- * AddDatumVector copies every value of the repeated float Datum label;
- * confirm whether multi-value labels are meant to be supported on both paths.
- *
- * TODO(dox): thorough documentation for Forward and proto params.
- */
- template <typename Dtype>
- class MemoryDataLayer : public BaseDataLayer<Dtype> {
- public:
- explicit MemoryDataLayer(const LayerParameter& param)
- : BaseDataLayer<Dtype>(param), has_new_data_(false) {}
- virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top);
- virtual inline const char* type() const { return "MemoryData"; }
- virtual inline int ExactNumBottomBlobs() const { return 0; }
- virtual inline int ExactNumTopBlobs() const { return 2; }
- virtual void AddDatumVector(const vector<Datum>& datum_vector);
- #ifdef USE_OPENCV
- virtual void AddMatVector(const vector<cv::Mat>& mat_vector,
- const vector<int>& labels);
- #endif // USE_OPENCV
- // Reset should accept const pointers, but can't, because the memory
- // will be given to Blob, which is mutable
- void Reset(Dtype* data, Dtype* label, int n);
- void set_batch_size(int new_size);
- int batch_size() { return batch_size_; }
- int channels() { return channels_; }
- int height() { return height_; }
- int width() { return width_; }
- protected:
- virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top);
- int batch_size_, channels_, height_, width_, size_;  // size_ = channels_ * height_ * width_
- Dtype* data_;  // set by Reset; not owned by this member
- Dtype* labels_;  // set by Reset; not owned by this member
- int n_;  // item count passed to Reset
- size_t pos_;  // index of the first item of the next batch, wraps at n_
- Blob<Dtype> added_data_;
- Blob<Dtype> added_label_;
- bool has_new_data_;  // true between an Add*Vector call and the last batch being served
- };
- /**
- * @brief Provides data to the Net from windows of images files, specified
- * by a window data file.
- *
- * Takes no bottom blobs and produces exactly two top blobs.
- *
- * TODO(dox): thorough documentation for Forward and proto params.
- */
- template <typename Dtype>
- class WindowDataLayer : public BasePrefetchingDataLayer<Dtype> {
- public:
- explicit WindowDataLayer(const LayerParameter& param)
- : BasePrefetchingDataLayer<Dtype>(param) {}
- virtual ~WindowDataLayer();
- virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top);
- virtual inline const char* type() const { return "WindowData"; }
- virtual inline int ExactNumBottomBlobs() const { return 0; }
- virtual inline int ExactNumTopBlobs() const { return 2; }
- protected:
- virtual unsigned int PrefetchRand();
- virtual void load_batch(Batch<Dtype>* batch);
- shared_ptr<Caffe::RNG> prefetch_rng_;
- vector<std::pair<std::string, vector<int> > > image_database_;
- enum WindowField { IMAGE_INDEX, LABEL, OVERLAP, X1, Y1, X2, Y2, NUM };  // NUM == 7, the count of preceding fields
- vector<vector<float> > fg_windows_;  // presumably foreground windows - TODO confirm
- vector<vector<float> > bg_windows_;  // presumably background windows - TODO confirm
- Blob<Dtype> data_mean_;
- vector<Dtype> mean_values_;
- bool has_mean_file_;
- bool has_mean_values_;
- bool cache_images_;
- vector<std::pair<std::string, Datum > > image_database_cache_;
- };
- /**
- * @brief Provides data to the Net from image files.
- *
- * Takes no bottom blobs and produces exactly two top blobs. Each entry of
- * lines_ pairs a string with a shared vector of Dtype label values.
- *
- * TODO(dox): thorough documentation for Forward and proto params.
- */
- template <typename Dtype>
- class MultiLabelImageDataLayer : public BasePrefetchingDataLayer<Dtype> {
- public:
- explicit MultiLabelImageDataLayer(const LayerParameter& param)
- : BasePrefetchingDataLayer<Dtype>(param) {}
- virtual ~MultiLabelImageDataLayer();
- virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top);
- virtual inline const char* type() const { return "MultiLabelImageData"; }
- virtual inline int ExactNumBottomBlobs() const { return 0; }
- virtual inline int ExactNumTopBlobs() const { return 2; }
- protected:
- shared_ptr<Caffe::RNG> prefetch_rng_;
- virtual void ShuffleImages();
- virtual void load_batch(Batch<Dtype>* batch);
- vector<std::pair<std::string, shared_ptr<vector<Dtype> > > > lines_;
- int label_count;  // presumably the number of labels per image - TODO confirm
- int lines_id_;
- };
- } // namespace caffe
- #endif // CAFFE_DATA_LAYERS_HPP_
3.改动data_layer.cpp
- #ifdef USE_OPENCV
- #include <opencv2/core/core.hpp>
- #endif // USE_OPENCV
- #include <stdint.h>
-
- #include <vector>
-
- #include "caffe/data_layers.hpp"
- #include "caffe/proto/caffe.pb.h"
- #include "caffe/util/benchmark.hpp"
-
- namespace caffe {
-
- // Forwards the layer parameter to the prefetching base class and to the
- // DataReader member.
- template <typename Dtype>
- DataLayer<Dtype>::DataLayer(const LayerParameter& param)
-     : BasePrefetchingDataLayer<Dtype>(param), reader_(param) {}
-
- // Stops the internal (prefetch) thread before the members are destroyed.
- template <typename Dtype>
- DataLayer<Dtype>::~DataLayer() { this->StopInternalThread(); }
-
- // Sizes the top blobs and every prefetch buffer from the first datum queued
- // by the reader.
- //   top[0]: batch_size x (shape inferred by the data transformer)
- //   top[1]: batch_size x label_num x 1 x 1, only when labels are output;
- //           label_num is the number of label values stored in that datum
- //           (4 for x1, y1, x2, y2 boxes).
- template <typename Dtype>
- void DataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
-     const vector<Blob<Dtype>*>& top) {
-   const int batch_size = this->layer_param_.data_param().batch_size();
-   // Read a data point, and use it to initialize the top blob.
-   Datum& datum = *(reader_.full().peek());
-   // Use data_transformer to infer the expected blob shape from datum.
-   vector<int> top_shape = this->data_transformer_->InferBlobShape(datum);
-   this->transformed_data_.Reshape(top_shape);
-   // Reshape top[0] and prefetch_data according to the batch_size.
-   top_shape[0] = batch_size;
-   top[0]->Reshape(top_shape);
-   for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
-     this->prefetch_[i].data_.Reshape(top_shape);
-   }
-   LOG(INFO) << "output data size: " << top[0]->num() << ","
-       << top[0]->channels() << "," << top[0]->height() << ","
-       << top[0]->width();
-   // label
-   if (this->output_labels_) {
-     // Take the label width from the data rather than hard-coding 4:
-     // load_batch copies datum.label_size() values per item, so a fixed width
-     // would overflow the buffer for datums with more than four values.
-     const int label_num = datum.label_size();
-     CHECK_GT(label_num, 0) << "Datum carries no label values";
-     top[1]->Reshape(batch_size, label_num, 1, 1);
-     for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
-       this->prefetch_[i].label_.Reshape(batch_size, label_num, 1, 1);
-     }
-   }
- }
-
- // This function is called on prefetch thread
- // Fills `batch` with batch_size transformed items popped from the reader and,
- // when labels are output, copies every label value of each datum into the
- // matching row of the label blob.
- template<typename Dtype>
- void DataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
-   CPUTimer batch_timer;
-   batch_timer.Start();
-   double read_time = 0;
-   double trans_time = 0;
-   CPUTimer timer;
-   CHECK(batch->data_.count());
-   CHECK(this->transformed_data_.count());
-   // Reshape according to the first datum of each batch
-   // on single input batches allows for inputs of varying dimension.
-   const int batch_size = this->layer_param_.data_param().batch_size();
-   Datum& first_datum = *(reader_.full().peek());
-   // Use data_transformer to infer the expected blob shape from datum.
-   vector<int> top_shape =
-       this->data_transformer_->InferBlobShape(first_datum);
-   this->transformed_data_.Reshape(top_shape);
-   // Reshape batch according to the batch_size.
-   top_shape[0] = batch_size;
-   batch->data_.Reshape(top_shape);
-   Dtype* top_data = batch->data_.mutable_cpu_data();
-   Dtype* top_label = NULL;  // suppress warnings about uninitialized variables
-   int label_num = 0;  // label values reserved per item in batch->label_
-   if (this->output_labels_) {
-     top_label = batch->label_.mutable_cpu_data();
-     label_num = batch->label_.count(1);
-   }
-   for (int item_id = 0; item_id < batch_size; ++item_id) {
-     timer.Start();
-     // get a datum
-     Datum& datum = *(reader_.full().pop("Waiting for data"));
-     read_time += timer.MicroSeconds();
-     timer.Start();
-     // Apply data transformations (mirror, scale, crop...)
-     const int offset = batch->data_.offset(item_id);
-     this->transformed_data_.set_cpu_data(top_data + offset);
-     this->data_transformer_->Transform(datum, &(this->transformed_data_));
-     // Copy label.
-     if (this->output_labels_) {
-       // The row width comes from the label blob, not from each datum, so a
-       // datum with a different number of values is reported instead of
-       // writing past the end of the buffer or misaligning later rows.
-       CHECK_EQ(datum.label_size(), label_num)
-           << "Datum label count does not match the label blob width";
-       Dtype* item_label = top_label + item_id * label_num;
-       for (int label_i = 0; label_i < label_num; ++label_i) {
-         item_label[label_i] = datum.label(label_i);
-       }
-     }
-     trans_time += timer.MicroSeconds();
-     // Hand the datum back to the reader for reuse.
-     reader_.free().push(const_cast<Datum*>(&datum));
-   }
-   timer.Stop();
-   batch_timer.Stop();
-   DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
-   DLOG(INFO) << " Read time: " << read_time / 1000 << " ms.";
-   DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
- }
-
- INSTANTIATE_CLASS(DataLayer);
- REGISTER_LAYER_CLASS(Data);
-
- } // namespace caffe
4.修改image_data_layer.cpp中label部分
- #ifdef USE_OPENCV
- #include <opencv2/core/core.hpp>
-
- #include <fstream> // NOLINT(readability/streams)
- #include <iostream> // NOLINT(readability/streams)
- #include <string>
- #include <utility>
- #include <vector>
-
- #include "caffe/data_layers.hpp"
- #include "caffe/util/benchmark.hpp"
- #include "caffe/util/io.hpp"
- #include "caffe/util/math_functions.hpp"
- #include "caffe/util/rng.hpp"
-
- namespace caffe {
-
- // Stops the internal (prefetch) thread before the members are destroyed.
- // The destructor name is written without a template argument list, matching
- // DataLayer's destructor; the "~ImageDataLayer<Dtype>()" spelling is rejected
- // by newer compilers in C++20 mode.
- template <typename Dtype>
- ImageDataLayer<Dtype>::~ImageDataLayer() {
-   this->StopInternalThread();
- }
-
- // Parses the list file named by image_data_param.source (one image per line,
- // "<path> x1 y1 x2 y2"), optionally shuffles and skips entries, then sizes
- // the top blobs and prefetch buffers:
- //   top[0]: batch_size x (shape inferred from the first image)
- //   top[1]: batch_size x (label values per image) x 1 x 1
- // Fails with a CHECK when the list file cannot be opened or yields no entry.
- template <typename Dtype>
- void ImageDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
-     const vector<Blob<Dtype>*>& top) {
-   const int new_height = this->layer_param_.image_data_param().new_height();
-   const int new_width = this->layer_param_.image_data_param().new_width();
-   const bool is_color = this->layer_param_.image_data_param().is_color();
-   string root_folder = this->layer_param_.image_data_param().root_folder();
-   CHECK((new_height == 0 && new_width == 0) ||
-       (new_height > 0 && new_width > 0)) << "Current implementation requires "
-       "new_height and new_width to be set at the same time.";
-   // Read the file with filenames and labels
-   const string& source = this->layer_param_.image_data_param().source();
-   LOG(INFO) << "Opening file " << source;
-   std::ifstream infile(source.c_str());
-   CHECK(infile.is_open()) << "Could not open list file " << source;
-   string filename;
-   float x1, y1, x2, y2;
-   while (infile >> filename >> x1 >> y1 >> x2 >> y2) {
-     std::vector<float> vec_label;
-     vec_label.push_back(x1);
-     vec_label.push_back(y1);
-     vec_label.push_back(x2);
-     vec_label.push_back(y2);
-     lines_.push_back(std::make_pair(filename, vec_label));
-   }
-   // lines_[lines_id_] is dereferenced below, so an empty list must stop here.
-   CHECK(!lines_.empty()) << "No image entries could be read from " << source;
-   if (this->layer_param_.image_data_param().shuffle()) {
-     // randomly shuffle data
-     LOG(INFO) << "Shuffling data";
-     const unsigned int prefetch_rng_seed = caffe_rng_rand();
-     prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed));
-     ShuffleImages();
-   }
-   LOG(INFO) << "A total of " << lines_.size() << " images.";
-   lines_id_ = 0;
-   // Check if we would need to randomly skip a few data points
-   if (this->layer_param_.image_data_param().rand_skip()) {
-     unsigned int skip = caffe_rng_rand() %
-         this->layer_param_.image_data_param().rand_skip();
-     LOG(INFO) << "Skipping first " << skip << " data points.";
-     CHECK_GT(lines_.size(), skip) << "Not enough points to skip";
-     lines_id_ = skip;
-   }
-   // Read an image, and use it to initialize the top blob.
-   cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
-       new_height, new_width, is_color);
-   CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
-   // Use data_transformer to infer the expected blob shape from a cv_image.
-   vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);
-   this->transformed_data_.Reshape(top_shape);
-   // Reshape prefetch_data and top[0] according to the batch_size.
-   const int batch_size = this->layer_param_.image_data_param().batch_size();
-   CHECK_GT(batch_size, 0) << "Positive batch size required";
-   top_shape[0] = batch_size;
-   for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
-     this->prefetch_[i].data_.Reshape(top_shape);
-   }
-   top[0]->Reshape(top_shape);
-   LOG(INFO) << "output data size: " << top[0]->num() << ","
-       << top[0]->channels() << "," << top[0]->height() << ","
-       << top[0]->width();
-   // label: one row per image, as wide as the parsed label vector (the parser
-   // above always stores four values).
-   const int label_num = static_cast<int>(lines_[lines_id_].second.size());
-   top[1]->Reshape(batch_size, label_num, 1, 1);
-   for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
-     this->prefetch_[i].label_.Reshape(batch_size, label_num, 1, 1);
-   }
- }
-
- // Shuffles lines_ with the generator owned by prefetch_rng_.
- template <typename Dtype>
- void ImageDataLayer<Dtype>::ShuffleImages() {
-   caffe::rng_t* const rng =
-       static_cast<caffe::rng_t*>(prefetch_rng_->generator());
-   shuffle(lines_.begin(), lines_.end(), rng);
- }
-
- // This function is called on prefetch thread
- // Loads batch_size images starting at lines_id_, transforms each into its
- // slice of batch->data_, and copies the image's label values into the matching
- // row of batch->label_. Wraps to the first line (reshuffling if configured)
- // when the end of the list is reached.
- template <typename Dtype>
- void ImageDataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
-   CPUTimer batch_timer;
-   batch_timer.Start();
-   double read_time = 0;
-   double trans_time = 0;
-   CPUTimer timer;
-   CHECK(batch->data_.count());
-   CHECK(this->transformed_data_.count());
-   ImageDataParameter image_data_param = this->layer_param_.image_data_param();
-   const int batch_size = image_data_param.batch_size();
-   const int new_height = image_data_param.new_height();
-   const int new_width = image_data_param.new_width();
-   const bool is_color = image_data_param.is_color();
-   string root_folder = image_data_param.root_folder();
-   // Reshape according to the first image of each batch
-   // on single input batches allows for inputs of varying dimension.
-   cv::Mat first_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
-       new_height, new_width, is_color);
-   CHECK(first_img.data) << "Could not load " << lines_[lines_id_].first;
-   // Use data_transformer to infer the expected blob shape from a cv_img.
-   vector<int> top_shape = this->data_transformer_->InferBlobShape(first_img);
-   this->transformed_data_.Reshape(top_shape);
-   // Reshape batch according to the batch_size.
-   top_shape[0] = batch_size;
-   batch->data_.Reshape(top_shape);
-   Dtype* prefetch_data = batch->data_.mutable_cpu_data();
-   // The label pointer must reference the batch's label blob; it was previously
-   // left NULL and then written through, which crashes on the first item.
-   Dtype* prefetch_label = batch->label_.mutable_cpu_data();
-   const int label_num = batch->label_.count(1);  // values reserved per item
-   // datum scales
-   const int lines_size = lines_.size();
-   for (int item_id = 0; item_id < batch_size; ++item_id) {
-     // get a blob
-     timer.Start();
-     CHECK_GT(lines_size, lines_id_);
-     cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
-         new_height, new_width, is_color);
-     CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
-     read_time += timer.MicroSeconds();
-     timer.Start();
-     // Apply transformations (mirror, crop...) to the image
-     const int offset = batch->data_.offset(item_id);
-     this->transformed_data_.set_cpu_data(prefetch_data + offset);
-     this->data_transformer_->Transform(cv_img, &(this->transformed_data_));
-     trans_time += timer.MicroSeconds();
-     // Copy this image's label values into row item_id of the label blob.
-     const std::vector<float>& labels = lines_[lines_id_].second;
-     CHECK_EQ(static_cast<int>(labels.size()), label_num)
-         << "Label count does not match the label blob width";
-     Dtype* item_label = prefetch_label + item_id * label_num;
-     for (int label_i = 0; label_i < label_num; ++label_i) {
-       item_label[label_i] = labels[label_i];
-     }
-     // go to the next iter
-     lines_id_++;
-     if (lines_id_ >= lines_size) {
-       // We have reached the end. Restart from the first.
-       DLOG(INFO) << "Restarting data prefetching from start.";
-       lines_id_ = 0;
-       if (this->layer_param_.image_data_param().shuffle()) {
-         ShuffleImages();
-       }
-     }
-   }
-   batch_timer.Stop();
-   DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
-   DLOG(INFO) << " Read time: " << read_time / 1000 << " ms.";
-   DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
- }
-
- INSTANTIATE_CLASS(ImageDataLayer);
- REGISTER_LAYER_CLASS(ImageData);
-
- } // namespace caffe
- #endif // USE_OPENCV
5.修改memory_data_layer.cpp
- #ifdef USE_OPENCV
- #include <opencv2/core/core.hpp>
- #endif // USE_OPENCV
-
- #include <vector>
-
- #include "caffe/data_layers.hpp"
-
- namespace caffe {
-
- // Reads batch size and image dimensions from memory_data_param, then shapes
- // the two top blobs and the internal added_data_ / added_label_ blobs.
- // Labels are laid out as a 1-D blob with one entry per batch item.
- template <typename Dtype>
- void MemoryDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
-     const vector<Blob<Dtype>*>& top) {
-   batch_size_ = this->layer_param_.memory_data_param().batch_size();
-   channels_ = this->layer_param_.memory_data_param().channels();
-   height_ = this->layer_param_.memory_data_param().height();
-   width_ = this->layer_param_.memory_data_param().width();
-   size_ = channels_ * height_ * width_;
-   CHECK_GT(batch_size_ * size_, 0) <<
-       "batch_size, channels, height, and width must be specified and"
-       " positive in memory_data_param";
-   // Data blobs: batch_size x channels x height x width.
-   top[0]->Reshape(batch_size_, channels_, height_, width_);
-   added_data_.Reshape(batch_size_, channels_, height_, width_);
-   // Label blobs: a single axis of length batch_size.
-   vector<int> one_label_per_item;
-   one_label_per_item.push_back(batch_size_);
-   top[1]->Reshape(one_label_per_item);
-   added_label_.Reshape(one_label_per_item);
-   // No external memory has been attached yet.
-   data_ = NULL;
-   labels_ = NULL;
-   // Touch the CPU data of both internal blobs.
-   added_data_.cpu_data();
-   added_label_.cpu_data();
- }
-
- // Transforms the given datums into added_data_, copies all label values of
- // each datum into added_label_, and points the layer at those buffers via
- // Reset(). The number of datums must be a positive multiple of batch_size_,
- // and every datum must carry the same number of label values as the first.
- template <typename Dtype>
- void MemoryDataLayer<Dtype>::AddDatumVector(const vector<Datum>& datum_vector) {
-   CHECK(!has_new_data_) <<
-       "Can't add data until current data has been consumed.";
-   const size_t num = datum_vector.size();
-   CHECK_GT(num, 0) << "There is no datum to add.";
-   CHECK_EQ(num % batch_size_, 0) <<
-       "The added data must be a multiple of the batch size.";
-   // Width of one label row. The label blob used to be shaped (num, 1, 1, 1)
-   // while label_size() values were written per item, which overran the buffer
-   // for multi-value labels. Unlabelled datums keep the old one-wide shape.
-   const int label_num = datum_vector[0].label_size();
-   const int label_dim = label_num > 0 ? label_num : 1;
-   added_data_.Reshape(num, channels_, height_, width_);
-   added_label_.Reshape(num, label_dim, 1, 1);
-   // Apply data transformations (mirror, scale, crop...)
-   this->data_transformer_->Transform(datum_vector, &added_data_);
-   // Copy Labels
-   Dtype* top_label = added_label_.mutable_cpu_data();
-   for (size_t item_id = 0; item_id < num; ++item_id) {
-     const Datum& datum = datum_vector[item_id];
-     CHECK_EQ(datum.label_size(), label_num)
-         << "All datums must carry the same number of label values";
-     Dtype* item_label = top_label + item_id * label_dim;
-     for (int label_i = 0; label_i < label_num; ++label_i) {
-       item_label[label_i] = datum.label(label_i);
-     }
-   }
-   // num_images == batch_size_
-   Dtype* top_data = added_data_.mutable_cpu_data();
-   Reset(top_data, top_label, num);
-   has_new_data_ = true;
- }
-
- #ifdef USE_OPENCV
- // Transforms the given OpenCV images into added_data_, stores one integer
- // label per image in added_label_, and points the layer at those buffers via
- // Reset(). The number of images must be a positive multiple of batch_size_
- // and `labels` must hold exactly one entry per image.
- template <typename Dtype>
- void MemoryDataLayer<Dtype>::AddMatVector(const vector<cv::Mat>& mat_vector,
-     const vector<int>& labels) {
-   const size_t num = mat_vector.size();
-   CHECK(!has_new_data_) <<
-       "Can't add mat until current data has been consumed.";
-   CHECK_GT(num, 0) << "There is no mat to add";
-   CHECK_EQ(num % batch_size_, 0) <<
-       "The added data must be a multiple of the batch size.";
-   // labels[item_id] is read for every image below; a shorter vector would be
-   // read out of bounds.
-   CHECK_EQ(labels.size(), num) << "Need exactly one label per mat";
-   added_data_.Reshape(num, channels_, height_, width_);
-   added_label_.Reshape(num, 1, 1, 1);
-   // Apply data transformations (mirror, scale, crop...)
-   this->data_transformer_->Transform(mat_vector, &added_data_);
-   // Copy Labels
-   Dtype* top_label = added_label_.mutable_cpu_data();
-   for (size_t item_id = 0; item_id < num; ++item_id) {
-     top_label[item_id] = labels[item_id];
-   }
-   // num_images == batch_size_
-   Dtype* top_data = added_data_.mutable_cpu_data();
-   Reset(top_data, top_label, num);
-   has_new_data_ = true;
- }
- #endif // USE_OPENCV
-
- // Points the layer at caller-provided data and label arrays holding n items
- // and rewinds the read position. Both pointers must be non-null and n must be
- // a multiple of batch_size_. The arrays are used as-is: no transformation is
- // applied, and a warning is logged if a transform_param is configured.
- template <typename Dtype>
- void MemoryDataLayer<Dtype>::Reset(Dtype* data, Dtype* labels, int n) {
-   CHECK(data);
-   CHECK(labels);
-   CHECK_EQ(n % batch_size_, 0) << "n must be a multiple of batch size";
-   // Warn with transformation parameters since a memory array is meant to
-   // be generic and no transformations are done with Reset().
-   LOG_IF(WARNING, this->layer_param_.has_transform_param())
-       << this->type() << " does not transform array data on Reset()";
-   pos_ = 0;
-   n_ = n;
-   labels_ = labels;
-   data_ = data;
- }
-
- // Changes the batch size and reshapes the internal data/label blobs to match.
- // Only allowed while no added data is waiting to be consumed.
- template <typename Dtype>
- void MemoryDataLayer<Dtype>::set_batch_size(int new_size) {
-   CHECK(!has_new_data_) <<
-       "Can't change batch_size until current data has been consumed.";
-   batch_size_ = new_size;
-   // One label per item; data keeps the configured channel/height/width.
-   added_label_.Reshape(batch_size_, 1, 1, 1);
-   added_data_.Reshape(batch_size_, channels_, height_, width_);
- }
-
- // Exposes the next batch_size_ items of the attached memory through the top
- // blobs without copying, then advances (and wraps) the read position.
- //   top[0]: batch_size_ x channels_ x height_ x width_
- //   top[1]: batch_size_ x label_dim x 1 x 1
- template <typename Dtype>
- void MemoryDataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
-     const vector<Blob<Dtype>*>& top) {
-   CHECK(data_) << "MemoryDataLayer needs to be initalized by calling Reset";
-   // Label values per item, taken from added_label_'s trailing axes. This is 1
-   // whenever added_label_ holds one value per item, which keeps the original
-   // one-label-per-item behaviour; a wider added_label_ makes both the top
-   // shape and the pointer stride follow it.
-   // NOTE(review): arrays handed to Reset() directly are assumed to use the
-   // same label width as added_label_ - confirm against callers.
-   const int label_dim = added_label_.count(1);
-   top[0]->Reshape(batch_size_, channels_, height_, width_);
-   top[1]->Reshape(batch_size_, label_dim, 1, 1);
-   top[0]->set_cpu_data(data_ + pos_ * size_);
-   top[1]->set_cpu_data(labels_ + pos_ * label_dim);
-   pos_ = (pos_ + batch_size_) % n_;
-   if (pos_ == 0) {
-     // Every item has been served once; new data may be added again.
-     has_new_data_ = false;
-   }
- }
-
- INSTANTIATE_CLASS(MemoryDataLayer);
- REGISTER_LAYER_CLASS(MemoryData);
-
- } // namespace caffe
6.修改convert_imageset.cpp
- // This program converts a set of images to a lmdb/leveldb by storing them
- // as Datum proto buffers.
- // Usage:
- // convert_imageset [FLAGS] ROOTFOLDER/ LISTFILE DB_NAME
- //
- // where ROOTFOLDER is the root folder that holds all the images, and LISTFILE
- // should be a list of files as well as their four float regression targets,
- // in the format as
- // subfolder1/file1.JPEG x1 y1 x2 y2
- // ....
-
- #include <algorithm>
- #include <fstream> // NOLINT(readability/streams)
- #include <string>
- #include <utility>
- #include <vector>
-
- #include "boost/scoped_ptr.hpp"
- #include "gflags/gflags.h"
- #include "glog/logging.h"
-
- #include "caffe/proto/caffe.pb.h"
- #include "caffe/util/db.hpp"
- #include "caffe/util/io.hpp"
- #include "caffe/util/rng.hpp"
-
- using namespace caffe; // NOLINT(build/namespaces)
- using std::pair;
- using boost::scoped_ptr;
-
- DEFINE_bool(gray, false,
- "When this option is on, treat images as grayscale ones");  // main() uses is_color = !FLAGS_gray
- DEFINE_bool(shuffle, false,
- "Randomly shuffle the order of images and their labels");  // applied to the parsed list in main()
- DEFINE_string(backend, "lmdb",
- "The backend {lmdb, leveldb} for storing the result");  // passed to db::GetDB in main()
- DEFINE_int32(resize_width, 0, "Width images are resized to");  // negative values are clamped to 0 in main()
- DEFINE_int32(resize_height, 0, "Height images are resized to");  // negative values are clamped to 0 in main()
- DEFINE_bool(check_size, false,
- "When this option is on, check that all the datum have the same size");
- DEFINE_bool(encoded, false,
- "When this option is on, the encoded image will be save in datum");
- DEFINE_string(encode_type, "",
- "Optional: What type should we encode the image as ('png','jpg',...).");  // when empty and encoded is set, main() guesses from the file extension
-
- // Entry point of the regression-label variant of convert_imageset.
- //
- // Usage: convert_imageset [FLAGS] ROOTFOLDER/ LISTFILE DB_NAME
- //   argv[1]  root folder prepended to every image path of the list file
- //   argv[2]  list file; every line is "<image path> <f1> <f2> <f3> <f4>",
- //            i.e. an image path followed by four float label values
- //   argv[3]  name of the database to create
- //
- // Returns 1 when too few arguments are given or the list file cannot be
- // opened, 0 otherwise. Without USE_OPENCV the tool only logs a fatal error.
- int main(int argc, char** argv) {
- #ifdef USE_OPENCV
-   //::google::InitGoogleLogging(argv[0]);
-   // Print output to stderr (while still logging)
-   FLAGS_alsologtostderr = 1;
-
- #ifndef GFLAGS_GFLAGS_H_
-   namespace gflags = google;
- #endif
-
-   gflags::SetUsageMessage("Convert a set of images to the leveldb/lmdb\n"
-       "format used as input for Caffe.\n"
-       "Usage:\n"
-       " convert_imageset [FLAGS] ROOTFOLDER/ LISTFILE DB_NAME\n"
-       "The ImageNet dataset for the training demo is at\n"
-       " http://www.image-net.org/download-images\n");
-   caffe::GlobalInit(&argc, &argv);
-
-   if (argc < 4) {
-     gflags::ShowUsageWithFlagsRestrict(argv[0], "tools/convert_imageset");
-     return 1;
-   }
-
-   const bool is_color = !FLAGS_gray;
-   const bool check_size = FLAGS_check_size;
-   const bool encoded = FLAGS_encoded;
-   const string encode_type = FLAGS_encode_type;
-
-   std::ifstream infile(argv[2]);
-   // Fail loudly instead of silently producing an empty database.
-   if (!infile.is_open()) {
-     LOG(ERROR) << "Unable to open list file '" << argv[2] << "'";
-     return 1;
-   }
-
-   // The stock tool read one integer label per line; this version reads four
-   // float values per image and keeps them as a vector.
-   std::vector<std::pair<std::string, std::vector<float> > > lines;
-   std::string filename;
-   float x1, y1, x2, y2;
-   // Reading stops at the first line that does not match the expected format.
-   while (infile >> filename >> x1 >> y1 >> x2 >> y2) {
-     std::vector<float> vec_label;
-     vec_label.push_back(x1);
-     vec_label.push_back(y1);
-     vec_label.push_back(x2);
-     vec_label.push_back(y2);
-     lines.push_back(std::make_pair(filename, vec_label));
-   }
-   if (FLAGS_shuffle) {
-     // randomly shuffle data
-     LOG(INFO) << "Shuffling data";
-     shuffle(lines.begin(), lines.end());
-   }
-   LOG(INFO) << "A total of " << lines.size() << " images.";
-
-   if (encode_type.size() && !encoded)
-     LOG(INFO) << "encode_type specified, assuming encoded=true.";
-
-   int resize_height = std::max<int>(0, FLAGS_resize_height);
-   int resize_width = std::max<int>(0, FLAGS_resize_width);
-
-   // Create new DB
-   scoped_ptr<db::DB> db(db::GetDB(FLAGS_backend));
-   db->Open(argv[3], db::NEW);
-   scoped_ptr<db::Transaction> txn(db->NewTransaction());
-
-   // Storing to db
-   std::string root_folder(argv[1]);
-   Datum datum;
-   int count = 0;
-   const int kMaxKeyLength = 256;
-   char key_cstr[kMaxKeyLength];
-   int data_size = 0;
-   bool data_size_initialized = false;
-
-   // The index stays an int because it is formatted with "%08d" below.
-   const int num_lines = static_cast<int>(lines.size());
-   for (int line_id = 0; line_id < num_lines; ++line_id) {
-     std::string enc = encode_type;
-     if (encoded && !enc.size()) {
-       // Guess the encoding type from the file name
-       const string& fn = lines[line_id].first;
-       const size_t p = fn.rfind('.');
-       if (p == fn.npos) {
-         // No extension: substr(npos) would throw std::out_of_range, so warn
-         // and leave enc empty; ReadImageToDatum then takes its non-encoded
-         // path for this image.
-         LOG(WARNING) << "Failed to guess the encoding of '" << fn << "'";
-       } else {
-         enc = fn.substr(p);
-         std::transform(enc.begin(), enc.end(), enc.begin(), ::tolower);
-       }
-     }
-     const bool status = ReadImageToDatum(root_folder + lines[line_id].first,
-         lines[line_id].second, resize_height, resize_width, is_color,
-         enc, &datum);
-     // Images that cannot be read are skipped and not counted.
-     if (status == false) continue;
-     if (check_size) {
-       if (!data_size_initialized) {
-         // The first successfully read image defines the reference size.
-         data_size = datum.channels() * datum.height() * datum.width();
-         data_size_initialized = true;
-       } else {
-         const std::string& data = datum.data();
-         CHECK_EQ(data.size(), data_size) << "Incorrect data field size "
-             << data.size();
-       }
-     }
-     // sequential
-     int length = sprintf_s(key_cstr, kMaxKeyLength, "%08d_%s", line_id,
-         lines[line_id].first.c_str());
-
-     // Put in db
-     string out;
-     CHECK(datum.SerializeToString(&out));
-     txn->Put(string(key_cstr, length), out);
-
-     if (++count % 1000 == 0) {
-       // Commit db
-       txn->Commit();
-       txn.reset(db->NewTransaction());
-       LOG(INFO) << "Processed " << count << " files.";
-     }
-   }
-   // write the last batch
-   if (count % 1000 != 0) {
-     txn->Commit();
-     LOG(INFO) << "Processed " << count << " files.";
-   }
- #else
-   LOG(FATAL) << "This tool requires OpenCV; compile with USE_OPENCV.";
- #endif // USE_OPENCV
-   return 0;
- }
7.修改io.cpp (只贴了部分需要修改的程序)
- // Loads the image `filename` through ReadImageToCVMat (passing on height,
- // width and is_color) and fills `datum` with its data and with `labels`.
- //
- // With an empty `encoding` the image is stored through CVMatToDatum.
- // Otherwise the datum is marked as encoded: if the colour layout matches,
- // no resize was requested and matchExt accepts the file name for
- // `encoding`, the file is stored through ReadFileToDatum; in every other
- // case the image is re-encoded with cv::imencode.
- //
- // Returns false when the image could not be loaded; otherwise true, or the
- // result of ReadFileToDatum on the pass-through path.
- bool ReadImageToDatum(const string& filename, const std::vector<float> labels,
-     const int height, const int width, const bool is_color,
-     const std::string & encoding, Datum* datum) {
-   cv::Mat cv_img = ReadImageToCVMat(filename, height, width, is_color);
-   if (!cv_img.data) {
-     return false;
-   }
-
-   if (encoding.empty()) {
-     // Non-encoded path.
-     CVMatToDatum(cv_img, datum);
-     // Replaces the former scalar datum->set_label(label).
-     datum->mutable_label()->Clear();
-     for (std::vector<float>::const_iterator it = labels.begin();
-          it != labels.end(); ++it) {
-       datum->add_label(*it);
-     }
-     return true;
-   }
-
-   // Encoded path: reuse the file as-is when nothing would change.
-   const bool same_color_layout = (cv_img.channels() == 3) == is_color;
-   if (same_color_layout && !height && !width &&
-       matchExt(filename, encoding)) {
-     return ReadFileToDatum(filename, labels, datum);
-   }
-
-   std::vector<uchar> encoded_bytes;
-   cv::imencode("."+encoding, cv_img, encoded_bytes);
-   datum->set_data(std::string(reinterpret_cast<char*>(&encoded_bytes[0]),
-       encoded_bytes.size()));
-   // Replaces the former scalar datum->set_label(label).
-   datum->mutable_label()->Clear();
-   for (std::vector<float>::const_iterator it = labels.begin();
-        it != labels.end(); ++it) {
-     datum->add_label(*it);
-   }
-   datum->set_encoded(true);
-   return true;
- }
- #endif // USE_OPENCV
-
- // Reads the complete contents of `filename` as binary data into `datum`,
- // replaces the datum's labels with `labels` and marks the datum as encoded.
- //
- // Returns false, leaving `datum` untouched, when the file cannot be opened,
- // its size cannot be determined, or its contents cannot be read completely.
- bool ReadFileToDatum(const string& filename, const std::vector<float> labels,
-     Datum* datum) {
-   // ios::ate positions the stream at the end so tellg() yields the size.
-   fstream file(filename.c_str(), ios::in|ios::binary|ios::ate);
-   if (!file.is_open()) {
-     return false;
-   }
-
-   const std::streampos size = file.tellg();
-   // tellg() reports failure as -1; never feed that into the string size.
-   if (size == std::streampos(-1)) {
-     return false;
-   }
-
-   std::string buffer(static_cast<size_t>(size), ' ');
-   file.seekg(0, ios::beg);
-   file.read(&buffer[0], size);
-   // A short or failed read would otherwise store placeholder spaces as data.
-   if (!file) {
-     return false;
-   }
-   file.close();
-
-   datum->set_data(buffer);
-   // Replaces the former scalar datum->set_label(label).
-   datum->mutable_label()->Clear();
-   for (size_t label_i = 0; label_i < labels.size(); ++label_i) {
-     datum->add_label(labels[label_i]);
-   }
-   datum->set_encoded(true);
-   return true;
- }
8.修改io.hpp (只贴了部分需要修改的程序)
- // Reads the raw contents of `filename` into `datum` and attaches `labels`
- // to it; returns false when the file cannot be read.
- bool ReadFileToDatum(const string& filename, const std::vector<float> labels, Datum* datum);
-
- // Label-free convenience overload. The stock version forwarded a scalar
- // label of -1; with vector labels the equivalent is an empty label list.
- // (A stub returning 0 here would never read the file and would always
- // report failure to the caller.)
- inline bool ReadFileToDatum(const string& filename, Datum* datum) {
-   return ReadFileToDatum(filename, std::vector<float>(), datum);
- }
-
- // Full version: reads the image `filename`, using `height`, `width`,
- // `is_color` and `encoding`, into `datum` and attaches `labels` to it.
- // All overloads below forward to this function with fixed defaults.
- bool ReadImageToDatum(const string& filename, const std::vector<float> labels,
-     const int height, const int width, const bool is_color,
-     const std::string & encoding, Datum* datum);
-
- // Explicit size and colour mode, empty encoding.
- inline bool ReadImageToDatum(const string& filename, const std::vector<float> labels,
-     const int height, const int width, const bool is_color, Datum* datum) {
-   return ReadImageToDatum(filename, labels, height, width, is_color, "", datum);
- }
-
- // Explicit size, is_color = true, empty encoding.
- inline bool ReadImageToDatum(const string& filename, const std::vector<float> labels,
-     const int height, const int width, Datum* datum) {
-   return ReadImageToDatum(filename, labels, height, width, true, "", datum);
- }
-
- // height = width = 0, explicit colour mode, empty encoding.
- inline bool ReadImageToDatum(const string& filename, const std::vector<float> labels,
-     const bool is_color, Datum* datum) {
-   return ReadImageToDatum(filename, labels, 0, 0, is_color, "", datum);
- }
-
- // height = width = 0, is_color = true, empty encoding.
- inline bool ReadImageToDatum(const string& filename, const std::vector<float> labels,
-     Datum* datum) {
-   return ReadImageToDatum(filename, labels, 0, 0, true, "", datum);
- }
-
- // height = width = 0, is_color = true, caller-supplied encoding.
- inline bool ReadImageToDatum(const string& filename, const std::vector<float> labels,
-     const std::string & encoding, Datum* datum) {
-   return ReadImageToDatum(filename, labels, 0, 0, true, encoding, datum);
- }
完成上述修改之后,重新编译即可得到新的 convert_imageset 等可执行程序。
二.将自己的数据集转成leveldb格式
基本跟http://blog.csdn.net/messiran10/article/details/49159559的流程一样,主要是以下两点需要变化:
1.样本说明文件
需要把一维的label转成4维的label,即每行的格式为"图像路径 + 4个浮点数",例如:train_samples/10007.jpg 0.491667 0.529412 0.450000 0.352941
2.模型配置文件
需要把softmax loss层(SoftmaxWithLoss)换成平方损失层(EuclideanLoss)
需要去掉accuracy层(否则会出错)
本站仅提供存储服务,所有内容均由用户发布,如发现有害或侵权内容,请点击举报。