很多情况下,在训练卷积神经网络时,需要将自己的图片作为卷积神经网络的输入。
将自己的图片数据集用 h5py 保存为 HDF5 文件,占用空间小,使用方便。
条件:准备好自己的图片(例如 cats vs. dogs),并将两类图片分别放在两个文件夹中(我这里是 yes_tumble 与 not_tumble)。
- import os
- import numpy as np
- from PIL import Image
- import tensorflow as tf
- import matplotlib.pyplot as plt
- import sklearn
- from sklearn import preprocessing
- import h5py
- import scipy
- #导入必要的包
def get_files(file_dir, class_dirs=('not_tumble', 'yes_tumble')):
    """Collect image paths and integer labels, shuffled in unison.

    Every entry found in ``file_dir/<class_dir>`` is treated as one sample.
    The label of a sample is the index of its folder within ``class_dirs``,
    so with the default value ``not_tumble`` maps to 0 and ``yes_tumble``
    maps to 1. Pass a longer sequence for multi-class problems.

    Args:
        file_dir: Root directory that contains one sub-folder per class.
        class_dirs: Sub-folder names, ordered by the label they receive.

    Returns:
        A tuple ``(image_list, label_list)`` of two equally long lists:
        the file paths (``str``) and their labels (``int``), in the same
        random order.

    Raises:
        OSError: If one of the class folders cannot be listed.
    """
    image_list = []
    label_list = []
    for label, class_dir in enumerate(class_dirs):
        folder = os.path.join(file_dir, class_dir)
        for file_name in os.listdir(folder):
            image_list.append(os.path.join(folder, file_name))
            label_list.append(label)

    # Shuffle through an index permutation so paths and labels stay paired
    # without being packed into one string array and cast back to int.
    order = np.random.permutation(len(image_list))
    image_list = [image_list[i] for i in order]
    label_list = [label_list[i] for i in order]
    return image_list, label_list
# Root folder that contains one sub-folder per class.
train_dir = 'F:/CSISA_Picture'
image_list, label_list = get_files(train_dir)
print(len(image_list))
print(len(label_list))

# Hold out the last 20% of the shuffled samples as the test set. The size is
# derived from the data instead of being hard-coded, so datasets of any
# length work.
num_samples = len(image_list)
num_test = num_samples // 5
num_train = num_samples - num_test

# Pre-allocate the arrays; every element is overwritten by the loops below.
# NOTE: each image must already be 64x64 with 3 channels, otherwise the
# assignment into these buffers fails with a shape mismatch.
Train_image = np.empty((num_train, 64, 64, 3), dtype='float32')
Train_label = np.empty((num_train, 1), dtype='float32')
Test_image = np.empty((num_test, 64, 64, 3), dtype='float32')
Test_label = np.empty((num_test, 1), dtype='float32')

for i in range(num_train):
    Train_image[i] = plt.imread(image_list[i])
    Train_label[i] = label_list[i]

for i in range(num_test):
    Test_image[i] = plt.imread(image_list[num_train + i])
    Test_label[i] = label_list[num_train + i]

# Create a new file; the context manager closes it even if a write fails.
with h5py.File('data.h5', 'w') as f:
    f.create_dataset('X_train', data=Train_image)
    f.create_dataset('y_train', data=Train_label)
    f.create_dataset('X_test', data=Test_image)
    f.create_dataset('y_test', data=Test_label)

# Load the HDF5 dataset back. Using a context manager closes this read
# handle rather than the write handle that is already closed.
with h5py.File('data.h5', 'r') as train_dataset:
    train_set_x_orig = np.array(train_dataset['X_train'][:])  # train set features
    train_set_y_orig = np.array(train_dataset['y_train'][:])  # train set labels
    test_set_x_orig = np.array(train_dataset['X_test'][:])  # test set features
    test_set_y_orig = np.array(train_dataset['y_test'][:])  # test set labels

print(train_set_x_orig.shape)
print(train_set_y_orig.shape)
print(train_set_x_orig.max())
print(train_set_x_orig.min())
print(test_set_x_orig.shape)
print(test_set_y_orig.shape)

# Sanity check: display one training sample together with its label.
plt.imshow(train_set_x_orig[222])
print(train_set_y_orig[222])
联系客服