Overview
Cactus image identification
The data come from the Kaggle Aerial Cactus Identification competition (https://www.kaggle.com/c/aerial-cactus-identification).
Download the data
# run in a terminal
kaggle competitions download -c aerial-cactus-identification
unzip train.zip
unzip test.zip
mkdir test_input
cp -r test/ test_input      # the test images end up in test_input/test/, as required later
# ls
# train.zip test.zip train test train.csv sample_submission.csv
train.csv is the label table for the training set.
sample_submission.csv is the template for the results file that is uploaded to Kaggle at the end.
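Before touching the images it is worth a quick look at the label table. The sketch below is a minimal check and assumes the two columns id and has_cactus that the sorting code further down relies on.
import pandas as pd

labels = pd.read_csv("train.csv")
print(labels.head())                        # file name of each image plus its label
print(labels["has_cactus"].value_counts())  # class balance of the training set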
Import the required packages
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import os
from shutil import copyfile, move
from tqdm import tqdm
import h5py
Check the TensorFlow version and whether a GPU is available
print(tf.__version__)
print(tf.test.is_gpu_available())
# 2.0.0-beta0
# False
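If False is printed on a machine that does have a GPU, listing the physical devices TensorFlow can see is a quick second check. A minimal sketch; on TF 2.0-era releases this API lives under tf.config.experimental.
print(tf.config.experimental.list_physical_devices('GPU'))  # an empty list means no GPU is visible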
Organize the data
Read the training label table
training_df = pd.read_csv("train.csv")
Sort the training images into two folders, one per class (cactus / no cactus)
src = "train/"
dst = "sorted_training/"
os.mkdir(dst)
os.mkdir(dst+"true")
os.mkdir(dst+"false")
with tqdm(total=len(list(training_df.iterrows()))) as pbar:
for idx, row in training_df.iterrows():
pbar.update(1)
if row["has_cactus"] == 1:
copyfile(src+row["id"], dst+"true/"+row["id"])
else:
copyfile(src+row["id"], dst+"false/"+row["id"])
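A quick sanity check that the copy step produced the expected split: the file counts per folder should match the label counts in train.csv. A minimal sketch using the variables defined above.
n_true = len(os.listdir(dst + "true"))       # images copied into sorted_training/true
n_false = len(os.listdir(dst + "false"))     # images copied into sorted_training/false
print(n_true, n_false)
print((training_df["has_cactus"] == 1).sum(),
      (training_df["has_cactus"] == 0).sum())  # expected counts from train.csv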
Move one tenth of the training images into a validation set, which will be used to monitor training.
src = "sorted_training/"
dst = "sorted_validation/"
os.mkdir(dst)
os.mkdir(dst+"true")
os.mkdir(dst+"false")
validation_df = training_df.sample(n=int(len(training_df)/10))
with tqdm(total=len(list(validation_df.iterrows()))) as pbar:
for idx, row in validation_df.iterrows():
pbar.update(1)
if row["has_cactus"] == 1:
move(src+"true/"+row["id"], dst+"true/"+row["id"])
else:
move(src+"false/"+row["id"], dst+"false/"+row["id"])
Build the model
Import the model-related packages
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import InputLayer, Input
from tensorflow.keras.layers import Conv2D, Dense, Flatten, Dropout, Activation
from tensorflow.keras.layers import BatchNormalization, Reshape, MaxPooling2D, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
Loading the data with ImageDataGenerator requires the files to be organized as follows:
data/
    sorted_training/
        true/
            1.jpg
            2.jpg
            3.jpg
        false/
            1.jpg
            2.jpg
            3.jpg
    sorted_validation/
        true/
            1.jpg
            2.jpg
            3.jpg
        false/
            1.jpg
            2.jpg
            3.jpg
    test_input/
        test/
            1.jpg
            2.jpg
            3.jpg
Read the data
batch_size = 64
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    horizontal_flip=True,
    vertical_flip=True)
train_data_dir = "sorted_training"
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    shuffle=True,
    target_size=(32, 32),
    batch_size=batch_size,
    class_mode='binary')
validation_datagen = ImageDataGenerator(rescale=1. / 255)
validation_data_dir = "sorted_validation"
validation_generator = validation_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(32, 32),
    batch_size=batch_size,
    class_mode='binary')
input_shape = (32,32,3)
num_classes = 2
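Before training, it helps to confirm how the generator maps folder names to labels, because the sigmoid output later has to be read with that mapping in mind: flow_from_directory indexes the class sub-folders alphabetically, so false becomes 0 and true becomes 1. A minimal sketch that also inspects one batch:
print(train_generator.class_indices)  # expected: {'false': 0, 'true': 1}
x_batch, y_batch = next(train_generator)
print(x_batch.shape, y_batch.shape)   # (64, 32, 32, 3) and (64,)
print(x_batch.min(), x_batch.max())   # pixels rescaled into [0, 1]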
Model definition (adapted from https://www.kaggle.com/frlemarchand/simple-cnn-using-keras/notebook)
dropout_dense_layer = 0.6
model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=input_shape))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(128, (3, 3)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Flatten())
model.add(Dense(1024))
model.add(Activation('relu'))
model.add(Dropout(dropout_dense_layer))
model.add(Dense(256))
model.add(Activation('relu'))
model.add(Dropout(dropout_dense_layer))
model.add(Dense(1))
model.add(Activation('sigmoid'))
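Because every convolution uses valid padding, the 32×32 input shrinks to a 1×1×128 feature map by the time it reaches Flatten; printing the summary is a cheap way to confirm the shapes and the parameter count before training.
model.summary()  # layer-by-layer output shapes and parameter counts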
Set the loss function and the optimizer, and use accuracy as the evaluation metric.
model.compile(loss=keras.losses.binary_crossentropy,
              optimizer=keras.optimizers.Adam(lr=0.001),
              metrics=['accuracy'])
Stop training early if val_loss has not improved for 25 epochs, and keep a checkpoint of the best model.
callbacks = [EarlyStopping(monitor='val_loss', patience=25),
             ModelCheckpoint(filepath='best_model.h5', monitor='val_loss', save_best_only=True)]
Train for up to 100 epochs.
epochs = 100
history = model.fit_generator(train_generator,
                              validation_data=validation_generator,
                              epochs=epochs,
                              verbose=1,
                              shuffle=True,
                              callbacks=callbacks)
Plot loss and val_loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.show()
Plot training and validation accuracy
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.show()
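The bare plt.plot calls above draw unlabeled curves; the sketch below overlays the same data with legends, which makes overfitting (training loss still falling while validation loss flattens) easier to spot.
fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(10, 4))
ax_loss.plot(history.history['loss'], label='train')
ax_loss.plot(history.history['val_loss'], label='validation')
ax_loss.set_title('loss')
ax_loss.legend()
ax_acc.plot(history.history['accuracy'], label='train')
ax_acc.plot(history.history['val_accuracy'], label='validation')
ax_acc.set_title('accuracy')
ax_acc.legend()
plt.show()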
Model training is finished.
Test the model
Load the best checkpoint saved during training
model.load_weights("best_model.h5")
Then load the test set
test_folder = "test_input/"
test_datagen = ImageDataGenerator(rescale=1. / 255)
test_generator = test_datagen.flow_from_directory(
    directory=test_folder,
    target_size=(32, 32),
    batch_size=1,
    class_mode='binary',
    shuffle=False)
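Because shuffle=False, the order of test_generator.filenames is fixed and will line up row by row with the prediction array, which is what makes it possible to write the submission file at the end. A quick check:
print(test_generator.samples)        # number of test images found
print(test_generator.filenames[:3])  # paths like "test/xxx.jpg", in a deterministic order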
Predict on the test set
pred = model.predict_generator(test_generator, verbose=1)
pred_binary = [0 if value < 0.50 else 1 for value in pred]
Finally, write the predictions to a CSV file for submission.
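A minimal sketch of that last step, assuming sample_submission.csv uses the columns id and has_cactus (the same names as in train.csv); it relies on shuffle=False above so that pred and test_generator.filenames are in the same order. If the competition is scored on AUC, submitting the raw probabilities in pred instead of the thresholded pred_binary may score better.
submission = pd.DataFrame({
    # flow_from_directory returns paths like "test/xxx.jpg"; keep only the file name
    "id": [os.path.basename(f) for f in test_generator.filenames],
    "has_cactus": pred_binary  # or pred.ravel() for raw probabilities
})
submission.to_csv("submission.csv", index=False)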