没有 GPU 的同学可以使用 Google Colab,它提供免费的 GPU 算力,简单好用,网上的教程也很多。
residual network on cifar 10
复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
"""Train a pre-activation ResNet on CIFAR-10 with a custom TensorFlow 2 loop."""
import os
import pickle as p
import time

import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras import models, optimizers, regularizers
from tensorflow.keras.layers import (Conv2D, AveragePooling2D,
                                     BatchNormalization, Flatten, Dense, Input,
                                     add, Activation)
from tqdm import tqdm

os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# network config
stack_n = 18  # layers = stack_n * 6 + 2
weight_decay = 1e-4

# training config
batch_size = 128
train_num = 50000
iterations_per_epoch = int(train_num / batch_size)
learning_rate = [0.1, 0.01, 0.001]
boundaries = [80 * iterations_per_epoch, 120 * iterations_per_epoch]
epoch_num = 200

# test config
test_batch_size = 200
test_num = 10000
test_iterations = int(test_num / test_batch_size)


def load_CIFAR_batch(filename):
    """Load a single pickled CIFAR-10 batch file.

    Args:
        filename: Path of one ``data_batch_*`` / ``test_batch`` file.

    Returns:
        ``(X, Y)``: ``X`` reshaped to ``(10000, 3, 32, 32)`` (channels
        first, as stored in the file) and ``Y`` as an array of labels.
    """
    # NOTE: pickle can execute arbitrary code while loading; only open
    # dataset files obtained from a trusted source.
    with open(filename, 'rb') as f:
        datadict = p.load(f, encoding='iso-8859-1')
    X = datadict['data'].reshape(10000, 3, 32, 32)
    Y = np.array(datadict['labels'])
    return X, Y


def load_CIFAR(Foldername):
    """Load the whole CIFAR-10 dataset from a folder of pickled batches.

    Args:
        Foldername: Directory containing ``data_batch_1`` .. ``data_batch_5``
            and ``test_batch``.

    Returns:
        ``(train_data, train_label, test_data, test_label)`` as float32
        arrays; images are channels-last ``(N, 32, 32, 3)`` and labels are
        one-hot ``(N, 10)``.
    """
    train_data = np.zeros([50000, 32, 32, 3], dtype=np.float32)
    train_label = np.zeros([50000, 10], dtype=np.float32)
    test_data = np.zeros([10000, 32, 32, 3], dtype=np.float32)
    test_label = np.zeros([10000, 10], dtype=np.float32)

    rows = np.arange(10000)
    for sample in range(5):
        X, Y = load_CIFAR_batch(
            os.path.join(Foldername, "data_batch_" + str(sample + 1)))
        offset = 10000 * sample
        # channels-first -> channels-last in one vectorised step
        train_data[offset:offset + 10000] = X.transpose(0, 2, 3, 1)
        train_label[rows + offset, Y] = 1

    X, Y = load_CIFAR_batch(os.path.join(Foldername, "test_batch"))
    test_data[:] = X.transpose(0, 2, 3, 1)
    test_label[rows, Y] = 1
    return train_data, train_label, test_data, test_label


def color_normalize(train_images, test_images):
    """Standardise each colour channel with the training-set statistics.

    The inputs are converted to float32 first.  ``cifar10.load_data()``
    returns uint8 arrays, and writing the normalised values back into a
    uint8 array would truncate them, so the caller must use the returned
    arrays.  Inputs that are already float32 are normalised in place.

    Args:
        train_images: Training images, channels-last ``(N, H, W, 3)``.
        test_images: Test images with the same layout.

    Returns:
        ``(train_images, test_images)`` as float32 arrays.
    """
    train_images = np.asarray(train_images, dtype=np.float32)
    test_images = np.asarray(test_images, dtype=np.float32)
    for i in range(3):
        # float64 accumulation keeps the statistics accurate,
        # e.g. mean ~ [125.307, 122.95, 113.865], std ~ [62.99, 62.09, 66.70]
        mean = float(np.mean(train_images[:, :, :, i], dtype=np.float64))
        std = float(np.std(train_images[:, :, :, i], dtype=np.float64))
        train_images[:, :, :, i] = (train_images[:, :, :, i] - mean) / std
        test_images[:, :, :, i] = (test_images[:, :, :, i] - mean) / std
    return train_images, test_images


def images_augment(images):
    """Apply random crop (4-pixel zero padding) and random horizontal flip.

    Args:
        images: Iterable of ``(32, 32, 3)`` images.

    Returns:
        A contiguous float32 array holding one augmented image per input.
    """
    output = []
    for img in images:
        img = cv2.copyMakeBorder(img, 4, 4, 4, 4, cv2.BORDER_CONSTANT,
                                 value=[0, 0, 0])
        x = np.random.randint(0, 8)
        y = np.random.randint(0, 8)
        if np.random.randint(0, 2):
            img = cv2.flip(img, 1)
        output.append(img[x: x + 32, y: y + 32, :])
    return np.ascontiguousarray(output, dtype=np.float32)


def residual_block(inputs, channels, strides=(1, 1)):
    """Build one pre-activation residual block (BN-ReLU-Conv, twice).

    Args:
        inputs: Input Keras tensor.
        channels: Number of output channels of both 3x3 convolutions.
        strides: Strides of the first convolution; any value other than
            ``(1, 1)`` adds a strided 1x1 convolution on the shortcut.

    Returns:
        The output Keras tensor of the block.
    """
    net = BatchNormalization(momentum=0.9, epsilon=1e-5)(inputs)
    net = Activation('relu')(net)

    if strides == (1, 1):
        shortcut = inputs
    else:
        shortcut = Conv2D(channels, (1, 1), strides=strides)(net)

    net = Conv2D(channels, (3, 3), padding='same', strides=strides)(net)
    net = BatchNormalization(momentum=0.9, epsilon=1e-5)(net)
    net = Activation('relu')(net)
    net = Conv2D(channels, (3, 3), padding='same')(net)

    net = add([net, shortcut])
    return net


def ResNet(inputs):
    """Build the ResNet graph (three stages of ``stack_n`` blocks each).

    Args:
        inputs: Input Keras tensor of shape ``(32, 32, 3)``.

    Returns:
        The softmax output tensor with 10 classes.
    """
    net = Conv2D(16, (3, 3), padding='same')(inputs)

    for i in range(stack_n):
        net = residual_block(net, 16)

    net = residual_block(net, 32, strides=(2, 2))
    for i in range(stack_n - 1):
        net = residual_block(net, 32)

    net = residual_block(net, 64, strides=(2, 2))
    for i in range(stack_n - 1):
        net = residual_block(net, 64)

    net = BatchNormalization(momentum=0.9, epsilon=1e-5)(net)
    net = Activation('relu')(net)
    net = AveragePooling2D(8, 8)(net)
    net = Flatten()(net)
    net = Dense(10, activation='softmax')(net)
    return net


def cross_entropy(y_true, y_pred):
    """Return the mean categorical cross-entropy over the batch."""
    cross_entropy = tf.keras.losses.categorical_crossentropy(y_true, y_pred)
    return tf.reduce_mean(cross_entropy)


def l2_loss(model, weights=weight_decay):
    """Return ``weights`` times the summed L2 loss of all kernel variables."""
    variable_list = [tf.nn.l2_loss(v)
                     for v in model.trainable_variables
                     if 'kernel' in v.name]
    return tf.add_n(variable_list) * weights


def accuracy(y_true, y_pred):
    """Return the fraction of samples whose arg-max prediction is correct."""
    correct_num = tf.equal(tf.argmax(y_true, -1), tf.argmax(y_pred, -1))
    accuracy = tf.reduce_mean(tf.cast(correct_num, dtype=tf.float32))
    return accuracy


@tf.function
def train_step(model, optimizer, x, y):
    """Run one optimisation step; return ``(cross_entropy, prediction)``."""
    with tf.GradientTape() as tape:
        prediction = model(x, training=True)
        ce = cross_entropy(y, prediction)
        l2 = l2_loss(model)
        loss = ce + l2
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return ce, prediction


@tf.function
def test_step(model, x, y):
    """Run one evaluation step; return ``(cross_entropy, prediction)``."""
    prediction = model(x, training=False)
    ce = cross_entropy(y, prediction)
    return ce, prediction


def train(model, optimizer, images, labels):
    """Train for one epoch and print the averaged metrics.

    ``images`` and ``labels`` are shuffled in place.
    """
    sum_loss = 0
    sum_accuracy = 0

    # random shuffle: re-seeding with the same value before each call makes
    # both arrays receive the same permutation, so pairs stay aligned
    seed = np.random.randint(0, 65536)
    np.random.seed(seed)
    np.random.shuffle(images)
    np.random.seed(seed)
    np.random.shuffle(labels)

    for i in tqdm(range(iterations_per_epoch)):
        x = images[i * batch_size: (i + 1) * batch_size, :, :, :]
        y = labels[i * batch_size: (i + 1) * batch_size, :]
        x = images_augment(x)

        loss, prediction = train_step(model, optimizer, x, y)
        sum_loss += loss
        sum_accuracy += accuracy(y, prediction)

    print('ce_loss:%f, l2_loss:%f, accuracy:%f' %
          (sum_loss / iterations_per_epoch, l2_loss(model), sum_accuracy / iterations_per_epoch))


def test(model, images, labels):
    """Evaluate on the test set and print the averaged metrics."""
    sum_loss = 0
    sum_accuracy = 0

    for i in tqdm(range(test_iterations)):
        x = images[i * test_batch_size: (i + 1) * test_batch_size, :, :, :]
        y = labels[i * test_batch_size: (i + 1) * test_batch_size, :]

        loss, prediction = test_step(model, x, y)
        sum_loss += loss
        sum_accuracy += accuracy(y, prediction)

    print('test, loss:%f, accuracy:%f' %
          (sum_loss / test_iterations, sum_accuracy / test_iterations))


if __name__ == '__main__':
    # gpu config (skipped on CPU-only machines instead of raising IndexError)
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if physical_devices:
        tf.config.experimental.set_memory_growth(device=physical_devices[0], enable=True)

    # load data
    # (train_images, train_labels, test_images, test_labels) = load_CIFAR('/home/user/Documents/dataset/Cifar-10')
    (train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.cifar10.load_data()
    train_labels = tf.keras.utils.to_categorical(train_labels, 10)
    test_labels = tf.keras.utils.to_categorical(test_labels, 10)
    train_images, test_images = color_normalize(train_images, test_images)

    # get model
    img_input = Input(shape=(32, 32, 3))
    output = ResNet(img_input)
    model = models.Model(img_input, output)

    # show
    model.summary()

    # train
    learning_rate_schedules = optimizers.schedules.PiecewiseConstantDecay(boundaries, learning_rate)
    optimizer = optimizers.SGD(learning_rate=learning_rate_schedules, momentum=0.9, nesterov=True)

    for epoch in range(epoch_num):
        print('epoch %d' % epoch)
        train(model, optimizer, train_images, train_labels)
        test(model, test_images, test_labels)
    # NOTE(review): the collapsed source ends with a second test(...) call;
    # it is kept here as a final evaluation after the loop - confirm intent.
    test(model, test_images, test_labels)
里面用的是残差网络
复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
def residual_block(inputs, channels, strides=(1, 1)):
    """Assemble a pre-activation residual unit.

    The input goes through BN -> ReLU first; the main path then applies two
    3x3 convolutions (the first one strided) with another BN -> ReLU between
    them.  With unit strides the raw ``inputs`` tensor is the shortcut,
    otherwise a strided 1x1 convolution of the pre-activated tensor is used.
    """
    pre_activated = Activation('relu')(
        BatchNormalization(momentum=0.9, epsilon=1e-5)(inputs))

    # The shortcut layer is instantiated before the main-path convolutions so
    # that the layer creation order stays the same as in the original listing.
    needs_projection = strides != (1, 1)
    if needs_projection:
        shortcut = Conv2D(channels, (1, 1), strides=strides)(pre_activated)
    else:
        shortcut = inputs

    main = Conv2D(channels, (3, 3), padding='same', strides=strides)(pre_activated)
    main = BatchNormalization(momentum=0.9, epsilon=1e-5)(main)
    main = Activation('relu')(main)
    main = Conv2D(channels, (3, 3), padding='same')(main)

    return add([main, shortcut])


def ResNet(inputs):
    """Stack the stem, three residual stages and the classifier head.

    Stage one has ``stack_n`` blocks with 16 channels; stages two and three
    each start with a stride-2 block followed by ``stack_n - 1`` regular
    blocks, with 32 and 64 channels respectively.
    """
    net = Conv2D(16, (3, 3), padding='same')(inputs)

    for _ in range(stack_n):
        net = residual_block(net, 16)

    for stage_channels in (32, 64):
        net = residual_block(net, stage_channels, strides=(2, 2))
        for _ in range(stack_n - 1):
            net = residual_block(net, stage_channels)

    # classifier head
    net = BatchNormalization(momentum=0.9, epsilon=1e-5)(net)
    net = Activation('relu')(net)
    net = AveragePooling2D(8, 8)(net)
    net = Flatten()(net)
    return Dense(10, activation='softmax')(net)
你也可以自己创建model 比如:
复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
"""Train a VGG16-style network on CIFAR-10 with ``model.fit``."""
import os
import pickle as p

import numpy as np
import tensorflow as tf
from tensorflow.keras import models, optimizers, regularizers
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense

os.environ['CUDA_VISIBLE_DEVICES'] = '0'

weight_decay = 5e-4
batch_size = 128
learning_rate = 1e-2
dropout_rate = 0.5
epoch_num = 50

# (filters, number of 3x3 convolutions, followed by 2x2 max-pooling?)
# Four poolings reduce 32x32 inputs to 2x2 before the dense layers.
_VGG_BLOCKS = (
    (64, 2, True),
    (128, 2, True),
    (256, 3, True),
    (512, 3, True),
    (512, 3, False),
)


def load_CIFAR_batch(filename):
    """Load a single pickled CIFAR-10 batch file.

    Args:
        filename: Path of one ``data_batch_*`` / ``test_batch`` file.

    Returns:
        ``(X, Y)``: ``X`` reshaped to ``(10000, 3, 32, 32)`` (channels
        first, as stored in the file) and ``Y`` as an array of labels.
    """
    # NOTE: pickle can execute arbitrary code while loading; only open
    # dataset files obtained from a trusted source.
    with open(filename, 'rb') as f:
        datadict = p.load(f, encoding='iso-8859-1')
    X = datadict['data'].reshape(10000, 3, 32, 32)
    Y = np.array(datadict['labels'])
    return X, Y


def load_CIFAR(Foldername):
    """Load the whole CIFAR-10 dataset from a folder of pickled batches.

    Args:
        Foldername: Directory containing ``data_batch_1`` .. ``data_batch_5``
            and ``test_batch``.

    Returns:
        ``(train_data, train_label, test_data, test_label)`` as float32
        arrays; images are channels-last ``(N, 32, 32, 3)`` and labels are
        one-hot ``(N, 10)``.
    """
    train_data = np.zeros([50000, 32, 32, 3], dtype=np.float32)
    train_label = np.zeros([50000, 10], dtype=np.float32)
    test_data = np.zeros([10000, 32, 32, 3], dtype=np.float32)
    test_label = np.zeros([10000, 10], dtype=np.float32)

    rows = np.arange(10000)
    for sample in range(5):
        X, Y = load_CIFAR_batch(
            os.path.join(Foldername, "data_batch_" + str(sample + 1)))
        offset = 10000 * sample
        # channels-first -> channels-last in one vectorised step
        train_data[offset:offset + 10000] = X.transpose(0, 2, 3, 1)
        train_label[rows + offset, Y] = 1

    X, Y = load_CIFAR_batch(os.path.join(Foldername, "test_batch"))
    test_data[:] = X.transpose(0, 2, 3, 1)
    test_label[rows, Y] = 1
    return train_data, train_label, test_data, test_label


def VGG16():
    """Build the VGG16-style Sequential model for 32x32x3 inputs.

    Every convolution is 3x3, ReLU-activated, 'same'-padded and carries an
    L2 kernel regulariser of strength ``weight_decay``.

    Returns:
        An uncompiled ``Sequential`` model with a 10-way softmax output.
    """
    model = models.Sequential()

    first_layer = True
    for filters, conv_count, pool in _VGG_BLOCKS:
        for _ in range(conv_count):
            # only the very first layer of a Sequential model declares the input
            extra = {'input_shape': (32, 32, 3)} if first_layer else {}
            model.add(Conv2D(filters, (3, 3), activation='relu', padding='same',
                             kernel_regularizer=regularizers.l2(weight_decay),
                             **extra))
            first_layer = False
        if pool:
            model.add(MaxPooling2D((2, 2)))

    model.add(Flatten())  # 2*2*512
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(10, activation='softmax'))

    return model


def scheduler(epoch):
    """Return the learning rate for ``epoch``: x0.1 at 40% and again at 80%."""
    if epoch < epoch_num * 0.4:
        return learning_rate
    if epoch < epoch_num * 0.8:
        return learning_rate * 0.1
    return learning_rate * 0.01


if __name__ == '__main__':
    # gpu config (skipped on CPU-only machines instead of raising IndexError)
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if physical_devices:
        tf.config.experimental.set_memory_growth(device=physical_devices[0], enable=True)

    # load data
    # (train_images, train_labels, test_images, test_labels) = load_CIFAR('/home/user/Documents/dataset/Cifar-10')
    (train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.cifar10.load_data()
    train_labels = tf.keras.utils.to_categorical(train_labels, 10)
    test_labels = tf.keras.utils.to_categorical(test_labels, 10)

    # get model
    model = VGG16()

    # show
    model.summary()

    # train
    # `learning_rate=` replaces the deprecated `lr=` alias, which newer
    # Keras releases no longer accept.
    sgd = optimizers.SGD(learning_rate=learning_rate, momentum=0.9, nesterov=True)
    change_lr = LearningRateScheduler(scheduler)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
    model.fit(train_images, train_labels,
              batch_size=batch_size,
              epochs=epoch_num,
              callbacks=[change_lr],
              validation_data=(test_images, test_labels))
下面是自己搭建的 VGG16(使用自定义训练循环):
复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# -*- coding: utf-8 -*-
"""
Created on Sat Oct 24 11:18:48 2020

@author: Jimmy_ouyang

VGG16-style network on CIFAR-10, trained with a hand-written loop.
"""
import cv2
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import layers, datasets, losses, optimizers, Input, models, regularizers
from tqdm import tqdm

epochs = 10
batchs = 64
weight_decay = 5e-4
train_num = 50000
test_batch_size = 64
test_num = 10000
learning_rate = 1e-2

# (filters, number of 3x3 convolutions, followed by 2x2 max-pooling?)
_VGG_BLOCKS = (
    (64, 2, True),
    (128, 2, True),
    (256, 3, True),
    (512, 3, True),
    (512, 3, False),
)


def process(x, y):
    """Scale a batch of images to [0, 1] and one-hot encode its labels.

    Args:
        x: Batch of images with pixel values in 0..255.
        y: Batch of integer class labels.

    Returns:
        ``(x, y)`` with ``x`` as float32 and ``y`` reshaped to ``(-1, 10)``.
    """
    # float32 matches the model's compute dtype; the original float64 cast
    # only forced Keras to cast the inputs back again.
    x = tf.cast(x, dtype=tf.float32) / 255.
    y = tf.one_hot(y, depth=10)
    y = tf.reshape(y, (-1, 10))
    return x, y


def color_normalize(train_images, test_images):
    """Standardise each colour channel with the training-set statistics.

    The inputs are converted to float32 first: writing the normalised values
    back into a uint8 array (as returned by ``cifar10.load_data()``) would
    truncate them, so the caller must use the returned arrays.  Inputs that
    are already float32 are normalised in place.

    Returns:
        ``(train_images, test_images)`` as float32 arrays.
    """
    train_images = np.asarray(train_images, dtype=np.float32)
    test_images = np.asarray(test_images, dtype=np.float32)
    for i in range(3):
        # e.g. mean ~ [125.307, 122.95, 113.865], std ~ [62.99, 62.09, 66.70]
        mean = float(np.mean(train_images[:, :, :, i], dtype=np.float64))
        std = float(np.std(train_images[:, :, :, i], dtype=np.float64))
        train_images[:, :, :, i] = (train_images[:, :, :, i] - mean) / std
        test_images[:, :, :, i] = (test_images[:, :, :, i] - mean) / std
    return train_images, test_images


def pic_agument(images):
    """Apply random crop (4-pixel zero padding) and random horizontal flip.

    Args:
        images: Iterable of ``(32, 32, 3)`` images.

    Returns:
        A contiguous float32 array holding one augmented image per input.
    """
    output = []
    for img in images:
        img = cv2.copyMakeBorder(img, 4, 4, 4, 4, cv2.BORDER_CONSTANT,
                                 value=[0, 0, 0])
        x = np.random.randint(0, 8)
        y = np.random.randint(0, 8)
        if np.random.randint(0, 2):
            img = cv2.flip(img, 1)
        output.append(img[x: x + 32, y: y + 32, :])
    return np.ascontiguousarray(output, dtype=np.float32)


def VGG16(x):
    """Build the VGG16-style graph on top of the input tensor ``x``.

    Every convolution is 3x3, ReLU-activated, 'same'-padded and carries an
    L2 kernel regulariser of strength ``weight_decay``.

    Args:
        x: Input Keras tensor of shape ``(32, 32, 3)``.

    Returns:
        The softmax output tensor with 10 classes.
    """
    # Local names deliberately avoid `cv2`, which is the imported OpenCV module.
    net = x
    for filters, conv_count, pool in _VGG_BLOCKS:
        for _ in range(conv_count):
            net = layers.Conv2D(filters, (3, 3), activation='relu', padding='same',
                                kernel_regularizer=regularizers.l2(weight_decay))(net)
        if pool:
            net = layers.MaxPooling2D((2, 2))(net)

    net = layers.GlobalAveragePooling2D()(net)  # one value per channel: 512 features
    net = layers.Dense(1024, activation='relu')(net)
    net = layers.Dropout(0.7)(net)
    net = layers.Dense(256, activation='relu')(net)
    net = layers.Dropout(0.7)(net)
    out = layers.Dense(10, activation='softmax')(net)
    return out


def cross_entropy(y_true, y_pred):
    """Return the mean categorical cross-entropy over the batch."""
    cross_entropy = tf.keras.losses.categorical_crossentropy(y_true, y_pred)
    return tf.reduce_mean(cross_entropy)


def accuracy(y_true, y_pred):
    """Return the fraction of samples whose arg-max prediction is correct."""
    correct_num = tf.equal(tf.argmax(y_true, -1), tf.argmax(y_pred, -1))
    accuracy = tf.reduce_mean(tf.cast(correct_num, dtype=tf.float32))
    return accuracy


@tf.function
def train_step(model, optimizer, x, y):
    """Run one optimisation step; return ``(cross_entropy, prediction)``.

    The optimised loss is the cross-entropy plus the model's regularisation
    losses: in a hand-written loop the ``kernel_regularizer`` penalties are
    only applied when ``model.losses`` is added explicitly.
    """
    with tf.GradientTape() as tape:
        prediction = model(x, training=True)
        ce = cross_entropy(y, prediction)
        loss = ce
        if model.losses:
            loss = loss + tf.add_n(model.losses)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return ce, prediction


def train(model, optimizer, train_data, epoch):
    """Train for one pass over ``train_data`` and print averaged metrics."""
    sum_loss = 0
    sum_accuracy = 0
    batch_count = 0
    for x, y in train_data:
        loss, prediction = train_step(model, optimizer, x, y)
        sum_loss += loss
        sum_accuracy += accuracy(y, prediction)
        batch_count += 1
    # Average over the number of batches; the last enumerate index is one
    # short and is zero (or undefined) for tiny datasets.
    batch_count = max(batch_count, 1)
    print('epoch:%f ,train_ce_loss:%f , accuracy:%f' % (epoch, sum_loss / batch_count, sum_accuracy / batch_count))


def test(model, test_data, epoch):
    """Evaluate on ``test_data`` and print averaged metrics."""
    sum_loss = 0
    sum_accuracy = 0
    batch_count = 0
    for x, y in test_data:
        out = model(x, training=False)
        sum_loss += cross_entropy(y, out)
        sum_accuracy += accuracy(y, out)
        batch_count += 1
    batch_count = max(batch_count, 1)
    print('epoch:%f ,test_ce_loss:%f , accuracy:%f' % (epoch, sum_loss / batch_count, sum_accuracy / batch_count))


if __name__ == "__main__":
    # gpu config (skipped on CPU-only machines instead of raising IndexError)
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if physical_devices:
        tf.config.experimental.set_memory_growth(device=physical_devices[0], enable=True)

    (x_train, y_train), (x_test, y_test) = datasets.cifar10.load_data()
    # NOTE: augmentation is applied once, up front, so every epoch sees the
    # same augmented images.
    x_train = pic_agument(x_train)
    # x_train, x_test = color_normalize(x_train, x_test)
    train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(len(x_train)).batch(batchs).map(process)
    test_data = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(test_batch_size).map(process)

    img_input = Input(shape=(32, 32, 3))
    output = VGG16(img_input)
    model = models.Model(img_input, output)
    # model = VGG16()
    model.summary()
    # learning_rate = 0.01
    # The learning rate must be kept small, otherwise training does not progress.
    optimizer = tf.keras.optimizers.Adam(0.0001)
    # optimizer = optimizers.SGD(lr=learning_rate, momentum=0.9, nesterov=True)
    for epoch in range(epochs):
        train(model, optimizer, train_data, epoch)
        test(model, test_data, epoch)
下面是自己搭建的残差网络(Residual Net):速度和精度我们都要追求。
复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
# -*- coding: utf-8 -*-
"""
Created on Sat Oct 24 11:18:48 2020

@author: Jimmy_ouyang

Pre-activation ResNet on CIFAR-10, trained with a tf.function-compiled loop.
"""
import time

import cv2
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import layers, datasets, losses, optimizers, Input, models, regularizers
from tqdm import tqdm

stack_n = 18  # layers = stack_n * 6 + 2
weight_decay = 1e-4

# training config
batch_size = 128
train_num = 50000
iterations_per_epoch = int(train_num / batch_size)
learning_rate = [0.001, 0.00001, 0.000001]
boundaries = [80 * iterations_per_epoch, 120 * iterations_per_epoch]
epoch_num = 5


def process(x, y):
    """Scale a batch of images to [0, 1] and one-hot encode its labels.

    Args:
        x: Batch of images with pixel values in 0..255.
        y: Batch of integer class labels.

    Returns:
        ``(x, y)`` with ``x`` as float32 and ``y`` reshaped to ``(-1, 10)``.
    """
    # float32 matches the model's compute dtype; the original float64 cast
    # only forced Keras to cast the inputs back again.
    x = tf.cast(x, dtype=tf.float32) / 255.
    y = tf.one_hot(y, depth=10)
    y = tf.reshape(y, (-1, 10))
    return x, y


def color_normalize(train_images, test_images):
    """Standardise each colour channel with the training-set statistics.

    The inputs are converted to float32 first: writing the normalised values
    back into a uint8 array (as returned by ``cifar10.load_data()``) would
    truncate them, so the caller must use the returned arrays.  Inputs that
    are already float32 are normalised in place.

    Returns:
        ``(train_images, test_images)`` as float32 arrays.
    """
    train_images = np.asarray(train_images, dtype=np.float32)
    test_images = np.asarray(test_images, dtype=np.float32)
    for i in range(3):
        # e.g. mean ~ [125.307, 122.95, 113.865], std ~ [62.99, 62.09, 66.70]
        mean = float(np.mean(train_images[:, :, :, i], dtype=np.float64))
        std = float(np.std(train_images[:, :, :, i], dtype=np.float64))
        train_images[:, :, :, i] = (train_images[:, :, :, i] - mean) / std
        test_images[:, :, :, i] = (test_images[:, :, :, i] - mean) / std
    return train_images, test_images


def pic_agument(images):
    """Apply random crop (4-pixel zero padding) and random horizontal flip.

    Args:
        images: Iterable of ``(32, 32, 3)`` images.

    Returns:
        A contiguous float32 array holding one augmented image per input.
    """
    output = []
    for img in images:
        img = cv2.copyMakeBorder(img, 4, 4, 4, 4, cv2.BORDER_CONSTANT,
                                 value=[0, 0, 0])
        x = np.random.randint(0, 8)
        y = np.random.randint(0, 8)
        if np.random.randint(0, 2):
            img = cv2.flip(img, 1)
        output.append(img[x: x + 32, y: y + 32, :])
    return np.ascontiguousarray(output, dtype=np.float32)


def residual_step(layer, channels, strides=(1, 1)):
    """Build one pre-activation residual block (BN-ReLU-Conv, twice).

    Args:
        layer: Input Keras tensor.
        channels: Number of output channels of both 3x3 convolutions.
        strides: Strides of the first convolution; any value other than
            ``(1, 1)`` adds a strided 1x1 convolution on the shortcut.

    Returns:
        The output Keras tensor of the block.
    """
    block_input = layer
    # Keras Activation layers instead of raw tf.nn.relu on symbolic tensors,
    # so the graph consists of regular, serialisable layers.
    layer = layers.BatchNormalization(momentum=0.9, epsilon=1e-5)(layer)
    layer = layers.Activation('relu')(layer)

    if strides == (1, 1):
        # The identity shortcut carries the untouched block input (as in the
        # article's first listing), not the BN+ReLU output.
        short = block_input
    else:
        short = layers.Conv2D(channels, (1, 1), strides=strides)(layer)

    layer = layers.Conv2D(channels, (3, 3), padding='same', strides=strides)(layer)
    layer = layers.BatchNormalization(momentum=0.9, epsilon=1e-5)(layer)
    layer = layers.Activation('relu')(layer)
    layer = layers.Conv2D(channels, (3, 3), padding='same')(layer)

    layer = layers.add([short, layer])
    return layer


def residual(x):
    """Build the ResNet graph (three stages of ``stack_n`` blocks each).

    Args:
        x: Input Keras tensor of shape ``(32, 32, 3)``.

    Returns:
        The softmax output tensor with 10 classes.
    """
    net = layers.Conv2D(16, (3, 3), padding='same')(x)

    for i in range(stack_n):
        net = residual_step(net, 16)

    net = residual_step(net, 32, strides=(2, 2))
    for i in range(stack_n - 1):
        net = residual_step(net, 32)

    net = residual_step(net, 64, strides=(2, 2))
    for i in range(stack_n - 1):
        net = residual_step(net, 64)

    net = layers.BatchNormalization(momentum=0.9, epsilon=1e-5)(net)
    net = layers.Activation('relu')(net)
    net = layers.AveragePooling2D(8, 8)(net)
    net = layers.Flatten()(net)
    net = layers.Dense(10, activation='softmax')(net)
    return net


def cross_entropy(y_true, y_pred):
    """Return the mean categorical cross-entropy over the batch."""
    cross_entropy = tf.keras.losses.categorical_crossentropy(y_true, y_pred)
    return tf.reduce_mean(cross_entropy)


def accuracy(y_true, y_pred):
    """Return the fraction of samples whose arg-max prediction is correct."""
    correct_num = tf.equal(tf.argmax(y_true, -1), tf.argmax(y_pred, -1))
    accuracy = tf.reduce_mean(tf.cast(correct_num, dtype=tf.float32))
    return accuracy


def l2_loss(model, weights=weight_decay):
    """Return ``weights`` times the summed L2 loss of all kernel variables."""
    variable_list = [tf.nn.l2_loss(v)
                     for v in model.trainable_variables
                     if 'kernel' in v.name]
    return tf.add_n(variable_list) * weights


@tf.function
def train_step(model, x, y, optimizator):
    """Run one optimisation step; return ``(cross_entropy, accuracy)``.

    The optimised loss is cross-entropy plus L2 weight decay; only the
    cross-entropy part is returned, matching the ``train_ce_loss`` label
    printed by ``train``.

    Args:
        model: Keras model to update.
        x: Batch of input images.
        y: Batch of one-hot labels.
        optimizator: Optimizer used to apply the gradients.
    """
    with tf.GradientTape() as tape:
        prediction = model(x, training=True)
        ce = cross_entropy(y, prediction)
        l2 = l2_loss(model)
        loss = ce + l2
    acc = accuracy(y, prediction)
    gradients = tape.gradient(loss, model.trainable_variables)
    # use the optimizer that was passed in rather than a module-level global
    optimizator.apply_gradients(zip(gradients, model.trainable_variables))
    return ce, acc


def train(model, optimizer, train_data, epoch):
    """Train for one pass over ``train_data`` and print averaged metrics."""
    sum_loss = 0
    sum_accuracy = 0
    batch_count = 0
    for x, y in train_data:
        loss, acc = train_step(model, x, y, optimizer)
        sum_loss += loss
        sum_accuracy += acc
        batch_count += 1
    # Average over the number of batches; the last enumerate index is one
    # short and is zero (or undefined) for tiny datasets.
    batch_count = max(batch_count, 1)
    print('epoch:%f ,train_ce_loss:%f , accuracy:%f' % (epoch, sum_loss / batch_count, sum_accuracy / batch_count))


@tf.function
def test_step(model, x, y):
    """Run one evaluation step; return ``(cross_entropy, prediction)``."""
    prediction = model(x, training=False)
    ce = cross_entropy(y, prediction)
    return ce, prediction


def test(model, test_data, epoch):
    """Evaluate on ``test_data`` and print averaged metrics."""
    sum_loss = 0
    sum_accuracy = 0
    batch_count = 0
    for x, y in test_data:
        loss, prediction = test_step(model, x, y)
        sum_loss += loss
        sum_accuracy += accuracy(y, prediction)
        batch_count += 1
    batch_count = max(batch_count, 1)
    print('epoch:%f ,test_ce_loss:%f , accuracy:%f' % (epoch, sum_loss / batch_count, sum_accuracy / batch_count))


if __name__ == "__main__":
    # GPU configuration was disabled in the original listing; kept disabled.
    # physical_devices = tf.config.experimental.list_physical_devices('GPU')
    # if physical_devices:
    #     gpu0 = physical_devices[0]
    #     tf.config.experimental.set_memory_growth(device=physical_devices[0], enable=True)
    #     tf.config.set_visible_devices([gpu0], "GPU")

    (x_train, y_train), (x_test, y_test) = datasets.cifar10.load_data()
    # NOTE: augmentation is applied once, up front, so every epoch sees the
    # same augmented images.
    x_train = pic_agument(x_train)
    # x_train, x_test = color_normalize(x_train, x_test)
    train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(len(x_train)).batch(batch_size).map(process)
    test_data = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size).map(process)

    img_input = Input(shape=(32, 32, 3))
    output = residual(img_input)
    model = models.Model(img_input, output)
    # model = VGG16()
    # model.summary()
    # learning_rate = 0.01
    # The learning rate must be kept small, otherwise training does not progress.
    # optimizer = tf.keras.optimizers.Adam(0.1)
    # optimizer = optimizers.SGD(lr=learning_rate, momentum=0.9, nesterov=True)
    learning_rate_schedules = optimizers.schedules.PiecewiseConstantDecay(boundaries, learning_rate)
    optimizer = optimizers.SGD(learning_rate=learning_rate_schedules, momentum=0.9, nesterov=True)

    for epoch in range(epoch_num):
        start = time.time()
        train(model, optimizer, train_data, epoch)
        test(model, test_data, epoch)
        end = time.time()
        print("循环运行时间:%.2f秒" % (end - start))
    # NOTE(review): saving once after the last epoch is assumed; the collapsed
    # source does not show whether this call was inside the loop.
    model.save('/home/jimmy/Documents/models/resnet/', save_format='tf')
最后
以上就是傲娇篮球最近收集整理的关于深度学习搭建自己的CNN并在.cifar10上训练的全部内容,更多相关深度学习搭建自己内容请搜索靠谱客的其他文章。
本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
发表评论 取消回复