Overview
Input: a 28 x 28 x 1 image
Convolution layer: eight 3 x 3 filters, giving a 26x26x8 output
Pooling layer: 2x2 max pooling over the 26x26x8 volume, giving a 13x13x8 output
Flatten: the 13x13x8 volume above is flattened into a fully connected layer of 13 x 13 x 8 = 1352 neurons
Softmax layer: outputs a 10-dimensional vector, one score per handwritten-digit class (the shape arithmetic is checked in the short sketch after this list)
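A quick sanity check of those shapes (a minimal sketch; valid 3x3 convolution with stride 1, then 2x2 pooling):

h = w = 28                                  # input image size
conv_h, conv_w = h - 2, w - 2               # valid 3x3 convolution: 26 x 26
pool_h, pool_w = conv_h // 2, conv_w // 2   # 2x2 max pooling: 13 x 13
assert pool_h * pool_w * 8 == 1352          # flattened fully connected size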
import numpy as np
class Conv3x3:
    # A convolution layer using 3x3 filters.
    def __init__(self, num_filters):
        self.num_filters = num_filters
        # Divide by 9 to reduce the variance of the initial random values
        self.filters = np.random.randn(num_filters, 3, 3) / 9
    def iterate_regions(self, image):
        # Generates all possible 3x3 image regions using valid padding.
        h, w = image.shape
        for i in range(h - 2):
            for j in range(w - 2):
                im_region = image[i:(i + 3), j:(j + 3)]
                yield im_region, i, j
    def forward(self, input):
        # input is the image (28x28 for MNIST)
        # output starts as all zeros (any initial value would do, since every
        # entry is overwritten below)
        # input: 28x28
        # output: 26x26x8
        h, w = input.shape
        output = np.zeros((h - 2, w - 2, self.num_filters))
        # Cache the input for use in backprop
        self.last_input = input
        for im_region, i, j in self.iterate_regions(input):
            # Convolution: element-wise multiply then sum; output[i, j] is a
            # vector with one entry per filter (8 here)
            output[i, j] = np.sum(im_region * self.filters, axis=(1, 2))
        # Return the output for the next layer to consume
        return output
    def backprop(self, d_L_d_out, learn_rate):
        # Initialize a zero gradient with the same shape as the filters (8x3x3)
        d_L_d_filters = np.zeros(self.filters.shape)
        # im_region iterates over every 3x3 patch of the cached input
        for im_region, i, j in self.iterate_regions(self.last_input):
            for f in range(self.num_filters):
                # Accumulate the gradient one filter at a time
                d_L_d_filters[f] += d_L_d_out[i, j, f] * im_region
        # Update the filters (gradient descent step)
        self.filters -= learn_rate * d_L_d_filters
        # This is the first layer of the network, so there is no gradient to
        # pass further back
        return None
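# Quick shape check for Conv3x3 (a minimal sketch, not part of the original
# tutorial): a random 28x28 array stands in for an MNIST image.
assert Conv3x3(8).forward(np.random.randn(28, 28)).shape == (26, 26, 8)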
class MaxPool2:
    # A Max Pooling layer using a pool size of 2.
    def iterate_regions(self, image):
        # Generates non-overlapping 2x2 image regions to pool over.
        h, w, _ = image.shape
        new_h = h // 2
        new_w = w // 2
        for i in range(new_h):
            for j in range(new_w):
                im_region = image[(i * 2):(i * 2 + 2), (j * 2):(j * 2 + 2)]
                yield im_region, i, j
    def forward(self, input):
        # input: the conv layer's output (26x26x8), which is this layer's input
        h, w, num_filters = input.shape
        output = np.zeros((h // 2, w // 2, num_filters))
        # Cache the input (26x26x8) for use in backprop
        self.last_input = input
        for im_region, i, j in self.iterate_regions(input):
            output[i, j] = np.amax(im_region, axis=(0, 1))
        return output
    def backprop(self, d_L_d_out):
        '''
        Performs a backward pass of the maxpool layer.
        Returns the loss gradient for this layer's inputs.
        - d_L_d_out is the loss gradient for this layer's outputs.
        '''
        # Gradient w.r.t. this layer's input (26x26x8), initialized to zero
        d_L_d_input = np.zeros(self.last_input.shape)
        # Each im_region is a 2x2x8 block; only the max value in each channel
        # was passed forward, so only those positions receive gradient
        for im_region, i, j in self.iterate_regions(self.last_input):
            h, w, f = im_region.shape
            # Per-channel maxima of this region (a length-8 vector)
            amax = np.amax(im_region, axis=(0, 1))
            # Walk the region; wherever a pixel was the max, copy the gradient
            # of the corresponding output back to that input position
            for i2 in range(h):
                for j2 in range(w):
                    for f2 in range(f):
                        # If this pixel was the max value, copy the gradient to it.
                        if im_region[i2, j2, f2] == amax[f2]:
                            d_L_d_input[i * 2 + i2, j * 2 + j2, f2] = d_L_d_out[i, j, f2]
        return d_L_d_input
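# Chaining the two layers (a minimal sketch): pooling halves each spatial
# dimension, so 28x28 -> 26x26x8 -> 13x13x8.
assert MaxPool2().forward(Conv3x3(8).forward(np.random.randn(28, 28))).shape == (13, 13, 8)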
class Softmax:
    # A standard fully-connected layer with softmax activation.
    def __init__(self, input_len, nodes):
        # input_len: number of input nodes, i.e. the flattened pooling output
        # nodes: number of output nodes (10 in this example)
        # Initialize the weights with small random values; dividing by
        # input_len keeps the initial totals from blowing up
        self.weights = np.random.randn(input_len, nodes) / input_len
        self.biases = np.zeros(nodes)
    def forward(self, input):
        # Cache the input shape (13x13x8) so backprop can un-flatten its result
        self.last_input_shape = input.shape
        input = input.flatten()
        # Cache the flattened input for use in backprop
        self.last_input = input
        input_len, nodes = self.weights.shape
        totals = np.dot(input, self.weights) + self.biases
        # Cache the pre-softmax totals for use in backprop
        self.last_totals = totals
        exp = np.exp(totals)
        return exp / np.sum(exp, axis=0)
    def backprop(self, d_L_d_out, learn_rate):
        # Only the entry for the correct class is nonzero (see train() below),
        # so every other iteration is skipped
        for i, gradient in enumerate(d_L_d_out):
            if gradient == 0:
                continue
            # e^totals
            t_exp = np.exp(self.last_totals)
            # Sum of all e^totals
            S = np.sum(t_exp)
            # Gradients of out[i] against totals
            d_out_d_t = -t_exp[i] * t_exp / (S ** 2)
            d_out_d_t[i] = t_exp[i] * (S - t_exp[i]) / (S ** 2)
            # Gradients of totals against weights/biases/input
            d_t_d_w = self.last_input
            d_t_d_b = 1
            d_t_d_inputs = self.weights
            # Gradients of loss against totals
            d_L_d_t = gradient * d_out_d_t
            # Gradients of loss against weights/biases/input
            d_L_d_w = d_t_d_w[np.newaxis].T @ d_L_d_t[np.newaxis]
            d_L_d_b = d_L_d_t * d_t_d_b
            d_L_d_inputs = d_t_d_inputs @ d_L_d_t
            self.weights -= learn_rate * d_L_d_w
            self.biases -= learn_rate * d_L_d_b
            # Reshape the gradient from 1d back to 3d: 1352 -> 13x13x8
            return d_L_d_inputs.reshape(self.last_input_shape)
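# Forward-pass sanity check for Softmax (a minimal sketch): the outputs are
# probabilities, so they must sum to 1.
assert np.isclose(np.sum(Softmax(13 * 13 * 8, 10).forward(np.random.randn(13, 13, 8))), 1.0)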
import mnist
# Load the MNIST handwritten-digit data (only the first 1000 examples of each
# split, to keep training fast)
train_images = mnist.train_images()[:1000]
train_labels = mnist.train_labels()[:1000]
test_images = mnist.test_images()[:1000]
test_labels = mnist.test_labels()[:1000]
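# If the standalone `mnist` package is unavailable, an equivalent load via
# Keras would look like this (a sketch, assuming TensorFlow is installed):
#   from tensorflow.keras.datasets import mnist as keras_mnist
#   (train_images, train_labels), (test_images, test_labels) = keras_mnist.load_data()
#   train_images, train_labels = train_images[:1000], train_labels[:1000]
#   test_images, test_labels = test_images[:1000], test_labels[:1000]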
conv = Conv3x3(8) # 28x28x1 -> 26x26x8
pool = MaxPool2() # 26x26x8 -> 13x13x8
softmax = Softmax(13 * 13 * 8, 10) # 13x13x8 -> 10
def forward(image, label):
    # Normalize pixel values from [0, 255] to [-0.5, 0.5] to make training easier
    out = conv.forward((image / 255) - 0.5)
    out = pool.forward(out)
    out = softmax.forward(out)
    # Cross-entropy loss: -ln(probability assigned to the correct class)
    loss = -np.log(out[label])
    acc = 1 if np.argmax(out) == label else 0
    return out, loss, acc
def train(im, label, lr=.005):
    # Forward pass
    out, loss, acc = forward(im, label)
    # Seed gradient: dL/d(out) is zero everywhere except at the correct class,
    # where d(-ln(p))/dp = -1/p
    gradient = np.zeros(10)
    gradient[label] = -1 / out[label]
    # Backward pass through the layers in reverse order
    gradient = softmax.backprop(gradient, lr)
    gradient = pool.backprop(gradient)
    gradient = conv.backprop(gradient, lr)
    return loss, acc
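# The -1/out[label] seed above is the derivative of -ln(p); a quick
# finite-difference check of that formula (a minimal sketch):
_p, _eps = 0.3, 1e-7
assert np.isclose((-np.log(_p + _eps) + np.log(_p)) / _eps, -1 / _p, rtol=1e-4)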
print('Starting training...')
# Train the CNN for 5 epochs
for epoch in range(5):
    print('--- Epoch %d ---' % (epoch + 1))
    # Shuffle the training data
    permutation = np.random.permutation(len(train_images))
    train_images = train_images[permutation]
    train_labels = train_labels[permutation]
    # Train on each example
    loss = 0
    num_correct = 0
    # i: index, im: image, label: label
    for i, (im, label) in enumerate(zip(train_images, train_labels)):
        # Print running stats every 100 steps
        if i % 100 == 99:
            print(
                '[Step %d] Past 100 steps: Average Loss %.3f | Accuracy: %d%%' %
                (i + 1, loss / 100, num_correct)
            )
            loss = 0
            num_correct = 0
        l, acc = train(im, label)
        loss += l
        num_correct += acc
# Test the CNN
print('\n--- Testing the CNN ---')
loss = 0
num_correct = 0
for im, label in zip(test_images, test_labels):
_, l, acc = forward(im, label)
loss += l
num_correct += acc
num_tests = len(test_images)
print('Test Loss:', loss / num_tests)
print('Test Accuracy:', num_correct / num_tests)
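# Inspecting a single prediction from the trained network (a minimal sketch
# added for illustration):
probs, _, _ = forward(test_images[0], test_labels[0])
print('Predicted digit:', np.argmax(probs), '| True label:', test_labels[0])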
Reference: 【455】Python 徒手实现 卷积神经网络 CNN - McDelfino - 博客园 (cnblogs.com)