Building a simple feedforward CNN in PyTorch to process three-channel color images

Overview

Recently I wanted to revisit how to build the forward pass of a simple CNN, and I couldn't find an example online that uses PyTorch to process a three-channel color image.

1. Hand-initialize three 4×4 convolution kernels, then apply one ReLU activation and one pooling step. In PyTorch, images are processed batch by batch, so the input data has to be in the format [batch_size, n_channels, height, width]. Here is the code (a shape sanity check follows the listing):

import cv2
import matplotlib.pyplot as plt


img_path = '1.jpg'

bgr_img = cv2.imread(img_path)

# bgr_img = bgr_img.transpose(2,0,1)
print(bgr_img.shape)
b,g,r = cv2.split(bgr_img)
img_rgb = cv2.merge([r,g,b])
gray_img = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2GRAY)  # img_rgb is already RGB, so use RGB2GRAY
img_rgb1 = img_rgb.transpose(2,0,1)  # move the channel dimension to the front: (C, H, W)
print(img_rgb.shape)


plt.figure()
plt.imshow(gray_img)
print(gray_img.shape)
# Normalise
gray_img = gray_img.astype("float32")/255
# print(gray_img.shape)
plt.figure()
plt.imshow(gray_img)
plt.figure()
plt.imshow(img_rgb)

# plt.show()

import numpy as np

filter_vals = np.array([[-1, -1, 1, 1], [-1, -1, 1, 1], [-1, -1, 1, 1], [-1, -1, 1, 1]],dtype=np.float64)
print('Filter shape: ', filter_vals.shape)
filter_1 = filter_vals
filter_2 = -filter_1
filter_3 = filter_1.T
# filter_4 = -filter_3
filters = np.array([filter_1, filter_2, filter_3])#, filter_4])
import torch
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):

    def __init__(self,weight):
        super(Net, self).__init__()
        # initialize the convolutional layer with the 3 hand-defined 4x4 filters:
        # the weight tensor has shape (1, 3, 4, 4), i.e. one output channel built
        # from one filter per RGB input channel
        self.conv = nn.Conv2d(3, 1, kernel_size=4, bias=False)
        self.conv.weight = torch.nn.Parameter(weight)
        # define a pooling layer
        self.pool = nn.MaxPool2d(2, 2)

    def forward(self, x):
        # calculates the output of a convolutional layer
        # pre- and post-activation
        conv_x = self.conv(x)
        activated_x = F.relu(conv_x)
        print('activated_x-shape',activated_x.shape)
        # applies pooling layer
        pooled_x = self.pool(activated_x)

        # returns all layers
        return conv_x, activated_x, pooled_x


# instantiate the model and set the weights
weight = torch.from_numpy(filters).unsqueeze(0)  # shape (1, 3, 4, 4): out_channels, in_channels, kH, kW
print("weight shape",weight.shape)
model = Net(weight)

# print out the layer in the network
print(model)


def viz_layer(layer, n_filters=1):
    fig = plt.figure(figsize=(20, 20))
    for i in range(n_filters):
        ax = fig.add_subplot(1, n_filters, i + 1)
        ax.imshow(np.squeeze(layer[0, i].data.numpy()))
        ax.set_title('Output %s' % str(i + 1))


# plt.imshow(gray_img, cmap='gray')

fig = plt.figure(figsize=(12, 6))
fig.subplots_adjust(left=0, right=1.5, bottom=0.8, top=1, hspace=0.05, wspace=0.05)
for i in range(3):
    ax = fig.add_subplot(1, 3, i + 1, xticks=[], yticks=[])
    ax.imshow(filters[i], cmap='gray')
    ax.set_title('Filter %s' % str(i + 1))

img_tensor = torch.from_numpy(img_rgb1).unsqueeze(0)  # add the batch dimension: [1, 3, H, W]
img_tensor = img_tensor.double()  # match the float64 filter weights

print('img_tensor.shape', img_tensor.shape)
conv_x, activated_layer, pooled_layer = model(img_tensor)

viz_layer(activated_layer)
#
# viz_layer(pooled_layer)
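
A quick sanity check on the shapes involved: the three 4×4 filters become a single Conv2d weight of shape (1, 3, 4, 4), i.e. one output channel built from one filter per RGB input channel, so a [1, 3, H, W] input yields a [1, 1, H-3, W-3] map before pooling. Below is a minimal sketch with a random dummy input (the 96×96 size is only a placeholder, not the size of 1.jpg):

import numpy as np
import torch
import torch.nn as nn

# the same three hand-made 4x4 filters as above
base = np.array([[-1, -1, 1, 1]] * 4, dtype=np.float64)
filters = np.stack([base, -base, base.T])          # shape (3, 4, 4)
weight = torch.from_numpy(filters).unsqueeze(0)    # shape (1, 3, 4, 4)

conv = nn.Conv2d(3, 1, kernel_size=4, bias=False)
conv.weight = nn.Parameter(weight)

x = torch.randn(1, 3, 96, 96, dtype=torch.float64)  # dummy [batch, C, H, W] input
y = conv(x)
print(y.shape)                                       # torch.Size([1, 1, 93, 93])
print(nn.MaxPool2d(2, 2)(torch.relu(y)).shape)       # torch.Size([1, 1, 46, 46])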

2. The following is the original version of the code, which applies the convolution (filtering) to a single-channel grayscale image (a shape check follows the listing):

import cv2
import matplotlib.pyplot as plt


img_path = '1.jpg'

bgr_img = cv2.imread(img_path)
print(bgr_img.shape)
b,g,r = cv2.split(bgr_img)
img_rgb = cv2.merge([r,g,b])
print(img_rgb.shape)
gray_img = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2GRAY)  # img_rgb is already RGB, so use RGB2GRAY
print(gray_img.shape)
# Normalise
gray_img = gray_img.astype("float32")/255
# print(gray_img.shape)
# plt.imshow(gray_img, cmap='gray')
plt.show()

import numpy as np

filter_vals = np.array([[-1, -1, 1, 1], [-1, -1, 1, 1], [-1, -1, 1, 1], [-1, -1, 1, 1]])

print('Filter shape: ', filter_vals.shape)
filter_1 = filter_vals
filter_2 = -filter_1
filter_3 = filter_1.T
filter_4 = -filter_3
filters = np.array([filter_1, filter_2, filter_3, filter_4])
import torch
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):

    def __init__(self,weight):
        super(Net, self).__init__()
        # initialize the weights of the convolutional layer to be the 4 defined filters
        # the weight tensor has shape (4, 1, 4, 4): 4 grayscale filters over 1 input channel
        self.conv = nn.Conv2d(1, 4, kernel_size=4, bias=False)
        self.conv.weight = torch.nn.Parameter(weight)
        # define a pooling layer
        self.pool = nn.MaxPool2d(2, 2)

    def forward(self, x):
        # calculates the output of a convolutional layer
        # pre- and post-activation
        conv_x = self.conv(x)
        activated_x = F.relu(conv_x)

        # applies pooling layer
        pooled_x = self.pool(activated_x)

        # returns all layers
        return conv_x, activated_x, pooled_x


# instantiate the model and set the weights
weight = torch.from_numpy(filters).unsqueeze(1).type(torch.FloatTensor)
model = Net(weight)

# print out the layer in the network
print(model)


def viz_layer(layer, n_filters=4):
    fig = plt.figure(figsize=(20, 20))

    for i in range(n_filters):
        ax = fig.add_subplot(1, n_filters, i + 1)
        ax.imshow(np.squeeze(layer[0, i].data.numpy()), cmap='gray')
        ax.set_title('Output %s' % str(i + 1))


# plt.imshow(gray_img, cmap='gray')

fig = plt.figure(figsize=(12, 6))
fig.subplots_adjust(left=0, right=1.5, bottom=0.8, top=1, hspace=0.05, wspace=0.05)
for i in range(4):
    ax = fig.add_subplot(1, 4, i + 1, xticks=[], yticks=[])
    ax.imshow(filters[i], cmap='gray')
    ax.set_title('Filter %s' % str(i + 1))

gray_img_tensor = torch.from_numpy(gray_img).unsqueeze(0).unsqueeze(1)  # [1, 1, H, W]
print("gray_img_tensor.shape", gray_img_tensor.shape)
conv_x, activated_layer, pooled_layer = model(gray_img_tensor)

viz_layer(activated_layer)

viz_layer(pooled_layer)
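
In this single-channel version the weight is arranged the other way around: unsqueeze(1) turns the four 4×4 filters into a tensor of shape (4, 1, 4, 4), i.e. four output channels over one grayscale input channel, so a [1, 1, H, W] image produces four feature maps. A minimal check using the functional conv2d (the 64×64 input size is only a placeholder):

import numpy as np
import torch
import torch.nn.functional as F

base = np.array([[-1, -1, 1, 1]] * 4)
filters = np.array([base, -base, base.T, -base.T])       # shape (4, 4, 4)
weight = torch.from_numpy(filters).unsqueeze(1).float()  # shape (4, 1, 4, 4)

x = torch.randn(1, 1, 64, 64)   # dummy grayscale image: [batch, 1, H, W]
out = F.conv2d(x, weight)       # one feature map per filter
print(out.shape)                # torch.Size([1, 4, 61, 61])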

3. Use PyTorch's default kernel initialization and run three rounds of "convolution → activation → pooling". Because the kernels are randomly initialized on every run, the resulting feature maps differ from run to run (a note on fixing the seed follows the listing).

import cv2
import matplotlib.pyplot as plt
import torchvision.models as models  # note: this import is not needed for weight init; nn.Conv2d initializes its own weights by default


img_path = '1.jpg'

bgr_img = cv2.imread(img_path)

# bgr_img = bgr_img.transpose(2,0,1)
print(bgr_img.shape)
b,g,r = cv2.split(bgr_img)
img_rgb = cv2.merge([r,g,b])
gray_img = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2GRAY)  # img_rgb is already RGB, so use RGB2GRAY
img_rgb1 = img_rgb.transpose(2,0,1)  # move the channel dimension to the front: (C, H, W)
print(img_rgb.shape)


plt.figure()
plt.imshow(b)
print(gray_img.shape)
# Normalise
gray_img = gray_img.astype("float32")/255
# print(gray_img.shape)
plt.figure()
plt.imshow(img_rgb)
plt.figure()
plt.imshow(b)

# plt.show()

import numpy as np

filter_vals = np.array([[-1, -1, 1, 1], [-1, -1, 1, 1], [-1, -1, 1, 1], [-1, -1, 1, 1]],dtype=np.float64)
print('Filter shape: ', filter_vals.shape)
filter_1 = filter_vals
filter_2 = -filter_1
filter_3 = filter_1.T
# filter_4 = -filter_3
filters = np.array([filter_1, filter_2, filter_3])#, filter_4])
import torch
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        # three conv layers; the weights use PyTorch's default (random) initialization
        self.conv1 = nn.Conv2d(3,3,4, bias=False)
        # self.conv1 = torch.nn.Parameter(weight)
        # define a pooling layer
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(3,3,4, bias=False)
        # self.conv2.weight = torch.nn.Parameter(weight)
        # define a pooling layer
        self.pool2 = nn.MaxPool2d(2, 2)
        self.conv3 = nn.Conv2d(3,3,4, bias=False)
        # self.conv3.weight = torch.nn.Parameter(weight)
        # define a pooling layer
        self.pool3 = nn.MaxPool2d(2, 2)

    def forward(self, x):
        # calculates the output of a convolutional layer
        # pre- and post-activation
        conv_x = self.conv1(x)
        activated_x = F.relu(conv_x)
        print('activated_x-shape',activated_x.shape)
        # applies pooling layer
        pooled_x = self.pool1(activated_x)
        conv_x = self.conv2(pooled_x)
        activated_x = F.relu(conv_x)
        print('activated_x-shape',activated_x.shape)
        # applies pooling layer
        pooled_x = self.pool2(activated_x)
        conv_x = self.conv3(pooled_x)
        activated_x = F.relu(conv_x)
        print('activated_x-shape',activated_x.shape)
        # applies pooling layer
        pooled_x = self.pool3(activated_x)
        # returns all layers
        return conv_x, activated_x, pooled_x


# instantiate the model and set the weights
# weight = torch.from_numpy(filters).unsqueeze(0)#.type(torch.FloatTensor)
# print("weight shape",weight.shape)
model = Net()

# print out the layer in the network
print(model)


def viz_layer(layer, n_filters=1):
    fig = plt.figure(figsize=(20, 20))
    for i in range(n_filters):
        ax = fig.add_subplot(1, n_filters, i + 1)
        ax.imshow(np.squeeze(layer[0, i].data.numpy()))
        ax.set_title('Output %s' % str(i + 1))


# plt.imshow(gray_img, cmap='gray')

fig = plt.figure(figsize=(12, 6))
fig.subplots_adjust(left=0, right=1.5, bottom=0.8, top=1, hspace=0.05, wspace=0.05)
for i in range(3):
    ax = fig.add_subplot(1, 3, i + 1, xticks=[], yticks=[])
    ax.imshow(filters[i], cmap='gray')
    ax.set_title('Filter %s' % str(i + 1))

img_tensor = torch.from_numpy(img_rgb1).unsqueeze(0)  # add the batch dimension: [1, 3, H, W]
img_tensor = img_tensor.float()  # Conv2d's default weights are float32
print('img_tensor.shape', img_tensor.shape)
conv_x, activated_layer, pooled_layer = model(img_tensor)
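
Since nn.Conv2d draws its weights from PyTorch's default random initialization, the feature maps above look different on every run. If you want repeatable results for comparison, fix the random seed before the layers are created; a minimal, self-contained sketch:

import torch
import torch.nn as nn

torch.manual_seed(0)                       # fix the RNG before creating any layers
conv_a = nn.Conv2d(3, 3, 4, bias=False)

torch.manual_seed(0)                       # same seed again ...
conv_b = nn.Conv2d(3, 3, 4, bias=False)    # ... gives identical default-initialized weights

print(torch.equal(conv_a.weight, conv_b.weight))  # True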

4. Add a fully connected layer (no softmax yet); note that a "flatten" step is needed before the linear layer (a sketch for computing the flatten size automatically follows the listing).

import cv2
import matplotlib.pyplot as plt
import torchvision.models as models  # note: this import is not needed for weight init; nn.Conv2d initializes its own weights by default


img_path = '1.jpg'

bgr_img = cv2.imread(img_path)

# bgr_img = bgr_img.transpose(2,0,1)
print(bgr_img.shape)
b,g,r = cv2.split(bgr_img)
img_rgb = cv2.merge([r,g,b])
gray_img = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2GRAY)  # img_rgb is already RGB, so use RGB2GRAY
img_rgb1 = img_rgb.transpose(2,0,1)  # move the channel dimension to the front: (C, H, W)
print(img_rgb.shape)


plt.figure()
plt.imshow(b)
print(gray_img.shape)
# Normalise
gray_img = gray_img.astype("float32")/255
# print(gray_img.shape)
plt.figure()
plt.imshow(img_rgb)
plt.figure()
plt.imshow(b)

# plt.show()

import numpy as np

filter_vals = np.array([[-1, -1, 1, 1], [-1, -1, 1, 1], [-1, -1, 1, 1], [-1, -1, 1, 1]],dtype=np.float64)
print('Filter shape: ', filter_vals.shape)
filter_1 = filter_vals
filter_2 = -filter_1
filter_3 = filter_1.T
# filter_4 = -filter_3
filters = np.array([filter_1, filter_2, filter_3])#, filter_4])
import torch
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        # three conv layers; the weights use PyTorch's default (random) initialization
        self.conv1 = nn.Conv2d(3,3,4, bias=False)
        # self.conv1 = torch.nn.Parameter(weight)
        # define a pooling layer
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(3,3,4, bias=False)
        # self.conv2.weight = torch.nn.Parameter(weight)
        # define a pooling layer
        self.pool2 = nn.MaxPool2d(2, 2)
        self.conv3 = nn.Conv2d(3,3,4, bias=False)
        # self.conv3.weight = torch.nn.Parameter(weight)
        # define a pooling layer
        self.pool3 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(3*73*69, 8)  # in_features is hard-coded for this particular input image size


    def forward(self, x):
        # calculates the output of a convolutional layer
        # pre- and post-activation
        conv_x = self.conv1(x)
        activated_x = F.relu(conv_x)
        print('activated_x-shape',activated_x.shape)
        # applies pooling layer
        pooled_x = self.pool1(activated_x)
        conv_x = self.conv2(pooled_x)
        activated_x = F.relu(conv_x)
        print('activated_x-shape',activated_x.shape)
        # applies pooling layer
        pooled_x = self.pool2(activated_x)
        conv_x = self.conv3(pooled_x)
        activated_x = F.relu(conv_x)
        print('activated_x-shape',activated_x.shape)
        # applies pooling layer
        pooled_x = self.pool3(activated_x)
        print("shape",pooled_x.shape)
        pooled_x = pooled_x.view(1, -1)
        fc_x = self.fc1(pooled_x)

        # returns all layers
        return conv_x, activated_x, fc_x


# instantiate the model and set the weights
# weight = torch.from_numpy(filters).unsqueeze(0)#.type(torch.FloatTensor)
# print("weight shape",weight.shape)
model = Net()

# print out the layer in the network
print(model)


def viz_layer(layer, n_filters=1):
    fig = plt.figure(figsize=(20, 20))
    for i in range(n_filters):
        ax = fig.add_subplot(1, n_filters, i + 1)
        ax.imshow(np.squeeze(layer[0, i].data.numpy()))
        ax.set_title('Output %s' % str(i + 1))


# plt.imshow(gray_img, cmap='gray')

fig = plt.figure(figsize=(12, 6))
fig.subplots_adjust(left=0, right=1.5, bottom=0.8, top=1, hspace=0.05, wspace=0.05)
for i in range(3):
    ax = fig.add_subplot(1, 3, i + 1, xticks=[], yticks=[])
    ax.imshow(filters[i], cmap='gray')
    ax.set_title('Filter %s' % str(i + 1))

img_tensor = torch.from_numpy(img_rgb1).unsqueeze(0)  # add the batch dimension: [1, 3, H, W]
img_tensor = img_tensor.float()  # Conv2d's default weights are float32
print('img_tensor.shape', img_tensor.shape)
conv_x, activated_layer, fc_out = model(img_tensor)
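
The in_features of fc1 (3*73*69) is hard-coded for one particular input image size; with a different image the flatten size changes and the Linear layer no longer matches. A more robust pattern is to run a dummy tensor through the conv/pool stack once and read off the flattened size. A minimal sketch, assuming an arbitrary 3-channel 256×256 input (not the size of 1.jpg):

import torch
import torch.nn as nn

# the same three-stage conv/pool stack as above, without the fully connected layer
features = nn.Sequential(
    nn.Conv2d(3, 3, 4, bias=False), nn.ReLU(), nn.MaxPool2d(2, 2),
    nn.Conv2d(3, 3, 4, bias=False), nn.ReLU(), nn.MaxPool2d(2, 2),
    nn.Conv2d(3, 3, 4, bias=False), nn.ReLU(), nn.MaxPool2d(2, 2),
)

with torch.no_grad():
    dummy = torch.zeros(1, 3, 256, 256)           # dummy batch just to trace shapes
    n_flat = features(dummy).view(1, -1).size(1)  # flattened feature count

fc1 = nn.Linear(n_flat, 8)   # in_features now follows the input size automatically
print(n_flat)                # 3 * 29 * 29 = 2523 for a 256x256 input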

