利用卷积神经网络实现手写字识别

75 阅读 0 评论 50 点赞

我是靠谱客的博主辛勤世界，最近开发中收集的这篇文章主要介绍利用卷积神经网络实现手写字识别，觉得挺不错的，现在分享给大家，希望可以做个参考。

概述

使用经典的 LeNet-5 结构（与原始 LeNet-5 有所不同：下面的实现采用最大池化，且各层之间没有使用激活函数）。

基于 PyTorch 实现，包括神经网络的构建、激活函数的选择；
归一化采用“像素值/255”的方式实现，即把像素值缩放到 [0, 1] 区间，也可以尝试用别的方式进行归一化处理。

import numpy as np
import torch
import torch.optim as optim
from torch import nn
class Data:
    '''
    Loader and pre-processing helpers for the four idx files listed in ``self.choice``.

    ``get`` reads the raw bytes of one file and decodes them into Python/numpy
    values; ``transfer_tensor`` and ``normalize`` turn the decoded values into
    scaled torch tensors.
    '''
    def __init__(self):
        self.start_loc_image = 16  # byte offset at which pixel data starts in an image file
        self.start_loc_label = 8  # byte offset at which label data starts in a label file
        self.num_pixel = 28 * 28  # number of pixels (one byte each) per image
        # maps "<train|test>-<image|label>" to the file name appended to the path
        self.choice = {'train-image': 'train-images.idx3-ubyte', 'train-label': 'train-labels.idx1-ubyte',
                       'test-image': 't10k-images.idx3-ubyte', 'test-label': 't10k-labels.idx1-ubyte'}

    def get(self, path, train_test='', image_label=''):
        '''
        get the data from the file found under "path"
        :param path: prefix the file name is appended to by plain string
                     concatenation, so it must end with a separator (e.g. "./file/")
        :param train_test: the data category("train" or "test")
        :param image_label: the data information("image" or "label")
        :return: for "image" a list with one ``[28x28 ndarray]`` entry per image,
                 for "label" a list of ints
        :raises NameError: if "train_test" or "image_label" is not one of the accepted values
        :raises ValueError: if the image file ends with an incomplete image
        '''
        if train_test not in ('train', 'test') or image_label not in ('image', 'label'):
            raise NameError(
                'please check you spelling,"train_test" can be "train/test", "image_label" can be "image/label"')
        file_name = path + self.choice[train_test + '-' + image_label]
        if image_label == 'image':
            print('loading images ...')
            # the with-block closes the file; no explicit close() is needed
            with open(file_name, 'rb') as f:
                raw = f.read()
            # iterating a bytes object already yields ints in 0..255, so no hex round trip
            return [self.transform2image(list(raw[start:start + self.num_pixel]))
                    for start in range(self.start_loc_image, len(raw), self.num_pixel)]
        print('load labels ...')
        with open(file_name, 'rb') as f:
            raw = f.read()
        # every byte after the header is one label
        return list(raw[self.start_loc_label:])

    def transform2image(self, data: list):
        '''
        transform pixel points to an image
        :param data: the original 1D pixel points (``self.num_pixel`` values)
        :return: a one-element list holding the image reshaped to 28 rows
        :raises ValueError: if "data" does not hold exactly ``self.num_pixel`` values
        '''
        # explicit check instead of assert, which is removed when Python runs with -O
        if len(data) != self.num_pixel:
            raise ValueError('expected %d pixel values, got %d' % (self.num_pixel, len(data)))
        # wrapped in a list so every image carries a leading axis of length 1
        return [np.reshape(data, (28, -1))]

    def transfer_tensor(self, data):
        '''
        transfer data to tensor format
        :param data: the original input data
        :return: the result of ``torch.tensor(data)``
        '''
        return torch.tensor(data)

    def normalize(self, data, maximum=255):
        '''
        normalize the data by dividing it by "maximum"
        :param data: the input data
        :param maximum: the divisor (255 by default, the largest value of a one-byte pixel)
        :return: normalized data
        '''
        return torch.div(data, maximum)
class Network(nn.Module):
    '''
    LeNet-5 style classifier: two convolutions, each followed by 2x2 max
    pooling, then three fully connected layers.

    No activation function is applied between the layers. The module also owns
    its SGD optimizer and the training loop (``per_train``).
    '''
    def __init__(self):
        '''
        build the layers and the optimizer
        '''
        super(Network, self).__init__()
        self.channel_input = 1  # the channel of input data
        self.size_kernel = 5  # the size of kernel
        self.len_padding = 2  # the padding of the first convolution
        self.channel_c1 = 6  # the channel of convolution1
        self.channel_c2 = 16  # the channel of convolution2
        self.len_flatten = 120  # the output length of the first fully connected layer
        self.len_hidden = 84  # the length of hidden layer
        self.len_out = 10  # the length of final output
        self.c1 = nn.Conv2d(self.channel_input, self.channel_c1, kernel_size=self.size_kernel,
                            padding=self.len_padding)
        self.c2 = nn.Conv2d(self.channel_c1, self.channel_c2, kernel_size=self.size_kernel)
        # 5*5 is the spatial size left after both conv/pool stages for a 28x28 input
        self.fc1 = nn.Linear(self.channel_c2 * 5 * 5, self.len_flatten)
        self.fc2 = nn.Linear(self.len_flatten, self.len_hidden)
        self.fc3 = nn.Linear(self.len_hidden, self.len_out)
        ### other parameters
        self.learning_rate = 0.01  # learning rate
        self.optimizer = optim.SGD(self.parameters(), lr=self.learning_rate)

    def forward(self, x):
        '''
        calculate the output
        :param x: input batch; the layer sizes fit (batch, 1, 28, 28)
        :return: one row of ``len_out`` scores per sample
        '''
        out = nn.functional.max_pool2d(self.c1(x), 2)
        out = nn.functional.max_pool2d(self.c2(out), 2)
        out = out.view(x.size(0), -1)  # flatten everything but the batch axis
        return self.fc3(self.fc2(self.fc1(out)))

    def accuracy(self, act, pre):
        '''
        calculate the accuracy
        :param act: actual value
        :param pre: predicted value
        :return: share of equal entries rounded to 3 decimals (0.0 for empty input)
        :raises ValueError: if both inputs differ in length
        '''
        if len(act) != len(pre):
            raise ValueError('"act" and "pre" must have the same length')
        if len(act) == 0:
            return 0.0  # avoid dividing by zero when there is nothing to score
        return round((act == pre).sum().item() / len(act), 3)

    def pre_process(self, feature, label):
        '''
        pre processing
        :param feature: feature (pixel values, divided by 255 here)
        :param label: label
        :return: feature as float32 tensor and label as int64 tensor
        '''
        # Self-contained conversion: no dependency on a module-level helper
        # object, so the class also works when the file is imported.
        if not isinstance(feature, torch.Tensor):
            # one numpy array first; building a tensor from nested lists of arrays is slow
            feature = np.asarray(feature)
        feature = torch.div(torch.as_tensor(feature, dtype=torch.float32), 255)
        return feature, torch.as_tensor(label, dtype=torch.int64)

    def per_train(self, epoch, feature, label, validation=0.2, batch=50, verbose=True, num_view=5):
        '''
        train neural network
        :param epoch: number of passes over the training part
        :param feature: feature
        :param label: label
        :param validation: share of the samples (taken from the end) held out for evaluation
        :param batch: batch size
        :param verbose: whether view the training process or not
        :param num_view: report every "num_view" epochs
        :return: none
        :raises ValueError: if feature and label differ in length
        '''
        if len(feature) != len(label):
            raise ValueError('"feature" and "label" must have the same length')
        print('training neural network ...')
        fea, lab = self.pre_process(feature, label)
        len_train = int(len(feature) * (1 - validation))
        # strict split: sample "len_train" belongs to the validation part only
        data_train, label_train = fea[:len_train], lab[:len_train]
        data_val, label_val = fea[len_train:], lab[len_train:]
        batches = [(data_train[i:i + batch], label_train[i:i + batch])
                   for i in range(0, len_train, batch)]
        for e in range(1, epoch + 1):  # exactly "epoch" passes
            self.train()
            loss_train = None
            for img, target in batches:
                loss_train = nn.functional.cross_entropy(self(img), target)
                loss_train.backward()
                self.optimizer.step()
                self.optimizer.zero_grad()
            # reporting is skipped when there is nothing to report on
            if verbose and e % num_view == 0 and loss_train is not None and len(label_val) > 0:
                self.eval()
                with torch.no_grad():  # evaluation needs no autograd graph
                    pre = self(data_val)
                    loss_val = nn.functional.cross_entropy(pre, label_val)
                _, pre_view = pre.max(1)
                acc = self.accuracy(label_val, pre_view)
                # "training loss" is the loss of the last batch of this epoch
                print('epoch: ' + str(e) + '/' + str(epoch) + ' --> training loss:', loss_train.item(),
                      'validation loss:', loss_val.item(), 'validation accuracy:', acc)
def set_seed(seed):
    '''
    seed every random number source used by the script so runs can be repeated
    :param seed: the value handed to each generator
    :return: none
    '''
    import random
    # python and numpy generators
    random.seed(seed)
    np.random.seed(seed)
    # ask cudnn for deterministic algorithms
    torch.backends.cudnn.deterministic = True
    # torch generators
    for seeder in (torch.manual_seed, torch.cuda.manual_seed):
        seeder(seed)
# Module-level settings. Only Seed is used in the visible code; the other
# three names are not referenced anywhere in this listing.
Seed = 0  # random seed handed to set_seed below
Size_pixel = 28 * 28
Hidden = 200
Output = 10
# seed all generators at import time, before any network is built
set_seed(Seed)
if __name__ == '__main__':
    # prefix of the data files; file names are appended to it directly
    path = './file/'
    # helper objects: data access and the network itself
    dat = Data()
    net = Network()
    # The training files can be loaded the same way:
    # train_image=dat.get(path,'train','image')
    # train_label=dat.get(path,'train','label')
    # load the images and labels of the test files
    test_image = dat.get(path, train_test='test', image_label='image')
    test_label = dat.get(path, train_test='test', image_label='label')
    # fit the network on the loaded data
    epoch = 100  # training times
    net.per_train(epoch, feature=test_image, label=test_label)

运行结果：

loading images ...
load labels ...
training neural network ...
epoch: 5/100 --> training loss: 0.20060116052627563 validation loss: 0.3406471908092499 validation accuracy: 0.896
epoch: 10/100 --> training loss: 0.07570216804742813 validation loss: 0.226552814245224 validation accuracy: 0.929
epoch: 15/100 --> training loss: 0.043092332780361176 validation loss: 0.17325899004936218 validation accuracy: 0.948
epoch: 20/100 --> training loss: 0.026798103004693985 validation loss: 0.13407514989376068 validation accuracy: 0.957
epoch: 25/100 --> training loss: 0.018509650602936745 validation loss: 0.11087674647569656 validation accuracy: 0.963
epoch: 30/100 --> training loss: 0.012325393036007881 validation loss: 0.09859741479158401 validation accuracy: 0.971
epoch: 35/100 --> training loss: 0.0074988435953855515 validation loss: 0.09093887358903885 validation accuracy: 0.972
epoch: 40/100 --> training loss: 0.004394318908452988 validation loss: 0.08551845699548721 validation accuracy: 0.974
epoch: 45/100 --> training loss: 0.0024089752696454525 validation loss: 0.08029623329639435 validation accuracy: 0.975
epoch: 50/100 --> training loss: 0.0013418461894616485 validation loss: 0.0761108547449112 validation accuracy: 0.976
epoch: 55/100 --> training loss: 0.0008003945695236325 validation loss: 0.07389482110738754 validation accuracy: 0.978
epoch: 60/100 --> training loss: 0.0004860269255004823 validation loss: 0.07341232150793076 validation accuracy: 0.977
epoch: 65/100 --> training loss: 0.00030446669552475214 validation loss: 0.0740547850728035 validation accuracy: 0.977
epoch: 70/100 --> training loss: 0.00019296690879855305 validation loss: 0.07532798498868942 validation accuracy: 0.978
epoch: 75/100 --> training loss: 0.00012676884944085032 validation loss: 0.07695875316858292 validation accuracy: 0.978
epoch: 80/100 --> training loss: 8.715855801710859e-05 validation loss: 0.07869979739189148 validation accuracy: 0.978
epoch: 85/100 --> training loss: 6.354562356136739e-05 validation loss: 0.08037523180246353 validation accuracy: 0.979
epoch: 90/100 --> training loss: 4.849363540415652e-05 validation loss: 0.08200100064277649 validation accuracy: 0.978
epoch: 95/100 --> training loss: 3.824889790848829e-05 validation loss: 0.08359858393669128 validation accuracy: 0.978
epoch: 100/100 --> training loss: 3.062502946704626e-05 validation loss: 0.08513176441192627 validation accuracy: 0.979