MXNet创建新的操作层（详细）

234 阅读 0 评论 155 点赞

我是靠谱客的博主个性摩托，这篇文章主要介绍MXNet创建新的操作层（详细），现在分享给大家，希望可以做个参考。

本文主要介绍如何利用MXNet创建新的操作层，主要参考了MXNet官网关于构建softmax层的例子[1]，以及使用numpy定制新操作的教程[2]。注意，这里的softmax层是指softmax损失层，即由softmax操作和交叉熵损失函数共同组成的层。

第一部分介绍如何创建softmax损失层；第二部分将这个损失层用到mnist分类中；第三部分介绍sigmoid层的创建。建议先看第三部分，因为sigmoid层的前向和反向操作都比较简单，而softmax损失层的操作要复杂一些。

构建一个新的层，包括以下几个步骤：

前向操作
反向操作
输入参数
输出参数
维度推理
类型推理
创建实例

一、创建softmax损失层

具体代码如下：

import mxnet as mx
import numpy as np
class NewSoftmax(mx.operator.CustomOp):
    """Softmax loss layer (softmax + cross-entropy) written as a custom op.

    ``in_data[0]`` holds the scores, indexed as a 2-D array with one row per
    sample; ``in_data[1]`` holds the labels, which ``backward`` uses as
    integer column (class) indices.
    """

    def forward(self, is_train, req, in_data, out_data, aux):
        """Write the row-wise softmax of ``in_data[0]`` into ``out_data[0]``.

        is_train: bool, training or testing mode (not used here).
        req: list of {'null', 'write', 'inplace', 'add'} describing how to
            assign to the outputs; 'null' means the assignment is skipped.
        in_data: list of NDArray, input data.
        out_data: list of NDArray, pre-allocated output buffers.
        aux: list of NDArray, auxiliary states; usually not used.
        """
        x = in_data[0].asnumpy()
        # Subtract each row's maximum before exponentiating so that np.exp
        # never sees a large positive argument.
        y = np.exp(x - x.max(axis=1, keepdims=True))
        y /= y.sum(axis=1, keepdims=True)
        self.assign(out_data[0], req[0], mx.nd.array(y))

    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        """Write (softmax output - one-hot label) into ``in_grad[0]``.

        req: same meaning as in ``forward``, applied to ``in_grad``.
        out_grad: list of NDArray, gradients w.r.t. the outputs (coming from
            the later part of the network); not read by this method.
        in_grad: list of NDArray, gradients w.r.t. the inputs (going to the
            earlier part of the network); this is the output of backward.
        """
        # The builtin ``int`` replaces the ``np.int`` alias, which was
        # deprecated in NumPy 1.20 and removed in NumPy 1.24.
        labels = in_data[1].asnumpy().ravel().astype(int)
        grad = out_data[0].asnumpy()
        # Subtract 1 at the column of each row's label.
        grad[np.arange(labels.shape[0]), labels] -= 1.0
        self.assign(in_grad[0], req[0], mx.nd.array(grad))
# The new operator has to be registered so that MXNet can recognise it; this
# is done by subclassing mx.operator.CustomOpProp.
@mx.operator.register("newsoftmax")
class NewSoftmaxProp(mx.operator.CustomOpProp):
    """Property class registered under the op type "newsoftmax"."""

    def __init__(self):
        # need_top_grad=False because the gradient from later layers is not
        # needed by this op.
        super(NewSoftmaxProp, self).__init__(need_top_grad=False)

    def list_arguments(self):
        """Return the input names (may be omitted when there is one input)."""
        argument_names = ['data', 'label']
        return argument_names

    def list_outputs(self):
        """Return the output names (may be omitted when there is one output)."""
        output_names = ['output']
        return output_names

    def infer_shape(self, in_shape):
        """Infer output shapes from input shapes.

        May be omitted when inputs and outputs all share the same shape.

        in_shape: list of shapes; each shape is a tuple of int.

        Returns three lists: input shapes, output shapes and auxiliary
        state shapes.
        """
        scores_shape = in_shape[0]
        # One label per entry along the first axis of the data.
        labels_shape = (scores_shape[0],)
        return [scores_shape, labels_shape], [scores_shape], []

    def infer_type(self, in_type):
        """Return input types unchanged; the output takes the first input's type."""
        out_types = [in_type[0]]
        return in_type, out_types, []

    def create_operator(self, ctx, shapes, dtypes):
        """Create and return an instance of the custom operator."""
        # NOTE (original author): this method must be defined; they report
        # that without it the model fails to converge. Not verified here.
        return NewSoftmax()

二、分类网络

结合上面创建的softmax损失层，我们将其用在mnist字符数据集中进行分类，代码如下：

# encoding:utf-8
# Train and evaluate a small CNN on MNIST, using the custom "newsoftmax"
# operator as the loss layer in place of the built-in SoftmaxOutput.
import logging
# Set the root logger to INFO; the author notes this is important for the
# per-epoch training information to be printed.
logging.getLogger().setLevel(logging.INFO)
import os
import mxnet as mx
from mxnet import nd
# Load the new layer. Presumably new_operation.py contains the code from
# part 1, so importing it runs the @mx.operator.register decorator -- confirm.
from new_operation import NewSoftmax
# Prepare the data and wrap it in NDArrayIter iterators.
mnist = mx.test_utils.get_mnist()
mx.random.seed(42)
batch_size = 100
train_iter = mx.io.NDArrayIter(mnist["train_data"], mnist["train_label"], batch_size, shuffle=True,
data_name='data', label_name='newsoftmax_label')
# Pitfall (original author): the label name must be given explicitly here,
# otherwise it defaults to 'softmax_label' and shape inference fails.
val_iter = mx.io.NDArrayIter(mnist["test_data"], mnist["test_label"], batch_size,
data_name='data', label_name='newsoftmax_label')
# Define the network.
data = mx.sym.var('data')
conv1 = mx.sym.Convolution(data=data, kernel=(3,3), num_filter=20)
relu1 = mx.sym.Activation(data=conv1, act_type="relu")
pool1 = mx.sym.Pooling(data=relu1, pool_type="max", kernel=(2,2), stride=(2,2))
conv2 = mx.sym.Convolution(data=pool1, kernel=(3,3), num_filter=20)
relu2 = mx.sym.Activation(data=conv2, act_type="relu")
pool2 = mx.sym.Pooling(data=relu2, pool_type="max", kernel=(2,2), stride=(2,2))
flatten = mx.sym.flatten(data=pool2)
fc1 = mx.sym.FullyConnected(data=flatten, num_hidden=500)
relu3 = mx.sym.Activation(data=fc1, act_type="relu")
fc2 = mx.sym.FullyConnected(data=relu3, num_hidden=10)
# Built-in loss layer that the custom op below replaces:
# cnn_symbol = mx.sym.SoftmaxOutput(data=fc2, name="softmax")
# Use the newly created layer; op_type is the name the layer was registered
# under with mx.operator.register.
cnn_symbol = mx.sym.Custom(data=fc2, name= "newsoftmax", op_type = "newsoftmax")
# Define the module; use a GPU when list_gpus() reports one, else the CPU.
ctx = mx.gpu() if mx.test_utils.list_gpus() else mx.cpu()
cnn_model = mx.mod.Module(symbol=cnn_symbol, context=ctx,
data_names=["data"],
label_names=["newsoftmax_label"])
# Train for 10 epochs, i.e. 10 passes over the training set.
cnn_model.fit(train_iter, eval_data=val_iter, optimizer='sgd', optimizer_params={'learning_rate':0.1},
# Print training information once every 100 batches.
batch_end_callback = mx.callback.Speedometer(batch_size, 100),
eval_metric='acc',
num_epoch=10)
# Test 1: run prediction on the test images (no labels given).
test_iter = mx.io.NDArrayIter(mnist['test_data'], None, batch_size)
prob = cnn_model.predict(test_iter)
# Test 2: score accuracy on the labelled test set.
test_iter = mx.io.NDArrayIter(mnist['test_data'], mnist['test_label'], batch_size)
acc = mx.metric.Accuracy()
cnn_model.score(test_iter, acc)
print(acc)
# Fail loudly if the accuracy does not exceed 0.98.
assert acc.get()[1] > 0.98, "Achieved accuracy (%f) is lower than expected (0.98)" % acc.get()[1]

三、创建sigmoid层

具体代码如下：

class Sigmoid(mx.operator.CustomOp):
    """Element-wise sigmoid implemented with NumPy as a custom operator."""

    def forward(self, is_train, req, in_data, out_data, aux):
        """Compute ``1 / (1 + exp(-x))`` for ``in_data[0]`` into ``out_data[0]``.

        is_train: bool, whether forwarding for training or testing.
        req: list of {'null', 'write', 'inplace', 'add'}, how to assign to
            out_data; 'null' means skip the assignment.
        in_data: list of NDArray, input data.
        out_data: list of NDArray, pre-allocated output buffers.
        aux: list of NDArray, mutable auxiliary states; usually not used.
        """
        inputs = in_data[0].asnumpy()
        denominator = 1.0 + np.exp(-inputs)
        activated = 1.0 / denominator
        self.assign(out_data[0], req[0], mx.nd.array(activated))

    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        """Compute the gradient w.r.t. the input into ``in_grad[0]``.

        req: list of {'null', 'write', 'inplace', 'add'}, how to assign to
            in_grad.
        out_grad: list of NDArray, gradient w.r.t. the output data.
        in_grad: list of NDArray, gradient w.r.t. the input data; this is
            the output buffer of backward.
        """
        sig = out_data[0].asnumpy()
        upstream = out_grad[0].asnumpy()
        # Chain rule: d sigmoid / dx = sigmoid * (1 - sigmoid).
        input_grad = upstream * (1.0 - sig) * sig
        self.assign(in_grad[0], req[0], mx.nd.array(input_grad))
# Register the operator under the name "sigmoid".
@mx.operator.register("sigmoid")
class SigmoidProp(mx.operator.CustomOpProp):
    """Property class registered under the op type "sigmoid"."""

    def __init__(self):
        # need_top_grad=True: Sigmoid.backward reads out_grad, so the
        # gradient from the following layers must be provided.
        super(SigmoidProp, self).__init__(need_top_grad=True)

    def list_arguments(self):
        """Return the input names."""
        # This can be omitted if you only have 1 input.
        return ['data']

    def list_outputs(self):
        """Return the output names."""
        # This can be omitted if you only have 1 output.
        return ['output']

    def infer_shape(self, in_shapes):
        """Calculate output shapes from input shapes.

        This can be omitted if all your inputs and outputs have the same
        shape.

        in_shapes: list of shape; a shape is described by a tuple of int.

        Returns three sequences: input shapes, output shapes and aux data
        shapes.
        """
        data_shape = in_shapes[0]
        output_shape = data_shape
        return (data_shape,), (output_shape,), ()

    def create_operator(self, ctx, in_shapes, in_dtypes):
        """Create and return an instance of the CustomOp class."""
        return Sigmoid()
# Usage example: run the registered "sigmoid" op forward and backward.
x = mx.nd.array([0, 1, 2, 3])
# Attach a gradient buffer to x for autograd.
x.attach_grad()
# Run forward inside a record() section so the computation graph is saved
# for backward (see the autograd tutorial to learn more). autograd is
# reached through the already-imported ``mx`` because the snippet never
# imports the bare name ``autograd``.
with mx.autograd.record():
    y = mx.nd.Custom(x, op_type='sigmoid')
print(y)
# Call the backward computation.
y.backward()
# The gradient is now saved in the grad buffer attached previously.
print(x.grad)

以上介绍了不带参数的新层的创建方法。下一讲将是带有参数的新层的创建方法。

参考

[1] https://github.com/apache/incubator-mxnet/blob/master/example/numpy-ops/custom_softmax.py
[2] https://mxnet.incubator.apache.org/tutorials/gluon/customop.html
[3] https://blog.csdn.net/qq_25491201/article/details/51284416