zoukankan html css js c++ java

Mxnet基础知识(二)

1 混合式编程

　　深度学习框架中，pytorch采用命令式编程，tensorflow采用符号式编程。mxnet的gluon则尝试将命令式编程和符号式编程结合。

1.1 符号式编程和命令式编程

　　命令式编程更加灵活，便于理解和调试；符号式编程能对代码进行优化，执行起来效率更高，如下所示：

　　命令式编程：代码会根据执行顺序，逐行执行

# Imperative programming

def add(a, b):
    """Return the sum of ``a`` and ``b``."""
    total = a + b
    return total

def fancy_func(a, b, c, d):
    """Add the two pairs separately, then add the two partial sums."""
    return add(add(a, b), add(c, d))

fancy_func(1, 2, 3, 4)

　　符号式编程：下面代码会通过字符串的形式传给compile，compile能看到所有的代码，能对代码结构和内存进行优化，加快代码执行效率

# Symbolic programming

def add_str():
    """Return the source text that defines ``add``."""
    source = '''
def add(a, b):
    return a + b
'''
    return source

def fancy_func_str():
    """Return the source text that defines ``fancy_func``."""
    source = '''
def fancy_func(a, b, c, d):
    e = add(a, b)
    f = add(c, d)
    g = add(e, f)
    return g
'''
    return source

def evoke_str():
    """Assemble the full program text: both definitions plus a call that prints the result."""
    pieces = [add_str(), fancy_func_str(), '''
print(fancy_func(1, 2, 3, 4))
''']
    return ''.join(pieces)

# Build the whole program as text, show it, then compile and run it in one go.
prog = evoke_str()
print(prog)
y = compile(prog, '', 'exec')
exec(y)

mxnet构建网络时除了nn.Block和nn.Sequential外，还有nn.HybridBlock和nn.HybridSequential，实现在构建时采用命令式编程方式，代码执行时转变成符号式编程。HybridBlock和HybridSequential构建的网络net，通过net.hybridize()可以将网络转变成符号网络图（symbolic graph），对代码结构进行优化，而且mxnet会缓存符号图，在随后的前向传递中重复使用符号图。

#coding:utf-8
from mxnet.gluon import nn
from mxnet import nd

class HybridNet(nn.HybridBlock):
    """Two-layer dense network used to contrast imperative and hybridized execution."""

    def __init__(self, **kwargs):
        super(HybridNet, self).__init__(**kwargs)
        self.hidden = nn.Dense(10)
        self.output = nn.Dense(2)

    def hybrid_forward(self, F, x):
        """Apply the hidden layer and ReLU, then the output layer.

        The print calls make it visible whenever this Python body is
        actually executed.
        """
        print('F: ', F)
        print('x: ', x)
        hidden_out = self.hidden(x)
        activated = F.relu(hidden_out)
        print('hidden: ', activated)
        return self.output(activated)

# Run in the original imperative mode: the Python code executes line by line
net = HybridNet()
net.initialize()
x = nd.random.normal(shape=(1, 4))
net(x)

# net.hybridize() optimizes the code structure and switches to symbolic execution
net.hybridize()
net(x)

# On this call the print statements in hybrid_forward are no longer executed: after hybridize(), the symbolic graph has been built and cached by mxnet, which runs the cached graph directly instead of calling the original Python code again
net(x)

　　另外，继承自HybridBlock的网络需要实现的是hybrid_forward()，相比于forward()多了一个参数F。F会根据输入x的类型选择执行：若x为mxnet.ndarray，则F调用ndarray的方法；若x为mxnet.symbol，则F调用symbol的方法。

2. 延迟初始化

　　在构建网络时，mxnet支持不指明参数的输入尺寸，只需指明参数的输出尺寸。这是通过延迟初始化实现

from mxnet import init, nd
from mxnet.gluon import nn


def getnet():
    """Build a two-layer network: Dense(256, relu) followed by Dense(10)."""
    model = nn.Sequential()
    model.add(
        nn.Dense(256, activation='relu'),
        nn.Dense(10),
    )
    return model

# Network just built: its parameters are not initialized and hold no concrete values
net = getnet()
print(1, net.collect_params())   #print(1, net[0].weight.data())

# initialize() has been called, but the parameters still hold no concrete values (initialization is deferred)
net.initialize()
print(2, net.collect_params())  #print(2, net[0].weight.data())

# From the shape of the input x the network infers each layer's parameter shapes, then initializes them
x = nd.random.uniform(shape=(2, 30))
net(x)
print(3, net.collect_params())
print(3, net[0].weight.data())

# The second forward pass does not initialize again
net(x)

　　init提供了许多初始化方法，如下：

init.Zero()               # initialize to the constant 0
init.One()                 # initialize to the constant 1
init.Constant(value=0.05)  # initialize to the constant 0.05
init.Orthogonal()          # initialize as an orthogonal matrix
init.Uniform(scale=0.07)  # random values distributed between -0.07 and 0.07
init.Normal(sigma=0.01)  # normal distribution with mean 0 and standard deviation 0.01
init.Xavier(magnitude=3)  # Xavier initialization with magnitude=3; suited to tanh
init.MSRAPrelu(slope=0.25)  # Kaiming (He) initialization; suited to relu

　　自定义初始化：

# Initialize the first and second layers with different methods.
# force_reinit: re-initialize regardless of whether the network was already initialized
net[0].weight.initialize(init=init.Xavier(), force_reinit=True)
net[1].initialize(init=init.Constant(42), force_reinit=True)

# A custom initializer subclasses init.Initializer and implements _init_weight
class MyInit(init.Initializer):
    """Fill weights with uniform samples between -10 and 10, then zero the small ones."""

    def _init_weight(self, name, data):
        print('Init', name, data.shape)
        samples = nd.random.uniform(low=-10, high=10, shape=data.shape)
        data[:] = samples
        # Entries with absolute value below 5 become 0; the rest stay unchanged.
        keep_mask = data.abs() >= 5
        data *= keep_mask

# Re-initialize the whole network with the custom initializer, then look at index 0 of the first layer's weight data
net.initialize(MyInit(), force_reinit=True)
net[0].weight.data()[0]

3. 参数和模块命名

　　mxnet网络中的parameter和block都有命名(prefix)， parameter的名字由用户指定，block的名字由用户或mxnet自动创建

# Block created with an explicit prefix; trailing comments show the printed names
mydense = nn.Dense(100, prefix="mydense_")
print(mydense.prefix)  #mydense_
print(mydense.collect_params())    #mydense_weight, mydense_bias
 
# No prefix given: the prefix shown in the trailing comment is generated automatically
dense0 = nn.Dense(100)
print(dense0.prefix)      #dense0_
print(dense0.collect_params())  #dense0_weight, dense0_bias

# A second block without an explicit prefix gets the next automatic name
dense1 = nn.Dense(100)     
print(dense1.prefix)   #dense1_
print(dense1.collect_params())  #dense1_weight, dense1_bias

　　每一个block都有一个name_scope(), 在其上下文中创建的子block，会采用其名字作为前缀，注意下面model0和model1的名字差别

from mxnet import gluon
import mxnet as mx

class Model(gluon.Block):
    """Three stacked Dense(20) layers, each followed by ReLU.

    The child layers are created inside ``name_scope()`` so that their
    prefixes are nested under this block's own prefix.
    """

    def __init__(self, **kwargs):
        super(Model, self).__init__(**kwargs)
        units = 20
        with self.name_scope():
            self.dense0 = gluon.nn.Dense(units)
            self.dense1 = gluon.nn.Dense(units)
            self.mydense = gluon.nn.Dense(units, prefix='mydense_')

    def forward(self, x):
        """Pass ``x`` through dense0, dense1 and mydense, applying ReLU after each."""
        for layer in (self.dense0, self.dense1, self.mydense):
            x = mx.nd.relu(layer(x))
        return x

# First instance: the trailing comments show the prefix of the model and of each child
model0 = Model()
model0.initialize()
model0(mx.nd.zeros((1, 20)))
print(model0.prefix)         #model0_
print(model0.dense0.prefix)  #model0_dense0_
print(model0.dense1.prefix)  #model0_dense1_
print(model0.mydense.prefix) #model0_mydense_


# Second instance of the same class: every prefix now starts with model1_ instead of model0_
model1 = Model()
model1.initialize()
model1(mx.nd.zeros((1, 20)))
print(model1.prefix)          #model1_
print(model1.dense0.prefix)   #model1_dense0_
print(model1.dense1.prefix)   #model1_dense1_
print(model1.mydense.prefix)  #model1_mydense_

　　不同的命名，其保存的参数名字也会有差别，在保存和加载模型参数时会引起错误，如下所示：

# Saving and loading this way fails: parameters saved from model0 cannot be
# loaded into model1, because the names under collect_params() carry each
# model's own prefix (model0_... versus model1_...).
model0.collect_params().save('model.params')
try:
    model1.collect_params().load('model.params', mx.cpu())
except Exception as e:
    # The failure is the point of this demonstration: report it and carry on.
    print(e)
# The '\n' argument puts a blank line between the two parameter listings.
print(model0.collect_params(), '\n')
print(model1.collect_params())


# Saving and loading this way works: parameters saved from model0 load into model1 without error
model0.save_parameters('model.params')
model1.load_parameters('model.params')
# Show the keys actually stored in the parameter file
print(mx.nd.load('model.params').keys())

在加载预训练的模型，进行finetune时，注意命名空间, 如下所示：

# Load the pretrained model; its last layer is a 1000-class classifier
alexnet = gluon.model_zoo.vision.alexnet(pretrained=True)
print(alexnet.output)
print(alexnet.output.prefix)

# Replace the last layer with a 100-class classifier for fine-tuning.
# The new layer is created inside alexnet.name_scope() so it is named within the model's namespace.
with alexnet.name_scope():
    alexnet.output = gluon.nn.Dense(100)
alexnet.output.initialize()
print(alexnet.output)

　　 Sequential创建的net获取参数：

from mxnet import init, nd
from mxnet.gluon import nn


# Build and run a small Sequential network so its parameters exist and can be inspected
net = nn.Sequential()
net.add(nn.Dense(256, activation='relu'))
net.add(nn.Dense(10))
net.initialize()  # Use the default initialization method

x = nd.random.uniform(shape=(2, 20))
net(x)            # Forward computation

print(net[0].params)
print(net[1].params)

# Access through attributes
print(net[1].bias)
print(net[1].bias.data())
print(net[0].weight.grad())
# Access dictionary-style, by parameter name
print(net[0].params['dense0_weight'])
print(net[0].params['dense0_weight'].data())
# Get all parameters
print(net.collect_params())
print(net[0].collect_params())
net.collect_params()['dense1_bias'].data()
# Select parameters by regular expression
print(net.collect_params('.*weight'))  
print(net.collect_params('dense0.*'))

　　Block创建网络获取参数：

from mxnet import gluon
import mxnet as mx

class Model(gluon.Block):
    """Three stacked Dense(20) layers, each followed by ReLU.

    The child layers are created inside ``name_scope()`` so that their
    prefixes are nested under this block's own prefix.
    """

    def __init__(self, **kwargs):
        super(Model, self).__init__(**kwargs)
        units = 20
        with self.name_scope():
            self.dense0 = gluon.nn.Dense(units)
            self.dense1 = gluon.nn.Dense(units)
            self.mydense = gluon.nn.Dense(units, prefix='mydense_')

    def forward(self, x):
        """Pass ``x`` through dense0, dense1 and mydense, applying ReLU after each."""
        for layer in (self.dense0, self.dense1, self.mydense):
            x = mx.nd.relu(layer(x))
        return x

# Build and run the model once so its parameters exist
model0 = Model()
model0.initialize()
model0(mx.nd.zeros((1, 20)))

# Access through the ordered dict _children
# NOTE(review): _children and _data are underscore-prefixed (private by convention) — confirm they are stable across mxnet versions
print(model0._children)
print(model0._children['dense0'].weight._data)
print(model0._children['dense0'].bias._data)

# Access by collecting all parameters and indexing by full parameter name
print(model0.collect_params()['model0_dense0_weight']._data)
print(model0.collect_params()['model0_dense0_bias']._data)

　 Parameter和ParameterDict

　　gluon.Parameter类能够创建网络中的参数，gluon.ParameterDict类是字典，建立了parameter name和parameter实例之间的映射，通过ParameterDict也可以创建parameter.

Parameter的使用

class MyDense(nn.Block):
    """Dense layer with ReLU whose weight and bias are built directly as gluon.Parameter objects."""

    def __init__(self, units, in_units, **kwargs):
        """Create the layer's parameters.

        units: the number of outputs in this layer
        in_units: the number of inputs in this layer
        """
        super(MyDense, self).__init__(**kwargs)
        weight_shape = (in_units, units)
        bias_shape = (units,)
        self.weight = gluon.Parameter('weight', shape=weight_shape)  # parameter named "weight"
        self.bias = gluon.Parameter('bias', shape=bias_shape)  # parameter named "bias"

    def forward(self, x):
        """Return relu(dot(x, weight) + bias)."""
        weight, bias = self.weight.data(), self.bias.data()
        return nd.relu(nd.dot(x, weight) + bias)


# Stack two MyDense layers: 64 inputs -> 8 outputs -> 1 output
net = nn.Sequential()
net.add(MyDense(units=8, in_units=64),
        MyDense(units=1, in_units=8))
# Initialize the parameters, one block at a time
for block in net:
    if hasattr(block, "weight"):
        block.weight.initialize()
    if hasattr(block, "bias"):
        block.bias.initialize()
print(net(nd.random.uniform(shape=(2, 64))))
print(net)

ParameterDict使用

# Create a ParameterDict that contains one parameter named param2
params = gluon.ParameterDict()
params.get('param2', shape=(2, 3))
print(params)
print(params.keys())
print(params['param2'])

查看全文

相关阅读:
【css】媒体查询（@media 查询）
【angular】利用$scope.$apply() 按需要强制渲染前端页面
 【html】百度经常使用的 map标签
 【d2-admin】浅入了解 d2-admin之安装
 【web前端】谈谈浏览器的兼容性（面试题）
【web性能优化】相关文章
 【web性能优化】DOM的reflow 和repaint
【web性能优化】视频类优化（慕课网笔记）
js高阶函数--判断数据类型、函数胡柯里化；
js数组排序

原文地址：https://www.cnblogs.com/silence-cho/p/12953471.html