1. Downloading the tutorials
You can download the zip archive in a browser and unzip it, then type cmd in the address bar of File Explorer inside the unzipped directory to open a command prompt there.
Alternatively, clone the repository with git:
git clone https://github.com/mli/gluon-tutorials-zh
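A cloned copy can then be updated in place later by pulling inside the repository directory:
git pull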
2. Installing Gluon (CPU)
Add the conda channels:
# prefer the Tsinghua conda mirror
conda config --prepend channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/
# the USTC conda mirror also works
conda config --prepend channels http://mirrors.ustc.edu.cn/anaconda/pkgs/free/
Install from cmd:
conda env create -f environment.yml
activate gluon  # note: "source activate gluon" is not needed on Windows
To update the tutorials later:
conda env update -f environment.yml
3. Installing the GPU version
First uninstall the CPU version:
pip uninstall mxnet
Then install the package that matches your CUDA version:
pip install --pre mxnet-cu75  # CUDA 7.5
pip install --pre mxnet-cu80  # CUDA 8.0
[Optional] Users in China can use the Douban PyPI mirror to speed up the download:
pip install --pre mxnet-cu75 -i https://pypi.douban.com/simple  # CUDA 7.5
pip install --pre mxnet-cu80 -i https://pypi.douban.com/simple  # CUDA 8.0
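If you are unsure which CUDA toolkit is installed (and hence which package to pick), nvcc reports its version:
nvcc --version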
Check the installation:
import pip
for pkg in ['mxnet', 'mxnet-cu75', 'mxnet-cu80']:
    pip.main(['show', pkg])
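Note that pip.main() was removed in pip 10, so the snippet above only works with older pip versions; importing mxnet directly is a more robust check:
python -c "import mxnet; print(mxnet.__version__)"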
4. Viewing the tutorials
Install notedown, then run Jupyter with the notedown plugin loaded:
pip install https://github.com/mli/notedown/tarball/master
jupyter notebook --generate-config
jupyter notebook --NotebookApp.contents_manager_class='notedown.NotedownContentsManager'
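To avoid passing the flag on every launch, the setting can also be made permanent by adding one line to the config file generated above (typically ~/.jupyter/jupyter_notebook_config.py); I believe this is the approach the upstream instructions suggest:
c.NotebookApp.contents_manager_class = 'notedown.NotedownContentsManager'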
5. Tutorial notes
Converting to and from NumPy
from mxnet import ndarray as nd
import numpy as np

x = np.ones((2, 3))
y = nd.array(x)    # numpy -> mxnet
z = y.asnumpy()    # mxnet -> numpy
print([z, y])
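Both conversions copy the underlying data, so mutating one side does not affect the other; a quick demonstration (my addition):
z[0, 0] = -1
print(y[0, 0])  # still 1.0: asnumpy() returned a copy, not a view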
Automatic differentiation
import mxnet.autograd as ag
Suppose we want the derivative of $f = 2x^2$ with respect to $x$.
1. Create the variable:
x = nd.array([[1, 2], [3, 4]])
2. Call the NDArray method attach_grad() to ask the system to allocate space for the gradient:
x.attach_grad()
3. Define the function f:
with ag.record():
    y = x * 2
    z = y * x
4. Backpropagate to compute the gradient:
z.backward()
5. Read out the gradient:
print('x.grad: ', x.grad)
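Since $z = 2x^2$, the expected gradient is $4x$; a one-line sanity check (my addition) confirms it:
print(x.grad == 4 * x)  # prints elementwise 1s when the gradient matches 4x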
Linear regression from scratch
# coding=utf-8
"""Linear regression from scratch"""
from mxnet import ndarray as nd
from mxnet import autograd
import matplotlib.pyplot as plt
import random

# 1. Create the dataset
# y[i] = 2 * X[i][0] - 3.4 * X[i][1] + 4.2 + noise
# y = X*w + b + n
num_inputs = 2
num_examples = 1000
true_w = [2, -3.4]
true_b = 4.2
X = nd.random_normal(shape=(num_examples, num_inputs))
y = true_w[0] * X[:, 0] + true_w[1] * X[:, 1] + true_b
y += 0.01 * nd.random_normal(shape=y.shape)
# plt.scatter(X[:, 1].asnumpy(), y.asnumpy())
# plt.show()

# 2. Read the data
batch_size = 10
def data_iter():
    # yield random minibatches via a shuffled index
    idx = list(range(num_examples))
    random.shuffle(idx)
    for i in range(0, num_examples, batch_size):
        j = nd.array(idx[i:min(i + batch_size, num_examples)])
        yield nd.take(X, j), nd.take(y, j)
# for data, label in data_iter():
#     print(data, label)
#     break

# 3. Initialize the model parameters
w = nd.random_normal(shape=(num_inputs, 1))
b = nd.zeros((1,))
params = [w, b]
# print(params)
# allocate space for the gradients
for param in params:
    param.attach_grad()

# 4. Define the model
def net(X):
    return nd.dot(X, w) + b

# 5. Define the loss function
def square_loss(yhat, y):
    # reshape y to yhat's shape to avoid unwanted broadcasting
    return (yhat - y.reshape(yhat.shape)) ** 2

# 6. Optimizer
def SGD(params, lr):
    for param in params:
        param[:] = param - lr * param.grad

# 7. Training
# the true underlying function
def real_fn(X):
    return 2 * X[:, 0] - 3.4 * X[:, 1] + 4.2

# plot the loss over iterations, plus predicted vs. true values
def plot(losses, X, sample_size=100):
    xs = list(range(len(losses)))
    fig, axes = plt.subplots(1, 2)
    axes[0].set_title('Loss during training')
    axes[0].plot(xs, losses, '-r')
    axes[1].set_title('Estimated vs real function')
    axes[1].plot(X[:sample_size, 1].asnumpy(),
                 net(X[:sample_size, :]).asnumpy(), 'or', label='Estimated')
    axes[1].plot(X[:sample_size, 1].asnumpy(),
                 real_fn(X[:sample_size, :]).asnumpy(), '*g', label='Real')
    axes[1].legend()
    plt.show()

epochs = 5
learning_rate = 0.001
niter = 0
losses = []
moving_loss = 0
smoothing_constant = 0.01

# training loop
for e in range(epochs):
    total_loss = 0
    for data, label in data_iter():
        with autograd.record():
            output = net(data)                 # forward pass
            loss = square_loss(output, label)
        loss.backward()                        # backward pass
        SGD(params, learning_rate)             # update the parameters
        iter_loss = nd.sum(loss).asscalar() / batch_size
        total_loss += nd.sum(loss).asscalar()
        # track how the loss evolves
        niter += 1
        curr_loss = nd.mean(loss).asscalar()
        moving_loss = (1 - smoothing_constant) * moving_loss + smoothing_constant * curr_loss
        losses.append(iter_loss)
        if (niter + 1) % 100 == 0:
            print("Epoch %s, batch %s. Average loss: %f" % (e, niter, total_loss / num_examples))

plot(losses, X)
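After training, it is worth comparing the learned parameters with the true ones; a short check along these lines (not part of the original script):
# the data was generated with true_w = [2, -3.4] and true_b = 4.2
print('true_w:', true_w, 'learned w:', w.asnumpy().flatten())
print('true_b:', true_b, 'learned b:', b.asscalar())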
Using the GPU
import mxnet as mx

a = nd.array([1, 2, 3], ctx=mx.gpu())
b = nd.zeros((3, 2), ctx=mx.gpu())
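If you are not sure whether a GPU (and the GPU build of MXNet) is available, a common pattern is to try an allocation on the GPU and fall back to the CPU on failure; a minimal sketch of such a helper (my own, in the spirit of the tutorials):
import mxnet as mx
from mxnet import ndarray as nd

def try_gpu():
    # return mx.gpu() if usable, otherwise fall back to mx.cpu()
    try:
        ctx = mx.gpu()
        _ = nd.zeros((1,), ctx=ctx)
    except mx.base.MXNetError:
        ctx = mx.cpu()
    return ctx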
Data can be moved between devices with copyto and as_in_context:
y = x.copyto(mx.gpu())
z = x.as_in_context(mx.gpu())
The main difference between the two: if the source and target contexts are the same, as_in_context makes no copy, whereas copyto always allocates new memory.
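A quick way to see the difference (my sketch; requires a GPU):
x = nd.ones((2, 3), ctx=mx.gpu())
y = x.as_in_context(mx.gpu())  # context already matches: returns x itself
z = x.copyto(mx.gpu())         # always allocates a fresh copy
print(y is x, z is x)          # True False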
This is similar to the CUDA memory operations in Caffe:
float* tmp_transform_bbox = NULL;
CUDA_CHECK(cudaMalloc(&tmp_transform_bbox, 7 * sizeof(Dtype) * rpn_pre_nms_top_n));  // adjust retained_anchor_num
cudaMemcpy(tmp_transform_bbox, &transform_bbox_[transform_bbox_begin],
           rpn_pre_nms_top_n * sizeof(Dtype) * 7, cudaMemcpyDeviceToDevice);
Accessing parameters
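The snippets below assume a small Gluon network has already been built and initialized. A minimal sketch of such a setup (the layer sizes here are my own arbitrary choices, not from the original):
from mxnet import nd
from mxnet.gluon import nn

net = nn.Sequential()
with net.name_scope():
    net.add(nn.Dense(4, activation='relu'))
    net.add(nn.Dense(1))
net.initialize()
net(nd.random_normal(shape=(3, 5)))  # one forward pass so deferred shape inference runs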
w = net[0].weight
b = net[0].bias
print('name:', net[0].name, 'weight:', w, 'bias:', b)
print('weight:', w.data())
print('weight gradient:', w.grad())
print('bias:', b.data())
print('bias gradient:', b.grad())
params = net.collect_params()
print(params)
print(params['sequential0_dense0_bias'].data())
print(params.get('dense0_weight').data())
Parameter initialization
from mxnet import init

params = net.collect_params()
params.initialize(init=init.Normal(sigma=0.02), force_reinit=True)
print(net[0].weight.data(), net[0].bias.data())
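The init module ships other built-in initializers as well; for example, Xavier initialization can be swapped in the same way:
params.initialize(init=init.Xavier(), force_reinit=True)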
6. Troubleshooting
1. Printing weights raises an error under Python 2
w = net[0].weight
b = net[0].bias
print('name: ', net[0].name, ' weight: ', w, ' bias: ', b)
Change line 119 of C:\Anaconda2\envs\gluon\Lib\site-packages\mxnet\gluon\parameter.py to
s = 'Parameter {name} (shape={_shape}, dtype={dtype})'
Also, under Python 2 you need to drop the parentheses after print.