7.6 卷积网络分类过程可视化
7.6.1 导入库
[1]:
import network
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from matplotlib import cm,colors
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Use the SimHei font for sans-serif text; the figure titles in this notebook
# contain Chinese characters.
plt.rcParams['font.sans-serif'] = ['SimHei']
# NOTE(review): presumably disabled so that minus signs still render with the
# font above -- confirm against the matplotlib rcParams documentation.
plt.rcParams['axes.unicode_minus'] = False
# Seed NumPy's global RNG once, up front, so a fresh "Run All" is repeatable.
np.random.seed(100)
7.6.2 定义各类激活函数
[2]:
# Name of the hidden-layer activation; one of 'relu', 'sigmoid', 'tanh'.
activation = 'relu'
if activation == 'relu':
    activation_fn = network.relu_activation
elif activation == 'sigmoid':
    activation_fn = network.sigmoid_activation
elif activation == 'tanh':
    activation_fn = network.tanh_activation
else:
    # Fail here instead of printing: otherwise `activation_fn` is left
    # undefined (or stale from an earlier run of this cell) and the error only
    # surfaces later, when the network is constructed.
    raise ValueError(activation + ' function not implemented')
7.6.3 导入数据开始训练
根据需要设定合适的参数进行训练,net.SGD()函数参数如下:
training_data #训练数据
epochs #训练轮数(完整遍历训练集的次数)
mini_batch_size #batch大小
eta #学习率
lmbda = 0.0 #正则化系数
evaluation_data=None #验证数据
monitor_evaluation_cost=False #监测验证损失
monitor_evaluation_accuracy=True #监测验证精度
monitor_training_cost=True #监测训练损失
monitor_training_accuracy=True #监测训练精度
early_stopping_n = 0 #早停阈值
verbose = 0 # 是否开启详细输出(verbose),开启之后才能显示上述监测内容
save_loss=False # 保存损失
save_delta = False # 保存误差
save_grad = False # 保存梯度
save_weights = False # 保存训练过程中的权重
save_Pl_Pa = False #保存各层偏导
[3]:
# --- Data -------------------------------------------------------------------
# Two noisy concentric circles, split into train / test parts.
X, y = datasets.make_circles(n_samples=2000, factor=0.3, noise=.1, random_state=123)
X, X_test, y, y_test = train_test_split(X, y, test_size=0.33)

# 50 x 50 grid covering the (unscaled) training range plus a 0.2 margin;
# it is used later to colour the whole plane.
grid_axis = slice(X.min() - .2, X.max() + .2, 50j)
c, r = np.mgrid[grid_axis, grid_axis]
p = np.column_stack((c.ravel(), r.ravel()))

# Standardise with statistics fitted on the training part only, and apply the
# same transform to the grid and to the test part.
ss = StandardScaler().fit(X)
X = ss.transform(X)
p_0 = ss.transform(p)
X_test = ss.transform(X_test)

# Reshape to the layout handed to the network: one (n_features, 1) column
# vector per sample, paired with its label for the labelled sets.
p = list(p_0[:, :, np.newaxis])
training_data = [[feature[:, np.newaxis], label] for feature, label in zip(X, y)]
test_data = [[feature[:, np.newaxis], label] for feature, label in zip(X_test, y_test)]

# --- Model ------------------------------------------------------------------
# Layer sizes mix fully connected and convolutional entries, e.g.
# [2, 3, [3, 2], 1]: per the original note, [3, 2] stands for 3 kernels of
# size 1*2.
net = network.Network([2, 3, [2, 2], 1],
                      activation_fn=activation_fn,
                      cost=network.BinaryLogCost,
                      layers_type=['FC', 'C', 'FC'])

# Set to True to reuse parameters saved by an earlier training run.
have_trained = False
if not have_trained:
    # Train from scratch, logging everything the later sections visualise.
    net.default_weight_initializer()
    _ = net.SGD(training_data, 2000, len(training_data), 0.08,
                evaluation_data=test_data,
                verbose=1,
                save_Pl_Pa=True,
                save_weights=True,
                save_grad=True,
                save_delta=True,
                save_loss=True,
                monitor_evaluation_accuracy=True,
                monitor_training_accuracy=True,
                monitor_training_cost=True)
else:
    # Load the logged parameters and restore entry 800 of each log.
    # NOTE(review): presumably entry 800 is the state after 800 epochs -- confirm.
    weights_log = np.load('log/weights_log.npy', allow_pickle=True)
    bias_log = np.load('log/bias_log.npy', allow_pickle=True)
    net.set_weights(weights_log[800], bias_log[800])
7.6.4 可视化实验数据及实验结果
[9]:
# --- Figure 1: the training and test samples ----------------------------------
fig = plt.figure(figsize=(9, 3))

# Custom orange -> blue colormap built from two stock colormaps.
# `matplotlib.cm.get_cmap` was deprecated in Matplotlib 3.7 and removed in 3.9;
# `plt.get_cmap` accepts the same (name, lut) arguments.
top = plt.get_cmap('Oranges_r', 512)
bottom = plt.get_cmap('Blues', 512)
newcolors = np.vstack((top(np.linspace(0.55, 1, 512)),
                       bottom(np.linspace(0, 0.75, 512))))
cm_bright = ListedColormap(newcolors, name='OrangeBlue')

plt.subplot(121)
m1 = plt.scatter(*X.T, c=y, cmap=cm_bright, edgecolors='white', s=20, linewidths=0.5)
plt.title('train samples')
plt.axis('equal')

plt.subplot(122)
m2 = plt.scatter(*X_test.T, c=y_test, cmap=cm_bright, edgecolors='white', s=20, linewidths=0.5)
plt.title('test samples')
plt.axis('equal')

# One colorbar shared by both panels. The mappable is passed explicitly rather
# than relying on pyplot's "current image" (which was this same scatter, m2).
ax = fig.get_axes()
plt.colorbar(m2, ax=ax)
plt.show()

# --- Figure 2: predicted probability over the grid ----------------------------
# `p` holds the grid points as column vectors; `p1` is the same grid as an
# (n_points, 2) array for plotting.
prob = np.squeeze(net.predict_pro(p))
p1 = np.array([np.squeeze(pp) for pp in p])

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(9, 3), subplot_kw={'aspect': 'equal'})
ax1.scatter(*p1.T, c=prob, cmap=cm_bright)
ax1.scatter(*X.T, c=y, cmap=cm_bright, edgecolors='white', s=20, linewidths=0.5)
ax1.set_title('train score:%.5f' % net.accuracy(training_data, convert=True))

mp = ax2.scatter(*p1.T, c=prob, cmap=cm_bright)
ax2.scatter(*X_test.T, c=y_test, cmap=cm_bright, edgecolors='white', s=20, linewidths=0.5)
ax2.set_title('test score:%.5f' % net.accuracy(test_data, convert=True))

plt.colorbar(mp, ax=[ax1, ax2]);
7.6.5 定义逐层可视化函数
[10]:
def _add_code_colorbar(fig, mappable, c_u, value, zero_start):
    """Attach the colorbar variant selected by ``value`` / ``zero_start``."""
    if value:
        # Continuous quantity: let matplotlib pick the ticks.
        fig.colorbar(mappable, shrink=0.8)
    elif zero_start:
        # Discrete codes: one tick per distinct code, numbered from 0.
        ticks = np.linspace(0, c_u.shape[0] - 1, c_u.shape[0])
        fig.colorbar(mappable, ticks=ticks, shrink=0.8)
    else:
        # Discrete codes: tick at each distinct value that actually occurs.
        fig.colorbar(mappable, ticks=c_u, shrink=0.8)


def _draw_boundary_2d(ax, lo, hi, u1, u2, b_, fmt):
    """Draw the line ``u1*x + u2*y + b_ = 0`` inside the box ``lo``..``hi``."""
    (a1, a2), (b1, b2) = lo, hi
    if u2 != 0:
        ax.plot([a1, b1], [(a1 * u1 + b_) / (-u2), (b1 * u1 + b_) / (-u2)], fmt)
    elif u1 != 0:
        # Vertical line: solving for y would divide by zero.
        x0 = -b_ / u1
        ax.plot([x0, x0], [a2, b2], fmt)
    # If both weights are zero there is no line to draw.
    ax.set_ylim(a2, b2)


def scatter(p, c, X, wb=None, cmap='Set3', value=True, zero_start=True, labels=None):
    """Plot coloured background points with the labelled samples on top.

    A new 6x4 figure is opened on every call. The number of columns of ``p``
    selects the plot: 3 columns give a 3-D plot, 2 or more than 3 columns a
    2-D plot of the first two columns, fewer than 2 a 1-D strip at height 0.

    Parameters
    ----------
    p : ndarray, shape (n_points, n_dims)
        Background points (e.g. the evaluation grid mapped into some layer).
    c : array-like, length n_points
        Value or code used to colour ``p``.
    X : ndarray, shape (n_samples, n_dims)
        Samples drawn on top of ``p``, coloured by ``labels``.
    wb : tuple (weights, bias), optional
        Hyperplane ``weights . x + bias = 0`` drawn as a dashed line (2-D) or
        wireframe (3-D). Ignored in the 1-D case.
    cmap : str or Colormap
        Colormap for ``c``.
    value : bool
        True if ``c`` is a continuous quantity (default colorbar ticks);
        False if it is a discrete code.
    zero_start : bool
        Only used when ``value`` is False: tick the colorbar at
        ``0..k-1`` (True) or at the distinct values of ``c`` (False).
    labels : array-like, optional
        Labels of ``X``. Defaults to the notebook-level ``y``, which is what
        this function always used before the parameter existed.

    Returns
    -------
    The matplotlib axes that were drawn on.
    """
    if labels is None:
        labels = y  # notebook-level labels (previous behaviour)
    cols = p.shape[-1]
    fig = plt.figure(figsize=(6, 4))
    c_u = np.unique(c)
    sample_style = dict(cmap=cm_bright, edgecolors='white', linewidths=0.5)

    if cols == 3:
        # `Axes3D(fig)` stopped attaching itself to the figure in recent
        # Matplotlib releases, which leaves the figure blank; `add_subplot`
        # works on old and new versions alike.
        ax3d = fig.add_subplot(111, projection='3d')
        if wb is not None:
            (u1, u2, u3), b_ = wb
            a1, a2, a3 = p.min(0)[:3]
            b1, b2, b3 = p.max(0)[:3]
            surface = None
            if u3 != 0:
                # Solve the plane equation for z over an (x, y) grid.
                a, b = np.mgrid[a1 - 1:b1:10j, a2 - 1:b2:10j]
                surface = (a, b, (a * u1 + b * u2 + b_) / (-u3))
            elif u1 != 0 or u2 != 0:
                # Plane parallel to the z axis: solve for whichever of x / y
                # has the larger coefficient.
                if np.abs(u1) > np.abs(u2):
                    b, z_ = np.mgrid[a2 - 1:b2:10j, a3 - 1:b3:10j]
                    surface = ((b * u2 + b_) / (-u1), b, z_)
                else:
                    a, z_ = np.mgrid[a1 - 1:b1:10j, a3 - 1:b3:10j]
                    surface = (a, (a * u1 + b_) / (-u2), z_)
            if surface is not None:
                ax3d.plot_wireframe(*surface, color='k')
        mp = ax3d.scatter(*(p.T), c=c, cmap=cmap)
        ax3d.scatter(*X.T, c=labels, s=40, **sample_style)
        _add_code_colorbar(fig, mp, c_u, value, zero_start)
        ax3d.set_xlabel('X')
        ax3d.set_ylabel('Y')
        ax3d.set_zlabel('Z')
        return ax3d

    ax = fig.add_subplot(111)
    if cols >= 2:
        # Exactly two columns, or more than three: show the first two.
        ax.axis('equal')
        if wb is not None:
            lo = p.min(0)[:2] - 0.2
            hi = p.max(0)[:2] + 0.2
            (u1, u2, *_), b_ = wb
            _draw_boundary_2d(ax, lo, hi, u1, u2, b_, 'r--' if cols == 2 else 'k--')
        st = ax.scatter(*p[:, :2].T, c=c, cmap=cmap)
        ax.scatter(*X[:, :2].T, c=labels, alpha=0.7, s=20, **sample_style)
        _add_code_colorbar(fig, st, c_u, value, zero_start)
        ax.set_xlabel('X')
        ax.set_ylabel('Y')
    else:
        # One-dimensional: lay every point on the horizontal axis.
        xs, sample_xs = np.ravel(p), np.ravel(X)
        st = ax.scatter(xs, np.zeros_like(xs), c=c, cmap=cmap)
        ax.scatter(sample_xs, np.zeros_like(sample_xs), c=labels, alpha=0.7, s=20, **sample_style)
        _add_code_colorbar(fig, st, c_u, value, zero_start)
    return ax
def add_hyperplane(ax, p, wb, color='gray'):
    """Draw one more hyperplane ``weights . x + bias = 0`` on existing axes.

    Parameters
    ----------
    ax : matplotlib axes
        2-D axes (when ``p`` has 2 columns) or 3-D axes (3 columns).
    p : ndarray, shape (n_points, 2) or (n_points, 3)
        Points already shown on ``ax``; only their extent is used.
    wb : tuple (weights, bias)
        ``weights`` has one entry per column of ``p``.
    color : str
        Colour of the line / wireframe.

    Returns
    -------
    ``ax``. Nothing is drawn when every weight is zero, because no hyperplane
    exists in that case.
    """
    cols = p.shape[-1]
    assert cols in [2, 3]
    if cols == 3:
        (u1, u2, u3), b_ = wb
        a1, a2, a3 = p.min(0)[:3]
        b1, b2, b3 = p.max(0)[:3]
        if u3 != 0:
            # Solve the plane equation for z over an (x, y) grid.
            a, b = np.mgrid[a1 - 1:b1:10j, a2 - 1:b2:10j]
            z_ = (a * u1 + b * u2 + b_) / (-u3)
        elif u1 == 0 and u2 == 0:
            return ax
        elif np.abs(u1) > np.abs(u2):
            # Plane parallel to z: solving for z would divide by zero, so
            # solve for x instead.
            b, z_ = np.mgrid[a2 - 1:b2:10j, a3 - 1:b3:10j]
            a = (b * u2 + b_) / (-u1)
        else:
            # Plane parallel to z: solve for y instead.
            a, z_ = np.mgrid[a1 - 1:b1:10j, a3 - 1:b3:10j]
            b = (a * u1 + b_) / (-u2)
        ax.plot_wireframe(a, b, z_, color=color)
    else:
        a1, a2 = p.min(0) - 0.2
        b1, b2 = p.max(0) + 0.2
        (u1, u2), b_ = wb
        if u2 != 0:
            y1, y2 = (a1 * u1 + b_) / (-u2), (b1 * u1 + b_) / (-u2)
            ax.plot([a1, b1], [y1, y2], color=color)
        elif u1 != 0:
            # Vertical line x = -bias / u1, spanning the padded y-range.
            x0 = -b_ / u1
            ax.plot([x0, x0], [a2, b2], color=color)
    return ax
def mapping(code):
    """Give every distinct row of ``code`` an integer id.

    Parameters
    ----------
    code : ndarray, shape (n_points, n_bits)
        One activation pattern (e.g. a 0/1 code) per row.

    Returns
    -------
    ndarray of float, shape (n_points,)
        For each row, the index of that row among the sorted unique rows of
        ``code``; identical rows receive identical ids.
    """
    code = np.asarray(code)
    if code.shape[0] == 0:
        return np.zeros(0)
    # `return_inverse` already yields, for each row, its position among the
    # unique rows, so no per-pattern mask loop is needed.
    _, inverse = np.unique(code, axis=0, return_inverse=True)
    # reshape(-1): some NumPy 2.0 builds return the inverse with an extra axis.
    # astype(float): the result has always been a float array.
    return inverse.reshape(-1).astype(float)
# Named matplotlib colours, in alphabetical order, used to build a discrete
# colormap (`cmap`) with one colour per entry.
color_list = ['aquamarine', 'b', 'blueviolet','c', 'chartreuse',
'darkcyan', 'darkgreen', 'darkkhaki', 'deeppink', 'deepskyblue',
'gold', 'indigo', 'lightcoral', 'maroon', 'navy', 'olivedrab', 'peru',
'pink', 'rosybrown', 'saddlebrown', 'slategray', 'steelblue', 'teal',
'thistle', 'violet', 'y', 'yellow']
# Discrete colormap over the colours above.
cmap = colors.ListedColormap(color_list)
7.6.6 卷积网络分类过程逐层可视化
[11]:
%matplotlib notebook
W,B = [w.T for w in net.weights],[b.squeeze(1) for b in net.biases]
actf = net.activation_fn.fn
X = X[:200]
y = y[:200]
# plt.close('all')
inV, inX = p_0, X
layersBinCode = None
for layer,(w, b) in enumerate(zip(W, B)):
l_num =layer+1
if net.layers_type[layer]=='FC':
if l_num < len(W):
activation = 'relu'
actf = net.activation_fn.fn
actf_P = net.activation_fn.prime
else:
activation = 'sigmoid'
actf = network.sigmoid_activation.fn
actf_P = network.sigmoid_activation.prime
transV = inV @ w + b
transX = inX @ w + b
actV = actf(transV)
actX = actf(transX)
# 第k层各个节点的划分(第k层的二进制编码)
if activation == 'relu':
layerBinCode = np.where(actV > 0, 1, 0)
elif activation == 'sigmoid':
# layerBinCode = actV
layerBinCode = np.where(actV > 0.5, 1, 0)
elif activation == 'tanh':
layerBinCode = np.where(actV > 0, 1, 0)
else:
conv_size = net.sizes[layer+1]
activation='relu'
actf = net.activation_fn.fn
transV_ = np.zeros((inV.shape[0],conv_size[0],conv_size[1]))
transX_ = np.zeros((inX.shape[0],conv_size[0],conv_size[1]))
for c_num in range(inV.shape[1]-conv_size[1]+1):
transV_[:,:,c_num]=inV[:,c_num:c_num+conv_size[1]].reshape(-1,conv_size[1])@w+b
transX_[:,:,c_num]=inX[:,c_num:c_num+conv_size[1]].reshape(-1,conv_size[1])@w+b
actV_ = actf(transV_)
actX_ = actf(transX_)
actX_max_pooling = np.max(actX_,axis=2)
actV_max_pooling = np.max(actV_,axis=2)
actX_max_ind = np.argmax(transX_,axis=2)
actV_max_ind = np.argmax(transV_,axis=2)
# actX_max_ind = np.argmax(actX_,axis=2)
# actV_max_ind = np.argmax(actV_,axis=2)
# 第k层各个节点的划分(第k层的二进制编码)
if activation == 'relu':
layerBinCode = np.where(actV_max_pooling > 0, 1, 0)
layerBinCode_noP = np.where(actV_>0, 1, 0)
elif activation == 'sigmoid':
layerBinCode = np.where(actV_max_pooling > 0.5, 1, 0)
layerBinCode_noP = np.where(actV_>0.5,1,0)
elif activation == 'tanh':
layerBinCode = np.where(actV_max_pooling > 0, 1, 0)
actX = actX_max_pooling
actV = actV_max_pooling
# print(np.min(actV))
transX = np.squeeze([t_x[list(range(transX_.shape[1])),a_i] for t_x,a_i in zip(transX_,actX_max_ind)])
transV = np.squeeze([t_v[list(range(transV_.shape[1])),a_i] for t_v,a_i in zip(transV_,actV_max_ind)])
layerNumCode = mapping(layerBinCode)
# 第k层的激活神经元的数量
layerNumCode2 = np.sum(layerBinCode, 1)
# 前k层的二进制编码
layersBinCode = layerBinCode if layersBinCode is None else np.hstack((layersBinCode, layerBinCode))
# 前k层的数字编码
layersNumCode = mapping(layersBinCode)
# 前k层激活神经元的数量
layersNumCode2 = np.sum(layersBinCode, 1)
n = actV.shape[-1]
l = np.vstack((w, b)).T.astype('<U5').tolist()
sl = [';'.join(z) for z in l]
projIn = '3d' if inV.shape[-1] == 3 else None
projOut = '3d' if transV.shape[-1] == 3 else None
for i in range(n):
# 在输入空间形成超平面
if net.layers_type[layer]=='FC':
w_draw = w[:, i]
b_draw = b[i]
else:
w_draw = np.append(w[:,i],0)
b_draw = b[i]
transV_noP = transV_[:,i]
actV_noP = actV_[:,i]
tranX_noP = transX_[:,i]
actX_noP = actX_[:,i]
# ax = scatter(inV,transV_noP.T[0],inX,(np.append(w[:,i],0),b[i]),cmap='summer')
# ax.set_title("卷积层的第0个线性变化及超平面")
# ax = scatter(inV,transV_noP.T[1],inX,(np.append(0,w[:,i]),b[i]),cmap='summer')
# ax.set_title("卷积层的第1个线性变化及超平面")
# ax = scatter(inV[:,:2],transV_noP.T[0],inX[:,:2],(w[:,i],b[i]),cmap='summer')
# ax.set_title("卷积层的第0个线性变化及超平面")
# ax = scatter(inV[:,1:3],transV_noP.T[1],inX[:,1:3],(w[:,i],b[i]),cmap='summer')
# ax.set_title("卷积层的第1个线性变化及超平面")
ax = scatter(inV[:,:2],actV_noP.T[0],inX[:,:2],(w[:,i],b[i]),cmap='summer')
ax.set_title("卷积层的第0个激活及超平面")
ax = scatter(inV[:,1:3],actV_noP.T[1],inX[:,1:3],(w[:,i],b[i]),cmap='summer')
ax.set_title("卷积层的第1个激活及超平面")
ax = scatter(inV,layerBinCode_noP[:,i,:].T[0,:],inX,cmap='coolwarm',value=False)
ax.set_title("卷积层的第0个特征图对输入空间的划分")
ax = scatter(p_0,layerBinCode_noP[:,i,:].T[0,:],X, cmap='coolwarm', value=False)
ax.set_title("卷积层的第0个特征图对原始空间的划分")
ax = scatter(inV,layerBinCode_noP[:,i,:].T[1,:],inX,cmap='coolwarm',value=False)
ax.set_title("卷积层的第1个特征图对输入空间的划分")
ax = scatter(p_0,layerBinCode_noP[:,i,:].T[1,:],X,cmap='coolwarm',value=False)
ax.set_title("卷积层的第1个特征图对原始空间的划分")
# ax = scatter(inV,actV_max_ind.T[i],inX,cmap='rainbow',value=False)
# ax.set_title('第%d个卷积层在输入空间最大池化'%(i))
ax = scatter(p_0,actV_max_ind.T[i],X,cmap='Set1',value=False)
ax.set_title('第%d个卷积层在原始空间最大池化'%(i))
ax = scatter(inV, transV.T[i], inX, (w_draw, b_draw), cmap='summer')
if net.layers_type[layer]=='C':
add_hyperplane(ax,inV,(np.append(0,w[:,i]),b[i]))
ax.set_title('第%d个节点形成超平面:' % (i) + str(sl[i]))
# 后层每个节点对输入空间进行激活的值
ax = scatter(inV, actV.T[i], inX, (w_draw, b_draw), cmap='summer')
if net.layers_type[layer]=='C':
add_hyperplane(ax,inV,(np.append(0,w[:,i]),b[i]))
ax.set_title('第%d个节点激活' % (i))
# 后层每个节点对输入空间的划分
ax = scatter(inV, layerBinCode.T[i], inX, (w_draw, b_draw), cmap='coolwarm', value=False,zero_start=False)
if net.layers_type[layer]=='C':
add_hyperplane(ax,inV,(np.append(0,w[:,i]),b[i]))
ax.set_title('第%d个节点对输入空间划分' % (i))
# 在原始特征空间进行划分
ax = scatter(p_0, layerBinCode.T[i], X, cmap='coolwarm', value=False,zero_start=False)
ax.set_title('第%d个节点对原始特征空间划分' % (i))
ax = scatter(inV, layerNumCode, inX, value=False,cmap='coolwarm')
ax.set_title('第%d层对输入空间的总体划分' % (l_num))
# 输入空间进行仿射变换后在后层空间中的情况
ax = scatter(transV, layerNumCode2, transX, cmap='summer', value=False, zero_start=False)
ax.set_title('第%d层对输入空间线性变化' % (l_num))
# 输入空间在进行非线性变换后在后层空间中的情况
ax = scatter(actV, layerNumCode2, actX, cmap='summer', value=False, zero_start=False)
ax.set_title('第%d层对输入空间激活后编码情况' % (l_num))
# 第k层对原始特征空间的总体划分(叠加?)
ax = scatter(p_0, layerNumCode, X, value=False,zero_start=False,cmap='coolwarm')
ax.set_title('第%d层对原始特征空间的总体划分' % (l_num))
# 前k层对原始特征空间胞腔分解情况(叠加?)
ax = scatter(p_0, layersNumCode, X, value=False, zero_start=False,cmap=cmap)
ax.set_title('前%d层对原始特征空间的总体划分' % (l_num))
inX = actX
inV = actV
plt.show();
D:\anaconda\lib\site-packages\ipykernel_launcher.py:5: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
"""