Adaboost集成算法
高**
[10]:
import numpy as np
import matplotlib.pyplot as plt
import sklearn.ensemble as ada
import skimage.io as imr
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import LinearSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score
from sklearn.naive_bayes import GaussianNB
def get_raw_data():
    """Load the hyperspectral scene 'dc.tif' from disk and return it as an array."""
    return imr.imread('dc.tif')
raw_data = get_raw_data()

# Parse label.txt: three tab-separated lines per labelled rectangle
# (class id, then the two corner coordinates).
# Bug fix: the original `file = open('label.txt')` was never closed;
# use a context manager so the handle is released.
with open('label.txt') as file:
    f = file.read()
short = f.split('\n')
l = []
for s in short:
    s1 = s.split('\t')
    l.append(s1)
num = int(len(l) / 3)
# One row per rectangle: [class_id, x1, y1, x2, y2].
arr = np.zeros(5 * num).reshape(num, 5)
for i in range(num):
    arr[i][0] = int(l[i * 3][4])      # class id — field position assumed from label.txt layout, TODO confirm
    arr[i][1] = int(l[i * 3 + 1][1])  # x1 (top-left)
    arr[i][2] = int(l[i * 3 + 1][2])  # y1
    arr[i][3] = int(l[i * 3 + 2][1])  # x2 (bottom-right)
    arr[i][4] = int(l[i * 3 + 2][2])  # y2
arr = arr.astype(np.int16)
i = 1
n = 0
# Expand each rectangle into its per-class list of (x-1, y-1) pixel
# coordinates (label file is 1-based, arrays are 0-based). Seven classes.
xy_has_label = {'1': [], '2': [], '3': [], '4': [], '5': [], '6': [], '7': []}
for n in range(arr.shape[0]):
    c = [(x - 1, y - 1)
         for x in range(arr[n][1], arr[n][3] + 1)
         for y in range(arr[n][2], arr[n][4] + 1)]
    for t in c:
        xy_has_label[str(i)].append(t)
    # NOTE(review): the class id `i` used for rectangle n is taken from row
    # n+1's first column (the first rectangle is assumed to be class 1);
    # preserved exactly as written — verify this matches label.txt's layout.
    if n + 1 < (arr.shape[0]):
        i = arr[n + 1][0]
for i in xy_has_label:
    xy_has_label[i] = np.asarray(xy_has_label[i])
def seg_data(xy_of_data, seg_size=0.5):
    """Randomly split each class's coordinate array into train/test parts.

    Args:
        xy_of_data: dict mapping class-id string -> array of (x, y) pixels.
        seg_size: fraction of each class assigned to the test part.

    Returns:
        (train_dict, test_dict) with the same keys as `xy_of_data`.
    """
    train_split = {}
    test_split = {}
    for label, coords in xy_of_data.items():
        order = np.arange(len(coords))
        np.random.shuffle(order)
        cut = int(len(coords) * (1 - seg_size))
        shuffled = coords[order]
        train_split[label] = shuffled[:cut]
        test_split[label] = shuffled[cut:]
    return train_split, test_split
# Default seg_size=0.5: 50/50 split of the labelled pixel coordinates.
sub_xy_has_label_train, sub_xy_has_label_test = seg_data(xy_has_label)
# Number of spectral bands per pixel in the hyperspectral image.
DIM = 191
def get_data(sub_xy_has_label, raw_data):
    """Assemble per-pixel (spectrum, label) arrays from labelled coordinates.

    Args:
        sub_xy_has_label: dict mapping class-id string -> array of (x, y)
            pixel coordinates.
        raw_data: image cube indexed as raw_data[band, x, y].

    Returns:
        data: 2-D array, one row per labelled pixel (all bands of that pixel).
        l: int16 label vector aligned with the rows of `data`.
    """
    # Band count taken from the cube itself (equals the module-level DIM
    # for this image) so the function no longer depends on a global.
    bands = raw_data.shape[0]
    rows = []
    labels = []
    for key, coords in sub_xy_has_label.items():
        cls = int(key)
        for (x, y) in coords:
            rows.append(raw_data[:, x, y])
            labels.append(cls)
    # Bug/perf fix: the original grew `data` with np.concatenate inside the
    # loop (quadratic) around a dummy first row that was deleted afterwards;
    # one vstack over a list is equivalent and linear.
    if rows:
        data = np.vstack(rows)
    else:
        data = np.empty((0, bands), dtype=raw_data.dtype)
    l = np.asarray(labels, dtype=np.int16)
    return data, l
# Materialize (spectrum, label) arrays for the train and test pixel sets.
train_data, train_l = get_data(sub_xy_has_label_train,raw_data)
test_data, test_l = get_data(sub_xy_has_label_test,raw_data)
def next_batch(im_a, l_a):
    """Shuffle the paired arrays together and split them 80/20.

    Args:
        im_a: sample array (rows aligned with `l_a`).
        l_a: label array.

    Returns:
        (train_samples, train_labels, holdout_samples, holdout_labels).
    """
    perm = np.arange(len(l_a))
    np.random.shuffle(perm)
    # Bug fix: the original computed len(l_a)*0.8 as a float and used it as
    # a slice index, which raises TypeError; cast to int.
    train_size = int(len(l_a) * 0.8)
    train = im_a[perm]
    l_train = l_a[perm]
    return train[0:train_size], l_train[0:train_size], train[train_size:], l_train[train_size:]
# seg_size=0 keeps every labelled pixel in the "train" part, so data_0/l_0
# hold the complete labelled dataset (used for cross-validation below).
data_1, data_2 = seg_data(xy_has_label,0)
data_0, l_0 = get_data(data_1,raw_data)
[14]:
from sklearn.decomposition import PCA

# Project the 191-band spectra onto 3 principal components (sanity check of
# the dataset's intrinsic dimensionality).
d_reduce = PCA(n_components=3).fit_transform(data_0)
# Bug fix: the original printed `data.s`, but no `data` exists at module
# scope (NameError); the recorded output "(8079, 3)" matches printing the
# reduced shape alone.
print(d_reduce.shape)
(8079, 3)
[ ]:
# 10-fold cross-validation of AdaBoost (SAMME) over depth-10 decision trees
# on the full labelled dataset.
classifier_tree = ada.AdaBoostClassifier(DecisionTreeClassifier(max_depth = 10),algorithm="SAMME",n_estimators=200)
print(cross_val_score(classifier_tree,data_0,l_0,cv=10))
[11]:
# Train on the 50% train split and predict the held-out test pixels.
classifier_tree = ada.AdaBoostClassifier(DecisionTreeClassifier(max_depth = 10),algorithm="SAMME",n_estimators=200)
classifier_tree.fit(train_data,train_l)
z = classifier_tree.predict(test_data)
# NOTE(review): matplotlib.pyplot is already imported at the top of the file;
# this re-import is redundant but harmless.
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import itertools
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    # Bug fix: normalize BEFORE drawing. The original called plt.imshow first
    # and normalized afterwards, so with normalize=True the rendered image and
    # colorbar showed the raw counts while the text overlay showed rates.
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
    print(cm)
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)
    # Annotate every cell; red text on dark (above-threshold) cells.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, float('%.2f'%cm[i, j]),
                 horizontalalignment="center",
                 color="red" if cm[i, j] > thresh else "black")
    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
# Confusion matrix of predicted vs. true labels on the test pixels.
cnf_m = confusion_matrix(test_l,z)
np.set_printoptions(precision=2)
class_names = [1,2,3,4,5,6,7]
plot_confusion_matrix(cnf_m, classes=class_names,
                      title='Confusion matrix, without normalization')
# Plot normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_m, classes=class_names, normalize=True,
                      title='Normalized confusion matrix')
plt.show()
Confusion matrix, without normalization
[[1914 2 0 1 0 0 0]
[ 8 195 0 0 0 1 4]
[ 1 0 87 0 0 0 0]
[ 1 1 0 962 0 0 0]
[ 1 0 0 0 202 0 0]
[ 0 0 0 0 0 610 2]
[ 1 1 0 0 0 1 46]]
Normalized confusion matrix
[[ 9.98e-01 1.04e-03 0.00e+00 5.22e-04 0.00e+00 0.00e+00
0.00e+00]
[ 3.85e-02 9.38e-01 0.00e+00 0.00e+00 0.00e+00 4.81e-03
1.92e-02]
[ 1.14e-02 0.00e+00 9.89e-01 0.00e+00 0.00e+00 0.00e+00
0.00e+00]
[ 1.04e-03 1.04e-03 0.00e+00 9.98e-01 0.00e+00 0.00e+00
0.00e+00]
[ 4.93e-03 0.00e+00 0.00e+00 0.00e+00 9.95e-01 0.00e+00
0.00e+00]
[ 0.00e+00 0.00e+00 0.00e+00 0.00e+00 0.00e+00 9.97e-01
3.27e-03]
[ 2.04e-02 2.04e-02 0.00e+00 0.00e+00 0.00e+00 2.04e-02
9.39e-01]]
[12]:
# Retrain on the full labelled dataset and classify every pixel of the scene.
classifier_tree1 = ada.AdaBoostClassifier(DecisionTreeClassifier(max_depth = 10),algorithm="SAMME",n_estimators=200)
classifier_tree1.fit(data_0,l_0)
# Reorder the cube (bands, H, W) -> (H, W, bands) and flatten to one
# spectrum per pixel. Shapes are taken from the cube instead of the
# hard-coded 1280/307/191 of the original; the two swapaxes calls are kept
# as written. The dead `classification = np.zeros(...)` initialization
# (immediately overwritten by the predict result) was removed.
bands, height, width = raw_data.shape
c = raw_data.swapaxes(0,2)
c = c.swapaxes(0,1)
c = c.reshape(height*width,bands)
pre = classifier_tree1.predict(c)
classification = pre.reshape(height,width)
[13]:
# Display the full-scene classification map and save it to disk.
plt.imshow(classification)
fig = plt.gcf()
fig.savefig('test.png',dpi=300)
plt.show()
[ ]: