随机森林
吕**
[3]:
%pylab inline
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import SGDClassifier
from sklearn.datasets.samples_generator import make_blobs
from sklearn.preprocessing import StandardScaler
from matplotlib.colors import ListedColormap
from skimage import io,data
from sklearn.feature_selection import RFE
from sklearn.ensemble import GradientBoostingClassifier, IsolationForest
from sklearn.externals import joblib
from sklearn.model_selection import train_test_split
import numpy.ma as ma
import os, shutil
Populating the interactive namespace from numpy and matplotlib
d:\Users\Lenovo\Anaconda3\lib\site-packages\IPython\core\magics\pylab.py:161: UserWarning: pylab import has clobbered these variables: ['imread']
`%matplotlib` prevents importing * from pylab and numpy
"\n`%matplotlib` prevents importing * from pylab and numpy"
[4]:
import numpy as np
from skimage import io,data
from skimage.io import imread
# Load the hyperspectral cube and the region-of-interest (ROI) raster.
# Raw strings keep the Windows backslashes literal: '\H', '\d' and '\p' are
# invalid escape sequences in a plain string literal and newer Python
# versions warn about them. The resulting path text is unchanged.
img = imread(r'E:\Hyperspectral_Project\dc.tif')
roi = io.imread(r'E:\Hyperspectral_Project\protest.tif')

# Move the first axis to the end; per the original note the result is
# (1280, 307, 191).
img = np.transpose(img, (1, 2, 0))
labels = np.unique(roi[roi > 0])

# One row per pixel, one column per band. Sizes come from the array itself
# instead of the previously hard-coded 392960 x 191.
n_bands = img.shape[-1]
# `t` holds every pixel restricted to band indices 1..29 (band 0 is dropped);
# a later cell feeds it to the classifier to label the whole image.
t = img.reshape(-1, n_bands)[:, 1:30]

# Only the first channel of the ROI raster is used as the label map.
Y = roi[:, :, 0].ravel()
print(Y.shape)
print(t.shape)

# Training samples are the pixels whose ROI value is positive.
labelled = Y > 0
X = t[labelled, :]
Y = Y[labelled]
np.unique(Y)
(392960,)
(392960, 29)
[4]:
array([ 2, 3, 51, 102, 153, 204, 255], dtype=int16)
[3]:
# Stratified hold-out split of X / Y: 75% of the samples go to the training
# side, the rest to the test side. The fixed seed makes the shuffle repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, stratify=Y, random_state=42, train_size=0.75
)
[4]:
# Second stratified split: 66% of the current training set is kept for
# fitting, the rest becomes the validation set.
# NOTE(review): X_train / y_train are overwritten with their own subset, so
# running this cell a second time shrinks the training set again.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train, y_train, stratify=y_train, random_state=0, train_size=0.66
)
模型参数调试,创建最佳分类器
[5]:
from sklearn.ensemble import RandomForestClassifier

# Sweep the forest size and record the out-of-bag (OOB) error of each
# candidate, both in the OOB array and in a text log.
tree_counts = range(100, 120, 1)
OOB = np.zeros(len(tree_counts))

# The context manager closes the log even if a fit raises part-way through.
with open(r"E:\output.txt", 'w') as f:
    for slot, i in enumerate(tree_counts):
        model = RandomForestClassifier(n_estimators=i, random_state=0, oob_score=True)
        model.fit(X_train, y_train)
        OOBerror = 1 - model.oob_score_
        f.write('-----------------------------------\n')
        f.write("TreesNumber: %d" % (i))
        f.write('\n')
        f.write("OOB error: %.6f" % OOBerror)
        f.write('\n')
        f.write('-------------------------\n')
        OOB[slot] = OOBerror
        print(OOBerror)

# `trees` is the POSITION of the first minimum inside OOB (0..19), not a tree
# count. The previous code passed trees + 1 as n_estimators, which built a
# forest of only a couple of trees and triggered the "too few trees" OOB
# warnings. Map the position back onto the swept range instead.
minError = np.min(OOB)
ind = np.where(OOB == minError)
trees = ind[0][0]
best_n_estimators = tree_counts[int(trees)]

# random_state matches the sweep so the final model is reproducible.
# NOTE(review): the sweep used the default max_features while the final model
# uses max_features=3 -- confirm the selected size is meant to carry over.
rfc = RandomForestClassifier(
    n_estimators=best_n_estimators,
    max_features=3,
    oob_score=True,
    warm_start=True,
    random_state=0,
)
rfc.fit(X_train, y_train)
0.0231707317073
0.0227642276423
0.0233739837398
0.0231707317073
0.0231707317073
0.0231707317073
0.0229674796748
0.0229674796748
0.0229674796748
0.0231707317073
0.0229674796748
0.0233739837398
0.0235772357724
0.0233739837398
0.0235772357724
0.0233739837398
0.0229674796748
0.0229674796748
0.0227642276423
0.0227642276423
d:\Users\Lenovo\Anaconda3\lib\site-packages\sklearn\ensemble\forest.py:439: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable oob estimates.
warn("Some inputs do not have OOB scores. "
d:\Users\Lenovo\Anaconda3\lib\site-packages\sklearn\ensemble\forest.py:444: RuntimeWarning: invalid value encountered in true_divide
predictions[k].sum(axis=1)[:, np.newaxis])
[5]:
RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
max_depth=None, max_features=3, max_leaf_nodes=None,
min_impurity_split=1e-07, min_samples_leaf=1,
min_samples_split=2, min_weight_fraction_leaf=0.0,
n_estimators=2, n_jobs=1, oob_score=True, random_state=None,
verbose=0, warm_start=True)
混淆矩阵
[6]:
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Predict inside this cell: y_model was previously assigned only in a later
# cell, so running the notebook top to bottom raised NameError here.
# NOTE(review): `model` is whatever the tuning cell last bound to that name
# (the final forest of the sweep), not the selected `rfc` -- confirm intent.
y_model = model.predict(X_test)

# Pin the class order explicitly so the heatmap ticks show the actual class
# values rather than positional indices 0..n-1.
class_labels = np.unique(np.concatenate([y_test, y_model]))
mat = confusion_matrix(y_test, y_model, labels=class_labels)

sns.heatmap(mat, square=True, annot=True, fmt='d', cbar=False,
            xticklabels=class_labels, yticklabels=class_labels)
plt.xlabel('predicted value')
plt.ylabel('true value');
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-6-12f02ec26980> in <module>()
2 import matplotlib.pyplot as plt
3 import seaborn as sns
----> 4 mat = confusion_matrix(y_test, y_model)
5
6 sns.heatmap(mat, square=True, annot=True,fmt='d', cbar=False)
NameError: name 'y_model' is not defined
分类精度
[ ]:
# accuracy_score is not imported in any visible cell, so bring it in here.
from sklearn.metrics import accuracy_score

# A notebook cell only displays its last expression; print the train and
# validation scores so they are not silently discarded.
# NOTE(review): `model` is the last forest fitted in the tuning sweep, not the
# selected `rfc` -- confirm which classifier should be evaluated.
print("train accuracy: %.4f" % model.score(X_train, y_train))
print("validation accuracy: %.4f" % model.score(X_valid, y_valid))

# Test-set accuracy is the cell's displayed result.
y_model = model.predict(X_test)
accuracy_score(y_test, y_model)
[ ]:
# Classify every pixel of the image.
img_class = model.predict(t)

# Restore the 2-D layout from the image's own leading dimensions instead of
# the previously hard-coded 1280 x 307.
n_rows, n_cols = img.shape[:2]
img_class = img_class.reshape(n_rows, n_cols)

# To plot: plt.imshow(img_class)

# Save the class map next to the input data.
filename = r'E:\Hyperspectral_Project'
io.imsave(os.path.join(filename, "1.tif"), img_class)