随机森林

吕**

[3]:

%pylab inline

import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import SGDClassifier
from sklearn.datasets.samples_generator import make_blobs
from sklearn.preprocessing import StandardScaler
from matplotlib.colors import ListedColormap
from skimage import io,data

from sklearn.feature_selection import RFE
from sklearn.ensemble import GradientBoostingClassifier, IsolationForest
from sklearn.externals import joblib
from sklearn.model_selection  import train_test_split
import numpy.ma as ma
import os, shutil

Populating the interactive namespace from numpy and matplotlib

d:\Users\Lenovo\Anaconda3\lib\site-packages\IPython\core\magics\pylab.py:161: UserWarning: pylab import has clobbered these variables: ['imread']
`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"

读取数据

[4]:

import numpy as np
from skimage import io,data
from skimage.io import imread

# Raw string literals: the Windows paths contain backslashes (\H, \d, \p) that
# are invalid escape sequences in a normal string and trigger warnings.
img = imread(r'E:\Hyperspectral_Project\dc.tif')
roi = io.imread(r'E:\Hyperspectral_Project\protest.tif')

# Move the leading axis to the end; the original note gives the resulting
# shape as (1280, 307, 191).
img = np.transpose(img, (1, 2, 0))
labels = np.unique(roi[roi > 0])

# Flatten to one row per pixel. The row/column counts are derived from the
# array itself instead of being hard-coded as (392960, 191).
n_bands = img.shape[-1]
pixels = img.reshape(-1, n_bands)

# Columns 1..29 are used as features (29 columns, column 0 is skipped).
# NOTE(review): confirm that skipping column 0 is intentional.
X = pixels[:, 1:30]   # later reduced to the labelled pixels only
t = pixels[:, 1:30]   # every pixel, kept for classifying the full image

# The first channel of the ROI image holds the per-pixel class label.
Y = roi[:, :, 0].ravel()
assert Y.shape[0] == pixels.shape[0], "ROI and image must cover the same pixels"

print(Y.shape)
print(X.shape)

# Keep only labelled pixels (label > 0) for training and evaluation.
labelled = Y > 0
X = X[labelled, :]
Y = Y[labelled]

np.unique(Y)

(392960,)
(392960, 29)

[4]:

array([  2,   3,  51, 102, 153, 204, 255], dtype=int16)

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from ipywidgets import interact,interact_manual

创建训练集

[3]:

# Step 1: hold out 25% of the labelled pixels as the final test set.
# The remaining 75% is stored under its own name (X_trainval / y_trainval) so
# that step 2 never overwrites its own input: re-running this cell always
# yields the same three subsets instead of shrinking the training set further.
X_trainval, X_test, y_trainval, y_test = train_test_split(
        X,
        Y,
        train_size=0.75,
        random_state= 42,
        stratify=Y)

# Step 2: split the remaining data 66% / 34% into training and validation
# sets, again stratified by class label.
X_train, X_valid, y_train, y_valid = train_test_split(
        X_trainval,
        y_trainval,
        train_size=0.66,
        random_state= 0,
        stratify=y_trainval)

模型参数调试，创建最佳分类器

[5]:

from sklearn.ensemble import RandomForestClassifier

# Candidate forest sizes; the OOB error array is sized from this range so the
# two can never get out of sync.
tree_counts = range(100, 120, 1)
OOB = np.zeros(len(tree_counts))

# `with` guarantees the log file is closed even if a fit raises.
with open(r"E:\output.txt", 'w') as f:
    for slot, i in enumerate(tree_counts):
        model = RandomForestClassifier(n_estimators=i, random_state=0, oob_score=True)
        model.fit(X_train, y_train)

        # Out-of-bag error = 1 - out-of-bag accuracy.
        OOBerror = 1 - model.oob_score_
        OOB[slot] = OOBerror

        f.write('-----------------------------------\n')
        f.write("TreesNumber: %d" % (i))
        f.write('\n')
        f.write("OOB error: %.6f" % OOBerror)
        f.write('\n')
        f.write('-------------------------\n')
        print(OOBerror)

minError = np.min(OOB)
# Map the position of the smallest OOB error back to the actual number of
# trees. Previously the array index itself (0..19) was used, which built the
# "best" forest with only `index + 1` trees (e.g. 2) instead of 100..119.
# np.argmin returns the first minimum, matching the earlier tie-breaking.
trees = tree_counts[int(np.argmin(OOB))]

# NOTE(review): the sweep above used the default max_features and
# random_state=0, whereas this final forest uses max_features=3 and no fixed
# seed; also, the later cells predict with `model` (the last forest of the
# sweep), not with `rfc` -- confirm which classifier is intended.
rfc = RandomForestClassifier(n_estimators=trees, max_features=3, oob_score=True, warm_start=True)
rfc.fit(X_train, y_train)

0.0231707317073
0.0227642276423
0.0233739837398
0.0231707317073
0.0231707317073
0.0231707317073
0.0229674796748
0.0229674796748
0.0229674796748
0.0231707317073
0.0229674796748
0.0233739837398
0.0235772357724
0.0233739837398
0.0235772357724
0.0233739837398
0.0229674796748
0.0229674796748
0.0227642276423
0.0227642276423

d:\Users\Lenovo\Anaconda3\lib\site-packages\sklearn\ensemble\forest.py:439: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable oob estimates.
  warn("Some inputs do not have OOB scores. "
d:\Users\Lenovo\Anaconda3\lib\site-packages\sklearn\ensemble\forest.py:444: RuntimeWarning: invalid value encountered in true_divide
  predictions[k].sum(axis=1)[:, np.newaxis])

[5]:

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features=3, max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=2, n_jobs=1, oob_score=True, random_state=None,
            verbose=0, warm_start=True)

y_model=model.predict(X_test)

混淆矩阵

[6]:

from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Compute the test-set predictions here: no previously executed cell defines
# `y_model`, so using it directly raised a NameError.
y_model = model.predict(X_test)

# Fix the class order explicitly so the matrix rows/columns and the tick
# labels on the plot are guaranteed to agree.
classes = np.unique(np.concatenate([y_test, y_model]))
mat = confusion_matrix(y_test, y_model, labels=classes)

# Rows of `mat` are true classes, columns are predicted classes.
fig, ax = plt.subplots()
sns.heatmap(mat, square=True, annot=True, fmt='d', cbar=False,
            xticklabels=classes, yticklabels=classes, ax=ax)
ax.set_xlabel('predicted value')
ax.set_ylabel('true value');

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-6-12f02ec26980> in <module>()
      2 import matplotlib.pyplot as plt
      3 import seaborn as sns
----> 4 mat = confusion_matrix(y_test, y_model)
      5
      6 sns.heatmap(mat, square=True, annot=True,fmt='d', cbar=False)

NameError: name 'y_model' is not defined

分类精度

[ ]:

# accuracy_score was never imported by an executed cell, so import it here.
from sklearn.metrics import accuracy_score

y_model = model.predict(X_test)

# Collect all three accuracies in one mapping: only the last expression of a
# cell is displayed, so the separate train/validation scores used to be lost.
scores = {
    'train': model.score(X_train, y_train),
    'valid': model.score(X_valid, y_valid),
    'test': accuracy_score(y_test, y_model),
}
scores

[ ]:

# Classify every pixel of the image (t holds the features of all pixels).
img_class = model.predict(t)
# Restore the raster layout using the image's own row/column counts rather
# than the hard-coded (1280, 307).
img_class = img_class.reshape(img.shape[0], img.shape[1])
# To preview the result: plt.imshow(img_class)
# Save the classification map; `filename` is the output directory.
filename = r'E:\Hyperspectral_Project'
io.imsave(os.path.join(filename, "1.tif"), img_class)