Random Forest Visualization

张**

[1]:
%config InlineBackend.figure_format = 'svg'
%matplotlib inline
import warnings
warnings.simplefilter("ignore")

import numpy as np
from skimage import io
import scipy.io as sio
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
[2]:
mms = MinMaxScaler()
# The raw data were converted to BIP format; read the image and reshape it to (n_samples, n_features)
hiperSpectral = io.imread('dcBIP.tif')
hang = hiperSpectral.shape[0]          # number of rows
lie = hiperSpectral.shape[1]           # number of columns
NumberBands = hiperSpectral.shape[2]   # number of spectral bands
AllXdata = hiperSpectral.reshape(hang*lie, NumberBands)
AllxNormalization = mms.fit_transform(AllXdata)
# Read the sample (label) matrix
Ylabel = sio.loadmat('allSampe.mat')['S']
YLabelOnedime = Ylabel.reshape(hang*lie)

# Keep only the labeled pixels (label > 0) as classification samples
index = np.where(YLabelOnedime > 0)[0]
Ylabelsample = YLabelOnedime[index]

Xdatasamples = AllxNormalization[index, :]
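
Before training, it helps to confirm the shape of the sample matrix and how the labeled pixels are spread across classes. The cell below is a minimal sketch that only reuses variables already defined above (hang, lie, NumberBands, Xdatasamples, Ylabelsample); the printed counts depend on the contents of allSampe.mat.

[ ]:
# Sanity check: image dimensions and class balance of the labeled samples
print('Image size (rows, cols, bands):', hang, lie, NumberBands)
print('Labeled samples:', Xdatasamples.shape, '  labels:', Ylabelsample.shape)

classes, counts = np.unique(Ylabelsample, return_counts=True)
for c, n in zip(classes, counts):
    print('class', int(c), ':', n, 'samples')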
[3]:
from ipywidgets import interact
import ipywidgets as widgets
# @interact_manual   # alternative: decorate with interact_manual (from ipywidgets) for on-demand updates
def plot_RFtree_interactive(numberTree, testsize, Nodesplit, maxDepth):
    # Stratified train/test split on the labeled samples
    X_train, X_test, y_train, y_test = train_test_split(Xdatasamples, Ylabelsample,
                                                        test_size=testsize, random_state=2,
                                                        stratify=Ylabelsample)
    clfRF = RandomForestClassifier(n_estimators=numberTree, criterion=Nodesplit, max_depth=maxDepth)
    tempModel = clfRF.fit(X_train, y_train)

    # Band importances of the fitted forest; argsort handles tied values correctly
    featureImport = tempModel.feature_importances_
    TopIndexArr = np.argsort(featureImport)[::-1][:10]
    TopValue = featureImport[TopIndexArr]
    TopIndex = list(TopIndexArr + 1)   # 1-based band numbers

    plt.figure(figsize=(12, 6), dpi=100)
    plt.subplot(211)
    plt.grid()
    plt.xlabel("Index of features")
    plt.ylabel("feature_importances")
    plt.scatter(range(1, NumberBands + 1), featureImport, s=3, c='r')
    plt.show()

    y_model = tempModel.predict(X_test)
    testscore = accuracy_score(y_test, y_model)
    print('Training samples:', len(y_train), '   Test samples:', len(y_test), '   Test accuracy:', testscore)
    print('Top 10 feature importances:', list(TopValue))
    print('Bands of the top 10 importances:', TopIndex)

interact(plot_RFtree_interactive,
         numberTree=widgets.IntSlider(min=1, max=20, step=1, value=5, description='numTree'),
         testsize=widgets.FloatSlider(min=0.1, max=0.6, step=0.05, value=0.1, description='testRatio'),
         Nodesplit=['gini', 'entropy'],
         maxDepth=widgets.IntSlider(min=1, max=20, step=1, value=1, description='maxDepth'))
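
For readers who prefer a single fixed run over the slider interface, the cell below is a non-interactive sketch of the same experiment. It reuses the data from cell [2]; the hyperparameter values (test_size=0.3, n_estimators=10, max_depth=10) and the confusion_matrix report are illustrative additions, not part of the original notebook.

[ ]:
# One fixed configuration of the experiment above, plus a per-class confusion matrix
# (confusion_matrix is added here for illustration only)
from sklearn.metrics import confusion_matrix

X_train, X_test, y_train, y_test = train_test_split(Xdatasamples, Ylabelsample,
                                                    test_size=0.3, random_state=2,
                                                    stratify=Ylabelsample)
rf = RandomForestClassifier(n_estimators=10, criterion='gini', max_depth=10, random_state=0)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)
print('Test accuracy:', accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))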

[4]:
# @interact_manual
def plot_RFtree_interactivewithPCA(numberTree, testsize, Nodesplit, maxDepth, nuberCON):
    # Reduce the normalized spectra to nuberCON principal components before classification
    pcaUse = PCA(n_components=nuberCON).fit(AllxNormalization)
    XafterTransform = pcaUse.transform(AllxNormalization)
    XdatasamplesPCA = XafterTransform[index, :]

    X_train, X_test, y_train, y_test = train_test_split(XdatasamplesPCA, Ylabelsample,
                                                        test_size=testsize, random_state=2,
                                                        stratify=Ylabelsample)
    clfRF = RandomForestClassifier(n_estimators=numberTree, criterion=Nodesplit, max_depth=maxDepth)
    tempModel = clfRF.fit(X_train, y_train)

    # Importances of the principal components; argsort handles tied values correctly
    featureImport = tempModel.feature_importances_
    TopIndexArr = np.argsort(featureImport)[::-1][:10]
    TopValue = featureImport[TopIndexArr]
    TopIndex = list(TopIndexArr + 1)   # 1-based component numbers

    plt.figure(figsize=(12, 6), dpi=100)
    plt.subplot(211)
    plt.grid()
    plt.xlabel("Index of features")
    plt.ylabel("feature_importances")
    plt.scatter(range(1, nuberCON + 1), featureImport, s=3, c='r')
    plt.show()

    y_model = tempModel.predict(X_test)
    testscore = accuracy_score(y_test, y_model)
    print('Principal components:', nuberCON, '   Training samples:', len(y_train),
          '   Test samples:', len(y_test), '   Test accuracy:', testscore)
    print('Top 10 feature importances:', list(TopValue))
    print('Components of the top 10 importances:', TopIndex)

interact(plot_RFtree_interactivewithPCA,
         numberTree=widgets.IntSlider(min=1, max=20, step=1, value=5, description='numTree'),
         testsize=widgets.FloatSlider(min=0.1, max=0.6, step=0.05, value=0.1, description='testRatio'),
         Nodesplit=['gini', 'entropy'],
         maxDepth=widgets.IntSlider(min=1, max=20, step=1, value=5, description='maxDepth'),
         nuberCON=widgets.IntSlider(min=1, max=30, step=1, value=5, description='nuberCON'))
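
When choosing nuberCON, the cumulative explained variance ratio of the PCA is a useful guide. The cell below is a minimal sketch assuming AllxNormalization from cell [2] is available and that the image has at least 30 bands; the value 30 mirrors the upper bound of the nuberCON slider and is otherwise arbitrary.

[ ]:
# Cumulative explained variance of the first 30 principal components
pca_full = PCA(n_components=30).fit(AllxNormalization)
cumvar = np.cumsum(pca_full.explained_variance_ratio_)

plt.figure(figsize=(8, 4), dpi=100)
plt.plot(range(1, 31), cumvar, marker='o', ms=3)
plt.grid()
plt.xlabel("Number of principal components")
plt.ylabel("Cumulative explained variance ratio")
plt.show()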