6.3.1 决策树和随机森林

[1]:
from skimage.io import imread, imsave
img = imread('Hyperspectral_Project/dc.tif')
img.shape
[1]:
(191, 1280, 307)
[2]:
import numpy as np
from skimage.exposure import rescale_intensity

#选取第60、27和17波段,组合成RGB图像
img_RGB = np.dstack([img[59, :, :], img[26, :, :], img[16, :, :]])

#对图像进行对比度拉伸,放缩到0-255的数值范围
p_10, p_90 = np.percentile(img_RGB, (10, 90))

img_rescale = rescale_intensity(img_RGB,
                                in_range=(p_10, p_90),
                                out_range=(0, 255))

img_rescale = img_rescale.astype('uint8')
imsave('hyperspectral_img_RGB.png', img_rescale)
[3]:
from skimage.io import imread, imsave
img = imread('Hyperspectral_Project/dc.tif')

import numpy as np
from skimage.exposure import rescale_intensity
#选取第60、27和17波段,组合成RGB图像
img_RGB = np.dstack([img[59, :, :], img[26, :, :], img[16, :, :]])
#对图像进行对比度拉伸,放缩到0-255的数值范围
p_10, p_90 = np.percentile(img_RGB, (10, 90))
img_rescale = rescale_intensity(img_RGB,
                                in_range=(p_10, p_90),
                                out_range=(0, 255))
img_rescale = img_rescale.astype('uint8')
[4]:
def get_label(file_path):
    with open(file_path, 'r') as label_file:
        label_data = label_file.readlines()[13:]
    label_list = []
    region_list = []
    for i in range(len(label_data) // 3):
        #解析文件,获得标注区域的标签、左上角像素坐标和右下角像素坐标
        label = label_data[i * 3].split('\t')[4]
        r_1, c_1 = label_data[i * 3 + 1][:-1].split('\t')[1:]
        r_2, c_2 = label_data[i * 3 + 2][:-1].split('\t')[1:]
        label_list.append(int(label)-1)
        region_list.append(
            (int(r_1) - 1, int(c_1) - 1, int(r_2), int(c_2)))
    return label_list, region_list
[13]:
img_file_path = 'Hyperspectral_Project/dc.tif'
label_file_path = 'Hyperspectral_Project/dctest.project'
X,y,pixel_position = load_hyperspectral_data(img_file_path,label_file_path)
[49]:
NDVI = (NIR-R)/(NIR+R)
MNDWI= (G-MIR)/(G+MIR)
NDBI = (MIR-NIR)/(MIR+NIR)

[75]:
input_x = np.concatenate(
    [NDVI.reshape((-1, 1)),
     MNDWI.reshape((-1, 1)),
     NDBI.reshape((-1, 1))],
    axis=1)
input_y = [labels[item] for item in y]
[76]:
input_x.shape
[76]:
(8079, 3)
[80]:
import pandas as pd
import seaborn as sns
input_pd = pd.DataFrame(input_x,columns=['NDVI','MNDWI','NDBI'])
input_pd['Categories']= input_y
[313]:
def plot_svc_decision_function(model, ax=None, plot_support=True):
    """Plot the decision function for a 2D SVC"""
    if ax is None:
        ax = plt.gca()
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()

    # create grid to evaluate model
    x = np.linspace(xlim[0], xlim[1], 30)
    y = np.linspace(ylim[0], ylim[1], 30)
    Y, X = np.meshgrid(y, x)
    xy = np.vstack([X.ravel(), Y.ravel()]).T
    P = model.decision_function(xy).reshape(X.shape)

    # plot decision boundary and margins
    ax.contour(X, Y, P, colors='k',
               levels=[-1, 0, 1], alpha=0.5,
               linestyles=['--', '-', '--'])

    # plot support vectors
    if plot_support:
        ax.scatter(model.support_vectors_[:, 0],
                   model.support_vectors_[:, 1],
                   s=100, linewidth=1, edgecolors='k' ,facecolors='none');
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
[314]:
def select_categories_visulization(selected_categories):

    selected_data = input_pd[(input_pd['Categories'] == selected_categories[0]) |
                           (input_pd['Categories'] == selected_categories[1])]
    model = SVC(kernel='linear', C=1e10)



    features = selected_data.drop(['Categories','NDBI'],axis=1)
    target = selected_data['Categories']
    model.fit(features, target)

    sns.relplot(data=selected_data,
                x='NDVI',
                y='MNDWI',
                hue='Categories',)
    plot_svc_decision_function(model)
[231]:
def select_multiple_categories_visulization(selected_categories):

    selected_data_index = input_pd['Categories'].str.contains('|'.join(selected_categories),
                                          regex=True)
    selected_data = input_pd[selected_data_index]

    model = SVC(kernel='linear', C=1e10)

    features = selected_data.drop(['Categories', 'NDBI'], axis=1)
    target = selected_data['Categories']
    model.fit(features, target)

    sns.relplot(
        data=selected_data,
        x='NDVI',
        y='MNDWI',
        hue='Categories',
    )
    plot_svc_decision_function(model)
[331]:
selected_categories = ['Roofs', 'Path']
selected_data_index = input_pd['Categories'].str.contains('|'.join(selected_categories),
                                          regex=True)
selected_data = input_pd[selected_data_index]

X = selected_data.drop(['Categories', 'NDBI'], axis=1)
y = selected_data['Categories']
[613]:

from matplotlib import cm cmap = sns.diverging_palette(200,20,sep=20,as_cmap=True) def plot_mlp_decision_function(model, ax=None): """Plot the decision function""" if ax is None: ax = plt.gca() xlim = ax.get_xlim() ylim = ax.get_ylim() x = np.linspace(xlim[0], xlim[1], 200) y = np.linspace(ylim[0], ylim[1], 200) Y, X = np.meshgrid(y, x) xy = np.vstack([X.ravel(), Y.ravel()]).T if hasattr(model, "decision_function"): Z = model.decision_function(xy) else: Z = model.predict_proba(xy)[:, 1] Z = Z.reshape(X.shape) norm = cm.colors.Normalize(vmax=1, vmin=0) levels = np.arange(0,1.01,0.1) cset1 = ax.contourf(X, Y, Z, levels, norm=norm, alpha=.9, zorder = -2, cmap=cm.get_cmap(cmap, len(levels) - 1)) cset2 = ax.contour(X, Y, Z, cset1.levels,linewidths=0.5, alpha=.5, colors='k', zorder = -1) for c in cset2.collections: c.set_linestyle('--') cset3 = ax.contour(X, Y, Z, (0.5,), colors='k', alpha=.8,linewidths=1.5,zorder = -1) plt.colorbar(cset1, orientation='horizontal', fraction=0.05, ax=ax) ax.set_xlim(xlim)
[ ]:
def visualize_classifier(model, X, y, ax=None, cmap='rainbow'):
    ax = ax or plt.gca()


    ax.axis('tight')
    ax.axis('off')
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()

    # fit the estimator
    model.fit(X, y)
    xx, yy = np.meshgrid(np.linspace(*xlim, num=200),
                         np.linspace(*ylim, num=200))
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

    # Create a color plot with the results
    n_classes = len(np.unique(y))
    contours = ax.contourf(xx, yy, Z, alpha=0.3,
                           levels=np.arange(n_classes + 1) - 0.5,
                           cmap=cmap,
                           #clim=(y.min(), y.max()),
                           zorder=1)

    ax.set(xlim=xlim, ylim=ylim)
[ ]:
def visualize_tree(estimator, X, y, boundaries=True,
                   xlim=None, ylim=None, ax=None):
    ax = ax or plt.gca()

    # Plot the training points
    ax.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap='viridis',
               clim=(y.min(), y.max()), zorder=3)
    ax.axis('tight')
    ax.axis('off')
    if xlim is None:
        xlim = ax.get_xlim()
    if ylim is None:
        ylim = ax.get_ylim()

    # fit the estimator
    estimator.fit(X, y)
    xx, yy = np.meshgrid(np.linspace(*xlim, num=200),
                         np.linspace(*ylim, num=200))
    Z = estimator.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    n_classes = len(np.unique(y))
    Z = Z.reshape(xx.shape)
    contours = ax.contourf(xx, yy, Z, alpha=0.3,
                           levels=np.arange(n_classes + 1) - 0.5,
                           cmap='viridis',
                           #clim=(y.min(), y.max()),
                           zorder=1)

    ax.set(xlim=xlim, ylim=ylim)

    # Plot the decision boundaries
    def plot_boundaries(i, xlim, ylim):
        if i >= 0:
            tree = estimator.tree_

            if tree.feature[i] == 0:
                ax.plot([tree.threshold[i], tree.threshold[i]], ylim, '-k', zorder=2)
                plot_boundaries(tree.children_left[i],
                                [xlim[0], tree.threshold[i]], ylim)
                plot_boundaries(tree.children_right[i],
                                [tree.threshold[i], xlim[1]], ylim)

            elif tree.feature[i] == 1:
                ax.plot(xlim, [tree.threshold[i], tree.threshold[i]], '-k', zorder=2)
                plot_boundaries(tree.children_left[i], xlim,
                                [ylim[0], tree.threshold[i]])
                plot_boundaries(tree.children_right[i], xlim,
                                [tree.threshold[i], ylim[1]])

    if boundaries:
        plot_boundaries(0, xlim, ylim)
[616]:
selected_categories = ['Street', 'Path']

from sklearn.tree import DecisionTreeClassifier



selected_data = input_pd[(input_pd['Categories'] == selected_categories[0]) |
                       (input_pd['Categories'] == selected_categories[1])]

model = DecisionTreeClassifier()



features = selected_data.drop(['Categories','NDBI'],axis=1)
target = selected_data['Categories']
model.fit(features, target)


sns.relplot(data=selected_data,
            x='NDVI',
            y='MNDWI',
            hue='Categories',s=10)

plt.grid(b=False)

plot_mlp_decision_function(model)

plt.savefig(
    f'DecisionTree_{selected_categories[0]}_{selected_categories[1]}.pdf',
    bbox_inches='tight')
../../../_images/1stPart_Chapter6.ImageClustering_PPT-code_random-forest_17_0.png
[615]:
selected_categories = ['Shadow','Water']

from sklearn.tree import DecisionTreeClassifier



selected_data = input_pd[(input_pd['Categories'] == selected_categories[0]) |
                       (input_pd['Categories'] == selected_categories[1])]

model = DecisionTreeClassifier()



features = selected_data.drop(['Categories','NDBI'],axis=1)
target = selected_data['Categories']
model.fit(features, target)


sns.relplot(data=selected_data,
            x='NDVI',
            y='MNDWI',
            hue='Categories',s=10)

plt.grid(b=False)

plot_mlp_decision_function(model)

plt.savefig(
    f'DecisionTree_{selected_categories[0]}_{selected_categories[1]}.pdf',
    bbox_inches='tight')
../../../_images/1stPart_Chapter6.ImageClustering_PPT-code_random-forest_18_0.png
[614]:
from sklearn.neural_network import MLPClassifier

selected_categories = ['Grass','Trees']


selected_data = input_pd[(input_pd['Categories'] == selected_categories[0]) |
                       (input_pd['Categories'] == selected_categories[1])]

model = DecisionTreeClassifier()



features = selected_data.drop(['Categories','NDBI'],axis=1)
target = selected_data['Categories']
model.fit(features, target)


sns.relplot(data=selected_data,
            x='NDVI',
            y='MNDWI',
            hue='Categories',s=10)

plt.grid(b=False)

plot_mlp_decision_function(model)

plt.savefig(
    f'DecisionTree_{selected_categories[0]}_{selected_categories[1]}.pdf',
    bbox_inches='tight')
../../../_images/1stPart_Chapter6.ImageClustering_PPT-code_random-forest_19_0.png
[617]:
selected_categories = ['Roofs', 'Path']

selected_data = input_pd[(input_pd['Categories'] == selected_categories[0]) |
                       (input_pd['Categories'] == selected_categories[1])]
model = DecisionTreeClassifier()



features = selected_data.drop(['Categories','NDVI'],axis=1)
target = selected_data['Categories']
model.fit(features, target)


sns.relplot(data=selected_data,
            y='NDBI',
            x='MNDWI',
            hue='Categories',s=10)
plt.xlim(0.65,0.80)
plt.ylim(-0.74,-0.66)
ax = plt.gca()
ax.set_aspect(1.5)

plt.grid(b=False)

plot_mlp_decision_function(model)

plt.savefig(
    f'DecisionTree_{selected_categories[0]}_{selected_categories[1]}.pdf',
    bbox_inches='tight')
../../../_images/1stPart_Chapter6.ImageClustering_PPT-code_random-forest_20_0.png
[624]:
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(n_estimators=1000, random_state=0)

selected_categories = ['Street', 'Path']


selected_data = input_pd[(input_pd['Categories'] == selected_categories[0]) |
                       (input_pd['Categories'] == selected_categories[1])]




features = selected_data.drop(['Categories','NDBI'],axis=1)
target = selected_data['Categories']
model.fit(features, target)


sns.relplot(data=selected_data,
            x='NDVI',
            y='MNDWI',
            hue='Categories',s=10)

plt.grid(b=False)

plot_mlp_decision_function(model)

plt.savefig(
    f'RandomForest_{selected_categories[0]}_{selected_categories[1]}.pdf',
    bbox_inches='tight')
../../../_images/1stPart_Chapter6.ImageClustering_PPT-code_random-forest_21_0.png
[625]:
selected_categories = ['Shadow','Water']

cmap = sns.diverging_palette(200,20,sep=20,as_cmap=True)


selected_data = input_pd[(input_pd['Categories'] == selected_categories[0]) |
                       (input_pd['Categories'] == selected_categories[1])]

model = RandomForestClassifier(n_estimators=1000, random_state=0)



features = selected_data.drop(['Categories','NDBI'],axis=1)
target = selected_data['Categories']
model.fit(features, target)


sns.relplot(data=selected_data,
            x='NDVI',
            y='MNDWI',
            hue='Categories',s=10)

plt.grid(b=False)

plot_mlp_decision_function(model)

plt.savefig(
    f'RandomForest_{selected_categories[0]}_{selected_categories[1]}.pdf',
    bbox_inches='tight')
../../../_images/1stPart_Chapter6.ImageClustering_PPT-code_random-forest_22_0.png
[626]:


selected_categories = ['Roofs', 'Path'] selected_data = input_pd[(input_pd['Categories'] == selected_categories[0]) | (input_pd['Categories'] == selected_categories[1])] model = RandomForestClassifier(n_estimators=1000, random_state=0) features = selected_data.drop(['Categories','NDVI'],axis=1) target = selected_data['Categories'] model.fit(features, target) sns.relplot(data=selected_data, y='NDBI', x='MNDWI', hue='Categories',s=10) plt.xlim(0.65,0.80) plt.ylim(-0.74,-0.66) ax = plt.gca() ax.set_aspect(1.5) plt.grid(b=False) plot_mlp_decision_function(model) plt.savefig( f'RandomForest_{selected_categories[0]}_{selected_categories[1]}.pdf', bbox_inches='tight')
../../../_images/1stPart_Chapter6.ImageClustering_PPT-code_random-forest_23_0.png
[629]:


selected_categories = ['Grass','Trees'] cmap = sns.diverging_palette(20,200,sep=20,as_cmap=True) selected_data = input_pd[(input_pd['Categories'] == selected_categories[0]) | (input_pd['Categories'] == selected_categories[1])] model = RandomForestClassifier(n_estimators=1000, random_state=0) features = selected_data.drop(['Categories','NDBI'],axis=1) target = selected_data['Categories'] model.fit(features, target) sns.relplot(data=selected_data, x='NDVI', y='MNDWI', hue='Categories',s=10) plt.grid(b=False) plot_mlp_decision_function(model) plt.savefig( f'RandomForest_{selected_categories[0]}_{selected_categories[1]}.pdf', bbox_inches='tight')
../../../_images/1stPart_Chapter6.ImageClustering_PPT-code_random-forest_24_0.png