6.3.1 决策树和随机森林

[1]:

from skimage.io import imread, imsave
img = imread('Hyperspectral_Project/dc.tif')
img.shape

[1]:

(191, 1280, 307)

[2]:

import numpy as np
from skimage.exposure import rescale_intensity

#选取第60、27和17波段，组合成RGB图像
img_RGB = np.dstack([img[59, :, :], img[26, :, :], img[16, :, :]])

#对图像进行对比度拉伸，放缩到0-255的数值范围
p_10, p_90 = np.percentile(img_RGB, (10, 90))

img_rescale = rescale_intensity(img_RGB,
                                in_range=(p_10, p_90),
                                out_range=(0, 255))

img_rescale = img_rescale.astype('uint8')
imsave('hyperspectral_img_RGB.png', img_rescale)

[3]:

from skimage.io import imread, imsave
img = imread('Hyperspectral_Project/dc.tif')

import numpy as np
from skimage.exposure import rescale_intensity
#选取第60、27和17波段，组合成RGB图像
img_RGB = np.dstack([img[59, :, :], img[26, :, :], img[16, :, :]])
#对图像进行对比度拉伸，放缩到0-255的数值范围
p_10, p_90 = np.percentile(img_RGB, (10, 90))
img_rescale = rescale_intensity(img_RGB,
                                in_range=(p_10, p_90),
                                out_range=(0, 255))
img_rescale = img_rescale.astype('uint8')

[4]:

def get_label(file_path):
    with open(file_path, 'r') as label_file:
        label_data = label_file.readlines()[13:]
    label_list = []
    region_list = []
    for i in range(len(label_data) // 3):
        #解析文件，获得标注区域的标签、左上角像素坐标和右下角像素坐标
        label = label_data[i * 3].split('\t')[4]
        r_1, c_1 = label_data[i * 3 + 1][:-1].split('\t')[1:]
        r_2, c_2 = label_data[i * 3 + 2][:-1].split('\t')[1:]
        label_list.append(int(label)-1)
        region_list.append(
            (int(r_1) - 1, int(c_1) - 1, int(r_2), int(c_2)))
    return label_list, region_list

[13]:

img_file_path = 'Hyperspectral_Project/dc.tif'
label_file_path = 'Hyperspectral_Project/dctest.project'
X,y,pixel_position = load_hyperspectral_data(img_file_path,label_file_path)

[49]:

NDVI = (NIR-R)/(NIR+R)
MNDWI= (G-MIR)/(G+MIR)
NDBI = (MIR-NIR)/(MIR+NIR)

[75]:

input_x = np.concatenate(
    [NDVI.reshape((-1, 1)),
     MNDWI.reshape((-1, 1)),
     NDBI.reshape((-1, 1))],
    axis=1)
input_y = [labels[item] for item in y]

[76]:

input_x.shape

[76]:

(8079, 3)

[80]:

import pandas as pd
import seaborn as sns
input_pd = pd.DataFrame(input_x,columns=['NDVI','MNDWI','NDBI'])
input_pd['Categories']= input_y

[313]:

def plot_svc_decision_function(model, ax=None, plot_support=True):
    """Plot the decision function for a 2D SVC"""
    if ax is None:
        ax = plt.gca()
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()

    # create grid to evaluate model
    x = np.linspace(xlim[0], xlim[1], 30)
    y = np.linspace(ylim[0], ylim[1], 30)
    Y, X = np.meshgrid(y, x)
    xy = np.vstack([X.ravel(), Y.ravel()]).T
    P = model.decision_function(xy).reshape(X.shape)

    # plot decision boundary and margins
    ax.contour(X, Y, P, colors='k',
               levels=[-1, 0, 1], alpha=0.5,
               linestyles=['--', '-', '--'])

    # plot support vectors
    if plot_support:
        ax.scatter(model.support_vectors_[:, 0],
                   model.support_vectors_[:, 1],
                   s=100, linewidth=1, edgecolors='k' ,facecolors='none');
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

[314]:

def select_categories_visulization(selected_categories):

    selected_data = input_pd[(input_pd['Categories'] == selected_categories[0]) |
                           (input_pd['Categories'] == selected_categories[1])]
    model = SVC(kernel='linear', C=1e10)



    features = selected_data.drop(['Categories','NDBI'],axis=1)
    target = selected_data['Categories']
    model.fit(features, target)

    sns.relplot(data=selected_data,
                x='NDVI',
                y='MNDWI',
                hue='Categories',)
    plot_svc_decision_function(model)

[231]:

def select_multiple_categories_visulization(selected_categories):

    selected_data_index = input_pd['Categories'].str.contains('|'.join(selected_categories),
                                          regex=True)
    selected_data = input_pd[selected_data_index]

    model = SVC(kernel='linear', C=1e10)

    features = selected_data.drop(['Categories', 'NDBI'], axis=1)
    target = selected_data['Categories']
    model.fit(features, target)

    sns.relplot(
        data=selected_data,
        x='NDVI',
        y='MNDWI',
        hue='Categories',
    )
    plot_svc_decision_function(model)

[331]:

selected_categories = ['Roofs', 'Path']
selected_data_index = input_pd['Categories'].str.contains('|'.join(selected_categories),
                                          regex=True)
selected_data = input_pd[selected_data_index]

X = selected_data.drop(['Categories', 'NDBI'], axis=1)
y = selected_data['Categories']

[613]:

from matplotlib import cm


cmap = sns.diverging_palette(200,20,sep=20,as_cmap=True)

def plot_mlp_decision_function(model, ax=None):
    """Plot the decision function"""
    if ax is None:
        ax = plt.gca()
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()

    x = np.linspace(xlim[0], xlim[1], 200)
    y = np.linspace(ylim[0], ylim[1], 200)
    Y, X = np.meshgrid(y, x)
    xy = np.vstack([X.ravel(), Y.ravel()]).T

    if hasattr(model, "decision_function"):
        Z = model.decision_function(xy)
    else:
        Z = model.predict_proba(xy)[:, 1]

    Z = Z.reshape(X.shape)

    norm = cm.colors.Normalize(vmax=1, vmin=0)


    levels = np.arange(0,1.01,0.1)

    cset1 = ax.contourf(X, Y, Z, levels,
                        norm=norm,
                        alpha=.9,
                        zorder = -2,
                        cmap=cm.get_cmap(cmap, len(levels) - 1))

    cset2 = ax.contour(X, Y, Z, cset1.levels,linewidths=0.5,
                       alpha=.5,
                       colors='k',
                      zorder = -1)

    for c in cset2.collections:
        c.set_linestyle('--')

    cset3 = ax.contour(X, Y, Z, (0.5,), colors='k', alpha=.8,linewidths=1.5,zorder = -1)
    plt.colorbar(cset1,
                 orientation='horizontal',
                 fraction=0.05,
                 ax=ax)

    ax.set_xlim(xlim)

[ ]:

def visualize_classifier(model, X, y, ax=None, cmap='rainbow'):
    ax = ax or plt.gca()


    ax.axis('tight')
    ax.axis('off')
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()

    # fit the estimator
    model.fit(X, y)
    xx, yy = np.meshgrid(np.linspace(*xlim, num=200),
                         np.linspace(*ylim, num=200))
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

    # Create a color plot with the results
    n_classes = len(np.unique(y))
    contours = ax.contourf(xx, yy, Z, alpha=0.3,
                           levels=np.arange(n_classes + 1) - 0.5,
                           cmap=cmap,
                           #clim=(y.min(), y.max()),
                           zorder=1)

    ax.set(xlim=xlim, ylim=ylim)

[ ]:

def visualize_tree(estimator, X, y, boundaries=True,
                   xlim=None, ylim=None, ax=None):
    ax = ax or plt.gca()

    # Plot the training points
    ax.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap='viridis',
               clim=(y.min(), y.max()), zorder=3)
    ax.axis('tight')
    ax.axis('off')
    if xlim is None:
        xlim = ax.get_xlim()
    if ylim is None:
        ylim = ax.get_ylim()

    # fit the estimator
    estimator.fit(X, y)
    xx, yy = np.meshgrid(np.linspace(*xlim, num=200),
                         np.linspace(*ylim, num=200))
    Z = estimator.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    n_classes = len(np.unique(y))
    Z = Z.reshape(xx.shape)
    contours = ax.contourf(xx, yy, Z, alpha=0.3,
                           levels=np.arange(n_classes + 1) - 0.5,
                           cmap='viridis',
                           #clim=(y.min(), y.max()),
                           zorder=1)

    ax.set(xlim=xlim, ylim=ylim)

    # Plot the decision boundaries
    def plot_boundaries(i, xlim, ylim):
        if i >= 0:
            tree = estimator.tree_

            if tree.feature[i] == 0:
                ax.plot([tree.threshold[i], tree.threshold[i]], ylim, '-k', zorder=2)
                plot_boundaries(tree.children_left[i],
                                [xlim[0], tree.threshold[i]], ylim)
                plot_boundaries(tree.children_right[i],
                                [tree.threshold[i], xlim[1]], ylim)

            elif tree.feature[i] == 1:
                ax.plot(xlim, [tree.threshold[i], tree.threshold[i]], '-k', zorder=2)
                plot_boundaries(tree.children_left[i], xlim,
                                [ylim[0], tree.threshold[i]])
                plot_boundaries(tree.children_right[i], xlim,
                                [tree.threshold[i], ylim[1]])

    if boundaries:
        plot_boundaries(0, xlim, ylim)

[616]:

selected_categories = ['Street', 'Path']

from sklearn.tree import DecisionTreeClassifier

selected_data = input_pd[(input_pd['Categories'] == selected_categories[0]) |
                       (input_pd['Categories'] == selected_categories[1])]

model = DecisionTreeClassifier()

features = selected_data.drop(['Categories','NDBI'],axis=1)
target = selected_data['Categories']
model.fit(features, target)

sns.relplot(data=selected_data,
            x='NDVI',
            y='MNDWI',
            hue='Categories',s=10)

plt.grid(b=False)

plot_mlp_decision_function(model)

plt.savefig(
    f'DecisionTree_{selected_categories[0]}_{selected_categories[1]}.pdf',
    bbox_inches='tight')

../../../_images/1stPart_Chapter6.ImageClustering_PPT-code_random-forest_17_0.png

[615]:

selected_categories = ['Shadow','Water']

from sklearn.tree import DecisionTreeClassifier

selected_data = input_pd[(input_pd['Categories'] == selected_categories[0]) |
                       (input_pd['Categories'] == selected_categories[1])]

model = DecisionTreeClassifier()

features = selected_data.drop(['Categories','NDBI'],axis=1)
target = selected_data['Categories']
model.fit(features, target)

sns.relplot(data=selected_data,
            x='NDVI',
            y='MNDWI',
            hue='Categories',s=10)

plt.grid(b=False)

plot_mlp_decision_function(model)

plt.savefig(
    f'DecisionTree_{selected_categories[0]}_{selected_categories[1]}.pdf',
    bbox_inches='tight')

../../../_images/1stPart_Chapter6.ImageClustering_PPT-code_random-forest_18_0.png

[614]:

from sklearn.neural_network import MLPClassifier

selected_categories = ['Grass','Trees']

selected_data = input_pd[(input_pd['Categories'] == selected_categories[0]) |
                       (input_pd['Categories'] == selected_categories[1])]

model = DecisionTreeClassifier()

features = selected_data.drop(['Categories','NDBI'],axis=1)
target = selected_data['Categories']
model.fit(features, target)

sns.relplot(data=selected_data,
            x='NDVI',
            y='MNDWI',
            hue='Categories',s=10)

plt.grid(b=False)

plot_mlp_decision_function(model)

plt.savefig(
    f'DecisionTree_{selected_categories[0]}_{selected_categories[1]}.pdf',
    bbox_inches='tight')

../../../_images/1stPart_Chapter6.ImageClustering_PPT-code_random-forest_19_0.png

[617]:

selected_categories = ['Roofs', 'Path']

selected_data = input_pd[(input_pd['Categories'] == selected_categories[0]) |
                       (input_pd['Categories'] == selected_categories[1])]
model = DecisionTreeClassifier()



features = selected_data.drop(['Categories','NDVI'],axis=1)
target = selected_data['Categories']
model.fit(features, target)


sns.relplot(data=selected_data,
            y='NDBI',
            x='MNDWI',
            hue='Categories',s=10)
plt.xlim(0.65,0.80)
plt.ylim(-0.74,-0.66)
ax = plt.gca()
ax.set_aspect(1.5)

plt.grid(b=False)

plot_mlp_decision_function(model)

plt.savefig(
    f'DecisionTree_{selected_categories[0]}_{selected_categories[1]}.pdf',
    bbox_inches='tight')

../../../_images/1stPart_Chapter6.ImageClustering_PPT-code_random-forest_20_0.png

[624]:

from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(n_estimators=1000, random_state=0)

selected_categories = ['Street', 'Path']

selected_data = input_pd[(input_pd['Categories'] == selected_categories[0]) |
                       (input_pd['Categories'] == selected_categories[1])]

features = selected_data.drop(['Categories','NDBI'],axis=1)
target = selected_data['Categories']
model.fit(features, target)

sns.relplot(data=selected_data,
            x='NDVI',
            y='MNDWI',
            hue='Categories',s=10)

plt.grid(b=False)

plot_mlp_decision_function(model)

plt.savefig(
    f'RandomForest_{selected_categories[0]}_{selected_categories[1]}.pdf',
    bbox_inches='tight')

../../../_images/1stPart_Chapter6.ImageClustering_PPT-code_random-forest_21_0.png

[625]:

selected_categories = ['Shadow','Water']

cmap = sns.diverging_palette(200,20,sep=20,as_cmap=True)

selected_data = input_pd[(input_pd['Categories'] == selected_categories[0]) |
                       (input_pd['Categories'] == selected_categories[1])]

model = RandomForestClassifier(n_estimators=1000, random_state=0)

features = selected_data.drop(['Categories','NDBI'],axis=1)
target = selected_data['Categories']
model.fit(features, target)

sns.relplot(data=selected_data,
            x='NDVI',
            y='MNDWI',
            hue='Categories',s=10)

plt.grid(b=False)

plot_mlp_decision_function(model)

plt.savefig(
    f'RandomForest_{selected_categories[0]}_{selected_categories[1]}.pdf',
    bbox_inches='tight')

../../../_images/1stPart_Chapter6.ImageClustering_PPT-code_random-forest_22_0.png

[626]:

selected_categories = ['Roofs', 'Path']

selected_data = input_pd[(input_pd['Categories'] == selected_categories[0]) |
                       (input_pd['Categories'] == selected_categories[1])]
model = RandomForestClassifier(n_estimators=1000, random_state=0)



features = selected_data.drop(['Categories','NDVI'],axis=1)
target = selected_data['Categories']
model.fit(features, target)


sns.relplot(data=selected_data,
            y='NDBI',
            x='MNDWI',
            hue='Categories',s=10)
plt.xlim(0.65,0.80)
plt.ylim(-0.74,-0.66)
ax = plt.gca()
ax.set_aspect(1.5)

plt.grid(b=False)

plot_mlp_decision_function(model)

plt.savefig(
    f'RandomForest_{selected_categories[0]}_{selected_categories[1]}.pdf',
    bbox_inches='tight')

../../../_images/1stPart_Chapter6.ImageClustering_PPT-code_random-forest_23_0.png

[629]:

selected_categories = ['Grass','Trees']

cmap = sns.diverging_palette(20,200,sep=20,as_cmap=True)

selected_data = input_pd[(input_pd['Categories'] == selected_categories[0]) |
                       (input_pd['Categories'] == selected_categories[1])]

model = RandomForestClassifier(n_estimators=1000, random_state=0)



features = selected_data.drop(['Categories','NDBI'],axis=1)
target = selected_data['Categories']
model.fit(features, target)


sns.relplot(data=selected_data,
            x='NDVI',
            y='MNDWI',
            hue='Categories',s=10)

plt.grid(b=False)

plot_mlp_decision_function(model)

plt.savefig(
    f'RandomForest_{selected_categories[0]}_{selected_categories[1]}.pdf',
    bbox_inches='tight')

../../../_images/1stPart_Chapter6.ImageClustering_PPT-code_random-forest_24_0.png