6.3.1 决策树和随机森林
[1]:
# Read the hyperspectral GeoTIFF into an array.
from skimage.io import imread, imsave
img = imread('Hyperspectral_Project/dc.tif')
# Show the array dimensions; later cells index the first axis as the spectral band.
img.shape
[1]:
(191, 1280, 307)
[2]:
import numpy as np
from skimage.exposure import rescale_intensity

# Stack bands 60, 27 and 17 (zero-based indices 59, 26, 16) into an RGB image.
img_RGB = np.dstack([img[band_idx, :, :] for band_idx in (59, 26, 16)])

# Contrast stretch: map the 10th-90th percentile range onto 0-255,
# then store the result as 8-bit integers.
p_10, p_90 = np.percentile(img_RGB, (10, 90))
img_rescale = rescale_intensity(
    img_RGB,
    in_range=(p_10, p_90),
    out_range=(0, 255),
).astype('uint8')

imsave('hyperspectral_img_RGB.png', img_rescale)
[3]:
from skimage.io import imread, imsave
import numpy as np
from skimage.exposure import rescale_intensity

# Re-read the image and rebuild the stretched RGB composite (nothing is saved here).
img = imread('Hyperspectral_Project/dc.tif')

# Stack bands 60, 27 and 17 (zero-based indices 59, 26, 16) into an RGB image.
img_RGB = np.dstack([img[band_idx, :, :] for band_idx in (59, 26, 16)])

# Contrast stretch: map the 10th-90th percentile range onto 0-255,
# then store the result as 8-bit integers.
p_10, p_90 = np.percentile(img_RGB, (10, 90))
img_rescale = rescale_intensity(
    img_RGB,
    in_range=(p_10, p_90),
    out_range=(0, 255),
).astype('uint8')
[4]:
def get_label(file_path):
    """Parse a label file into class labels and rectangular regions.

    The first 13 lines of the file are skipped. The remaining lines are read
    as consecutive three-line records (an incomplete trailing record is
    ignored):

    * line 1 - tab-separated; the fifth field is the 1-based class label;
    * line 2 - tab-separated; fields two and three are the row and column of
      the top-left pixel;
    * line 3 - tab-separated; fields two and three are the row and column of
      the bottom-right pixel.

    Args:
        file_path: Path of the label file to read.

    Returns:
        A ``(label_list, region_list)`` pair. ``label_list`` holds zero-based
        integer labels; ``region_list`` holds ``(r_1 - 1, c_1 - 1, r_2, c_2)``
        integer tuples, one per record.

    Raises:
        ValueError: If a field is not an integer or a coordinate line does not
            contain exactly three tab-separated fields.
        IndexError: If a record's first line has fewer than five fields.
    """
    with open(file_path, 'r') as label_file:
        label_data = label_file.readlines()[13:]

    label_list = []
    region_list = []
    for i in range(len(label_data) // 3):
        header_line, first_corner, second_corner = label_data[i * 3:i * 3 + 3]
        label = header_line.split('\t')[4]
        # Strip only the line terminator. Slicing off the last character
        # would eat a digit when the final line has no trailing newline.
        r_1, c_1 = first_corner.rstrip('\r\n').split('\t')[1:]
        r_2, c_2 = second_corner.rstrip('\r\n').split('\t')[1:]
        label_list.append(int(label) - 1)
        region_list.append(
            (int(r_1) - 1, int(c_1) - 1, int(r_2), int(c_2)))
    return label_list, region_list
[13]:
# Locations of the hyperspectral image and of its label/project file.
img_file_path = 'Hyperspectral_Project/dc.tif'
label_file_path = 'Hyperspectral_Project/dctest.project'
# NOTE(review): load_hyperspectral_data is not defined in the visible cells.
# Presumably X holds per-pixel spectra, y the class indices and pixel_position
# the pixel coordinates - confirm against its definition.
X,y,pixel_position = load_hyperspectral_data(img_file_path,label_file_path)
[49]:
# Normalized-difference indices of the form (a - b) / (a + b).
# NOTE(review): NIR, R, G and MIR are defined outside the visible cells -
# presumably per-sample band values taken from X; confirm.
# NOTE(review): a zero denominator is not guarded; if the bands are integer
# arrays the sums could also overflow before the division - verify dtypes.
NDVI = (NIR-R)/(NIR+R)
MNDWI= (G-MIR)/(G+MIR)
NDBI = (MIR-NIR)/(MIR+NIR)
[75]:
# Feature matrix: one row per sample, columns in the order NDVI, MNDWI, NDBI.
input_x = np.hstack(
    [index_values.reshape(-1, 1) for index_values in (NDVI, MNDWI, NDBI)])

# Translate every entry of y through the `labels` lookup.
input_y = [labels[class_id] for class_id in y]
[76]:
# Sanity check: expect (number of samples, 3) - one column per index.
input_x.shape
[76]:
(8079, 3)
[80]:
import pandas as pd
import seaborn as sns

# Tabular view of the features with the label of each sample attached.
input_pd = pd.DataFrame(
    input_x,
    columns=['NDVI', 'MNDWI', 'NDBI'],
).assign(Categories=input_y)
[313]:
def plot_svc_decision_function(model, ax=None, plot_support=True):
    """Draw the decision boundary and margins of a fitted 2-D SVC.

    The model's ``decision_function`` is evaluated on a 30 x 30 grid spanning
    the current axis limits, and the -1 / 0 / +1 level lines are drawn
    (dashed / solid / dashed). The axis limits are restored afterwards.

    Args:
        model: Fitted estimator exposing ``decision_function`` and, when
            ``plot_support`` is true, ``support_vectors_``. The decision values
            are reshaped to the grid, so one value per grid point is expected.
        ax: Axes to draw on; the current axes are used when omitted.
        plot_support: When true, circle the support vectors.
    """
    if ax is None:
        ax = plt.gca()
    x_bounds = ax.get_xlim()
    y_bounds = ax.get_ylim()

    # Evaluation grid: first index runs along x, second along y.
    grid_x = np.linspace(x_bounds[0], x_bounds[1], 30)
    grid_y = np.linspace(y_bounds[0], y_bounds[1], 30)
    X, Y = np.meshgrid(grid_x, grid_y, indexing='ij')
    grid_points = np.column_stack((X.ravel(), Y.ravel()))
    P = model.decision_function(grid_points).reshape(X.shape)

    # Decision boundary (level 0) and the two margins (levels -1 and +1).
    ax.contour(X, Y, P,
               colors='k',
               levels=[-1, 0, 1],
               alpha=0.5,
               linestyles=['--', '-', '--'])

    # Hollow circles around the support vectors.
    if plot_support:
        support = model.support_vectors_
        ax.scatter(support[:, 0],
                   support[:, 1],
                   s=100, linewidth=1, edgecolors='k', facecolors='none')

    ax.set_xlim(x_bounds)
    ax.set_ylim(y_bounds)
[314]:
def select_categories_visulization(selected_categories):
    """Fit a linear SVC on two categories and plot it in NDVI/MNDWI space.

    Rows of the notebook-level ``input_pd`` whose ``Categories`` value equals
    either of the first two entries of ``selected_categories`` are kept. A
    linear SVC with ``C=1e10`` is trained on their NDVI and MNDWI columns, the
    samples are shown as a scatter plot coloured by category, and the decision
    boundary is drawn on top.

    Args:
        selected_categories: Sequence whose first two items are the category
            names to compare.
    """
    first_name, second_name = selected_categories[0], selected_categories[1]
    categories = input_pd['Categories']
    is_first = categories == first_name
    is_second = categories == second_name
    selected_data = input_pd[is_first | is_second]

    # Train on NDVI and MNDWI only (the label and NDBI columns are dropped).
    features = selected_data.drop(columns=['Categories', 'NDBI'])
    target = selected_data['Categories']
    model = SVC(kernel='linear', C=1e10)
    model.fit(features, target)

    sns.relplot(data=selected_data, x='NDVI', y='MNDWI', hue='Categories')
    plot_svc_decision_function(model)
[231]:
def select_multiple_categories_visulization(selected_categories):
    """Fit a linear SVC on the chosen categories and plot it in NDVI/MNDWI space.

    Rows of the notebook-level ``input_pd`` whose ``Categories`` value is one
    of ``selected_categories`` are kept. A linear SVC with ``C=1e10`` is
    trained on their NDVI and MNDWI columns, the samples are shown as a
    scatter plot coloured by category, and ``plot_svc_decision_function`` is
    called on the fitted model.

    Args:
        selected_categories: Iterable of category names to include.

    Note:
        ``plot_svc_decision_function`` reshapes the decision values to a 2-D
        grid, so it presumably only works when exactly two categories are
        present in the selection - verify before passing more.
    """
    # Exact membership test rather than a regex/substring search, so a name
    # such as 'Path' cannot also select labels that merely contain it, and
    # regex metacharacters in a name are harmless.
    selected_data_index = input_pd['Categories'].isin(list(selected_categories))
    selected_data = input_pd[selected_data_index]

    model = SVC(kernel='linear', C=1e10)
    # Train on NDVI and MNDWI only (the label and NDBI columns are dropped).
    features = selected_data.drop(['Categories', 'NDBI'], axis=1)
    target = selected_data['Categories']
    model.fit(features, target)

    sns.relplot(
        data=selected_data,
        x='NDVI',
        y='MNDWI',
        hue='Categories',
    )
    plot_svc_decision_function(model)
[331]:
# Keep only the samples labelled 'Roofs' or 'Path'.
selected_categories = ['Roofs', 'Path']
# Exact membership test rather than a regex/substring search, so labels that
# merely contain one of the names are not selected by accident.
selected_data_index = input_pd['Categories'].isin(selected_categories)
selected_data = input_pd[selected_data_index]
# NOTE(review): this rebinds the notebook-level X and y that the data-loading
# cell created; re-running earlier cells afterwards will see these values.
X = selected_data.drop(['Categories', 'NDBI'], axis=1)
y = selected_data['Categories']
[613]:
from matplotlib import cm

# Notebook-level colormap; plot_mlp_decision_function reads it at call time,
# so rebinding `cmap` in a later cell changes the colours of subsequent plots.
cmap = sns.diverging_palette(200, 20, sep=20, as_cmap=True)


def plot_mlp_decision_function(model, ax=None):
    """Shade the decision surface of a fitted two-feature classifier.

    The model is evaluated on a 200 x 200 grid spanning the current axis
    limits. ``decision_function`` is used when the model has one; otherwise
    the second column of ``predict_proba`` is used. The surface is drawn
    beneath existing artists as filled contours in steps of 0.1 between 0 and
    1, with thin dashed level lines, a heavier line at 0.5 and a horizontal
    colorbar. Both axis limits are restored afterwards.

    Args:
        model: Fitted estimator exposing ``decision_function`` or
            ``predict_proba``; it is called with an ``(n, 2)`` array.
        ax: Axes to draw on; the current axes are used when omitted.
    """
    if ax is None:
        ax = plt.gca()
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()

    # Evaluation grid: first index runs along x, second along y.
    x = np.linspace(xlim[0], xlim[1], 200)
    y = np.linspace(ylim[0], ylim[1], 200)
    Y, X = np.meshgrid(y, x)
    xy = np.vstack([X.ravel(), Y.ravel()]).T

    if hasattr(model, "decision_function"):
        Z = model.decision_function(xy)
    else:
        Z = model.predict_proba(xy)[:, 1]
    Z = Z.reshape(X.shape)

    # pyplot's Normalize/get_cmap are the supported entry points;
    # matplotlib.cm.get_cmap has been removed from recent Matplotlib releases.
    norm = plt.Normalize(vmin=0, vmax=1)
    levels = np.arange(0, 1.01, 0.1)
    cset1 = ax.contourf(X, Y, Z, levels,
                        norm=norm,
                        alpha=.9,
                        zorder=-2,
                        cmap=plt.get_cmap(cmap, len(levels) - 1))
    # Dashed level lines are requested directly instead of restyling
    # ContourSet.collections, which newer Matplotlib no longer provides.
    ax.contour(X, Y, Z, cset1.levels,
               linewidths=0.5,
               linestyles='dashed',
               alpha=.5,
               colors='k',
               zorder=-1)
    # Emphasise the 0.5 level.
    ax.contour(X, Y, Z, (0.5,),
               colors='k', alpha=.8, linewidths=1.5, zorder=-1)
    plt.colorbar(cset1,
                 orientation='horizontal',
                 fraction=0.05,
                 ax=ax)
    # Restore both limits so the overlay never changes the visible range.
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
[ ]:
def visualize_classifier(model, X, y, ax=None, cmap='rainbow'):
    """Fit ``model`` on ``(X, y)`` and shade its predicted class regions.

    The axes are set to 'tight' and switched off, the model is fitted, and its
    predictions on a 200 x 200 grid spanning the axis limits are drawn as
    translucent filled contours with one band per distinct value in ``y``.

    Args:
        model: Estimator exposing ``fit`` and ``predict``.
        X: Training features passed to ``model.fit``; predictions are made on
            two-column grid points.
        y: Training labels; the number of distinct values sets the contour
            levels.
        ax: Axes to draw on; the current axes are used when omitted.
        cmap: Colormap handed to ``contourf``.
    """
    if not ax:
        ax = plt.gca()
    ax.axis('tight')
    ax.axis('off')
    xlim, ylim = ax.get_xlim(), ax.get_ylim()

    # Train the estimator before querying it on the grid.
    model.fit(X, y)

    grid_x = np.linspace(*xlim, num=200)
    grid_y = np.linspace(*ylim, num=200)
    xx, yy = np.meshgrid(grid_x, grid_y)
    grid_points = np.column_stack((xx.ravel(), yy.ravel()))
    Z = model.predict(grid_points).reshape(xx.shape)

    # Band edges at -0.5, 0.5, 1.5, ... - one band per class.
    class_count = np.unique(y).size
    ax.contourf(xx, yy, Z,
                alpha=0.3,
                levels=np.arange(class_count + 1) - 0.5,
                cmap=cmap,
                zorder=1)
    ax.set(xlim=xlim, ylim=ylim)
[ ]:
def visualize_tree(estimator, X, y, boundaries=True,
                   xlim=None, ylim=None, ax=None):
    """Fit a tree on 2-D data and draw its samples, regions and split lines.

    The training points are scattered, the estimator is fitted on ``(X, y)``,
    its predictions on a 200 x 200 grid are shaded as translucent filled
    contours, and - when ``boundaries`` is true - every split stored in
    ``estimator.tree_`` is drawn as a black segment clipped to the region the
    split applies to.

    Args:
        estimator: Estimator exposing ``fit`` and ``predict`` and, after
            fitting, a ``tree_`` with ``feature``, ``threshold``,
            ``children_left`` and ``children_right``.
        X: Array indexed as ``X[:, 0]`` / ``X[:, 1]`` for the two features.
        y: Labels; used for point colours, so ``min``/``max`` must be defined.
        boundaries: Whether to draw the split lines.
        xlim: Horizontal range for grid and lines; axis limits when omitted.
        ylim: Vertical range for grid and lines; axis limits when omitted.
        ax: Axes to draw on; the current axes are used when omitted.
    """
    if not ax:
        ax = plt.gca()

    # Training samples, coloured by label.
    ax.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap='viridis',
               clim=(y.min(), y.max()), zorder=3)
    ax.axis('tight')
    ax.axis('off')
    xlim = ax.get_xlim() if xlim is None else xlim
    ylim = ax.get_ylim() if ylim is None else ylim

    # Train the estimator before querying it on the grid.
    estimator.fit(X, y)
    xx, yy = np.meshgrid(np.linspace(*xlim, num=200),
                         np.linspace(*ylim, num=200))
    grid_points = np.column_stack((xx.ravel(), yy.ravel()))
    Z = estimator.predict(grid_points).reshape(xx.shape)

    # Band edges at -0.5, 0.5, 1.5, ... - one band per class.
    class_count = np.unique(y).size
    ax.contourf(xx, yy, Z,
                alpha=0.3,
                levels=np.arange(class_count + 1) - 0.5,
                cmap='viridis',
                zorder=1)
    ax.set(xlim=xlim, ylim=ylim)

    if not boundaries:
        return

    # Pre-order walk (node, left subtree, right subtree) with an explicit
    # stack; each entry carries the rectangle the node is responsible for.
    tree = estimator.tree_
    pending = [(0, xlim, ylim)]
    while pending:
        node, x_range, y_range = pending.pop()
        if node < 0:
            continue
        split_axis = tree.feature[node]
        if split_axis == 0:
            # Split on the first feature: vertical segment.
            cut = tree.threshold[node]
            ax.plot([cut, cut], y_range, '-k', zorder=2)
            left_box = ([x_range[0], cut], y_range)
            right_box = ([cut, x_range[1]], y_range)
        elif split_axis == 1:
            # Split on the second feature: horizontal segment.
            cut = tree.threshold[node]
            ax.plot(x_range, [cut, cut], '-k', zorder=2)
            left_box = (x_range, [y_range[0], cut])
            right_box = (x_range, [cut, y_range[1]])
        else:
            # Any other feature value: nothing to draw, nothing to descend into.
            continue
        # Push right first so the left subtree is processed first.
        pending.append((tree.children_right[node],) + right_box)
        pending.append((tree.children_left[node],) + left_box)
[616]:
from sklearn.tree import DecisionTreeClassifier

# Decision tree separating 'Street' from 'Path' in NDVI/MNDWI space.
selected_categories = ['Street', 'Path']
selected_data = input_pd[input_pd['Categories'].isin(selected_categories)]

# Fixed seed so ties between equally good splits resolve the same way on
# every run and the saved figure is reproducible.
model = DecisionTreeClassifier(random_state=0)
# Train on NDVI and MNDWI only (the label and NDBI columns are dropped).
features = selected_data.drop(['Categories', 'NDBI'], axis=1)
target = selected_data['Categories']
model.fit(features, target)

sns.relplot(data=selected_data,
            x='NDVI',
            y='MNDWI',
            hue='Categories', s=10)
# Positional flag: the `b=` keyword was renamed to `visible=` and is rejected
# by current Matplotlib releases.
plt.grid(False)
plot_mlp_decision_function(model)
plt.savefig(
    f'DecisionTree_{selected_categories[0]}_{selected_categories[1]}.pdf',
    bbox_inches='tight')
[615]:
from sklearn.tree import DecisionTreeClassifier

# Decision tree separating 'Shadow' from 'Water' in NDVI/MNDWI space.
selected_categories = ['Shadow', 'Water']
selected_data = input_pd[input_pd['Categories'].isin(selected_categories)]

# Fixed seed so ties between equally good splits resolve the same way on
# every run and the saved figure is reproducible.
model = DecisionTreeClassifier(random_state=0)
# Train on NDVI and MNDWI only (the label and NDBI columns are dropped).
features = selected_data.drop(['Categories', 'NDBI'], axis=1)
target = selected_data['Categories']
model.fit(features, target)

sns.relplot(data=selected_data,
            x='NDVI',
            y='MNDWI',
            hue='Categories', s=10)
# Positional flag: the `b=` keyword was renamed to `visible=` and is rejected
# by current Matplotlib releases.
plt.grid(False)
plot_mlp_decision_function(model)
plt.savefig(
    f'DecisionTree_{selected_categories[0]}_{selected_categories[1]}.pdf',
    bbox_inches='tight')
[614]:
from sklearn.neural_network import MLPClassifier
# Imported here as well so this cell does not rely on another cell having
# brought DecisionTreeClassifier into scope.
from sklearn.tree import DecisionTreeClassifier

# Decision tree separating 'Grass' from 'Trees' in NDVI/MNDWI space.
selected_categories = ['Grass', 'Trees']
selected_data = input_pd[input_pd['Categories'].isin(selected_categories)]

# Fixed seed so ties between equally good splits resolve the same way on
# every run and the saved figure is reproducible.
model = DecisionTreeClassifier(random_state=0)
# Train on NDVI and MNDWI only (the label and NDBI columns are dropped).
features = selected_data.drop(['Categories', 'NDBI'], axis=1)
target = selected_data['Categories']
model.fit(features, target)

sns.relplot(data=selected_data,
            x='NDVI',
            y='MNDWI',
            hue='Categories', s=10)
# Positional flag: the `b=` keyword was renamed to `visible=` and is rejected
# by current Matplotlib releases.
plt.grid(False)
plot_mlp_decision_function(model)
plt.savefig(
    f'DecisionTree_{selected_categories[0]}_{selected_categories[1]}.pdf',
    bbox_inches='tight')
[617]:
# Decision tree separating 'Roofs' from 'Path' in MNDWI/NDBI space.
selected_categories = ['Roofs', 'Path']
selected_data = input_pd[input_pd['Categories'].isin(selected_categories)]

# Fixed seed so ties between equally good splits resolve the same way on
# every run and the saved figure is reproducible.
model = DecisionTreeClassifier(random_state=0)
# Train on MNDWI and NDBI only (the label and NDVI columns are dropped);
# the column order matches the x/y axes of the plot below.
features = selected_data.drop(['Categories', 'NDVI'], axis=1)
target = selected_data['Categories']
model.fit(features, target)

sns.relplot(data=selected_data,
            y='NDBI',
            x='MNDWI',
            hue='Categories', s=10)
# Zoom in before drawing the surface: the overlay is evaluated on whatever
# axis limits are current when it is called.
plt.xlim(0.65, 0.80)
plt.ylim(-0.74, -0.66)
ax = plt.gca()
ax.set_aspect(1.5)
# Positional flag: the `b=` keyword was renamed to `visible=` and is rejected
# by current Matplotlib releases.
plt.grid(False)
plot_mlp_decision_function(model)
plt.savefig(
    f'DecisionTree_{selected_categories[0]}_{selected_categories[1]}.pdf',
    bbox_inches='tight')
[624]:
from sklearn.ensemble import RandomForestClassifier

# Random forest (1000 trees, fixed seed) separating 'Street' from 'Path'
# in NDVI/MNDWI space.
model = RandomForestClassifier(n_estimators=1000, random_state=0)
selected_categories = ['Street', 'Path']
selected_data = input_pd[input_pd['Categories'].isin(selected_categories)]
# Train on NDVI and MNDWI only (the label and NDBI columns are dropped).
features = selected_data.drop(['Categories', 'NDBI'], axis=1)
target = selected_data['Categories']
model.fit(features, target)

sns.relplot(data=selected_data,
            x='NDVI',
            y='MNDWI',
            hue='Categories', s=10)
# Positional flag: the `b=` keyword was renamed to `visible=` and is rejected
# by current Matplotlib releases.
plt.grid(False)
plot_mlp_decision_function(model)
plt.savefig(
    f'RandomForest_{selected_categories[0]}_{selected_categories[1]}.pdf',
    bbox_inches='tight')
[625]:
# Random forest (1000 trees, fixed seed) separating 'Shadow' from 'Water'
# in NDVI/MNDWI space.
selected_categories = ['Shadow', 'Water']
# Rebinds the notebook-level colormap that plot_mlp_decision_function reads.
cmap = sns.diverging_palette(200, 20, sep=20, as_cmap=True)
selected_data = input_pd[input_pd['Categories'].isin(selected_categories)]

model = RandomForestClassifier(n_estimators=1000, random_state=0)
# Train on NDVI and MNDWI only (the label and NDBI columns are dropped).
features = selected_data.drop(['Categories', 'NDBI'], axis=1)
target = selected_data['Categories']
model.fit(features, target)

sns.relplot(data=selected_data,
            x='NDVI',
            y='MNDWI',
            hue='Categories', s=10)
# Positional flag: the `b=` keyword was renamed to `visible=` and is rejected
# by current Matplotlib releases.
plt.grid(False)
plot_mlp_decision_function(model)
plt.savefig(
    f'RandomForest_{selected_categories[0]}_{selected_categories[1]}.pdf',
    bbox_inches='tight')
[626]:
# Random forest (1000 trees, fixed seed) separating 'Roofs' from 'Path'
# in MNDWI/NDBI space.
selected_categories = ['Roofs', 'Path']
selected_data = input_pd[input_pd['Categories'].isin(selected_categories)]

model = RandomForestClassifier(n_estimators=1000, random_state=0)
# Train on MNDWI and NDBI only (the label and NDVI columns are dropped);
# the column order matches the x/y axes of the plot below.
features = selected_data.drop(['Categories', 'NDVI'], axis=1)
target = selected_data['Categories']
model.fit(features, target)

sns.relplot(data=selected_data,
            y='NDBI',
            x='MNDWI',
            hue='Categories', s=10)
# Zoom in before drawing the surface: the overlay is evaluated on whatever
# axis limits are current when it is called.
plt.xlim(0.65, 0.80)
plt.ylim(-0.74, -0.66)
ax = plt.gca()
ax.set_aspect(1.5)
# Positional flag: the `b=` keyword was renamed to `visible=` and is rejected
# by current Matplotlib releases.
plt.grid(False)
plot_mlp_decision_function(model)
plt.savefig(
    f'RandomForest_{selected_categories[0]}_{selected_categories[1]}.pdf',
    bbox_inches='tight')
[629]:
# Random forest (1000 trees, fixed seed) separating 'Grass' from 'Trees'
# in NDVI/MNDWI space.
selected_categories = ['Grass', 'Trees']
# Rebinds the notebook-level colormap that plot_mlp_decision_function reads;
# the two hues are swapped relative to the other cells.
cmap = sns.diverging_palette(20, 200, sep=20, as_cmap=True)
selected_data = input_pd[input_pd['Categories'].isin(selected_categories)]

model = RandomForestClassifier(n_estimators=1000, random_state=0)
# Train on NDVI and MNDWI only (the label and NDBI columns are dropped).
features = selected_data.drop(['Categories', 'NDBI'], axis=1)
target = selected_data['Categories']
model.fit(features, target)

sns.relplot(data=selected_data,
            x='NDVI',
            y='MNDWI',
            hue='Categories', s=10)
# Positional flag: the `b=` keyword was renamed to `visible=` and is rejected
# by current Matplotlib releases.
plt.grid(False)
plot_mlp_decision_function(model)
plt.savefig(
    f'RandomForest_{selected_categories[0]}_{selected_categories[1]}.pdf',
    bbox_inches='tight')