{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 随机森林"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "吕**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Inline figures only. `%pylab inline` was replaced: its star-import clobbered\n",
    "# `imread` (it raised a UserWarning saying so) and no cell in this notebook\n",
    "# uses a name that it injected.\n",
    "%matplotlib inline\n",
    "\n",
    "import os, shutil\n",
    "\n",
    "import numpy as np\n",
    "import numpy.ma as ma\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from matplotlib.colors import ListedColormap\n",
    "from skimage import io, data\n",
    "from skimage.io import imread\n",
    "\n",
    "# NOTE(review): `sklearn.datasets.samples_generator` and `sklearn.externals.joblib`\n",
    "# only exist in older scikit-learn releases; kept because this notebook targets one.\n",
    "from sklearn.datasets.samples_generator import make_blobs\n",
    "from sklearn.ensemble import GradientBoostingClassifier, IsolationForest\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.externals import joblib\n",
    "from sklearn.feature_selection import RFE\n",
    "from sklearn.linear_model import SGDClassifier\n",
    "from sklearn.metrics import accuracy_score, confusion_matrix\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import StandardScaler"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Paths and tunables used by the cells below. Raw strings keep the Windows\n",
    "# backslashes literal instead of relying on invalid escapes such as '\\H' or '\\d'.\n",
    "DATA_DIR = r'E:\\Hyperspectral_Project'\n",
    "IMAGE_PATH = os.path.join(DATA_DIR, 'dc.tif')        # hyperspectral cube\n",
    "ROI_PATH = os.path.join(DATA_DIR, 'protest.tif')     # labelled regions of interest\n",
    "CLASSIFIED_PATH = os.path.join(DATA_DIR, '1.tif')    # classification map written at the end\n",
    "OOB_LOG_PATH = r'E:\\output.txt'                      # per-candidate OOB error log\n",
    "\n",
    "BAND_SLICE = slice(1, 30)      # spectral bands used as features\n",
    "TREE_COUNTS = range(100, 120)  # candidate values for n_estimators"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "读取数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "img = imread(IMAGE_PATH)\n",
    "roi = io.imread(ROI_PATH)\n",
    "\n",
    "# Move the first axis last so each pixel's spectrum is contiguous;\n",
    "# the result is (1280, 307, 191) for this scene.\n",
    "img = np.transpose(img, (1, 2, 0))\n",
    "n_rows, n_cols, n_bands = img.shape\n",
    "labels = np.unique(roi[roi > 0])\n",
    "\n",
    "# Every pixel of the scene restricted to the selected bands. Kept unmasked\n",
    "# because the final cell classifies the whole image with it.\n",
    "t = img.reshape(n_rows * n_cols, n_bands)[:, BAND_SLICE]\n",
    "\n",
    "# The first ROI channel is used as the class label of each pixel.\n",
    "Y_all = roi[:, :, 0].ravel()\n",
    "assert Y_all.shape[0] == t.shape[0], 'ROI mask and image must cover the same pixels'\n",
    "\n",
    "print(Y_all.shape)\n",
    "print(t.shape)\n",
    "\n",
    "# Only pixels with a positive label take part in training and evaluation.\n",
    "labelled = Y_all > 0\n",
    "X = t[labelled, :]\n",
    "Y = Y_all[labelled]\n",
    "\n",
    "np.unique(Y)"
   ]
  },
  {
   "cell_type": "raw",
   "metadata": {},
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.metrics import accuracy_score\n",
    "from ipywidgets import interact,interact_manual"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "创建训练集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# First split: 75 % train+validation, 25 % held-out test, stratified by class.\n",
    "X_trainval, X_test, y_trainval, y_test = train_test_split(\n",
    "    X,\n",
    "    Y,\n",
    "    train_size=0.75,\n",
    "    random_state=42,\n",
    "    stratify=Y)\n",
    "\n",
    "# Second split: 66 % of the remainder for training, the rest for validation.\n",
    "# It reads from X_trainval rather than overwriting X_train from itself, so\n",
    "# re-running this cell always yields the same partitions.\n",
    "X_train, X_valid, y_train, y_valid = train_test_split(\n",
    "    X_trainval,\n",
    "    y_trainval,\n",
    "    train_size=0.66,\n",
    "    random_state=0,\n",
    "    stratify=y_trainval)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "模型参数调试,创建最佳分类器"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fit one forest per candidate tree count and record its out-of-bag error.\n",
    "oob_errors = np.zeros(len(TREE_COUNTS))\n",
    "with open(OOB_LOG_PATH, 'w') as f:  # closed even if a fit raises\n",
    "    for k, n_trees in enumerate(TREE_COUNTS):\n",
    "        candidate = RandomForestClassifier(n_estimators=n_trees, random_state=0, oob_score=True)\n",
    "        candidate.fit(X_train, y_train)\n",
    "        oob_error = 1 - candidate.oob_score_\n",
    "        oob_errors[k] = oob_error\n",
    "\n",
    "        f.write('-----------------------------------\\n')\n",
    "        f.write('TreesNumber: %d\\n' % n_trees)\n",
    "        f.write('OOB error: %.6f\\n' % oob_error)\n",
    "        f.write('-------------------------\\n')\n",
    "        print(oob_error)\n",
    "\n",
    "# argmin gives the POSITION of the first minimum inside TREE_COUNTS; it must be\n",
    "# mapped back to the tree count itself. Passing the position (+1) as\n",
    "# n_estimators built a forest of only 1-20 trees instead of 100-119.\n",
    "best_n_trees = TREE_COUNTS[int(np.argmin(oob_errors))]\n",
    "\n",
    "# random_state is fixed so the selected forest is reproducible, like the candidates.\n",
    "rfc = RandomForestClassifier(n_estimators=best_n_trees, max_features=3, oob_score=True,\n",
    "                             warm_start=True, random_state=0)\n",
    "rfc.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "混淆矩阵"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Predict the held-out test set with the selected forest. This statement used to\n",
    "# sit in a raw (never executed) cell, which left `y_model` undefined here.\n",
    "y_model = rfc.predict(X_test)\n",
    "\n",
    "# Rows are true classes, columns are predicted classes, both in rfc.classes_ order.\n",
    "mat = confusion_matrix(y_test, y_model, labels=rfc.classes_)\n",
    "\n",
    "fig, ax = plt.subplots()\n",
    "sns.heatmap(mat, square=True, annot=True, fmt='d', cbar=False,\n",
    "            xticklabels=rfc.classes_, yticklabels=rfc.classes_, ax=ax)\n",
    "ax.set_xlabel('predicted value')\n",
    "ax.set_ylabel('true value');"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "分类精度"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Print the train/validation scores explicitly: a notebook cell only displays\n",
    "# its last expression, so bare score() calls were silently discarded.\n",
    "print('train accuracy: %.4f' % rfc.score(X_train, y_train))\n",
    "print('validation accuracy: %.4f' % rfc.score(X_valid, y_valid))\n",
    "\n",
    "# Test accuracy of the selected forest (displayed as the cell result).\n",
    "y_model = rfc.predict(X_test)\n",
    "accuracy_score(y_test, y_model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Classify every pixel of the scene and fold the labels back onto the image grid.\n",
    "img_class = rfc.predict(t).reshape(n_rows, n_cols)\n",
    "\n",
    "# To preview the map: plt.imshow(img_class)\n",
    "\n",
    "# Save the classification map next to the input data.\n",
    "io.imsave(CLASSIFIED_PATH, img_class)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}