{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 随机森林"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "吕**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Populating the interactive namespace from numpy and matplotlib\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "d:\\Users\\Lenovo\\Anaconda3\\lib\\site-packages\\IPython\\core\\magics\\pylab.py:161: UserWarning: pylab import has clobbered these variables: ['imread']\n",
      "`%matplotlib` prevents importing * from pylab and numpy\n",
      "  \"\\n`%matplotlib` prevents importing * from pylab and numpy\"\n"
     ]
    }
   ],
   "source": [
    "%pylab inline\n",
    "\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.linear_model import SGDClassifier\n",
    "from sklearn.datasets.samples_generator import make_blobs\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from matplotlib.colors import ListedColormap\n",
    "from skimage import io,data\n",
    "\n",
    "from sklearn.feature_selection import RFE\n",
    "from sklearn.ensemble import GradientBoostingClassifier, IsolationForest\n",
    "from sklearn.externals import joblib\n",
    "from sklearn.model_selection  import train_test_split\n",
    "import numpy.ma as ma\n",
    "import os, shutil"
   ]
  },
  {
   "cell_type": "raw",
   "metadata": {},
   "source": [
    "读取数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(392960,)\n",
      "(392960, 29)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([  2,   3,  51, 102, 153, 204, 255], dtype=int16)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "from skimage import io,data\n",
    "from skimage.io import imread\n",
    "img = imread('E:\\Hyperspectral_Project\\dc.tif')\n",
    "roi = io.imread('E:\\Hyperspectral_Project\\protest.tif')\n",
    "\n",
    "img = np.transpose(img,(1,2,0))#(1280, 307,191)\n",
    "labels = np.unique(roi[roi > 0])\n",
    "X =img.reshape(392960,191)\n",
    "t =img.reshape(392960,191)\n",
    "\n",
    "X = X[:,1:30]\n",
    "t = t[:,1:30]\n",
    "\n",
    "Y=roi[:,:,0]\n",
    "np.unique(Y)\n",
    "\n",
    "Y=Y.ravel()\n",
    "\n",
    "print(Y.shape)\n",
    "print(X.shape)\n",
    "\n",
    "X = X[Y>0,:]\n",
    "Y = Y[Y>0]\n",
    "\n",
    "np.unique(Y)\n"
   ]
  },
  {
   "cell_type": "raw",
   "metadata": {},
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.metrics import accuracy_score\n",
    "from ipywidgets import interact,interact_manual"
   ]
  },
  {
   "cell_type": "raw",
   "metadata": {},
   "source": [
    "创建训练集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "        X,\n",
    "        Y,\n",
    "        train_size=0.75,\n",
    "        random_state= 42,\n",
    "        stratify=Y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "X_train, X_valid, y_train, y_valid = train_test_split(\n",
    "        X_train,\n",
    "        y_train,\n",
    "        train_size=0.66,\n",
    "        random_state= 0,\n",
    "        stratify=y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "模型参数调试，创建最佳分类器"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.0231707317073\n",
      "0.0227642276423\n",
      "0.0233739837398\n",
      "0.0231707317073\n",
      "0.0231707317073\n",
      "0.0231707317073\n",
      "0.0229674796748\n",
      "0.0229674796748\n",
      "0.0229674796748\n",
      "0.0231707317073\n",
      "0.0229674796748\n",
      "0.0233739837398\n",
      "0.0235772357724\n",
      "0.0233739837398\n",
      "0.0235772357724\n",
      "0.0233739837398\n",
      "0.0229674796748\n",
      "0.0229674796748\n",
      "0.0227642276423\n",
      "0.0227642276423\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "d:\\Users\\Lenovo\\Anaconda3\\lib\\site-packages\\sklearn\\ensemble\\forest.py:439: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable oob estimates.\n",
      "  warn(\"Some inputs do not have OOB scores. \"\n",
      "d:\\Users\\Lenovo\\Anaconda3\\lib\\site-packages\\sklearn\\ensemble\\forest.py:444: RuntimeWarning: invalid value encountered in true_divide\n",
      "  predictions[k].sum(axis=1)[:, np.newaxis])\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
       "            max_depth=None, max_features=3, max_leaf_nodes=None,\n",
       "            min_impurity_split=1e-07, min_samples_leaf=1,\n",
       "            min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
       "            n_estimators=2, n_jobs=1, oob_score=True, random_state=None,\n",
       "            verbose=0, warm_start=True)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.ensemble import RandomForestClassifier\n",
    "OOB=np.zeros(20)\n",
    "f=open(r\"E:\\output.txt\",'w')\n",
    "for i in range(100,120,1):\n",
    "    model = RandomForestClassifier(n_estimators=i, random_state=0,oob_score=True)\n",
    "    model.fit(X_train,y_train)  \n",
    "    \n",
    "    f.write('-----------------------------------\\n')\n",
    "    f.write(\"TreesNumber: %d\"%(i))\n",
    "    f.write('\\n');\n",
    "    OOBerror=1-model.oob_score_\n",
    "    f.write(\"OOB error: %.6f\"%OOBerror)\n",
    "    f.write('\\n');\n",
    "    f.write('-------------------------\\n')\n",
    "    OOB[i-100]=OOBerror\n",
    "    print(OOBerror)\n",
    "\n",
    "f.close()\n",
    "minError=np.min(OOB)\n",
    "ind=np.where(OOB==minError)\n",
    "trees=ind[0][0]\n",
    "rfc=RandomForestClassifier(n_estimators=trees+1,max_features=3,oob_score=True,warm_start=True)\n",
    "rfc.fit(X_train,y_train)"
   ]
  },
  {
   "cell_type": "raw",
   "metadata": {},
   "source": [
    "y_model=model.predict(X_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "混淆矩阵"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'y_model' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-6-12f02ec26980>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m      2\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mmatplotlib\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpyplot\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mplt\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      3\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mseaborn\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0msns\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m \u001b[0mmat\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mconfusion_matrix\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my_test\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my_model\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      5\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      6\u001b[0m \u001b[0msns\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mheatmap\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmat\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msquare\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mannot\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mfmt\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'd'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcbar\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mNameError\u001b[0m: name 'y_model' is not defined"
     ]
    }
   ],
   "source": [
    "from sklearn.metrics import confusion_matrix\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "mat = confusion_matrix(y_test, y_model)\n",
    "\n",
    "sns.heatmap(mat, square=True, annot=True,fmt='d', cbar=False)\n",
    "plt.xlabel('predicted value')\n",
    "plt.ylabel('true value');"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "分类精度"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "model.score(X_train, y_train)\n",
    "model.score(X_valid, y_valid)\n",
    "y_model = model.predict(X_test)\n",
    "accuracy_score(y_test, y_model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#对图像进行分类\n",
    "img_class=model.predict(t)\n",
    "img_class=img_class.reshape(1280,307)\n",
    "#绘制  plt.imshow(img_class)\n",
    "#保存\n",
    "filename=r'E:\\Hyperspectral_Project'\n",
    "io.imsave(filename+os.path.sep+\"1.tif\",img_class)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}