630 lines
66 KiB
Plaintext
630 lines
66 KiB
Plaintext
|
|
{
|
|||
|
|
"cells": [
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 2,
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/html": [
|
|||
|
|
"<div>\n",
|
|||
|
|
"<style scoped>\n",
|
|||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
|
" vertical-align: middle;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe tbody tr th {\n",
|
|||
|
|
" vertical-align: top;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe thead th {\n",
|
|||
|
|
" text-align: right;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"</style>\n",
|
|||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
|
" <thead>\n",
|
|||
|
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th>0</th>\n",
|
|||
|
|
" <th>1</th>\n",
|
|||
|
|
" <th>2</th>\n",
|
|||
|
|
" <th>3</th>\n",
|
|||
|
|
" <th>4</th>\n",
|
|||
|
|
" <th>0.1</th>\n",
|
|||
|
|
" <th>1.1</th>\n",
|
|||
|
|
" <th>2.1</th>\n",
|
|||
|
|
" <th>3.1</th>\n",
|
|||
|
|
" <th>4.1</th>\n",
|
|||
|
|
" <th>0.2</th>\n",
|
|||
|
|
" <th>1.2</th>\n",
|
|||
|
|
" <th>2.2</th>\n",
|
|||
|
|
" <th>3.2</th>\n",
|
|||
|
|
" <th>4.2</th>\n",
|
|||
|
|
" <th>label</th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </thead>\n",
|
|||
|
|
" <tbody>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>0</th>\n",
|
|||
|
|
" <td>0.715031</td>\n",
|
|||
|
|
" <td>0.608764</td>\n",
|
|||
|
|
" <td>0.631124</td>\n",
|
|||
|
|
" <td>0.650236</td>\n",
|
|||
|
|
" <td>0.512070</td>\n",
|
|||
|
|
" <td>0.277071</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.125949</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.003074</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>hupu</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>1</th>\n",
|
|||
|
|
" <td>0.662252</td>\n",
|
|||
|
|
" <td>0.541335</td>\n",
|
|||
|
|
" <td>0.570617</td>\n",
|
|||
|
|
" <td>0.584465</td>\n",
|
|||
|
|
" <td>0.433609</td>\n",
|
|||
|
|
" <td>0.451821</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.106180</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>hupu</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>2</th>\n",
|
|||
|
|
" <td>0.894304</td>\n",
|
|||
|
|
" <td>0.861196</td>\n",
|
|||
|
|
" <td>0.824562</td>\n",
|
|||
|
|
" <td>0.870190</td>\n",
|
|||
|
|
" <td>0.850370</td>\n",
|
|||
|
|
" <td>0.414012</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.047087</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.033857</td>\n",
|
|||
|
|
" <td>hupu</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>3</th>\n",
|
|||
|
|
" <td>0.743064</td>\n",
|
|||
|
|
" <td>0.612346</td>\n",
|
|||
|
|
" <td>0.575538</td>\n",
|
|||
|
|
" <td>0.649530</td>\n",
|
|||
|
|
" <td>0.692186</td>\n",
|
|||
|
|
" <td>0.158237</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.032681</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.031898</td>\n",
|
|||
|
|
" <td>hupu</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>4</th>\n",
|
|||
|
|
" <td>0.667459</td>\n",
|
|||
|
|
" <td>0.508679</td>\n",
|
|||
|
|
" <td>0.472092</td>\n",
|
|||
|
|
" <td>0.547407</td>\n",
|
|||
|
|
" <td>0.598427</td>\n",
|
|||
|
|
" <td>0.375792</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.184405</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.112612</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.027564</td>\n",
|
|||
|
|
" <td>hupu</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>...</th>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>1596</th>\n",
|
|||
|
|
" <td>0.809034</td>\n",
|
|||
|
|
" <td>0.709333</td>\n",
|
|||
|
|
" <td>0.673414</td>\n",
|
|||
|
|
" <td>0.743852</td>\n",
|
|||
|
|
" <td>0.776838</td>\n",
|
|||
|
|
" <td>0.046705</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.049131</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.027310</td>\n",
|
|||
|
|
" <td>zhihu</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>1597</th>\n",
|
|||
|
|
" <td>0.911794</td>\n",
|
|||
|
|
" <td>0.816672</td>\n",
|
|||
|
|
" <td>0.847797</td>\n",
|
|||
|
|
" <td>0.946110</td>\n",
|
|||
|
|
" <td>0.986384</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.057710</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.029776</td>\n",
|
|||
|
|
" <td>zhihu</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>1598</th>\n",
|
|||
|
|
" <td>0.965672</td>\n",
|
|||
|
|
" <td>0.957016</td>\n",
|
|||
|
|
" <td>0.934561</td>\n",
|
|||
|
|
" <td>0.991138</td>\n",
|
|||
|
|
" <td>0.995043</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.097930</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.045403</td>\n",
|
|||
|
|
" <td>zhihu</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>1599</th>\n",
|
|||
|
|
" <td>0.961447</td>\n",
|
|||
|
|
" <td>0.945493</td>\n",
|
|||
|
|
" <td>0.927631</td>\n",
|
|||
|
|
" <td>0.987624</td>\n",
|
|||
|
|
" <td>0.994379</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.084861</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.066429</td>\n",
|
|||
|
|
" <td>zhihu</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>1600</th>\n",
|
|||
|
|
" <td>0.969485</td>\n",
|
|||
|
|
" <td>0.967490</td>\n",
|
|||
|
|
" <td>0.940832</td>\n",
|
|||
|
|
" <td>0.994306</td>\n",
|
|||
|
|
" <td>0.995641</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.117410</td>\n",
|
|||
|
|
" <td>zhihu</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </tbody>\n",
|
|||
|
|
"</table>\n",
|
|||
|
|
"<p>4670 rows × 16 columns</p>\n",
|
|||
|
|
"</div>"
|
|||
|
|
],
|
|||
|
|
"text/plain": [
|
|||
|
|
" 0 1 2 3 4 0.1 1.1 2.1 3.1 4.1 0.2 1.2 2.2 3.2 4.2 label\n",
|
|||
|
|
"0 0.715031 0.608764 0.631124 0.650236 0.512070 0.277071 0.0 0.0 0.000000 0.000000 0.125949 0.0 0.003074 0.0 0.000000 hupu\n",
|
|||
|
|
"1 0.662252 0.541335 0.570617 0.584465 0.433609 0.451821 0.0 0.0 0.000000 0.000000 0.106180 0.0 0.000000 0.0 0.000000 hupu\n",
|
|||
|
|
"2 0.894304 0.861196 0.824562 0.870190 0.850370 0.414012 0.0 0.0 0.000000 0.000000 0.047087 0.0 0.000000 0.0 0.033857 hupu\n",
|
|||
|
|
"3 0.743064 0.612346 0.575538 0.649530 0.692186 0.158237 0.0 0.0 0.000000 0.000000 0.032681 0.0 0.000000 0.0 0.031898 hupu\n",
|
|||
|
|
"4 0.667459 0.508679 0.472092 0.547407 0.598427 0.375792 0.0 0.0 0.184405 0.000000 0.112612 0.0 0.000000 0.0 0.027564 hupu\n",
|
|||
|
|
"... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n",
|
|||
|
|
"1596 0.809034 0.709333 0.673414 0.743852 0.776838 0.046705 0.0 0.0 0.000000 0.049131 0.000000 0.0 0.000000 0.0 0.027310 zhihu\n",
|
|||
|
|
"1597 0.911794 0.816672 0.847797 0.946110 0.986384 0.000000 0.0 0.0 0.000000 0.057710 0.000000 0.0 0.000000 0.0 0.029776 zhihu\n",
|
|||
|
|
"1598 0.965672 0.957016 0.934561 0.991138 0.995043 0.000000 0.0 0.0 0.000000 0.097930 0.000000 0.0 0.000000 0.0 0.045403 zhihu\n",
|
|||
|
|
"1599 0.961447 0.945493 0.927631 0.987624 0.994379 0.000000 0.0 0.0 0.000000 0.084861 0.000000 0.0 0.000000 0.0 0.066429 zhihu\n",
|
|||
|
|
"1600 0.969485 0.967490 0.940832 0.994306 0.995641 0.000000 0.0 0.0 0.000000 0.000000 0.000000 0.0 0.000000 0.0 0.117410 zhihu\n",
|
|||
|
|
"\n",
|
|||
|
|
"[4670 rows x 16 columns]"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"execution_count": 2,
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "execute_result"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"import pandas as pd\n",
|
|||
|
|
"import numpy as np\n",
|
|||
|
"# Relative locations of the train and test CSV files.\n",
"train_path = '../datasets/2019-12-21/train.csv'\n",
"test_path = '../datasets/2019-12-21/test.csv'\n",
"\n",
"# index_col=0: the first CSV column is used as the DataFrame index.\n",
"train_df, test_df = (\n",
"    pd.read_csv(csv_path, index_col=0) for csv_path in (train_path, test_path)\n",
")\n",
"\n",
"# Last expression of the cell, so the notebook renders the test frame.\n",
"test_df"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 3,
|
|||
|
|
"metadata": {
|
|||
|
|
"collapsed": true
|
|||
|
|
},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"from sklearn.linear_model import LogisticRegression\n",
|
|||
|
|
"from sklearn.svm import SVC\n",
|
|||
|
|
"from sklearn.naive_bayes import GaussianNB\n",
|
|||
|
|
"from sklearn import tree\n",
|
|||
|
|
"from sklearn.ensemble import RandomForestClassifier\n",
|
|||
|
|
"from sklearn.metrics import f1_score,recall_score,precision_score"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 14,
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stderr",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"c:\\python38\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:430: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
|
|||
|
|
" warnings.warn(\"Default solver will be changed to 'lbfgs' in 0.22. \"\n",
|
|||
|
|
"c:\\python38\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:467: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
|
|||
|
|
" warnings.warn(\"Default multi_class will be changed to 'auto' in\"\n",
|
|||
|
|
"c:\\python38\\lib\\site-packages\\sklearn\\svm\\base.py:189: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
|
|||
|
|
" warnings.warn(\"The default value of gamma will change \"\n",
|
|||
|
|
"c:\\python38\\lib\\site-packages\\sklearn\\ensemble\\forest.py:244: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n",
|
|||
|
|
" warn(\"The default value of n_estimators will change from \"\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"precision 0.801927\n",
|
|||
|
|
"recall 0.801927\n",
|
|||
|
|
"f1 0.801927\n",
|
|||
|
|
"Name: RandomForest, dtype: float64\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAFcCAYAAADYnEUIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAgAElEQVR4nO3debxVdb3/8debI4gDasmxlEFQyTFFBdRrEuk1h1+JUldBDdOUUMzMHLDpVuZVr2V6UyM0cUgkzSFUUsvEIYcARRTURAQ5agmYA4gyfX5/rHVksznDBs45a7PW+/l4nId7rfVlnc/Zwvus/V3f9f0qIjAzs/Vfu6wLMDOzluFANzPLCQe6mVlOONDNzHLCgW5mlhMOdDOznHCgm7USSbMl/WfWdVhxONAtUw49s5bjQDdrYZI2yLoGKyYHumVG0k1Ad+BuSQslnSvpXknfKms3TdKR6euQdIakWZLmS7pUUruStidJekHSvyXdL2nbRr53j/RcJ0qam7YfLqlv+v3ekXRlSfvtJf1V0oL0+94saYuS47MlnSdpGrCoPNQl7STpVUmD0+2dJU1Mv890SUek+/eV9E9JNSV/9qj0vGZNiwh/+SuzL2A28J8l20cDT5Vs7wEsADqk2wE8BHyS5JfBP4CT02NHAjOBnYENgB8AjzfyfXuk5xoFdAS+CHwI3AVsBXQB3gI+n7bfATgY2BCoBR4BLi/7OaYC3YCNSn82YC/gNeBL6f72aZ3fAzoABwLvAzumx18BDi45923AyKz/X/mr+r98hW7V5o9AL0m90u2vAb+PiCUlbS6JiLcj4jXgcmBIuv+bwEUR8UJELAP+B+jd2FV66oKI+DAiHgAWAbdExFsR8TrwKLAnQETMjIg/R8RHETEPuAz4fNm5/i8i5kbE4pJ9BwDjgRMi4p50377ApsDFEbEkIv4K3FPyc9xS/1pSJ+DwdJ9ZkxzoVlUi4iPgVuD4tCtlCHBTWbO5Ja/nANukr7cFrki7Md4B3gZEcrXdmH+VvF7cwPamAJK2kjRO0uuS3gN+B3Ruoq56w0k+JTxUsm8bYG5ErCj7OerrHAsMkrQhMAh4OiLmNPEzmAEOdMteQ9N93gAcBxwEfBART5Qd71byujvwRvp6LvDNiNii5GujiHi8Beq8KK1194jYDDie5JdFqYZ+luFAd0m/LNn3BtCttO8//TleB4iIGSQBfxhwLEnAmzXLgW5Z+xewXemONMBXAL9g9atzgHMkfUJSN+DbwO/T/aOA8yXtCiBpc0n/1UJ1dgIWAu9I6gKcU+Gfex84FOgv6eJ031Mk3TvnSmovaQDwZWBcyZ8bC5wB9CfpQzdrlgPdsnYR8IO0m+Tskv03Ap8l6doo90dgCslNyHuB3wJExJ3AJcC4tFvkeZKr3JbwE5Kbm++m3/OOSv9gRLxDckP1MEkXpPcDjkhrmw9cDQyNiBdL/tgtwADgrxExv0V+Ass9RXiBC6s+koYCwyLic2X7A+gVETOzqcysevkK3aqOpI2B04DRWdditj5xoFtVkXQIMI+kb903A83WgLtczMxywlfoZmY54UA3M8uJzGaF69y5c/To0SOrb29mtl6aMmXK/IiobehYZoHeo0cPJk+enNW3NzNbL0lqdBoId7mYmeWEA93MLCcc6GZmOeFANzPLCQe6mVlOONDNzHKiokCXdKiklyTNlDSygeObS7pb0rPpgrcntnypZmbWlGYDPV19/CqSuZt3AYZI2qWs2QhgRkTsQTKH8y8kdWjhWs3MrAmVPFjUD5gZEbMAJI0DBgIzStoE0EmSSNZgfBtY1sK1mtka6jHy3orbzu54bMVtP9uze8VtnzvhuYrb2rqpJNC7sOrit3XAPmVtriRZ2fwNkqW6jilbANfMLFNF+OVWSR96+UK4sPpiuIeQLAe2DdAbuFLSZqudSBomabKkyfPmzVvjYs3MrHGVBHodq66y3p
WVq6zXOxG4IxIzgVeBncpPFBGjI6JPRPSprW1wbhkzM1tLlQT6JKCXpJ7pjc7BJN0rpV4DDgKQ9ClgR2BWSxZqZmZNa7YPPSKWSToduB+oAa6LiOmShqfHRwEXANdLeo6ki+Y8r1RuZta2Kpo+NyImABPK9o0qef0G8MWWLc3MzNaEnxQ1M8sJB7qZWU440M3McsKBbmaWEw50M7OccKCbmeWEA93MLCcc6GZmOeFANzPLCQe6mVlOONDNzHLCgW5mlhMVTc5l1a8Iq7GYWdN8hW5mlhMOdDOznHCgm5nlhAPdzCwnKropKulQ4AqSJeiujYiLy46fAxxXcs6dgdqIeLsFa12NbwSama3U7BW6pBrgKuAwYBdgiKRdSttExKUR0TsiegPnAw+3dpibmdmqKuly6QfMjIhZEbEEGAcMbKL9EOCWlijOzMwqV0mgdwHmlmzXpftWI2lj4FDg9kaOD5M0WdLkefPmrWmtZmbWhEoCXQ3si0bafhn4W2PdLRExOiL6RESf2traSms0M7MKVBLodUC3ku2uwBuNtB2Mu1vMzDJRSaBPAnpJ6impA0lojy9vJGlz4PPAH1u2RDMzq0SzwxYjYpmk04H7SYYtXhcR0yUNT4+PSpseBTwQEYtarVqzCng4qxVVRePQI2ICMKFs36iy7euB61uqMDMzWzN+UtTMLCcc6GZmOeFANzPLCQe6mVlOONDNzHLCgW5mlhMOdDOznHCgm5nlhAPdzCwnHOhmZjnhQDczywkHuplZTjjQzcxywoFuZpYTDnQzs5xwoJuZ5URFgS7pUEkvSZopaWQjbQZImippuqSHW7ZMMzNrTrMrFkmqAa4CDiZZMHqSpPERMaOkzRbA1cChEfGapK1aq2AzM2tYJVfo/YCZETErIpYA44CBZW2OBe6IiNcAIuKtli3TzMyaU0mgdwHmlmzXpftKfQb4hKSJkqZIGtpSBZqZWWUqWSRaDeyLBs6zN3AQsBHwhKQnI+Ifq5xIGgYMA+jevfIV1M3MrHmVXKHXAd1KtrsCbzTQ5r6IWBQR84FHgD3KTxQRoyOiT0T0qa2tXduazcysAZUE+iSgl6SekjoAg4HxZW3+CBwgaQNJGwP7AC+0bKlmZtaUZrtcImKZpNOB+4Ea4LqImC5peHp8VES8IOk+YBqwArg2Ip5vzcLNzGxVlfShExETgAll+0aVbV8KXNpypZmZ2Zrwk6JmZjnhQDczywkHuplZTjjQzcxywoFuZpYTDnQzs5xwoJuZ5YQD3cwsJxzoZmY54UA3M8sJB7qZWU440M3McsKBbmaWEw50M7OccKCbmeWEA93MLCcc6GZmOVFRoEs6VNJLkmZKGtnA8QGS3pU0Nf36UcuXamZmTWl2CTpJNcBVwMFAHTBJ0viImFHW9NGI+FIr1GhmZhWo5Aq9HzAzImZFxBJgHDCwdcsyM7M1VUmgdwHmlmzXpfvK7SfpWUl/krRri1RnZmYVa7bLBVAD+6Js+2lg24hYKOlw4C6g12onkoYBwwC6d+++hqWamVlTKrlCrwO6lWx3Bd4obRAR70XEwvT1BKC9pM7lJ4qI0RHRJyL61NbWrkPZZmZWrpJAnwT0ktRTUgdgMDC+tIGkT0tS+rpfet4FLV2smZk1rtkul4hYJul04H6gBrguIqZLGp4eHwV8FThV0jJgMTA4Isq7ZczMrBVV0ode340yoWzfqJLXVwJXtmxpZma2JvykqJlZTjjQzcxywoFuZpYTDnQzs5xwoJuZ5YQD3cwsJxzoZmY54UA3M8sJB7qZWU440M3McsKBbmaWEw50M7OccKCbmeWEA93MLCcc6GZmOeFANzPLCQe6mVlOVBTokg6V9JKkmZJGNtGur6Tlkr7aciWamVklmg10STXAVcBhwC7AEEm7NNLuEpK1R83MrI1VcoXeD5gZEbMiYgkwDhjYQLtvAbcDb7VgfWZmVqFKAr0LMLdkuy7d9zFJXYCjgFE0QdIwSZMlTZ43b9
6a1mpmZk2oJNDVwL4o274cOC8iljd1oogYHRF9IqJPbW1tpTWamVkFNqigTR3QrWS7K/BGWZs+wDhJAJ2BwyUti4i
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 432x288 with 1 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {
|
|||
|
|
"needs_background": "light"
|
|||
|
|
},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
"# type markov: train and score five classifiers on feature columns 0-4.\n",
"x_train = train_df.iloc[:, 0:5].values.copy()\n",
"y_train = train_df['label'].values.copy()\n",
"x_test = test_df.iloc[:, 0:5].values.copy()\n",
"y_test = test_df['label'].values.copy()\n",
"\n",
"# The tree-based estimators are randomised; a fixed seed makes a fresh kernel\n",
"# run reproduce the same scores and figure.\n",
"lr_classifer = LogisticRegression()\n",
"svm_classifer = SVC()\n",
"gn_classifer = GaussianNB()\n",
"tr_classifer = tree.DecisionTreeClassifier(random_state=0)\n",
"rf_classifer = RandomForestClassifier(random_state=0)\n",
"classifiers = {\n",
"    'LogisticRegression': lr_classifer,\n",
"    'SVM': svm_classifer,\n",
"    'GaussianNB': gn_classifer,\n",
"    'tree': tr_classifer,\n",
"    'RandomForest': rf_classifer,\n",
"}\n",
"\n",
"# With average='micro' on single-label data, precision, recall and F1 all\n",
"# collapse to accuracy, so the three bars were always identical. Macro\n",
"# averaging (unweighted mean over classes) keeps the metrics distinct.\n",
"metrics = {'precision': precision_score, 'recall': recall_score, 'f1': f1_score}\n",
"\n",
"# One row of scores per classifier; dict insertion order fixes the row order.\n",
"scores = {}\n",
"for name, classifier in classifiers.items():\n",
"    classifier.fit(x_train, y_train)\n",
"    y_pred = classifier.predict(x_test)\n",
"    scores[name] = [metric(y_test, y_pred, average='macro') for metric in metrics.values()]\n",
"\n",
"score_df = pd.DataFrame.from_dict(scores, orient='index', columns=list(metrics))\n",
"ax = score_df.plot.bar(title='type markov')\n",
"fig = ax.get_figure()\n",
"fig.savefig('../figure/type.svg')\n",
"print(score_df.loc['RandomForest'])"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 15,
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stderr",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"c:\\python38\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:430: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
|
|||
|
|
" warnings.warn(\"Default solver will be changed to 'lbfgs' in 0.22. \"\n",
|
|||
|
|
"c:\\python38\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:467: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
|
|||
|
|
" warnings.warn(\"Default multi_class will be changed to 'auto' in\"\n",
|
|||
|
|
"c:\\python38\\lib\\site-packages\\sklearn\\svm\\base.py:189: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
|
|||
|
|
" warnings.warn(\"The default value of gamma will change \"\n",
|
|||
|
|
"c:\\python38\\lib\\site-packages\\sklearn\\ensemble\\forest.py:244: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n",
|
|||
|
|
" warn(\"The default value of n_estimators will change from \"\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"precision 0.993148\n",
|
|||
|
|
"recall 0.993148\n",
|
|||
|
|
"f1 0.993148\n",
|
|||
|
|
"Name: RandomForest, dtype: float64\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAFcCAYAAADYnEUIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAgAElEQVR4nO3dd5xU5d3+8c9FV8GOUUGKvYMClhgJiU9sTyJq8qhYwB4VE0s0YqqJ+lNjYknUEDRiA43GEgtRowmWRA2giGJFBF01CsYKKO37++Ockdlllx1gd8/sOdf79doXc8qc/c4A19xzn3PuWxGBmZm1fm2yLsDMzJqGA93MLCcc6GZmOeFANzPLCQe6mVlOONDNzHLCgW6tjqSQtGkGv3eQpJqW/r0NkXSOpJuyrsOqhwPdlknSDEn/k3UdWcjqg8NsRTnQrSpJuk7SkVnXUa0ktcu6Bqs+DnRrkKQbgR7APZI+lfRDSfdJ+l6d/aZI2j99HJK+L2m6pNmSLpbUpmzfoyW9KOkDSQ9I6rmSNXaU9GtJb0h6V9JISauk2wZJqpH0A0nvSXpH0lFlz11H0j2SPpY0QdJ5kh5Ptz2a7vZs+toPLntevcerp7bx6TH/lR7jnvR3jin7nb3K9r9c0pvptkmSdi/bdo6kP0u6SdLHwJF1fld7STdLul1Sh/R9uUzS2+nPZZI6pvu+KOmbZc9tl/5d7bhCfwlWNRzo1qCIOAJ4A/hWRHSOiF8B1wOHl/aR1AfoBowre+oBQH9gR2AwcHS67/7Aj4ADga7AY8DNK1nmRcDmQF9g07SWn5VtXx9YI11/DHClpLXSbVcCc9J9hqU/pdc+MH3YJ33tf6rgePU5BDgi3X8T4AlgNLA28CLw87J9J6SvY21gLHCbpE5l2wcDfwbWBMaUVqYfYHcBnwMHRcR84MfALunx+gA7AT9Jn3IzMKTsuHsBsyPi6WW8DmsNIsI//mnwB5gB/E/Zckfgv8Bm6fKvgavKtgewd9nyScDD6eO/AseUbWsDzAV61vN7rwOObKCmIAlvkQTyJmXbdgVeTx8PAuYB7cq2v0cSdG2BBcAWZdvOAx6v+3vKlhs8XgN1jgd+XLb8G+CvZcvfAiYv473/gOQDBeAc4NE6288B7gYeAX4LqGzba8C+Zct7ATPSx5sCnwCrpstjgJ9l/W/NPyv/4xa6LZeI+By4FTg87UoZAtxYZ7c3yx7PBDZMH/cELpf0oaQPST4YRNJ6LXXdlLYdClxVWpZ0VT3ldAVWBSaVPe/+dH3J+xGxsGx5LtA53addnVrLHzekoeM15N2yx/PqWf7iuWlXzouSPkpfyxrAuo3UtwuwPXBhpOmc2pDkvS/54u8hIqaRfDv4lqRVgf1IvhFYK+cTK9aY+objvJ4kxB8H5kbEE3W2bwRMTR/3AN5OH78JnB8RY6hHRGxfeizpOmB8RFy3jNpmk4TiNhHx1rJfxlJmAQuB7sArZXVnIu0vPwvYA5gaEYslfUDygVdS39/Fg8AU4GFJgyKi9IHxNskHaH1/D7Ck26UN8EIa8tbKuYVujXkX2Lh8RRrgi0m6EOq2zgHOlLSWpI2AU4BS//NI4GxJ2wBIWkPS/61oYRGxGLgauFTSeukxu0naq4LnLgLuAM6RtKqkLYGhdXZb6rU3oy4kHzCzgHaSfgasXskTIzm3MZYk1Est+puBn0jqmq77GVB+zfotwJ7Aibh1nhsOdGvMBSTB8KGkM8rW3wBsR+2QKPkLMAmYDNwH/BEgIu4kOYl5S3qlxvPAPitZ31nANODJ9JgPAVtU+NyTSbo1/kPywXQzyYnFknOA69PXftBK1tmYB0jOMbxC0j3yGZV1AQEQEeeSnBh9SNLaJOcDJpK03p8Dnk7XlfZ/h+QE7ZdZ8oFrrZxqd7uZVUbSUOD4iPhKnfVBcsK01X2Fl3QRsH5EDGt0Z7Mq5Ba6Lbf0RNpJwKisa1
kZkraUtL0SO5Fchnhn1nWZrSgHui2XtH96Fkn/cmvve+1C0o8+h+TKnd+QdBeZtUrucjEzywm30M3McsKBbmaWE5ndWLTuuutGr169svr1Zmat0qRJk2ZHRNf6tmUW6L169WLixIlZ/Xozs1ZJ0syGtrnLxcwsJxzoZmY54UA3M8uJRgNd0rXp7CzPN7Bdkn4raVo6/KlnPTEzy0AlLfTrgL2XsX0fYLP053jg9ytflpmZLa9GAz0iHiWZiKAhg4EbIvEksKakDZqqQDMzq0xT9KF3o/YwnzXpOjMza0FNEeiqZ129A8RIOl7SREkTZ82a1QS/2szMSprixqIaak/d1Z3aU119ISJGkQ652r9/f48KZtbMeo24r+J9Z3Q6tOJ9t+vdo+J9nxv2XMX7NqcivBdN0UK/GxiaXu2yC/BROhuKmZm1oEZb6JJuBgYB60qqAX4OtAeIiJHAOGBfkmnA5gJHNVexZmbWsEYDPSKGNLI9gOFNVpGZma2QzAbnagpF6BMzM6uUb/03M8sJB7qZWU606i4XW8LdT2bmFrqZWU440M3McsKBbmaWEw50M7Oc8ElRyx2fILaicgvdzCwnHOhmZjnhQDczywkHuplZTjjQzcxywoFuZpYTDnQzs5xwoJuZ5YQD3cwsJxzoZmY54UA3M8sJB7qZWU440M3McsKBbmaWEw50M7OccKCbmeWEA93MLCcc6GZmOeFANzPLCQe6mVlOONDNzHLCgW5mlhMOdDOznHCgm5nlREWBLmlvSS9LmiZpRD3b15B0j6RnJU2VdFTTl2pmZsvSaKBLagtcCewDbA0MkbR1nd2GAy9ERB9gEPAbSR2auFYzM1uGSlroOwHTImJ6RMwHbgEG19kngC6SBHQG/gssbNJKzcxsmSoJ9G7Am2XLNem6clcAWwFvA88Bp0TE4iap0MzMKlJJoKuedVFneS9gMrAh0Be4QtLqSx1IOl7SREkTZ82atdzFmplZwyoJ9Bpgo7Ll7iQt8XJHAXdEYhrwOrBl3QNFxKiI6B8R/bt27bqiNZuZWT0qCfQJwGaSeqcnOg8B7q6zzxvAHgCSvgRsAUxvykLNzGzZ2jW2Q0QslHQy8ADQFrg2IqZKOiHdPhI4F7hO0nMkXTRnRcTsZqzbzMzqaDTQASJiHDCuzrqRZY/fBvZs2tLMzGx5+E5RM7OccKCbmeWEA93MLCcc6GZmOeFANzPLCQe6mVlOONDNzHLCgW5mlhMOdDOznHCgm5nlhAPdzCwnHOhmZjnhQDczywkHuplZTjjQzcxywoFuZpYTDnQzs5xwoJuZ5YQD3cwsJxzoZmY54UA3M8sJB7qZWU440M3McsKBbmaWEw50M7OccKCbmeWEA93MLCcc6GZmOeFANzPLCQe6mVlOONDNzHLCgW5mlhMOdDOznKgo0CXtLellSdMkjWhgn0GSJkuaKumRpi3TzMwa066xHSS1Ba4EvgHUABMk3R0RL5TtsyZwFbB3RLwhab3mKtjMzOpXSQt9J2BaREyPiPnALcDgOvscCtwREW8ARMR7TVummZk1ppJA7wa8WbZck64rtzmwlqTxkiZJGlrfgSQdL2mipImzZs1asYrNzKxelQS66lkXdZbbAf2A/wX2An4qafOlnhQxKiL6R0T/rl27LnexZmbWsEb70Ela5BuVLXcH3q5nn9kRMQeYI+lRoA/wSpNUaWZmjaqkhT4B2ExSb0kdgEOAu+vs8xdgd0ntJK0K7Ay82LSlmpnZsjTaQo+IhZJOBh4A2gLXRsRUSSek20dGxIuS7gemAIuBayLi+eYs3MzMaquky4WIGAeMq7NuZJ3li4GLm640MzNbHr5T1MwsJxzoZmY54UA3M8sJB7qZWU440M3McsKBbmaWEw50M7OccKCbmeWEA93MLCcc6GZmOeFANzPLCQe6mVlOONDNzHLCgW5mlhMOdDOznHCgm5nlhAPdzCwnHOhmZjnhQDczywkHuplZTjjQzcxywoFuZp
YTDnQzs5xwoJuZ5YQD3cwsJxzoZmY54UA3M8sJB7qZWU440M3McsKBbmaWEw50M7OccKCbmeVERYEuaW9JL0uaJmn
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 432x288 with 1 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {
|
|||
|
|
"needs_background": "light"
|
|||
|
|
},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
"# type+length markov: train and score five classifiers on feature columns 0-9.\n",
"x_train = train_df.iloc[:, 0:10].values.copy()\n",
"y_train = train_df['label'].values.copy()\n",
"x_test = test_df.iloc[:, 0:10].values.copy()\n",
"y_test = test_df['label'].values.copy()\n",
"\n",
"# The tree-based estimators are randomised; a fixed seed makes a fresh kernel\n",
"# run reproduce the same scores and figure.\n",
"lr_classifer = LogisticRegression()\n",
"svm_classifer = SVC()\n",
"gn_classifer = GaussianNB()\n",
"tr_classifer = tree.DecisionTreeClassifier(random_state=0)\n",
"rf_classifer = RandomForestClassifier(random_state=0)\n",
"classifiers = {\n",
"    'LogisticRegression': lr_classifer,\n",
"    'SVM': svm_classifer,\n",
"    'GaussianNB': gn_classifer,\n",
"    'tree': tr_classifer,\n",
"    'RandomForest': rf_classifer,\n",
"}\n",
"\n",
"# With average='micro' on single-label data, precision, recall and F1 all\n",
"# collapse to accuracy, so the three bars were always identical. Macro\n",
"# averaging (unweighted mean over classes) keeps the metrics distinct.\n",
"metrics = {'precision': precision_score, 'recall': recall_score, 'f1': f1_score}\n",
"\n",
"# One row of scores per classifier; dict insertion order fixes the row order.\n",
"scores = {}\n",
"for name, classifier in classifiers.items():\n",
"    classifier.fit(x_train, y_train)\n",
"    y_pred = classifier.predict(x_test)\n",
"    scores[name] = [metric(y_test, y_pred, average='macro') for metric in metrics.values()]\n",
"\n",
"score_df = pd.DataFrame.from_dict(scores, orient='index', columns=list(metrics))\n",
"ax = score_df.plot.bar(title='type+length markov')\n",
"fig = ax.get_figure()\n",
"fig.savefig('../figure/type_length.svg')\n",
"print(score_df.loc['RandomForest'])"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 17,
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stderr",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"c:\\python38\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:430: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
|
|||
|
|
" warnings.warn(\"Default solver will be changed to 'lbfgs' in 0.22. \"\n",
|
|||
|
|
"c:\\python38\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:467: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
|
|||
|
|
" warnings.warn(\"Default multi_class will be changed to 'auto' in\"\n",
|
|||
|
|
"c:\\python38\\lib\\site-packages\\sklearn\\svm\\base.py:189: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
|
|||
|
|
" warnings.warn(\"The default value of gamma will change \"\n",
|
|||
|
|
"c:\\python38\\lib\\site-packages\\sklearn\\ensemble\\forest.py:244: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n",
|
|||
|
|
" warn(\"The default value of n_estimators will change from \"\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"precision 0.99743\n",
|
|||
|
|
"recall 0.99743\n",
|
|||
|
|
"f1 0.99743\n",
|
|||
|
|
"Name: RandomForest, dtype: float64\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAFcCAYAAADYnEUIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAgAElEQVR4nO3deZxWZf3/8debYVNBMcFSkMV9FwVc0oiy3Cq3SkUN3FOxNFPDVjP9qV/LJZcITRQTTXPJhdQyccklRAHFJRFZRk2BcgOU7fP745wbboYZ5gZm5tyc834+HvPwPsuc+3PfMu/7uq9zznUpIjAzszVfq6wLMDOzpuFANzPLCQe6mVlOONDNzHLCgW5mlhMOdDOznHCgW7OTFJI2z+B5B0iqbYLj3CjpgqaoqdpJ6pn+/2qddS228hzoOSNpqqSvZF1HFrL64GhKeXgNlh0HujWJtBV7TNZ1rKq0NT+mBZ6nalu+1VybVcaBniOSbga6A/dJ+ljSOZIekPS9OvtNlHRw+jgkfV/SFEmzJF0qqVXZvsdJekXS/yQ9JKnHatbYTtKvJU2X9K6kYZLWSrcNkFQr6YeS3pP0jqRjy353A0n3SfpQ0lhJF0h6Mt32eLrbhPS1H172e/UebyV1lvQ3SR9Jeqz0PtTXRSFpjKQT0sfHSPqnpMsl/Rc4T9Lm6TE+SN/zPzX2GsqOXX6899P/b59P189IX+fgsv2/JumF9D2bIem8sm2l2o+XNB34Rz3P9830W9/26fKBkialzz1G0jbp+qGS/lznd6+U9NtVfL9tVUSEf3L0A0wFvlK2fBjwbNnyTsBsoG26HMCjwGdIPgz+DZyQbjsYmAxsA7QGfgo81cDz3ggc08C2ADZPH18B3Js+X0fgPuCidNsAYCFwPtAGOACYC6yfbr8t/Vkb2BaYATxZ3/NUcrw6NQ4AxqzgtX0E9AfaAVeWnhfomT5v67L9x5S9h8ekNXwvfQ/XAm4FfkLSoGoP7NXQa6inltLxjgVqgAuA6cA1aW37pLV2KHtdO6TPtSPwLnBwndpHAuuktS15PelzTC77f7clMAf4avp+npNubwv0SN/bddN9a4B3gN2z/pso0k/mBfinif+HLh/o7YD/Aluky78Gri3bHsB+ZcunAo+kj/8KHF+2rVX6R9ujnue9kUYCHVAaCJuVbdsDeDN9PACYVycc3wN2TwNiAbBV2bYLaDzQ6z1ePTUOYMWBflvZcgdgEbAJlQX69DrHGwkMB7o19F6t4P/vMcDrZcs7pL/z2bJ1s4HeDfz+FcDl6eNS7ZuWbS+tOwt4ubxG4GfA7XX+PbwFDEiXnwQGpY+/CryR9d9D0X7c5ZJzEfEpcDtwdNqVMhC4uc5uM8oeTwM2Th/3AK5Mv16/T/LBIKArLOm6KW07Eri2tCzp2nrK6ULSuh5X9nsPputLZkfEwrLluSQB2oWk1Vhea/njhjR0vFI3QamO+4G9yup/v85xljxXRHycvhcbU5m6dZ5D8j7+K+2+OK7C45S8W/Z4XlpT3XWl17ibpEclzZT0AXAy0LmR+gDOBq6JiPKrhDYm+fdB+pyL09/tmq4aRfLvC5J/D6NW5kXZ6nOg5099w2feBBwF7A3MjYin62zfpOxxd+Dt9PEM4LsR0ansZ62IeAogInYsrSf54z21bL9T66ljFknYbFe233oR0aGC1zWTpKuhWwN1r7SIuLis/q+TtPY7la0rt+S5JHUg6TJ6m+QbByQfVCWfq/tUdZ73PxFxYkRsDHyX5IOwua5sGUXSxbVJRKwHDCP5MGmwvtQ+wE8lfbNs3dskH/IASBLJ+/JWuuoOYICkbsAhONBbnAM9f94FNi1fkQb4YuA3LN86Bzhb0vqSNgFOB/6Urh8GnCtpOwBJ60n69qoWlrborgMul7Rhesyukvat4HcXAXeRnFRcW9LWwKA6uy332pvQAZL2ktQW+BXJeYkZET
GTJNCOllSTtrY3W9GBJH07DT2A/5EE6qJ0ualfQ0fgvxHxiaRdSVrOlZgE7AdcI+nAdN3twNck7S2pDfBD4FOg9AE/k6S7aQRJN9orTfcyrBIO9Py5iKRl9b6ks8rWjyTpb/1jPb/zF2AcMB54APgDQETcDVwC3CbpQ+AlYP/VrO9HJCfSnkmP+Xdgqwp/9zRgPeA/JB9Mt5IESsl5wE3paz9sNeusaxTwC5Kulj4k33hKTiTpopgNbEcacCvQD3hW0sckrefTI+LNdNt5NO1rOBU4X9JHwM9JQrkiETGB5JvLdZL2j4jXgKOBq0i+bX0D+EZEzC/7tVHAV3DrPBOK8AQXRSBpEHBSROxVZ32QnDCdnE1lq07SJcDnImJwozubFYBb6AUgaW2SltrwrGtZHZK2lrSjErsCxwN3Z12XWbVwoOdc2j89k6Rvdk3/GtyRpB99DknXwW9IuovMDHe5mJnlhlvoZmY54UA3M8uJzEZX69y5c/Ts2TOrpzczWyONGzduVkR0qW9bZoHes2dPnnvuuaye3sxsjSRpWkPb3OViZpYTDnQzs5xwoJuZ5YQD3cwsJxoNdEk3pNNavdTAdkn6raTJ6fjYuzR9mWZm1phKWug3kgyj2ZD9gS3Sn5OA361+WWZmtrIaDfSIeJxkyNCGHASMjMQzQCdJGzVVgWZmVpmm6EPvyrJTWNWydEoqMzNrIU1xY1Hd6ayg/imtkHQSSbcM3bt3b4KnNjOrTM+hD1S879T2lU7sBDv0qjzLXhz8YsX7roqmCPRalp3bsRtL56RcRkQMJx2Tu2/fvh7m0ayZFSHEbKmm6HK5FxiUXu2yO/BBRLzTBMc1M7OV0GgLXdKtwACgs6RaknkV2wBExDBgNHAAyTyRc4Fjm6vYutz6MDNbqtFAj4iBjWwPYEiTVWRmZqvEd4qameWEA93MLCcyGw/dmpbPJ5iZA91yxx9uVlTucjEzywkHuplZTjjQzcxywoFuZpYTDnQzs5xwoJuZ5YQD3cwsJxzoZmY54UA3M8sJB7qZWU440M3McsKBbmaWEw50M7OccKCbmeWEA93MLCcc6GZmOeFANzPLCQe6mVlOONDNzHLCgW5mlhMOdDOznHCgm5nlhAPdzCwnHOhmZjnhQDczywkHuplZTjjQzcxywoFuZpYTDnQzs5yoKNAl7SfpNUmTJQ2tZ/t6ku6TNEHSJEnHNn2pZma2Io0GuqQa4Bpgf2BbYKCkbevsNgR4OSJ2AgYAv5HUtolrNTOzFaikhb4rMDkipkTEfOA24KA6+wTQUZKADsB/gYVNWqmZma1QJYHeFZhRtlybrit3NbAN8DbwInB6RCyueyBJJ0l6TtJzM2fOXMWSzcysPpUEuupZF3WW9wXGAxsDvYGrJa273C9FDI+IvhHRt0uXLitdrJmZNaySQK8FNilb7kbSEi93LHBXJCYDbwJbN02JZmZWiUoCfSywhaRe6YnOI4B76+wzHdgbQNJnga2AKU1ZqJmZrVjrxnaIiIWSTgMeAmqAGyJikqST0+3DgF8BN0p6kaSL5kcRMasZ6zYzszoaDXSAiBgNjK6zbljZ47eBfZq2NDMzWxm+U9TMLCcc6GZmOeFANzPLCQe6mVlOONDNzHLCgW5mlhMOdDOznHCgm5nlhAPdzCwnHOhmZjnhQDczywkHuplZTjjQzcxywoFuZpYTDnQzs5xwoJuZ5YQD3cwsJxzoZmY54UA3M8sJB7qZWU440M3McsKBbmaWEw50M7OccKCbmeWEA93MLCcc6GZmOeFANzPLCQe6mVlOONDNzHLCgW5mlhMOdDOznHCgm5nlREWBLmk/Sa9JmixpaAP7DJA0XtIkSY81bZlmZtaY1o3tIKkGuAb4KlALjJV0b0S8XLZPJ+BaYL+ImC5pw+Yq2MzM6ldJC31XYHJETImI+cBtwEF19jkSuCsipgNExHtNW6aZmTWmkkDvCswoW65N15XbElhf0hhJ4yQNaqoCzcysMo
12uQCqZ13Uc5w+wN7AWsDTkp6JiH8vcyDpJOAkgO7du698tWZm1qBKWui1wCZly92At+vZ58GImBMRs4DHgZ3qHig
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 432x288 with 1 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {
|
|||
|
|
"needs_background": "light"
|
|||
|
|
},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
"# type+length+burst markov\n",
"# Fit five off-the-shelf classifiers on the leading N_FEATURES columns of\n",
"# train_df, score each one on test_df, and plot precision / recall / F1\n",
"# side by side in a single bar chart.\n",
"\n",
"# presumably the 15 type/length/burst feature columns that precede 'label' -- TODO confirm\n",
"N_FEATURES = 15\n",
"# NOTE(review): for single-label classification, micro-averaged precision,\n",
"# recall and F1 all reduce to the same value, so the three bars drawn for a\n",
"# model will likely be identical. Kept as 'micro' to preserve the reported\n",
"# numbers; consider 'macro' or 'weighted' if per-class behaviour matters.\n",
"AVERAGE = 'micro'\n",
"# Fixed seed so the tree-based models give the same scores on every re-run.\n",
"SEED = 42\n",
"\n",
"x_train = train_df.iloc[:, 0:N_FEATURES].values.copy()\n",
"y_train = train_df['label'].values.copy()\n",
"x_test = test_df.iloc[:, 0:N_FEATURES].values.copy()\n",
"y_test = test_df['label'].values.copy()\n",
"\n",
"# (row label in score_df, unfitted estimator) -- order fixes the bar order.\n",
"classifiers = [\n",
"    ('LogisticRegression', LogisticRegression()),\n",
"    ('SVM', SVC()),\n",
"    ('GaussianNB', GaussianNB()),\n",
"    ('tree', tree.DecisionTreeClassifier(random_state=SEED)),\n",
"    ('RandomForest', RandomForestClassifier(random_state=SEED)),\n",
"]\n",
"\n",
"# One fit / predict / score pass per model instead of five pasted stanzas.\n",
"score_rows = []\n",
"for model_name, classifier in classifiers:\n",
"    classifier.fit(x_train, y_train)\n",
"    y_pred = classifier.predict(x_test)\n",
"    score_rows.append([\n",
"        precision_score(y_test, y_pred, average=AVERAGE),\n",
"        recall_score(y_test, y_pred, average=AVERAGE),\n",
"        f1_score(y_test, y_pred, average=AVERAGE),\n",
"    ])\n",
"\n",
"# Build the score table in one shot rather than zero-filling and overwriting.\n",
"score_df = pd.DataFrame(score_rows,\n",
"                        index=[model_name for model_name, _ in classifiers],\n",
"                        columns=['precision', 'recall', 'f1'])\n",
"\n",
"ax = score_df.plot.bar(title='type+length+burst markov')\n",
"fig = ax.get_figure()\n",
"# The '../figure' directory must already exist; savefig does not create it.\n",
"fig.savefig('../figure/type_length_burst.svg')\n",
"print(score_df.loc['RandomForest'])"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": null,
|
|||
|
|
"metadata": {
|
|||
|
|
"collapsed": true
|
|||
|
|
},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": []
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"metadata": {
|
|||
|
|
"kernelspec": {
|
|||
|
|
"display_name": "Python 3",
|
|||
|
|
"language": "python",
|
|||
|
|
"name": "python3"
|
|||
|
|
},
|
|||
|
|
"language_info": {
|
|||
|
|
"codemirror_mode": {
|
|||
|
|
"name": "ipython",
|
|||
|
|
"version": 3
|
|||
|
|
},
|
|||
|
|
"file_extension": ".py",
|
|||
|
|
"mimetype": "text/x-python",
|
|||
|
|
"name": "python",
|
|||
|
|
"nbconvert_exporter": "python",
|
|||
|
|
"pygments_lexer": "ipython3",
|
|||
|
|
"version": "3.6.2"
|
|||
|
|
}
|
|||
|
|
},
|
|||
|
|
"nbformat": 4,
|
|||
|
|
"nbformat_minor": 2
|
|||
|
|
}
|