2019-12-23 01:20:51 +08:00
|
|
|
{
|
|
|
|
|
"cells": [
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2020-01-07 17:29:25 +08:00
|
|
|
"execution_count": 35,
|
2019-12-23 01:20:51 +08:00
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"end2\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
"import os\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"# NOTE(review): N is not referenced by any code visible in this notebook - confirm before removing.\n",
"N = 20\n",
"# Name of the dated sub-directory that holds this experiment's CSV files.\n",
"date = '2019-12-20_21'\n",
"# Project checkout location. Defaults to the original author's path; set the\n",
"# GRADPROJ_ROOT environment variable to run the notebook on another machine.\n",
"root_dir = os.environ.get('GRADPROJ_ROOT', '/Users/Leo/Documents/github/GradProj/')\n",
"csv_dir = os.path.join(root_dir, 'Experiment', 'MarkovModel', 'CsvFile', date)\n",
"train_path = os.path.join(csv_dir, 'train.csv')\n",
"test_path = os.path.join(csv_dir, 'test.csv')\n",
"# The first CSV column is used as the row index of each frame.\n",
"train_df = pd.read_csv(train_path, index_col=0)\n",
"test_df = pd.read_csv(test_path, index_col=0)\n",
"print('end2')"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2020-01-07 17:29:25 +08:00
|
|
|
"execution_count": 36,
|
2019-12-23 01:20:51 +08:00
|
|
|
"metadata": {
|
|
|
|
|
"collapsed": true
|
|
|
|
|
},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"from sklearn.linear_model import LogisticRegression\n",
|
|
|
|
|
"from sklearn.svm import SVC\n",
|
|
|
|
|
"from sklearn.naive_bayes import GaussianNB\n",
|
|
|
|
|
"from sklearn import tree\n",
|
|
|
|
|
"from sklearn.ensemble import RandomForestClassifier\n",
|
|
|
|
|
"from sklearn.metrics import f1_score,recall_score,precision_score\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"%matplotlib inline\n",
|
|
|
|
|
"import os\n",
|
|
|
|
|
"import numpy as np\n",
|
|
|
|
|
"import pandas as pd\n",
|
|
|
|
|
"import matplotlib.pyplot as plt"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2020-01-07 17:29:25 +08:00
|
|
|
"execution_count": 25,
|
2019-12-23 01:20:51 +08:00
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
" precision recall f1\n",
|
2020-01-07 17:29:25 +08:00
|
|
|
"LogisticRegression 0.682227 0.682227 0.682227\n",
|
|
|
|
|
"SVM 0.706852 0.706852 0.706852\n",
|
|
|
|
|
"GaussianNB 0.680086 0.680086 0.680086\n",
|
|
|
|
|
"tree 0.736188 0.736188 0.736188\n",
|
|
|
|
|
"RandomForest 0.736831 0.736831 0.736831\n"
|
2019-12-23 01:20:51 +08:00
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
2020-01-07 17:29:25 +08:00
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAFcCAYAAAAzq/4LAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XucFmXdx/HPlxXEBLN0LeOsoqmoKODhsYg081CBmQdQ\nEzOlPGRpatjBSvMhs8fDU5ahyaMmongKldJSSa00QBEFJRFRViuBNBVRTr/nj5mVm2VhZ/Hend2Z\n7/v12pf3zFzM/vYWvnvd18xclyICMzMrlg55F2BmZtXncDczKyCHu5lZATnczcwKyOFuZlZADncz\nswJyuJu1EEnzJX0q7zqsnBzulisHoFnLcLibVZmkjfKuwczhbrmRdD3QE7hT0puSzpF0t6SvNWg3\nU9Kh6euQdLqkeZIWSbpYUoeKtidIelrSq5LukdRrHd+7d3quL0lakLb/qqRB6fd7TdLPK9pvK+l+\nSYvT73uDpM0rjs+X9C1JM4ElDQNe0kclPS9peLq9o6Qp6feZJWloun9vSf+UVFPxZz+fntcsu4jw\nl79y+wLmA5+q2D4SeLRiezdgMdAp3Q7gAeCDJL8Y/g6cmB47FJgL7AhsBHwX+Ms6vm/v9FxXAp2B\nTwNvA3cAWwHdgFeAT6TttwMOADYGaoEHgcsa/BwzgB7AJpU/G7AH8CLw2XR/x7TObwOdgP2AN4Ad\n0uPPAQdUnHsiMDrv/1f+al9f7rlbW/NboK+kvun2F4GbImJZRZuLIuLfEfEicBkwIt3/FWBMRDwd\nESuA/wb6r6v3nrogIt6OiHuBJcCNEfFKRLwEPATsDhARcyPiDxHxTkQsBC4BPtHgXP8bEQsiYmnF\nvo8Dk4CREXFXum9voAvw44hYFhH3A3dV/Bw31r+W1BU4JN1nlpnD3dqUiHgHuBk4Nh1uGQFc36DZ\ngorXLwAfSV/3Ai5PhzpeA/4NiKQXvi7/qni9tJHtLgCStpI0QdJLkl4HfgNsuZ666n2V5NPDAxX7\nPgIsiIhVDX6O+jrHA4dJ2hg4DHgsIl5Yz89gthaHu+WtsWlJrwWOAfYH3oqIvzY43qPidU/g5fT1\nAuArEbF5xdcmEfGXKtQ5Jq1114jYDDiW5BdHpcZ+lq8CPSVdWrHvZaBH5bWC9Od4CSAiZpOE/cHA\n0SRhb9YsDnfL27+AbSp3pGG+Cvgf1u61A5wt6QOSegBfB25K918JnCtpZwBJ75d0RJXq7Aq8Cbwm\nqRtwdsY/9wZwEDBY0o/TfY+SDAGdI6mjpCHA54AJFX9uPHA6MJhkzN2sWRzulrcxwHfToZSzKvZf\nB+xCMvzR0G+B6SQXMO8Gfg0QEbcDFwET0qGTp0h6v9XwQ5ILo/9Jv+dtWf9gRLxGcjH2YEkXpNcP\nhqa1LQJ+ARwXEc9U/LEbgSHA/RGxqCo/gZWKIrxYh7U9ko4DRkXExxrsD6BvRMzNpzKz9sE9d2tz\nJL0POAUYm3ctZu2Vw93aFEkHAgtJxuJ9IdFsA3lYxsysgNxzNzMrIIe7mVkB5TZ73ZZbbhm9e/fO\n69ubmbVL06dPXxQRtU21yy3ce/fuzbRp0/L69mZm7ZKkTFNReFjGzKyAHO5mZgXkcDczKyCHu5lZ\nATnczcwKyOFuZlZADnczswJyuJuZFVBuDzGZWcvrPfruzG3ndz46c9td+vTM3PbJkU9mbtuSyvZe\nuOduZlZADnczswJyuJuZFZDD3cysgBzuZmYF5HA3MyugTOEu6SBJcyTNlTS6keOXSpqRfv1d0mvV\nL9XMzLJq8j53STXAFcABQB0wVdKkiJhd3yYizqho/zVg9xao1czMMsryENOewNyImAcgaQIwDJi9\njvYjgO9XpzzLqmwPaJjZ+mUZlukGLKjYrkv3rUVSL6APcP86jo+SNE3StIULFza3VjMzyyhLuKuR\nfbGOtsOBWyJiZWMHI2JsRAyMiIG1tU2u72pm
ZhsoS7jXAT0qtrsDL6+j7XDgxvdalJmZvTdZwn0q\n0FdSH0mdSAJ8UsNGknYAPgD8tbolmplZczUZ7hGxAjgNuAd4Grg5ImZJOl/S0IqmI4AJEbGuIRsz\nM2slmab8jYjJwOQG+85rsP2D6pWVje8QMTNrnOdzt8LxL30zTz9gZlZIDnczswJyuJuZFZDD3cys\ngBzuZmYF5HA3Mysgh7uZWQE53M3MCsjhbmZWQA53M7MCcribmRWQw93MrIAc7mZmBeRwNzMrIIe7\nmVkBOdzNzArI4W5mVkAOdzOzAsoU7pIOkjRH0lxJo9fR5khJsyXNkjS+umWamVlzNLmGqqQa4Arg\nAKAOmCppUkTMrmjTFzgX2DciXpW0VUsVbGZmTcvSc98TmBsR8yJiGTABGNagzUnAFRHxKkBEvFLd\nMs3MrDmyhHs3YEHFdl26r9L2wPaS/izpEUkHVatAMzNrviaHZQA1si8aOU9fYAjQHXhIUr+IeG2N\nE0mjgFEAPXv2bHaxZmaWTZaeex3Qo2K7O/ByI21+GxHLI+J5YA5J2K8hIsZGxMCIGFhbW7uhNZuZ\nWROyhPtUoK+kPpI6AcOBSQ3a3AF8EkDSliTDNPOqWaiZmWXXZLhHxArgNOAe4Gng5oiYJel8SUPT\nZvcAiyXNBh4Azo6IxS1VtJmZrV+WMXciYjIwucG+8ypeB3Bm+mVmZjnzE6pmZgXkcDczKyCHu5lZ\nATnczcwKyOFuZlZADnczswJyuJuZFZDD3cysgBzuZmYF5HA3Mysgh7uZWQE53M3MCsjhbmZWQA53\nM7MCcribmRWQw93MrIAc7mZmBeRwNzMrIIe7mVkBZQp3SQdJmiNprqTRjRw/XtJCSTPSrxOrX6qZ\nmWXV5ALZkmqAK4ADgDpgqqRJETG7QdObIuK0FqjRzMyaKUvPfU9gbkTMi4hlwARgWMuWZWZm70WW\ncO8GLKjYrkv3NfQFSTMl3SKpR1WqMzOzDZIl3NXIvmiwfSfQOyJ2Bf4IXNvoiaRRkqZJmrZw4cLm\nVWpmZpllCfc6oLIn3h14ubJBRCyOiHfSzauAAY2dKCLGRsTAiBhYW1u7IfWamVkGWcJ9KtBXUh9J\nnYDhwKTKBpK2rtgcCjxdvRLNzKy5mrxbJiJWSDoNuAeoAa6JiFmSzgemRcQk4HRJQ4EVwL+B41uw\nZjMza0KT4Q4QEZOByQ32nVfx+lzg3OqWZmZmG8pPqJqZFZDD3cysgBzuZmYF5HA3Mysgh7uZWQE5\n3M3MCsjhbmZWQA53M7MCcribmRWQw93MrIAc7mZmBeRwNzMrIIe7mVkBOdzNzArI4W5mVkAOdzOz\nAnK4m5kVkMPdzKyAHO5mZgWUKdwlHSRpjqS5kkavp93hkkLSwOqVaGZmzdVkuEuqAa4ADgZ2AkZI\n2qmRdl2B04FHq12kmZk1T5ae+57A3IiYFxHLgAnAsEbaXQD8BHi7ivWZmdkGyBLu3YAFFdt16b53\nSdod6BERd1WxNjMz20BZwl2N7It3D0odgEuBbzZ5ImmUpGmSpi1cuDB7lWZm1ixZwr0O6FGx3R14\nuWK7K9APmCJpPrA3MKmxi6oRMTYiBkbEwNra2g2v2szM1itLuE8F+krqI6kTMByYVH8wIv4TEVtG\nRO+I6A08AgyNiGktUrGZmTWpyXCPiBXAacA9wNPAzRExS9L5koa2dIFmZtZ8G2VpFBGTgckN9p23\njrZD3ntZZmb2XvgJVTOzAnK4m5kVkMPdzKyAHO5mZgXkcDczKyCHu5lZATnczcwKyOFuZlZADncz\nswJyuJuZFZDD3cysgBzuZmYF5HA3Mysgh7uZWQE53M3MCsjhbmZWQA53M7MCcribmRWQw93MrIAy\nhbukgyTNkTRX0uhGjn9V0pOSZkh6WNJO1S/VzMyyajLcJdUAVwAHAzsBIxoJ7/ERsUtE9Ad+AlxS\n9UrNzCyz
LD33PYG5ETEvIpYBE4BhlQ0i4vWKzU2BqF6JZmbWXBtlaNMNWFCxXQfs1bCRpFOBM4FO\nwH5Vqc7MzDZIlp67Gtm
|
2019-12-23 01:20:51 +08:00
|
|
|
"text/plain": [
|
2020-01-07 17:29:25 +08:00
|
|
|
"<matplotlib.figure.Figure at 0x1a10748940>"
|
2019-12-23 01:20:51 +08:00
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
"# type markov\n",
"# NOTE(review): presumably the first 5 CSV columns are the 'type' features - confirm against the CSV writer.\n",
"N_FEATURES = 5\n",
"# With average='micro' on single-label targets, precision, recall and F1 all\n",
"# reduce to accuracy, so the three columns of the table below are identical.\n",
"# NOTE(review): consider 'macro' or 'weighted' if per-class quality matters.\n",
"AVERAGE = 'micro'\n",
"# Fixed seed for the randomised estimators so that re-running reproduces the table.\n",
"SEED = 0\n",
"\n",
"x_train = train_df.iloc[:, :N_FEATURES].values.copy()\n",
"y_train = train_df['label'].values.copy()\n",
"x_test = test_df.iloc[:, :N_FEATURES].values.copy()\n",
"y_test = test_df['label'].values.copy()\n",
"\n",
"# (row label, estimator) pairs; they are fitted and scored in this order.\n",
"classifiers = [\n",
"    ('LogisticRegression', LogisticRegression()),\n",
"    ('SVM', SVC()),\n",
"    ('GaussianNB', GaussianNB()),\n",
"    ('tree', tree.DecisionTreeClassifier(random_state=SEED)),\n",
"    ('RandomForest', RandomForestClassifier(random_state=SEED)),\n",
"]\n",
"\n",
"row_names = []\n",
"score_rows = []\n",
"for name, classifier in classifiers:\n",
"    classifier.fit(x_train, y_train)\n",
"    y_pred = classifier.predict(x_test)\n",
"    row_names.append(name)\n",
"    score_rows.append([\n",
"        precision_score(y_test, y_pred, average=AVERAGE),\n",
"        recall_score(y_test, y_pred, average=AVERAGE),\n",
"        f1_score(y_test, y_pred, average=AVERAGE),\n",
"    ])\n",
"\n",
"# One row per classifier, one column per metric.\n",
"score_df = pd.DataFrame(score_rows, index=row_names, columns=['precision', 'recall', 'f1'])\n",
"print(score_df)\n",
"ax = score_df.plot.bar(title='type markov')\n",
"fig = ax.get_figure()\n",
"#fig.savefig('../figure/type.svg')"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2020-01-07 17:29:25 +08:00
|
|
|
"execution_count": 13,
|
2019-12-23 01:20:51 +08:00
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
" precision recall f1\n",
|
2020-01-07 17:29:25 +08:00
|
|
|
"LogisticRegression 0.833619 0.833619 0.833619\n",
|
|
|
|
|
"SVM 0.817773 0.817773 0.817773\n",
|
|
|
|
|
"GaussianNB 0.738116 0.738116 0.738116\n",
|
|
|
|
|
"tree 0.967452 0.967452 0.967452\n",
|
|
|
|
|
"RandomForest 0.967452 0.967452 0.967452\n"
|
2019-12-23 01:20:51 +08:00
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
2020-01-07 17:29:25 +08:00
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAFcCAYAAAAzq/4LAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xm8VWXd9/HPlwOIqWgllokMziIyiziRZab5GA6ZQDml\nyZNoWlp39tTLfKyeskzFckhvQzTEIW8NDXPEebgZBBQQQ0I9N6ZIDggq0+/5Y60Nm8OBsw9uztpn\nre/79Tov9lrrOvv8zga+e+1rXeu6FBGYmVm+tMm6ADMzqz6Hu5lZDjnczcxyyOFuZpZDDnczsxxy\nuJuZ5ZDD3VodSSFplwx+7sGS6lv6566PpAsl/TnrOqw2OdxtgyTNl/SlrOvIQlZvImbV4HC3miTp\nBkmnZF1HrZLUNusarLY53G29JN0EdAHulvS+pP+Q9DdJ323Qboako9PHIelsSfMkvSXpt5LalLU9\nVdJsSW9Luk9S149Z42aSLpH0qqQ3JF0jafP02MGS6iWdJ+lNSa9L+lbZ935a0t2S3pM0SdIvJD2R\nHnssbTY9/d2Hln1fo8/XSG2PpM/5VPocd6c/c2zZz+xW1n6UpNfSY1MkHVR27EJJf5H0Z0nvAac0\n+FntJI2TdIek9unrcrmkBenX5ZI2S9vOlnRk2fe2Tf+u+m3UX4LVJIe7rVdEnAi8Cnw1IraMiN8A\nY4ATSm0k9QZ2ACaUfesxwACgH3AUcGra9mjg/wDHAp2Ax4FxH7PMi4HdgD7ALmktF5Qd/yywdbr/\nNOBKSZ9Mj10JLEnbnJx+lX73wenD3unvfmsFz9eYYcCJafudgaeB0cCngNnAz8raTkp/j08BNwO3\nS+pQdvwo4C/ANsDY0s70zewu4CPg+IhYBvwEGJQ+X29gIPDT9FvGAcPLnvcw4K2ImLqB38Nam4jw\nl7/W+wXMB75Utr0Z8G9g13T7EuCqsuMBHF62PRJ4KH18L3Ba2bE2wFKgayM/9wbglPXUFCRBLpJw\n3rns2H7AP9PHBwMfAG3Ljr9JEnp1wHJg97JjvwCeaPhzyrbX+3zrqfMR4Cdl278D7i3b/iowbQOv\n/dskby4AFwKPNTh+ITAeeBS4AlDZsZeBI8q2DwPmp493ARYDn0i3xwIXZP1vzV/V/fKZuzVLRHwE\n3AackHa3DAduatDstbLHrwCfSx93BUZJekfSOyRvEiI5qy1175SOfQO4qrQt6apGyukEfAKYUvZ9\nf0/3lyyKiBVl20uBLdM2bRvUWv54fdb3fOvzRtnjDxrZXv29aXfPbEnvpr/L1sC2TdQ3COgF/DrS\npE59juS1L1n99xARc0k+NXxV0ieAISSfFCxHfFHGmtLYtKFjSAL9CWBpRDzd4PiOwMz0cRdgQfr4\nNeCXETGWRkREr9JjSTcAj0TEDRuo7S2SgNwrIv5nw7/GOhYCK4DOwEtldWci7V//EXAIMDMiVkl6\nm+TNr6Sxv4v7gRnAQ5IOjojSm8cCkjfTxv4eYE3XTBtgVhr4liM+c7emvAHsVL4jDfNVJN0MDc/a\nAX4o6ZOSdgTOAUr91dcAP5a0F4CkrSV9fWMLi4hVwHXAZZK2S59zB0mHVfC9K4H/Ai6U9AlJewAn\nNWi2zu++CW1F8mazEGgr6QKgYyXfGMm1kJtJAr50pj8O+KmkTum+C4DyMfG3AF8GzsBn7bnkcLem\n/IokJN6R9IOy/TcCe7N2YJT8FZgCTAP+BlwPEBF3klwAvSUd8fEC8JWPWd+PgLnAM+lzPgjsXuH3\nnkXS9fEvkjepcSQXJUsuBMakv/vxH7POptxHck3iJZIulA+prJsIgIj4OclF1QclfYrk+sFkkrP6\n54Gp6b5S+9dJLu7uz5o3X8sRrd1NZ1YZSScBIyLiwAb7g+Ria6v7mC/pYuCzEXFyk43NapzP3K3Z\n0otwI4Frs67l45C0h6ReSgwkGdp4Z9Z1mVWD
w92aJe3PXkjSH93a+2q3Iul3X0IyAuh3JF1KZq2e\nu2XMzHLIZ+5mZjnkcDczy6Emb2KS9CfgSODNiOjZyHEBo4AjSO7WOyUqmKNi2223jW7dujW7YDOz\nIpsyZcpbEdGpqXaV3KF6A/AHknHNjfkKsGv6tS9wdfrnBnXr1o3JkydX8OPNzKxE0itNt6qgWyYi\nHiOZA2R9jgJujMQzwDaStq+sTDMz2xSq0ee+A2vfSVef7luHpBGSJkuavHDhwir8aDMza0w1wl2N\n7Gt0fGVEXBsRAyJiQKdOTXYZmZnZRqrGrJD1rD2bXmfWnn3ObC3Lly+nvr6eDz/8MOtSWqUOHTrQ\nuXNn2rVrl3UpVsOqEe7jgbMk3UJyIfXddFIis0bV19ez1VZb0a1bN5LBVlapiGDRokXU19fTvXv3\nrMuxGlbJUMhxJCvQbCupnmRZsHYAEXENyfJqR5DMzLcUWO+akmYAH374oYN9I0ni05/+NL5mZU1p\nMtwjYngTxwM4s2oVWSE42DeeXzurhO9QNaui/ffff4PHjzjiCN55550WqsaKzMvsWea6nf+3qj7f\n/F//r6o8z8qVK6mrq2vW9zz11FMbPD5hwoSPU1KzNee1nd/hGxW33bt7l4rbPn/y8xW33ZSK9lr4\nzN0Kaf78+eyxxx6cfPLJ9OrVi+OOO46lS5fSrVs3LrroIg488EBuv/12Xn75ZQ4//HD69+/PQQcd\nxIsvvgjAG2+8wTHHHEPv3r3p3bv36lDfcstkvevXX3+dwYMH06dPH3r27Mnjjz8OJHdmv/XWWwBc\neuml9OzZk549e3L55ZevrmvPPffk9NNPZ6+99uLLX/4yH3zwQUu/PJYDDncrrDlz5jBixAhmzJhB\nx44dueqqq4BkqOETTzzBsGHDGDFiBL///e+ZMmUKl1xyCSNHjgTg7LPP5vOf/zzTp09n6tSp7LXX\nXms9980338xhhx3GtGnTmD59On369Fnr+JQpUxg9ejTPPvsszzzzDNdddx3PPfccAP/4xz8488wz\nmTlzJttssw133HFHC7waljfulrHC2nHHHTnggAMAOOGEE7jiiisAGDp0KADvv/8+Tz31FF//+po1\nvD/6KFli9eGHH+bGG5Pplurq6th6663Xeu599tmHU089leXLl3P00UevE+5PPPEExxxzDFtssQUA\nxx57LI8//jhDhgyhe/fuq9v379+f+fPnV/k3tyLwmbsVVsNRJ6XtUuCuWrWKbbbZhmnTpq3+mj17\ndkXPPXjwYB577DF22GEHTjzxxNVvBCUbWiRns802W/24rq6OFStWVPQzzco53K2wXn31VZ5++mkA\nxo0bx4EHrrXWNx07dqR79+7cfvvtQBLI06dPB+CQQw7h6quvBpILr++9995a3/vKK6+w3Xbbcfrp\np3Paaacxderas2APHjyYu+66i6VLl7JkyRLuvPNODjrooE3ye1oxOdytsPbcc0/GjBlDr169+Pe/\n/80ZZ5yxTpuxY8dy/fXX07t3b/baay/++tdkidVRo0YxceJE9t57b/r378/MmTPX+r5HHnmEPn36\n0LdvX+644w7OOeectY7369ePU045hYEDB7Lvvvvy7W9/m759+266X9YKJ7M1VAcMGBCez72YZs+e\nzZ577plpDfPnz+fII4/khRdeyLSOjVXpa1i04X8bkpfXQtKUiBjQVDufuZuZ5ZDD3QqpW7durfas\n3awSDnczsxxyuJuZ5ZDD3cwshxzuZmY55HA3q5L58+fTs2dPIBnnfuSRR2ZckRWZ55ax7F24ddNt\nmvV87zareUQQEbRp43Mdyw//a7ZCKk2tO3LkSPr168dNN93EfvvtR79+/fj617/O+++/D8CkSZPY\nf//96d27NwMHDmTx4sXMnz+fgw46iH79+tGvX78m53A3y4LD3Qprzpw5nHTSSTzwwANcf/31PPjg\ng0ydOpUB
AwZw6aWXsmzZMoYOHcqoUaOYPn06Dz74IJtvvjnbbbcdDzzwAFOnTuXWW2/l7LPPzvpX\nMVuHu2WssLp27cqgQYO
|
2019-12-23 01:20:51 +08:00
|
|
|
"text/plain": [
|
2020-01-07 17:29:25 +08:00
|
|
|
"<matplotlib.figure.Figure at 0x1a1deb3438>"
|
2019-12-23 01:20:51 +08:00
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
"# type+length markov\n",
"# NOTE(review): presumably the first 10 CSV columns are the 'type' and 'length' features - confirm against the CSV writer.\n",
"N_FEATURES = 10\n",
"# With average='micro' on single-label targets, precision, recall and F1 all\n",
"# reduce to accuracy, so the three columns of the table below are identical.\n",
"# NOTE(review): consider 'macro' or 'weighted' if per-class quality matters.\n",
"AVERAGE = 'micro'\n",
"# Fixed seed for the randomised estimators so that re-running reproduces the table.\n",
"SEED = 0\n",
"\n",
"x_train = train_df.iloc[:, :N_FEATURES].values.copy()\n",
"y_train = train_df['label'].values.copy()\n",
"x_test = test_df.iloc[:, :N_FEATURES].values.copy()\n",
"y_test = test_df['label'].values.copy()\n",
"\n",
"# (row label, estimator) pairs; they are fitted and scored in this order.\n",
"classifiers = [\n",
"    ('LogisticRegression', LogisticRegression()),\n",
"    ('SVM', SVC()),\n",
"    ('GaussianNB', GaussianNB()),\n",
"    ('tree', tree.DecisionTreeClassifier(random_state=SEED)),\n",
"    ('RandomForest', RandomForestClassifier(random_state=SEED)),\n",
"]\n",
"\n",
"row_names = []\n",
"score_rows = []\n",
"for name, classifier in classifiers:\n",
"    classifier.fit(x_train, y_train)\n",
"    y_pred = classifier.predict(x_test)\n",
"    row_names.append(name)\n",
"    score_rows.append([\n",
"        precision_score(y_test, y_pred, average=AVERAGE),\n",
"        recall_score(y_test, y_pred, average=AVERAGE),\n",
"        f1_score(y_test, y_pred, average=AVERAGE),\n",
"    ])\n",
"\n",
"# One row per classifier, one column per metric.\n",
"score_df = pd.DataFrame(score_rows, index=row_names, columns=['precision', 'recall', 'f1'])\n",
"print(score_df)\n",
"ax = score_df.plot.bar(title='type+length markov')\n",
"fig = ax.get_figure()\n",
"#fig.savefig('../figure/type_length.svg')"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2020-01-07 17:29:25 +08:00
|
|
|
"execution_count": 37,
|
2019-12-23 01:20:51 +08:00
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
2020-01-07 17:29:25 +08:00
|
|
|
"[4670, 5]\n",
|
|
|
|
|
"0.32762312633832974\n",
|
|
|
|
|
"0.998692810458\n",
|
|
|
|
|
"0.998692810458\n",
|
|
|
|
|
"0.998692810458\n"
|
2019-12-23 01:20:51 +08:00
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
2020-01-07 17:29:25 +08:00
|
|
|
"\"\\nrf_precision = precision_score(y_test, y_pred, average='micro')\\nrf_recall = recall_score(y_test, y_pred, average='micro')\\nrf_f1 = f1_score(y_test, y_pred, average='micro')\\nscore_df = pd.DataFrame(np.zeros((5,3)),index = ['LogisticRegression', 'SVM', 'GaussianNB', 'tree', 'RandomForest'], columns = ['precision', 'recall', 'f1'])\\nscore_df.loc['LogisticRegression'] = [lr_precision, lr_recall, lr_f1]\\nscore_df.loc['SVM'] = [svm_precision, svm_recall, svm_f1]\\nscore_df.loc['GaussianNB'] = [gn_precision, gn_recall, gn_f1]\\nscore_df.loc['tree'] = [tr_precision, tr_recall, tr_f1]\\nscore_df.loc['RandomForest'] = [rf_precision, rf_recall, rf_f1]\\nax = score_df.plot.bar(title='type+length+burst markov')\\nfig = ax.get_figure()\\nprint(score_df)\\n#fig.savefig('../figure/type_length_burst.svg')\\n\""
|
2019-12-23 01:20:51 +08:00
|
|
|
]
|
|
|
|
|
},
|
2020-01-07 17:29:25 +08:00
|
|
|
"execution_count": 37,
|
2019-12-23 01:20:51 +08:00
|
|
|
"metadata": {},
|
2020-01-07 17:29:25 +08:00
|
|
|
"output_type": "execute_result"
|
2019-12-23 01:20:51 +08:00
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
"# type+length+burst markov: features are columns 0..14, the target is the 'label' column\n",
"x_train, x_test = (frame.iloc[:, 0:15].values.copy() for frame in (train_df, test_df))\n",
"y_train, y_test = (frame['label'].values.copy() for frame in (train_df, test_df))\n",
|
2020-01-07 17:29:25 +08:00
|
|
|
"\n",
|
|
|
|
"def my_pred(y_pred, y_test, proba, threshold=0.95):\n",
"    '''Score only the predictions whose highest class probability reaches `threshold`.\n",
"\n",
"    Prints, one value per line: the shape of `proba` as a two-element list, the\n",
"    fraction of test samples that were kept, then the micro-averaged precision,\n",
"    recall and F1 over the kept samples. Returns None.\n",
"\n",
"    y_pred    -- predicted label for each test sample, indexed by row position\n",
"    y_test    -- true label for each test sample, in the same order as y_pred\n",
"    proba     -- 2-D array; row i holds the per-class probabilities of sample i\n",
"    threshold -- samples whose largest probability is below this value are skipped\n",
"    '''\n",
"    n_rows, n_cols = proba.shape\n",
"    print([n_rows, n_cols])\n",
"    # A row is kept unless its best probability is strictly below the threshold.\n",
"    kept = [i for i in range(n_rows) if not (max(proba[i]) < threshold)]\n",
"    kept_pred = [y_pred[i] for i in kept]\n",
"    kept_true = [y_test[i] for i in kept]\n",
"    # Coverage: share of the test set that passed the confidence filter.\n",
"    print(len(kept_true) / len(y_test) if len(y_test) else 0.0)\n",
"    if not kept_true:\n",
"        # Nothing passed the filter, so there is nothing to score.\n",
"        print('no prediction reached threshold', threshold)\n",
"        return\n",
"    precision = precision_score(kept_true, kept_pred, average='micro')\n",
"    recall = recall_score(kept_true, kept_pred, average='micro')\n",
"    f1 = f1_score(kept_true, kept_pred, average='micro')\n",
"    print(precision)\n",
"    print(recall)\n",
"    print(f1)\n",
|
|
|
|
|
"\n",
|
|
|
|
"# --- Disabled: baseline classifiers for this feature set ---\n",
"# (kept as comments; they were previously parked in a bare string literal)\n",
"# lr_classifer = LogisticRegression()\n",
"# lr_classifer.fit(x_train, y_train)\n",
"# y_pred = lr_classifer.predict(x_test)\n",
"# lr_precision = precision_score(y_test, y_pred, average='micro')\n",
"# lr_recall = recall_score(y_test, y_pred, average='micro')\n",
"# lr_f1 = f1_score(y_test, y_pred, average='micro')\n",
"#\n",
"# svm_classifer = SVC()\n",
"# svm_classifer.fit(x_train, y_train)\n",
"# y_pred = svm_classifer.predict(x_test)\n",
"# svm_precision = precision_score(y_test, y_pred, average='micro')\n",
"# svm_recall = recall_score(y_test, y_pred, average='micro')\n",
"# svm_f1 = f1_score(y_test, y_pred, average='micro')\n",
"#\n",
"# gn_classifer = GaussianNB()\n",
"# gn_classifer.fit(x_train, y_train)\n",
"# y_pred = gn_classifer.predict(x_test)\n",
"# gn_precision = precision_score(y_test, y_pred, average='micro')\n",
"# gn_recall = recall_score(y_test, y_pred, average='micro')\n",
"# gn_f1 = f1_score(y_test, y_pred, average='micro')\n",
"#\n",
"# tr_classifer = tree.DecisionTreeClassifier()\n",
"# tr_classifer.fit(x_train, y_train)\n",
"# y_pred = tr_classifer.predict(x_test)\n",
"# tr_precision = precision_score(y_test, y_pred, average='micro')\n",
"# tr_recall = recall_score(y_test, y_pred, average='micro')\n",
"# tr_f1 = f1_score(y_test, y_pred, average='micro')\n",
"\n",
"# Fixed seed so that the forest, and therefore the printed scores, are reproducible.\n",
"rf_classifer = RandomForestClassifier(random_state=0)\n",
"rf_classifer.fit(x_train, y_train)\n",
"y_pred = rf_classifer.predict(x_test)\n",
"# Per-class probabilities, used to keep only the confident predictions.\n",
"proba = rf_classifer.predict_proba(x_test)\n",
"my_pred(y_pred, y_test, proba)\n",
"\n",
"# --- Disabled: score table and bar chart for all five classifiers ---\n",
"# (comments instead of a trailing string literal, which the notebook echoed as the cell output)\n",
"# rf_precision = precision_score(y_test, y_pred, average='micro')\n",
"# rf_recall = recall_score(y_test, y_pred, average='micro')\n",
"# rf_f1 = f1_score(y_test, y_pred, average='micro')\n",
"# score_df = pd.DataFrame(np.zeros((5,3)),index = ['LogisticRegression', 'SVM', 'GaussianNB', 'tree', 'RandomForest'], columns = ['precision', 'recall', 'f1'])\n",
"# score_df.loc['LogisticRegression'] = [lr_precision, lr_recall, lr_f1]\n",
"# score_df.loc['SVM'] = [svm_precision, svm_recall, svm_f1]\n",
"# score_df.loc['GaussianNB'] = [gn_precision, gn_recall, gn_f1]\n",
"# score_df.loc['tree'] = [tr_precision, tr_recall, tr_f1]\n",
"# score_df.loc['RandomForest'] = [rf_precision, rf_recall, rf_f1]\n",
"# ax = score_df.plot.bar(title='type+length+burst markov')\n",
"# fig = ax.get_figure()\n",
"# print(score_df)\n",
"# #fig.savefig('../figure/type_length_burst.svg')"
|
2019-12-23 01:20:51 +08:00
|
|
|
]
|
|
|
|
|
},
|
2020-01-07 17:29:25 +08:00
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {
|
|
|
|
|
"collapsed": true
|
|
|
|
|
},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": []
|
|
|
|
|
},
|
2019-12-23 01:20:51 +08:00
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {
|
|
|
|
|
"collapsed": true
|
|
|
|
|
},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": []
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"metadata": {
|
|
|
|
|
"kernelspec": {
|
|
|
|
|
"display_name": "Python 3",
|
|
|
|
|
"language": "python",
|
|
|
|
|
"name": "python3"
|
|
|
|
|
},
|
|
|
|
|
"language_info": {
|
|
|
|
|
"codemirror_mode": {
|
|
|
|
|
"name": "ipython",
|
|
|
|
|
"version": 3
|
|
|
|
|
},
|
|
|
|
|
"file_extension": ".py",
|
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
|
"name": "python",
|
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
|
"version": "3.6.2"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"nbformat": 4,
|
|
|
|
|
"nbformat_minor": 2
|
|
|
|
|
}
|