This repository has been archived on 2025-09-14. You can view files and clone it, but cannot push or open issues or pull requests.
Files
cuiyiming-gradproj/Experiment/MarkovModel/markov_tofig.ipynb

616 lines
57 KiB
Plaintext
Raw Normal View History

2019-12-23 01:20:51 +08:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 169,
2019-12-23 01:20:51 +08:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"end2\n"
]
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"#date = '2019-12-20_21'\n",
"date = 'noProxy/All'\n",
2019-12-23 01:20:51 +08:00
"root_dir = '/Users/Leo/Documents/github/GradProj/'\n",
"train_path = root_dir + 'Experiment/MarkovModel/CsvFile/' + date + '/train_15_0.8.csv'\n",
"test_path = root_dir + 'Experiment/MarkovModel/CsvFile/' + date + '/test_15_0.8.csv'\n",
2019-12-23 01:20:51 +08:00
"train_df = pd.read_csv(train_path,index_col=0)\n",
"test_df = pd.read_csv(test_path,index_col=0)\n",
"print('end2')"
]
},
{
"cell_type": "code",
"execution_count": 170,
2019-12-23 01:20:51 +08:00
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.svm import SVC\n",
"from sklearn.naive_bayes import GaussianNB\n",
"from sklearn import tree\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.metrics import f1_score,recall_score,precision_score\n",
"\n",
"%matplotlib inline\n",
"import os\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 171,
2019-12-23 01:20:51 +08:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1\n",
"LogisticRegression 0.447211 0.447211 0.447211\n",
"SVM 0.451245 0.451245 0.451245\n",
"GaussianNB 0.371946 0.371946 0.371946\n",
"tree 0.623329 0.623329 0.623329\n",
"RandomForest 0.626095 0.626095 0.626095\n"
2019-12-23 01:20:51 +08:00
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXYAAAFXCAYAAAC2rmX2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAHwBJREFUeJzt3X+8VXWd7/HX2x9xTJQAT6NCCv7IX2OKFwQV64AJTIMj\nUc44hKKk3tvth2VQQNNENqOIPjLnTlnOlWuZk3PLWxqZioL5IwSOFioBJaJ1NAlQUTQE9HP/WOvI\ncbdh73Pc56y913o/H4/zYK21v3udz1mPw/us/V3r+12KCMzMLD92y7oAMzOrLQe7mVnOONjNzHLG\nwW5mljMOdjOznHGwm5nljIPdrEYkDZL0VNZ1mDnYre5I8uAKs7fBwW5mljMOdqsbkq6StCFd3iBp\ndbo8VtKiDu2uljRdUouk5ZJ+IWmdpK92aPMhSasl/UnS7Arf9wZJP5X0rKQrJD0t6br0tQsl/UHS\nc5K+WPKeT0iaJ+l3Zfb5AUmPS2pO10+Q9Ei6n29I2l3SwZJWdnjP5yT9a5cPoFm7iPCXv+rqK/m1\nfMv6HsCfgP7p+u+AwUALsB0YBvQFngRGAM3AmrRNH+A3wJBdfL8bgHnAp4HfAkPSfTUBDwIDgd7A\nemCfDu/5PfBxoF+6bRDwFHA0sBI4JN2+Z7q/D6X7XAB8In1tKTA4Xb5rV3X6y1/VfvmM3epeRGwH\nfgacIelo4MWIWJu+/GhELIuIF4DbSYJ9BDAAWAysBg4AjqnwbX4JvEwStC8Au0XEFuAc4GPAzSR/\nPPp3eM/tEXF9RDzfYVtv4IfAq0B7jUcCWyPi9nSf3yIJeYBbgL+R9E5gQET8qtrjYrYzDnZrFLcA\nE4C/IwnOduqwvBvwRrptUUTsHxH7AwcBP66w/+0l/yLpEOAXJGfqFwNtJe95qMx+9gU+S3LmP6nD\n9tILwu11/xD4G+A0YH6FGs2q4mC3erRR0mBJe0p6V7rtLmA48PfAjzq0fZ+k4ZL6kZwFLyYJ3BMk\nHS2pCbiHJDg76wTgD8B3geOB91TxnmcjYgHwFeCrknoBq4Be6bWCJuB/kHy6ICKeJPkk8OGSn8us\nyxzsVo++QNK3/RxwLEBEbCUJ6DfSMGz3a+BrJH3a89JumT+R9H3/BHgauD8ibutCHXeTnGk/B5xJ\n0m//3mreGBG/Be4DPhkR24CPAnPSelYB3+nQ/HagJSKWdaFGs7+gCN8ybPVP0juAL5L0VV+RbmsB\nZkdES4almdWdPbIuwKxKS0nuLvlA1oWY1TufsZuZ5Yz72M3McsbBbmaWMw52M7OcyeTi6X777ReD\nBg3K4lubmTWshx9+eENENFdql0mwDxo0iNbW1iy+tZlZw5L0dDXt3BVjZpYzDnYzs5xxsJuZ5Uzd\njDzdtm0bbW1tbNmyJetSGlJTUxMDBw5kzz33zLoUM8tY3QR7W1sb++yzD4MGDUJS5TfYmyKCjRs3\n0tbWxuDBg7Mux8wyVjddMVu2bKF///4O9S6QRP/+/f1px8yAOgp2wKH+NvjYmVm7uumKKTVoxs9q\nur+n5vxtTfdXjeeee4558+Yxa9asmrQzM6tG3QZ7Huy///5VhXW17cysazpzovhU06TKjVLHDj6o\n6raPTXms6rZvl4O9g9mzZ7N06VI2bdrEgAED+MEPfsBpp53GSSedxK9+9SvuuOMO1q1bx5QpU3jh\nhReYMGECM2fOZM2aNVxwwQW8/PLLjBkzhssuuwyAp556itmzZ3PDDTcAsHr1aqZOncrWrVuZMGEC\nX/rSl8q2W7t2LVOnTuWVV17hrLPOYvr06Zx33nkceuih3H777Uhi4cKFNDU1ZXGYrEEULcxsh7rq\nY68HJ598Mg8++CD9+/fn1ltvZcmSJQwbNow77rgDgMsvv5yzzz6bJUuWcOutt7Jx40amT5/OZZdd\nRmtrK1u2bGHz5s1l9z1//nwmTpzIsmXLOOignf/nmD59OpdeeikPPfQQP//5z1m5ciUAL774IosX\nL+aII47gkUceqf0Pb2a54GAvMWzYMACOP/541q5dyzHHHMPEiRPffH316tVce+21tLS0sHnzZp59\n9llWrVr15vvmzp3L3nvvXXbf55xzDitWrGD8+PE7DX+AlStXctJJJ7Hbbrtx4oknsmrVKgDOP/98\nAA4++GC2bt1ak5/XzPLHwV5iyZIlADzyyCMceuih9O7d+y2vH3HEEcyZM4d7772XadOm0bdvX448\n8kiWLl0KwLhx43jiiSfK7nvhwoXMmDGD2267jSuuuIJt27aVbXf00Ufz0EMPEREsW7aMo446CuAv\najEzK8fBXqK1tZWRI0eyadMmzjjjjL94fcaMGVx55ZWMGDGCu+++m/3335+5c+cya9YsRowYwSmn\nnMLhhx9edt+HHXYY55xzDsOGDWPcuHE7HSU6d+5cvvzlLzN8+HDGjRvHkUceWdOf0czyreIzTyU1\nAT8C3gM8CpwbZd4k6QvAROAF4MyI2GlfwdChQ6N02t6VK1e+eWaaldmzZ9PS0kJLS0umdXRVPRxD\nqx++eLpDXo6FpIcjYmildtWcsU8G2iLiOKAvcHqZb3YIcExEjAB+DgzsZL11oT3YzcwaWTXBPhpY\nkC4vBEaVaXMa0FfSfcCpwNralGdmZp1VTbD3Bzalyy8B/cq0aQbWR8T7Sc7WR5Y2kHSRpFZJrevX\nr+9qvWZmVkE1wb4B6JMu90nXS70ErE6XnwQGlDaIiOsiYmhEDG1urvjIPjMz66Jqgv0eYEy6PBpY\nVKbNw0B7h/5hJOFuZmYZqGZKgZuAiZIeBZYDayRdFRHT2htExGJJ50paBqyMiKVvu7LZfSq36dT+\nNlVuU2P33nsv9957L7Nnz97lNjOzWqoY7BHxGjC+ZPO0Mu0+UauizMys6zxAqYOWlhZmzpzJuHHj\nAFi3bh3jxo1j+PDhXH755QCsWbOGUaNGMXTo0DdnZFyxYgXDhg1j+PDhXHvttZnVb2YGDva36OqE\nX8888wzXX3898+fPZ968eRn/FGZWdJ62t4NyE34tXryYG264YacTfu2+++7svvvuzJo1i/3224/t\n27dnVb6ZGeAz9rfo6oRfs2fP5jvf+Q5z5szh9ddfz6J0M7M3+Yx9F2bMmMHUqVOZOXMmhx12GJMn\nT2bu3LlccMEFbNmyhbFjx3L44YczceJExo4dyyGHHML27dvZsmWLH4JhZpmpOAlYd6jXScAanY+h\ndZSXia9qIS/HopaTgJmZWQNxsJuZ5YyD3cwsZxzsZmY542A3M8uZur3d8djvHlvT/VV7RXrTpk1M\nmDCBbdu2cckllzB27FjOPfdcbrnllprWY2bWXXzGXmL58uWccsopPPDAAwwZMoQTTzyR5cuXZ12W\nmVnVHOwdXH311Xzyk5/ke9/7HiNHjqR3796sWLEi67LMzDqlbrtisvC5z32OIUOGeL50M2toPmM3\nM8sZB7uZWc442M3McqZu+9izmjyopaWFlpaWt2x74oknMqnFzKwrfMZuZpYzDnYzs5ypq2DPYm74\nvPCxM7N2dRPsTU1NbNy40QHVBRHBxo0b/dQmMwPq6OLpwIEDaWtrY/369VmX0pCampoYOHBg1mWY\nWR2om2Dfc889GTx4cNZlmJk1vF12xUhqkjRf0nJJN0pSmTbDJLVJeiD9OqL7yjUzs0oq9bFPBtoi\n4jigL3B6mTZ9gWsjYmT6tbrWRZqZWfUqBftoYEG6vBAYVaZNX+AjkpZKuqXcWb2ZmfWcSsHeH9iU\nLr8E9CvT5gngyxFxInAA8IFyO5J0kaRWSa2+QGpm1n0qBfsGoE+63CddL/UUcHeH5XeX21FEXBcR\nQyNiaHNzc+crNTOzqlQK9nuAMenyaGBRmTaXAGdL2g34a+Dx2pVnZmadVSnYbwIGSHoUeB5YI+mq\nkjb/DpwPLAF+HBG/qX2ZZmZWrV3exx4RrwHjSzZPK2nzR6CltmWZmVlX1c2UAmZmVhsOdjOznHGw\nm5nljIPdzCxnHOxmZjn
2019-12-23 01:20:51 +08:00
"text/plain": [
"<matplotlib.figure.Figure at 0x1a1f0e6278>"
2019-12-23 01:20:51 +08:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#type markov\n",
"x_train = train_df.iloc[:,0:5].values.copy()\n",
"y_train = train_df['label'].values.copy()\n",
"x_test = test_df.iloc[:,0:5].values.copy()\n",
"y_test = test_df['label'].values.copy()\n",
"lr_classifer = LogisticRegression()\n",
"lr_classifer.fit(x_train, y_train)\n",
"y_pred = lr_classifer.predict(x_test)\n",
"lr_precision = precision_score(y_test, y_pred, average='micro')\n",
"lr_recall = recall_score(y_test, y_pred, average='micro')\n",
"lr_f1 = f1_score(y_test, y_pred, average='micro')\n",
"\n",
"svm_classifer = SVC()\n",
"svm_classifer.fit(x_train, y_train)\n",
"y_pred = svm_classifer.predict(x_test)\n",
"svm_precision = precision_score(y_test, y_pred, average='micro')\n",
"svm_recall = recall_score(y_test, y_pred, average='micro')\n",
"svm_f1 = f1_score(y_test, y_pred, average='micro')\n",
"\n",
"gn_classifer = GaussianNB()\n",
"gn_classifer.fit(x_train, y_train)\n",
"y_pred = gn_classifer.predict(x_test)\n",
"gn_precision = precision_score(y_test, y_pred, average='micro')\n",
"gn_recall = recall_score(y_test, y_pred, average='micro')\n",
"gn_f1 = f1_score(y_test, y_pred, average='micro')\n",
"\n",
"tr_classifer = tree.DecisionTreeClassifier()\n",
"tr_classifer.fit(x_train, y_train)\n",
"y_pred = tr_classifer.predict(x_test)\n",
"tr_precision = precision_score(y_test, y_pred, average='micro')\n",
"tr_recall = recall_score(y_test, y_pred, average='micro')\n",
"tr_f1 = f1_score(y_test, y_pred, average='micro')\n",
"\n",
"rf_classifer = RandomForestClassifier()\n",
"rf_classifer.fit(x_train, y_train)\n",
"y_pred = rf_classifer.predict(x_test)\n",
"rf_precision = precision_score(y_test, y_pred, average='micro')\n",
"rf_recall = recall_score(y_test, y_pred, average='micro')\n",
"rf_f1 = f1_score(y_test, y_pred, average='micro')\n",
"\n",
"score_df = pd.DataFrame(np.zeros((5,3)),index = ['LogisticRegression', 'SVM', 'GaussianNB', 'tree', 'RandomForest'], \\\n",
" columns = ['precision', 'recall', 'f1'])\n",
"score_df.loc['LogisticRegression'] = [lr_precision, lr_recall, lr_f1]\n",
"score_df.loc['SVM'] = [svm_precision, svm_recall, svm_f1]\n",
"score_df.loc['GaussianNB'] = [gn_precision, gn_recall, gn_f1]\n",
"score_df.loc['tree'] = [tr_precision, tr_recall, tr_f1]\n",
"score_df.loc['RandomForest'] = [rf_precision, rf_recall, rf_f1]\n",
"print(score_df)\n",
"ax = score_df.plot.bar(title='type markov')\n",
"fig = ax.get_figure()\n",
"#fig.savefig('../figure/type.svg')"
]
},
{
"cell_type": "code",
"execution_count": 172,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"LogisticRegression: \n",
"[ 0.81011609 0.86749717 0.9655914 0.46787479 0.75154004 0.72459016\n",
" 0.79276018 0.80684327 0.95348837 0.94557097]\n",
"[ 0.92782526 0.91408115 0.90707071 0.52592593 0.4187643 0.88844221\n",
" 0.89296636 0.73246493 0.95571096 0.848659 ]\n",
"[ 0.86498451 0.89018013 0.93541667 0.49520488 0.53783982 0.79819413\n",
" 0.83988495 0.76785714 0.95459837 0.89449773]\n",
"0.808587244328\n",
"0.801191080311\n",
"0.797865832655\n",
"SVM: \n",
"[ 0.85129118 0.8948626 0.91411043 0.4501992 0.43452958 0.62978723\n",
" 0.80304472 0.80848749 0.96424315 0.95048439]\n",
"[ 0.90788224 0.89379475 0.9030303 0.62777778 0.51258581 0.44623116\n",
" 0.86034659 0.74448898 0.94289044 0.84578544]\n",
"[ 0.87867647 0.89432836 0.90853659 0.52436195 0.47034121 0.52235294\n",
" 0.83070866 0.77516954 0.95344726 0.89508363]\n",
"0.770103998261\n",
"0.768481348614\n",
"0.765300659766\n",
"GaussianNB: \n",
"[ 0.76221198 0.76582278 0.86720322 0.2641844 0.36886633 0.83389831\n",
" 0.84943538 0.78398983 0.83895539 0.82690406]\n",
"[ 0.78537512 0.86634845 0.87070707 0.27592593 0.74828375 0.24723618\n",
" 0.69011213 0.61823647 0.8986014 0.80076628]\n",
"[ 0.77362021 0.81298992 0.86895161 0.26992754 0.49414431 0.38139535\n",
" 0.76152981 0.69131653 0.86775464 0.8136253 ]\n",
"0.716147167538\n",
"0.680159278335\n",
"0.673525522188\n",
"DecisionTree: \n",
"[ 0.79748823 0.86084142 0.95974576 0.48046875 0.91911765 0.95440085\n",
" 0.74148472 0.90111643 0.92674806 0.96805112]\n",
"[ 0.9648623 0.9522673 0.91515152 0.68333333 0.71510297 0.90452261\n",
" 0.86544343 0.56613226 0.97319347 0.87068966]\n",
"[ 0.87322733 0.90424929 0.9369183 0.56422018 0.8043758 0.92879257\n",
" 0.79868297 0.69538462 0.94940307 0.91679274]\n",
"0.850946297765\n",
"0.841069885565\n",
"0.837204688097\n",
"RandomForest: \n",
"[ 0.79686275 0.85353003 0.97494781 0.55539568 0.92587209 0.95278069\n",
" 0.77433628 0.92671756 0.94038245 0.97300104]\n",
"[ 0.9648623 0.96658711 0.94343434 0.71481481 0.72883295 0.91256281\n",
" 0.89194699 0.60821643 0.97435897 0.89750958]\n",
"[ 0.87285223 0.90654729 0.95893224 0.62510121 0.815621 0.93223819\n",
" 0.828991 0.73442226 0.95706926 0.93373194]\n",
"0.867382638472\n",
"0.860312631327\n",
"0.856550662602\n"
]
}
],
"source": [
"#type+length markov\n",
"\n",
"\n",
"def classify(classifer, name):\n",
" f1_score_list = list()\n",
" recall_score_list = list()\n",
" precision_score_list = list()\n",
" classifer.fit(x_train, y_train)\n",
" y_pred = classifer.predict(x_test)\n",
" precision_score_list.append(precision_score(y_test, y_pred, average=None))\n",
" recall_score_list.append(recall_score(y_test, y_pred, average=None))\n",
" f1_score_list.append(f1_score(y_test, y_pred, average=None))\n",
" scores = [np.mean(precision_score_list, axis=0), np.mean(recall_score_list, axis=0), np.mean(f1_score_list, axis=0)]\n",
" print(name + \": \")\n",
" for score in scores:\n",
" print(score)\n",
" print(np.mean(np.mean(precision_score_list, axis=0)))\n",
" print(np.mean(np.mean(recall_score_list, axis=0)))\n",
" print(np.mean(np.mean(f1_score_list, axis=0)))\n",
" \n",
" \n",
"x_train = train_df.iloc[:,0:15].values.copy()\n",
"y_train = train_df['label'].values.copy()\n",
"x_test = test_df.iloc[:,0:15].values.copy()\n",
"y_test = test_df['label'].values.copy()\n",
"\n",
"\n",
"classifer = LogisticRegression()\n",
"classify(classifer, \"LogisticRegression\")\n",
"\n",
"classifer = SVC()\n",
"classify(classifer, \"SVM\")\n",
"\n",
"classifer = GaussianNB()\n",
"classify(classifer, \"GaussianNB\")\n",
"\n",
"classifer = tree.DecisionTreeClassifier()\n",
"classify(classifer, \"DecisionTree\")\n",
"\n",
"classifer = RandomForestClassifier()\n",
"classify(classifer, \"RandomForest\")\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 173,
2019-12-23 01:20:51 +08:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1\n",
"LogisticRegression 0.767174 0.767174 0.767174\n",
"SVM 0.759451 0.759451 0.759451\n",
"GaussianNB 0.657561 0.657561 0.657561\n",
"tree 0.837137 0.837137 0.837137\n",
"RandomForest 0.843707 0.843707 0.843707\n"
]
}
],
"source": [
"#type+burst markov\n",
"x_train = np.append(train_df.iloc[:,0:5].values.copy(), train_df.iloc[:,10:15].values.copy(), axis = 1)\n",
"y_train = train_df['label'].values.copy()\n",
"x_test = np.append(test_df.iloc[:,0:5].values.copy(), test_df.iloc[:,10:15].values.copy(), axis = 1)\n",
"y_test = test_df['label'].values.copy()\n",
"\n",
"\n",
"lr_classifer = LogisticRegression()\n",
"lr_classifer.fit(x_train, y_train)\n",
"y_pred = lr_classifer.predict(x_test)\n",
"lr_precision = precision_score(y_test, y_pred, average='micro')\n",
"lr_recall = recall_score(y_test, y_pred, average='micro')\n",
"lr_f1 = f1_score(y_test, y_pred, average='micro')\n",
"\n",
"svm_classifer = SVC()\n",
"svm_classifer.fit(x_train, y_train)\n",
"y_pred = svm_classifer.predict(x_test)\n",
"svm_precision = precision_score(y_test, y_pred, average='micro')\n",
"svm_recall = recall_score(y_test, y_pred, average='micro')\n",
"svm_f1 = f1_score(y_test, y_pred, average='micro')\n",
"\n",
"gn_classifer = GaussianNB()\n",
"gn_classifer.fit(x_train, y_train)\n",
"y_pred = gn_classifer.predict(x_test)\n",
"gn_precision = precision_score(y_test, y_pred, average='micro')\n",
"gn_recall = recall_score(y_test, y_pred, average='micro')\n",
"gn_f1 = f1_score(y_test, y_pred, average='micro')\n",
"\n",
"tr_classifer = tree.DecisionTreeClassifier()\n",
"tr_classifer.fit(x_train, y_train)\n",
"y_pred = tr_classifer.predict(x_test)\n",
"tr_precision = precision_score(y_test, y_pred, average='micro')\n",
"tr_recall = recall_score(y_test, y_pred, average='micro')\n",
"tr_f1 = f1_score(y_test, y_pred, average='micro')\n",
"\n",
"rf_classifer = RandomForestClassifier()\n",
"rf_classifer.fit(x_train, y_train)\n",
"y_pred = rf_classifer.predict(x_test)\n",
"rf_precision = precision_score(y_test, y_pred, average='micro')\n",
"rf_recall = recall_score(y_test, y_pred, average='micro')\n",
"rf_f1 = f1_score(y_test, y_pred, average='micro')\n",
"\n",
"score_df = pd.DataFrame(np.zeros((5,3)),index = ['LogisticRegression', 'SVM', 'GaussianNB', 'tree', 'RandomForest'], \\\n",
" columns = ['precision', 'recall', 'f1'])\n",
"score_df.loc['LogisticRegression'] = [lr_precision, lr_recall, lr_f1]\n",
"score_df.loc['SVM'] = [svm_precision, svm_recall, svm_f1]\n",
"score_df.loc['GaussianNB'] = [gn_precision, gn_recall, gn_f1]\n",
"score_df.loc['tree'] = [tr_precision, tr_recall, tr_f1]\n",
"score_df.loc['RandomForest'] = [rf_precision, rf_recall, rf_f1]\n",
"print(score_df)\n",
"#ax = score_df.plot.bar(title='type+length markov')\n",
"#fig = ax.get_figure()\n",
"#fig.savefig('../figure/type_length.svg')\n",
"#print(score_df.loc['RandomForest']) "
]
},
{
"cell_type": "code",
"execution_count": 174,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ 0.83686527 0.72249169 0.65245721 0.71014108 0.63185015 0.69424787\n",
" 0.68489078 0.52464035 0.82587551 0.84851884]\n",
"[ 0.83686527 0.72249169 0.65245721 0.71014108 0.63185015 0.42307153\n",
" 0. 0.15176948 0. 0. ]\n"
]
}
],
"source": [
"\n",
"x_train = train_df.iloc[:,0:10].values.copy()\n",
"y_train = train_df['label'].values.copy()\n",
"x_test = test_df.iloc[:,0:15].values.copy()\n",
"y_test = test_df['label'].values.copy()\n",
"\n",
"x1_train = np.append(train_df.iloc[:,0:5].values.copy(), train_df.iloc[:,10:15].values.copy(), axis = 1)\n",
"y1_train = train_df['label'].values.copy()\n",
"\n",
"print(x_train[10])\n",
"print(x1_train[10])"
]
},
{
"cell_type": "code",
"execution_count": 175,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1\n",
"LogisticRegression 0.509336 0.509336 0.509336\n",
"SVM 0.501153 0.501153 0.501153\n",
"GaussianNB 0.413439 0.413439 0.413439\n",
"tree 0.624020 0.624020 0.624020\n",
"RandomForest 0.627593 0.627593 0.627593\n"
2019-12-23 01:20:51 +08:00
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXYAAAFXCAYAAAC2rmX2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XucVXW9//HX20uOeUHEKRRT8HK8ZWqBgGINqMCvg0VU\nJzNUJPM8unoyKbAssn5e0F8euxzLfnIws8uvrDQsTAVKjasWqAEn8ZJYEqCCaIjo5/fHWiOb3R72\nnnHPrL3Xej8fj3nMWmt/Z+3PLJj3rPmu9f0uRQRmZpYfO2RdgJmZ1ZeD3cwsZxzsZmY542A3M8sZ\nB7uZWc442M3McsbBbj1K0gRJM3r4PedKauuu9s0ii2Nv2XCwF5Skbh3AIKlN0tTufI8O3neGpAmd\n/Jq53VPNdt+zLYv3tWJwsJuZ5YyDvWAkXSVpbbq8VtKKdHmUpDkl7a6WNCk9s1wi6beSVkv6ckmb\nd0paIenvr+XsvNJ+JPWX9JikCyStkbRUUt/0tTdL+pOkv0j6rqQHJL0t/b5OB76Rfm8jS97m6LTd\n05I+WENZH5L0hKTFkg5J33ebvwZK/+qRFJLemrb/SrqtRdLNaf3/I2loun0ecAtwYlrn9CrH5zFJ\nP5D0N0mXpvs7S9IOkq5N/10eKf1+068ZIWmOpBsq7HOKpJ9J2iFdH59+zePt36OksyV9u+RrbpF0\nag3HzrIWEf4o4EfyT7/N+k7A34E+6fqfgQFAG7AFGAT0Bh4BhgCtwMq0TS/gT8BxwBeBp4CngY3p\n8qyS95kAzChZ72g//YEXgf+T1vZr4IL0a34MfBQ4Anii7PuYAUwo2zYXWAq8Efhguvz6tLangM0l\ny6ek7X+Zvu8k4LZK+y49hkAAdwHHAK9Pt40DFpGcQI0ELilp3wbMrfHf6jHgLOBmYDrwyfTz4HTb\nzsBQYGHZ19wPDAP2KD32wBnAb4GWdPvhwJPAgcCbgCeAo4G9gD+nbV6X7nOnrP/v+qP6h8/YDYCI\n2ALcBpwm6Ujg2Yh4NH15aUQsiohngF+RBPsQoB8wD1gB7AscFRGXRERfklC7KiL6RsTo7bx1xf2k\nrwm4OK1tIbBnun0TSei2f9TiGxGxGpgP7BkRL6S19QV+374cEXem7a9P3/cG4ITynUlShff4fEQs\niYgX0vUl6fd2CfAK8KUaa63k98BzJZ93iIgFwFXAl4FrgDeUfc20iLgnIp4r2fY24OvAHyNiU7rt\nVOCXEfF4RDwB/AwYGRHPAg+n/x9OAu5Kj4k1OAe7lboZGAu8C/hJyfbSENuBJKQEzCkJxwOAn3fh\nPbe3n6dKQrL0Yu8K4N9JujM+VuP7rKywn+15peRzpRDvV74hIuaXra8E3gI8CHyG5Cy7q7aUfSbt\nUvomsJjKx2F+hW17kZzp/y9JB5WWW9au/Xv+KfDO9OOnnS/bsuBgL651kgZI2lnSXum235D80P8b\n2/4Qv0XSYEl7k/yAzyMJjbdKOlJSC0k3xMntXxARcyNiag11bG8/r3TwNR8GhkfEQRFR/stkLUm3\nDpJaS7Z3tC8ioq3C5rMl7UjSbbEg3baBpLsC4PyO9tdO0kSSs/WfAleSdJeU1vkmSTtK2jt9r846\nkeR4/RL4QI1fc1f6C+cK4KvptjtJ/lI7QFI/4D3A7elrvyA5ox+atrMm4GAvrs8C95L0Kx8NEBGb\nSYLilYh4pKTtH4GvAMuA6Wm3zN9JAvYXwOPA3RFxa2eL6OJ+bgNWSHpK0iJJI0pe+xbwLklP89q6\nPraQ9DVPAD6Vbvsu8EFJd5Bcg6jmJ8D+wGrgv4GL21+IiAeBO0iO/4MkfdidNQN4H/AoyS+uVkm9\navzaG0h+ob4tIpYBk4HfkXT1fDEiHkjrXEdyNv/niHipCzVaBhTh+dgtIel1wOeAzRFxRbqtDZja\nwVltj5O0D0lgnkISZh8DTomI92RamFkDqfXCkxXDQpI7LN6RdSHb8SzJmfQqkr84/0pyl4iZpXzG\nbmaWM+5jNzPLGQe7mVnOONjNzHImk4un++yzT/Tv3z+LtzYza1r33Xff2ohordYuk2Dv378/ixcv\nzuKtzcyalqTHa2nnrhgzs5xxsJuZ5YyD3cwsZxpm5OlLL73EqlWr2LRpU/XG9k9aWlrYf//92Xnn\nnbMuxcwy1jDBvmrVKvbYYw/69+9P5amurSMRwbp161i1ahUDBgzIuhwzy1jDdMVs2rSJPn36ONS7\nQBJ9+vTxXztmBjRQsAMO9dfAx87M2jVMV0y5/pNvq+v+Hrv8X+u6v1o89dRTTJ8+nYsuuqgu7czM\natGwwZ4Hffv2rSmsa21nZl3TmRPFx1rOqLnt0QMOqLntA2c/UHPb18rBXmLq1KksXLiQ9evX069f\nP374wx9y8sknM3ToUP7whz8wa9YsVq9ezdlnn80zzzzD2LFjmTJlCitXruTcc8/lueeeY+TIkVx6\n6aUAPPbYY0ydOpUZM2YAsGLFCiZOnMjmzZsZO3Ysn//85yu2e/TRR5k4cSLPP/8873//+5k0aRIT\nJkzg4IMP5le/+hWSmD17Ni0tLVkcJmsSRQsz26qh+tgbwQknnMC9995Lnz59uOWWW1iwYAGDBg1i\n1qxZAFx22WWcfvrpLFiwgFtuuYV169YxadIkLr30UhYvXsymTZvYuHFjxX3PnDmTcePGsWjRIg44\noOMfjkmTJnHJJZcwf/58fv3rX7Ns2TIAnn32WebNm8dhhx3G/fffX/9v3sxywcFeZtCgQQAce+yx\nPProoxx11FGMGzfu1ddXrFjBtddeS1tbGxs3buSvf/0ry5cvf/Xrpk2bxm677VZx32eeeSYPPfQQ\nY8aM6TD8AZYtW8bQoUPZYYcdOP7441m+fDkA55xzDgAHHnggmzdvrsv3a2b542Avs2BB8kD6+++/\nn4MPPpjdd999m9cPO+wwLr/8cubOncuFF15I7969Ofzww1m4cCEAo0eP5uGHH66479mzZzN58mRu\nvfVWrrjiCl56qfKzgY888kjmz59PRLBo0SKOOOIIgH+qxcysEgd7mcWLFzNs2DDWr1/Paaed9k+v\nT548mSuvvJIhQ4Zw55130rdvX6ZNm8ZFF13EkCFDOPHEEzn00EMr7vuQQw7hzDPPZNCgQYwePbrD\nUaLTpk3j4osvZvDgwYwePZrDDz+8rt+jmeVb1WeeSmoBfgq8CVgKnBUVvkjSZ4FxwDPAuyOiw76C\ngQMHRvm0vcuWLXv1zDQrU6dOpa2tjba2tkzr6KpGOIbWOHzxdKu8HAtJ90XEwGrtajljHw+siohj\ngN7AqRXe7CDgqIgYAvwa2L+T9TaE9mA3M2tmtQT7COCOdHk2MLxCm5OB3pJ+B5wEPFqf8szMrLNq\nCfY+wPp0eQOwd4U2rcCaiHg7ydn6sPIGks6TtFjS4jVr1nS1XjMzq6KWYF8L9EqXe6Xr5TYAK9Ll\nR4B+5Q0i4rqIGBgRA1tbqz6yz8zMuqiWYL8LGJkujwDmVGhzH9DeoX8ISbibmVkGaplS4CZgnKSl\nwBJgpaSrIuLC9gYRMU/SWZIWAcsiYuFrrmxqr+ptOrW/9dXb1NncuXOZO3cuU6dO3e42M7N6qhrs\nEfEiMKZs84UV2n20XkWZmVnXeYBSiba2NqZMmcLo0aMBWL16NaNHj2bw4MFcdtllAKxcuZLhw4cz\ncODAV2dkfOihhxg0aBCDBw/m2muvzax+MzNwsG+jqxN+Pfnkk1x//fXMnDmT6dOnZ/xdmFnRedre\nEpUm/Jo3bx4zZszocMKvHXfckR133JGLLrqIffbZhy1btmRVvpkZ4DP2bXR1wq+pU6fyne98h8sv\nv5yXX345i9LNzF7lM/btmDx5MhMnTmTKlCkccsghjB8/nmnTpnHuueeyadMmRo0axaGHHsq4ceMY\nNWoUBx10EFu2bGHTpk1+CIaZZabqJGDdoVEnAWt2PoZWKi8TX9VDXo5FPScBMzOzJuJgNzPLGQe7\nmVnOONjNzHLGwW5mljM
2019-12-23 01:20:51 +08:00
"text/plain": [
"<matplotlib.figure.Figure at 0x1135ff390>"
2019-12-23 01:20:51 +08:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#type+length markov\n",
"x_train = train_df.iloc[:,0:10].values.copy()\n",
"y_train = train_df['label'].values.copy()\n",
"x_test = test_df.iloc[:,0:10].values.copy()\n",
"y_test = test_df['label'].values.copy()\n",
"\n",
"\n",
"# x_test = np.append(test_df.iloc[:,0:5].values.copy(), test_df.iloc[:,5:10].values.copy(), axis = 1)\n",
"# y_test = test_df['label'].values.copy()\n",
"\n",
"def my_pred(y_pred, y_test, proba):\n",
" y_pred1 = list()\n",
" y_test1 = list()\n",
" [rows, clos] = proba.shape\n",
" print([rows, clos])\n",
" for i in range(rows):\n",
" temp = max(proba[i])\n",
" if temp < 0.5:\n",
" continue\n",
" y_pred1.append(y_pred[i])\n",
" y_test1.append(y_test[i])\n",
" f1 = f1_score(y_test1, y_pred1, average=None)\n",
" recall = recall_score(y_test1, y_pred1, average=None)\n",
" precision = precision_score(y_test1, y_pred1, average=None)\n",
" print(precision)\n",
" print(recall)\n",
" print(f1)\n",
" print(np.mean(precision))\n",
" print(np.mean(recall))\n",
" print(np.mean(f1))\n",
" print(str(len(y_test)) + \": \" + str(len(y_test1)) + \": \" + str(len(y_test1) / len(y_test)))\n",
" \n",
"\n",
2019-12-23 01:20:51 +08:00
"lr_classifer = LogisticRegression()\n",
"lr_classifer.fit(x_train, y_train)\n",
"y_pred = lr_classifer.predict(x_test)\n",
"lr_precision = precision_score(y_test, y_pred, average='micro')\n",
"lr_recall = recall_score(y_test, y_pred, average='micro')\n",
"lr_f1 = f1_score(y_test, y_pred, average='micro')\n",
"\n",
"svm_classifer = SVC()\n",
"svm_classifer.fit(x_train, y_train)\n",
"y_pred = svm_classifer.predict(x_test)\n",
"svm_precision = precision_score(y_test, y_pred, average='micro')\n",
"svm_recall = recall_score(y_test, y_pred, average='micro')\n",
"svm_f1 = f1_score(y_test, y_pred, average='micro')\n",
"\n",
"gn_classifer = GaussianNB()\n",
"gn_classifer.fit(x_train, y_train)\n",
"y_pred = gn_classifer.predict(x_test)\n",
"gn_precision = precision_score(y_test, y_pred, average='micro')\n",
"gn_recall = recall_score(y_test, y_pred, average='micro')\n",
"gn_f1 = f1_score(y_test, y_pred, average='micro')\n",
"\n",
"tr_classifer = tree.DecisionTreeClassifier()\n",
"tr_classifer.fit(x_train, y_train)\n",
"y_pred = tr_classifer.predict(x_test)\n",
"tr_precision = precision_score(y_test, y_pred, average='micro')\n",
"tr_recall = recall_score(y_test, y_pred, average='micro')\n",
"tr_f1 = f1_score(y_test, y_pred, average='micro')\n",
"\n",
"\n",
2019-12-23 01:20:51 +08:00
"rf_classifer = RandomForestClassifier()\n",
"rf_classifer.fit(x_train, y_train)\n",
"y_pred = rf_classifer.predict(x_test)\n",
"proba = rf_classifer.predict_proba(x_test)\n",
"#my_pred(y_pred, y_test, proba)\n",
"\n",
"\n",
2019-12-23 01:20:51 +08:00
"rf_precision = precision_score(y_test, y_pred, average='micro')\n",
"rf_recall = recall_score(y_test, y_pred, average='micro')\n",
"rf_f1 = f1_score(y_test, y_pred, average='micro')\n",
"score_df = pd.DataFrame(np.zeros((5,3)),index = ['LogisticRegression', 'SVM', 'GaussianNB', 'tree', 'RandomForest'], \\\n",
" columns = ['precision', 'recall', 'f1'])\n",
"score_df.loc['LogisticRegression'] = [lr_precision, lr_recall, lr_f1]\n",
"score_df.loc['SVM'] = [svm_precision, svm_recall, svm_f1]\n",
"score_df.loc['GaussianNB'] = [gn_precision, gn_recall, gn_f1]\n",
"score_df.loc['tree'] = [tr_precision, tr_recall, tr_f1]\n",
"score_df.loc['RandomForest'] = [rf_precision, rf_recall, rf_f1]\n",
"ax = score_df.plot.bar(title='type+length+burst markov')\n",
2019-12-23 01:20:51 +08:00
"fig = ax.get_figure()\n",
"print(score_df)\n",
"fig.savefig('type_length_burst.svg')"
2019-12-23 01:20:51 +08:00
]
},
{
"cell_type": "code",
"execution_count": 176,
2019-12-23 01:20:51 +08:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1\n",
"LogisticRegression 0.811319 0.811319 0.811319\n",
"SVM 0.767865 0.767865 0.767865\n",
"GaussianNB 0.683956 0.683956 0.683956\n",
"tree 0.826418 0.826418 0.826418\n",
"RandomForest 0.850161 0.850161 0.850161\n"
2019-12-23 01:20:51 +08:00
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXYAAAFXCAYAAAC2rmX2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XucHGWd7/HPl4sMcglJGA0EIUBYbosRndxI0Ekgl8MG\nRXRdxAARNftyXXXBoARFI6hAyNHFy6J4yIKIR4+iglHAkMsKmNvAcSKYZCVcZFAwlyUxsgkEfvtH\n1ZBO0zPdM/RMzVR936/XvKaq+unq33Qy36l+qp6nFBGYmVl+7JF1AWZmVl8OdjOznHGwm5nljIPd\nzCxnHOxmZjnjYDczyxkHu/UqSTMk3dTLr7lUUnNPte8vsnjvLRsO9oKS1KMDGCQ1S5rTk6/Rweve\nJGlGF5+ztGeq6fQ1m7N4XSsGB7uZWc442AtG0jxJG9PljZLWpctTJC0pafcVSZekR5atkv5D0jOS\nPl/S5gxJ6yT9+dUcnVfaj6Rhkh6XdLGkDZJWSxqSPva3kn4n6Q+Svi3pt5Lekv5c5wBfS3+2ySUv\nc1LabrOk99ZQ1vskPSmpRdLw9HV3+zRQ+qlHUkh6c9r+ynRbg6Tb0vr/U9LYdPsy4HZgXFrn/Crv\nz+OSvifpT5K+lO7vfEl7SLo+/Xd5tPTnTZ8zUdISSTdX2OdsST+WtEe6Pj19zhPtP6OkCyR9s+Q5\nt0uaVMN7Z1mLCH8V8Cv5p99tfS/gz8DgdP33wJFAM7ATGAkMBB4FxgCNwPq0zQDgd8DJwGeBp4HN\nwLZ0+a6S15kB3FSy3tF+hgE7gP+d1nYncHH6nB8AHwaOB54s+zluAmaUbVsKrAZeD7w3XX5tWtvT\nwPMly6en7X+Wvu4lwM8r7bv0PQQCWASMAF6bbjsbWEVyADUZuKKkfTOwtMZ/q8eB84HbgPnAR9Pv\no9NtewNjgZVlz3kQGA8cUPreA+cC/wE0pNuPA54CjgDeADwJnAQcBPw+bfOadJ97Zf1/11/Vv3zE\nbgBExE7g58CZkk4Ano2Ix9KHV0fEqoj4L+AXJME+BhgKLAPWAYcAJ0bEFRExhCTU5kXEkIiY2slL\nV9xP+piAy9PaVgIHptu3k4Ru+1ctvhYRzwDLgQMj4rm0tiHAr9uXI+KetP2N6eveDJxSvjNJqvAa\nn46I1oh4Ll1vTX+2K4CXgM/VWGslvwb+UvJ9j4hYAcwDPg9cB7yu7DlzI+K+iPhLyba3AF8FfhMR\n29Ntk4CfRcQTEfEk8GNgckQ8CzyS/n84FViUvifWxznYrdRtwFnA24EflmwvDbE9SEJKwJKScDwc\n+Ek3XrOz/TxdEpKlJ3vXAf9I0p3xTzW+zvoK++nMSyXfK4X40PINEbG8bH098EbgIeATJEfZ3bWz\n7Dtpl9LXgRYqvw/LK2w7iORI/39JOqq03LJ27T/zj4Az0q8fdb1sy4KDvbg2STpS0t6SDkq3/ZLk\nl/497P5L/EZJoyUNIvkFX0YSGm+WdIKkBpJuiNPanxARSyNiTg11dLaflzp4zgeACRFxVESU/zHZ\nSNKtg6TGku0d7YuIaK6w+QJJe5J0W6xIt20l6a4A+HhH+2sn6UKSo/UfAdeSdJeU1vkGSXtKGpS+\nVleNI3m/fgb8Q43PWZT+wbkG+EK67R6ST2qHSxoKvBO4O33spyRH9GPTdtYPONiL65PA/ST9yicB\nRMTzJEHxUkQ8WtL2N8CVwBpgftot82eSgP0p8ARwb0Tc0dUiurmfnwPrJD0taZWkiSWPfQN4u6TN\nvLquj50kfc0zgI+l274NvFfSQpJzENX8EDgMeAb4d+Dy9gci4iFgIcn7/xBJH3ZX3QS8G3iM5A9X\no6QBNT73ZpI/qG+JiDXApcCvSLp6PhsRv03r3ERyNP/7iHihGzVaBhTh+dgtIek1wKeA5yPimnRb\nMzCng6PaXifpYJLAPJ0kzP4JOD0i3plpYWZ9SK0nnqwYVpJcYfG2rAvpxLMkR9JtJJ84/0hylYiZ\npXzEbmaWM+5jNzPLGQe7mVnOONjNzHImk5OnBx98cAwbNiyLlzYz67ceeOCBjRHRWK1dJsE+bNgw\nWlpasnhpM7N+S9ITtbRzV4yZWc442M3McsbBbmaWMx55ar3qhRdeoK2tje3bt1dvbK/Q0NDAYYcd\nxt577511KdaHOditV7W1tXHAAQcwbNgwKk9pbh2JCDZt2kRbWxtHHnlk1uVYH+auGOtV27dvZ/Dg\nwQ71bpDE4MGD/WnHqnKwW69zqHef3zurhbtiLFPDLv15Xff3+NV/V9f91eLpp59m/vz5XHbZZXVp\nZ/ZqOdjNXqUhQ4bUFNa1trP668oBxOMN59bc9qQjD6+57W8v+G3NbV8td8VY4cyZM4czzjiDcePG\n8Z73vIcXX3yR5uZmZs+ezdSpyX23n3nmGaZOncro0aO56qqrAFi/fj0TJkygqalpt4B+/PHHmTFj\nxsvr69atY9y4cYwcOZIvfvGLHbZ77LHHmDBhAqNGjeLaa68FYMaMGVx55ZWMHTuWU045xf3p1i0O\ndiukU045hfvvv5/Bgwdz++23s2LFCkaOHMldd90FwFVXXcU555zDihUruP3229m0aROXXHIJX/rS\nl2hpaWH79u1s27at4r4XLFjA2WefzapVqzj88I6P6C655BKuuOIKli9fzp133smaNWsAePbZZ1m2\nbBnHHnssDz74YP1/eMs9B7sV0siRIwF405vexGOPPcaJJ57I2Wef/fLj69at4/rrr6e5uZlt27bx\nxz/+kbVr1778vLlz57LffvtV3Pd5553Hww8/zLRp0zoMf4A1a9YwduxY9thjD0aNGsXatWsBeP/7\n3w/AEUccwfPPP1+Xn9eKxcFuhbRixQoAHnzwQY4++mj233//3R4/9thjufrqq1m6dCmzZs1i4MCB\nHHfccaxcuRKAqVOn8sgjj1Tc9+LFi7n00ku54447uOaaa3jhhcr3gD7hhBNYvnw5EcGqVas4/vjj\nAV5Ri1lXOditkFpaWhg/fjxbtmzhzDPPfMXjl156Kddeey1jxozhnnvuYciQIcydO5fLLruMMWPG\nMG7cOI455piK+x4+fDjnnXceI0eOZOrUqR2OEp07dy6XX345o0ePZurUqRx33HF1/RmtuDK552lT\nU1N42t5iWrNmzctHplmZM2cOzc3NNDc3Z1pHd9X6HhbtSpDO5OW9kPRARDRVa+fLHa1w5syZk3UJ\nZj3KXTFmZjlT9YhdUgPwI+ANwGrg/Cjrv5G0H/A94GDg/oj4ZA/Uupu8fLQyM6u3Wo7YpwNtETEC\nGAhMqtDmfcDyiBgHnCgp205UM7MCqyXYJwIL0+XFwIQKbZ4F9pe0J7Av4ItvzcwyUsvJ08HAlnR5\nK3BshTY/AT5FcuT+84hYX95A0kxgJtDpaDwrmDkD6ry/LdXb1NnSpUtZunTpbidlK20z6y21HLFv\nBNp/+wak6+VmA9dHxDBgkKRTyhtExA0R0RQRTY2Njd2t18zMqqgl2BcBk9PlicCSCm0OANpnK9oB\neOic9VndnfDr4YcfZuTIkYwePZrrr78+s/rNqqkl2G8FhkpaDWwG1kuaV9bmG8CHJS0j6WNfVN8y\nzeqnuxN+PfXUU9x4440sWLCA+fPnZ/xTmHWsah97ROwAppVtnlXW5nFgXP3KMus5lSb8WrZsGTfd\ndFOHE37tueee7Lnnnlx22WUcfPDB7Ny5M6vyzaryACUrnO5O+DVnzhy+9a1vcfXVV/Piiy9mUbpZ\nTTylgBXepZdeyoUXXsjs2bMZPnw406dPZ+7cuXzwgx9k+/btTJkyhWOOOYazzz6bKVOmcNRRR7Fz\n5062b99OQ0ND1uWbvYKD3bKV0eWJpYYMGcIvfvGL3bYNHz78Fe0uuugiLrroolfsr9KEYv15kjHr\n/xzsOeDpFcyslPvYzcxyxsFuZpYzDnYzs5xxsJuZ5YxPnlqmTrr5pLrur9aTuFu2bOGss87ihRde\n4OKLL2bKlCmcf/753Hb
2019-12-23 01:20:51 +08:00
"text/plain": [
"<matplotlib.figure.Figure at 0x1a1f919da0>"
2019-12-23 01:20:51 +08:00
]
},
"metadata": {},
"output_type": "display_data"
2019-12-23 01:20:51 +08:00
}
],
"source": [
"#type+length+burst markov\n",
"x_train = train_df.iloc[:,0:15].values.copy()\n",
"y_train = train_df['label'].values.copy()\n",
"x_test = test_df.iloc[:,0:15].values.copy()\n",
"y_test = test_df['label'].values.copy()\n",
2020-01-07 17:29:25 +08:00
"\n",
"\n",
"# x_test = np.append(test_df.iloc[:,0:5].values.copy(), test_df.iloc[:,5:10].values.copy(), axis = 1)\n",
"# y_test = test_df['label'].values.copy()\n",
"\n",
2020-01-07 17:29:25 +08:00
"def my_pred(y_pred, y_test, proba):\n",
" y_pred1 = list()\n",
" y_test1 = list()\n",
" [rows, clos] = proba.shape\n",
" print([rows, clos])\n",
" for i in range(rows):\n",
" temp = max(proba[i])\n",
" if temp < 0.5:\n",
2020-01-07 17:29:25 +08:00
" continue\n",
" y_pred1.append(y_pred[i])\n",
" y_test1.append(y_test[i])\n",
" f1 = f1_score(y_test1, y_pred1, average=None)\n",
" recall = recall_score(y_test1, y_pred1, average=None)\n",
" precision = precision_score(y_test1, y_pred1, average=None)\n",
2020-01-07 17:29:25 +08:00
" print(precision)\n",
" print(recall)\n",
" print(f1)\n",
" print(np.mean(precision))\n",
" print(np.mean(recall))\n",
" print(np.mean(f1))\n",
" print(str(len(y_test)) + \": \" + str(len(y_test1)) + \": \" + str(len(y_test1) / len(y_test)))\n",
" \n",
2020-01-07 17:29:25 +08:00
"\n",
2019-12-23 01:20:51 +08:00
"lr_classifer = LogisticRegression()\n",
"lr_classifer.fit(x_train, y_train)\n",
"y_pred = lr_classifer.predict(x_test)\n",
"lr_precision = precision_score(y_test, y_pred, average='micro')\n",
"lr_recall = recall_score(y_test, y_pred, average='micro')\n",
"lr_f1 = f1_score(y_test, y_pred, average='micro')\n",
"\n",
"svm_classifer = SVC()\n",
"svm_classifer.fit(x_train, y_train)\n",
"y_pred = svm_classifer.predict(x_test)\n",
"svm_precision = precision_score(y_test, y_pred, average='micro')\n",
"svm_recall = recall_score(y_test, y_pred, average='micro')\n",
"svm_f1 = f1_score(y_test, y_pred, average='micro')\n",
"\n",
"gn_classifer = GaussianNB()\n",
"gn_classifer.fit(x_train, y_train)\n",
"y_pred = gn_classifer.predict(x_test)\n",
"gn_precision = precision_score(y_test, y_pred, average='micro')\n",
"gn_recall = recall_score(y_test, y_pred, average='micro')\n",
"gn_f1 = f1_score(y_test, y_pred, average='micro')\n",
"\n",
"tr_classifer = tree.DecisionTreeClassifier()\n",
"tr_classifer.fit(x_train, y_train)\n",
"y_pred = tr_classifer.predict(x_test)\n",
"tr_precision = precision_score(y_test, y_pred, average='micro')\n",
"tr_recall = recall_score(y_test, y_pred, average='micro')\n",
"tr_f1 = f1_score(y_test, y_pred, average='micro')\n",
"\n",
2019-12-23 01:20:51 +08:00
"\n",
"rf_classifer = RandomForestClassifier()\n",
"rf_classifer.fit(x_train, y_train)\n",
"y_pred = rf_classifer.predict(x_test)\n",
2020-01-07 17:29:25 +08:00
"proba = rf_classifer.predict_proba(x_test)\n",
"#my_pred(y_pred, y_test, proba)\n",
2020-01-07 17:29:25 +08:00
"\n",
"\n",
2019-12-23 01:20:51 +08:00
"rf_precision = precision_score(y_test, y_pred, average='micro')\n",
"rf_recall = recall_score(y_test, y_pred, average='micro')\n",
"rf_f1 = f1_score(y_test, y_pred, average='micro')\n",
"score_df = pd.DataFrame(np.zeros((5,3)),index = ['LogisticRegression', 'SVM', 'GaussianNB', 'tree', 'RandomForest'], \\\n",
" columns = ['precision', 'recall', 'f1'])\n",
"score_df.loc['LogisticRegression'] = [lr_precision, lr_recall, lr_f1]\n",
"score_df.loc['SVM'] = [svm_precision, svm_recall, svm_f1]\n",
"score_df.loc['GaussianNB'] = [gn_precision, gn_recall, gn_f1]\n",
"score_df.loc['tree'] = [tr_precision, tr_recall, tr_f1]\n",
"score_df.loc['RandomForest'] = [rf_precision, rf_recall, rf_f1]\n",
"ax = score_df.plot.bar(title='type+length+burst markov')\n",
"fig = ax.get_figure()\n",
"print(score_df)\n",
"fig.savefig('type_length_burst.svg')"
2019-12-23 01:20:51 +08:00
]
},
2020-01-07 17:29:25 +08:00
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
2019-12-23 01:20:51 +08:00
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}