596 lines
30 KiB
Plaintext
596 lines
30 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 38,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"zhihu 3488\n",
|
||
"weibo 2705\n",
|
||
"douyin 2072\n",
|
||
"hupu 1217\n",
|
||
"toutiao 1058\n",
|
||
"Name: 4, dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 38,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"date = '2019-12-20_21'\n",
|
||
"root_dir = '/Users/Leo/Documents/github/GradProj/'\n",
|
||
"example_label_file = root_dir + 'DataSet/result/' + date + '/stream_tag.txt'\n",
|
||
"example_label_df = pd.read_table(example_label_file, sep='\\s+', header=None)\n",
|
||
"example_label_df[3] = 443\n",
|
||
"example_label_df[4].value_counts()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 39,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"import os\n",
|
||
"import pandas as pd\n",
|
||
"import numpy as np\n",
|
||
"import json"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 40,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"ciper_suits = {\n",
|
||
" '1305':0,\n",
|
||
" 'C030':1,\n",
|
||
"\t'C02C':2,\n",
|
||
"\t'C028':3,\n",
|
||
"\t'C024':4,\n",
|
||
"\t'C014':5,\n",
|
||
"\t'C00A':6,\n",
|
||
"\t'00A5':7,\n",
|
||
"\t'00A3':8,\n",
|
||
"\t'00A1':9,\n",
|
||
"\t'009F':10,\n",
|
||
"\t'006B':11,\n",
|
||
"\t'006A':12,\n",
|
||
"\t'0069':13,\n",
|
||
"\t'0068':14,\n",
|
||
"\t'0039':15,\n",
|
||
"\t'0038':16,\n",
|
||
"\t'0037':17,\n",
|
||
"\t'0036':18,\n",
|
||
"\t'0088':19,\n",
|
||
"\t'0087':20,\n",
|
||
"\t'0086':21,\n",
|
||
"\t'0085':22,\n",
|
||
"\t'C019':23,\n",
|
||
"\t'00A7':24,\n",
|
||
"\t'006D':25,\n",
|
||
"\t'003A':26,\n",
|
||
"\t'0089':27,\n",
|
||
"\t'C032':28,\n",
|
||
"\t'C02E':29,\n",
|
||
"\t'C02A':30,\n",
|
||
"\t'C026':31,\n",
|
||
"\t'C00F':32,\n",
|
||
"\t'C005':33,\n",
|
||
"\t'009D':34,\n",
|
||
"\t'003D':35,\n",
|
||
"\t'0035':36,\n",
|
||
"\t'0084':37,\n",
|
||
"\t'008D':38,\n",
|
||
"\t'C02F':39,\n",
|
||
"\t'C02B':40,\n",
|
||
"\t'C027':41,\n",
|
||
"\t'C023':42,\n",
|
||
"\t'C013':43,\n",
|
||
"\t'C009':44,\n",
|
||
"\t'00A4':45,\n",
|
||
"\t'00A2':46,\n",
|
||
"\t'00A0':47,\n",
|
||
"\t'009E':48,\n",
|
||
"\t'0067':49,\n",
|
||
"\t'0040':50,\n",
|
||
"\t'003F':51,\n",
|
||
"\t'003E':52,\n",
|
||
"\t'0033':53,\n",
|
||
"\t'0032':54,\n",
|
||
"\t'0031':55,\n",
|
||
"\t'0030':56,\n",
|
||
"\t'009A':57,\n",
|
||
"\t'0099':58,\n",
|
||
"\t'0098':59,\n",
|
||
"\t'0097':60,\n",
|
||
"\t'0045':61,\n",
|
||
"\t'0044':62,\n",
|
||
"\t'0043':63,\n",
|
||
"\t'0042':64,\n",
|
||
"\t'C018':65,\n",
|
||
"\t'00A6':66,\n",
|
||
"\t'006C':67,\n",
|
||
"\t'0034':68,\n",
|
||
"\t'009B':69,\n",
|
||
"\t'0046':70,\n",
|
||
"\t'C031':71,\n",
|
||
"\t'C02D':72,\n",
|
||
"\t'C029':73,\n",
|
||
"\t'C025':74,\n",
|
||
"\t'C00E':75,\n",
|
||
"\t'C004':76,\n",
|
||
"\t'009C':77,\n",
|
||
"\t'003C':78,\n",
|
||
"\t'002F':79,\n",
|
||
"\t'0096':80,\n",
|
||
"\t'0041':81,\n",
|
||
"\t'008C':82,\n",
|
||
"\t'C012':83,\n",
|
||
"\t'C008':84,\n",
|
||
"\t'0016':85,\n",
|
||
"\t'0013':86,\n",
|
||
"\t'0010':87,\n",
|
||
"\t'000D':88,\n",
|
||
"\t'C017':89,\n",
|
||
"\t'001B':90,\n",
|
||
"\t'C00D':91,\n",
|
||
"\t'C003':92,\n",
|
||
"\t'000A':93,\n",
|
||
"\t'0007':94,\n",
|
||
"\t'008B':95,\n",
|
||
"\t'0021':96,\n",
|
||
"\t'001F':97,\n",
|
||
"\t'0025':98,\n",
|
||
"\t'0023':99,\n",
|
||
"\t'C011':100,\n",
|
||
"\t'C007':101,\n",
|
||
"\t'C016':102,\n",
|
||
"\t'0018':103,\n",
|
||
"\t'C00C':104,\n",
|
||
"\t'C002':105,\n",
|
||
"\t'0005':106,\n",
|
||
"\t'0004':107,\n",
|
||
"\t'008A':108,\n",
|
||
"\t'0020':109,\n",
|
||
"\t'0024':110,\n",
|
||
"\t'C010':111,\n",
|
||
"\t'C006':112,\n",
|
||
"\t'C015':113,\n",
|
||
"\t'C00B':114,\n",
|
||
"\t'C001':115,\n",
|
||
"\t'003B':116,\n",
|
||
"\t'0002':117,\n",
|
||
"\t'0001':118,\n",
|
||
" '1301':119,\n",
|
||
"\t'1302':120,\n",
|
||
"\t'1303':121,\n",
|
||
"\t'1304':122\n",
|
||
"}"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 41,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"extensions = { \n",
|
||
" 0:0, \n",
|
||
" 1:1, \n",
|
||
" 2:2, \n",
|
||
" 3:3, \n",
|
||
" 4:4, \n",
|
||
" 5:5, \n",
|
||
" 6:6, \n",
|
||
" 7:7, \n",
|
||
" 8:8, \n",
|
||
" 9:9, \n",
|
||
" 10:10, \n",
|
||
" 11:11, \n",
|
||
" 12:12, \n",
|
||
" 13:13, \n",
|
||
" 14:14, \n",
|
||
" 15:15, \n",
|
||
" 16:16, \n",
|
||
" 17:17, \n",
|
||
" 18:18, \n",
|
||
" 19:19, \n",
|
||
" 20:20, \n",
|
||
" 21:21, \n",
|
||
" 22:22, \n",
|
||
" 23:23, \n",
|
||
" 24:24, \n",
|
||
" 25:25, \n",
|
||
" 26:26, \n",
|
||
" 27:27, \n",
|
||
" 28:28, \n",
|
||
" 29:29, \n",
|
||
" 30:30, \n",
|
||
" 31:31, \n",
|
||
" 35:32, \n",
|
||
" 65281:33 \n",
|
||
"}"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 42,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"#TODO: 加入cipher suites,extensions特征\n",
|
||
"stream_stat_json_file = root_dir + 'DataSet/result/' + date + '/stream_stat.txt'\n",
|
||
"stm2cipherDict = dict()\n",
|
||
"stm2extenDict = dict()\n",
|
||
"with open(stream_stat_json_file) as f:\n",
|
||
" lines = f.readlines()\n",
|
||
" for line in lines:\n",
|
||
" line = json.loads(line)\n",
|
||
" flow_key = (line['sip'], line['sport'], line['dip'], line['dport'])\n",
|
||
" cipher_suites = line['tls']['cipher_suites']\n",
|
||
" extension_list = line['tls']['extensions_list']\n",
|
||
" stm2cipherDict[flow_key] = cipher_suites\n",
|
||
" stm2extenDict[flow_key] = extension_list"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 43,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"example_label = {tuple(example_label_df.iloc[i,0:4].values):example_label_df.iloc[i,4] for i in example_label_df.index}"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 25,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"end\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"'\\nkeys = set(example_label.keys()).difference(set(result_key))\\nexample_keys = example_label_df.iloc[:,0:4].values.copy()\\nfor i,value in enumerate(list(example_keys)):\\n #print(tuple(value))\\n if tuple(value) in keys:\\n print(i)\\n'"
|
||
]
|
||
},
|
||
"execution_count": 25,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"example_json_file = root_dir + 'DataSet/result/' + date + '/ssl_stat.txt'\n",
|
||
"example_json_f = open(example_json_file, 'r')\n",
|
||
"#array_shape = (1771,6)\n",
|
||
"result_data = list()\n",
|
||
"result_label = list()\n",
|
||
"result_key = list()\n",
|
||
"i = 0\n",
|
||
"for line in example_json_f.readlines():\n",
|
||
" example_json = ''\n",
|
||
" try:\n",
|
||
" example_json = json.loads(line)\n",
|
||
" except Exception:\n",
|
||
" continue\n",
|
||
" #标签\n",
|
||
" try:\n",
|
||
" flow_key = (example_json['sip'], example_json['sport'], example_json['dip'], example_json['dport'])\n",
|
||
" result_label.append(example_label[flow_key])\n",
|
||
" result_key.append(flow_key)\n",
|
||
" ciphers = stm2cipherDict[flow_key]\n",
|
||
" extensions_list = stm2extenDict[flow_key]\n",
|
||
" except Exception:\n",
|
||
" continue\n",
|
||
" #print(example_json)\n",
|
||
" san_count = 0\n",
|
||
" cert_length = [0,0,0,0]\n",
|
||
" if 'san' in example_json:\n",
|
||
" san_count = len(example_json['san'].split(';'))\n",
|
||
" cert = example_json['Cert']\n",
|
||
" cert_count = cert['cert_count']\n",
|
||
" if cert_count != 0:\n",
|
||
" cert_length = [c['length'] for c in cert['cert_list']]\n",
|
||
" for i in range(4 - len(cert_length)):\n",
|
||
" cert_length.append(0)\n",
|
||
" result = [san_count, cert_count]\n",
|
||
" result += cert_length\n",
|
||
" #print(len(result))\n",
|
||
" \n",
|
||
" #tls\n",
|
||
" extensions_arr = np.zeros(34, dtype=np.uint8)\n",
|
||
" cipher_suits_arr = np.zeros(123, dtype=np.uint8)\n",
|
||
" for extension in extensions_list:\n",
|
||
" try:\n",
|
||
" extensions_arr[extensions[extension]]=1\n",
|
||
" except Exception:\n",
|
||
" pass\n",
|
||
" for cipher in ciphers:\n",
|
||
" try:\n",
|
||
" cipher = cipher.upper()\n",
|
||
" cipher_suits_arr[ciper_suits[cipher]]=1\n",
|
||
" except Exception:\n",
|
||
" pass\n",
|
||
" result += list(cipher_suits_arr)\n",
|
||
" result += list(extensions_arr)\n",
|
||
" result_data.append(result)\n",
|
||
" i += 1\n",
|
||
" \n",
|
||
"extensions_head = list()\n",
|
||
"for i in range(len(extensions)):\n",
|
||
" extensions_head.append('extension'+str(i))\n",
|
||
"cipher_head = ['cipher'+str(i) for i in range(len(ciper_suits))]\n",
|
||
"base_head = ['san_count', 'cert_count', 'cert_length1', 'cert_length2', 'cert_length3','cert_length4']\n",
|
||
"header = base_head+cipher_head+extensions_head\n",
|
||
"result_df = pd.DataFrame(result_data, columns=header)\n",
|
||
"result_df['label'] = np.array(result_label)\n",
|
||
"\n",
|
||
"print('end')\n",
|
||
"'''\n",
|
||
"keys = set(example_label.keys()).difference(set(result_key))\n",
|
||
"example_keys = example_label_df.iloc[:,0:4].values.copy()\n",
|
||
"for i,value in enumerate(list(example_keys)):\n",
|
||
" #print(tuple(value))\n",
|
||
" if tuple(value) in keys:\n",
|
||
" print(i)\n",
|
||
"'''"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 44,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"%matplotlib inline\n",
|
||
"import os\n",
|
||
"import numpy as np\n",
|
||
"import pandas as pd\n",
|
||
"import matplotlib.pyplot as plt"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 82,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"164\n",
|
||
"[2357, 5]\n",
|
||
"0.999400838826\n",
|
||
"0.999400838826\n",
|
||
"0.999400838826\n",
|
||
" precision recall f1\n",
|
||
"LogisticRegression 0.00000 0.00000 0.00000\n",
|
||
"SVM 0.00000 0.00000 0.00000\n",
|
||
"GaussianNB 0.00000 0.00000 0.00000\n",
|
||
"tree 0.00000 0.00000 0.00000\n",
|
||
"RandomForest 0.89563 0.89563 0.89563\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAFcCAYAAAAzq/4LAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xu8VXWd//HXmyMXQ5FG0VG5HDIvIHIHwQuWZJk/Q00L\nKFPLYBLNSudCU9M4NjNl46hUmlmGZt5zNHQsL4l3cbgIKCCKeNATXknlpnLx8/tjrQObw4Gzwc1Z\n+6z1fj4e58Fea33PPp+zgfde+7u+6/tVRGBmZvnSJusCzMys8hzuZmY55HA3M8shh7uZWQ453M3M\ncsjhbmaWQw53q2qSaiWFpJ220ubfJb0p6VVJ3SWtlFTTknVuL0n/LOnXWddh+SOPc7dqJqkWeBFo\nGxHrmjjeDXgO6BERr7dsdS1L0gXAxyPi1KxrsernM3dr7XoAy7IO9q19sqjk95iVy+FuLU7SP0n6\ni6QVkhZKGilpqKQZkpZLek3SJWU8z6eA+4B90q6Yaxp340h6UNIPJT2W/rx7Je1R8hynSVoiaZmk\nf5FUlz4vktpImijphfT4LZL+Jj3W8HPOlPQS8EDJvvGSlkp6RdL5JT/rAkm/l/Q7ScuBM9J9v2v0\nnKdLeintavpeeuxY4J+B0envOqdSfx+WTw53a1GSDgTOAYZExK7AZ4A6YBIwKSI6AfsBtzT3XBFx\nP/BZYGlE7BIRZ2yh6ZeArwJ7Au2Av09r6Q1cAXwZ2BvYDdi35PvOBU4EjgL2Ad4CLm/03EcBvdLf\no8Engf2BTwMTG94sUicAvwc6A9dvod4jgAOBkcAPJPWKiD8B/wncnP6u/bbwvWaAw91a3nqgPdBb\nUtuIqIuIF4C1wMcl7RERKyNiWgV/5uSIeC4i3iV50+if7j8FuDMiHo2INcAPgNKLUH8HfC8i6iPi\nfeAC4JRG3SkXRMSq9Lkb/Fu672lgMjC25NgTEXFHRHzQ6Hto9P3vRsQcYA7gILdt5nC3FhURi4Bv\nkwTl65JukrQPcCZwAPCspOmSjm/8vZKOTLskVkqatw0/9tWSx6uBXdLH+wAvl9S2GlhW0rYHcLuk\ntyW9DSwgeXPaq6TNy2yudN+S9OdsrX259ZqVzeFuLS4iboiII0jCM4CLIuL5iBhL0nVyEfB7SR0b\nfd8jaZfELhFxcAVKeQXo2rAhaWdg95LjLwOfjYjOJV8dIuIvpWU18bzdSh53B5Y2075cHtpmZXO4\nW4uSdKCkoyW1B94D3gXWSzpVUpeI+AB4O22+fgeX83vgc5IOk9QO+DdAJcevBP5DUo+09i6STijj\nef9F0kckHUzS139zhep9DaiV5P+31iz/I7GW1h74MfAmSffDniSjQI4F5klaSXJxdUxEvLcjC4mI\necA3gZtIzuJXAK8D76dNJgFTgHslrQCmAYeW8dQPAYuAPwMXR8S9FSr51vTPZZJmVeg5Lad8E5NZ\nStIuJJ8a9o+IF7fj+2vZyg1XZi3JZ+5WaJI+l3ahdAQuBp4mGZpp1qo53K3oTiC54LmUZGz6mPDH\nWcsBd8uYmeWQz9zNzHLI4W5mlkOZzUq3xx57RG1tbVY/3sysVZo5c+abEdGluXaZhXttbS0zZszI\n6sebmbVKkpaU087dMmZmOeRwNzPLIYe7mVkOVdUyX2vXrqW+vp733tuhU4rkVocOHejatStt27bN\nuhQzy1hVhXt9fT277rortbW1SGr+G2yDiGDZsmXU19fTs2fPrMsxs4xVVbfMe++9x+677+5g3w6S\n2H333f2px8yAKgt3wMH+Ifi1M7MGVRfueXXYYYdt9fhxxx3H22+/vdU2Zmblqqo+98ZqJ/5vRZ+v\n7sf/ryLPs379empqarbpex5//PGtHr/77rs/TElm1oxtyZO6Dl8qu+0hPbuX3fbp058uu+2H5TP3\nRurq6jjooIM4/fTT6du3L6eccgqrV6+mtraWCy+8kCOOOIJbb72VF154gWOPPZZBgwZx5JFH8uyz\nzwLw2muvcdJJJ9GvXz/69eu3IdR32SVZ4/iVV15hxIgR9O/fnz59+vDII48AyR27b775JgCXXHIJ\nffr0oU+fPlx22WUb6urVqxfjxo3j4IMP5tOf/jTvvvtuS788ZtZKONybsHDhQsaPH8/cuXPp1KkT\nV1xxBZAMNXz00UcZM2YM48eP52c/+xkzZ87k4osvZsKECQCce+65HHXUUcyZM4dZs2Zx8MGbruN8\nww038JnPfIbZs2czZ84c+vfvv8nxmTNnMnnyZJ588kmmTZvGr371K5566ikAnn/+ec4++2zmzZtH\n586due2221rg1TCz1qiqu2Wy0q1bNw4//HAATj31VH76058CMHr0aABWrlzJ448/zhe+8IUN3/P+\n+8mymw888AC//e1vAaipqWG33Xbb5LmHDBnC1772NdauXcuJJ564Wbg/+uijnHTSSXTs2BGAz3/+\n8zzyyCOMGjWKnj17bmg/aNAg6urqKvybm1le+My9CY1HnTRsNwTuBx98QOfOnZk9e/aGrwULFpT1\n3CNGjODhhx9m33335Stf+cqGN4IGW1s8pX379hse19TUsG6dl+k0s6Y53Jvw0ksv8cQTTwBw4403\ncsQRR2xyvFOnTvTs2ZNbb00Wo48I5syZA8DIkSP5xS9+ASQXXpcvX77J9y5ZsoQ999yTcePGceaZ\nZzJr1qaL2I8YMYI77riD1atXs2rVKm6//XaOPPLIHfJ7mll+Odyb0KtXL6699lr69u3LX//6V846\n66zN2lx//fVcffXV9OvXj4MPPpg//OEPAEyaNImpU6dyyCGHMGjQIObNm7fJ9z344IP079+fAQMG\ncNttt/Gtb31rk+MDBw7kjDPOYOjQoRx66KF8/etfZ8CAATvulzWzXMpsDdXBgwdH4/ncFyxYQK9e\nvTKpp0FdXR3HH388zzzzTKZ1bK9qeA3NqlFehkJKmhkRg5tr5zN3M7Mccrg3Ultb22rP2s3MGjjc\nzcxyyOFuZpZDDnczsxxyuJuZ5ZDDvQXU1dXRp08fIBnnfvzxx2dckZnlXXXPLXPBbs232abne2eb\nmkcEEUGbNn4PNLPWxanVSMPUuhMmTGDgwIFcd911DB8+nIEDB/KFL3yBlStXAjB9+nQOO+ww+vXr\nx9ChQ1mxYgV1dXUceeSRDBw4kIEDBzY7h7uZ2Y7icG/CwoULOe2007jvvvu4+uqruf/++5k1axaD\nBw/mkksuYc2aNYwePZpJkyYxZ84c7r//fnbeeWf23HNP7rvvPmbNmsXNN9/Mueeem/WvYmYFVd3d\nMhnp0aMHw4YN46677mL+/Pkbpv9ds2YNw4cPZ+HChey9994MGTIESCYSA1i1ahXnnHMOs2fPpqam\nhueeey6z38HMiq2scJd0LDAJqAF+HRE/bnS8O3At0DltMzEiWu26cQ1T+0YExxxzDDfeeOMmx+fO\nndvkYtSXXnope+21F3PmzOGDDz6gQ4cOLVKvmVljzXbLSKoBLgc+C/QGxkrq3ajZ94FbImIAMAa4\notKFZmHYsGE89thjLFq0CIDVq1fz3HPPcdBBB7F06VKmT58OwIoVK1i3bh3vvPMOe++9N23atOG6\n665j/fr1WZZvZgVWTp/7UGBRRCyOiDXATcAJjdoE0Cl9vBuwtHIlZqdLly5cc801jB07lr59+zJs\n2DCeffZZ2rVrx80338w3v/lN+vXrxzHHHMN7773HhAkTuPbaaxk2bBjPPffchk8AZmYtrdkpfyWd\nAhwbEV9Pt78CHBoR55S02Ru4F/go0BH4VETMbOK5xgPjAbp37z5oyZIlmxz3dLUfnl9Ds6Z5yt8m\nnquJfY3fEcYC10REV+A44DpJmz13RFwVEYMjYnCXLl3K+NFmZrY9ygn3eqBbyXZXNu92ORO4BSAi\nngA6AHtUokAzM9t25YT7dGB/ST0ltSO5YDqlUZuXgJEAknqRhPsblSzUzMzK12y4R8Q64BzgHmAB\nyaiYeZIulDQqbXY+ME7SHOBG4IzIav0+MzMrb5x7Omb97kb7flDyeD5weGVLMzOz7eXpB8zMcsjh\n3shPf/pTevXqxcknn8zw4cNp3749F198cdZlmZltk6qeW+aQaw+p6POVM8b0iiuu4I9//CMdO3Zk\nyZIl3HHHHRWtwcysJfjMvcQ3vvENFi9ezKhRo7j++usZMmQIbdu2zbosM7NtVtVn7i3tyiuv5E9/\n+hNTp05ljz08TN/MWi+fuZuZ5ZDD3cwshxzuZmY55D73LXj11VcZPHgwy5cvp02bNlx22WXMnz9/\nw6pLZmbVrKrDvRLTY26rurq6DY/r6+tb/OebmVWCu2XMzHLI4W5mlkMOdzOzHKq6cPdMwdvPr52Z\nNaiqcO/QoQPLli1zSG2HiGDZsmV06NAh61LMrApU1WiZrl27Ul9fzxtveBGn7dGhQwe6du2adRlm\nVgWqKtzbtm1Lz549sy7DzKzVq6puGTMzqwyHu5lZDjnczcxyyOFuZpZDDnczsxxyuJuZ5ZDD3cws\nhxzuZmY55HA3M8shh7uZWQ453M3McsjhbmaWQw53M7MccribmeWQw93MLIcc7mZmOeRwNzPLIYe7\nmVkOOdzNzHLI4W5mlkNlhbukYyUtlLRI0sQttPmipPmS5km6obJlmpnZttipuQaSaoDLgWOAemC6\npCkRMb+kzf7Ad4HDI+ItSXvuqILNzKx55Zy5DwUWRcTiiFgD3ASc0KjNOODyiHgLICJer2yZZma2\nLcoJ932Bl0u269N9pQ4ADpD0mKRpko5t6okkjZc0Q9KMN954Y/sqNjOzZpUT7mpiXzTa3gnYH/gE\nMBb4taTOm31TxFURMTgiBnfp0mVbazUzszKVE+71QLeS7a7A0iba/CEi1kbEi8BCkrA3M7MMlBPu\n04H9JfWU1A4YA0xp1OYO4JMAkvYg6aZZXMlCzcysfM2Ge0SsA84B7gEWALdExDxJF0oalTa7B1gm\naT4wFfiHiFi2o4o2M7Ota3YoJEBE3A3c3WjfD0oeB3Be+mVmZhnzHapmZjnkcDczyyGHu5lZDjnc\nzcxyyOFuZpZDDnczsxxyuJuZ5ZDD3cwshxzuZmY55HA3M8shh7uZWQ453M3McsjhbmaWQw53M7Mc\ncribmeWQw93MLIcc7mZmOeRwNzPLIYe7mVkOOdzNzHLI4W5mlkMOdzOzHHK4m5nlkMPdzCyHHO5m\nZjnkcDczyyGHu5lZDjnczcxyyOFuZpZDDnczsxxyuJuZ5ZDD3cwshxzuZmY55HA3M8shh7uZWQ45\n3M3McsjhbmaWQ2WFu6RjJS2UtEjSxK20O0VSSBpcuRLNzGxbNRvukmqAy4HPAr2BsZJ6N9FuV+Bc\n4MlKF2lmZtumnDP3ocCiiFgcEWuAm4ATmmj3Q+AnwHsVrM/MzLZDOeG+L/ByyXZ9um8DSQOAbhFx\nVwVrMzOz7VROuKuJfbHhoNQGuBQ4v9knksZLmiFpxhtvvFF+lWZmtk3KCfd6oFvJdldgacn2rkAf\n4EFJdcAwYEpTF1Uj4qqIGBwRg7t06bL9VZuZ2VaVE+7Tgf0l9ZTUDhgDTGk4GBHvRMQeEVEbEbXA\nNGBURMzYIRWbmVmzmg33iFgHnAPcAywAbomIeZIulDRqRxdoZmbbbqdyGkXE3cDdjfb9YAttP/Hh\nyzIzsw/Dd6iameWQw93MLIcc7mZmOeRwNzPLIYe7mVkOOdzNzHLI4W5mlkMOdzOzHHK4m5nlkMPd\nzCyHHO5mZjnkcDczyyGHu5lZDjnczcxyyOFuZpZDDnczsxxyuJuZ5ZDD3cwshxzuZmY55HA3M8sh\nh7uZWQ453M3McsjhbmaWQw53M7MccribmeWQw93MLIcc7mZmOeRwNzPLIYe7mVkOOdzNzHLI4W5m\nlkMOdzOzHHK4m5nlkMPdzCyHHO5mZjnkcDczyyGHu5lZDpUV7pKOlbRQ0iJJE5s4fp6k+ZLmSvqz\npB6VL9XMzMrVbLhLqgEuBz4L9AbGSurdqNlTwOCI6Av8HvhJpQs1M7PylXPmPhRYFBGLI2INcBNw\nQmmDiJgaEavTzWlA18qWaWZm26KccN8XeLlkuz7dtyVnAn/8MEWZmdmHs1MZbdTEvmiyoXQqMBg4\nagvHxwPjAbp3715miWZmtq3KOXOvB7qVbHcFljZuJOlTwPeAURHxflNPFBFXRcTgiBjcpUuX7anX\nzMzKUE64Twf2l9RTUjtgDDCltIGkAcAvSYL99cqXaWZm26LZcI+IdcA5wD3AAuCWiJgn6UJJo9Jm\n/wXsAtwqabakKVt4OjMzawHl9LkTEXcDdzfa94OSx5+qcF1mZvYh+A5VM7MccribmeWQw93MLIcc\n7mZmOeRwNzPLIYe7mVkOOdzNzHLI4W5mlkMOdzOzHHK4m5nlkMPdzCyHHO5mZjnkcDczyyGHu5lZ\nDjnczcxyyOFuZpZDDnczsxxyuJuZ5ZDD3cwshxzuZmY55HA3M8shh7uZWQ453M3McsjhbmaWQw53\nM7MccribmeWQw93MLIcc7mZmOeRwNzPLIYe7mVkOOdzNzHLI4W5mlkMOdzOzHHK4m5nlkMPdzCyH\nHO5mZjnkcDczyyGHu5lZDpUV7pKOlbRQ0iJJE5s43l7SzenxJyXVVrpQMzMrX7PhLqkGuBz4LNAb\nGCupd6NmZwJvRcTHgUuBiypdqJmZla+cM/ehwKKIWBwRa4CbgBMatTkBuDZ9/HtgpCRVrkwzM9sW\nO5XRZl/g5ZLteuDQLbWJiHWS3gF2B94sbSRpPDA+3VwpaeH2FF1he9CozgLza5Hw67BRIV+LLZyZ\nbuG1eKb85z2jIue8PcppVE64N1VNbEcbIuIq4KoyfmaLkTQjIgZnXUc18GuR8OuwkV+LjVrba1FO\nt0w90K1kuyuwdEttJO0E7Ab8tRIFmpnZtisn3KcD+0vqKakdMAaY0qjNFOD09PEpwAMRsdmZu5mZ\ntYxmu2XSPvRzgHuAGuA3ETFP0oXAjIiYAlwNXCdpEckZ+5gdWXSFVVU3Ucb8WiT8Omzk12KjVvVa\nyCfYZmb54ztUzcxyyOFuZpZDDnczsxxyuJuZbYGkw8vZV40KeUFV0gHAP5Dc6bVhxFBEHJ1ZUS1I\n0twtHQIiIvq2ZD1ZktQBGA28BdwJ/CNwJPAC8MOIKN7dmdJHgPOB7hExTtL+wIERcVfGpbU4SbMi\nYmBz+6pROXeo5tGtwJXAr4D1GdeShQ9I7iC+gSTQ3s22nEz9FlgLdCQJtGeAnwNHANcAx2dWWXYm\nAzOB4el2Pcn/mcKEu6ThwGFAF0nnlRzqRDIkvOoVNdzXRcQvsi4iKxHRX9JBwFiSgJ+f/nlvRKzL\ntLiW1zsi+qR3VtdHxFHp/j9JmpNlYRnaLyJGSxoLEBHvFnAiwHbALiQZuWvJ/uUkN2pWvaKG+52S\nJgC3A+837IyIwkyZEBHPAv8K/Kuk0SRnsBcB/5VpYS1vDWy4Wa/xtBpF/FQHsEbSzqTzQ0naj5L/\nJ0UQEQ8BD0m6JiKWAEhqA+wSEcuzra48Re1zf7GJ3RERH2vxYjIiaV+SO4lPIulvvgW4PSJWZlpY\nC5P0Osk01iLpe7+p4RDwxYjYK6vasiLpGOD7JOs33AscDpwREQ9mWVcWJN0AfIPkjX4mybxZl0RE\n1Z8EFTLci07SQyQfNW8hmX9/k08sRfoEI+n0rR2PiGu3djyvJO0ODCN5k5tWxAvLAJJmp92YXwYG\nAf8EzGwNgw4KGe6S2gJnASPSXQ8Cv4yItZkV1YIk1bFxSubSfwANo2UK8wnGNpf2r38Z+FhEXCip\nO/C3EfF/GZfW4iTNA/qTXJP6eUQ8JGlORPTLuLRmFbXP/RdAW+CKdPsr6b6vZ1ZRC4qI2qxrqBaS\nJtPE2gOpiIgzW7KeKnEFyYiqo4ELgRXAbcCQLIvKyC+BOmAO8LCkHiQXVateUc/cN3vnbS3vxpUg\naT7wO+CmiFicdT1ZknRyE7u7A98GaiKiawuXlLmGcdySnoqIAem+wvz/aI6knVrDqLKi3qG6Ph0B\nAICkj1GskRFjSfrc75P0pKRvS9on66KyEBG3NXwBT5EsBH8W8GOgqN1TayXVsHG0TBeSM/nCkbSX\npKsl/THd7s3GtSuqWlHP3EeS3KixmKSfuQfw1YiYmmlhGZA0jGSUyMnAIuDGiPhVtlW1LEm9gO8B\nA0iGgv6uNZyZ7SjpxcPRwECShe9PAb4fEbdmWlgG0lCfDHwvIvql90M8FRGHZFxaswoZ7gCS2gMH\nkoT7sxFRqHG8jUn6BHApyU097TMup8VIuhUYDFxMMnpok09wRRo5VCq9yW0kyf+PP0fEgoxLyoSk\n6RExpFEX1eyI6J91bc0p1AVVSUdHxAOSPt/o0H6SiIj/yaSwjEgaQtJFczLJRaOrSG4zL5IhJN0P\nf08y/UDpnZhBwbpm0ht15kZEH+DZrOupAqvSYaENXVTDgHeyLak8hQp34CjgAeBzTRwLoBDhLuk/\ngS8Cb5PctHN4RNRnW1U2PHJoUxHxgaQ5krpHxEtZ11MFziNZI3o/SY8BXWgl0w8UtlumyCTdDfw4\nIh5Ot08jOXtfAlxQ4K6Ifdl8ptCHs6soG5IeIPlE83/Aqob9ETEqs6IykH6KGUbyOjR04S5sLffD\nFO3MHQBJ3yK5SLKCZGbIgcDEiLg308Jazt+SzH6IpBEkI0O+SXKzxlW0kjOTSpJ0EclFxPls7HcP\noHDhTjJhVulsmCKZd6hQ0k8x/x0Rw4F5WdezrQoZ7sDXImKSpM8AewJfJQn7ooR7m5Kz89HAVelQ\nwNskzc6wriydSDJneaEvrKd2SifO2iCdSKyI7k3vhfifaGXdHEUN94aLZscBkyNiTsGmNN2p5EaM\nkcD40mMZ1ZS1xSR3LRc23CWdBUwAPtZoQZddgceyqSpz55HM9b9e0rtsnKKjU7ZlNa+o/5FnSroX\n6Al8V9KuFOsmjRtJpjN9k2ShjkcAJH2cVjISYAdYDcyW9Gc2nQb63OxKanE3AH8EfgRMLNm/oqjX\nYSJi1+ZbVadCXlBNL5T0BxZHxNuS/gboGhFbWn4ud9IhXXuTLNCxKt13AMl81bMyLS4DW5odsqiz\nQtpGkkZRMslga1lusKjhfjgwOyJWSTqV5ILqpIZJ+c3MACT9mGTk0PXprrEkU/5O3PJ3VYeihvtc\noB/QF7gOuBr4fMkSa1Yw6SLQPyJZoKJDw35Pf1xsaVb0j4gP0u0akukHqn4+96JOHLYuvfJ9AskZ\n+yQ2XSfRimcyybTP64BPkiw7eF2mFVm16FzyeLfMqthGRb2gukLSd0nmcT8yfTdum3FNlq2dI+LP\nkpR2z10g6RGSdWatuH4EPCVpKslImRHAd7MtqTxFDffRwJdIxru/mq40U/VrItoO9V56of15SecA\nfyG5B8IKLCJulPQgSb+7gH+KiFezrao8hexzB0hXVNk/Iu6X9BGShRlWZF2XZSOdRG0ByUfwH5J8\n/P5JREzLtDDLhKRzIuLn6eODI6LV3aFayHCXNI7kxp2/iYj90otpV0bEyIxLM7Mq0LAaVePHrUlR\nu2XOBoYCTwJExPOS/BG8gCRdFhHflnQnTaylWrTJsqxJrfLu9aKG+/sRsaZhxoF0dZXifYQx2Dgi\n5uJMq7Bq01nSSSQjCjs1XgOiNaz9UNRumZ+QzGV+GslsiBOA+RHxvUwLs6og6aNAtyLdsWybkjR5\nK4cjIr7WYsVsp6KGexvgTODTJB+57gF+3dpmfbPKSUdEjCL5NDsbeAN4KCLOy7Ius+1VuHBPx7Rf\nGxGnZl2LVY+GNTIlfZ3krP1fJc1tDXci2o4jqTPJJ/xaNl3EpeonlCtcn3tErJfURVK7iFiTdT1W\nNXaStDfJ8oPunrMGdwPTgKdpZTPHFi7cU3XAY5KmsOkyYpdkVpFl7UKS7rlHI2K6pI8Bz2dck2Wv\nQ2vtmitctwyApCZvKY+If2vpWsysekn6DrASuItN5/mv+vntCxnuZo2lI6j+nWTxkj+RzBr67Yj4\nXaaFWaYknQ38B8nouoawjNYwW2ghw30LN6y8A8wAfhkR77V8VZYlSbMjon86tvlE4DvA1Ijol3Fp\nliFJLwCHRsSbWdeyrYo65e9iko9av0q/lgOvAQek21Y8DbOCHgfc2Bo+dluLmEeyBGOrU9QLqgMi\nYkTJ9p2SHo6IEZJa3QRBVhF3SnqWpFtmgqQugD/B2XqStXWn0srW1i1quHeR1D0iXgJIp/zdIz3m\n4ZEFFBETJV0ELE+Hy64iWczFiu2O9KvVKWq4nw88mvanCehJcrbWEfCCyAUk6bSSx6WHftvy1Vi1\niIhrJbUj6bIFWBgRa7OsqVyFvKAKIKk9cBBJuD/ri6jFJulnJZsdgJHArIg4JaOSrApI+gTJCV8d\nSVZ0A06PiIczLKsshQz3dHGO84AeETEunc/9wIi4K+PSrEpI2g24zlP+FpukmcCXImJhun0AyQX3\nQdlW1ryijpaZTNK3PjzdricZ42zWYDWwf9ZFWObaNgQ7QEQ8RytZb7mofe77RcRoSWMBIuJdNepo\ntWJpdO9DG6A3cEt2FVmVmCHpajbO+/9lYGaG9ZStqOG+RtLOpP+ZJe1HyTAnK6TSxTrWAUsioj6r\nYqxqnEWyctu5JH3uDwNXZFpRmYra534M8H2Ss7N7gcOBMyLiwSzrMjOrlEKGO4Ck3YFhJO/G01rj\n7cVWOZKGAT8DegHtgBpgVUR0yrQwy4Skp9nK0putYZ7/onbLEBHLgP8FkHSgpB9FxLiMy7Ls/BwY\nA9wKDCZZoOHjmVZkWTo+/fPs9M/SPvdWMR1BoUbLSOor6V5Jz0j6d0l7SboN+DMwP+v6LFsRsQio\niYj1ETEZ+GTWNVk2ImJJRCwBDo+If4yIp9OvicBnsq6vHIUKd5JJwW4ATiZZI3MWySRiH4+IS7Ms\nzDK3Or0TcY6kn6TzeHfMuijLXEdJRzRsSDqMVvLvolB97g3TupZsvwzURsT6DMuyKiCpB8nMoO1I\npvvtBPwiPZu3gpI0CPgNsFu6623gaxExK7uqylO0PvcOkgaQXESFZNrfvg1j3FvDX5hVlqQTgK4R\ncXm6/RCEGBsmAAAEc0lEQVSwJ8nFtCcAh3uBRcRMoJ+kTiQnw+9kXVO5inbmPnUrhyMijm6xYqwq\nSHoMGBMRL6fbs4GjgV2AyRExMsv6LFvpHFQnA7WUnAxHxIVZ1VSuQp25R4QvkFlj7RqCPfVoulDH\nX9NZQq3Y/kCySttMWtmNjoU6c2+Qrot4fUS8nW5/FBgbEa3izjOrHEmLIqLJIY+SXoiI/Vq6Jqse\nkp6JiD5Z17E9ijZapsG4hmAHiIi3AI9xL6YnJW32dy/p74D/y6Aeqy6PSzok6yK2R1HP3OcC/SL9\n5SXVAHMj4uBsK7OWJmlPkpV23icZGgswCGgPnBgRr2VVm2VP0nySm9leJPk3IpLrc1V/h2pRw/2/\nSC6QXEkyKuIbwMsRcX6WdVl2JB0NNLy5z4uIB7Ksx6pDOkR2M+kNTlWtqOHeBvg7ktV2RDJ52K89\n3t3MmpJ+wuvQsN2w/nI1K2S4m5mVQ9Io4L+BfYDXgR7AgtbQhVuooZCSbomIL25pxrfW0I9mZi3q\nhySzx94fEQMkfRIYm3FNZSlUuAPfSv88fqutzMwSayNimaQ2ktpExFRJF2VdVDkKNRQyIl5JH05o\nmPWtZPa3CVnWZmZV6W1Ju5CswHS9pEkkK3VVvUL2uUuaFREDG+2b624ZMyuV3qX8LsmJ8JdJJhC7\nPl0PoqoVKtwlnUVyhr4fm04ItSvwWEScmklhZtYqpPfEjImI67OupTlFC/fdgI8CPwImlhxakc4n\nYmZGOgvk2cC+wBTgvnT7H4DZEXFChuWVpVDh3kDSfkB9RLwv6RNAX+C3pVMSmFlxSfoD8BbJtM8j\nSU4K2wHfiojZWdZWrqKG+2ySdTJrgXtI3pkPjIjjsqzLzKqDpKcj4pD0cQ3wJtA9IlZkW1n5CjVa\npsQHEbEO+DxwWUR8B9g745rMrHqsbXiQ3rn+YmsKdijeOPcGayWNJVnh/nPpvrYZ1mNm1aWfpOXp\nYwE7p9sNE4d1yq608hQ13L9KMlnYf0TEi5J6Ar/LuCYzqxIRUZN1DR9WIfvczczyrlBn7p5bxsyK\nolBn7pL2johXWvMczWZm5ShUuJuZFUWhumUaSFrB5t0y7wAzgPMjYnHLV2VmVjmFDHfgEmApcAPJ\n0KYxwN8CC4HfAJ/IrDIzswooZLeMpCcj4tBG+6ZFxDBJcyKiX1a1mZlVQmHvUJX0xYYJ+CV9seRY\n8d7tzCx3inrm/jFgEjA83fUE8B3gL8CgiHg0q9rMzCqhkOFuZpZ3heyWkdRV0u2SXpf0mqTbJHXN\nui4zs0opZLgDk0mm+d2HZDL+O9N9Zma5UMhuGUmzI6J/c/vMzFqrop65vynpVEk16depQNUveGtm\nVq6inrl3B35OMlomgMeBcyPipUwLMzOrkEKGe1MkfTsiLsu6DjOzSnC4pyS9FBHds67DzKwSitrn\n3hRlXYCZWaU43DfyRxgzy41CzQq5hal+IV0At4XLMTPbYdznbmaWQ+6WMTPLIYe7mVkOOdzNzHLI\n4W5mlkMOdzOzHPr/xjLMxOS1hXMAAAAASUVORK5CYII=\n",
|
||
"text/plain": [
|
||
"<matplotlib.figure.Figure at 0x10c611ac8>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"from sklearn.linear_model import LogisticRegression\n",
|
||
"from sklearn.svm import SVC\n",
|
||
"from sklearn.naive_bayes import GaussianNB\n",
|
||
"from sklearn import tree\n",
|
||
"from sklearn.ensemble import RandomForestClassifier\n",
|
||
"from sklearn.metrics import f1_score,recall_score,precision_score\n",
|
||
"import random\n",
|
||
"examples = result_df.values.copy()\n",
|
||
"print(len(examples[0]))\n",
|
||
"score_df = pd.DataFrame(np.zeros((5,3)),index = ['LogisticRegression', 'SVM', 'GaussianNB', 'tree', 'RandomForest'], \\\n",
|
||
" columns = ['precision', 'recall', 'f1'])\n",
|
||
"\n",
|
||
"\n",
|
||
"def my_pred(y_pred, y_test, proba):\n",
|
||
" y_pred1 = list()\n",
|
||
" y_test1 = list()\n",
|
||
" [rows, clos] = proba.shape\n",
|
||
" print([rows, clos])\n",
|
||
" right_count = 0\n",
|
||
" wrong_count = 0\n",
|
||
" for i in range(rows):\n",
|
||
" temp = max(proba[i])\n",
|
||
" if temp < 0.95:\n",
|
||
" continue\n",
|
||
" y_pred1.append(y_pred[i])\n",
|
||
" y_test1.append(y_test[i])\n",
|
||
" f1 = f1_score(y_test1, y_pred1, average='micro')\n",
|
||
" recall = recall_score(y_test1, y_pred1, average='micro')\n",
|
||
" precision = precision_score(y_test1, y_pred1, average='micro')\n",
|
||
" print(precision)\n",
|
||
" print(recall)\n",
|
||
" print(f1)\n",
|
||
" \n",
|
||
"\n",
|
||
"'''\n",
|
||
"#def a():\n",
|
||
"f1_score_list = list()\n",
|
||
"recall_score_list = list()\n",
|
||
"precision_score_list = list()\n",
|
||
"for i in range(1):\n",
|
||
" np.random.shuffle(examples)\n",
|
||
" examples_train = examples[:int(len(examples)*0.75)]\n",
|
||
" examples_test = examples[int(len(examples)*0.75):]\n",
|
||
" x_train = examples_train[:,0:-1]\n",
|
||
" y_train = examples_train[:,-1]\n",
|
||
" x_test = examples_test[:,0:-1]\n",
|
||
" y_test = examples_test[:,-1]\n",
|
||
" classifer = LogisticRegression()\n",
|
||
" classifer.fit(x_train, y_train)\n",
|
||
" y_pred = classifer.predict(x_test)\n",
|
||
" f1_score_list.append(f1_score(y_test, y_pred, average='micro'))\n",
|
||
" recall_score_list.append(recall_score(y_test, y_pred, average='micro'))\n",
|
||
" precision_score_list.append(precision_score(y_test, y_pred, average='micro'))\n",
|
||
"scores = [np.mean(precision_score_list), np.mean(recall_score_list), np.mean(f1_score_list)]\n",
|
||
"score_df.loc['LogisticRegression'] = scores\n",
|
||
"\n",
|
||
"f1_score_list = list()\n",
|
||
"recall_score_list = list()\n",
|
||
"precision_score_list = list()\n",
|
||
"for i in range(1):\n",
|
||
" #np.random.shuffle(examples)\n",
|
||
" examples_train = examples[:int(len(examples)*0.75)]\n",
|
||
" examples_test = examples[int(len(examples)*0.75):]\n",
|
||
" x_train = examples_train[:,0:-1]\n",
|
||
" y_train = examples_train[:,-1]\n",
|
||
" x_test = examples_test[:,0:-1]\n",
|
||
" y_test = examples_test[:,-1]\n",
|
||
" classifer = SVC()\n",
|
||
" classifer.fit(x_train, y_train)\n",
|
||
" y_pred = classifer.predict(x_test)\n",
|
||
" f1_score_list.append(f1_score(y_test, y_pred, average='micro'))\n",
|
||
" recall_score_list.append(recall_score(y_test, y_pred, average='micro'))\n",
|
||
" precision_score_list.append(precision_score(y_test, y_pred, average='micro'))\n",
|
||
"scores = [np.mean(precision_score_list), np.mean(recall_score_list), np.mean(f1_score_list)]\n",
|
||
"score_df.loc['SVM'] = scores\n",
|
||
"\n",
|
||
"f1_score_list = list()\n",
|
||
"recall_score_list = list()\n",
|
||
"precision_score_list = list()\n",
|
||
"for i in range(1):\n",
|
||
" #np.random.shuffle(examples)\n",
|
||
" examples_train = examples[:int(len(examples)*0.75)]\n",
|
||
" examples_test = examples[int(len(examples)*0.75):]\n",
|
||
" x_train = examples_train[:,0:-1]\n",
|
||
" y_train = examples_train[:,-1]\n",
|
||
" x_test = examples_test[:,0:-1]\n",
|
||
" y_test = examples_test[:,-1]\n",
|
||
" classifer = GaussianNB()\n",
|
||
" classifer.fit(x_train, y_train)\n",
|
||
" y_pred = classifer.predict(x_test)\n",
|
||
" f1_score_list.append(f1_score(y_test, y_pred, average='micro'))\n",
|
||
" recall_score_list.append(recall_score(y_test, y_pred, average='micro'))\n",
|
||
" precision_score_list.append(precision_score(y_test, y_pred, average='micro'))\n",
|
||
"scores = [np.mean(precision_score_list), np.mean(recall_score_list), np.mean(f1_score_list)]\n",
|
||
"score_df.loc['GaussianNB'] = scores\n",
|
||
"\n",
|
||
"f1_score_list = list()\n",
|
||
"recall_score_list = list()\n",
|
||
"precision_score_list = list()\n",
|
||
"for i in range(1):\n",
|
||
" #np.random.shuffle(examples)\n",
|
||
" examples_train = examples[:int(len(examples)*0.75)]\n",
|
||
" examples_test = examples[int(len(examples)*0.75):]\n",
|
||
" x_train = examples_train[:,0:-1]\n",
|
||
" y_train = examples_train[:,-1]\n",
|
||
" x_test = examples_test[:,0:-1]\n",
|
||
" y_test = examples_test[:,-1]\n",
|
||
" classifer = tree.DecisionTreeClassifier()\n",
|
||
" classifer.fit(x_train, y_train)\n",
|
||
" y_pred = classifer.predict(x_test)\n",
|
||
" f1_score_list.append(f1_score(y_test, y_pred, average='micro'))\n",
|
||
" recall_score_list.append(recall_score(y_test, y_pred, average='micro'))\n",
|
||
" precision_score_list.append(precision_score(y_test, y_pred, average='micro'))\n",
|
||
"scores = [np.mean(precision_score_list), np.mean(recall_score_list), np.mean(f1_score_list)]\n",
|
||
"score_df.loc['tree'] = scores\n",
|
||
"'''\n",
|
||
"\n",
|
||
"f1_score_list = list()\n",
|
||
"recall_score_list = list()\n",
|
||
"precision_score_list = list()\n",
|
||
"for i in range(1):\n",
|
||
" np.random.shuffle(examples)\n",
|
||
" examples_train = examples[:int(len(examples)*0.75)]\n",
|
||
" examples_test = examples[int(len(examples)*0.75):]\n",
|
||
" x_train = examples_train[:,0:-1]\n",
|
||
" y_train = examples_train[:,-1]\n",
|
||
" x_test = examples_test[:,0:-1]\n",
|
||
" y_test = examples_test[:,-1]\n",
|
||
" classifer = RandomForestClassifier()\n",
|
||
" classifer.fit(x_train, y_train)\n",
|
||
" y_pred = classifer.predict(x_test)\n",
|
||
" f1_score_list.append(f1_score(y_test, y_pred, average='micro'))\n",
|
||
" recall_score_list.append(recall_score(y_test, y_pred, average='micro'))\n",
|
||
" precision_score_list.append(precision_score(y_test, y_pred, average='micro')) \n",
|
||
" proba = classifer.predict_proba(x_test)\n",
|
||
" my_pred(y_pred, y_test, proba)\n",
|
||
"scores = [np.mean(precision_score_list), np.mean(recall_score_list), np.mean(f1_score_list)]\n",
|
||
"score_df.loc['RandomForest'] = scores\n",
|
||
"print(score_df)\n",
|
||
"ax = score_df.plot.bar(title='ssl-fingerprint')\n",
|
||
"fig = ax.get_figure()\n",
|
||
"#fig.savefig('../figure/ssl.svg')\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.6.2"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 2
|
||
}
|