663 lines
21 KiB
Plaintext
663 lines
21 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 17,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"import os\n",
|
||
"import pandas as pd\n",
|
||
"import numpy as np\n",
|
||
"import json\n",
|
||
"import traceback"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 18,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"alipay 4196\n",
|
||
"zhihu 4164\n",
|
||
"meituan 3992\n",
|
||
"gaode 3979\n",
|
||
"jd 3938\n",
|
||
"evernote 3511\n",
|
||
"weibo 3453\n",
|
||
"bilibili 3347\n",
|
||
"ele 2159\n",
|
||
"douyin 1990\n",
|
||
"Name: 4, dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 18,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"date = '2019-12-20_21'\n",
|
||
"root_dir = '/Users/Leo/Documents/github/GradProj/'\n",
|
||
"#example_label_file = root_dir + 'DataSet/result/' + date + '/stream_tag.txt'\n",
|
||
"example_label_file = root_dir + 'DataSet/result/' + 'noProxy/All' + '/stream_tag.txt'\n",
|
||
"example_label_df = pd.read_table(example_label_file, sep='\\s+', header=None)\n",
|
||
"example_label_df[3] = 443\n",
|
||
"example_label_df[4].value_counts()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 19,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"extensions = { \n",
|
||
" 0:0, \n",
|
||
" 1:1, \n",
|
||
" 2:2, \n",
|
||
" 3:3, \n",
|
||
" 4:4, \n",
|
||
" 5:5, \n",
|
||
" 6:6, \n",
|
||
" 7:7, \n",
|
||
" 8:8, \n",
|
||
" 9:9, \n",
|
||
" 10:10, \n",
|
||
" 11:11, \n",
|
||
" 12:12, \n",
|
||
" 13:13, \n",
|
||
" 14:14, \n",
|
||
" 15:15, \n",
|
||
" 16:16, \n",
|
||
" 17:17, \n",
|
||
" 18:18, \n",
|
||
" 19:19, \n",
|
||
" 20:20, \n",
|
||
" 21:21, \n",
|
||
" 22:22, \n",
|
||
" 23:23, \n",
|
||
" 24:24, \n",
|
||
" 25:25, \n",
|
||
" 26:26, \n",
|
||
" 27:27, \n",
|
||
" 28:28, \n",
|
||
" 29:29, \n",
|
||
" 30:30, \n",
|
||
" 31:31, \n",
|
||
" 35:32, \n",
|
||
" 65281:33 \n",
|
||
"}"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 20,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"ciper_suits = {\n",
|
||
" '1305':0,\n",
|
||
" 'C030':1,\n",
|
||
"\t'C02C':2,\n",
|
||
"\t'C028':3,\n",
|
||
"\t'C024':4,\n",
|
||
"\t'C014':5,\n",
|
||
"\t'C00A':6,\n",
|
||
"\t'00A5':7,\n",
|
||
"\t'00A3':8,\n",
|
||
"\t'00A1':9,\n",
|
||
"\t'009F':10,\n",
|
||
"\t'006B':11,\n",
|
||
"\t'006A':12,\n",
|
||
"\t'0069':13,\n",
|
||
"\t'0068':14,\n",
|
||
"\t'0039':15,\n",
|
||
"\t'0038':16,\n",
|
||
"\t'0037':17,\n",
|
||
"\t'0036':18,\n",
|
||
"\t'0088':19,\n",
|
||
"\t'0087':20,\n",
|
||
"\t'0086':21,\n",
|
||
"\t'0085':22,\n",
|
||
"\t'C019':23,\n",
|
||
"\t'00A7':24,\n",
|
||
"\t'006D':25,\n",
|
||
"\t'003A':26,\n",
|
||
"\t'0089':27,\n",
|
||
"\t'C032':28,\n",
|
||
"\t'C02E':29,\n",
|
||
"\t'C02A':30,\n",
|
||
"\t'C026':31,\n",
|
||
"\t'C00F':32,\n",
|
||
"\t'C005':33,\n",
|
||
"\t'009D':34,\n",
|
||
"\t'003D':35,\n",
|
||
"\t'0035':36,\n",
|
||
"\t'0084':37,\n",
|
||
"\t'008D':38,\n",
|
||
"\t'C02F':39,\n",
|
||
"\t'C02B':40,\n",
|
||
"\t'C027':41,\n",
|
||
"\t'C023':42,\n",
|
||
"\t'C013':43,\n",
|
||
"\t'C009':44,\n",
|
||
"\t'00A4':45,\n",
|
||
"\t'00A2':46,\n",
|
||
"\t'00A0':47,\n",
|
||
"\t'009E':48,\n",
|
||
"\t'0067':49,\n",
|
||
"\t'0040':50,\n",
|
||
"\t'003F':51,\n",
|
||
"\t'003E':52,\n",
|
||
"\t'0033':53,\n",
|
||
"\t'0032':54,\n",
|
||
"\t'0031':55,\n",
|
||
"\t'0030':56,\n",
|
||
"\t'009A':57,\n",
|
||
"\t'0099':58,\n",
|
||
"\t'0098':59,\n",
|
||
"\t'0097':60,\n",
|
||
"\t'0045':61,\n",
|
||
"\t'0044':62,\n",
|
||
"\t'0043':63,\n",
|
||
"\t'0042':64,\n",
|
||
"\t'C018':65,\n",
|
||
"\t'00A6':66,\n",
|
||
"\t'006C':67,\n",
|
||
"\t'0034':68,\n",
|
||
"\t'009B':69,\n",
|
||
"\t'0046':70,\n",
|
||
"\t'C031':71,\n",
|
||
"\t'C02D':72,\n",
|
||
"\t'C029':73,\n",
|
||
"\t'C025':74,\n",
|
||
"\t'C00E':75,\n",
|
||
"\t'C004':76,\n",
|
||
"\t'009C':77,\n",
|
||
"\t'003C':78,\n",
|
||
"\t'002F':79,\n",
|
||
"\t'0096':80,\n",
|
||
"\t'0041':81,\n",
|
||
"\t'008C':82,\n",
|
||
"\t'C012':83,\n",
|
||
"\t'C008':84,\n",
|
||
"\t'0016':85,\n",
|
||
"\t'0013':86,\n",
|
||
"\t'0010':87,\n",
|
||
"\t'000D':88,\n",
|
||
"\t'C017':89,\n",
|
||
"\t'001B':90,\n",
|
||
"\t'C00D':91,\n",
|
||
"\t'C003':92,\n",
|
||
"\t'000A':93,\n",
|
||
"\t'0007':94,\n",
|
||
"\t'008B':95,\n",
|
||
"\t'0021':96,\n",
|
||
"\t'001F':97,\n",
|
||
"\t'0025':98,\n",
|
||
"\t'0023':99,\n",
|
||
"\t'C011':100,\n",
|
||
"\t'C007':101,\n",
|
||
"\t'C016':102,\n",
|
||
"\t'0018':103,\n",
|
||
"\t'C00C':104,\n",
|
||
"\t'C002':105,\n",
|
||
"\t'0005':106,\n",
|
||
"\t'0004':107,\n",
|
||
"\t'008A':108,\n",
|
||
"\t'0020':109,\n",
|
||
"\t'0024':110,\n",
|
||
"\t'C010':111,\n",
|
||
"\t'C006':112,\n",
|
||
"\t'C015':113,\n",
|
||
"\t'C00B':114,\n",
|
||
"\t'C001':115,\n",
|
||
"\t'003B':116,\n",
|
||
"\t'0002':117,\n",
|
||
"\t'0001':118,\n",
|
||
" '1301':119,\n",
|
||
"\t'1302':120,\n",
|
||
"\t'1303':121,\n",
|
||
"\t'1304':122\n",
|
||
"}"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 28,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"39777\n",
|
||
"39737\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"#TODO: 加入cipher suites,extensions特征\n",
|
||
"#stream_stat_json_file = root_dir + 'DataSet/result/' + date + '/stream_stat.txt'\n",
|
||
"stream_stat_json_file = root_dir + 'DataSet/result/' + 'noProxy/All' + '/stream_stat.txt'\n",
|
||
"stm2cipherDict = dict()\n",
|
||
"stm2extenDict = dict()\n",
|
||
"flow_key_set = set()\n",
|
||
"with open(stream_stat_json_file) as f:\n",
|
||
" lines = f.readlines()\n",
|
||
" print(len(lines))\n",
|
||
" for line in lines:\n",
|
||
" line = json.loads(line)\n",
|
||
" flow_key = (line['sip'], line['sport'], line['dip'], line['dport'])\n",
|
||
" cipher_suites = line['tls']['cipher_suites']\n",
|
||
" flow_key_set.add(flow_key)\n",
|
||
" extension_list = line['tls']['extensions_list']\n",
|
||
" stm2cipherDict[flow_key] = cipher_suites\n",
|
||
" stm2extenDict[flow_key] = extension_list\n",
|
||
"print(len(flow_key_set))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 22,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"146.89064032\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"with open(stream_stat_json_file) as f:\n",
|
||
" lines = f.readlines()\n",
|
||
" pkt_len_list = list()\n",
|
||
" for line in lines:\n",
|
||
" line = json.loads(line)\n",
|
||
" packets = line['packets']\n",
|
||
" pkt_len_list.append(len(packets))\n",
|
||
" print(np.mean(pkt_len_list))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 23,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"example_label = {tuple(example_label_df.iloc[i,0:4].values):example_label_df.iloc[i,4] for i in example_label_df.index}"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 24,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"34686\n",
|
||
"34686\n",
|
||
"end\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"'\\nkeys = set(example_label.keys()).difference(set(result_key))\\nexample_keys = example_label_df.iloc[:,0:4].values.copy()\\nfor i,value in enumerate(list(example_keys)):\\n #print(tuple(value))\\n if tuple(value) in keys:\\n print(i)\\n'"
|
||
]
|
||
},
|
||
"execution_count": 24,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"#example_json_file = root_dir + 'DataSet/result/' + date + '/ssl_stat.txt'\n",
|
||
"example_json_file = root_dir + 'DataSet/result/' + 'noProxy/All' + '/ssl_stat.txt'\n",
|
||
"example_json_f = open(example_json_file, 'r')\n",
|
||
"#array_shape = (1771,6)\n",
|
||
"result_data = list()\n",
|
||
"result_label = list()\n",
|
||
"result_key = list()\n",
|
||
"i = 0\n",
|
||
"for line in example_json_f.readlines():\n",
|
||
" example_json = ''\n",
|
||
" try:\n",
|
||
" example_json = json.loads(line)\n",
|
||
" except Exception:\n",
|
||
" continue\n",
|
||
" #标签\n",
|
||
" try:\n",
|
||
" flow_key = (example_json['sip'], example_json['sport'], example_json['dip'], example_json['dport'])\n",
|
||
" result_key.append(flow_key)\n",
|
||
" ciphers = stm2cipherDict[flow_key]\n",
|
||
" extensions_list = stm2extenDict[flow_key]\n",
|
||
" result_label.append(example_label[flow_key])\n",
|
||
" except Exception:\n",
|
||
" #traceback.print_exc()\n",
|
||
" continue\n",
|
||
" #print(example_json)\n",
|
||
" san_count = 0\n",
|
||
" cert_length = [0,0,0,0]\n",
|
||
" if 'san' in example_json:\n",
|
||
" san_count = len(example_json['san'].split(';'))\n",
|
||
" if 'sni' in example_json:\n",
|
||
" sni_len = len(example_json['sni'])\n",
|
||
" cert = example_json['Cert']\n",
|
||
" cert_count = cert['cert_count']\n",
|
||
" if cert_count != 0:\n",
|
||
" cert_length = [c['length'] for c in cert['cert_list']]\n",
|
||
" for i in range(4 - len(cert_length)):\n",
|
||
" cert_length.append(0)\n",
|
||
" result = [sni_len, san_count, cert_count]\n",
|
||
" result = [cert_count]\n",
|
||
" result += cert_length\n",
|
||
" #print(len(result))\n",
|
||
" \n",
|
||
" #tls\n",
|
||
" extensions_arr = np.zeros(34, dtype=np.uint8)\n",
|
||
" cipher_suits_arr = np.zeros(123, dtype=np.uint8)\n",
|
||
" for extension in extensions_list:\n",
|
||
" try:\n",
|
||
" extensions_arr[extensions[extension]]=1\n",
|
||
" except Exception:\n",
|
||
" pass\n",
|
||
" for cipher in ciphers:\n",
|
||
" try:\n",
|
||
" cipher = cipher.upper()\n",
|
||
" cipher_suits_arr[ciper_suits[cipher]]=1\n",
|
||
" except Exception:\n",
|
||
" pass\n",
|
||
" result += list(cipher_suits_arr)\n",
|
||
" result += list(extensions_arr)\n",
|
||
" result_data.append(result)\n",
|
||
" i += 1\n",
|
||
" \n",
|
||
"extensions_head = list()\n",
|
||
"for i in range(len(extensions)):\n",
|
||
" extensions_head.append('extension'+str(i))\n",
|
||
"cipher_head = ['cipher'+str(i) for i in range(len(ciper_suits))]\n",
|
||
"base_head = ['sni_len', 'san_count', 'cert_count', 'cert_length1', 'cert_length2', 'cert_length3','cert_length4']\n",
|
||
"base_head = ['cert_count', 'cert_length1', 'cert_length2', 'cert_length3','cert_length4']\n",
|
||
"header = base_head+cipher_head+extensions_head\n",
|
||
"result_df = pd.DataFrame(result_data, columns=header)\n",
|
||
"print(len(result_label))\n",
|
||
"print(len(result_data))\n",
|
||
"result_df['label'] = np.array(result_label)\n",
|
||
"\n",
|
||
"print('end')\n",
|
||
"\n",
|
||
"'''\n",
|
||
"keys = set(example_label.keys()).difference(set(result_key))\n",
|
||
"example_keys = example_label_df.iloc[:,0:4].values.copy()\n",
|
||
"for i,value in enumerate(list(example_keys)):\n",
|
||
" #print(tuple(value))\n",
|
||
" if tuple(value) in keys:\n",
|
||
" print(i)\n",
|
||
"'''"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 25,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"%matplotlib inline\n",
|
||
"import os\n",
|
||
"import numpy as np\n",
|
||
"import pandas as pd\n",
|
||
"import matplotlib.pyplot as plt"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 26,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"163\n",
|
||
" precision recall f1\n",
|
||
"LogisticRegression 0.872348 0.872348 0.872348\n",
|
||
"SVM 0.910055 0.910055 0.910055\n",
|
||
"GaussianNB 0.698339 0.698339 0.698339\n",
|
||
"tree 0.911208 0.911208 0.911208\n",
|
||
"RandomForest 0.904059 0.904059 0.904059\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"from sklearn.linear_model import LogisticRegression\n",
|
||
"from sklearn.svm import SVC\n",
|
||
"from sklearn.naive_bayes import GaussianNB\n",
|
||
"from sklearn import tree\n",
|
||
"from sklearn.ensemble import RandomForestClassifier\n",
|
||
"from sklearn.metrics import f1_score,recall_score,precision_score\n",
|
||
"import random\n",
|
||
"examples = result_df.values.copy()\n",
|
||
"print(len(examples[0]))\n",
|
||
"score_df = pd.DataFrame(np.zeros((5,3)),index = ['LogisticRegression', 'SVM', 'GaussianNB', 'tree', 'RandomForest'], \\\n",
|
||
" columns = ['precision', 'recall', 'f1'])\n",
|
||
"\n",
|
||
"\n",
|
||
"def my_pred(y_pred, y_test, proba):\n",
|
||
" y_pred1 = list()\n",
|
||
" y_test1 = list()\n",
|
||
" [rows, clos] = proba.shape\n",
|
||
" print([rows, clos])\n",
|
||
" right_count = 0\n",
|
||
" wrong_count = 0\n",
|
||
" for i in range(rows):\n",
|
||
" temp = max(proba[i])\n",
|
||
" if temp < 0.9:\n",
|
||
" continue\n",
|
||
" y_pred1.append(y_pred[i])\n",
|
||
" y_test1.append(y_test[i])\n",
|
||
" f1 = f1_score(y_test1, y_pred1, average=None)\n",
|
||
" recall = recall_score(y_test1, y_pred1, average=None)\n",
|
||
" precision = precision_score(y_test1, y_pred1, average=None)\n",
|
||
" print(precision)\n",
|
||
" print(recall)\n",
|
||
" print(f1)\n",
|
||
" print(np.mean(precision))\n",
|
||
" print(np.mean(recall))\n",
|
||
" print(np.mean(f1))\n",
|
||
" print(str(len(y_test)) + \": \" + str(len(y_test1)) + \": \" + str(len(y_test1) / len(y_test)))\n",
|
||
" \n",
|
||
"#def a():\n",
|
||
"f1_score_list = list()\n",
|
||
"recall_score_list = list()\n",
|
||
"precision_score_list = list()\n",
|
||
"for i in range(1):\n",
|
||
" np.random.shuffle(examples)\n",
|
||
" examples_train = examples[:int(len(examples)*0.75)]\n",
|
||
" examples_test = examples[int(len(examples)*0.75):]\n",
|
||
" x_train = examples_train[:,0:-1]\n",
|
||
" y_train = examples_train[:,-1]\n",
|
||
" x_test = examples_test[:,0:-1]\n",
|
||
" y_test = examples_test[:,-1]\n",
|
||
" classifer = LogisticRegression()\n",
|
||
" classifer.fit(x_train, y_train)\n",
|
||
" y_pred = classifer.predict(x_test)\n",
|
||
" f1_score_list.append(f1_score(y_test, y_pred, average='micro'))\n",
|
||
" recall_score_list.append(recall_score(y_test, y_pred, average='micro'))\n",
|
||
" precision_score_list.append(precision_score(y_test, y_pred, average='micro'))\n",
|
||
"scores = [np.mean(precision_score_list), np.mean(recall_score_list), np.mean(f1_score_list)]\n",
|
||
"score_df.loc['LogisticRegression'] = scores\n",
|
||
"\n",
|
||
"f1_score_list = list()\n",
|
||
"recall_score_list = list()\n",
|
||
"precision_score_list = list()\n",
|
||
"for i in range(1):\n",
|
||
" #np.random.shuffle(examples)\n",
|
||
" examples_train = examples[:int(len(examples)*0.75)]\n",
|
||
" examples_test = examples[int(len(examples)*0.75):]\n",
|
||
" x_train = examples_train[:,0:-1]\n",
|
||
" y_train = examples_train[:,-1]\n",
|
||
" x_test = examples_test[:,0:-1]\n",
|
||
" y_test = examples_test[:,-1]\n",
|
||
" classifer = SVC()\n",
|
||
" classifer.fit(x_train, y_train)\n",
|
||
" y_pred = classifer.predict(x_test)\n",
|
||
" f1_score_list.append(f1_score(y_test, y_pred, average='micro'))\n",
|
||
" recall_score_list.append(recall_score(y_test, y_pred, average='micro'))\n",
|
||
" precision_score_list.append(precision_score(y_test, y_pred, average='micro'))\n",
|
||
"scores = [np.mean(precision_score_list), np.mean(recall_score_list), np.mean(f1_score_list)]\n",
|
||
"score_df.loc['SVM'] = scores\n",
|
||
"\n",
|
||
"f1_score_list = list()\n",
|
||
"recall_score_list = list()\n",
|
||
"precision_score_list = list()\n",
|
||
"for i in range(1):\n",
|
||
" #np.random.shuffle(examples)\n",
|
||
" examples_train = examples[:int(len(examples)*0.75)]\n",
|
||
" examples_test = examples[int(len(examples)*0.75):]\n",
|
||
" x_train = examples_train[:,0:-1]\n",
|
||
" y_train = examples_train[:,-1]\n",
|
||
" x_test = examples_test[:,0:-1]\n",
|
||
" y_test = examples_test[:,-1]\n",
|
||
" classifer = GaussianNB()\n",
|
||
" classifer.fit(x_train, y_train)\n",
|
||
" y_pred = classifer.predict(x_test)\n",
|
||
" f1_score_list.append(f1_score(y_test, y_pred, average='micro'))\n",
|
||
" recall_score_list.append(recall_score(y_test, y_pred, average='micro'))\n",
|
||
" precision_score_list.append(precision_score(y_test, y_pred, average='micro'))\n",
|
||
"scores = [np.mean(precision_score_list), np.mean(recall_score_list), np.mean(f1_score_list)]\n",
|
||
"score_df.loc['GaussianNB'] = scores\n",
|
||
"\n",
|
||
"f1_score_list = list()\n",
|
||
"recall_score_list = list()\n",
|
||
"precision_score_list = list()\n",
|
||
"for i in range(1):\n",
|
||
" #np.random.shuffle(examples)\n",
|
||
" examples_train = examples[:int(len(examples)*0.75)]\n",
|
||
" examples_test = examples[int(len(examples)*0.75):]\n",
|
||
" x_train = examples_train[:,0:-1]\n",
|
||
" y_train = examples_train[:,-1]\n",
|
||
" x_test = examples_test[:,0:-1]\n",
|
||
" y_test = examples_test[:,-1]\n",
|
||
" classifer = tree.DecisionTreeClassifier()\n",
|
||
" classifer.fit(x_train, y_train)\n",
|
||
" y_pred = classifer.predict(x_test)\n",
|
||
" f1_score_list.append(f1_score(y_test, y_pred, average='micro'))\n",
|
||
" recall_score_list.append(recall_score(y_test, y_pred, average='micro'))\n",
|
||
" precision_score_list.append(precision_score(y_test, y_pred, average='micro'))\n",
|
||
"scores = [np.mean(precision_score_list), np.mean(recall_score_list), np.mean(f1_score_list)]\n",
|
||
"score_df.loc['tree'] = scores\n",
|
||
"\n",
|
||
"f1_score_list = list()\n",
|
||
"recall_score_list = list()\n",
|
||
"precision_score_list = list()\n",
|
||
"for i in range(1):\n",
|
||
" np.random.shuffle(examples)\n",
|
||
" examples_train = examples[:int(len(examples)*0.75)]\n",
|
||
" examples_test = examples[int(len(examples)*0.75):]\n",
|
||
" x_train = examples_train[:,0:-1]\n",
|
||
" y_train = examples_train[:,-1]\n",
|
||
" x_test = examples_test[:,0:-1]\n",
|
||
" y_test = examples_test[:,-1]\n",
|
||
" classifer = RandomForestClassifier()\n",
|
||
" classifer.fit(x_train, y_train)\n",
|
||
" y_pred = classifer.predict(x_test)\n",
|
||
" f1_score_list.append(f1_score(y_test, y_pred, average='micro'))\n",
|
||
" recall_score_list.append(recall_score(y_test, y_pred, average='micro'))\n",
|
||
" precision_score_list.append(precision_score(y_test, y_pred, average='micro')) \n",
|
||
" #proba = classifer.predict_proba(x_test)\n",
|
||
" #my_pred(y_pred, y_test, proba)\n",
|
||
" \n",
|
||
"scores = [np.mean(precision_score_list), np.mean(recall_score_list), np.mean(f1_score_list)]\n",
|
||
"score_df.loc['RandomForest'] = scores\n",
|
||
"print(score_df)\n",
|
||
"# ax = score_df.plot.bar(title='ssl-fingerprint')\n",
|
||
"# fig = ax.get_figure()\n",
|
||
"# #fig.savefig('../figure/ssl.svg')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 27,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"ename": "IndentationError",
|
||
"evalue": "unexpected indent (<ipython-input-27-3f33e9548833>, line 2)",
|
||
"output_type": "error",
|
||
"traceback": [
|
||
"\u001b[0;36m File \u001b[0;32m\"<ipython-input-27-3f33e9548833>\"\u001b[0;36m, line \u001b[0;32m2\u001b[0m\n\u001b[0;31m precision recall f1\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mIndentationError\u001b[0m\u001b[0;31m:\u001b[0m unexpected indent\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"+ sni, san\n",
|
||
" precision recall f1\n",
|
||
"LogisticRegression 0.891720 0.891720 0.891720\n",
|
||
"SVM 0.946033 0.946033 0.946033\n",
|
||
"GaussianNB 0.772025 0.772025 0.772025\n",
|
||
"tree 0.963330 0.963330 0.963330\n",
|
||
"RandomForest 0.964022 0.964022 0.964022\n",
|
||
"\n",
|
||
"\n",
|
||
"no sni, san\n",
|
||
"\n",
|
||
" precision recall f1\n",
|
||
"LogisticRegression 0.872348 0.872348 0.872348\n",
|
||
"SVM 0.910055 0.910055 0.910055\n",
|
||
"GaussianNB 0.698339 0.698339 0.698339\n",
|
||
"tree 0.911208 0.911208 0.911208\n",
|
||
"RandomForest 0.904059 0.904059 0.904059"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.6.2"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 2
|
||
}
|