{ "cells": [ { "cell_type": "code", "execution_count": 39, "metadata": { "collapsed": true }, "outputs": [], "source": [
  "import os\n",
  "import pandas as pd\n",
  "import numpy as np\n",
  "import json" ] },
 { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "zhihu 3488\n", "weibo 2705\n", "douyin 2072\n", "hupu 1217\n", "toutiao 1058\n", "Name: 4, dtype: int64" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# Name of the result sub-directory to process.\n",
  "date = '2019-12-20_21'\n",
  "# Project root. Set the GRADPROJ_ROOT environment variable to point somewhere else instead of\n",
  "# editing this path. os.path.join(..., '') guarantees the trailing separator that the string\n",
  "# concatenations below (and in later cells) rely on.\n",
  "root_dir = os.path.join(os.environ.get('GRADPROJ_ROOT', '/Users/Leo/Documents/github/GradProj/'), '')\n",
  "example_label_file = root_dir + 'DataSet/result/' + date + '/stream_tag.txt'\n",
  "# Whitespace-separated table without a header row; column 4 holds the label.\n",
  "# The separator is a raw string so that the backslash reaches the regex engine unchanged.\n",
  "example_label_df = pd.read_table(example_label_file, sep=r'\\s+', header=None)\n",
  "# Force column 3 to 443 on every row. Columns 0-3 are later used as the\n",
  "# (sip, sport, dip, dport) lookup key, so this pins dport.\n",
  "# NOTE(review): presumably because only HTTPS flows are of interest - confirm.\n",
  "example_label_df[3] = 443\n",
  "example_label_df[4].value_counts()" ] },
 { "cell_type": "code", "execution_count": 40, "metadata": { "collapsed": true }, "outputs": [], "source": [
  "# Cipher-suite codes (four hex digits, upper case). The position of a code in this list is\n",
  "# its index in the cipher multi-hot vector, so append new codes at the end only.\n",
  "CIPHER_SUITE_CODES = [\n",
  "    '1305', 'C030', 'C02C', 'C028', 'C024', 'C014', 'C00A', '00A5',  # 0-7\n",
  "    '00A3', '00A1', '009F', '006B', '006A', '0069', '0068', '0039',  # 8-15\n",
  "    '0038', '0037', '0036', '0088', '0087', '0086', '0085', 'C019',  # 16-23\n",
  "    '00A7', '006D', '003A', '0089', 'C032', 'C02E', 'C02A', 'C026',  # 24-31\n",
  "    'C00F', 'C005', '009D', '003D', '0035', '0084', '008D', 'C02F',  # 32-39\n",
  "    'C02B', 'C027', 'C023', 'C013', 'C009', '00A4', '00A2', '00A0',  # 40-47\n",
  "    '009E', '0067', '0040', '003F', '003E', '0033', '0032', '0031',  # 48-55\n",
  "    '0030', '009A', '0099', '0098', '0097', '0045', '0044', '0043',  # 56-63\n",
  "    '0042', 'C018', '00A6', '006C', '0034', '009B', '0046', 'C031',  # 64-71\n",
  "    'C02D', 'C029', 'C025', 'C00E', 'C004', '009C', '003C', '002F',  # 72-79\n",
  "    '0096', '0041', '008C', 'C012', 'C008', '0016', '0013', '0010',  # 80-87\n",
  "    '000D', 'C017', '001B', 'C00D', 'C003', '000A', '0007', '008B',  # 88-95\n",
  "    '0021', '001F', '0025', '0023', 'C011', 'C007', 'C016', '0018',  # 96-103\n",
  "    'C00C', 'C002', '0005', '0004', '008A', '0020', '0024', 'C010',  # 104-111\n",
  "    'C006', 'C015', 'C00B', 'C001', '003B', '0002', '0001', '1301',  # 112-119\n",
  "    '1302', '1303', '1304',                                          # 120-122\n",
  "]\n",
  "# code -> vector index\n",
  "ciper_suits = {code: index for index, code in enumerate(CIPHER_SUITE_CODES)}\n",
  "# A repeated code would silently collapse two columns into one.\n",
  "assert len(ciper_suits) == len(CIPHER_SUITE_CODES), 'duplicate code in CIPHER_SUITE_CODES'" ] },
 { "cell_type": "code", "execution_count": 41, "metadata": { "collapsed": true }, "outputs": [], "source": [
  "# TLS extension type numbers in vector order: 0-31 map to themselves,\n",
  "# then 35 -> 32 and 65281 -> 33.\n",
  "EXTENSION_TYPES = list(range(32)) + [35, 65281]\n",
  "# extension type -> vector index\n",
  "extensions = {ext_type: index for index, ext_type in enumerate(EXTENSION_TYPES)}" ] },
 { "cell_type": "code", "execution_count": 42, "metadata": { "collapsed": true }, "outputs": [], "source": [
  "# TODO: add the cipher-suite and extension features\n",
  "stream_stat_json_file = root_dir + 'DataSet/result/' 
+ date + '/stream_stat.txt'\n",
  "# Per-flow lookup tables keyed by (sip, sport, dip, dport), filled from the\n",
  "# 'tls' object of every JSON line in stream_stat.txt.\n",
  "stm2cipherDict = dict()\n",
  "stm2extenDict = dict()\n",
  "with open(stream_stat_json_file) as f:\n",
  "    for raw_line in f:\n",
  "        if not raw_line.strip():\n",
  "            continue  # tolerate blank lines, e.g. a trailing newline at end of file\n",
  "        record = json.loads(raw_line)\n",
  "        flow_key = (record['sip'], record['sport'], record['dip'], record['dport'])\n",
  "        stm2cipherDict[flow_key] = record['tls']['cipher_suites']\n",
  "        stm2extenDict[flow_key] = record['tls']['extensions_list']" ] },
 { "cell_type": "code", "execution_count": 43, "metadata": { "collapsed": true }, "outputs": [], "source": [
  "# flow key (columns 0-3 of the label table) -> label (column 4)\n",
  "example_label = {tuple(row[:4]): row[4] for row in example_label_df.itertuples(index=False)}" ] },
 { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "end\n" ] } ], "source": [
  "# One cert_length column per certificate: longer chains are truncated and\n",
  "# shorter ones zero-padded so that every feature row has the same width.\n",
  "MAX_CERTS = 4\n",
  "\n",
  "\n",
  "def multi_hot(values, vocabulary, normalise=None):\n",
  "    '''Encode `values` as a uint8 multi-hot vector over `vocabulary`.\n",
  "\n",
  "    values     -- iterable of raw items (cipher-suite codes, extension types)\n",
  "    vocabulary -- dict mapping a known item to its position in the vector\n",
  "    normalise  -- optional callable applied to every item before the lookup\n",
  "\n",
  "    Items that are missing from the vocabulary, or of a type the lookup cannot\n",
  "    handle, are ignored so that one unknown entry does not discard the flow.\n",
  "    '''\n",
  "    vector = np.zeros(len(vocabulary), dtype=np.uint8)\n",
  "    for value in values:\n",
  "        try:\n",
  "            key = normalise(value) if normalise is not None else value\n",
  "            vector[vocabulary[key]] = 1\n",
  "        except (KeyError, TypeError, AttributeError):\n",
  "            continue\n",
  "    return vector\n",
  "\n",
  "\n",
  "example_json_file = root_dir + 'DataSet/result/' + date + '/ssl_stat.txt'\n",
  "result_data = list()\n",
  "result_label = list()\n",
  "result_key = list()\n",
  "with open(example_json_file, 'r') as example_json_f:\n",
  "    for line in example_json_f:\n",
  "        try:\n",
  "            example_json = json.loads(line)\n",
  "        except ValueError:\n",
  "            continue  # not valid JSON (json.JSONDecodeError is a ValueError)\n",
  "\n",
  "        # Resolve the label and both TLS lists BEFORE recording anything: appending the\n",
  "        # label first and then failing a later lookup would leave an orphan entry in\n",
  "        # result_label and shift every following label against result_data.\n",
  "        try:\n",
  "            flow_key = (example_json['sip'], example_json['sport'], example_json['dip'], example_json['dport'])\n",
  "            label = example_label[flow_key]\n",
  "            ciphers = stm2cipherDict[flow_key]\n",
  "            extensions_list = stm2extenDict[flow_key]\n",
  "        except (KeyError, TypeError):\n",
  "            continue  # unlabelled flow, no TLS record, or a record that is not an object\n",
  "\n",
  "        # Certificate features: number of ';'-separated SAN entries, certificate count,\n",
  "        # and the length of each certificate in the chain.\n",
  "        san_count = len(example_json['san'].split(';')) if 'san' in example_json else 0\n",
  "        cert = example_json['Cert']\n",
  "        cert_count = cert['cert_count']\n",
  "        cert_length = [c['length'] for c in cert['cert_list']] if cert_count != 0 else []\n",
  "        cert_length = (cert_length + [0] * MAX_CERTS)[:MAX_CERTS]\n",
  "\n",
  "        # TLS features: multi-hot cipher suites (codes compared in upper case) and extensions.\n",
  "        row = [san_count, cert_count] + cert_length\n",
  "        row += multi_hot(ciphers, ciper_suits, str.upper).tolist()\n",
  "        row += multi_hot(extensions_list, extensions).tolist()\n",
  "\n",
  "        result_data.append(row)\n",
  "        result_label.append(label)\n",
  "        result_key.append(flow_key)\n",
  "\n",
  "base_head = ['san_count', 'cert_count', 'cert_length1', 'cert_length2', 'cert_length3','cert_length4']\n",
  "cipher_head = ['cipher' + str(n) for n in range(len(ciper_suits))]\n",
  "extensions_head = ['extension' + str(n) for n in range(len(extensions))]\n",
  "header = base_head + cipher_head + extensions_head\n",
  "\n",
  "# Every row must pair with exactly one label.\n",
  "assert len(result_data) == len(result_label) == len(result_key)\n",
  "result_df = pd.DataFrame(result_data, columns=header)\n",
  "result_df['label'] = np.array(result_label)\n",
  "\n",
  "print('end')" ] },
 { "cell_type": "code", "execution_count": 44, "metadata": { "collapsed": true }, "outputs": [], "source": [ "%matplotlib inline\n", "import os\n", "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as 
plt" ] }, { "cell_type": "code", "execution_count": 82, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "164\n", "[2357, 5]\n", "0.999400838826\n", "0.999400838826\n", "0.999400838826\n", " precision recall f1\n", "LogisticRegression 0.00000 0.00000 0.00000\n", "SVM 0.00000 0.00000 0.00000\n", "GaussianNB 0.00000 0.00000 0.00000\n", "tree 0.00000 0.00000 0.00000\n", "RandomForest 0.89563 0.89563 0.89563\n" ] }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAFcCAYAAAAzq/4LAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xu8VXWd//HXmyMXQ5FG0VG5HDIvIHIHwQuWZJk/Q00L\nKFPLYBLNSudCU9M4NjNl46hUmlmGZt5zNHQsL4l3cbgIKCCKeNATXknlpnLx8/tjrQObw4Gzwc1Z\n+6z1fj4e58Fea33PPp+zgfde+7u+6/tVRGBmZvnSJusCzMys8hzuZmY55HA3M8shh7uZWQ453M3M\ncsjhbmaWQw53q2qSaiWFpJ220ubfJb0p6VVJ3SWtlFTTknVuL0n/LOnXWddh+SOPc7dqJqkWeBFo\nGxHrmjjeDXgO6BERr7dsdS1L0gXAxyPi1KxrsernM3dr7XoAy7IO9q19sqjk95iVy+FuLU7SP0n6\ni6QVkhZKGilpqKQZkpZLek3SJWU8z6eA+4B90q6Yaxp340h6UNIPJT2W/rx7Je1R8hynSVoiaZmk\nf5FUlz4vktpImijphfT4LZL+Jj3W8HPOlPQS8EDJvvGSlkp6RdL5JT/rAkm/l/Q7ScuBM9J9v2v0\nnKdLeintavpeeuxY4J+B0envOqdSfx+WTw53a1GSDgTOAYZExK7AZ4A6YBIwKSI6AfsBtzT3XBFx\nP/BZYGlE7BIRZ2yh6ZeArwJ7Au2Av09r6Q1cAXwZ2BvYDdi35PvOBU4EjgL2Ad4CLm/03EcBvdLf\no8Engf2BTwMTG94sUicAvwc6A9dvod4jgAOBkcAPJPWKiD8B/wncnP6u/bbwvWaAw91a3nqgPdBb\nUtuIqIuIF4C1wMcl7RERKyNiWgV/5uSIeC4i3iV50+if7j8FuDMiHo2INcAPgNKLUH8HfC8i6iPi\nfeAC4JRG3SkXRMSq9Lkb/Fu672lgMjC25NgTEXFHRHzQ6Hto9P3vRsQcYA7gILdt5nC3FhURi4Bv\nkwTl65JukrQPcCZwAPCspOmSjm/8vZKOTLskVkqatw0/9tWSx6uBXdLH+wAvl9S2GlhW0rYHcLuk\ntyW9DSwgeXPaq6TNy2yudN+S9OdsrX259ZqVzeFuLS4iboiII0jCM4CLIuL5iBhL0nVyEfB7SR0b\nfd8jaZfELhFxcAVKeQXo2rAhaWdg95LjLwOfjYjOJV8dIuIvpWU18bzdSh53B5Y2075cHtpmZXO4\nW4uSdKCkoyW1B94D3gXWSzpVUpeI+AB4O22+fgeX83vgc5IOk9QO+DdAJcevBP5DUo+09i6STijj\nef9F0kckHUzS139zhep9DaiV5P+31iz/I7GW1h74MfAmSffDniSjQI4F5klaSXJxdUxEvLcjC4mI\necA3gZtIzuJXAK8D76dNJgFTgHslrQCmAYeW8dQPAYuAPwMXR8S9FSr51vTPZZJmVeg5Lad8E5NZ\nStIuJJ8a9o+IF7fj+2vZyg1XZi3JZ+5WaJI+l3ahd
AQuBp4mGZpp1qo53K3oTiC54LmUZGz6mPDH\nWcsBd8uYmeWQz9zNzHLI4W5mlkOZzUq3xx57RG1tbVY/3sysVZo5c+abEdGluXaZhXttbS0zZszI\n6sebmbVKkpaU087dMmZmOeRwNzPLIYe7mVkOVdUyX2vXrqW+vp733tuhU4rkVocOHejatStt27bN\nuhQzy1hVhXt9fT277rortbW1SGr+G2yDiGDZsmXU19fTs2fPrMsxs4xVVbfMe++9x+677+5g3w6S\n2H333f2px8yAKgt3wMH+Ifi1M7MGVRfueXXYYYdt9fhxxx3H22+/vdU2Zmblqqo+98ZqJ/5vRZ+v\n7sf/ryLPs379empqarbpex5//PGtHr/77rs/TElm1oxtyZO6Dl8qu+0hPbuX3fbp058uu+2H5TP3\nRurq6jjooIM4/fTT6du3L6eccgqrV6+mtraWCy+8kCOOOIJbb72VF154gWOPPZZBgwZx5JFH8uyz\nzwLw2muvcdJJJ9GvXz/69eu3IdR32SVZ4/iVV15hxIgR9O/fnz59+vDII48AyR27b775JgCXXHIJ\nffr0oU+fPlx22WUb6urVqxfjxo3j4IMP5tOf/jTvvvtuS788ZtZKONybsHDhQsaPH8/cuXPp1KkT\nV1xxBZAMNXz00UcZM2YM48eP52c/+xkzZ87k4osvZsKECQCce+65HHXUUcyZM4dZs2Zx8MGbruN8\nww038JnPfIbZs2czZ84c+vfvv8nxmTNnMnnyZJ588kmmTZvGr371K5566ikAnn/+ec4++2zmzZtH\n586due2221rg1TCz1qiqu2Wy0q1bNw4//HAATj31VH76058CMHr0aABWrlzJ448/zhe+8IUN3/P+\n+8mymw888AC//e1vAaipqWG33Xbb5LmHDBnC1772NdauXcuJJ564Wbg/+uijnHTSSXTs2BGAz3/+\n8zzyyCOMGjWKnj17bmg/aNAg6urqKvybm1le+My9CY1HnTRsNwTuBx98QOfOnZk9e/aGrwULFpT1\n3CNGjODhhx9m33335Stf+cqGN4IGW1s8pX379hse19TUsG6dl+k0s6Y53Jvw0ksv8cQTTwBw4403\ncsQRR2xyvFOnTvTs2ZNbb00Wo48I5syZA8DIkSP5xS9+ASQXXpcvX77J9y5ZsoQ999yTcePGceaZ\nZzJr1qaL2I8YMYI77riD1atXs2rVKm6//XaOPPLIHfJ7mll+Odyb0KtXL6699lr69u3LX//6V846\n66zN2lx//fVcffXV9OvXj4MPPpg//OEPAEyaNImpU6dyyCGHMGjQIObNm7fJ9z344IP079+fAQMG\ncNttt/Gtb31rk+MDBw7kjDPOYOjQoRx66KF8/etfZ8CAATvulzWzXMpsDdXBgwdH4/ncFyxYQK9e\nvTKpp0FdXR3HH388zzzzTKZ1bK9qeA3NqlFehkJKmhkRg5tr5zN3M7Mccrg3Ultb22rP2s3MGjjc\nzcxyyOFuZpZDDnczsxxyuJuZ5ZDDvQXU1dXRp08fIBnnfvzxx2dckZnlXXXPLXPBbs232abne2eb\nmkcEEUGbNn4PNLPWxanVSMPUuhMmTGDgwIFcd911DB8+nIEDB/KFL3yBlStXAjB9+nQOO+ww+vXr\nx9ChQ1mxYgV1dXUceeSRDBw4kIEDBzY7h7uZ2Y7icG/CwoULOe2007jvvvu4+uqruf/++5k1axaD\nBw/mkksuYc2aNYwePZpJkyYxZ84c7r//fnbeeWf23HNP7rvvPmbNmsXNN9/Mueeem/WvYmYFVd3d\nMhnp0aMHw4YN46677mL+/Pkbpv9ds2YNw4cPZ+HChey9994MGTIESCYSA1i1ahXnnHMOs2fPpqam\nhueeey6z38HMiq2scJd0LDAJqAF+HRE/bnS8O3At0DltMzEiWu26cQ1T+0YExxxzDDfeeOMmx+fO\nndvkYtSXXnope
+21F3PmzOGDDz6gQ4cOLVKvmVljzXbLSKoBLgc+C/QGxkrq3ajZ94FbImIAMAa4\notKFZmHYsGE89thjLFq0CIDVq1fz3HPPcdBBB7F06VKmT58OwIoVK1i3bh3vvPMOe++9N23atOG6\n665j/fr1WZZvZgVWTp/7UGBRRCyOiDXATcAJjdoE0Cl9vBuwtHIlZqdLly5cc801jB07lr59+zJs\n2DCeffZZ2rVrx80338w3v/lN+vXrxzHHHMN7773HhAkTuPbaaxk2bBjPPffchk8AZmYtrdkpfyWd\nAhwbEV9Pt78CHBoR55S02Ru4F/go0BH4VETMbOK5xgPjAbp37z5oyZIlmxz3dLUfnl9Ds6Z5yt8m\nnquJfY3fEcYC10REV+A44DpJmz13RFwVEYMjYnCXLl3K+NFmZrY9ygn3eqBbyXZXNu92ORO4BSAi\nngA6AHtUokAzM9t25YT7dGB/ST0ltSO5YDqlUZuXgJEAknqRhPsblSzUzMzK12y4R8Q64BzgHmAB\nyaiYeZIulDQqbXY+ME7SHOBG4IzIav0+MzMrb5x7Omb97kb7flDyeD5weGVLMzOz7eXpB8zMcsjh\n3shPf/pTevXqxcknn8zw4cNp3749F198cdZlmZltk6qeW+aQaw+p6POVM8b0iiuu4I9//CMdO3Zk\nyZIl3HHHHRWtwcysJfjMvcQ3vvENFi9ezKhRo7j++usZMmQIbdu2zbosM7NtVtVn7i3tyiuv5E9/\n+hNTp05ljz08TN/MWi+fuZuZ5ZDD3cwshxzuZmY55D73LXj11VcZPHgwy5cvp02bNlx22WXMnz9/\nw6pLZmbVrKrDvRLTY26rurq6DY/r6+tb/OebmVWCu2XMzHLI4W5mlkMOdzOzHKq6cPdMwdvPr52Z\nNaiqcO/QoQPLli1zSG2HiGDZsmV06NAh61LMrApU1WiZrl27Ul9fzxtveBGn7dGhQwe6du2adRlm\nVgWqKtzbtm1Lz549sy7DzKzVq6puGTMzqwyHu5lZDjnczcxyyOFuZpZDDnczsxxyuJuZ5ZDD3cws\nhxzuZmY55HA3M8shh7uZWQ453M3McsjhbmaWQw53M7MccribmeWQw93MLIcc7mZmOeRwNzPLIYe7\nmVkOOdzNzHLI4W5mlkNlhbukYyUtlLRI0sQttPmipPmS5km6obJlmpnZttipuQaSaoDLgWOAemC6\npCkRMb+kzf7Ad4HDI+ItSXvuqILNzKx55Zy5DwUWRcTiiFgD3ASc0KjNOODyiHgLICJer2yZZma2\nLcoJ932Bl0u269N9pQ4ADpD0mKRpko5t6okkjZc0Q9KMN954Y/sqNjOzZpUT7mpiXzTa3gnYH/gE\nMBb4taTOm31TxFURMTgiBnfp0mVbazUzszKVE+71QLeS7a7A0iba/CEi1kbEi8BCkrA3M7MMlBPu\n04H9JfWU1A4YA0xp1OYO4JMAkvYg6aZZXMlCzcysfM2Ge0SsA84B7gEWALdExDxJF0oalTa7B1gm\naT4wFfiHiFi2o4o2M7Ota3YoJEBE3A3c3WjfD0oeB3Be+mVmZhnzHapmZjnkcDczyyGHu5lZDjnc\nzcxyyOFuZpZDDnczsxxyuJuZ5ZDD3cwshxzuZmY55HA3M8shh7uZWQ453M3McsjhbmaWQw53M7Mc\ncribmeWQw93MLIcc7mZmOeRwNzPLIYe7mVkOOdzNzHLI4W5mlkMOdzOzHHK4m5nlkMPdzCyHHO5m\nZjnkcDczyyGHu5lZDjnczcxyyOFuZpZDDnczsxxyuJuZ5ZDD3cwshxzuZmY55HA3M8shh7uZWQ45\n3M3McsjhbmaWQ2WFu6RjJS2UtEjSxK20O0VSSBpcuRLNzGxbNRvukmqAy4HPAr2BsZJ6N9FuV+Bc\n4MlKF2lmZtumnDP3ocCiiFgcEWuAm4ATmmj3Q+AnwHsVrM/MzLZDOeG+L/ByyXZ
9um8DSQOAbhFx\nVwVrMzOz7VROuKuJfbHhoNQGuBQ4v9knksZLmiFpxhtvvFF+lWZmtk3KCfd6oFvJdldgacn2rkAf\n4EFJdcAwYEpTF1Uj4qqIGBwRg7t06bL9VZuZ2VaVE+7Tgf0l9ZTUDhgDTGk4GBHvRMQeEVEbEbXA\nNGBURMzYIRWbmVmzmg33iFgHnAPcAywAbomIeZIulDRqRxdoZmbbbqdyGkXE3cDdjfb9YAttP/Hh\nyzIzsw/Dd6iameWQw93MLIcc7mZmOeRwNzPLIYe7mVkOOdzNzHLI4W5mlkMOdzOzHHK4m5nlkMPd\nzCyHHO5mZjnkcDczyyGHu5lZDjnczcxyyOFuZpZDDnczsxxyuJuZ5ZDD3cwshxzuZmY55HA3M8sh\nh7uZWQ453M3McsjhbmaWQw53M7MccribmeWQw93MLIcc7mZmOeRwNzPLIYe7mVkOOdzNzHLI4W5m\nlkMOdzOzHHK4m5nlkMPdzCyHHO5mZjnkcDczyyGHu5lZDpUV7pKOlbRQ0iJJE5s4fp6k+ZLmSvqz\npB6VL9XMzMrVbLhLqgEuBz4L9AbGSurdqNlTwOCI6Av8HvhJpQs1M7PylXPmPhRYFBGLI2INcBNw\nQmmDiJgaEavTzWlA18qWaWZm26KccN8XeLlkuz7dtyVnAn/8MEWZmdmHs1MZbdTEvmiyoXQqMBg4\nagvHxwPjAbp3715miWZmtq3KOXOvB7qVbHcFljZuJOlTwPeAURHxflNPFBFXRcTgiBjcpUuX7anX\nzMzKUE64Twf2l9RTUjtgDDCltIGkAcAvSYL99cqXaWZm26LZcI+IdcA5wD3AAuCWiJgn6UJJo9Jm\n/wXsAtwqabakKVt4OjMzawHl9LkTEXcDdzfa94OSx5+qcF1mZvYh+A5VM7MccribmeWQw93MLIcc\n7mZmOeRwNzPLIYe7mVkOOdzNzHLI4W5mlkMOdzOzHHK4m5nlkMPdzCyHHO5mZjnkcDczyyGHu5lZ\nDjnczcxyyOFuZpZDDnczsxxyuJuZ5ZDD3cwshxzuZmY55HA3M8shh7uZWQ453M3McsjhbmaWQw53\nM7MccribmeWQw93MLIcc7mZmOeRwNzPLIYe7mVkOOdzNzHLI4W5mlkMOdzOzHHK4m5nlkMPdzCyH\nHO5mZjnkcDczyyGHu5lZDpUV7pKOlbRQ0iJJE5s43l7SzenxJyXVVrpQMzMrX7PhLqkGuBz4LNAb\nGCupd6NmZwJvRcTHgUuBiypdqJmZla+cM/ehwKKIWBwRa4CbgBMatTkBuDZ9/HtgpCRVrkwzM9sW\nO5XRZl/g5ZLteuDQLbWJiHWS3gF2B94sbSRpPDA+3VwpaeH2FF1he9CozgLza5Hw67BRIV+LLZyZ\nbuG1eKb85z2jIue8PcppVE64N1VNbEcbIuIq4KoyfmaLkTQjIgZnXUc18GuR8OuwkV+LjVrba1FO\nt0w90K1kuyuwdEttJO0E7Ab8tRIFmpnZtisn3KcD+0vqKakdMAaY0qjNFOD09PEpwAMRsdmZu5mZ\ntYxmu2XSPvRzgHuAGuA3ETFP0oXAjIiYAlwNXCdpEckZ+5gdWXSFVVU3Ucb8WiT8Omzk12KjVvVa\nyCfYZmb54ztUzcxyyOFuZpZDDnczsxxyuJuZbYGkw8vZV40KeUFV0gHAP5Dc6bVhxFBEHJ1ZUS1I\n0twtHQIiIvq2ZD1ZktQBGA28BdwJ/CNwJPAC8MOIKN7dmdJHgPOB7hExTtL+wIERcVfGpbU4SbMi\nYmBz+6pROXeo5tGtwJXAr4D1GdeShQ9I7iC+gSTQ3s22nEz9FlgLdCQJtGeAnwNHANcAx2dWWXYm\nAzOB4el2Pcn/mcKEu6ThwGFAF0nnlRzqRDIkvOoVNdzXRcQvsi4iKxHRX9JBwFiSgJ+f/nlvRKzL\ntLiW1zsi+qR3VtdHxFHp/j9JmpNlYRnaLyJ
GSxoLEBHvFnAiwHbALiQZuWvJ/uUkN2pWvaKG+52S\nJgC3A+837IyIwkyZEBHPAv8K/Kuk0SRnsBcB/5VpYS1vDWy4Wa/xtBpF/FQHsEbSzqTzQ0naj5L/\nJ0UQEQ8BD0m6JiKWAEhqA+wSEcuzra48Re1zf7GJ3RERH2vxYjIiaV+SO4lPIulvvgW4PSJWZlpY\nC5P0Osk01iLpe7+p4RDwxYjYK6vasiLpGOD7JOs33AscDpwREQ9mWVcWJN0AfIPkjX4mybxZl0RE\n1Z8EFTLci07SQyQfNW8hmX9/k08sRfoEI+n0rR2PiGu3djyvJO0ODCN5k5tWxAvLAJJmp92YXwYG\nAf8EzGwNgw4KGe6S2gJnASPSXQ8Cv4yItZkV1YIk1bFxSubSfwANo2UK8wnGNpf2r38Z+FhEXCip\nO/C3EfF/GZfW4iTNA/qTXJP6eUQ8JGlORPTLuLRmFbXP/RdAW+CKdPsr6b6vZ1ZRC4qI2qxrqBaS\nJtPE2gOpiIgzW7KeKnEFyYiqo4ELgRXAbcCQLIvKyC+BOmAO8LCkHiQXVateUc/cN3vnbS3vxpUg\naT7wO+CmiFicdT1ZknRyE7u7A98GaiKiawuXlLmGcdySnoqIAem+wvz/aI6knVrDqLKi3qG6Ph0B\nAICkj1GskRFjSfrc75P0pKRvS9on66KyEBG3NXwBT5EsBH8W8GOgqN1TayXVsHG0TBeSM/nCkbSX\npKsl/THd7s3GtSuqWlHP3EeS3KixmKSfuQfw1YiYmmlhGZA0jGSUyMnAIuDGiPhVtlW1LEm9gO8B\nA0iGgv6uNZyZ7SjpxcPRwECShe9PAb4fEbdmWlgG0lCfDHwvIvql90M8FRGHZFxaswoZ7gCS2gMH\nkoT7sxFRqHG8jUn6BHApyU097TMup8VIuhUYDFxMMnpok09wRRo5VCq9yW0kyf+PP0fEgoxLyoSk\n6RExpFEX1eyI6J91bc0p1AVVSUdHxAOSPt/o0H6SiIj/yaSwjEgaQtJFczLJRaOrSG4zL5IhJN0P\nf08y/UDpnZhBwbpm0ht15kZEH+DZrOupAqvSYaENXVTDgHeyLak8hQp34CjgAeBzTRwLoBDhLuk/\ngS8Cb5PctHN4RNRnW1U2PHJoUxHxgaQ5krpHxEtZ11MFziNZI3o/SY8BXWgl0w8UtlumyCTdDfw4\nIh5Ot08jOXtfAlxQ4K6Ifdl8ptCHs6soG5IeIPlE83/Aqob9ETEqs6IykH6KGUbyOjR04S5sLffD\nFO3MHQBJ3yK5SLKCZGbIgcDEiLg308Jazt+SzH6IpBEkI0O+SXKzxlW0kjOTSpJ0EclFxPls7HcP\noHDhTjJhVulsmCKZd6hQ0k8x/x0Rw4F5WdezrQoZ7sDXImKSpM8AewJfJQn7ooR7m5Kz89HAVelQ\nwNskzc6wriydSDJneaEvrKd2SifO2iCdSKyI7k3vhfifaGXdHEUN94aLZscBkyNiTsGmNN2p5EaM\nkcD40mMZ1ZS1xSR3LRc23CWdBUwAPtZoQZddgceyqSpz55HM9b9e0rtsnKKjU7ZlNa+o/5FnSroX\n6Al8V9KuFOsmjRtJpjN9k2ShjkcAJH2cVjISYAdYDcyW9Gc2nQb63OxKanE3AH8EfgRMLNm/oqjX\nYSJi1+ZbVadCXlBNL5T0BxZHxNuS/gboGhFbWn4ud9IhXXuTLNCxKt13AMl81bMyLS4DW5odsqiz\nQtpGkkZRMslga1lusKjhfjgwOyJWSTqV5ILqpIZJ+c3MACT9mGTk0PXprrEkU/5O3PJ3VYeihvtc\noB/QF7gOuBr4fMkSa1Yw6SLQPyJZoKJDw35Pf1xsaVb0j4gP0u0akukHqn4+96JOHLYuvfJ9AskZ\n+yQ2XSfRimcyybTP64BPkiw7eF2mFVm16FzyeLfMqthGRb2gukLSd0nmcT8yfTdum3FNlq2dI+LP\nkpR2z10
g6RGSdWatuH4EPCVpKslImRHAd7MtqTxFDffRwJdIxru/mq40U/VrItoO9V56of15SecA\nfyG5B8IKLCJulPQgSb+7gH+KiFezrao8hexzB0hXVNk/Iu6X9BGShRlWZF2XZSOdRG0ByUfwH5J8\n/P5JREzLtDDLhKRzIuLn6eODI6LV3aFayHCXNI7kxp2/iYj90otpV0bEyIxLM7Mq0LAaVePHrUlR\nu2XOBoYCTwJExPOS/BG8gCRdFhHflnQnTaylWrTJsqxJrfLu9aKG+/sRsaZhxoF0dZXifYQx2Dgi\n5uJMq7Bq01nSSSQjCjs1XgOiNaz9UNRumZ+QzGV+GslsiBOA+RHxvUwLs6og6aNAtyLdsWybkjR5\nK4cjIr7WYsVsp6KGexvgTODTJB+57gF+3dpmfbPKSUdEjCL5NDsbeAN4KCLOy7Ius+1VuHBPx7Rf\nGxGnZl2LVY+GNTIlfZ3krP1fJc1tDXci2o4jqTPJJ/xaNl3EpeonlCtcn3tErJfURVK7iFiTdT1W\nNXaStDfJ8oPunrMGdwPTgKdpZTPHFi7cU3XAY5KmsOkyYpdkVpFl7UKS7rlHI2K6pI8Bz2dck2Wv\nQ2vtmitctwyApCZvKY+If2vpWsysekn6DrASuItN5/mv+vntCxnuZo2lI6j+nWTxkj+RzBr67Yj4\nXaaFWaYknQ38B8nouoawjNYwW2ghw30LN6y8A8wAfhkR77V8VZYlSbMjon86tvlE4DvA1Ijol3Fp\nliFJLwCHRsSbWdeyrYo65e9iko9av0q/lgOvAQek21Y8DbOCHgfc2Bo+dluLmEeyBGOrU9QLqgMi\nYkTJ9p2SHo6IEZJa3QRBVhF3SnqWpFtmgqQugD/B2XqStXWn0srW1i1quHeR1D0iXgJIp/zdIz3m\n4ZEFFBETJV0ELE+Hy64iWczFiu2O9KvVKWq4nw88mvanCehJcrbWEfCCyAUk6bSSx6WHftvy1Vi1\niIhrJbUj6bIFWBgRa7OsqVyFvKAKIKk9cBBJuD/ri6jFJulnJZsdgJHArIg4JaOSrApI+gTJCV8d\nSVZ0A06PiIczLKsshQz3dHGO84AeETEunc/9wIi4K+PSrEpI2g24zlP+FpukmcCXImJhun0AyQX3\nQdlW1ryijpaZTNK3PjzdricZ42zWYDWwf9ZFWObaNgQ7QEQ8RytZb7mofe77RcRoSWMBIuJdNepo\ntWJpdO9DG6A3cEt2FVmVmCHpajbO+/9lYGaG9ZStqOG+RtLOpP+ZJe1HyTAnK6TSxTrWAUsioj6r\nYqxqnEWyctu5JH3uDwNXZFpRmYra534M8H2Ss7N7gcOBMyLiwSzrMjOrlEKGO4Ck3YFhJO/G01rj\n7cVWOZKGAT8DegHtgBpgVUR0yrQwy4Skp9nK0putYZ7/onbLEBHLgP8FkHSgpB9FxLiMy7Ls/BwY\nA9wKDCZZoOHjmVZkWTo+/fPs9M/SPvdWMR1BoUbLSOor6V5Jz0j6d0l7SboN+DMwP+v6LFsRsQio\niYj1ETEZ+GTWNVk2ImJJRCwBDo+If4yIp9OvicBnsq6vHIUKd5JJwW4ATiZZI3MWySRiH4+IS7Ms\nzDK3Or0TcY6kn6TzeHfMuijLXEdJRzRsSDqMVvLvolB97g3TupZsvwzURsT6DMuyKiCpB8nMoO1I\npvvtBPwiPZu3gpI0CPgNsFu6623gaxExK7uqylO0PvcOkgaQXESFZNrfvg1j3FvDX5hVlqQTgK4R\ncXm6/RCEGBsmAAAEc0lEQVSwJ8nFtCcAh3uBRcRMoJ+kTiQnw+9kXVO5inbmPnUrhyMijm6xYqwq\nSHoMGBMRL6fbs4GjgV2AyRExMsv6LFvpHFQnA7WUnAxHxIVZ1VSuQp25R4QvkFlj7RqCPfVoulDH\nX9NZQq3Y/kCySttMWtmNjoU6c2+Qrot4fUS8nW5/FBgbEa3izjOrHEmLI
qLJIY+SXoiI/Vq6Jqse\nkp6JiD5Z17E9ijZapsG4hmAHiIi3AI9xL6YnJW32dy/p74D/y6Aeqy6PSzok6yK2R1HP3OcC/SL9\n5SXVAHMj4uBsK7OWJmlPkpV23icZGgswCGgPnBgRr2VVm2VP0nySm9leJPk3IpLrc1V/h2pRw/2/\nSC6QXEkyKuIbwMsRcX6WdVl2JB0NNLy5z4uIB7Ksx6pDOkR2M+kNTlWtqOHeBvg7ktV2RDJ52K89\n3t3MmpJ+wuvQsN2w/nI1K2S4m5mVQ9Io4L+BfYDXgR7AgtbQhVuooZCSbomIL25pxrfW0I9mZi3q\nhySzx94fEQMkfRIYm3FNZSlUuAPfSv88fqutzMwSayNimaQ2ktpExFRJF2VdVDkKNRQyIl5JH05o\nmPWtZPa3CVnWZmZV6W1Ju5CswHS9pEkkK3VVvUL2uUuaFREDG+2b624ZMyuV3qX8LsmJ8JdJJhC7\nPl0PoqoVKtwlnUVyhr4fm04ItSvwWEScmklhZtYqpPfEjImI67OupTlFC/fdgI8CPwImlhxakc4n\nYmZGOgvk2cC+wBTgvnT7H4DZEXFChuWVpVDh3kDSfkB9RLwv6RNAX+C3pVMSmFlxSfoD8BbJtM8j\nSU4K2wHfiojZWdZWrqKG+2ySdTJrgXtI3pkPjIjjsqzLzKqDpKcj4pD0cQ3wJtA9IlZkW1n5CjVa\npsQHEbEO+DxwWUR8B9g745rMrHqsbXiQ3rn+YmsKdijeOPcGayWNJVnh/nPpvrYZ1mNm1aWfpOXp\nYwE7p9sNE4d1yq608hQ13L9KMlnYf0TEi5J6Ar/LuCYzqxIRUZN1DR9WIfvczczyrlBn7p5bxsyK\nolBn7pL2johXWvMczWZm5ShUuJuZFUWhumUaSFrB5t0y7wAzgPMjYnHLV2VmVjmFDHfgEmApcAPJ\n0KYxwN8CC4HfAJ/IrDIzswooZLeMpCcj4tBG+6ZFxDBJcyKiX1a1mZlVQmHvUJX0xYYJ+CV9seRY\n8d7tzCx3inrm/jFgEjA83fUE8B3gL8CgiHg0q9rMzCqhkOFuZpZ3heyWkdRV0u2SXpf0mqTbJHXN\nui4zs0opZLgDk0mm+d2HZDL+O9N9Zma5UMhuGUmzI6J/c/vMzFqrop65vynpVEk16depQNUveGtm\nVq6inrl3B35OMlomgMeBcyPipUwLMzOrkEKGe1MkfTsiLsu6DjOzSnC4pyS9FBHds67DzKwSitrn\n3hRlXYCZWaU43DfyRxgzy41CzQq5hal+IV0At4XLMTPbYdznbmaWQ+6WMTPLIYe7mVkOOdzNzHLI\n4W5mlkMOdzOzHPr/xjLMxOS1hXMAAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from sklearn.linear_model import LogisticRegression\n", "from sklearn.svm import SVC\n", "from sklearn.naive_bayes import GaussianNB\n", "from sklearn import tree\n", "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.metrics import f1_score,recall_score,precision_score\n", "import random\n", "examples = result_df.values.copy()\n", "print(len(examples[0]))\n", "score_df = pd.DataFrame(np.zeros((5,3)),index = ['LogisticRegression', 'SVM', 'GaussianNB', 'tree', 'RandomForest'], \\\n", " 
columns = ['precision', 'recall', 'f1'])\n",
  "\n",
  "SEED = 42                    # fixes the shuffle and the forest so that reruns give the same scores\n",
  "CONFIDENCE_THRESHOLD = 0.95  # minimum top class probability for a prediction to count in my_pred\n",
  "\n",
  "\n",
  "def my_pred(y_pred, y_test, proba, threshold=CONFIDENCE_THRESHOLD):\n",
  "    '''Print micro precision/recall/f1 restricted to confident predictions.\n",
  "\n",
  "    A test sample is scored only when its highest class probability is at\n",
  "    least `threshold`.\n",
  "\n",
  "    y_pred    -- predicted labels, one per test sample\n",
  "    y_test    -- true labels, aligned with y_pred\n",
  "    proba     -- 2-D array with one row of class probabilities per sample\n",
  "    threshold -- minimum top probability for a sample to be scored\n",
  "\n",
  "    Prints [rows, columns] of `proba`, then precision, recall and f1.\n",
  "    Returns (precision, recall, f1), or None when no sample is confident\n",
  "    enough (the metrics are not computed on an empty selection).\n",
  "    '''\n",
  "    proba = np.asarray(proba)\n",
  "    rows, cols = proba.shape\n",
  "    print([rows, cols])\n",
  "    confident = proba.max(axis=1) >= threshold\n",
  "    if not confident.any():\n",
  "        print('no prediction reached the %.2f confidence threshold' % threshold)\n",
  "        return None\n",
  "    y_pred1 = np.asarray(y_pred)[confident].tolist()\n",
  "    y_test1 = np.asarray(y_test)[confident].tolist()\n",
  "    precision = precision_score(y_test1, y_pred1, average='micro')\n",
  "    recall = recall_score(y_test1, y_pred1, average='micro')\n",
  "    f1 = f1_score(y_test1, y_pred1, average='micro')\n",
  "    print(precision)\n",
  "    print(recall)\n",
  "    print(f1)\n",
  "    return precision, recall, f1\n",
  "\n",
  "\n",
  "def evaluate_classifier(classifier, examples, rounds=1, train_fraction=0.75, seed=SEED):\n",
  "    '''Fit `classifier` on shuffled splits and return the mean micro [precision, recall, f1].\n",
  "\n",
  "    classifier     -- estimator offering fit and predict\n",
  "    examples       -- 2-D array; the last column is the label, the others are features\n",
  "    rounds         -- number of shuffle / split / fit repetitions to average over\n",
  "    train_fraction -- share of the rows used for training in every round\n",
  "    seed           -- seed of the shuffling generator\n",
  "\n",
  "    `examples` is left untouched: every round works on a shuffled copy.\n",
  "    When the classifier offers predict_proba, the scores over confident\n",
  "    predictions only are printed as well (see my_pred).\n",
  "    '''\n",
  "    rng = np.random.RandomState(seed)\n",
  "    per_round = []\n",
  "    for _ in range(rounds):\n",
  "        shuffled = rng.permutation(examples)  # shuffles rows and returns a copy\n",
  "        split = int(len(shuffled) * train_fraction)\n",
  "        x_train, y_train = shuffled[:split, 0:-1], shuffled[:split, -1]\n",
  "        x_test, y_test = shuffled[split:, 0:-1], shuffled[split:, -1]\n",
  "        classifier.fit(x_train, y_train)\n",
  "        y_pred = classifier.predict(x_test)\n",
  "        per_round.append([\n",
  "            precision_score(y_test, y_pred, average='micro'),\n",
  "            recall_score(y_test, y_pred, average='micro'),\n",
  "            f1_score(y_test, y_pred, average='micro'),\n",
  "        ])\n",
  "        if hasattr(classifier, 'predict_proba'):\n",
  "            my_pred(y_pred, y_test, classifier.predict_proba(x_test))\n",
  "    return np.mean(per_round, axis=0)\n",
  "\n",
  "\n",
  "classifiers = {\n",
  "    'LogisticRegression': LogisticRegression(),\n",
  "    'SVM': SVC(),\n",
  "    'GaussianNB': GaussianNB(),\n",
  "    'tree': tree.DecisionTreeClassifier(),\n",
  "    'RandomForest': RandomForestClassifier(random_state=SEED),\n",
  "}\n",
  "# Only the models named here are trained. Every other row of score_df keeps its 0.0\n",
  "# placeholder, so a zero bar in the chart means 'not evaluated', not 'scored zero'.\n",
  "# Add names from `classifiers` to evaluate more models.\n",
  "enabled_models = ['RandomForest']\n",
  "for model_name in enabled_models:\n",
  "    score_df.loc[model_name] = evaluate_classifier(classifiers[model_name], examples)\n",
  "\n",
  "print(score_df)\n",
  "ax = score_df.plot.bar(title='ssl-fingerprint')\n",
  "fig = ax.get_figure()\n",
  "#fig.savefig('../figure/ssl.svg')\n" ] },
 { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": 
"ipython3", "version": "3.6.2" } }, "nbformat": 4, "nbformat_minor": 2 }