617 lines
42 KiB
Plaintext
617 lines
42 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 31,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import os\n",
|
|
"import json\n",
|
|
"import pandas as pd\n",
|
|
"import numpy as np\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 32,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"ciper_suits = {\n",
|
|
" '1305':0,\n",
|
|
" 'C030':1,\n",
|
|
"\t'C02C':2,\n",
|
|
"\t'C028':3,\n",
|
|
"\t'C024':4,\n",
|
|
"\t'C014':5,\n",
|
|
"\t'C00A':6,\n",
|
|
"\t'00A5':7,\n",
|
|
"\t'00A3':8,\n",
|
|
"\t'00A1':9,\n",
|
|
"\t'009F':10,\n",
|
|
"\t'006B':11,\n",
|
|
"\t'006A':12,\n",
|
|
"\t'0069':13,\n",
|
|
"\t'0068':14,\n",
|
|
"\t'0039':15,\n",
|
|
"\t'0038':16,\n",
|
|
"\t'0037':17,\n",
|
|
"\t'0036':18,\n",
|
|
"\t'0088':19,\n",
|
|
"\t'0087':20,\n",
|
|
"\t'0086':21,\n",
|
|
"\t'0085':22,\n",
|
|
"\t'C019':23,\n",
|
|
"\t'00A7':24,\n",
|
|
"\t'006D':25,\n",
|
|
"\t'003A':26,\n",
|
|
"\t'0089':27,\n",
|
|
"\t'C032':28,\n",
|
|
"\t'C02E':29,\n",
|
|
"\t'C02A':30,\n",
|
|
"\t'C026':31,\n",
|
|
"\t'C00F':32,\n",
|
|
"\t'C005':33,\n",
|
|
"\t'009D':34,\n",
|
|
"\t'003D':35,\n",
|
|
"\t'0035':36,\n",
|
|
"\t'0084':37,\n",
|
|
"\t'008D':38,\n",
|
|
"\t'C02F':39,\n",
|
|
"\t'C02B':40,\n",
|
|
"\t'C027':41,\n",
|
|
"\t'C023':42,\n",
|
|
"\t'C013':43,\n",
|
|
"\t'C009':44,\n",
|
|
"\t'00A4':45,\n",
|
|
"\t'00A2':46,\n",
|
|
"\t'00A0':47,\n",
|
|
"\t'009E':48,\n",
|
|
"\t'0067':49,\n",
|
|
"\t'0040':50,\n",
|
|
"\t'003F':51,\n",
|
|
"\t'003E':52,\n",
|
|
"\t'0033':53,\n",
|
|
"\t'0032':54,\n",
|
|
"\t'0031':55,\n",
|
|
"\t'0030':56,\n",
|
|
"\t'009A':57,\n",
|
|
"\t'0099':58,\n",
|
|
"\t'0098':59,\n",
|
|
"\t'0097':60,\n",
|
|
"\t'0045':61,\n",
|
|
"\t'0044':62,\n",
|
|
"\t'0043':63,\n",
|
|
"\t'0042':64,\n",
|
|
"\t'C018':65,\n",
|
|
"\t'00A6':66,\n",
|
|
"\t'006C':67,\n",
|
|
"\t'0034':68,\n",
|
|
"\t'009B':69,\n",
|
|
"\t'0046':70,\n",
|
|
"\t'C031':71,\n",
|
|
"\t'C02D':72,\n",
|
|
"\t'C029':73,\n",
|
|
"\t'C025':74,\n",
|
|
"\t'C00E':75,\n",
|
|
"\t'C004':76,\n",
|
|
"\t'009C':77,\n",
|
|
"\t'003C':78,\n",
|
|
"\t'002F':79,\n",
|
|
"\t'0096':80,\n",
|
|
"\t'0041':81,\n",
|
|
"\t'008C':82,\n",
|
|
"\t'C012':83,\n",
|
|
"\t'C008':84,\n",
|
|
"\t'0016':85,\n",
|
|
"\t'0013':86,\n",
|
|
"\t'0010':87,\n",
|
|
"\t'000D':88,\n",
|
|
"\t'C017':89,\n",
|
|
"\t'001B':90,\n",
|
|
"\t'C00D':91,\n",
|
|
"\t'C003':92,\n",
|
|
"\t'000A':93,\n",
|
|
"\t'0007':94,\n",
|
|
"\t'008B':95,\n",
|
|
"\t'0021':96,\n",
|
|
"\t'001F':97,\n",
|
|
"\t'0025':98,\n",
|
|
"\t'0023':99,\n",
|
|
"\t'C011':100,\n",
|
|
"\t'C007':101,\n",
|
|
"\t'C016':102,\n",
|
|
"\t'0018':103,\n",
|
|
"\t'C00C':104,\n",
|
|
"\t'C002':105,\n",
|
|
"\t'0005':106,\n",
|
|
"\t'0004':107,\n",
|
|
"\t'008A':108,\n",
|
|
"\t'0020':109,\n",
|
|
"\t'0024':110,\n",
|
|
"\t'C010':111,\n",
|
|
"\t'C006':112,\n",
|
|
"\t'C015':113,\n",
|
|
"\t'C00B':114,\n",
|
|
"\t'C001':115,\n",
|
|
"\t'003B':116,\n",
|
|
"\t'0002':117,\n",
|
|
"\t'0001':118,\n",
|
|
" '1301':119,\n",
|
|
"\t'1302':120,\n",
|
|
"\t'1303':121,\n",
|
|
"\t'1304':122\n",
|
|
"}"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 33,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"extensions = { \n",
|
|
" 0:0, \n",
|
|
" 1:1, \n",
|
|
" 2:2, \n",
|
|
" 3:3, \n",
|
|
" 4:4, \n",
|
|
" 5:5, \n",
|
|
" 6:6, \n",
|
|
" 7:7, \n",
|
|
" 8:8, \n",
|
|
" 9:9, \n",
|
|
" 10:10, \n",
|
|
" 11:11, \n",
|
|
" 12:12, \n",
|
|
" 13:13, \n",
|
|
" 14:14, \n",
|
|
" 15:15, \n",
|
|
" 16:16, \n",
|
|
" 17:17, \n",
|
|
" 18:18, \n",
|
|
" 19:19, \n",
|
|
" 20:20, \n",
|
|
" 21:21, \n",
|
|
" 22:22, \n",
|
|
" 23:23, \n",
|
|
" 24:24, \n",
|
|
" 25:25, \n",
|
|
" 26:26, \n",
|
|
" 27:27, \n",
|
|
" 28:28, \n",
|
|
" 29:29, \n",
|
|
" 30:30, \n",
|
|
" 31:31, \n",
|
|
" 35:32, \n",
|
|
" 65281:33 \n",
|
|
"}"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 36,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Build one feature row per labelled flow (traffic statistics + one-hot TLS cipher suites /\n",
"# extensions) and write the resulting table to examples.csv.\n",
"date = '2019-12-20_21'\n",
"root_dir = '/Users/Leo/Documents/github/GradProj/'\n",
"example_label_file = root_dir + 'DataSet/result/' + date + '/stream_tag.txt'\n",
"example_json_file = root_dir + 'DataSet/result/' + date + '/stream_stat.txt'\n",
"example_csv_file = root_dir + 'Experiment/StatFeature/CsvFile/' + date + '/examples.csv'\n",
"\n",
"# Summary statistics computed per direction, in the order they appear in a feature row.\n",
"stat_names = ['mean', 'median', 'std', 'max', 'min']\n",
"\n",
"\n",
"def _summary_stats(values):\n",
"    \"\"\"Return [mean, median, std, max, min] of a non-empty sequence of numbers.\"\"\"\n",
"    return [np.mean(values), np.median(values), np.std(values), np.max(values), np.min(values)]\n",
"\n",
"\n",
"def _direction_features(total_bytes, packet_bytes, packet_intervals):\n",
"    \"\"\"Return the 10 features of one direction: packet-size stats, then interval stats.\n",
"\n",
"    All zeros when the direction carried no bytes, or when the record lists no packet\n",
"    for it (np.max / np.min raise ValueError on an empty sequence).\n",
"    \"\"\"\n",
"    if total_bytes > 0 and len(packet_bytes) > 0:\n",
"        return _summary_stats(packet_bytes) + _summary_stats(packet_intervals)\n",
"    return [0] * (2 * len(stat_names))\n",
"\n",
"\n",
"# Label table: whitespace-separated, no header. The first four columns are matched against\n",
"# a flow's (sip, sport, dip, dport); the fifth column is the label.\n",
"example_label_df = pd.read_table(example_label_file, sep=r'\\s+', header=None)\n",
"example_label = {tuple(row[:4]): row[4] for row in example_label_df.itertuples(index=False, name=None)}\n",
"\n",
"result_data = list()\n",
"result_label = list()\n",
"# One JSON record per line; the with-block closes the file even if a record is malformed.\n",
"with open(example_json_file, 'r') as example_json_f:\n",
"    for line in example_json_f:\n",
"        if not line.strip():\n",
"            continue  # tolerate blank lines\n",
"        example_json = json.loads(line)\n",
"\n",
"        # Label: flows that have no entry in the label table are skipped.\n",
"        try:\n",
"            flow_key = (example_json['sip'], example_json['sport'], example_json['dip'], example_json['dport'])\n",
"            flow_label = example_label[flow_key]\n",
"        except (KeyError, TypeError):\n",
"            continue\n",
"\n",
"        # Statistical features: split packet sizes / intervals by direction (dir 1 -> c2s, dir 2 -> s2c).\n",
"        c2s_packets_bytes, c2s_packets_intervals = list(), list()\n",
"        s2c_packets_bytes, s2c_packets_intervals = list(), list()\n",
"        for packet in example_json['packets']:\n",
"            if packet['dir'] == 1:\n",
"                c2s_packets_bytes.append(packet['bytes'])\n",
"                c2s_packets_intervals.append(packet['interval'])\n",
"            elif packet['dir'] == 2:\n",
"                s2c_packets_bytes.append(packet['bytes'])\n",
"                s2c_packets_intervals.append(packet['interval'])\n",
"        c2s_bytes = example_json['c2s_bytes']\n",
"        s2c_bytes = example_json['s2c_bytes']\n",
"        c2s_pkts = example_json['c2s_pkts']\n",
"        s2c_pkts = example_json['s2c_pkts']\n",
"        duration = example_json['duration']\n",
"\n",
"        # TLS features: one-hot vectors sized from the vocabularies themselves.\n",
"        tls = example_json['tls']\n",
"        extensions_arr = np.zeros(len(extensions), dtype=np.uint8)\n",
"        cipher_suits_arr = np.zeros(len(ciper_suits), dtype=np.uint8)\n",
"        for extension in tls['extensions_list']:\n",
"            try:\n",
"                extensions_arr[extensions[extension]] = 1\n",
"            except (KeyError, TypeError):\n",
"                pass  # extension type outside the vocabulary\n",
"        for cipher in tls['cipher_suites']:\n",
"            try:\n",
"                cipher_suits_arr[ciper_suits[cipher.upper()]] = 1\n",
"            except (KeyError, AttributeError):\n",
"                pass  # cipher code outside the vocabulary (or not a string)\n",
"\n",
"        result = [c2s_bytes, c2s_pkts, s2c_bytes, s2c_pkts, duration]\n",
"        result += _direction_features(c2s_bytes, c2s_packets_bytes, c2s_packets_intervals)\n",
"        result += _direction_features(s2c_bytes, s2c_packets_bytes, s2c_packets_intervals)\n",
"        result += list(cipher_suits_arr)\n",
"        result += list(extensions_arr)\n",
"        # Row and label are appended together so the two lists always stay aligned.\n",
"        result_data.append(result)\n",
"        result_label.append(flow_label)\n",
"\n",
"# Column names, in the same order as the values of a feature row.\n",
"base_head = ['c2s_bytes', 'c2s_pkts', 's2c_bytes', 's2c_pkts', 'duration']\n",
"base_head += [prefix + '_' + stat\n",
"              for prefix in ('c2s_packets_bytes', 'c2s_packets_intervals', 's2c_packets_bytes', 's2c_packets_intervals')\n",
"              for stat in stat_names]\n",
"cipher_head = ['cipher' + str(k) for k in range(len(ciper_suits))]\n",
"extensions_head = ['extension' + str(k) for k in range(len(extensions))]\n",
"header = base_head + cipher_head + extensions_head\n",
"\n",
"result_df = pd.DataFrame(result_data, columns=header)\n",
"result_df['label'] = np.array(result_label)\n",
"# Create the output folder on first run instead of failing inside to_csv.\n",
"os.makedirs(os.path.dirname(example_csv_file), exist_ok=True)\n",
"result_df.to_csv(example_csv_file, index=False)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 37,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"%matplotlib inline\n",
|
|
"import os\n",
|
|
"import numpy as np\n",
|
|
"import pandas as pd\n",
|
|
"import matplotlib.pyplot as plt"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 38,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"hupu: 489846\n",
|
|
"weibo: 897897\n",
|
|
"douyin: 158497\n",
|
|
"toutiao: 213989\n",
|
|
"zhihu: 968036\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Count the total number of packets (c2s_pkts + s2c_pkts) of every app in examples.csv.\n",
"date = '2019-12-20_21'\n",
"root_dir = '/Users/Leo/Documents/github/GradProj/'\n",
"exmaples_file = root_dir + 'Experiment/StatFeature/CsvFile/' + date + '/examples.csv'\n",
"app2pktsDict = dict()\n",
"with open(exmaples_file) as f:\n",
"    next(f, None)  # skip the header row\n",
"    for line in f:\n",
"        # Drop the line terminator up front so it never becomes part of the label.\n",
"        fields = line.rstrip('\\r\\n').split(',')\n",
"        if fields == ['']:\n",
"            continue  # tolerate blank lines\n",
"        # Column 1 is c2s_pkts, column 3 is s2c_pkts, the last column is the label.\n",
"        pkts = int(fields[1]) + int(fields[3])\n",
"        appName = fields[-1]\n",
"        app2pktsDict[appName] = app2pktsDict.get(appName, 0) + pkts\n",
"for appName, pkts in app2pktsDict.items():\n",
"    print(appName + ': ', pkts)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 39,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"zhihu 6403\n",
|
|
"weibo 5487\n",
|
|
"douyin 3964\n",
|
|
"hupu 2304\n",
|
|
"toutiao 520\n",
|
|
"Name: label, dtype: int64"
|
|
]
|
|
},
|
|
"execution_count": 39,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
},
|
|
{
|
|
"data": {
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAEVCAYAAADpbDJPAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFx1JREFUeJzt3Xu0nXV95/H3B6hirUDQwFASDbZZXuoIYgRmvIxKy9UR\nxkoHV9XU0mYu1LFTZ2x0Zg0VdYm2asdebBlBo2OLqFWoMGqKqKWjSLgYUXQSESULK9EA2nphxX7n\nj/07ZBNOcvYJh/2c8Hu/1jprP8/v+e29v88m7M9+fs8tVYUkqT/7DF2AJGkYBoAkdcoAkKROGQCS\n1CkDQJI6ZQBIUqcMAEnqlAEgSZ0yACSpU/sNXcDuPOpRj6oVK1YMXYYk7VWuvfba71TV0rn6LeoA\nWLFiBRs2bBi6DEnaqyT5xiT9HAKSpE4ZAJLUKQNAkjplAEhSpwwASeqUASBJnTIAJKlTBoAkdcoA\nkKROLeozgRfCirWXDV0CALecd+rQJUjSvbgFIEmdMgAkqVMGgCR1ygCQpE4ZAJLUKQNAkjplAEhS\npwwASerURAGQ5KAkH0zylSQ3JfkXSQ5Osj7Jpva4pPVNkrcn2ZxkY5Kjx15ndeu/KcnqB2qlJElz\nm3QL4H8CH6uqxwNHAjcBa4ErqmolcEWbBzgZWNn+1gDvAEhyMHAOcCxwDHDOTGhIkqZvzgBIcgDw\nLOACgKq6u6ruBE4D1rVu64DT2/RpwHtq5HPAQUkOA04E1lfVtqq6A1gPnLSgayNJmtgkWwCPBbYC\n70pyfZJ3Jnk4cGhVfQugPR7S+h8O3Dr2/C2tbVftkqQBTBIA+wFHA++oqqcA/8iO4Z7ZZJa22k37\nvZ+crEmyIcmGrVu3TlCeJGlPTBIAW4AtVXV1m/8go0D4dhvaoT3ePtZ/+djzlwG37ab9Xqrq/Kpa\nVVWrli5dOp91kSTNw5wBUFV/D9ya5HGt6Xjgy8ClwMyRPKuBS9r0pcBL29FAxwF3tSGijwMnJFnS\ndv6e0NokSQOY9H4ALwfel+QhwM3AyxiFx8VJzgK+CZzR+l4OnAJsBn7Q+lJV25K8Drim9Tu3qrYt\nyFpIkuZtogCoqhuAVbMsOn6WvgWcvYvXuRC4cD4FSpIeGJ4JLEmdMgAkqVMGgCR1ygCQpE4ZAJLU\nKQNAkjplAEhSpwwASeqUASBJnTIAJKlTBoAkdWrSi8HpQWDF2suGLgGAW847degSJOEWgCR1ywCQ\npE4ZAJLUKQNAkjplAEhSpwwASeqUASBJnTIAJKlTBoAkdcoAkKROGQCS1KmJAiDJLUm+mOSGJBta\n28FJ1ifZ1B6XtPYkeXuSzUk2Jjl67HVWt/6bkqx+YFZJkjSJ+WwBPKeqjqqqVW1+LXBFVa0Ermjz\nACcDK9vfGuAdMAoM4BzgWOAY4JyZ0JAkTd/9GQI6DVjXptcBp4+1v6dGPgcclOQw4ERgfVVtq6o7\ngPXASffj/SVJ98OkAVDAJ5Jcm2RNazu0qr4F0B4Pae2HA7eOPXdLa9tVuyRpAJPeD+DpVXVbkkOA\n9Um+spu+maWtdtN+7yePAmYNwKMf/egJy5MkzddEWwBVdVt7vB34MKMx/G+3oR3a4+2t+xZg+djT\nlwG37aZ95/c6v6pWVdWqpUuXzm9tJEkTmzMAkjw8ySNmpoETgBuBS4GZI3lWA5e06UuBl7ajgY4D\n7mpDRB8HTkiypO38PaG1SZIGMMkQ0KHAh5PM9P+LqvpYkmuAi5OcBXwTOKP1vxw4BdgM/AB4GUBV\nbUvyOuCa1u/cqtq2YGsiSZqXOQOgqm4Gjpyl/bvA8bO0F3D2Ll7rQuDC+ZcpSVpongksSZ0yACSp\nUwaAJHXKAJCkThkAktQpA0CSOmUASFKnDABJ6pQBIEmdMgAkqVMGgCR1ygCQpE4ZAJLUKQNAkjpl\nAEhSpwwASeqUASBJnTIAJKlTBoAkdcoAkKRO
GQCS1CkDQJI6ZQBIUqcMAEnq1MQBkGTfJNcn+Wib\nPyLJ1Uk2JXl/koe09oe2+c1t+Yqx13h1a/9qkhMXemUkSZObzxbAK4CbxubfBLytqlYCdwBntfaz\ngDuq6ueBt7V+JHkicCbwC8BJwJ8m2ff+lS9J2lMTBUCSZcCpwDvbfIDnAh9sXdYBp7fp09o8bfnx\nrf9pwEVV9eOq+jqwGThmIVZCkjR/k24B/CHwKuCf2vwjgTuranub3wIc3qYPB24FaMvvav3vaZ/l\nOfdIsibJhiQbtm7dOo9VkSTNx35zdUjyPOD2qro2ybNnmmfpWnMs291zdjRUnQ+cD7Bq1ar7LJcW\nwoq1lw1dArecd+rQJahzcwYA8HTg+UlOAfYHDmC0RXBQkv3ar/xlwG2t/xZgObAlyX7AgcC2sfYZ\n48+RJE3ZnENAVfXqqlpWVSsY7cT9ZFX9KnAl8MLWbTVwSZu+tM3Tln+yqqq1n9mOEjoCWAl8fsHW\nRJI0L5NsAezK7wIXJXk9cD1wQWu/AHhvks2MfvmfCVBVX0pyMfBlYDtwdlX95H68vyTpfphXAFTV\np4BPtembmeUonqr6EXDGLp7/BuAN8y1SkrTwPBNYkjplAEhSpwwASeqUASBJnTIAJKlTBoAkdcoA\nkKROGQCS1CkDQJI6ZQBIUqcMAEnqlAEgSZ0yACSpUwaAJHXKAJCkThkAktQpA0CSOmUASFKnDABJ\n6pQBIEmdMgAkqVMGgCR1ygCQpE7NGQBJ9k/y+SRfSPKlJK9t7UckuTrJpiTvT/KQ1v7QNr+5LV8x\n9lqvbu1fTXLiA7VSkqS5TbIF8GPguVV1JHAUcFKS44A3AW+rqpXAHcBZrf9ZwB1V9fPA21o/kjwR\nOBP4BeAk4E+T7LuQKyNJmtycAVAj/9Bmf6r9FfBc4IOtfR1weps+rc3Tlh+fJK39oqr6cVV9HdgM\nHLMgayFJmreJ9gEk2TfJDcDtwHrga8CdVbW9ddkCHN6mDwduBWjL7wIeOd4+y3MkSVM2UQBU1U+q\n6ihgGaNf7U+YrVt7zC6W7ar9XpKsSbIhyYatW7dOUp4kaQ/M6yigqroT+BRwHHBQkv3aomXAbW16\nC7AcoC0/ENg23j7Lc8bf4/yqWlVVq5YuXTqf8iRJ8zDJUUBLkxzUph8G/CJwE3Al8MLWbTVwSZu+\ntM3Tln+yqqq1n9mOEjoCWAl8fqFWRJI0P/vN3YXDgHXtiJ19gIur6qNJvgxclOT1wPXABa3/BcB7\nk2xm9Mv/TICq+lKSi4EvA9uBs6vqJwu7OpKkSc0ZAFW1EXjKLO03M8tRPFX1I+CMXbzWG4A3zL9M\nSdJC80xgSeqUASBJnTIAJKlTBoAkdcoAkKROGQCS1CkDQJI6ZQBIUqcMAEnqlAEgSZ0yACSpUwaA\nJHXKAJCkThkAktQpA0CSOmUASFKnDABJ6tQkt4SU9CC2Yu1lQ5fALeedOnQJXXILQJI6ZQBIUqcM\nAEnqlAEgSZ0yACSpU3MGQJLlSa5MclOSLyV5RWs/OMn6JJva45LWniRvT7I5ycYkR4+91urWf1OS\n1Q/cakmS5jLJFsB24JVV9QTgOODsJE8E1gJXVNVK4Io2D3AysLL9rQHeAaPAAM4BjgWOAc6ZCQ1J\n0vTNGQBV9a2quq5Nfx+4CTgcOA1Y17qtA05v06cB76mRzwEHJTkMOBFYX1XbquoOYD1w0oKujSRp\nYvPaB5BkBfAU4Grg0Kr6FoxCAjikdTscuHXsaVta267aJUkDmDgAkvwM8CHgt6vqe7vrOktb7aZ9\n5/dZk2RDkg1bt26dtDxJ0jxNFABJforRl//7quqvWvO329AO7fH21r4FWD729GXAbbtpv5eqOr+q\nVlXVqqVLl85nXSRJ8zDJUUABLgBuqqq3ji26FJg5kmc1cMlY+0vb0UDHAXe1IaKPAyckWdJ2/p7Q\n2iRJA5jk
YnBPB14CfDHJDa3tNcB5wMVJzgK+CZzRll0OnAJsBn4AvAygqrYleR1wTet3blVtW5C1\nkCTN25wBUFVXMfv4PcDxs/Qv4OxdvNaFwIXzKVCS9MDwTGBJ6pQBIEmdMgAkqVMGgCR1ygCQpE4Z\nAJLUKQNAkjplAEhSpwwASeqUASBJnTIAJKlTBoAkdcoAkKROGQCS1CkDQJI6ZQBIUqcMAEnqlAEg\nSZ0yACSpUwaAJHXKAJCkThkAktQpA0CSOmUASFKn5gyAJBcmuT3JjWNtBydZn2RTe1zS2pPk7Uk2\nJ9mY5Oix56xu/TclWf3ArI4kaVKTbAG8Gzhpp7a1wBVVtRK4os0DnAysbH9rgHfAKDCAc4BjgWOA\nc2ZCQ5I0jDkDoKo+A2zbqfk0YF2bXgecPtb+nhr5HHBQksOAE4H1VbWtqu4A1nPfUJEkTdGe7gM4\ntKq+BdAeD2nthwO3jvXb0tp21X4fSdYk2ZBkw9atW/ewPEnSXBZ6J3BmaavdtN+3ser8qlpVVauW\nLl26oMVJknbY0wD4dhvaoT3e3tq3AMvH+i0DbttNuyRpIHsaAJcCM0fyrAYuGWt/aTsa6DjgrjZE\n9HHghCRL2s7fE1qbJGkg+83VIclfAs8GHpVkC6Ojec4DLk5yFvBN4IzW/XLgFGAz8APgZQBVtS3J\n64BrWr9zq2rnHcuSpCmaMwCq6kW7WHT8LH0LOHsXr3MhcOG8qpMkPWA8E1iSOmUASFKnDABJ6pQB\nIEmdMgAkqVMGgCR1ygCQpE4ZAJLUKQNAkjplAEhSp+a8FIQk9WLF2suGLoFbzjt1au/lFoAkdcoA\nkKROGQCS1CkDQJI6ZQBIUqcMAEnqlAEgSZ0yACSpUwaAJHXKAJCkThkAktQpA0CSOjX1AEhyUpKv\nJtmcZO2031+SNDLVAEiyL/AnwMnAE4EXJXniNGuQJI1MewvgGGBzVd1cVXcDFwGnTbkGSRLTD4DD\ngVvH5re0NknSlKWqpvdmyRnAiVX1G23+JcAxVfXysT5rgDVt9nHAV6dW4K49CvjO0EUsEn4WO/hZ\n7OBnscNi+CweU1VL5+o07TuCbQGWj80vA24b71BV5wPnT7OouSTZUFWrhq5jMfCz2MHPYgc/ix32\nps9i2kNA1wArkxyR5CHAmcClU65BksSUtwCqanuS3wI+DuwLXFhVX5pmDZKkkanfFL6qLgcun/b7\n3k+LakhqYH4WO/hZ7OBnscNe81lMdSewJGnx8FIQktQpA0CSOmUASFKnpr4TeG+Q5FmztVfVZ6Zd\ni6TFL8nzgZnvjU9X1V8PWc+k3Ak8iyTj//H2Z3QNo2ur6rkDlTSYJAcCvwc8szV9Gji3qu4arKiB\nJHkB8CbgECDtr6rqgEELG0iS/zFbe1WdO+1ahpTkjYy+I97Xml4EbKiqVw9X1WQMgAkkWQ68uape\nNHQt05bkQ8CNwLrW9BLgyKp6wXBVDSPJZuBfV9VNQ9eyGCR55djs/sDzgJuq6tcHKmkQSTYCR1XV\nP7X5fYHrq+rJw1Y2N4eAJrMFeNLQRQzk56rql8fmX5vkhsGqGda3/fLfoareMj6f5A/o98z+g4Bt\nbfrAIQuZDwNgFkn+CJjZNNoHOAr4wnAVDeqHSZ5RVVcBJHk68MOBaxrKhiTvBz4C/Himsar+ariS\nFpWfBh47dBEDeCNwfZIrGQ0LPgtY9MM/4BDQrJKsHpvdDtxSVX83VD1DSnIUo+GfAxn9494GrK6q\njYMWNoAk75qluXob8piR5Ivs+KG0L7CU0f6hPx6uqmEkOQx4GqP/R66uqr8fuKSJGACaSJIDAKrq\ne0PXosUhyWPGZrczGiLbPlQ9Q0qyBFjJaF8IsHccNWgAzKINc/we8BhGw2QzR3t0t3mb5JHAOcAz\nGP3au4rRr7zvDlrYFCV5VVW9eaehwXtU1X8aoKxFIcnRjP3bqKrrBy5p6p
L8BvAKRpe3vwE4Dvjs\n3nDUoPsAZncB8J+Ba4GfDFzL0C4CPgPM7Aj+VeD9wC8OVtH0zez43TBoFYtMOwz0DGBmH8i7k3yg\nql4/YFlDeAWj4Z/PVdVzkjweeO3ANU3ELYBZJLm6qo4duo7FIMm1VfXUndr2mhteLKQkT6qqG4eu\nY7FIchPwlKr6UZt/GHBdVT1h2MqmK8k1VfW0dnTcsVX14yQ3VNVRQ9c2F7cAxrTNWYArk/w+o182\n40d7XDdIYcO6MsmZwMVt/oXAZQPWM6Q/azcyejfwF1V158D1DO0WRmPeP2rzDwW+Nlg1w9mS5CBG\nR4etT3IHO93pcLFyC2BMO4xrV2pvGNNbKEm+z2hcN8DD2TEUti/wDx2f/boS+HVGQx+fB95dVZ8Y\ntqphJPkIo6GP9Yz+rfwSo31Et0Of+0aS/CtGR8x9rKruHrqeuRgA0jy1Mz1PB94OfI9RSL6mt/MB\ndjpc+j6qat3ulu/tkhxQVd9LcvBsy6tq22zti4kBMCbJi6vqfyf5ndmWV9Vbp13TUJI8vqq+MjYs\ndi89DocleTLwMuBURr96L6iq65L8LKOjPh6z2xfQg0qSj1bV85J8nR1by/c87g1HDboP4N4e3h4f\nMWgVi8PvAGuAt8yyrIBuhsPG/DHwTka/9u85G7qqbkvy34craxhjX3z3sjd88S2Eqnpeezxi6Fr2\nlFsAkvZIO0dkxv6M9oscXFWzXiX0wSrJFVV1/Fxti5EBMIskS4HfBFYwtpXU4yn/SX6a0dbAo6tq\nTdsJ+riq+ujApU1d7794J5Hkqqp6xtB1TEOS/Rld/+hK4NmMhn4ADgD+z95wOKxDQLO7BPhb4G/w\nRLB3MToh7l+2+S3AB4DuAgAYP/fhnl+8A9UyuJ32D+3D6PPpafj03wG/DfwsML5P7HvAnwxS0Ty5\nBTCLveUkjmmYOekryfVV9ZTW9oWqOnLo2haDnn7x7qwdNj3zBbKd0XkBf1BV/2+wogaQ5OVV9UdD\n17En3AKY3UeTnFJVlw9dyCJwdzvDswCS/BxjJ8f1xF+893Eyo0uErGDHd8mZQFd3BAPuSvLSnRur\n6j1DFDMfBsCYsZOfAF6T5G7gbvq+9d85wMeA5UneBzwd+LVBKxrO+BFRM794f2WYUhaFjwB3Mhr+\n+NEcfR/MnjY2vT9wPKPPZNEHgENAs0jySeAtVXXZWNv/qqrfHLCsQSR5L/BFRjeBuZnRtc6/M2xV\nWgyS3FhVvd4pb5fafbTfW1XPH7qWuewzdAGL1ArgVTvd9Pqpu+j7YPcuRr9qns/ozNc/T/KKYUsa\nRpIDk7w1yYb295b2P3uv/m+Sfz50EYvQDxjdG2DRcwtgFkmuA45h9IW3HHgxcGVVzXpW7INdu/TB\n04DnAP8e+GFVPX7YqqYvyYeAGxndIQ3gJcCRVfWC4aqavrE7ge3H6IvuZkb7hWaGShf9zdAXUpK/\n5t53RnsCcHFVrR2uqskYALPY6YiXXwNeCSypqmWDFjaAJFcwOkP6s4wOjb2qqm4ftqphzHZ0WI9H\njO10J7D7qKpvTKuWxaBdAG7GduAbVbVlqHrmw53As/uzmYmqenf7xXP2gPUMaSOj4a8nAXcBdyb5\n7PilEDrywyTPqKqr4J47x3X3OfT2BT+Xqvp0kkPZsTN405D1zIdbAJpIkp9hdCG0/wL8s6p66MAl\nTV2SIxkd2TEz7n8HsLqqNg5XlYaW5FeA3wc+xWgY7JnAf62qDw5Z1yQMAO1Wkt9i9A/6qcA3GN0e\n8m+r6pODFjZFO10ddub+CAD/yGjMu5urxOq+knwB+KWZodF2KZm/2RtOlnQISHN5GPBW4Nqq2j50\nMQOZOdnrcYw28y9hFAQvZhSI6ts+O+0X+y57yRGWbgFIE0ryCeCXq+r7bf4RwAeq6qRhK9OQkrwZ\nOBL4y9b0b4GNVfW7w1U1mb0ipaRF4t
GMzgyfcTejc0bUtwL+HHgyoyA4f9hyJucWgDShJP+N0aUf\nPszof/p/A7y/qt44aGEaVJLrdj5HKMnGveF8CANAmod2QbhnttnPVNX1Q9aj4ST5D8B/BB4LfG1s\n0SOAv6uqFw9S2DwYAJK0B9plQJYAbwTGz/r9/t5wQ3gwACSpW+4ElqROGQCS1CkDQJI6ZQBIUqcM\nAEnq1P8HV9/axntrFV8AAAAASUVORK5CYII=\n",
|
|
"text/plain": [
|
|
"<matplotlib.figure.Figure at 0x113f03390>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Number of flows (rows) per app label: drawn as a bar chart, then shown as the cell result.\n",
"examples_df = pd.read_csv(exmaples_file)\n",
"class_counts = examples_df['label'].value_counts()\n",
"class_counts.plot(kind='bar')\n",
"class_counts"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 40,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"183\n",
|
|
" precision recall f1\n",
|
|
"LogisticRegression 0.775161 0.775161 0.775161\n",
|
|
"SVM 0.831906 0.831906 0.831906\n",
|
|
"GaussianNB 0.729122 0.729122 0.729122\n",
|
|
"tree 0.984582 0.984582 0.984582\n",
|
|
"RandomForest 0.989507 0.989507 0.989507\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAFcCAYAAAAzq/4LAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XmYVOWZ/vHvTcvibiJoDCigQQWVTRAQt2icqOOgiYlI\nQozBJYpEjc5ixvwSxyQTs2lwVIwG1ygu46joYFwi7tFhEVxADJJWO0ZFFAERWXx+f5zTUDQNXY1F\nn+pz7s911WWdpaqeKum7Tr3nPe+riMDMzPKlTdYFmJlZ5TnczcxyyOFuZpZDDnczsxxyuJuZ5ZDD\n3cwshxzuZmY55HC3qifp3yX9fiMfu0TSrpugph0lPS5psaTfVPr5zT4t+SIm25QkXQh8ISJGlrn/\nIcAfIqLLRrzWo+ljN+qLoJmv9f+AfsBx8Sn/iCRdD9RFxA8rUZsZ+MjdbGN1BWZ92mCvBEmbZV2D\nVaGI8M23ityAfwP+BiwG5gD/CCwHVgBLgJnpft8BZqf7zQO+m67fEvgI+CTdfwnweeBCkiNygA7A\nH4AFwEJgCrAj8DNgFbAsfdzl6f5B8ssBYHPgN8BrwAfAk+m6Rp9zA+/z+vQ9LU9f60skB0rnA6+m\nz3M78NmSx9wBvJW+7uPAXun60xo8170N6y55zZ+m9w8B6tLP+y3gpnT90cCM9D08DfTO+t+Eb9nd\n/I1vFSFpD2AMMDAi3pTUDagB/pN1m2XeIQmiecBBwP2SpkTEdElH0qBZRlLpS30b2BbYGfgY6At8\nFBEXSBrKhptlfg3sBexPEoqDSL5ITmrsOdf3XiPipLSm1U0pks4BjgUOBuYDlwFXACPSh90PjCIJ\n8V8ANwN9I+JqSfvT/GaZzwGfJfkF0UZSf+Ba4J+AqcBIYKKkPSLi42Y8r+WEm2WsUlYB7YFektpG\nRG1EvNrYjhHxvxHxaiQeAx4EDizzdVYA25N8YayKiGkRsaipB0lqQxKuZ0fE39LHPp0G30Y9ZwPf\nBS6IiLr0OS8EvlbfZBIR10bE4pJtfSRt28zXKPUJ8OOI+DgiPgJOBX4XEc+m7+EGki+qwZ/iNawV\nc7hbRUTEXOAckuB6R9Ktkj7f2L6SjpT0jKT3JC0EjgI6lvlSNwEPALdKelPSLyW1LeNxHUmaXxr7\nwtnY5yzVFbhL0sL0Pc0m+cLbUVKNpIslvSppEVBbUtPGmh8Ryxq8/nn1r5/WsDNJs5YVkMPdKiYi\nbomIA0iCJkiaH9Y64SipPXAnSRPJjhGxHTAJqG972eAJyohYERH/ERG9SJpXjgZOLOOx75K0x+/W\nzOcs1xvAkRGxXcmtQ0T8DfgGcAxJ2/y2QLf0MRt6z0uBLUqWP9ew7EZe/2cNXn+LiJjQzPdhOeFw\nt4qQtIekQ9PwXkbSZr0KeBvoljaLALQjab6ZD6xM29j/oeSp3ga2X1+ThaQvStpHUg2wiKRJZVXJ\nYxvt0x4Rn5C0SV8i6fPp0fQQSe2beM5yXQX8TFLXtM5Oko5Jt21N0kSygCSw/7PBYxurewbwjbTO\nI0ja8jfkGuB0SYOU2FLSP0raupnvw3LC4W6V0h64mOQI+S1gB+DfSXqJACyQND0iFgNnkfQmeZ/k\nqHZi/ZNExMvABGBe2rzQsFnhc8B/k4TwbOAxkp4uAGNJ2rnfl3RZIzX+M/ACSW+Y90h+WbRp4jnL\nNTZ9Hw9KWgw8Q3LCFuBGkh46fwNmpdtKjSc5V7FQ0t3purNJTo4uBL4J3M0GRMRUknb3y0k+17kk\nJ4qtoHwRk5lZDvnI3cwsh9zP3Ww9JC1Zz6YjI+KJFi3GrJncLGNmlkNuljEzy6HMmmU6duwY3bp1\ny+rlzcxapWnTpr0bEZ2a2i+zcO/WrRtTp07N6uXNzFolSa+Vs5+bZczMcsjhbmaWQ02Gu6RrJb0j\n6cX1bJekyyTNlfR8OvSomZllqJw29+tJLmm+
cT3bjwR6pLdBwDjWXHZtto4VK1ZQV1fHsmXLmt7Z\n1tGhQwe6dOlC27bNHbjSiqTJcI+Ix9OJF9bnGODGSDrMPyNpO0k7RcTfK1Sj5UxdXR1bb7013bp1\nazgRhzUhIliwYAF1dXV0794963KsilWizb0zyXCj9erSdWaNWrZsGdtvv72DfSNIYvvtt/evHmtS\nJcK9sb/QRi97lXSapKmSps6fP78CL22tlYN94/mzs3JUItzrSGZ8qdcFeLOxHSPi6ogYEBEDOnVq\nsg++Wauz//77b3D7UUcdxcKFC1uoGiuySlzENBEYI+lWkhOpH7i93Zqj2/n/W9Hnq734HyvyPKtW\nraKmpqZZj3n66ac3uH3SpEmfpiT7FJrz76y2wzfK3nef7ruUve8L336h7H0/rSbDXdIE4BCgo6Q6\n4MdAW4CIuIpkirSjSCYHWAp8Z1MVa1YptbW1HHHEEQwaNIjnnnuO3XffnRtvvJFevXoxatQoHnzw\nQcaMGcPAgQM588wzmT9/PltssQXXXHMNe+65J2+//Tann3468+bNA2DcuHHsv//+bLXVVixZsoS/\n//3vDB8+nEWLFrFy5UrGjRvHgQceuPrK7I4dO3LJJZdw7bXXAnDKKadwzjnnUFtby5FHHskBBxzA\n008/TefOnbnnnnvYfPPNN+p9Fi3QbI1yesuMaGJ7AGdWrCKzFjJnzhzGjx/P0KFDGTVqFFdeeSWQ\ndDV88sknATjssMO46qqr6NGjB88++yyjR4/mkUce4ayzzuLggw/mrrvuYtWqVSxZsvbowLfccgtf\n/vKXueCCC1i1ahVLly5da/u0adO47rrrePbZZ4kIBg0axMEHH8xnPvMZ/vKXvzBhwgSuueYajj/+\neO68805GjhzZMh+K5YbHc7fC2nnnnRk6dCgAI0eO5LLLkpn5hg8fDsCSJUt4+umn+frXv776MR9/\n/DEAjzzyCDfemFz6UVNTw7bbrj3l68CBAxk1ahQrVqzg2GOPpW/fvmttf/LJJ/nKV77ClltuCcBX\nv/pVnnjiCYYNG0b37t1X77/vvvtSW1tb4XduReDhB6ywGvY6qV+uD9xPPvmE7bbbjhkzZqy+zZ49\nu6znPuigg3j88cfp3Lkz3/rWt1Z/EdTb0DwK7du3X32/pqaGlStXlvWaZqUc7lZYr7/+On/+858B\nmDBhAgcccMBa27fZZhu6d+/OHXckc3xHBDNnzgSS5ppx48YByYnXRYsWrfXY1157jR122IFTTz2V\nk08+menTp6+1/aCDDuLuu+9m6dKlfPjhh9x1110ceOCBm+R9WjE53K2wevbsyQ033EDv3r157733\nOOOMM9bZ5+abb2b8+PH06dOHvfbai3vuuQeAsWPHMnnyZPbZZx/23XdfXnrppbUe9+ijj9K3b1/6\n9evHnXfeydlnn73W9v79+3PSSSex3377MWjQIE455RT69eu36d6sFU5m0+wNGDAgPJ57Mc2ePZue\nPXtmWkNtbS1HH300L77Y6Hh4Va/cz9C9ZdbIy2chaVpEDGhqPx+5m5nlkMPdCqlbt26t9qjdrBwO\ndzOzHHK4m5nlkMPdzCyHHO5mZjnkcDerkNraWvbee28g6ed+9NFHZ1yRFZnHlrHsXbht0/s06/k+\naNbuEUFE0KaNj3UsP/yv2QqptraWnj17Mnr0aPr3789NN93EkCFD6N+/P1//+tdXj/I4ZcoU9t9/\nf/r06cN+++3H4sWLqa2t5cADD6R///7079+/yTHczbLgcLfCmjNnDieeeCIPPfQQ48eP5+GHH2b6\n9OkMGDCASy65hOXLlzN8+HDGjh3LzJkzefjhh9l8883ZYYcdeOihh5g+fTq33XYbZ511VtZvxWwd\nbpaxwuratSuDBw/mvvvuY9asWauH/12+fDlDhgxhzpw57LTTTgwcOBBIBhID+PDDDxkzZgwzZsyg\npqaGV155
JbP3YLY+DncrrPqhfSOCww8/nAkTJqy1/fnnn290MupLL72UHXfckZkzZ/LJJ5/QoUOH\nFqnXrDncLGOFN3jwYJ566inmzp0LwNKlS3nllVfYc889efPNN5kyZQoAixcvZuXKlXzwwQfstNNO\ntGnThptuuolVq1ZlWb5ZoxzuVnidOnXi+uuvZ8SIEfTu3ZvBgwfz8ssv065dO2677Ta+973v0adP\nHw4//HCWLVvG6NGjueGGGxg8eDCvvPLK6l8AZtXEQ/5ai6uGIX9bOw/523x5+Sw85K+ZWYE53M3M\ncsjhbmaWQw53M7MccribmeWQw93MLIcc7lZIl112GT179uS4445jyJAhtG/fnl//+tdZl2VWMR5+\nwDK3zw37VPT5yulLfOWVV3L//fez5ZZb8tprr3H33XdXtAazrPnI3Qrn9NNPZ968eQwbNoybb76Z\ngQMH0rZt26zLMqsoH7lb4Vx11VX88Y9/ZPLkyXTs2DHrcsw2CYd7TuTl0mozqww3y5iZ5ZDD3cws\nh8pqlpF0BDAWqAF+HxEXN9i+C3ADsF26z/kRManCtZpV3FtvvcWAAQNYtGgRbdq04be//S2zZs1a\nPeuSWWvVZLhLqgGuAA4H6oApkiZGxKyS3X4I3B4R4yT1AiYB3TZBvZZDWbTV19bWrr5fV1fX4q9v\ntqmV0yyzHzA3IuZFxHLgVuCYBvsEUH+osy3wZuVKNDOz5iqnWaYz8EbJch0wqME+FwIPSvoesCXw\npYpUZ2ZmG6WcI/d1ZwhOjtRLjQCuj4guwFHATZLWeW5Jp0maKmnq/Pnzm1+tmZmVpZxwrwN2Llnu\nwrrNLicDtwNExJ+BDsA6V4dExNURMSAiBnTq1GnjKrZcyGp6xzzwZ2flKCfcpwA9JHWX1A44AZjY\nYJ/XgcMAJPUkCXcfmlujOnTowIIFCxxSGyEiWLBgAR06dMi6FKtyTba5R8RKSWOAB0i6OV4bES9J\nugiYGhETgfOAayR9n6TJ5qTwX66tR5cuXairq8NNcxunQ4cOdOnSJesyrMqV1c897bM+qcG6H5Xc\nnwUMrWxplldt27ale/fuWZdhlmutemwZj6diZtY4Dz9gZpZDDnczsxxyuJuZ5ZDD3cwshxzuZmY5\n5HA3M8shh7uZWQ453M3McsjhbmaWQ636ClWzxvjKZTMfuZuZ5ZLD3cwshxzuZmY55HA3M8shh7uZ\nWQ453M3McsjhbmaWQw53M7MccribmeWQw93MLIcc7mZmOeRwNzPLIYe7mVkOOdzNzHLI4W5mlkMO\ndzOzHHK4m5nlkMPdzCyHHO5mZjnkcDczyyGHu5lZDjnczcxyqKxwl3SEpDmS5ko6fz37HC9plqSX\nJN1S2TLNzKw5NmtqB0k1wBXA4UAdMEXSxIiYVbJPD+AHwNCIeF/SDpuqYDMza1o5R+77AXMjYl5E\nLAduBY5psM+pwBUR8T5ARLxT2TLNzKw5ygn3zsAbJct16bpSuwO7S3pK0jOSjqhUgWZm1nxNNssA\namRdNPI8PYBDgC7AE5L2joiFaz2RdBpwGsAuu+zS7GLNzKw85Ry51wE7lyx3Ad5sZJ97ImJFRPwV\nmEMS9muJiKsjYkBEDOjUqdPG1mxmZk0oJ9ynAD0kdZfUDjgBmNhgn7uBLwJI6kjSTDOvkoWamVn5\nmgz3iFgJjAEeAGYDt0fES5IukjQs3e0BYIGkWcBk4F8iYsGmKtrMzDasnDZ3ImISMKnBuh+V3A/g\n3PRmZmYZ8xWqZmY55HA3M8shh7uZWQ453M3McsjhbmaWQw53M7MccribmeWQw93MLIcc7mZmOeRw\nNzPLIYe7mVkOOdzNzHLI4W5mlkMOdzOzHHK4m5nlkMPdzCyHHO5mZjnkcDczyyGHu5lZDjnczcxy\nyOFuZpZDDnczsxxyuJuZ5ZDD3cwshxzuZmY55HA3M8shh7uZWQ453M3Mcs
jhbmaWQw53M7Mccrib\nmeWQw93MLIcc7mZmOVRWuEs6QtIcSXMlnb+B/b4mKSQNqFyJZmbWXE2Gu6Qa4ArgSKAXMEJSr0b2\n2xo4C3i20kWamVnzlHPkvh8wNyLmRcRy4FbgmEb2+wnwS2BZBeszM7ONUE64dwbeKFmuS9etJqkf\nsHNE3LehJ5J0mqSpkqbOnz+/2cWamVl5ygl3NbIuVm+U2gCXAuc19UQRcXVEDIiIAZ06dSq/SjMz\na5Zywr0O2LlkuQvwZsny1sDewKOSaoHBwESfVDUzy0454T4F6CGpu6R2wAnAxPqNEfFBRHSMiG4R\n0Q14BhgWEVM3ScVmZtakJsM9IlYCY4AHgNnA7RHxkqSLJA3b1AWamVnzbVbOThExCZjUYN2P1rPv\nIZ++LDMz+zR8haqZWQ453M3McsjhbmaWQw53M7MccribmeWQw93MLIcc7mZmOeRwNzPLIYe7mVkO\nOdzNzHLI4W5mlkMOdzOzHHK4m5nlkMPdzCyHHO5mZjnkcDczyyGHu5lZDjnczcxyyOFuZpZDDncz\nsxxyuJuZ5ZDD3cwshxzuZmY55HA3M8shh7uZWQ453M3McsjhbmaWQw53M7MccribmeWQw93MLIcc\n7mZmOeRwNzPLIYe7mVkOlRXuko6QNEfSXEnnN7L9XEmzJD0v6U+Sula+VDMzK1eT4S6pBrgCOBLo\nBYyQ1KvBbs8BAyKiN/DfwC8rXaiZmZWvnCP3/YC5ETEvIpYDtwLHlO4QEZMjYmm6+AzQpbJlmplZ\nc5QT7p2BN0qW69J163MycH9jGySdJmmqpKnz588vv0ozM2uWcsJdjayLRneURgIDgF81tj0iro6I\nARExoFOnTuVXaWZmzbJZGfvUATuXLHcB3my4k6QvARcAB0fEx5Upz8zMNkY5R+5TgB6SuktqB5wA\nTCzdQVI/4HfAsIh4p/JlmplZczQZ7hGxEhgDPADMBm6PiJckXSRpWLrbr4CtgDskzZA0cT1PZ2Zm\nLaCcZhkiYhIwqcG6H5Xc/1KF6zIzs0/BV6iameWQw93MLIcc7mZmOeRwNzPLIYe7mVkOOdzNzHLI\n4W5mlkMOdzOzHHK4m5nlkMPdzCyHHO5mZjnkcDczyyGHu5lZDjnczcxyyOFuZpZDDnczsxxyuJuZ\n5ZDD3cwshxzuZmY55HA3M8shh7uZWQ453M3McsjhbmaWQw53M7MccribmeWQw93MLIcc7mZmOeRw\nNzPLIYe7mVkOOdzNzHLI4W5mlkMOdzOzHHK4m5nlUFnhLukISXMkzZV0fiPb20u6Ld3+rKRulS7U\nzMzK12S4S6oBrgCOBHoBIyT1arDbycD7EfEF4FLgF5Uu1MzMylfOkft+wNyImBcRy4FbgWMa7HMM\ncEN6/7+BwySpcmWamVlzKCI2vIP0NeCIiDglXf4WMCgixpTs82K6T126/Gq6z7sNnus04LR0cQ9g\nTqXeyKfQEXi3yb2KwZ9Fwp/DGv4s1qiWz6JrRHRqaqfNyniixo7AG34jlLMPEXE1cHUZr9liJE2N\niAFZ11EN/Fkk/Dms4c9ijdb2WZTTLFMH7Fyy3AV4c337SNoM2BZ4rxIFmplZ85UT7lOAHpK6S2oH\nnABMbLDPRODb6f2vAY9EU+09Zma2yTTZLBMRKyWNAR4AaoBrI+IlSRcBUyNiIjAeuEnSXJIj9hM2\nZdEVVlXNRBnzZ5Hw57CGP4s1WtVn0eQJVTMza318haqZWQ453M3McsjhbmaWQw53M7P1kDS0nHXV\nqJAnVCXtDvwL0JWSHkMRcWhmRbUgSc+vbxMQEdG7JevJkqQOwHDgfeBe4F+BA4FXgZ80vMq6CCRt\nAZwH7BIRp0rqAewREfdlXFqLkzQ9Ivo3ta4alXOFah7dAVwFXAOsyriWLHxCcgXxLSSB9lG25WTq\nRmAFsCVJoL0IXA4cAFwPHJ1ZZdm5Dp
gGDEmX60j+ZgoT7pKGAPsDnSSdW7JpG5Iu4VWvqOG+MiLG\nZV1EViKir6Q9gREkAT8r/e+DEbEy0+JaXq+I2Du9srouIg5O1/9R0swsC8vQbhExXNIIgIj4qIAD\nAbYDtiLJyK1L1i8iuVCz6hU13O+VNBq4C/i4fmVEFGbIhIh4Gfgx8GNJw0mOYH8B/CrTwlreclh9\nsV7DYTWK+KsOYLmkzUnHh5K0GyV/J0UQEY8Bj0m6PiJeA5DUBtgqIhZlW115itrm/tdGVkdE7Nri\nxWREUmeSK4m/QtLefDtwV0QsybSwFibpHZJhrEXS9n5r/Sbg+IjYMavasiLpcOCHJPM3PAgMBU6K\niEezrCsLkm4BTif5op9GMm7WJRFR9QdBhQz3opP0GMlPzdtJxt9f6xdLkX7BSPr2hrZHxA0b2p5X\nkrYHBpN8yT1TxBPLAJJmpM2Y3wT2Bf4NmNYaOh0UMtwltQXOAA5KVz0K/C4iVmRWVAuSVMuaIZlL\n/wHU95YpzC8YW1favv5NYNeIuEjSLsDnIuL/Mi6txUl6CehLck7q8oh4TNLMiOiTcWlNKmqb+zig\nLXBluvytdN0pmVXUgiKiW9Y1VAtJ19HI3AOpiIiTW7KeKnElSY+qQ4GLgMXAncDALIvKyO+AWmAm\n8LikriQnVateUY/c1/nmbS3fxpUgaRbwB+DWiJiXdT1ZknRcI6t3Ac4BaiKiSwuXlLn6ftySnouI\nfum6wvx9NEXSZq2hV1lRr1BdlfYAAEDSrhSrZ8QIkjb3hyQ9K+kcSZ/PuqgsRMSd9TfgOZKJ4M8A\nLgaK2jy1QlINa3rLdCI5ki8cSTtKGi/p/nS5F2vmrqhqRT1yP4zkQo15JO3MXYHvRMTkTAvLgKTB\nJL1EjgPmAhMi4ppsq2pZknoCFwD9SLqC/qE1HJltKunJw+FAf5KJ778G/DAi7si0sAykoX4dcEFE\n9Emvh3guIvbJuLQmFTLcASS1J5mkW8DLEVGofrwNSToEuJTkop72GZfTYiTdAQwAfk3Se2itX3BF\n6jlUKr3I7TCSv48/RcTsjEvKhKQpETGwQRPVjIjom3VtTSnUCVVJh0bEI5K+2mDTbpKIiP/JpLCM\nSBpI0kRzHMlJo6tJLjMvkoEkzQ//TDL8QOmVmEHBmmbSC3Wej4i9gZezrqcKfJh2C61vohoMfJBt\nSeUpVLgDBwOPAP/UyLYAChHukv4TOB5YSHLRztCIqMu2qmy459DaIuITSTMl7RIRr2ddTxU4l2SO\n6N0kPQV0opUMP1DYZpkikzQJuDgiHk+XTyQ5en8NuLDATRGdWXek0Mezqygbkh4h+UXzf8CH9esj\nYlhmRWUg/RUzmORzqG/CndNarocp2pE7AJLOJjlJsphkZMj+wPkR8WCmhbWcz5GMfoikg0h6hnyP\n5GKNq2klRyaVJOkXJCcRZ7Gm3T2AwoU7yYBZpaNhimTcoUJJf8X8JiKGAC9lXU9zFTLcgVERMVbS\nl4EdgO+QhH1Rwr1NydH5cODqtCvgnZJmZFhXlo4lGbO80CfWU5ulA2etlg4kVkQPptdC/E+0smaO\nooZ7/Umzo4DrImJmwYY03azkQozDgNNKt2VUU9bmkVy1XNhwl3QGMBrYtcGELlsDT2VTVebOJRnr\nf5Wkj1gzRMc22ZbVtKL+IU+T9CDQHfiBpK0p1kUaE0iGM32XZKKOJwAkfYFW0hNgE1gKzJD0J9Ye\nBvqs7EpqcbcA9wM/B84vWb+4qOdhImLrpveqToU8oZqeKOkLzIuIhZI+C3SJiPVNP5c7aZeunUgm\n6PgwXbc7yXjV0zMtLgPrGx2yqKNC2hqShlEyyGBrmW6wqOE+FJgRER9KGklyQnVs/aD8ZmYAki4m\n6Tl0c7pqBMmQv+ev/1HVoajh/jzQB+gN3ASMB75aMsWaFUw6CfTPSSao6FC/3sMfF1uaFX0j4pN0\nuY
Zk+IGqH8+9qAOHrUzPfB9DcsQ+lrXnSbTiuY5k2OeVwBdJph28KdOKrFpsV3J/28yqaKainlBd\nLOkHJOO4H5h+G7fNuCbL1uYR8SdJSpvnLpT0BMk8s1ZcPweekzSZpKfMQcAPsi2pPEUN9+HAN0j6\nu7+VzjRT9XMi2ia1LD3R/hdJY4C/kVwDYQUWERMkPUrS7i7g3yLirWyrKk8h29wB0hlVekTEw5K2\nIJmYYXHWdVk20kHUZpP8BP8Jyc/vX0bEM5kWZpmQNCYiLk/v7xURre4K1UKGu6RTSS7c+WxE7Jae\nTLsqIg7LuDQzqwL1s1E1vN+aFLVZ5kxgP+BZgIj4iyT/BC8gSb+NiHMk3Usjc6kWbbAsa1SrvHq9\nqOH+cUQsrx9xIJ1dpXg/YQzW9Ij5daZVWLXZTtJXSHoUbtNwDojWMPdDUZtlfkkylvmJJKMhjgZm\nRcQFmRZmVUHSZ4Cdi3TFsq1N0nUb2BwRMarFitlIRQ33NsDJwD+Q/OR6APh9axv1zSon7RExjOTX\n7AxgPvBYRJybZV1mG6tw4Z72ab8hIkZmXYtVj/o5MiWdQnLU/mNJz7eGKxFt05G0Hckv/G6sPYlL\n1Q8oV7g294hYJamTpHYRsTzreqxqbCZpJ5LpB908Z/UmAc8AL9DKRo4tXLinaoGnJE1k7WnELsms\nIsvaRSTNc09GxBRJuwJ/ybgmy16H1to0V7hmGQBJjV5SHhH/0dK1mFn1kvR9YAlwH2uP81/149sX\nMtzNGkp7UP2UZPKSP5KMGnpORPwh08IsU5LOBH5G0ruuPiyjNYwWWshwX88FKx8AU4HfRcSylq/K\nsiRpRkT0Tfs2Hwt8H5gcEX0yLs0yJOlVYFBEvJt1Lc1V1CF/55H81LomvS0C3gZ2T5eteOpHBT0K\nmNAafnZbi3iJZArGVqeoJ1T7RcRBJcv3Sno8Ig6S1OoGCLKKuFfSyyTNMqMldQL8C85WkcytO5lW\nNrduUcO9k6RdIuJ1gHTI347pNnePLKCIOF/SL4BFaXfZD0kmc7Fiuzu9tTpFDffzgCfT9jQB3UmO\n1rYEPCFyAUk6seR+6aYbW74aqxYRcYOkdiRNtgBzImJFljWVq5AnVAEktQf2JAn3l30Stdgk/VfJ\nYgfgMGB6RHwto5KsCkg6hOSAr5YkK3YGvh0Rj2dYVlkKGe7p5BznAl0j4tR0PPc9IuK+jEuzKiFp\nW+AmD/lbbJKmAd+IiDnp8u4kJ9z3zbayphW1t8x1JG3rQ9LlOpI+zmb1lgI9si7CMte2PtgBIuIV\nWsl8y0WPmyQ4AAAGGUlEQVRtc98tIoZLGgEQER+pQUOrFUuDax/aAL2A27OryKrEVEnjWTPu/zeB\naRnWU7aihvtySZuT/jFL2o2Sbk5WSKWTdawEXouIuqyKsapxBsnMbWeRtLk/DlyZaUVlKmqb++HA\nD0mOzh4EhgInRcSjWdZlZlYphQx3AEnbA4NJvo2faY2XF1vlSBoM/BfQE2gH1AAfRsQ2mRZmmZD0\nAhuYerM1jPNf1GYZImIB8L8AkvaQ9POIODXjsiw7lwMnAHcAA0gmaPhCphVZlo5O/3tm+t/SNvdW\nMRxBoXrLSOot6UFJL0r6qaQdJd0J/AmYlXV9lq2ImAvURMSqiLgO+GLWNVk2IuK1iHgNGBoR/xoR\nL6S384EvZ11fOQoV7iSDgt0CHEcyR+Z0kkHEvhARl2ZZmGVuaXol4kxJv0zH8d4y66Isc1tKOqB+\nQdL+tJJ/F4Vqc68f1rVk+Q2gW0SsyrAsqwKSupKMDNqOZLjfbYBx6dG8FZSkfYFrgW3TVQuBUREx\nPbuqylO0NvcOkvqRnESFZNjf3vV93FvD/zCrLEnHAF0i4op0+TFgB5KTaX8GHO4FFhHTgD6StiE5\nGP4g65rKVbQj98kb2BwRcWiLFWNVQdJTwAkR8Ua6PAM4FNgKuC4i
DsuyPstWOgbVcUA3Sg6GI+Ki\nrGoqV6GO3CPCJ8isoXb1wZ56Mp2o4710lFArtntIZmmbRiu70LFQR+710nkRb46IhenyZ4AREdEq\nrjyzypE0NyIa7fIo6dWI2K2la7LqIenFiNg76zo2RtF6y9Q7tT7YASLifcB93IvpWUnr/L+X9F3g\n/zKox6rL05L2ybqIjVHUI/fngT6RvnlJNcDzEbFXtpVZS5O0A8lMOx+TdI0F2BdoDxwbEW9nVZtl\nT9IskovZ/kryb0Qk5+eq/grVoob7r0hOkFxF0ividOCNiDgvy7osO5IOBeq/3F+KiEeyrMeqQ9pF\ndh3pBU5Vrajh3gb4LslsOyIZPOz37u9uZo1Jf+F1qF+un3+5mhUy3M3MyiFpGPAb4PPAO0BXYHZr\naMItVFdISbdHxPHrG/GtNbSjmVmL+gnJ6LEPR0Q/SV8ERmRcU1kKFe7A2el/j97gXmZmiRURsUBS\nG0ltImKypF9kXVQ5CtUVMiL+nt4dXT/qW8nob6OzrM3MqtJCSVuRzMB0s6SxJDN1Vb1CtrlLmh4R\n/Ruse97NMmZWKr1K+SOSA+FvkgwgdnM6H0RVK1S4SzqD5Ah9N9YeEGpr4KmIGJlJYWbWKqTXxJwQ\nETdnXUtTihbu2wKfAX4OnF+yaXE6noiZGekokGcCnYGJwEPp8r8AMyLimAzLK0uhwr2epN2Auoj4\nWNIhQG/gxtIhCcysuCTdA7xPMuzzYSQHhe2AsyNiRpa1lauo4T6DZJ7MbsADJN/Me0TEUVnWZWbV\nQdILEbFPer8GeBfYJSIWZ1tZ+QrVW6bEJxGxEvgq8NuI+D6wU8Y1mVn1WFF/J71y/a+tKdiheP3c\n662QNIJkhvt/Ste1zbAeM6sufSQtSu8L2Dxdrh84bJvsSitPUcP9OySDhf0sIv4qqTvwh4xrMrMq\nERE1WdfwaRWyzd3MLO8KdeTusWXMrCgKdeQuaaeI+HtrHqPZzKwchQp3M7OiKFSzTD1Ji1m3WeYD\nYCpwXkTMa/mqzMwqp5DhDlwCvAncQtK16QTgc8Ac4FrgkMwqMzOrgEI2y0h6NiIGNVj3TEQMljQz\nIvpkVZuZWSUU9gpVScfXD8Av6fiSbcX7tjOz3CnqkfuuwFhgSLrqz8D3gb8B+0bEk1nVZmZWCYUM\ndzOzvCtks4ykLpLukvSOpLcl3SmpS9Z1mZlVSiHDHbiOZJjfz5MMxn9vus7MLBcK2SwjaUZE9G1q\nnZlZa1XUI/d3JY2UVJPeRgJVP+GtmVm5inrkvgtwOUlvmQCeBs6KiNczLczMrEIKGe6NkXRORPw2\n6zrMzCrB4Z6S9HpE7JJ1HWZmlVDUNvfGKOsCzMwqxeG+hn/CmFluFGpUyPUM9QvpBLgtXI6Z2Sbj\nNnczsxxys4yZWQ453M3McsjhbmaWQw53M7MccribmeXQ/wfeqDwsuoSv0AAAAABJRU5ErkJggg==\n",
|
|
"text/plain": [
|
|
"<matplotlib.figure.Figure at 0x10e68fa58>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.svm import SVC\n",
"from sklearn.naive_bayes import GaussianNB\n",
"from sklearn import tree\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.metrics import f1_score,recall_score,precision_score\n",
"import random\n",
"\n",
"# Evaluation settings, gathered here so they can be tuned in one place.\n",
"N_STAT_FEATURES = 25   # number of leading columns of examples_df used as features\n",
"TRAIN_FRACTION = 0.75  # share of rows placed in the training split\n",
"N_ROUNDS = 1           # shuffle/split/fit rounds averaged per model\n",
"SEED = 0               # fixed seed so a re-run reproduces the same table\n",
"\n",
"\n",
"def evaluate_classifier(make_classifier, data, n_rounds=N_ROUNDS,\n",
"                        train_fraction=TRAIN_FRACTION, seed=SEED):\n",
"    \"\"\"Fit a fresh classifier on shuffled splits and average its scores.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    make_classifier : callable\n",
"        Zero-argument factory returning an unfitted estimator that\n",
"        provides fit(x, y) and predict(x).\n",
"    data : numpy.ndarray\n",
"        2-D array whose last column is the label and whose remaining\n",
"        columns are the features.\n",
"    n_rounds : int\n",
"        Number of shuffle/split/fit rounds to average over.\n",
"    train_fraction : float\n",
"        Fraction of rows placed in the training split.\n",
"    seed : int\n",
"        Seed of the private generator that draws the splits. Passing the\n",
"        same seed for every model gives all of them identical splits.\n",
"\n",
"    Returns\n",
"    -------\n",
"    list of float\n",
"        [precision, recall, f1], each micro-averaged per round and then\n",
"        averaged over the rounds.\n",
"    \"\"\"\n",
"    rng = np.random.RandomState(seed)\n",
"    n_train = int(len(data) * train_fraction)\n",
"    precision_scores, recall_scores, f1_scores = [], [], []\n",
"    for _ in range(n_rounds):\n",
"        # Index with a permutation rather than shuffling in place, so the\n",
"        # array passed in by the caller is left untouched.\n",
"        shuffled = data[rng.permutation(len(data))]\n",
"        x_train, y_train = shuffled[:n_train, :-1], shuffled[:n_train, -1]\n",
"        x_test, y_test = shuffled[n_train:, :-1], shuffled[n_train:, -1]\n",
"        classifier = make_classifier()\n",
"        classifier.fit(x_train, y_train)\n",
"        y_pred = classifier.predict(x_test)\n",
"        precision_scores.append(precision_score(y_test, y_pred, average='micro'))\n",
"        recall_scores.append(recall_score(y_test, y_pred, average='micro'))\n",
"        f1_scores.append(f1_score(y_test, y_pred, average='micro'))\n",
"    return [np.mean(precision_scores), np.mean(recall_scores), np.mean(f1_scores)]\n",
"\n",
"\n",
"examples = examples_df.values.copy()\n",
"print(len(examples[0]))\n",
"# Keep only the first 25 flow-statistics features plus the label (last) column.\n",
"examples = np.c_[examples[:, :N_STAT_FEATURES].copy(), examples[:, -1].copy()]\n",
"\n",
"# (row label in score_df, estimator factory) for every model being compared.\n",
"# The tree-based models draw random numbers themselves, so they are seeded too.\n",
"classifier_factories = [\n",
"    ('LogisticRegression', LogisticRegression),\n",
"    ('SVM', SVC),\n",
"    ('GaussianNB', GaussianNB),\n",
"    ('tree', lambda: tree.DecisionTreeClassifier(random_state=SEED)),\n",
"    ('RandomForest', lambda: RandomForestClassifier(random_state=SEED)),\n",
"]\n",
"\n",
"score_df = pd.DataFrame(np.zeros((len(classifier_factories), 3)),\n",
"                        index=[name for name, _ in classifier_factories],\n",
"                        columns=['precision', 'recall', 'f1'])\n",
"# Every model is scored on the same seeded splits, so the rows are comparable.\n",
"for name, make_classifier in classifier_factories:\n",
"    score_df.loc[name] = evaluate_classifier(make_classifier, examples)\n",
"\n",
"print(score_df)\n",
"ax = score_df.plot.bar(title='statistics_feature')\n",
"fig = ax.get_figure()\n",
"# Uncomment to write the chart to disk:\n",
"#fig.savefig('../figure/base_feature.svg')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.6.2"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|