This repository has been archived on 2025-09-14. You can view files and clone it, but cannot push or open issues or pull requests.
Files
cuiyiming-gradproj/Experiment/statFeature/.ipynb_checkpoints/StatFeature-checkpoint.ipynb
2020-05-10 13:52:13 +08:00

688 lines
48 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import os\n",
"import json\n",
"import pandas as pd\n",
"import numpy as np\n"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Maps a TLS cipher-suite code (four upper-case hex digits) to its column index in the\n",
"# one-hot cipher feature vector. The index is simply the position of the code in the\n",
"# list below, so the order of the codes must not be changed.\n",
"ciper_suits = {code: idx for idx, code in enumerate([\n",
"    '1305', 'C030', 'C02C', 'C028', 'C024', 'C014', 'C00A', '00A5',  # 0-7\n",
"    '00A3', '00A1', '009F', '006B', '006A', '0069', '0068', '0039',  # 8-15\n",
"    '0038', '0037', '0036', '0088', '0087', '0086', '0085', 'C019',  # 16-23\n",
"    '00A7', '006D', '003A', '0089', 'C032', 'C02E', 'C02A', 'C026',  # 24-31\n",
"    'C00F', 'C005', '009D', '003D', '0035', '0084', '008D', 'C02F',  # 32-39\n",
"    'C02B', 'C027', 'C023', 'C013', 'C009', '00A4', '00A2', '00A0',  # 40-47\n",
"    '009E', '0067', '0040', '003F', '003E', '0033', '0032', '0031',  # 48-55\n",
"    '0030', '009A', '0099', '0098', '0097', '0045', '0044', '0043',  # 56-63\n",
"    '0042', 'C018', '00A6', '006C', '0034', '009B', '0046', 'C031',  # 64-71\n",
"    'C02D', 'C029', 'C025', 'C00E', 'C004', '009C', '003C', '002F',  # 72-79\n",
"    '0096', '0041', '008C', 'C012', 'C008', '0016', '0013', '0010',  # 80-87\n",
"    '000D', 'C017', '001B', 'C00D', 'C003', '000A', '0007', '008B',  # 88-95\n",
"    '0021', '001F', '0025', '0023', 'C011', 'C007', 'C016', '0018',  # 96-103\n",
"    'C00C', 'C002', '0005', '0004', '008A', '0020', '0024', 'C010',  # 104-111\n",
"    'C006', 'C015', 'C00B', 'C001', '003B', '0002', '0001', '1301',  # 112-119\n",
"    '1302', '1303', '1304',                                          # 120-122\n",
"])}"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Maps a TLS extension type number to its column index in the one-hot extension\n",
"# feature vector: types 0-31 map to themselves, 35 and 65281 take the last two slots.\n",
"extensions = {ext_type: ext_type for ext_type in range(32)}\n",
"extensions.update({35: 32, 65281: 33})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Build one feature row per labelled flow (flow statistics plus one-hot TLS cipher suites\n",
"# and extensions) and write the resulting table to examples.csv.\n",
"# Needs `ciper_suits` and `extensions` from the cells above.\n",
"date = '2019-12-20_21'\n",
"root_dir = '/Users/Leo/Documents/github/GradProj/'\n",
"# Inputs are read from 'noProxy/All'; the per-date paths are kept commented out.\n",
"#example_label_file = root_dir + 'DataSet/result/' + date + '/stream_tag.txt'\n",
"example_label_file = root_dir + 'DataSet/result/' + 'noProxy/All' + '/stream_tag.txt'\n",
"#example_json_file = root_dir + 'DataSet/result/' + date + '/stream_stat.txt'\n",
"example_json_file = root_dir + 'DataSet/result/' + 'noProxy/All' + '/stream_stat.txt'\n",
"example_csv_file = root_dir + 'Experiment/StatFeature/CsvFile/' + date + '/examples.csv'\n",
"\n",
"# Order of the per-direction statistics; shared by the row builder and the column names.\n",
"_STAT_NAMES = ['mean', 'median', 'std', 'max', 'min']\n",
"\n",
"\n",
"def _summary_stats(values, enabled):\n",
"    \"\"\"Return [mean, median, std, max, min] of values; all zeros if disabled or values is empty.\"\"\"\n",
"    if not enabled or len(values) == 0:\n",
"        return [0] * len(_STAT_NAMES)\n",
"    return [np.mean(values), np.median(values), np.std(values), np.max(values), np.min(values)]\n",
"\n",
"\n",
"def _one_hot(values, index_map, normalize=None):\n",
"    \"\"\"Return a uint8 vector holding 1 at index_map[v] for every v in values that is a known key.\"\"\"\n",
"    flags = np.zeros(len(index_map), dtype=np.uint8)\n",
"    for value in values:\n",
"        try:\n",
"            key = value if normalize is None else normalize(value)\n",
"            flags[index_map[key]] = 1\n",
"        except (AttributeError, IndexError, KeyError, TypeError):\n",
"            # Best effort: unknown or malformed entries are skipped.\n",
"            continue\n",
"    return flags\n",
"\n",
"\n",
"def _flow_features(flow):\n",
"    \"\"\"Turn one parsed stream_stat record into a feature row ordered like `header` below.\"\"\"\n",
"    # Keyed by packet['dir']: 1 feeds the c2s_* columns, 2 feeds the s2c_* columns.\n",
"    sizes = {1: [], 2: []}\n",
"    gaps = {1: [], 2: []}\n",
"    for packet in flow['packets']:\n",
"        direction = packet['dir']\n",
"        if direction in sizes:\n",
"            sizes[direction].append(packet['bytes'])\n",
"            gaps[direction].append(packet['interval'])\n",
"    row = [flow['c2s_bytes'], flow['c2s_pkts'], flow['s2c_bytes'], flow['s2c_pkts'], flow['duration']]\n",
"    for direction, total_bytes in ((1, flow['c2s_bytes']), (2, flow['s2c_bytes'])):\n",
"        # A direction that carried no bytes keeps all-zero statistics.\n",
"        has_bytes = total_bytes > 0\n",
"        row += _summary_stats(sizes[direction], has_bytes)\n",
"        row += _summary_stats(gaps[direction], has_bytes)\n",
"    tls = flow['tls']\n",
"    # Cipher codes are upper-cased before lookup because the keys of ciper_suits are upper-case.\n",
"    row += list(_one_hot(tls['cipher_suites'], ciper_suits, normalize=str.upper))\n",
"    row += list(_one_hot(tls['extensions_list'], extensions))\n",
"    return row\n",
"\n",
"\n",
"example_label_df = pd.read_csv(example_label_file, sep=r'\\s+', header=None)\n",
"# Columns 0-3 form the flow key and column 4 is the label. The key is looked up below as\n",
"# (sip, sport, dip, dport); presumably the tag file uses the same column order - TODO confirm.\n",
"# itertuples() replaces per-row .iloc access, which is extremely slow on large tag files.\n",
"example_label = {row[:4]: row[4] for row in example_label_df.itertuples(index=False, name=None)}\n",
"\n",
"result_data = list()\n",
"result_label = list()\n",
"with open(example_json_file, 'r') as example_json_f:\n",
"    for line in example_json_f:\n",
"        if not line.strip():\n",
"            continue  # tolerate blank lines\n",
"        example_json = json.loads(line)\n",
"        # Label: flows that have no entry in the tag file are skipped.\n",
"        try:\n",
"            flow_key = (example_json['sip'], example_json['sport'], example_json['dip'], example_json['dport'])\n",
"            label = example_label[flow_key]\n",
"        except (KeyError, TypeError):\n",
"            continue\n",
"        # Statistical and TLS features; label and row are appended together so the two\n",
"        # lists always stay aligned.\n",
"        result_data.append(_flow_features(example_json))\n",
"        result_label.append(label)\n",
"\n",
"base_head = ['c2s_bytes', 'c2s_pkts', 's2c_bytes', 's2c_pkts', 'duration']\n",
"base_head += ['{}_packets_{}_{}'.format(side, kind, stat)\n",
"              for side in ('c2s', 's2c')\n",
"              for kind in ('bytes', 'intervals')\n",
"              for stat in _STAT_NAMES]\n",
"cipher_head = ['cipher' + str(idx) for idx in range(len(ciper_suits))]\n",
"extensions_head = ['extension' + str(idx) for idx in range(len(extensions))]\n",
"header = base_head + cipher_head + extensions_head\n",
"result_df = pd.DataFrame(result_data, columns=header)\n",
"result_df['label'] = np.array(result_label)\n",
"# Create the per-date output directory on first use instead of failing in to_csv.\n",
"os.makedirs(os.path.dirname(example_csv_file), exist_ok=True)\n",
"result_df.to_csv(example_csv_file, index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import os\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Count the total number of packets (both directions) per app label in examples.csv.\n",
"date = '2019-12-20_21'\n",
"root_dir = '/Users/Leo/Documents/github/GradProj/'\n",
"exmaples_file = root_dir + 'Experiment/StatFeature/CsvFile/' + date + '/examples.csv'\n",
"app2pktsDict = dict()\n",
"with open(exmaples_file) as f:\n",
"    next(f, None)  # skip the CSV header row\n",
"    for line in f:\n",
"        # Strip the line terminator here so the label key never carries a newline\n",
"        # (and the final line is handled correctly even without a trailing newline).\n",
"        fields = line.rstrip('\\r\\n').split(',')\n",
"        if len(fields) < 4:\n",
"            continue  # blank or truncated line\n",
"        # Columns 1 and 3 are c2s_pkts and s2c_pkts in the header of examples.csv.\n",
"        pkts = int(fields[1]) + int(fields[3])\n",
"        appName = fields[-1]  # the label is the last column\n",
"        app2pktsDict[appName] = app2pktsDict.get(appName, 0) + pkts\n",
"for appName, pkts in app2pktsDict.items():\n",
"    print(appName + ': ', pkts)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Number of flows per app label: bar chart plus the raw counts as the cell output.\n",
"# Uses `exmaples_file` set in the previous cell.\n",
"examples_df = pd.read_csv(exmaples_file)\n",
"class_counts = examples_df.loc[:, 'label'].value_counts()\n",
"class_counts.plot(kind='bar')\n",
"class_counts"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"183\n",
"[array([ 0.97651004, 0.98160604, 0.9761279 , 0.94049181, 0.9981586 ,\n",
" 0.99686109, 0.95801188, 0.98112154, 0.97535658, 0.98954548]), array([ 0.97265403, 0.98924357, 0.97842462, 0.95896234, 0.99429078,\n",
" 0.9940108 , 0.97831705, 0.96367686, 0.97805697, 0.97734534]), array([ 0.97456595, 0.98540172, 0.97725141, 0.94959633, 0.99621813,\n",
" 0.99543102, 0.96804488, 0.97230357, 0.97669943, 0.98340521])]\n",
" precision recall f1\n",
"LogisticRegression 0.0 0.0 0.0\n",
"SVM 0.0 0.0 0.0\n",
"GaussianNB 0.0 0.0 0.0\n",
"tree 0.0 0.0 0.0\n",
"RandomForest 0.0 0.0 0.0\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYYAAAFcCAYAAADMJRmsAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XmYVOWZ/vHvTYNgXMAFV8TGXUBBaBRciMu4jnE3SmLE\naCSKRo2ZmZgx89MxZqIZ4xajRmMUjStx3BKN+240LIIKiiJBbXfFBQRle35/nNNQp23oqq6yT5V9\nf66rrq6zVNVTpdRd533PeV9FBGZmZk065V2AmZlVFweDmZllOBjMzCzDwWBmZhkOBjMzy3AwmJlZ\nhoPBzMwyHAz2tSbpPyX9oY2PnSNpo6+gprUlPSZptqTfVPr5zcolX+Bm1UrSmcAmEXFEkfvvDPwp\nInq14bUeSR/bphAp8bX+C9gGODjK/Aco6RqgMSJ+XonazMBHDGZ52BCYWm4oVIKkznnXYFUoInzz\nLfcb8FPgTWA2MA34V2A+sACYA0xO9/s+8GK63wzgh+n6lYB5wOJ0/znAesCZJEcCAN2APwEfAh8D\n44C1gV8Ci4DP08ddku4fJEcsACsCvwFeAz4BnkjXtficy3mf16TvaX76Wv9C8gPtNODV9HluAVYv\neMxY4J30dR8D+qXrRzV7rrua113wmmen93cGGtPP+x3gunT9vsCk9D08BWyd9/8TvuV3868Fy52k\nzYETgSER8ZakeqAO+B++3JT0HsmX2AxgOHCPpHERMVHS3jRrSpJU+FIjge7ABsAXwEBgXkScLmkH\nlt+UdB7QD9ie5At1O5IQOqql51zWe42Io9KaljT/SDoFOAD4JvA+cDHwO2BE+rB7gKNJAuBc4Hpg\nYERcIWl7Sm9KWgdYneTIpZOkQcAfgW8B44EjgDslbR4RX5TwvPY14aYkqwaLgK5AX0ldImJmRLza\n0o4R8deIeDUSjwL3ATsV+ToLgDVIwmZRREyIiE9be5CkTiRfzCdHxJvpY59KvzTb9JzN/BA4PSIa\n0+c8EzikqZknIv4YEbMLtg2Q1L3E1yi0GDgjIr6IiHnAscDvI+KZ9D2MIQm5oWW8htUwB4PlLiKm\nA6eQfOm9J+kmSeu1tK+kvSU9LWmWpI+BfYA1i3yp64B7gZskvSXp15K6FPG4NUmajFoKq7Y+Z6EN\ngdskfZy+pxdJwnJtSXWSzpH0qqRPgZkFNbXV+xHxebPX/0nT66c1bEDSFGcdkIPBqkJE3BARO5J8\nSQVJk0mmc1ZSV+BWkmadtSOiB3A30NRetNzO3IhYEBH/HRF9SZqE9gWOLOKxH5D0P2xc4nMW6w1g\n74joUXDrFhFvAt8B9ifpi+gO1KePWd57ngt8o2B5neZlt/D6v2z2+t+IiBtLfB/2NeFgsNxJ2lzS\nrukX/+ckbfSLgHeB+rQpB2AFkian94GFaZ/CHgVP9S6wxrKaWSTtImkrSXXApyTNQIsKHtviNQsR\nsZikDf58Seulv+KHSeraynMW63Lgl5I2TOvsKWn/dNsqJM06H5J82f9Ps8e2VPck4DtpnXuR9F0s\nz5XAcZK2U2IlSf8qaZUS34d9TTgYrBp0Bc4h+WX+DrAW8J8kZ+MAfChpYkTMBk4iOWvnI5Jf03c2\nPUlEvATcCMxIm0SaN4WsA/yZ5Av8ReBRkjOKAC4iadf/SNLFLdT4b8DzJGcdzSI5ounUynMW66L0\nfdwnaTbwNEnnNsC1JGdCvQlMTbcVuoqkb+ZjSben604m6Uj+GPgucDvLERHjSfoZLiH5XKeTdKpb\nB+UL3MzMLMNHDGZmluHrGMy+ApLmLGPT3hHxeLsWY1YiNyWZmVmGm5LMzCyjJpuS1lxzzaivr8+7\nDDOzmjJhwoQPIqJna/vVZDDU19czfvz4vMswM6spkl4rZj83JZmZWYaDwczMMhwMZmaWUZN9DGb2\n9bVgwQIaGxv5/PPPW9/ZWtStWzd69epFly6l
DvSbcDCYWVVpbGxklVVWob6+vvlES1aEiODDDz+k\nsbGRPn36tOk53JRkZlXl888/Z4011nAotJEk1lhjjbKOuBwMZlZ1HArlKffzczCYmbWT7bfffrnb\n99lnHz7++ON2qmbZ3MdgZlWt/rS/VvT5Zp7zrxV5nkWLFlFXV1fSY5566qnlbr/77rvLKalifMRg\nZtbMzJkz2WKLLRg5ciRbb701hxxyCHPnzqW+vp6zzjqLHXfckbFjx/Lqq6+y1157MXjwYHbaaSde\neuklAN59910OPPBABgwYwIABA5YEwsorrwzA22+/zfDhwxk4cCD9+/fn8ceTAXfr6+v54IMPADj/\n/PPp378//fv358ILL1xS15Zbbsmxxx5Lv3792GOPPZg3b17F37+DwcysBdOmTWPUqFE899xzrLrq\nqlx66aVAciroE088weGHH86oUaP47W9/y4QJEzjvvPMYPXo0ACeddBLf/OY3mTx5MhMnTqRfv36Z\n577hhhvYc889mTRpEpMnT2bgwIGZ7RMmTODqq6/mmWee4emnn+bKK6/k2WefBeCVV17hhBNOYMqU\nKfTo0YNbb7214u/dTUlmZi3YYIMN2GGHHQA44ogjuPjiZMbXww47DIA5c+bw1FNPceihhy55zBdf\nfAHAQw89xLXXXgtAXV0d3btnpyEfMmQIRx99NAsWLOCAAw74UjA88cQTHHjggay00koAHHTQQTz+\n+OPst99+9OnTZ8n+gwcPZubMmRV+5z5iMDNrUfMze5qWm76sFy9eTI8ePZg0adKS24svvljUcw8f\nPpzHHnuM9ddfn+9973tLQqTJ8ubJ6dq165L7dXV1LFy4sKjXLIWDwcysBa+//jp///vfAbjxxhvZ\ncccdM9tXXXVV+vTpw9ixY4Hky3zy5MkA7Lbbblx22WVA0kn96aefZh772muvsdZaa3HsscdyzDHH\nMHHixMz24cOHc/vttzN37lw+++wzbrvtNnbaaaev5H22xMFgZtaCLbfckjFjxrD11lsza9Ysjj/+\n+C/tc/3113PVVVcxYMAA+vXrxx133AHARRddxMMPP8xWW23F4MGDmTJlSuZxjzzyCAMHDmSbbbbh\n1ltv5eSTT85sHzRoEEcddRTbbrst2223HT/4wQ/YZpttvro320xNTu3Z0NAQno/B7OvpxRdfZMst\nt8y1hpkzZ7Lvvvvywgsv5FpHOVr6HCVNiIiG1h7rIwYzM8twMJiZNVNfX1/TRwvlcjCYmVmGg8HM\nzDIcDGZmluFgMDOzDAeDmVk7mDlzJv379weS6xj23XffnCtaNo+VZGbV7czure9T0vN9UtLuEUFE\n0KlTx/kdXZF3KmkvSdMkTZd0Wgvbu0q6Od3+jKT6Ztt7S5oj6d8qUY+ZWTmahrcePXo0gwYN4rrr\nrmPYsGEMGjSIQw89lDlz5gAwbtw4tt9+ewYMGMC2227L7NmzmTlzJjvttBODBg1i0KBBrc7BUI3K\nDgZJdcDvgL2BvsAISX2b7XYM8FFEbAJcAJzbbPsFwD3l1mJmVinTpk3jyCOP5P777+eqq67igQce\nYOLEiTQ0NHD++eczf/58DjvsMC666CImT57MAw88wIorrshaa63F/fffz8SJE7n55ps56aST8n4r\nJatEU9K2wPSImAEg6SZgf2BqwT77A2em9/8MXCJJERGSDgBmAJ9VoBYzs4rYcMMNGTp0KH/5y1+Y\nOnXqkiG458+fz7Bhw5g2bRrrrrsuQ4YMAZJB9QA+++wzTjzxRCZNmkRdXR0vv/xybu+hrSoRDOsD\nbxQsNwLbLWufiFgo6RNgDUnzgJ8CuwPLbUaSNAoYBdC7d+8KlG1mtmxNw2tHBLvvvjs33nhjZvtz\nzz33paG5AS644ALWXnttJk+ezOLFi+nWrVu71FtJlehj+PInA81H5lvWPv8NXBARc1p7kYi4IiIa\nIqKhZ8+e
bSjTzKx0Q4cO5cknn2T69OkAzJ07l5dffpktttiCt956i3HjxgEwe/ZsFi5cyCeffMK6\n665Lp06duO6661i0aFGe5bdJJY4YGoENCpZ7AW8tY59GSZ2B7sAskiOLQyT9GugBLJb0eURcUoG6\nzMzK1rNnT6655hpGjBixZIa2s88+m80224ybb76ZH/3oR8ybN48VV1yRBx54gNGjR3PwwQczduxY\ndtlllyVHHrWk7GG30y/6l4HdgDeBccB3ImJKwT4nAFtFxHGSDgcOiohvN3ueM4E5EXFea6/pYbfN\nvr6qYdjtr4Nyht0u+4gh7TM4EbgXqAP+GBFTJJ0FjI+IO4GrgOskTSc5Uji83Nc1M7OvRkUucIuI\nu4G7m637fwX3PwcObf64ZvufWYlazMysPB3nUj4zMyuKg8HMzDIcDGZmluFgMDOzDAeDmVkzF198\nMVtuuSUHH3www4YNo2vXrpx3Xqtn0n9teNhtM6tqW43ZqqLP9/zI51vd59JLL+Wee+5hpZVW4rXX\nXuP222+vaA3VzkcMZmYFjjvuOGbMmMF+++3H9ddfz5AhQ+jSpUveZbUrHzGYmRW4/PLL+dvf/sbD\nDz/MmmuumXc5ufARg5mZZTgYzMwsw8FgZmYZ7mMwM1uGd955h4aGBj799FM6derEhRdeyNSpU5fM\n1vZ15WAws6pWzOmllTZz5swl9xsbG9v99fPmpiQzM8twMJiZWYaDwczMMhwMZlZ1yp1yuKMr9/Nz\nMJhZVenWrRsffvihw6GNIoIPP/yQbt26tfk5fFaSmVWVXr160djYyPvvv593KTWrW7du9OrVq82P\ndzCYWVXp0qULffr0ybuMDs1NSWZmluFgMDOzDAeDmZllOBjMzCzDwWBmZhkOBjMzy3AwmJlZhoPB\nzMwyHAxmZpbhYDAzs4yKBIOkvSRNkzRd0mktbO8q6eZ0+zOS6tP1u0uaIOn59O+ulajHzMzaruxg\nkFQH/A7YG+gLjJDUt9luxwAfRcQmwAXAuen6D4BvRcRWwEjgunLrMTOz8lTiiGFbYHpEzIiI+cBN\nwP7N9tkfGJPe/zOwmyRFxLMR8Va6fgrQTVLXCtRkZmZtVIlgWB94o2C5MV3X4j4RsRD4BFij2T4H\nA89GxBcVqMnMzNqoEsNuq4V1zWfYWO4+kvqRNC/tscwXkUYBowB69+5depVmZlaUShwxNAIbFCz3\nAt5a1j6SOgPdgVnpci/gNuDIiHh1WS8SEVdERENENPTs2bMCZZuZWUsqEQzjgE0l9ZG0AnA4cGez\nfe4k6VwGOAR4KCJCUg/gr8DPIuLJCtRiZmZlKjsY0j6DE4F7gReBWyJiiqSzJO2X7nYVsIak6cCp\nQNMprScCmwD/JWlSelur3JrMzKztVIsTbjc0NMT48ePzLsPMrKZImhARDa3t5yufzcwsw8FgZmYZ\nDgYzM8twMJiZWYaDwczMMhwMZmaW4WAwM7MMB4OZmWU4GMzMLMPBYGZmGQ4GMzPLcDCYmVmGg8HM\nzDIcDGZmluFgMDOzDAeDmZllOBjMzCzDwWBmZhkOBjMzy3AwmJlZhoPBzMwyHAxmZpbhYDAzswwH\ng5mZZTgYzMwsw8FgZmYZDgYzM8twMJiZWYaDwczMMhwMZmaW4WAwM7OMigSDpL0kTZM0XdJpLWzv\nKunmdPszkuoLtv0sXT9N0p6VqMfMzNqu7GCQVAf8Dtgb6AuMkNS32W7HAB9FxCbABcC56WP7AocD\n/YC9gEvT5zMzs5xU4ohhW2B6RMyIiPnATcD+zfbZHxiT3v8zsJskpetviogvIuKfwPT0+czMLCeV\nCIb1gTcKlhvTdS3uExELgU+ANYp8LACSRkkaL2n8+++/X4GyzcysJZUIBrWwLorcp5jHJisjroiI\nhoho6NmzZ4klmplZsSoRDI3ABgXLvYC3lrWPpM5Ad2BWkY81M7N2VIlgGA
dsKqmPpBVIOpPvbLbP\nncDI9P4hwEMREen6w9OzlvoAmwL/qEBNZmbWRp3LfYKIWCjpROBeoA74Y0RMkXQWMD4i7gSuAq6T\nNJ3kSOHw9LFTJN0CTAUWAidExKJyazIzs7ZT8sO9tjQ0NMT48ePzLsPMrKZImhARDa3t5yufzcws\nw8FgZmYZDgYzM8twMJiZWYaDwczMMhwMZmaW4WAwM7MMB4OZmWU4GMzMLMPBYGZmGQ4GMzPLcDCY\nmVmGg8HMzDIcDGZmluFgMDOzDAeDmZllOBjMzCzDwWBmZhkOBjMzy3AwmJlZhoPBzMwyHAxmZpbh\nYDAzswwHg5mZZTgYzMwsw8FgZmYZDgYzM8twMJiZWYaDwczMMhwMZmaWUVYwSFpd0v2SXkn/rraM\n/Uam+7wiaWS67huS/irpJUlTJJ1TTi1mZlYZ5R4xnAY8GBGbAg+myxmSVgfOALYDtgXOKAiQ8yJi\nC2AbYAdJe5dZj5mZlancYNgfGJPeHwMc0MI+ewL3R8SsiPgIuB/YKyLmRsTDABExH5gI9CqzHjMz\nK1O5wbB2RLwNkP5dq4V91gfeKFhuTNctIakH8C2Sow4zM8tR59Z2kPQAsE4Lm04v8jXUwrooeP7O\nwI3AxRExYzl1jAJGAfTu3bvIlzYzs1K1GgwR8S/L2ibpXUnrRsTbktYF3mtht0Zg54LlXsAjBctX\nAK9ExIWt1HFFui8NDQ2xvH3NzKztym1KuhMYmd4fCdzRwj73AntIWi3tdN4jXYeks4HuwCll1mFm\nZhVSbjCcA+wu6RVg93QZSQ2S/gAQEbOAXwDj0ttZETFLUi+S5qi+wERJkyT9oMx6zMysTIqovVaZ\nhoaGGD9+fN5lmJnVFEkTIqKhtf185bOZmWU4GMzMLMPBYGZmGQ4GMzPLcDCYmVmGg8HMzDIcDGZm\nluFgMDOzDAeDmZllOBjMzCzDwWBmZhkOBjMzy3AwmJlZhoPBzMwyHAxmZpbhYDAzswwHg5mZZTgY\nzMwsw8FgZmYZDgYzM8twMJiZWYaDwczMMhwMZmaW4WAwM7MMB4OZmWU4GMzMLMPBYGZmGQ4GMzPL\ncDCYmVmGg8HMzDIcDGZmllFWMEhaXdL9kl5J/662jP1Gpvu8ImlkC9vvlPRCObWYmVlllHvEcBrw\nYERsCjyYLmdIWh04A9gO2BY4ozBAJB0EzCmzDjMzq5Byg2F/YEx6fwxwQAv77AncHxGzIuIj4H5g\nLwBJKwOnAmeXWYeZmVVIucGwdkS8DZD+XauFfdYH3ihYbkzXAfwC+A0wt8w6zMysQjq3toOkB4B1\nWth0epGvoRbWhaSBwCYR8WNJ9UXUMQoYBdC7d+8iX9rMzErVajBExL8sa5ukdyWtGxFvS1oXeK+F\n3RqBnQuWewGPAMOAwZJmpnWsJemRiNiZFkTEFcAVAA0NDdFa3WZm1jblNiXdCTSdZTQSuKOFfe4F\n9pC0WtrpvAdwb0RcFhHrRUQ9sCPw8rJCwczM2k+5wXAOsLukV4Dd02UkNUj6A0BEzCLpSxiX3s5K\n15mZWRVSRO21yjQ0NMT48ePzLsPMrKZImhARDa3t5yufzcwsw8FgZmYZDgYzM8twMJiZWYaDwczM\nMhwMZmaW4WAwM7MMB4OZmWU4GMzMLMPBYGZmGQ4GMzPLcDCYmVmGg8HMzDIcDGZmluFgMDOzDAeD\nmZllOBjMzCzDwWBmZhkOBjMzy3AwmJlZhoPBzMwyHAxmZpbhYDAzswwHg5mZZSgi8q6hZJLeB17L\nuYw1gQ9yrqFa+LNYyp/FUv4slqqWz2LDiOjZ2k41GQzVQNL4iGjIu45q4M9iKX8WS/mzWKrWPgs3\nJZmZWYaDwczMMhwMbXdF3gVUEX8WS/mzWMqfxVI19Vm4j8HMzDJ8xGBmZhkOBjMzy3AwmJlZhoPB\nzOwrIGmHYtZVI3c+l0DSZsC/AxsCnZ
vWR8SuuRXVziQ9t6xNQETE1u1ZT54kdQMOAz4C7gL+A9gJ\neBX4RURUw5Wu7UbSN4CfAL0j4lhJmwKbR8Rfci4tF5ImRsSg1tZVo86t72IFxgKXA1cCi3KuJS+L\ngQBuIPkynJdvObm6FlgArETyhfgCcAmwI3ANsG9uleXjamACMCxdbiT5N9OhgkHSMGB7oKekUws2\nrQrU5VNVaRwMpVkYEZflXUSeImKgpC2AESThMDX9e19ELMy1uPbXNyL6S+oMNEbEN9P1f5M0Oc/C\ncrJxRBwmaQRARMyTpLyLysEKwMok36+rFKz/FDgkl4pK5GAozV2SRgO3AV80rYyIWfmV1P4i4iXg\nDOAMSYeR/HI+F/jfXAtrf/MBImKhpLeabeuIR5TzJa1IckSJpI0p+HfSUUTEo8Cjkq6JiNcAJHUC\nVo6IT/OtrjjuYyiBpH+2sDoiYqN2LyZHktYHDgcOJGlfvwW4LSLm5FpYO5P0HnATSf/KYel90uVv\nR8TaedWWB0m7Az8H+gL3ATsAR0XEI3nWlRdJNwDHkfxImAB0B86PiKr/AeVgsJJIepTk8PgW4M9A\n5mipIx09SRq5vO0RMaa9aqkWktYAhpKE49MdrQO+kKRJadPrd4HBwE+BCbVwgoaDoQSSugDHA8PT\nVY8Av4+IBbkV1c4kzSRtKij4C0vPSupQR0+2VNqf8F1go4g4S1JvYJ2I+EfOpeVC0hRgIEkf3CUR\n8aikyRExIOfSWuU+htJcBnQBLk2Xv5eu+0FuFbWziKjPu4ZqIelqsuFYKCLimPaspwpcSnLW2q7A\nWcBs4FZgSJ5F5ej3wExgMvCYpA1JOqCrno8YStBS2tfKL4BKkTQV+BNwU0TMyLuePEk6uIXVvYFT\ngLqI6NXOJeWq6Rx9Sc9GxDbpug7176M1kjrXwtl7vvK5NIvSMy0AkLQRHe/skxEkfQz3S3pG0imS\n1su7qDxExK1NN+BZYG+SpsZzgI7YpLZAUh1Lz0rqSXIE0SFJWlvSVZLuSZf7Asvtl6oWPmIogaTd\nSC7imUHSpr4h8P2IeDjXwnIiaSjJ2TgHA9OBGyPiynyral+StgROB7YhOV33T7Xwi/CrkHayHgYM\nAsaQnLP/84gYm2thOUkD4Wrg9IgYkF7v8mxEbJVzaa1yMJRIUldgc5JgeCkiOtx52s1J2hm4gOSC\nr645l9NuJI0FGoDzSM7Syhw9dqQztJqkFz/uRvLv48GIeDHnknIjaVxEDGnWtDYpIgbmXVtr3Plc\nBEm7RsRDkg5qtmljSUTE/+VSWI4kDSFpVjqYpIPtCpLhDzqSISTNJv9GMiRG4VW+QQdqTkov4Hou\nIvoDL+VdT5X4LD19t6lpbSjwSb4lFcfBUJxvAg8B32phWwAdJhgk/Q/wbeBjkgu6doiIxnyryofP\n0FoqIhZLmiypd0S8nnc9VeJU4E6SH5BPAj2pkSEx3JRkJZF0N3BORDyWLh9JctTwGnBmR2w+gSVX\ngzcfdfex/Cpqf5IeIjmK+gfwWdP6iNgvt6Jykh5BDSX5LJqanqfVyjVPPmIogaSTSTqTZpOMsDoI\nOC0i7su1sPa1DskookgaTnIGzo9ILuS5ghr5RVRJks4l6XSdytJ+hgA6VDCQDBxXOKKsSMbQ6nDS\nI6jfRMQwYEre9ZTKwVCaoyPiIkl7AmsB3ycJio4UDJ0KjgoOA65IT9e8VdKkHOvK0wEk8w509BMR\nOqcDyC2RDqrXUd2XXuvyf1FjTTMOhtI0dS7uA1wdEZM74LDCnQsu0tkNGFW4Laea8jaD5Ir4DhkM\nko4HRgMbNZvIaRXgyXyqqgqnkszVsUjSPJYOG7NqvmW1rqP+Q26rCZLuA/oAP5O0Ch3vAp4bSYYU\n/oBkkp7HASRtQo2ccfEVmAtMkvQg2eHYT8qvpHZ1A3AP8CvgtIL1sztqnxNARKzS+l7VyZ3PJUg7\nlA
YCMyLiY0mrA70iYlnTXX4tpafdrUsyOc9n6brNSMabn5hrcTlY1iirHXF0VcuStB8Fg27WyjSn\nDoYSpBN5T4qIzyQdQdL5fFHTZBxmZk0knUNyltb16aoRJMNun7bsR1UHB0MJ0vbTAcDWwHXAVcBB\nBVM6WgeUTnr/K5IJaro1rfcQ5B1b+n0xMCIWp8t1JENiVP18DB5ErzQL07ML9ic5UriI7Jyu1jFd\nTTL8+kJgF5KpTq/LtSKrFj0K7nfPrYoSufO5NLMl/YxkHoad0l8AXXKuyfK3YkQ8KElps+KZkh4n\nmRfbOq5fAc9KepjkjKThwM/yLak4DobSHAZ8h+R6hnfSGaqqfv5W+8p9np6Y8IqkE4E3Sa5zsQ4s\nIm6U9AhJP4OAn0bEO/lWVRz3MZQonYVp04h4QNI3SCZkmZ13XZafdEDBF0maDX5B0mTw64h4OtfC\nLBeSToyIS9L7/SKi5q58djCUQNKxJBd0rR4RG6edjpdHxG45l2ZmVaJpJrvm92uJm5JKcwKwLfAM\nQES8IslNBh2UpAsj4hRJd9HC3M8dcfA4+5KaHBnBwVCaLyJiftMoGOmMTD7k6riazjw6L9cqrNr0\nkHQgyVmfqzafx6UW5m9xU1IJJP2aZB6CI0lGFB0NTI2I03MtzKqGpNWADTra1fC2lKSrl7M5IuLo\ndiumjRwMJUjPPDkG2IPkEPFe4A+1NnKiVVZ65sl+JEfgk4D3gUcj4tQ86zJrKwdDkdJrFsZExBF5\n12LVpWlOX0k/IDlaOEPSc7Vwhat9dST1IGldqCc7gVPVD67oPoYiRcQiST0lrRAR8/Oux6pKZ0nr\nkkx56mZFa3I38DTwPDU2CrODoTQzgScl3Ul26sLzc6vIqsFZJM2KT0TEOEkbAa/kXJPlr1utNie6\nKakEkloc4iAi/ru9azGz6ibpx8Ac4C9k5+mo+jkqHAxmZUrPVjubZOKiv5GMwHtKRPwp18IsV5JO\nAH5JciZj0xdt1MKouw6GEizjQqZPgPHA7yPi8/avyvImaVJEDEzPXT8A+DHwcEQMyLk0y5GkV4Ht\nIuKDvGsplYfdLs0MkkPDK9Pbp8C7wGbpsnVMTSPs7gPcWAtNBdYuppBM+1pz3Plcmm0iYnjB8l2S\nHouI4ZJqbqAsq5i7JL1E0pQ0WlJPwEePtohkLvCHqbG5wB0MpekpqXdEvA6QDru9ZrrNp7B2UBFx\nmqRzgU/T05o/I5nMyTq229NbzXEwlOYnwBNp26GAPiS/EFcCPPF7ByXpyIL7hZuubf9qrFpExBhJ\nK5A0NQMxGp3eAAAGuElEQVRMi4gFedZULHc+l0hSV2ALkmB4yR3OJum3BYvdgN2AiRFxSE4lWRWQ\ntDPJD8aZJN8XGwAjI+KxHMsqioOhBOnEPKcCG0bEsel8DJtHxF9yLs2qiKTuwHUedrtjkzQB+E5E\nTEuXNyM5OWFwvpW1zmclleZqkr6EYelyI8n562aF5gKb5l2E5a5LUygARMTL1Mgc8e5jKM3GEXGY\npBEAETFPzRqVreNpdn1LJ6AvcEt+FVmVGC/pKpbO2/FdYEKO9RTNwVCa+ZJWJP0SkLQxBaehWYdV\nOFHPQuC1iGjMqxirGseTzPp4Ekkfw2PApblWVCT3MZRA0u7Az0l+Ed4H7AAcFRGP5FmXmVklORhK\nJGkNYCjJL4Cna/Fyd6ssSUOB3wJbAisAdcBnEbFqroVZLiQ9z3Km/K2FeTrclFSiiPgQ+CuApM0l\n/Soijs25LMvXJcDhwFiggWRylk1yrcjytG/694T0b2EfQ00MkeGzkoogaWtJ90l6QdLZktaWdCvw\nIDA17/osfxExHaiLiEURcTWwS941WT4i4rWIeA3YISL+IyKeT2+nAXvmXV8xHAzFuRK4ATiYZD7f\niSQD6m0SERfkWZhVhbnpFa6TJf06HYd/pbyLstytJGnHpgVJ21Mj
/1+4j6EITcMqFyy/AdRHxKIc\ny7IqIWlDklF2VyAZcntV4LL0KMI6KEmDgT8C3dNVHwNHR8TE/KoqjvsYitNN0jYkHc6QDL29ddM1\nDLXwH9oqT9L+QK+I+F26/CiwFknH498BB0MHFhETgAGSViX5Ef5J3jUVy0cMRUiHzV2WiIhd260Y\nqxqSngQOj4g30uVJwK7AysDVEbFbnvVZvtJx1Q4G6in4ER4RZ+VVU7F8xFCEiHBHorVkhaZQSD2R\nTtIzKx1x1zq2O0hmeJxAjV0I6yOGEqRzuF4fER+ny6sBIyKiJq5mtMqSND0iWjwtVdKrEbFxe9dk\n1UPSCxHRP+862sJnJZXm2KZQAIiIjwBfw9BxPSPpS//9Jf0Q+EcO9Vh1eUrSVnkX0RY+YiiBpOeA\nAZF+aJLqgOciol++lVkeJK1FMkPXFySnMAMMBroCB0TEu3nVZvmTNJXkQsd/kvw/IpI+yaq/8tnB\nUAJJ/0vSkXQ5yZknxwFvRMRP8qzL8iVpV6Dpx8GUiHgoz3qsOqSnMX9JevFbVXMwlEBSJ+CHJDN0\niWQgvT/4egYzW5b0yLJb03LTnPHVzMFgZvYVkLQf8BtgPeA9YEPgxVpoevbpqkWQdEtEfHtZoybW\nQpuhmbW7X5CMxPxARGwjaRdgRM41FcXBUJyT07/7LncvM7OlFkTEh5I6SeoUEQ9LOjfvoorh01WL\nEBFvp3dHN42cWDCC4ug8azOzqvWxpJVJZm67XtJFJDP8VT33MZRA0sSIGNRs3XNuSjKz5tKr3+eR\n/AD/Lslgetenc7pUNQdDESQdT3JksDHZgdFWAZ6MiCNyKczMakZ63dPhEXF93rW0xsFQBEndgdWA\nXwGnFWyanY6NY2YGQDqa6gnA+sCdwP3p8r8DkyJi/xzLK4qDoQSSNgYaI+ILSTsDWwPXFg6TYWYd\nm6Q7gI9Ihl7fjeRH5QrAyRExKc/aiuVgKEE6rHIDydXP95L8Gtg8IvbJsy4zqx6Sno+IrdL7dcAH\nQO+ImJ1vZcXzWUmlWRwRC4GDgAsj4sfAujnXZGbVZUHTnXRUhH/WUiiAr2Mo1QJJI4AjgW+l67rk\nWI+ZVZ8Bkj5N7wtYMV1uGkRv1fxKK46DoTTfJxk475cR8U9JfYA/5VyTmVWRiKjLu4ZyuY/BzMwy\nfMRQBI+VZGYdiY8YiiBp3Yh4u5bHVzczK5aDwczMMtyUVAJJs/lyU9InwHjgJxExo/2rMjOrLAdD\nac4H3gJuIDn17HBgHWAa8Edg59wqMzOrEDcllUDSMxGxXbN1T0fEUEmTI2JAXrWZmVWKr3wuzWJJ\n326aeEPStwu2OWHN7GvBRwwlkLQRcBEwLF31d+DHwJvA4Ih4Iq/azMwqxcFgZmYZbkoqgaRekm6T\n9J6kdyXdKqlX3nWZmVWSg6E0V5MMtb0eySQcd6XrzMy+NtyUVAJJkyJiYGvrzMxqmY8YSvOBpCMk\n1aW3I4Cqn9jbzKwUPmIogaTewCUkZyUF8BRwUkS8nmthZmYV5GAok6RTIuLCvOswM6sUB0OZJL0e\nEb3zrsPMrFLcx1A+5V2AmVklORjK50MuM/ta8eiqRVjGcNuQTvTdzuWYmX2l3MdgZmYZbkoyM7MM\nB4OZmWU4GMzMLMPBYGZmGQ4GMzPL+P/jbmu9lD9o+AAAAABJRU5ErkJggg==\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x1157b3c88>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.svm import SVC\n",
"from sklearn.naive_bayes import GaussianNB\n",
"from sklearn import tree\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.metrics import f1_score,recall_score,precision_score\n",
"from sklearn.metrics import classification_report\n",
"\n",
"import random\n",
"examples = examples_df.values.copy()\n",
"print(len(examples[0]))\n",
"#只取25个流统计特征\n",
"examples = np.c_[examples[:,:25].copy(),examples[:,-1].copy()]\n",
"#print(examples)\n",
"score_df = pd.DataFrame(np.zeros((5,3)),index = ['LogisticRegression', 'SVM', 'GaussianNB', 'tree', 'RandomForest'], \\\n",
" columns = ['precision', 'recall', 'f1'])\n",
"#def a():\n",
"\n",
"\n",
"f1_score_list = list()\n",
"recall_score_list = list()\n",
"precision_score_list = list()\n",
"\n",
"class_list = [\"alipay\", \"bilibili\", \"douyin\", \"ele\", \"evernote\", \"gaode\", \"jd\", \"meituan\", \"weibo\", \"zhihu\"]\n",
"\n",
"\"\"\"\n",
"for i in range(1):\n",
" np.random.shuffle(examples)\n",
" examples_train = examples[:int(len(examples)*0.75)]\n",
" examples_test = examples[int(len(examples)*0.75):]\n",
" x_train = examples_train[:,0:-1]\n",
" y_train = examples_train[:,-1]\n",
" x_test = examples_test[:,0:-1]\n",
" y_test = examples_test[:,-1]\n",
" classifer = LogisticRegression()\n",
" classifer.fit(x_train, y_train)\n",
" y_pred = classifer.predict(x_test)\n",
" f1_score_list.append(f1_score(y_test, y_pred, average='micro'))\n",
" recall_score_list.append(recall_score(y_test, y_pred, average='micro'))\n",
" precision_score_list.append(precision_score(y_test, y_pred, average='micro'))\n",
"scores = [np.mean(precision_score_list), np.mean(recall_score_list), np.mean(f1_score_list)]\n",
"score_df.loc['LogisticRegression'] = scores\n",
"\n",
"f1_score_list = list()\n",
"recall_score_list = list()\n",
"precision_score_list = list()\n",
"for i in range(1):\n",
" np.random.shuffle(examples)\n",
" examples_train = examples[:int(len(examples)*0.75)]\n",
" examples_test = examples[int(len(examples)*0.75):]\n",
" x_train = examples_train[:,0:-1]\n",
" y_train = examples_train[:,-1]\n",
" x_test = examples_test[:,0:-1]\n",
" y_test = examples_test[:,-1]\n",
" classifer = SVC()\n",
" classifer.fit(x_train, y_train)\n",
" y_pred = classifer.predict(x_test)\n",
" f1_score_list.append(f1_score(y_test, y_pred, average='micro'))\n",
" recall_score_list.append(recall_score(y_test, y_pred, average='micro'))\n",
" precision_score_list.append(precision_score(y_test, y_pred, average='micro'))\n",
"scores = [np.mean(precision_score_list), np.mean(recall_score_list), np.mean(f1_score_list)]\n",
"score_df.loc['SVM'] = scores\n",
"\n",
"f1_score_list = list()\n",
"recall_score_list = list()\n",
"precision_score_list = list()\n",
"for i in range(1):\n",
" np.random.shuffle(examples)\n",
" examples_train = examples[:int(len(examples)*0.75)]\n",
" examples_test = examples[int(len(examples)*0.75):]\n",
" x_train = examples_train[:,0:-1]\n",
" y_train = examples_train[:,-1]\n",
" x_test = examples_test[:,0:-1]\n",
" y_test = examples_test[:,-1]\n",
" classifer = GaussianNB()\n",
" classifer.fit(x_train, y_train)\n",
" y_pred = classifer.predict(x_test)\n",
" f1_score_list.append(f1_score(y_test, y_pred, average='micro'))\n",
" recall_score_list.append(recall_score(y_test, y_pred, average='micro'))\n",
" precision_score_list.append(precision_score(y_test, y_pred, average='micro'))\n",
"scores = [np.mean(precision_score_list), np.mean(recall_score_list), np.mean(f1_score_list)]\n",
"score_df.loc['GaussianNB'] = scores\n",
"\n",
"f1_score_list = list()\n",
"recall_score_list = list()\n",
"precision_score_list = list()\n",
"for i in range(1):\n",
" np.random.shuffle(examples)\n",
" examples_train = examples[:int(len(examples)*0.75)]\n",
" examples_test = examples[int(len(examples)*0.75):]\n",
" x_train = examples_train[:,0:-1]\n",
" y_train = examples_train[:,-1]\n",
" x_test = examples_test[:,0:-1]\n",
" y_test = examples_test[:,-1]\n",
" classifer = tree.DecisionTreeClassifier()\n",
" classifer.fit(x_train, y_train)\n",
" y_pred = classifer.predict(x_test)\n",
" f1_score_list.append(f1_score(y_test, y_pred, average='micro'))\n",
" recall_score_list.append(recall_score(y_test, y_pred, average='micro'))\n",
" precision_score_list.append(precision_score(y_test, y_pred, average='micro'))\n",
"scores = [np.mean(precision_score_list), np.mean(recall_score_list), np.mean(f1_score_list)]\n",
"score_df.loc['tree'] = scores\n",
"\"\"\"\n",
"\n",
"\n",
"f1_score_list = list()\n",
"recall_score_list = list()\n",
"precision_score_list = list()\n",
"for i in range(10):\n",
" np.random.shuffle(examples)\n",
" examples_train = examples[:int(len(examples)*0.75)]\n",
" examples_test = examples[int(len(examples)*0.75):]\n",
" x_train = examples_train[:,0:-1]\n",
" y_train = examples_train[:,-1]\n",
" x_test = examples_test[:,0:-1]\n",
" y_test = examples_test[:,-1]\n",
" classifer = RandomForestClassifier()\n",
" classifer.fit(x_train, y_train)\n",
" y_pred = classifer.predict(x_test)\n",
" f1_score_list.append(f1_score(y_test, y_pred, average=None))\n",
" recall_score_list.append(recall_score(y_test, y_pred, average=None))\n",
" precision_score_list.append(precision_score(y_test, y_pred, average=None))\n",
" \n",
"scores = [np.mean(precision_score_list, axis=0), np.mean(recall_score_list, axis=0), np.mean(f1_score_list, axis=0)]\n",
"print(\"RandomForest: \")\n",
"for score in scores:\n",
" print(score)\n",
" \n",
"#score_df.loc['RandomForest'] = scores\n",
"#print(score_df)\n",
"#ax = score_df.plot.bar(title='statistics_feature')\n",
"#fig = ax.get_figure()\n",
"#fig.savefig('base_feature.svg')\n",
"#print(score_df)"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
"\n",
" 0 0.67 1.00 0.80 2\n",
" 1 0.00 0.00 0.00 1\n",
" 2 1.00 1.00 1.00 2\n",
"\n",
"avg / total 0.67 0.80 0.72 5\n",
"\n",
"[ 0.66666667 0. 1. ]\n",
"[ 1. 0. 1.]\n",
"[ 0.8 0. 1. ]\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/Leo/anaconda3/lib/python3.6/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.\n",
" 'precision', 'predicted', average, warn_for)\n",
"/Users/Leo/anaconda3/lib/python3.6/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples.\n",
" 'precision', 'predicted', average, warn_for)\n",
"/Users/Leo/anaconda3/lib/python3.6/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.\n",
" 'precision', 'predicted', average, warn_for)\n"
]
},
{
"data": {
"text/plain": [
"array([ 3., 4., 5., 6.])"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.metrics import classification_report\n",
"y_test = [0, 1, 2, 2, 0]\n",
"y_pred = [0, 0, 2, 2, 0]\n",
"\n",
"y_test = [\"ali\", \"douyin\", \"zhifubao\", \"zhifubao\", \"ali\"]\n",
"y_pred = [\"ali\", \"ali\", \"zhifubao\", \"zhifubao\", \"ali\"]\n",
"target_names = ['0', '1', '2']\n",
"print(classification_report(y_test, y_pred, target_names=target_names))\n",
"\n",
"print(precision_score(y_test, y_pred, average=None))\n",
"print(recall_score(y_test, y_pred, average=None))\n",
"print(f1_score(y_test, y_pred, average=None))\n",
"\n",
"z = [[1,2,3,4],[5,6,7,8]]\n",
"\n",
"np.mean(z, axis=0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}