增加解析tls message信息
This commit is contained in:
@@ -1 +1,14 @@
|
||||
Wed Dec 4 15:20:57 2019, INFO, __write_access_log, 192.168.50.193 50754 121.51.142.32 443, dp3.qq.com, VBBaseCore 0.1.15 rv:1 (iPhone; iOS 12.4.1; zh_CN)
|
||||
Fri Dec 6 21:33:08 2019, INFO, __write_access_log, 192.168.50.193 59972 182.254.52.230 443, inews.gtimg.com, WeChat/7.0.8.32 CFNetwork/978.0.7 Darwin/18.7.0
|
||||
Fri Dec 6 21:33:08 2019, INFO, __write_access_log, 192.168.50.193 59972 182.254.52.230 443, inews.gtimg.com, WeChat/7.0.8.32 CFNetwork/978.0.7 Darwin/18.7.0
|
||||
Fri Dec 6 21:33:08 2019, INFO, __write_access_log, 192.168.50.193 59976 182.254.52.230 443, inews.gtimg.com, WeChat/7.0.8.32 CFNetwork/978.0.7 Darwin/18.7.0
|
||||
Fri Dec 6 21:33:08 2019, INFO, __write_access_log, 192.168.50.193 59977 182.254.52.230 443, inews.gtimg.com, WeChat/7.0.8.32 CFNetwork/978.0.7 Darwin/18.7.0
|
||||
Fri Dec 6 21:47:32 2019, INFO, __write_access_log, 192.168.50.193 60797 182.254.52.87 443, d.url.cn, QQ/8.1.8.429 CFNetwork/978.0.7 Darwin/18.7.0
|
||||
Fri Dec 6 21:47:32 2019, INFO, __write_access_log, 192.168.50.193 60805 182.254.52.87 80, dl.url.cn, QQ/8.1.8.429 CFNetwork/978.0.7 Darwin/18.7.0
|
||||
Fri Dec 6 21:47:32 2019, INFO, __write_access_log, 192.168.50.193 60807 182.254.52.100 80, 3gimg.qq.com, QQ/8.1.8.429 CFNetwork/978.0.7 Darwin/18.7.0
|
||||
Fri Dec 6 21:47:32 2019, INFO, __write_access_log, 192.168.50.193 60806 182.254.52.100 80, 3gimg.qq.com, QQ/8.1.8.429 CFNetwork/978.0.7 Darwin/18.7.0
|
||||
Fri Dec 6 21:54:20 2019, INFO, __write_access_log, 192.168.50.193 61250 123.125.84.232 443, search.video.iqiyi.com, QIYIVideo/10.11.0 (iOS;com.qiyi.iphone;iOS12.4.1;iPhone11,8) Corejar
|
||||
Fri Dec 6 21:54:20 2019, INFO, __write_access_log, 192.168.50.193 61260 49.7.32.101 80, opportunarch.iqiyi.com, QIYIVideo/10.11.0 (iOS;com.qiyi.iphone;iOS12.4.1;iPhone11,8) Corejar
|
||||
Fri Dec 6 21:54:20 2019, INFO, __write_access_log, 192.168.50.193 61262 116.211.198.208 80, ifacelog.iqiyi.com, QIYIVideo/10.11.0 (iOS;com.qiyi.iphone;iOS12.4.1;iPhone11,8) Corejar
|
||||
Fri Dec 6 22:20:30 2019, INFO, __write_access_log, 192.168.50.193 62499 182.18.17.109 80, p9-dy.byteimg.com, Aweme/89017 CFNetwork/978.0.7 Darwin/18.7.0
|
||||
Fri Dec 6 22:20:30 2019, INFO, __write_access_log, 192.168.50.193 62497 182.18.17.109 80, p9-dy.byteimg.com, Aweme/89017 CFNetwork/978.0.7 Darwin/18.7.0
|
||||
Fri Dec 6 22:20:32 2019, INFO, __write_access_log, 192.168.50.193 62498 122.14.43.125 443, p3-dy.byteimg.com, Aweme/89017 CFNetwork/978.0.7 Darwin/18.7.0
|
||||
@@ -1,4 +1,4 @@
|
||||
add_library(stmstat SHARED src/sslstat_entry.cpp src/ssl_utils.cpp)
|
||||
add_library(stmstat SHARED src/stmstat_entry.cpp src/ssl_utils.cpp)
|
||||
target_include_directories(stmstat PUBLIC ${CMAKE_CURRENT_LIST_DIR}/include)
|
||||
target_link_libraries(stmstat MESA_prof_load MESA_field_stat cjson)
|
||||
|
||||
|
||||
@@ -44,6 +44,42 @@ int g_stream_fail_count = 0;
|
||||
int g_log_succ_count = 0;
|
||||
int g_exceed_max_pkts_count = 0;
|
||||
|
||||
/*
 * One row of the TLS message-type lookup table (g_tls_types): maps a
 * record-layer content type, plus the handshake message type for
 * handshake records, to a flat numeric id and a printable name.
 */
struct tls_message_type{
    int message_type;    /* flat id; mirrors the entry's index in g_tls_types */
    int content_type;    /* TLS record-layer content type (22 = handshake) */
    int handshake_type;  /* handshake message type; 0 for non-handshake records */
    const char *name;    /* printable message name */
};
|
||||
|
||||
/*
 * TLS message-type lookup table, searched linearly by
 * get_tls_message_type().  Handshake entries (content type 22) follow the
 * IANA TLS HandshakeType registry (RFC 8446 / RFC 5246); the remaining
 * entries are the other record-layer content types.  The first field
 * mirrors the array index so it can be emitted directly as an id.
 */
struct tls_message_type g_tls_types[] = {
    {0, 22, 0, "hello_request_RESERVED"},
    {1, 22, 1, "client_hello"},
    {2, 22, 2, "server_hello"},
    {3, 22, 3, "hello_verify_request_RESERVED"},
    {4, 22, 4, "new_session_ticket"},
    {5, 22, 5, "end_of_early_data"},
    {6, 22, 6, "hello_retry_request_RESERVED"},
    {7, 22, 8, "encrypted_extensions"},
    {8, 22, 11, "certificate"},
    {9, 22, 12, "server_key_exchange_RESERVED"},
    {10, 22, 13, "certificate_request"},
    {11, 22, 14, "server_hello_done_RESERVED"},
    {12, 22, 15, "certificate_verify"},
    {13, 22, 16, "client_key_exchange_RESERVED"},
    {14, 22, 20, "finished"},
    {15, 22, 21, "certificate_url_RESERVED"},
    {16, 22, 22, "certificate_status_RESERVED"},
    {17, 22, 23, "supplemental_data_RESERVED"},
    {18, 22, 24, "key_update"},
    {19, 22, 25, "compressed_certificate"},
    {20, 22, 254, "message_hash"},
    {21, 20, 0, "change_cipher_spec"},
    {22, 21, 0, "alert"},
    {23, 23, 0, "application_data"},
    {24, 24, 0, "heartbeat"},
    {25, 25, 0, "tls12_cid"},
};
|
||||
|
||||
struct pkt_stat_info{
|
||||
struct timeval pkt_time;
|
||||
int bytes;
|
||||
@@ -65,6 +101,12 @@ struct pkt_parsed_info{
|
||||
uint16_t data_len;
|
||||
};
|
||||
|
||||
/* One parsed TLS record, stored per stream by tls_header_parse(). */
struct tls_message_info{
    int dir;     /* packet direction the record arrived on (stream->curdir) */
    int type;    /* flat message-type id: index into g_tls_types */
    int length;  /* record payload length taken from the TLS record header */
};
|
||||
|
||||
struct pme_info{
|
||||
int _errno;
|
||||
char sip[INET_ADDRSTRLEN];
|
||||
@@ -83,6 +125,8 @@ struct pme_info{
|
||||
int last_s2c_pkt_index;
|
||||
struct pkt_stat_info pkt_info_list[STREAM_PACKET_COUNT_MAX];
|
||||
struct ssl_chello chello;
|
||||
int tls_message_count;
|
||||
struct tls_message_info tls_info_list[STREAM_PACKET_COUNT_MAX];
|
||||
};
|
||||
|
||||
int ipv4_header_parse(const void *a_packet, struct pkt_parsed_info* pktinfo){
|
||||
@@ -108,7 +152,7 @@ int ipv4_header_parse(const void *a_packet, struct pkt_parsed_info* pktinfo){
|
||||
|
||||
int packet_stat(struct streaminfo *stream, struct pme_info *pmeinfo, struct pkt_parsed_info* pktinfo){
|
||||
if(pmeinfo->total_pkts == STREAM_PACKET_COUNT_MAX){
|
||||
printf("packet nums > STREAM_PACKET_COUNT_MAX\n");
|
||||
LOG_INFO(g_logger, "packet nums > STREAM_PACKET_COUNT_MAX(%d)\n", STREAM_PACKET_COUNT_MAX);
|
||||
g_exceed_max_pkts_count++;
|
||||
return -1;
|
||||
}
|
||||
@@ -143,12 +187,59 @@ int chello_packet_parse(struct pme_info *pmeinfo, struct pkt_parsed_info *pktinf
|
||||
int len = pktinfo->data_len;
|
||||
ssl_chello_parse(&(pmeinfo->chello), (const unsigned char*)buff, len, &chello_status);
|
||||
if(chello_status != CHELLO_PARSE_SUCCESS){
|
||||
printf("Error: chello parse failed\n");
|
||||
LOG_ERROR(g_logger, "Error: chello parse failed\n");
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int get_tls_message_type(int content_type, int handshake_type){
|
||||
int type_count = sizeof(g_tls_types) / sizeof(struct tls_message_type);
|
||||
for(int i = 0; i < type_count; i++){
|
||||
if(g_tls_types[i].content_type == content_type && g_tls_types[i].handshake_type == handshake_type){
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
int tls_header_parse(struct streaminfo *stream, struct pme_info *pmeinfo, struct pkt_parsed_info *pktinfo){
|
||||
unsigned char *buff = (unsigned char*)pktinfo->data;
|
||||
int len = pktinfo->data_len;
|
||||
int i = 0;
|
||||
while(i < len){
|
||||
if(i + 4 >= len){
|
||||
return -1;
|
||||
}
|
||||
int content_type = buff[i];
|
||||
int handshake_type = 0;
|
||||
if(buff[i] == 0x16){
|
||||
if(i + 5 >= len){
|
||||
return -1;
|
||||
}
|
||||
handshake_type = buff[i + 5];
|
||||
}
|
||||
int message_type = get_tls_message_type(content_type, handshake_type);
|
||||
if(message_type < 0){
|
||||
return -1;
|
||||
}
|
||||
int version = (uint16_t)(buff[i + 1] << 8) + (uint8_t)buff[i + 2];
|
||||
if(version < 0x0300 || version > 0x0304){
|
||||
return -1;
|
||||
}
|
||||
int len = (uint16_t)(buff[i + 3] << 8) + (uint8_t)buff[i + 4];
|
||||
if(len < 0){
|
||||
printf("%02hhx %02hhx\n", buff[i + 3], buff[i + 4]);
|
||||
}
|
||||
pmeinfo->tls_info_list[pmeinfo->tls_message_count].dir = stream->curdir;
|
||||
pmeinfo->tls_info_list[pmeinfo->tls_message_count].type = message_type;
|
||||
pmeinfo->tls_info_list[pmeinfo->tls_message_count].length = len;
|
||||
pmeinfo->tls_message_count++;
|
||||
i += (5 + len);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int packet_need_filter(struct pkt_parsed_info *pktinfo){
|
||||
struct iphdr *_iphdr = pktinfo->iphdr.v4;
|
||||
int ttl = _iphdr->ttl;
|
||||
@@ -168,6 +259,7 @@ char pending_opstate(struct streaminfo *stream, struct pme_info *pmeinfo, struct
|
||||
pmeinfo->last_c2s_pkt_index = -1;
|
||||
pmeinfo->last_s2c_pkt_index = -1;
|
||||
get_rawpkt_opt_from_streaminfo(stream, RAW_PKT_GET_TIMESTAMP, &(pmeinfo->start_time));
|
||||
get_rawpkt_opt_from_streaminfo(stream, RAW_PKT_GET_TIMESTAMP, &(pmeinfo->end_time));
|
||||
struct stream_tuple4_v4 *tuple4 = stream->addr.tuple4_v4;
|
||||
inet_ntop(AF_INET, &(tuple4->saddr), pmeinfo->sip, INET_ADDRSTRLEN);
|
||||
inet_ntop(AF_INET, &(tuple4->daddr), pmeinfo->dip, INET_ADDRSTRLEN);
|
||||
@@ -181,12 +273,15 @@ char pending_opstate(struct streaminfo *stream, struct pme_info *pmeinfo, struct
|
||||
pmeinfo->_errno = -1;
|
||||
return APP_STATE_FAWPKT | APP_STATE_DROPME;
|
||||
}
|
||||
tls_header_parse(stream, pmeinfo, pktinfo);
|
||||
packet_stat(stream, pmeinfo, pktinfo);
|
||||
return APP_STATE_FAWPKT | APP_STATE_GIVEME;
|
||||
}
|
||||
|
||||
char data_opstate(struct streaminfo *stream, struct pme_info *pmeinfo, struct pkt_parsed_info *pktinfo){
|
||||
get_rawpkt_opt_from_streaminfo(stream, RAW_PKT_GET_TIMESTAMP, &(pmeinfo->end_time));
|
||||
if(packet_need_filter(pktinfo) == 0){
|
||||
tls_header_parse(stream, pmeinfo, pktinfo);
|
||||
int ret = packet_stat(stream, pmeinfo, pktinfo);
|
||||
if(ret == -1){
|
||||
return APP_STATE_FAWPKT | APP_STATE_DROPME;
|
||||
@@ -195,6 +290,18 @@ char data_opstate(struct streaminfo *stream, struct pme_info *pmeinfo, struct pk
|
||||
return APP_STATE_FAWPKT | APP_STATE_GIVEME;
|
||||
}
|
||||
|
||||
/*
 * Format a timeval as "YYYY-MM-DD HH:MM:SS.uuuuuu" (local time) into buf.
 *
 * buf/buflen: destination buffer and its size; output is NUL-terminated
 * (truncated by snprintf if buflen is too small).  On an unrepresentable
 * time (localtime returning NULL) buf becomes the empty string instead of
 * dereferencing a NULL struct tm.
 *
 * NOTE(review): localtime() is not thread-safe; fine while all logging
 * happens on one thread -- confirm before going multi-threaded.
 */
void time_tostring(struct timeval tv, char *buf, int buflen){
    char tmbuf[64];
    time_t nowtime = tv.tv_sec;
    struct tm *nowtm = localtime(&nowtime);
    if(buflen <= 0){
        return;
    }
    if(nowtm == NULL){
        buf[0] = '\0';
        return;
    }
    strftime(tmbuf, sizeof(tmbuf), "%Y-%m-%d %H:%M:%S", nowtm);
    /* Cast: tv_usec is long on glibc but a narrower type on some platforms,
     * so %06ld needs an explicit (long). */
    snprintf(buf, buflen, "%s.%06ld", tmbuf, (long)tv.tv_usec);
    return;
}
|
||||
|
||||
void output_result(struct pme_info *pmeinfo){
|
||||
cJSON *log_obj = cJSON_CreateObject();
|
||||
cJSON_AddStringToObject(log_obj, "sip", pmeinfo->sip);
|
||||
@@ -207,6 +314,12 @@ void output_result(struct pme_info *pmeinfo){
|
||||
cJSON_AddNumberToObject(log_obj, "c2s_pkts", pmeinfo->c2s_pkts);
|
||||
cJSON_AddNumberToObject(log_obj, "s2c_pkts", pmeinfo->s2c_pkts);
|
||||
cJSON_AddNumberToObject(log_obj, "total_pkts", pmeinfo->total_pkts);
|
||||
char time_str[64] = {};
|
||||
time_tostring(pmeinfo->start_time, time_str, sizeof(time_str));
|
||||
cJSON_AddStringToObject(log_obj, "start_time", time_str);
|
||||
time_tostring(pmeinfo->end_time, time_str, sizeof(time_str));
|
||||
cJSON_AddStringToObject(log_obj, "end_time", time_str);
|
||||
pmeinfo->duration = (pmeinfo->end_time.tv_sec - pmeinfo->start_time.tv_sec) * 1000 + (pmeinfo->end_time.tv_usec - pmeinfo->start_time.tv_usec) / 1000;
|
||||
cJSON_AddNumberToObject(log_obj, "duration", pmeinfo->duration);
|
||||
cJSON *pkt_info_list = cJSON_CreateArray();
|
||||
for(int i = 0; i < pmeinfo->total_pkts; i++){
|
||||
@@ -214,6 +327,8 @@ void output_result(struct pme_info *pmeinfo){
|
||||
cJSON_AddNumberToObject(pkt_info, "bytes", pmeinfo->pkt_info_list[i].bytes);
|
||||
cJSON_AddNumberToObject(pkt_info, "dir", pmeinfo->pkt_info_list[i].dir);
|
||||
cJSON_AddNumberToObject(pkt_info, "interval", pmeinfo->pkt_info_list[i].interval);
|
||||
time_tostring(pmeinfo->pkt_info_list[i].pkt_time, time_str, sizeof(time_str));
|
||||
cJSON_AddStringToObject(pkt_info, "pkt_time", time_str);
|
||||
cJSON_AddItemToArray(pkt_info_list, pkt_info);
|
||||
}
|
||||
cJSON_AddItemToObject(log_obj, "packets", pkt_info_list);
|
||||
@@ -234,6 +349,17 @@ void output_result(struct pme_info *pmeinfo){
|
||||
}
|
||||
cJSON_AddItemToObject(tls_info, "extensions_list", extensions_list);
|
||||
cJSON_AddItemToObject(log_obj, "tls", tls_info);
|
||||
|
||||
cJSON *tls_message_list = cJSON_CreateArray();
|
||||
cJSON_AddItemToObject(tls_info, "tls_message_list", tls_message_list);
|
||||
for(int i = 0; i < pmeinfo->tls_message_count; i++){
|
||||
cJSON *tls_message = cJSON_CreateObject();
|
||||
cJSON_AddNumberToObject(tls_message, "dir", pmeinfo->tls_info_list[i].dir);
|
||||
cJSON_AddNumberToObject(tls_message, "type", pmeinfo->tls_info_list[i].type);
|
||||
cJSON_AddNumberToObject(tls_message, "length", pmeinfo->tls_info_list[i].length);
|
||||
cJSON_AddItemToArray(tls_message_list, tls_message);
|
||||
}
|
||||
|
||||
char *log_msg = cJSON_PrintUnformatted(log_obj);
|
||||
//printf("%s\n\n", log_msg);
|
||||
LOG_INFO(g_logger, log_msg);
|
||||
@@ -246,12 +372,12 @@ void output_result(struct pme_info *pmeinfo){
|
||||
|
||||
/*
 * Stream-close callback: fold the final packet (if any) into the
 * per-stream TLS/packet statistics, stamp the end time and total
 * duration, then tell the framework to drop the stream.
 *
 * a_packet may be NULL when the stream is torn down without a final packet.
 */
char close_opstate(struct streaminfo *stream, struct pme_info *pmeinfo, struct pkt_parsed_info *pktinfo, const void *a_packet){
    if(a_packet != NULL){
        get_rawpkt_opt_from_streaminfo(stream, RAW_PKT_GET_TIMESTAMP, &(pmeinfo->end_time));
        if(packet_need_filter(pktinfo) == 0){
            tls_header_parse(stream, pmeinfo, pktinfo);
            packet_stat(stream, pmeinfo, pktinfo);
        }
    }
    /* NOTE(review): end_time is fetched again even when the branch above
     * already set it -- looks redundant; confirm one fetch can go. */
    get_rawpkt_opt_from_streaminfo(stream, RAW_PKT_GET_TIMESTAMP, &(pmeinfo->end_time));
    /* Stream duration in milliseconds. */
    pmeinfo->duration = (pmeinfo->end_time.tv_sec - pmeinfo->start_time.tv_sec) * 1000 + (pmeinfo->end_time.tv_usec - pmeinfo->start_time.tv_usec) / 1000;
    return APP_STATE_FAWPKT | APP_STATE_DROPME;
}
|
||||
|
||||
@@ -262,8 +388,8 @@ void pme_info_destroy(struct pme_info *pmeinfo){
|
||||
|
||||
extern "C" char stmstat_entry(struct streaminfo *stream, void** pme, int thread_seq, const void* a_packet){
|
||||
if(g_count % 10 == 5){
|
||||
printf("handle %d packets\n", g_count);
|
||||
printf("stream_count: %d\nsucc_count: %d\nfail_count: %d\ng_log_succ_count: %d, g_exceed_max_pkts_count: %d\n",
|
||||
LOG_DEBUG(g_logger, "handle %d packets\n", g_count);
|
||||
LOG_DEBUG(g_logger, "stream_count: %d\nsucc_count: %d\nfail_count: %d\ng_log_succ_count: %d, g_exceed_max_pkts_count: %d\n",
|
||||
g_stream_count, g_stream_succ_count, g_stream_fail_count, g_log_succ_count, g_exceed_max_pkts_count);
|
||||
}
|
||||
g_count++;
|
||||
|
||||
1772
DataSet/result/2019-12-06-0/examples.csv
Normal file
1772
DataSet/result/2019-12-06-0/examples.csv
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
77
Experiment/expertFeature/expertCsvBuild.py
Normal file
77
Experiment/expertFeature/expertCsvBuild.py
Normal file
@@ -0,0 +1,77 @@
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
PREFIX_DIR = "/Users/Leo/Documents/github/GradProj/"
|
||||
|
||||
def label_dict_build(date):
    """Build a {(sip, sport, dip, dport): label} dict from stream_tag.txt.

    The tag file is whitespace-separated with no header; column 3 is
    overwritten with 443 (presumably the server port, since every labelled
    stream is HTTPS -- TODO confirm) so the key matches the JSON records.
    """
    example_label_file = PREFIX_DIR + 'DataSet/result/' + date + '/stream_tag.txt'
    # pd.read_table is deprecated; read_csv with a raw-string regex
    # separator also fixes the invalid '\s' escape warning.
    example_label_df = pd.read_csv(example_label_file, sep=r'\s+', header=None)
    example_label_df[3] = 443
    example_label = {
        tuple(example_label_df.iloc[i, 0:4].values): example_label_df.iloc[i, 4]
        for i in example_label_df.index
    }
    return example_label
||||
|
||||
# Per-label set of observed SAN counts, filled by main() and printed at the
# end -- exploratory scaffolding for eyeballing per-app certificate patterns.
app_cert = dict()


def main():
    """Scan the per-date ssl_stat.txt JSON-lines file, join each stream with
    its label and collect per-label SAN counts.

    Exploratory script: the CSV-building code at the bottom is intentionally
    commented out and only the per-label sets are printed.
    """
    date = sys.argv[1]
    example_label = label_dict_build(date)
    #print(example_label)
    row_count = 1771
    cloumn_count = 25
    example_json_file = PREFIX_DIR + 'DataSet/result/' + date + '/ssl_stat.txt'
    array_shape = (row_count, cloumn_count)
    result_data = np.zeros(array_shape)
    result_label = list()
    i = 0
    # 'with' guarantees the file is closed (it was leaked before), and
    # iterating the handle avoids loading the whole file via readlines().
    with open(example_json_file, 'r') as example_json_f:
        for line in example_json_f:
            example_json = json.loads(line)
            # label: skip streams that were never tagged
            try:
                flow_key = (example_json['sip'], example_json['sport'], example_json['dip'], example_json['dport'])
                label = example_label[flow_key]
            except Exception:
                #traceback.print_exc()
                continue
            # expert features
            result_label.append(label)
            san_count = 0
            if 'san' in example_json.keys():
                san = example_json['san']
                san_count = len(san.split(';'))
            cert_count = example_json['Cert']['cert_count']
            '''
            cert_len_str = ''
            for cert in example_json['Cert']['cert_list']:
                cert_len_str += (str(cert['length']) + ',')
            if label not in app_cert.keys():
                app_cert[label] = set()
            app_cert[label].add(cert_len_str)
            '''
            if label not in app_cert.keys():
                app_cert[label] = set()
            app_cert[label].add(san_count)
            #result_data[i,:] = result
            i += 1

    print(i)
    for k, v in app_cert.items():
        print(k)
        print(v)

    '''
    print('row = ' + str(row_count))
    print("result_label = " + str(len(result_label)))
    base_head = ['cert_count', 'cert_len', 'san_len', 's2c_pkts']
    header = base_head
    result_df = pd.DataFrame(result_data, columns=header)
    result_df['label'] = np.array(result_label)
    example_csv_file = PREFIX_DIR + 'Experiment/statFeature/csvFile/' + date + '/examples.csv'
    result_df.to_csv(example_csv_file, index=False)
    '''


if __name__ == '__main__':
    main()
|
||||
1772
Experiment/statFeature/csvFile/2019-12-06-0/examples.csv
Normal file
1772
Experiment/statFeature/csvFile/2019-12-06-0/examples.csv
Normal file
File diff suppressed because it is too large
Load Diff
51
Experiment/statFeature/mlAlgo.py
Normal file
51
Experiment/statFeature/mlAlgo.py
Normal file
@@ -0,0 +1,51 @@
|
||||
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, recall_score, precision_score
import random
import sys

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# '%matplotlib inline' is an IPython magic, not Python syntax: importing
# this module outside a notebook raised SyntaxError.  Request inline plots
# only when an IPython kernel is actually running.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

PREFIX_DIR = "/Users/Leo/Documents/github/GradProj/"
|
||||
|
||||
|
||||
def RF(score_df, f1_score_list, recall_score_list, precision_score_list,
       x_train, y_train, x_test, y_test):
    """Fit/evaluate one RandomForest split.

    Appends micro-averaged precision/recall/F1 for this split to the running
    lists, records their means in score_df's 'RandomForest' row and plots
    the score table.

    NOTE(review): this previously took no arguments and read globals that
    were never defined; the signature now matches the single call site in
    main(), which already passed these eight values.
    """
    import numpy as np  # local: keeps this fix independent of module imports

    classifer = RandomForestClassifier()
    classifer.fit(x_train, y_train)
    y_pred = classifer.predict(x_test)
    f1_score_list.append(f1_score(y_test, y_pred, average='micro'))
    recall_score_list.append(recall_score(y_test, y_pred, average='micro'))
    precision_score_list.append(precision_score(y_test, y_pred, average='micro'))
    scores = [np.mean(precision_score_list), np.mean(recall_score_list), np.mean(f1_score_list)]
    score_df.loc['RandomForest'] = scores
    score_df.plot.bar()
    print(scores)
||||
|
||||
def main():
    """Load the per-date feature CSV, then repeatedly draw a 75/25 split and
    evaluate a RandomForest, accumulating micro-averaged scores.

    Fixes: sys/pandas/numpy were used but never imported (NameError on any
    run); local imports keep this fix self-contained.
    """
    import sys
    import numpy as np
    import pandas as pd

    date = sys.argv[1]
    example_csv_file = PREFIX_DIR + 'Experiment/statFeature/csvFile/' + date + '/examples.csv'
    examples_df = pd.read_csv(example_csv_file)
    class_counts = examples_df['label'].value_counts().plot.bar()
    examples = examples_df.values.copy()
    score_df = pd.DataFrame(np.zeros((5, 3)),
                            index=['LogisticRegression', 'SVM', 'GaussianNB', 'tree', 'RandomForest'],
                            columns=['precision', 'recall', 'f1'])
    f1_score_list = list()
    recall_score_list = list()
    precision_score_list = list()
    for i in range(50):
        np.random.shuffle(examples)
        examples_train = examples[:int(len(examples) * 0.75)]
        examples_test = examples[int(len(examples) * 0.75):]
        x_train = examples_train[:, 0:-1]
        y_train = examples_train[:, -1]
        x_test = examples_test[:, 0:-1]
        y_test = examples_test[:, -1]
        # presumably RF is meant to run once per resample (indentation was
        # lost in the original) -- TODO confirm
        RF(score_df, f1_score_list, recall_score_list, precision_score_list,
           x_train, y_train, x_test, y_test)


if __name__ == '__main__':
    main()
|
||||
124
Experiment/statFeature/statCsvBuild.py
Normal file
124
Experiment/statFeature/statCsvBuild.py
Normal file
@@ -0,0 +1,124 @@
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
PREFIX_DIR = "/Users/Leo/Documents/github/GradProj/"
|
||||
|
||||
def label_dict_build(date):
    """Build a {(sip, sport, dip, dport): label} dict from stream_tag.txt.

    The tag file is whitespace-separated with no header; column 3 is
    overwritten with 443 (presumably the server port, since every labelled
    stream is HTTPS -- TODO confirm) so the key matches the JSON records.
    """
    example_label_file = PREFIX_DIR + 'DataSet/result/' + date + '/stream_tag.txt'
    # pd.read_table is deprecated; read_csv with a raw-string regex
    # separator also fixes the invalid '\s' escape warning.
    example_label_df = pd.read_csv(example_label_file, sep=r'\s+', header=None)
    example_label_df[3] = 443
    example_label = {
        tuple(example_label_df.iloc[i, 0:4].values): example_label_df.iloc[i, 4]
        for i in example_label_df.index
    }
    return example_label
||||
|
||||
def _stat5(values):
    """(mean, median, std, max, min) of a value list; five zeros when empty."""
    if not values:
        return (0, 0, 0, 0, 0)
    return (np.mean(values), np.median(values), np.std(values),
            np.max(values), np.min(values))


def main():
    """Read per-stream JSON features (stream_feature.txt), join each stream
    with its label, compute 25 per-direction packet statistics per stream
    and write them plus the label to examples.csv.

    Fixes: the JSON file was never closed; the hardcoded row_count = 1771
    made `result_df['label'] = ...` raise whenever the actual number of
    labelled streams differed (rows are now accumulated in a list); the
    bare `except Exception` is narrowed to the KeyError it actually guards;
    the duplicated per-direction stat arithmetic moved into _stat5().
    """
    date = sys.argv[1]
    example_label = label_dict_build(date)
    example_json_file = PREFIX_DIR + 'DataSet/result/' + date + '/stream_feature.txt'
    result_rows = []
    result_label = list()
    with open(example_json_file, 'r') as example_json_f:
        for line in example_json_f:
            example_json = json.loads(line)
            # label: skip streams that were never tagged
            try:
                flow_key = (example_json['sip'], example_json['sport'],
                            example_json['dip'], example_json['dport'])
                result_label.append(example_label[flow_key])
            except KeyError:
                continue
            # split the per-packet byte/interval series by direction
            c2s_packets_bytes = list()
            s2c_packets_bytes = list()
            c2s_packets_intervals = list()
            s2c_packets_intervals = list()
            for packet in example_json['packets']:
                if packet['dir'] == 1:
                    c2s_packets_bytes.append(packet['bytes'])
                    c2s_packets_intervals.append(packet['interval'])
                elif packet['dir'] == 2:
                    s2c_packets_bytes.append(packet['bytes'])
                    s2c_packets_intervals.append(packet['interval'])
            c2s_bytes = example_json['c2s_bytes']
            s2c_bytes = example_json['s2c_bytes']
            # statistical features: stats stay zero for a silent direction
            if c2s_bytes > 0:
                c2s_b = _stat5(c2s_packets_bytes)
                c2s_i = _stat5(c2s_packets_intervals)
            else:
                c2s_b = c2s_i = (0, 0, 0, 0, 0)
            if s2c_bytes > 0:
                s2c_b = _stat5(s2c_packets_bytes)
                s2c_i = _stat5(s2c_packets_intervals)
            else:
                s2c_b = s2c_i = (0, 0, 0, 0, 0)
            row = [c2s_bytes, example_json['c2s_pkts'], s2c_bytes,
                   example_json['s2c_pkts'], example_json['duration']]
            row += list(c2s_b) + list(c2s_i) + list(s2c_b) + list(s2c_i)
            result_rows.append(row)
    print('row = ' + str(len(result_rows)))
    print("result_label = " + str(len(result_label)))
    base_head = ['c2s_bytes', 'c2s_pkts', 's2c_bytes', 's2c_pkts', 'duration', 'c2s_packets_bytes_mean', 'c2s_packets_bytes_median', 'c2s_packets_bytes_std',\
    'c2s_packets_bytes_max', 'c2s_packets_bytes_min', 'c2s_packets_intervals_mean', 'c2s_packets_intervals_median', 'c2s_packets_intervals_std',\
    'c2s_packets_intervals_max', 'c2s_packets_intervals_min', 's2c_packets_bytes_mean', 's2c_packets_bytes_median', 's2c_packets_bytes_std',\
    's2c_packets_bytes_max', 's2c_packets_bytes_min', 's2c_packets_intervals_mean', 's2c_packets_intervals_median', 's2c_packets_intervals_std',\
    's2c_packets_intervals_max', 's2c_packets_intervals_min']
    header = base_head
    result_df = pd.DataFrame(np.array(result_rows, dtype=float), columns=header)
    result_df['label'] = np.array(result_label)
    example_csv_file = PREFIX_DIR + 'Experiment/statFeature/csvFile/' + date + '/examples.csv'
    result_df.to_csv(example_csv_file, index=False)


if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user