Add mid-term experiment data, code, and PPT slides
@@ -99,6 +99,7 @@ def main():
                         stream = li[3]
                         host = li[4]
                         if(stream.split(' ')[4] != '443'):
+                            traceback.print_exc()
                             continue
                         ua = ""
                         for index in range(5, len(li), 1):
@@ -4,11 +4,15 @@ import traceback
 
 filenameList = [
     #"http.log.test",
     "./log/2019-12-06/http.log.2019-12-06-0",
     "./log/2019-12-04/http2.log.2019-12-06-0",
+    "./log/2019-12-20_21/http.log.2019-12-20",
+    "./log/2019-12-20_21/http2.log.2019-12-20",
+    "./log/2019-12-20_21/http.log.2019-12-21",
+    "./log/2019-12-20_21/http2.log.2019-12-21",
 ]
 
 outputFile = "./result.txt"
 
 '''
 appDict = {
     "wechat" : ["wechat", "MicroMessenger Client", "MicroMessenger"],
     "qq" : ["qq", "TencentMidasConnect"],
@@ -34,7 +38,15 @@ appDict = {
     "safari" : ["Version/12.1.2", "MobileSafari"],
     "firefox" : ["FxiOS"],
 }
 '''
 
+appDict = {
+    "douyin" : ["Aweme", "ttplayer"],
+    "weibo" : ["weibo", "微博", "afma-sdk-onShow-v", "SensorsAnalytics"],
+    "toutiao" : ["News", "今日头条"],
+    "hupu" : ["hupu", "prokanqiu", "虎扑", "AVMDL"],
+    "zhihu": ["osee2unifiedRelease",]
+}
+
 def getAppName(ua):
     for name, ids in appDict.items():
@@ -74,6 +86,7 @@ filterUaList = {
     "swcd",
     "null",
     "SafariSafeBrowsing",
+    "CriOS"
 }
 
 def handleUnknownApp(host, stream, ua):
@@ -91,16 +104,19 @@ def main():
     stm2app_dict = dict()
     with open(outputFile, "w+") as f1:
         for filename in filenameList:
-            with open(filename) as f:
+            with open(filename, errors='ignore') as f:
                 logs = f.readlines()
                 for log in logs:
                     try:
                         li = log.split(',')
                         stream = li[3]
                         host = li[4]
-                        if(stream.split(' ')[4] != '443'):
-                            continue
-                        ua = ""
+                        try:
+                            if(stream.split(' ')[4] != '443'):
+                                continue
+                            ua = ""
+                        except:
+                            continue
                         for index in range(5, len(li), 1):
                             ua += li[index]
                         host = host.strip()
@@ -108,14 +124,22 @@ def main():
                         ua = ua.strip()
                         appName = getAppName(ua)
                         if appName != None:
-                            stm2app_dict[stream] = appName
+                            if stream not in stm2app_dict.keys():
+                                stm2app_dict[stream] = set()
+                            stm2app_dict[stream].add(appName)
                         else:
                             handleUnknownApp(host, stream, ua)
                     except:
                         print("log: " + log)
                         traceback.print_exc()
-        for stream, app in stm2app_dict.items():
-            f1.write(stream + ": " + app + "\n")
+        for stream, apps in stm2app_dict.items():
+            if len(apps) > 1:
+                continue
+            f1.write(stream + " ")
+            for app in apps:
+                f1.write(app + " ")
+            f1.write("\n")
 
 
 if __name__ == '__main__':
     main()
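The behavioral change in the hunks above is that a stream now accumulates a set of candidate app labels instead of a single one, and only streams with exactly one candidate are written out. A minimal runnable sketch of that match-and-filter logic, with an abbreviated appDict and illustrative stream keys (function and variable names here are stand-ins, not the repository's exact format):

    # Minimal sketch of the set-based stream labeling introduced above.
    appDict = {"douyin": ["Aweme", "ttplayer"], "zhihu": ["osee2unifiedRelease"]}

    def get_app_name(ua):
        # Return the first app whose identifier substring occurs in the UA.
        for name, ids in appDict.items():
            if any(marker in ua for marker in ids):
                return name
        return None

    stm2app = {}  # stream key -> set of candidate app names

    def tag(stream, ua):
        app = get_app_name(ua)
        if app is not None:
            stm2app.setdefault(stream, set()).add(app)

    tag("10.0.0.1 1234 1.2.3.4 443", "Aweme 9.0 ttplayer")
    tag("10.0.0.1 1234 1.2.3.4 443", "osee2unifiedRelease/1.0")
    # Streams that matched more than one app are ambiguous and get skipped,
    # mirroring the `len(apps) > 1: continue` filter in the diff.
    unambiguous = {s: a for s, a in stm2app.items() if len(a) == 1}
    print(unambiguous)  # {} here, because this stream matched two apps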
DataSet/DataTag/log/2019-12-20_21/http.log.2019-12-20 (new file, 43978 lines; diff suppressed because it is too large)
DataSet/DataTag/log/2019-12-20_21/http.log.2019-12-21 (new file, 25857 lines; diff suppressed because it is too large)
DataSet/DataTag/log/2019-12-20_21/http2.log.2019-12-20 (new file, 14058 lines; diff suppressed because it is too large)
DataSet/DataTag/log/2019-12-20_21/http2.log.2019-12-21 (new file, 23127 lines; diff suppressed because it is too large)
DataSet/DataTag/result.txt (new file, 10540 lines; diff suppressed because it is too large)
DataSet/DataTag/test.py (new file, 16 lines):
@@ -0,0 +1,16 @@
+
+import sys
+import traceback
+
+
+filename = "./log/2019-12-20_21/http2.log.2019-12-21"
+
+
+with open(filename) as f:
+    lines = f.readlines()
+print(len(lines))
@@ -122,7 +122,7 @@ extern "C" unsigned char sslstat_entry(stSessionInfo *session_info, void **param
 
 
 extern "C" int sslstat_init(){
-    g_fp = fopen("./ssl_stat.txt", "w+");
+    g_fp = fopen("./ssl_stat.txt", "a+");
     return 0;
 }
@@ -78,6 +78,7 @@ struct tls_message_type g_tls_types[] = {
     {23, 23, 0, "application_data"},
     {24, 24, 0, "heartbeat"},
     {25, 25, 0, "tls12_cid"},
+    {26, 22, -1, "handshake_unknown"},
 };
 
 struct pkt_stat_info{
@@ -127,6 +128,13 @@ struct pme_info{
     struct ssl_chello chello;
     int tls_message_count;
     struct tls_message_info tls_info_list[STREAM_PACKET_COUNT_MAX];
+    unsigned char c2s_tls_payload[1500];
+    int c2s_tls_last_segment_len;
+    int c2s_tls_current_segment_offset;
+    unsigned char s2c_tls_payload[1500];
+    int s2c_tls_last_segment_len;
+    int s2c_tls_current_segment_offset;
+    int has_fin_rst;
 };
 
 int ipv4_header_parse(const void *a_packet, struct pkt_parsed_info* pktinfo){
@@ -200,43 +208,103 @@ int get_tls_message_type(int content_type, int handshake_type){
             return i;
         }
     }
+    if(content_type == 22){
+        return type_count - 1;
+    }
     return -1;
 }
 
 int tls_header_parse(struct streaminfo *stream, struct pme_info *pmeinfo, struct pkt_parsed_info *pktinfo){
-    unsigned char *buff = (unsigned char*)pktinfo->data;
-    int len = pktinfo->data_len;
+    int curdir = stream->curdir;
+    unsigned char *buff = NULL;
+    int len = 0;
+    if(curdir == 1){
+        if(pmeinfo->c2s_tls_current_segment_offset >= pktinfo->data_len){
+            pmeinfo->c2s_tls_current_segment_offset -= pktinfo->data_len;
+            return 0;
+        }
+        memcpy((char*)pmeinfo->c2s_tls_payload + pmeinfo->c2s_tls_last_segment_len,
+            pktinfo->data + pmeinfo->c2s_tls_current_segment_offset, pktinfo->data_len - pmeinfo->c2s_tls_current_segment_offset);
+        buff = pmeinfo->c2s_tls_payload;
+        len = pktinfo->data_len + pmeinfo->c2s_tls_last_segment_len - pmeinfo->c2s_tls_current_segment_offset;
+    }
+    if(curdir == 2){
+        if(pmeinfo->s2c_tls_current_segment_offset >= pktinfo->data_len){
+            pmeinfo->s2c_tls_current_segment_offset -= pktinfo->data_len;
+            return 0;
+        }
+        memcpy((char*)pmeinfo->s2c_tls_payload + pmeinfo->s2c_tls_last_segment_len,
+            pktinfo->data + pmeinfo->s2c_tls_current_segment_offset, pktinfo->data_len - pmeinfo->s2c_tls_current_segment_offset);
+        buff = pmeinfo->s2c_tls_payload;
+        len = pktinfo->data_len + pmeinfo->s2c_tls_last_segment_len - pmeinfo->s2c_tls_current_segment_offset;
+    }
     int i = 0;
+    int flag = 0;
     while(i < len){
         if(i + 4 >= len){
-            return -1;
+            flag = 1;
+            break;
         }
         int content_type = buff[i];
         int handshake_type = 0;
         if(buff[i] == 0x16){
             if(i + 5 >= len){
-                return -1;
+                flag = 1;
+                break;
             }
             handshake_type = buff[i + 5];
         }
         int message_type = get_tls_message_type(content_type, handshake_type);
         if(message_type < 0){
-            return -1;
+            LOG_ERROR(g_logger, "message_type unknown, value = %02x %02x %02x %02x %02x\n", buff[i], buff[i + 1], buff[i + 2], buff[i + 3], buff[i + 4]);
+            flag = 2;
+            break;
         }
         int version = (uint16_t)(buff[i + 1] << 8) + (uint8_t)buff[i + 2];
         if(version < 0x0300 || version > 0x0304){
-            return -1;
+            LOG_ERROR(g_logger, "version unknown, value = %02x %02x\n", buff[i + 1], buff[i + 2]);
+            flag = 2;
+            break;
         }
         int len = (uint16_t)(buff[i + 3] << 8) + (uint8_t)buff[i + 4];
         if(len < 0){
             printf("%02hhx %02hhx\n", buff[i + 3], buff[i + 4]);
         }
         pmeinfo->tls_info_list[pmeinfo->tls_message_count].dir = stream->curdir;
         pmeinfo->tls_info_list[pmeinfo->tls_message_count].type = message_type;
         pmeinfo->tls_info_list[pmeinfo->tls_message_count].length = len;
         pmeinfo->tls_message_count++;
         i += (5 + len);
     }
+    if(flag == 1){
+        if(curdir == 1){
+            memcpy((char*)pmeinfo->c2s_tls_payload, pktinfo->data, len - i);
+            pmeinfo->c2s_tls_last_segment_len = len - i;
+            pmeinfo->c2s_tls_current_segment_offset = 0;
+        }
+        if(curdir == 2){
+            memcpy((char*)pmeinfo->s2c_tls_payload, pktinfo->data, len - i);
+            pmeinfo->s2c_tls_last_segment_len = len - i;
+            pmeinfo->s2c_tls_current_segment_offset = 0;
+        }
+        return -1;
+    }
+    if(flag == 2){
+        if(curdir == 1){
+            pmeinfo->c2s_tls_last_segment_len = 0;
+            pmeinfo->c2s_tls_current_segment_offset = 0;
+        }
+        if(curdir == 2){
+            pmeinfo->s2c_tls_last_segment_len = 0;
+            pmeinfo->s2c_tls_current_segment_offset = 0;
+        }
+        return -2;
+    }
+    if(curdir == 1){
+        pmeinfo->c2s_tls_last_segment_len = 0;
+        pmeinfo->c2s_tls_current_segment_offset = i - len;
+    }
+    if(curdir == 2){
+        pmeinfo->s2c_tls_last_segment_len = 0;
+        pmeinfo->s2c_tls_current_segment_offset = i - len;
+    }
     return 0;
 }
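tls_header_parse above walks TLS records in a per-direction reassembly buffer: each record begins with a 5-byte header (1-byte content type, 2-byte version, 2-byte length), handshake records (0x16) carry the handshake type in the sixth byte, and a record cut off by TCP segmentation is carried into the next segment via the *_tls_payload buffers and offset fields. A simplified Python sketch of the same record walk; it only carries over an incomplete header, whereas the C code also tracks how far a record body spills into the following segment:

    # Illustrative sketch of the TLS record walk, not the plugin's actual API.
    def walk_tls_records(buf: bytes):
        records, i = [], 0
        while i < len(buf):
            if i + 5 > len(buf):
                # incomplete record header: hand the tail back for reassembly
                return records, buf[i:]
            content_type = buf[i]
            version = (buf[i + 1] << 8) | buf[i + 2]   # e.g. 0x0303 = TLS 1.2
            length = (buf[i + 3] << 8) | buf[i + 4]
            if not (0x0300 <= version <= 0x0304):
                raise ValueError("not a TLS record")
            handshake_type = buf[i + 5] if content_type == 0x16 and i + 5 < len(buf) else 0
            records.append((content_type, handshake_type, length))
            i += 5 + length                            # jump to the next record header
        return records, b""

    # A ClientHello record header: handshake(22), TLS 1.0, length 4, then the body.
    recs, leftover = walk_tls_records(bytes([0x16, 0x03, 0x01, 0x00, 0x04, 0x01, 0, 0, 0]))
    print(recs, leftover)  # [(22, 1, 4)] b''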
@@ -256,6 +324,10 @@ int packet_need_filter(struct pkt_parsed_info *pktinfo){
 }
 
 char pending_opstate(struct streaminfo *stream, struct pme_info *pmeinfo, struct pkt_parsed_info *pktinfo){
+    struct tcphdr *_tcphdr = pktinfo->tcphdr;
+    if(_tcphdr->fin || _tcphdr->rst){
+        pmeinfo->has_fin_rst = 1;
+    }
     pmeinfo->last_c2s_pkt_index = -1;
     pmeinfo->last_s2c_pkt_index = -1;
     get_rawpkt_opt_from_streaminfo(stream, RAW_PKT_GET_TIMESTAMP, &(pmeinfo->start_time));
@@ -280,6 +352,10 @@ char pending_opstate(struct streaminfo *stream, struct pme_info *pmeinfo, struct
 
 char data_opstate(struct streaminfo *stream, struct pme_info *pmeinfo, struct pkt_parsed_info *pktinfo){
     get_rawpkt_opt_from_streaminfo(stream, RAW_PKT_GET_TIMESTAMP, &(pmeinfo->end_time));
+    struct tcphdr *_tcphdr = pktinfo->tcphdr;
+    if(_tcphdr->fin || _tcphdr->rst){
+        pmeinfo->has_fin_rst = 1;
+    }
     if(packet_need_filter(pktinfo) == 0){
         tls_header_parse(stream, pmeinfo, pktinfo);
         int ret = packet_stat(stream, pmeinfo, pktinfo);
@@ -303,6 +379,9 @@ void time_tostring(struct timeval tv, char *buf, int buflen){
 }
 
 void output_result(struct pme_info *pmeinfo){
+    if(pmeinfo->has_fin_rst == 0){
+        return;
+    }
     cJSON *log_obj = cJSON_CreateObject();
     cJSON_AddStringToObject(log_obj, "sip", pmeinfo->sip);
     cJSON_AddNumberToObject(log_obj, "sport", pmeinfo->sport);
@@ -373,6 +452,10 @@ void output_result(struct pme_info *pmeinfo){
 char close_opstate(struct streaminfo *stream, struct pme_info *pmeinfo, struct pkt_parsed_info *pktinfo, const void *a_packet){
     if(a_packet != NULL){
         get_rawpkt_opt_from_streaminfo(stream, RAW_PKT_GET_TIMESTAMP, &(pmeinfo->end_time));
+        struct tcphdr *_tcphdr = pktinfo->tcphdr;
+        if(_tcphdr->fin || _tcphdr->rst){
+            pmeinfo->has_fin_rst = 1;
+        }
         if(packet_need_filter(pktinfo) == 0){
             tls_header_parse(stream, pmeinfo, pktinfo);
             packet_stat(stream, pmeinfo, pktinfo);
@@ -438,7 +521,7 @@ extern "C" int stmstat_init(){
     char *log_path = (char*)"./stream_stat.log";
     int log_level = 10;
     g_logger = MESA_create_runtime_log_handle(log_path, log_level);
-    g_fp = fopen("./stream_stat.txt", "w+");
+    g_fp = fopen("./stream_stat.txt", "a+");
     return 0;
 }
DataSet/result/2019-12-20_21/ssl_stat.txt (new file, 22463 lines; diff suppressed because one or more lines are too long)
DataSet/result/2019-12-20_21/stream_stat.txt (new file, 42322 lines; diff suppressed because one or more lines are too long)
DataSet/result/2019-12-20_21/stream_tag.txt (new file, 10540 lines; diff suppressed because it is too large)
(one additional file diff suppressed because one or more lines are too long)
Experiment/ExpertFeature/tls_cert_length.ipynb (new file, 542 lines; diff suppressed because one or more lines are too long)
Experiment/MarkovModel/.ipynb_checkpoints/markov-checkpoint.ipynb (new file, 48566 lines; diff suppressed because one or more lines are too long)
(three additional file diffs suppressed because one or more lines are too long)
Experiment/MarkovModel/CsvFile/2019-12-20_21/test.csv (new file, 4671 lines; diff suppressed because it is too large)
Experiment/MarkovModel/CsvFile/2019-12-20_21/train.csv (new file, 14009 lines; diff suppressed because it is too large)
Experiment/MarkovModel/markov.ipynb (new file, 14142 lines; diff suppressed because one or more lines are too long)
Experiment/MarkovModel/markov_tofig.ipynb (new file, 327 lines; diff suppressed because one or more lines are too long)
@@ -1,77 +0,0 @@
-import sys
-import os
-import json
-import pandas as pd
-import numpy as np
-
-PREFIX_DIR = "/Users/Leo/Documents/github/GradProj/"
-
-def label_dict_build(date):
-    example_label_file = PREFIX_DIR + 'DataSet/result/' + date + '/stream_tag.txt'
-    example_label_df = pd.read_table(example_label_file, sep='\s+', header=None)
-    example_label_df[3] = 443
-    example_label = {tuple(example_label_df.iloc[i,0:4].values):example_label_df.iloc[i,4] for i in example_label_df.index}
-    return example_label
-
-app_cert = dict()
-
-def main():
-    date = sys.argv[1]
-    example_label = label_dict_build(date)
-    #print(example_label)
-    row_count = 1771
-    cloumn_count = 25
-    example_json_file = PREFIX_DIR + 'DataSet/result/' + date + '/ssl_stat.txt'
-    example_json_f = open(example_json_file, 'r')
-    array_shape = (row_count, cloumn_count)
-    result_data = np.zeros(array_shape)
-    result_label = list()
-    i = 0
-    for line in example_json_f.readlines():
-        example_json = json.loads(line)
-        # label
-        try:
-            flow_key = (example_json['sip'], example_json['sport'], example_json['dip'], example_json['dport'])
-            label = example_label[flow_key]
-        except Exception:
-            #traceback.print_exc()
-            continue
-        # expert features
-        result_label.append(label)
-        san_count = 0
-        if 'san' in example_json.keys():
-            san = example_json['san']
-            san_count = len(san.split(';'))
-        cert_count = example_json['Cert']['cert_count']
-        '''
-        cert_len_str = ''
-        for cert in example_json['Cert']['cert_list']:
-            cert_len_str += (str(cert['length']) + ',')
-        if label not in app_cert.keys():
-            app_cert[label] = set()
-        app_cert[label].add(cert_len_str)
-        '''
-        if label not in app_cert.keys():
-            app_cert[label] = set()
-        app_cert[label].add(san_count)
-        #result_data[i,:] = result
-        i += 1
-
-    print(i)
-    for k, v in app_cert.items():
-        print(k)
-        print(v)
-
-    '''
-    print('row = ' + str(row_count))
-    print("result_label = " + str(len(result_label)))
-    base_head = ['cert_count', 'cert_len', 'san_len', 's2c_pkts']
-    header = base_head
-    result_df = pd.DataFrame(result_data, columns=header)
-    result_df['label'] = np.array(result_label)
-    example_csv_file = PREFIX_DIR + 'Experiment/statFeature/csvFile/' + date + '/examples.csv'
-    result_df.to_csv(example_csv_file, index=False)
-    '''
-
-if __name__ == '__main__':
-    main()
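The core step of this deleted script is joining each ssl_stat.txt JSON line to its app label through the (sip, sport, dip, dport) flow key, then collecting the certificate SAN count per app. A self-contained sketch of that join; the JSON fields follow the script above, while the data here is an inline stand-in:

    import json

    # label table keyed by flow 4-tuple, as label_dict_build produces
    labels = {("10.0.0.1", 1234, "1.2.3.4", 443): "douyin"}

    line = json.dumps({"sip": "10.0.0.1", "sport": 1234, "dip": "1.2.3.4",
                       "dport": 443, "san": "a.com;b.com;c.com"})
    rec = json.loads(line)
    key = (rec["sip"], rec["sport"], rec["dip"], rec["dport"])
    label = labels.get(key)
    # SAN entries are semicolon-separated; absent field means zero entries
    san_count = len(rec["san"].split(";")) if "san" in rec else 0

    app_cert = {}
    if label is not None:
        app_cert.setdefault(label, set()).add(san_count)
    print(app_cert)  # {'douyin': {3}}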
(one additional file diff suppressed because one or more lines are too long)
Experiment/statFeature/StatFeature.ipynb (new file, 615 lines; diff suppressed because one or more lines are too long)
Experiment/statFeature/csvFile/2019-12-20_21/examples.csv (new file, 18679 lines; diff suppressed because it is too large)
@@ -1,51 +0,0 @@
-from sklearn.linear_model import LogisticRegression
-from sklearn.svm import SVC
-from sklearn.naive_bayes import GaussianNB
-from sklearn import tree
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.metrics import f1_score,recall_score,precision_score
-import random
-import matplotlib.pyplot as plt
-%matplotlib inline
-
-PREFIX_DIR = "/Users/Leo/Documents/github/GradProj/"
-
-
-def RF():
-    classifer = RandomForestClassifier()
-    classifer.fit(x_train, y_train)
-    y_pred = classifer.predict(x_test)
-    f1_score_list.append(f1_score(y_test, y_pred, average='micro'))
-    recall_score_list.append(recall_score(y_test, y_pred, average='micro'))
-    precision_score_list.append(precision_score(y_test, y_pred, average='micro'))
-    scores = [np.mean(precision_score_list), np.mean(recall_score_list), np.mean(f1_score_list)]
-    score_df.loc['RandomForest'] = scores
-    score_df.plot.bar()
-    print(scores)
-
-def main():
-    date = sys.argv[1]
-    example_csv_file = PREFIX_DIR + 'Experiment/statFeature/csvFile/' + date + '/examples.csv'
-    examples_df = pd.read_csv(example_csv_file)
-    class_counts = examples_df['label'].value_counts().plot.bar()
-    examples = examples_df.values.copy()
-    score_df = pd.DataFrame(np.zeros((5,3)),index = ['LogisticRegression', 'SVM', 'GaussianNB', 'tree', 'RandomForest'], \
-        columns = ['precision', 'recall', 'f1'])
-    f1_score_list = list()
-    recall_score_list = list()
-    precision_score_list = list()
-    for i in range(50):
-        np.random.shuffle(examples)
-        examples_train = examples[:int(len(examples)*0.75)]
-        examples_test = examples[int(len(examples)*0.75):]
-        x_train = examples_train[:,0:-1]
-        y_train = examples_train[:,-1]
-        x_test = examples_test[:,0:-1]
-        y_test = examples_test[:,-1]
-        RF(score_df, f1_score_list, recall_score_list, precision_score_list, \
-            x_train, y_train, x_test, y_test)
-
-
-
-if __name__ == '__main__':
-    main()
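As written, this deleted script would not run: RF() is defined with no parameters but called with nine, and sys, numpy, and pandas are used without being imported. A corrected, self-contained sketch of the same 50-round shuffle-split evaluation, using synthetic data in place of examples.csv:

    import numpy as np
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import f1_score, recall_score, precision_score

    # synthetic stand-in for the feature matrix and labels from examples.csv
    rng = np.random.default_rng(0)
    X = rng.normal(size=(200, 5))
    y = rng.integers(0, 3, size=200)

    precisions, recalls, f1s = [], [], []
    for _ in range(50):
        # fresh 75/25 shuffle split each round, as in the original loop
        idx = rng.permutation(len(X))
        cut = int(len(X) * 0.75)
        train, test = idx[:cut], idx[cut:]
        clf = RandomForestClassifier().fit(X[train], y[train])
        pred = clf.predict(X[test])
        precisions.append(precision_score(y[test], pred, average='micro'))
        recalls.append(recall_score(y[test], pred, average='micro'))
        f1s.append(f1_score(y[test], pred, average='micro'))

    # mean scores over the 50 rounds
    print(np.mean(precisions), np.mean(recalls), np.mean(f1s))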
@@ -1,124 +0,0 @@
-import sys
-import os
-import json
-import pandas as pd
-import numpy as np
-
-PREFIX_DIR = "/Users/Leo/Documents/github/GradProj/"
-
-def label_dict_build(date):
-    example_label_file = PREFIX_DIR + 'DataSet/result/' + date + '/stream_tag.txt'
-    example_label_df = pd.read_table(example_label_file, sep='\s+', header=None)
-    example_label_df[3] = 443
-    example_label = {tuple(example_label_df.iloc[i,0:4].values):example_label_df.iloc[i,4] for i in example_label_df.index}
-    return example_label
-
-def main():
-    date = sys.argv[1]
-    example_label = label_dict_build(date)
-    row_count = 1771
-    cloumn_count = 25
-    example_json_file = PREFIX_DIR + 'DataSet/result/' + date + '/stream_feature.txt'
-    example_json_f = open(example_json_file, 'r')
-    array_shape = (row_count, cloumn_count)
-    result_data = np.zeros(array_shape)
-    result_label = list()
-    i = 0
-    for line in example_json_f.readlines():
-        example_json = json.loads(line)
-        # label
-        try:
-            flow_key = (example_json['sip'], example_json['sport'], example_json['dip'], example_json['dport'])
-            result_label.append(example_label[flow_key])
-        except Exception:
-            continue
-        # statistical features
-        packets = example_json['packets']
-        c2s_packets_bytes = list()
-        s2c_packets_bytes = list()
-        c2s_packets_intervals = list()
-        s2c_packets_intervals = list()
-        for packet in packets:
-            if packet['dir'] == 1:
-                c2s_packets_bytes.append(packet['bytes'])
-                c2s_packets_intervals.append(packet['interval'])
-            elif packet['dir'] == 2:
-                s2c_packets_bytes.append(packet['bytes'])
-                s2c_packets_intervals.append(packet['interval'])
-        c2s_bytes = example_json['c2s_bytes']
-        s2c_bytes = example_json['s2c_bytes']
-        c2s_pkts = example_json['c2s_pkts']
-        s2c_pkts = example_json['s2c_pkts']
-        duration = example_json['duration']
-        c2s_packets_bytes_mean = 0
-        c2s_packets_bytes_median = 0
-        c2s_packets_bytes_std = 0
-        c2s_packets_bytes_max = 0
-        c2s_packets_bytes_min = 0
-
-        c2s_packets_intervals_mean = 0
-        c2s_packets_intervals_median = 0
-        c2s_packets_intervals_std = 0
-        c2s_packets_intervals_max = 0
-        c2s_packets_intervals_min = 0
-
-        s2c_packets_bytes_mean = 0
-        s2c_packets_bytes_median = 0
-        s2c_packets_bytes_std = 0
-        s2c_packets_bytes_max = 0
-        s2c_packets_bytes_min = 0
-
-        s2c_packets_intervals_mean = 0
-        s2c_packets_intervals_median = 0
-        s2c_packets_intervals_std = 0
-        s2c_packets_intervals_max = 0
-        s2c_packets_intervals_min = 0
-
-        if c2s_bytes > 0:
-            c2s_packets_bytes_mean = np.mean(c2s_packets_bytes)
-            c2s_packets_bytes_median = np.median(c2s_packets_bytes)
-            c2s_packets_bytes_std = np.std(c2s_packets_bytes)
-            c2s_packets_bytes_max = np.max(c2s_packets_bytes)
-            c2s_packets_bytes_min = np.min(c2s_packets_bytes)
-
-            c2s_packets_intervals_mean = np.mean(c2s_packets_intervals)
-            c2s_packets_intervals_median = np.median(c2s_packets_intervals)
-            c2s_packets_intervals_std = np.std(c2s_packets_intervals)
-            c2s_packets_intervals_max = np.max(c2s_packets_intervals)
-            c2s_packets_intervals_min = np.min(c2s_packets_intervals)
-
-        if s2c_bytes > 0:
-            s2c_packets_bytes_mean = np.mean(s2c_packets_bytes)
-            s2c_packets_bytes_median = np.median(s2c_packets_bytes)
-            s2c_packets_bytes_std = np.std(s2c_packets_bytes)
-            s2c_packets_bytes_max = np.max(s2c_packets_bytes)
-            s2c_packets_bytes_min = np.min(s2c_packets_bytes)
-
-            s2c_packets_intervals_mean = np.mean(s2c_packets_intervals)
-            s2c_packets_intervals_median = np.median(s2c_packets_intervals)
-            s2c_packets_intervals_std = np.std(s2c_packets_intervals)
-            s2c_packets_intervals_max = np.max(s2c_packets_intervals)
-            s2c_packets_intervals_min = np.min(s2c_packets_intervals)
-        result = [c2s_bytes, c2s_pkts, s2c_bytes, s2c_pkts, duration, c2s_packets_bytes_mean, c2s_packets_bytes_median, c2s_packets_bytes_std,\
-            c2s_packets_bytes_max, c2s_packets_bytes_min, c2s_packets_intervals_mean, c2s_packets_intervals_median, c2s_packets_intervals_std,\
-            c2s_packets_intervals_max, c2s_packets_intervals_min, s2c_packets_bytes_mean, s2c_packets_bytes_median, s2c_packets_bytes_std,\
-            s2c_packets_bytes_max, s2c_packets_bytes_min, s2c_packets_intervals_mean, s2c_packets_intervals_median, s2c_packets_intervals_std,\
-            s2c_packets_intervals_max, s2c_packets_intervals_min]
-        result_data[i,:] = result
-        i += 1
-    print('row = ' + str(row_count))
-    print("result_label = " + str(len(result_label)))
-    base_head = ['c2s_bytes', 'c2s_pkts', 's2c_bytes', 's2c_pkts', 'duration', 'c2s_packets_bytes_mean', 'c2s_packets_bytes_median', 'c2s_packets_bytes_std',\
-        'c2s_packets_bytes_max', 'c2s_packets_bytes_min', 'c2s_packets_intervals_mean', 'c2s_packets_intervals_median', 'c2s_packets_intervals_std',\
-        'c2s_packets_intervals_max', 'c2s_packets_intervals_min', 's2c_packets_bytes_mean', 's2c_packets_bytes_median', 's2c_packets_bytes_std',\
-        's2c_packets_bytes_max', 's2c_packets_bytes_min', 's2c_packets_intervals_mean', 's2c_packets_intervals_median', 's2c_packets_intervals_std',\
-        's2c_packets_intervals_max', 's2c_packets_intervals_min']
-    header = base_head
-    result_df = pd.DataFrame(result_data, columns=header)
-    result_df['label'] = np.array(result_label)
-    example_csv_file = PREFIX_DIR + 'Experiment/statFeature/csvFile/' + date + '/examples.csv'
-    result_df.to_csv(example_csv_file, index=False)
-
-
-if __name__ == '__main__':
-    main()
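This deleted extractor computes the same five statistics (mean, median, std, max, min) over four lists: packet sizes and inter-arrival intervals, in each direction. A compact equivalent with a single helper, using an inline stand-in for the script's JSON packets field:

    import numpy as np

    def dir_stats(values):
        # mean, median, std, max, min; all zeros when the direction is empty
        if len(values) == 0:
            return [0.0] * 5
        a = np.asarray(values, dtype=float)
        return [a.mean(), np.median(a), a.std(), a.max(), a.min()]

    packets = [{"dir": 1, "bytes": 120, "interval": 0.01},
               {"dir": 2, "bytes": 1400, "interval": 0.02}]
    c2s_bytes = [p["bytes"] for p in packets if p["dir"] == 1]
    s2c_bytes = [p["bytes"] for p in packets if p["dir"] == 2]
    # one feature row: five statistics per direction, in a fixed order
    row = dir_stats(c2s_bytes) + dir_stats(s2c_bytes)
    print(row)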
Presentation/Mid-term/illustration/base_feature.svg (new file, 1207 lines, 36 KiB; diff suppressed because it is too large)
Presentation/Mid-term/illustration/ssl.svg (new file, 1150 lines, 34 KiB; diff suppressed because it is too large)
Presentation/Mid-term/illustration/type.svg (new file, 1330 lines, 39 KiB; diff suppressed because it is too large)
Presentation/Mid-term/illustration/type_length.svg (new executable file, 1252 lines, 37 KiB; diff suppressed because it is too large)
Presentation/Mid-term/illustration/type_length_burst.svg (new file, 1283 lines, 38 KiB; diff suppressed because it is too large)
Presentation/Mid-term/信工所硕士-2017级-中期报告-崔一鸣.pptx (new binary file, not shown)