From 03d3e4dbff93dee985db46ff3b3fe4d6c040b19e Mon Sep 17 00:00:00 2001 From: fang xiaoyu Date: Thu, 16 Mar 2023 15:01:07 +0000 Subject: [PATCH] Delete test4.py --- test4.py | 91 -------------------------------------------------------- 1 file changed, 91 deletions(-) delete mode 100644 test4.py diff --git a/test4.py b/test4.py deleted file mode 100644 index d74862e..0000000 --- a/test4.py +++ /dev/null @@ -1,91 +0,0 @@ -# Name:fang xiaoyu -# Time: 2023/3/11 18:43 -import numpy as np -import pandas as pd -import matplotlib.pyplot as plt -import dpkt -import socket -import struct -import binascii -from scapy.all import * -from sklearn.neighbors import KNeighborsClassifier -from sklearn.model_selection import cross_val_score -#import tshark -import scapy - -# 从pcap文件中读取流量数据 -def read_pcap(filename): - packets = rdpcap(filename) - flows = {} - for packet in packets: - if packet.haslayer(TCP): - src_ip = packet[IP].src - dst_ip = packet[IP].dst - src_port = packet[TCP].sport - dst_port = packet[TCP].dport - key = (src_ip, dst_ip, src_port, dst_port) - if key not in flows: - flows[key] = [packet] - else: - flows[key].append(packet) - return flows - -# 提取流量数据的特征 -def extract_features(flow): - features = [] - total_len = 0 - total_pkts = len(flow) - start_time = flow[0].time - end_time = flow[-1].time - for packet in flow: - total_len += len(packet) - duration = end_time - start_time - features.append(total_len) - features.append(total_pkts) - features.append(duration) - return features - -# 将特征向量转换为numpy数组 -def vectorize_data(data): - return np.array(data) - -# 读取VPN和non-VPN流量数据 -vpn_traffic = read_pcap('vpn_traffic.pcap') -nonvpn_traffic = read_pcap('nonvpn_traffic.pcap') - -# 提取VPN和non-VPN流量的特征 -vpn_traffic = [extract_features(flow) for flow in vpn_traffic.values()] -nonvpn_traffic = [extract_features(flow) for flow in nonvpn_traffic.values()] - -# 将VPN和non-VPN流量数据转换为numpy数组 -vpn_traffic = vectorize_data(vpn_traffic) -nonvpn_traffic = vectorize_data(nonvpn_traffic) - -# 将VPN和non-VPN流量数据合并 -X = np.concatenate((vpn_traffic, nonvpn_traffic)) -y = np.concatenate((np.ones(len(vpn_traffic)), np.zeros(len(nonvpn_traffic)))) - -# 使用交叉验证选择最佳的K值 -cv_scores = [] -for k in range(1, 31): - knn = KNeighborsClassifier(n_neighbors=k) - scores = cross_val_score(knn, X, y, cv=5, scoring='accuracy') - cv_scores.append(scores.mean()) - -# 可视化交叉验证结果 -plt.plot(range(1, 31), cv_scores) -plt.xlabel('K') -plt.ylabel('Accuracy') -plt.show() - -# 使用最佳的K值进行模型训练和预测 -best_k = np.argmax(cv_scores) + 1 -knn = KNeighborsClassifier(n_neighbors=best_k) -knn.fit(X, y) - -# 对新数据进行预测 -new_data = read_pcap('new_traffic.pcap') -new_data = [extract_features(flow) for flow in new_data.values()] -new_data = vectorize_data(new_data) -prediction = knn.predict(new_data) -print(prediction)