Delete test4.py
This commit is contained in:
91
test4.py
91
test4.py
@@ -1,91 +0,0 @@
|
|||||||
# Name:fang xiaoyu
|
|
||||||
# Time: 2023/3/11 18:43
|
|
||||||
import numpy as np
|
|
||||||
import pandas as pd
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
import dpkt
|
|
||||||
import socket
|
|
||||||
import struct
|
|
||||||
import binascii
|
|
||||||
from scapy.all import *
|
|
||||||
from sklearn.neighbors import KNeighborsClassifier
|
|
||||||
from sklearn.model_selection import cross_val_score
|
|
||||||
#import tshark
|
|
||||||
import scapy
|
|
||||||
|
|
||||||
# 从pcap文件中读取流量数据
|
|
||||||
def read_pcap(filename):
    """Group the IPv4/TCP packets of a capture file into flows.

    Args:
        filename: Path of the pcap file; handed unchanged to scapy's
            ``rdpcap``.

    Returns:
        A dict mapping ``(src_ip, dst_ip, src_port, dst_port)`` to the list
        of packets seen with that key, in the order they were read. The key
        is directional, so the two directions of one connection end up in
        two separate flows.
    """
    flows = {}
    for packet in rdpcap(filename):
        # TCP may also be carried over IPv6; such a packet has no IP layer
        # and packet[IP] would raise IndexError, so require both layers.
        if not (packet.haslayer(IP) and packet.haslayer(TCP)):
            continue
        ip_layer = packet[IP]
        tcp_layer = packet[TCP]
        key = (ip_layer.src, ip_layer.dst, tcp_layer.sport, tcp_layer.dport)
        flows.setdefault(key, []).append(packet)
    return flows
|
|
||||||
|
|
||||||
# 提取流量数据的特征
|
|
||||||
def extract_features(flow):
    """Summarise one flow as a small numeric feature vector.

    Args:
        flow: Sequence of packets. Each packet must support ``len()`` and
            expose a ``time`` attribute.

    Returns:
        ``[total_len, total_pkts, duration]`` where ``total_len`` is the sum
        of ``len(packet)`` over the flow, ``total_pkts`` is the number of
        packets, and ``duration`` is the ``time`` of the last packet minus
        the ``time`` of the first, as a float. An empty flow yields
        ``[0, 0, 0.0]``.
    """
    total_pkts = len(flow)
    if total_pkts == 0:
        # Nothing to index into; report an all-zero vector instead of
        # failing on flow[0].
        return [0, 0, 0.0]

    total_len = sum(len(packet) for packet in flow)
    # Cast to float so the vector holds plain numbers whatever numeric type
    # the packet timestamps use (presumably Decimal-like for scapy packets
    # -- verify against the scapy version in use).
    duration = float(flow[-1].time - flow[0].time)
    return [total_len, total_pkts, duration]
|
|
||||||
|
|
||||||
# 将特征向量转换为numpy数组
|
|
||||||
def vectorize_data(data):
    """Build a numpy array from a collection of feature vectors.

    Args:
        data: Any array-like accepted by ``np.array``.

    Returns:
        A new ``numpy.ndarray`` holding the contents of ``data``.
    """
    feature_matrix = np.array(data)
    return feature_matrix
|
|
||||||
|
|
||||||
# 读取VPN和non-VPN流量数据
|
|
||||||
vpn_traffic = read_pcap('vpn_traffic.pcap')
nonvpn_traffic = read_pcap('nonvpn_traffic.pcap')

# Extract one feature vector per flow for the VPN and non-VPN captures.
vpn_traffic = [extract_features(flow) for flow in vpn_traffic.values()]
nonvpn_traffic = [extract_features(flow) for flow in nonvpn_traffic.values()]

# Convert the VPN and non-VPN feature lists to numpy arrays.
vpn_traffic = vectorize_data(vpn_traffic)
nonvpn_traffic = vectorize_data(nonvpn_traffic)

# Merge both classes into one data set: VPN flows are labelled 1,
# non-VPN flows 0.
X = np.concatenate((vpn_traffic, nonvpn_traffic))
y = np.concatenate((np.ones(len(vpn_traffic)), np.zeros(len(nonvpn_traffic))))

# Use 5-fold cross-validation to score every candidate K.
k_values = range(1, 31)
cv_scores = []
for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)
    scores = cross_val_score(knn, X, y, cv=5, scoring='accuracy')
    cv_scores.append(scores.mean())

# Visualise the cross-validation results.
plt.plot(k_values, cv_scores)
plt.xlabel('K')
plt.ylabel('Accuracy')
plt.show()

# Train the final model with the best K.
# np.argmax treats NaN as the maximum, so a K whose score is NaN (for
# example one that cross_val_score could not evaluate) would be picked as
# "best"; np.nanargmax ignores NaN entries and raises ValueError only if
# every score is NaN.
best_k = k_values[np.nanargmax(cv_scores)]
knn = KNeighborsClassifier(n_neighbors=best_k)
knn.fit(X, y)

# Predict the class of every flow in the new capture.
new_data = read_pcap('new_traffic.pcap')
new_data = [extract_features(flow) for flow in new_data.values()]
new_data = vectorize_data(new_data)
prediction = knn.predict(new_data)
print(prediction)
|
|
||||||
Reference in New Issue
Block a user