five modes duplication
This commit is contained in:
91
test4.py
Normal file
91
test4.py
Normal file
@@ -0,0 +1,91 @@
|
||||
# Name:fang xiaoyu
|
||||
# Time: 2023/3/11 18:43
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import dpkt
|
||||
import socket
|
||||
import struct
|
||||
import binascii
|
||||
from scapy.all import *
|
||||
from sklearn.neighbors import KNeighborsClassifier
|
||||
from sklearn.model_selection import cross_val_score
|
||||
#import tshark
|
||||
import scapy
|
||||
|
||||
# Read traffic data from a pcap file
|
||||
def read_pcap(filename):
    """Group the TCP-over-IPv4 packets of a capture file into flows.

    Args:
        filename: Path of the pcap file; handed unchanged to ``rdpcap``.

    Returns:
        dict mapping ``(src_ip, dst_ip, src_port, dst_port)`` to the list of
        packets carrying that 4-tuple, in the order ``rdpcap`` yields them.
        The key is direction-sensitive, so the two directions of one
        connection end up in two separate flows.
    """
    flows = {}
    for packet in rdpcap(filename):
        # A TCP segment may be carried by something other than IPv4 (for
        # example IPv6); such a packet has no IP layer and packet[IP] would
        # raise IndexError, so it is skipped.
        if not (packet.haslayer(IP) and packet.haslayer(TCP)):
            continue
        ip_layer = packet[IP]
        tcp_layer = packet[TCP]
        key = (ip_layer.src, ip_layer.dst, tcp_layer.sport, tcp_layer.dport)
        flows.setdefault(key, []).append(packet)
    return flows
|
||||
|
||||
# Extract features from the traffic data
|
||||
def extract_features(flow):
    """Summarise one flow as ``[total_bytes, packet_count, duration]``.

    Args:
        flow: Sequence of packets. Each packet must support ``len()`` and
            expose a ``time`` attribute.

    Returns:
        list with three entries: the sum of ``len(packet)`` over the flow,
        the number of packets, and the difference between the ``time`` of
        the last and of the first packet converted to ``float``.
        An empty flow yields ``[0, 0, 0.0]``.
    """
    if not flow:
        # There is no first/last packet to read a timestamp from; return an
        # all-zero vector instead of failing with IndexError on flow[0].
        return [0, 0, 0.0]

    total_len = sum(len(packet) for packet in flow)
    # Cast to float so the vector contains plain numbers only. If the
    # timestamps are not floats (e.g. a Decimal-based type -- confirm for the
    # scapy version in use), np.array() would otherwise build an object array.
    duration = float(flow[-1].time - flow[0].time)
    return [total_len, len(flow), duration]
|
||||
|
||||
# Convert the feature vectors into a NumPy array
|
||||
def vectorize_data(data):
    """Convert a collection of feature vectors into a NumPy array.

    Args:
        data: Nested sequence of numbers, one inner sequence per sample.

    Returns:
        numpy.ndarray created by ``np.array(data)``.
    """
    return np.array(data)
|
||||
|
||||
# Read the VPN and non-VPN captures and turn every flow into a feature vector
vpn_traffic = vectorize_data(
    [extract_features(flow) for flow in read_pcap('vpn_traffic.pcap').values()]
)
nonvpn_traffic = vectorize_data(
    [extract_features(flow) for flow in read_pcap('nonvpn_traffic.pcap').values()]
)

# Merge both classes into one training set: label 1 = VPN, label 0 = non-VPN
X = np.concatenate((vpn_traffic, nonvpn_traffic))
y = np.concatenate((np.ones(len(vpn_traffic)), np.zeros(len(nonvpn_traffic))))

# NOTE(review): the features are used unscaled, so the k-NN distance is
# dominated by whichever feature has the largest numeric range.

# Use cross-validation to select the best K
CV_FOLDS = 5
MAX_K = 30

# A k-NN query needs at least K training samples. With an integer `cv` and a
# classifier, cross_val_score splits with stratified folds, which hold out at
# most ceil(class_size / CV_FOLDS) samples per class. Any K above the smallest
# resulting training split would fail or be scored NaN, so it is not tried.
# (-(-a // b) is integer ceil-division.)
largest_test_fold = sum(
    -(-len(part) // CV_FOLDS) for part in (vpn_traffic, nonvpn_traffic)
)
max_k = max(1, min(MAX_K, len(X) - largest_test_fold))
k_values = list(range(1, max_k + 1))

cv_scores = []
for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)
    scores = cross_val_score(knn, X, y, cv=CV_FOLDS, scoring='accuracy')
    cv_scores.append(scores.mean())

# Visualise the cross-validation results
plt.plot(k_values, cv_scores)
plt.xlabel('K')
plt.ylabel('Accuracy')
plt.show()

# Train the final model with the best K.
# nanargmax ignores NaN scores (a failed fold); plain argmax would return the
# position of the first NaN and so pick a K that did not even evaluate.
# Raises ValueError if every score is NaN.
best_k = k_values[int(np.nanargmax(cv_scores))]
knn = KNeighborsClassifier(n_neighbors=best_k)
knn.fit(X, y)

# Predict on new data
new_data = read_pcap('new_traffic.pcap')
new_data = [extract_features(flow) for flow in new_data.values()]
new_data = vectorize_data(new_data)
prediction = knn.predict(new_data)
print(prediction)
|
||||
Reference in New Issue
Block a user