# Name:fang xiaoyu
# Time: 2023/3/11 18:43
"""Classify TCP flows as VPN / non-VPN traffic with a k-nearest-neighbours model.

Flows are read from pcap files, reduced to three per-flow features
(total bytes, packet count, duration) and fed to scikit-learn's
``KNeighborsClassifier``.  ``K`` is chosen by 5-fold cross-validation.
"""
import binascii
import socket
import struct

import dpkt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scapy
from scapy.all import *
# Explicit list of the scapy names this file relies on, so readers and
# linters do not have to resolve them through the star import above.
from scapy.all import IP, IPv6, TCP, rdpcap
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier
#import tshark

# Upper bound of the K values explored during cross-validation.
MAX_K = 30
# Number of cross-validation folds.
CV_FOLDS = 5


def _network_layer(packet):
    """Return the packet's IPv4 or IPv6 layer, or ``None`` if it has neither."""
    if packet.haslayer(IP):
        return packet[IP]
    if packet.haslayer(IPv6):
        return packet[IPv6]
    return None


# Read traffic data from a pcap file.
def read_pcap(filename):
    """Group the TCP packets of a capture file into flows.

    A flow key is the tuple ``(src_ip, dst_ip, src_port, dst_port)``, so the
    two directions of one connection are stored as two separate flows.

    Args:
        filename: Path of the pcap file to read.

    Returns:
        A dict mapping each flow key to the list of its packets, in the
        order they appear in the capture.  Packets without a TCP layer, or
        without an IPv4/IPv6 layer, are ignored.
    """
    flows = {}
    for packet in rdpcap(filename):
        if not packet.haslayer(TCP):
            continue
        # A TCP segment may be carried by IPv6; indexing packet[IP]
        # unconditionally would raise IndexError for such packets.
        ip_layer = _network_layer(packet)
        if ip_layer is None:
            continue
        tcp_layer = packet[TCP]
        key = (ip_layer.src, ip_layer.dst, tcp_layer.sport, tcp_layer.dport)
        flows.setdefault(key, []).append(packet)
    return flows


# Extract the features of one flow.
def extract_features(flow):
    """Compute the feature vector of a single flow.

    Args:
        flow: Sequence of packets belonging to one flow, in capture order.

    Returns:
        ``[total_len, total_pkts, duration]`` where ``total_len`` is the sum
        of ``len(packet)`` over the flow, ``total_pkts`` the number of
        packets and ``duration`` the time between the first and the last
        packet as a ``float``.  An empty flow yields ``[0, 0, 0.0]``.
    """
    if not flow:
        return [0, 0, 0.0]
    total_len = sum(len(packet) for packet in flow)
    # Packet timestamps are not plain floats in scapy; converting here keeps
    # the resulting feature matrix numeric instead of dtype=object.
    duration = float(flow[-1].time - flow[0].time)
    return [total_len, len(flow), duration]


# Convert the feature vectors into a numpy array.
def vectorize_data(data):
    """Return *data* (a list of feature vectors) as a numpy array."""
    return np.array(data)


def _load_flow_features(filename):
    """Read *filename* and return one feature row per TCP flow.

    Raises:
        ValueError: If the capture contains no usable TCP flow; an empty
            matrix would otherwise fail later with an obscure shape error.
    """
    flows = read_pcap(filename)
    if not flows:
        raise ValueError(f"no TCP flows found in {filename!r}")
    return vectorize_data([extract_features(flow) for flow in flows.values()])


def _cross_validate_k(X, y, max_k=MAX_K, n_splits=CV_FOLDS):
    """Score every candidate K with cross-validation.

    K is capped so that it never exceeds the number of samples left for
    training in any fold; a larger K cannot be evaluated by the classifier.

    Returns:
        ``(k_values, cv_scores)`` - the K values tried and the mean accuracy
        obtained for each of them.

    Raises:
        ValueError: If there are too few samples to evaluate even ``K = 1``.
    """
    # Each class contributes at most ceil(count / n_splits) samples to a
    # stratified test fold, which bounds the test fold size from above.
    _, class_counts = np.unique(y, return_counts=True)
    largest_test_fold = int(np.ceil(class_counts / n_splits).sum())
    upper_k = min(max_k, len(X) - largest_test_fold)
    if upper_k < 1:
        raise ValueError(
            f"not enough flows ({len(X)}) for {n_splits}-fold cross-validation"
        )

    k_values = list(range(1, upper_k + 1))
    cv_scores = [
        cross_val_score(
            KNeighborsClassifier(n_neighbors=k), X, y,
            cv=n_splits, scoring='accuracy',
        ).mean()
        for k in k_values
    ]
    return k_values, cv_scores


def main():
    """Train the VPN / non-VPN classifier and label the flows of a new capture."""
    # Read the VPN and non-VPN traffic and extract the per-flow features.
    vpn_traffic = _load_flow_features('vpn_traffic.pcap')
    nonvpn_traffic = _load_flow_features('nonvpn_traffic.pcap')

    # Merge both data sets; label 1 = VPN, label 0 = non-VPN.
    # NOTE(review): the features are fed to KNN unscaled, so the byte count
    # probably dominates the distance - consider standardising them.
    X = np.concatenate((vpn_traffic, nonvpn_traffic))
    y = np.concatenate((np.ones(len(vpn_traffic)), np.zeros(len(nonvpn_traffic))))

    # Use cross-validation to choose the best K.
    k_values, cv_scores = _cross_validate_k(X, y)

    # Visualise the cross-validation results.
    plt.plot(k_values, cv_scores)
    plt.xlabel('K')
    plt.ylabel('Accuracy')
    plt.show()

    # Train with the best K.  nanargmax skips folds that could not be scored;
    # plain argmax would select a NaN entry as the "best" one.
    best_k = k_values[int(np.nanargmax(cv_scores))]
    knn = KNeighborsClassifier(n_neighbors=best_k)
    knn.fit(X, y)

    # Predict the labels of new data.
    new_data = _load_flow_features('new_traffic.pcap')
    prediction = knn.predict(new_data)
    print(prediction)


if __name__ == "__main__":
    main()