92 lines
2.6 KiB
Python
92 lines
2.6 KiB
Python
|
|
# Name:fang xiaoyu
|
||
|
|
# Time: 2023/3/11 18:43
|
||
|
|
import numpy as np
|
||
|
|
import pandas as pd
|
||
|
|
import matplotlib.pyplot as plt
|
||
|
|
import dpkt
|
||
|
|
import socket
|
||
|
|
import struct
|
||
|
|
import binascii
|
||
|
|
from scapy.all import *
|
||
|
|
from sklearn.neighbors import KNeighborsClassifier
|
||
|
|
from sklearn.model_selection import cross_val_score
|
||
|
|
#import tshark
|
||
|
|
import scapy
|
||
|
|
|
||
|
|
# Read traffic data from a pcap file
|
||
|
|
def read_pcap(filename):
    """Group the IPv4/TCP packets of a capture file into flows.

    Args:
        filename: Path of the pcap file; passed unchanged to scapy's
            ``rdpcap``.

    Returns:
        A dict mapping ``(src_ip, dst_ip, src_port, dst_port)`` to the list
        of packets sharing that 4-tuple, in the order ``rdpcap`` yields
        them. The key is directional, so the two directions of one
        connection end up in two separate flows.
    """
    flows = {}
    for packet in rdpcap(filename):
        # A TCP segment is not guaranteed to sit on an IPv4 layer (it can be
        # carried by IPv6, for instance). Indexing packet[IP] on such a
        # packet raises IndexError, so require both layers before reading.
        if not (packet.haslayer(TCP) and packet.haslayer(IP)):
            continue
        ip_layer = packet[IP]
        tcp_layer = packet[TCP]
        key = (ip_layer.src, ip_layer.dst, tcp_layer.sport, tcp_layer.dport)
        # setdefault keeps the result a plain dict while removing the
        # explicit "first packet of this flow" branch.
        flows.setdefault(key, []).append(packet)
    return flows
|
||
|
|
|
||
|
|
# Extract features from the traffic data
|
||
|
|
def extract_features(flow):
    """Summarise one flow as ``[total_bytes, packet_count, duration]``.

    Args:
        flow: Sequence of packets. Each packet must support ``len()`` and
            expose a ``time`` attribute holding its timestamp.

    Returns:
        A three-element list: the sum of ``len(packet)`` over the flow, the
        number of packets, and the span between the earliest and the latest
        timestamp as a ``float``. An empty flow yields ``[0, 0, 0.0]``
        instead of raising ``IndexError``.
    """
    total_pkts = len(flow)
    if total_pkts == 0:
        return [0, 0, 0.0]

    total_len = sum(len(packet) for packet in flow)
    timestamps = [packet.time for packet in flow]
    # max - min rather than last - first, so packets stored out of order
    # cannot produce a negative duration. float() normalises Decimal-like
    # timestamps (as scapy may supply) so the feature vector stays purely
    # numeric when it is later turned into a NumPy array.
    duration = float(max(timestamps) - min(timestamps))
    return [total_len, total_pkts, duration]
|
||
|
|
|
||
|
|
# Convert the feature vectors into a NumPy array
|
||
|
|
def vectorize_data(data):
    """Return a new NumPy array built from ``data``.

    ``data`` is handed to ``np.array`` as-is: nested sequences become array
    dimensions, and an input that is already an array is copied.
    """
    as_array = np.array(data, copy=True)
    return as_array
|
||
|
|
|
||
|
|
# Read the VPN and non-VPN captures, extract one feature vector per flow and
# convert each set of vectors into a NumPy array.
vpn_traffic = vectorize_data(
    [extract_features(flow) for flow in read_pcap('vpn_traffic.pcap').values()]
)
nonvpn_traffic = vectorize_data(
    [extract_features(flow) for flow in read_pcap('nonvpn_traffic.pcap').values()]
)

# Merge both classes into one data set: label 1 = VPN, label 0 = non-VPN.
X = np.concatenate((vpn_traffic, nonvpn_traffic))
y = np.concatenate((np.ones(len(vpn_traffic)), np.zeros(len(nonvpn_traffic))))

# Use cross-validation to choose the best K.
N_FOLDS = 5
MAX_K = 30
# With an integer cv and a classifier, cross_val_score splits with stratified
# folds, so a test fold holds at most ceil(class_size / N_FOLDS) samples of
# each class. K must not exceed the smallest training fold, otherwise scoring
# fails (or is recorded as NaN) and the selection below becomes meaningless.
largest_test_fold = sum(
    -(-class_size // N_FOLDS)
    for class_size in (len(vpn_traffic), len(nonvpn_traffic))
)
max_k = min(MAX_K, len(X) - largest_test_fold)
if max_k < 1:
    raise ValueError('not enough flows to cross-validate a KNN classifier')
k_values = list(range(1, max_k + 1))

cv_scores = []
for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)
    scores = cross_val_score(knn, X, y, cv=N_FOLDS, scoring='accuracy')
    cv_scores.append(scores.mean())

# Visualise the cross-validation results.
plt.plot(k_values, cv_scores)
plt.xlabel('K')
plt.ylabel('Accuracy')
plt.show()

# Train the final model with the best K. nanargmax ignores any fold score
# that was recorded as NaN; plain argmax would pick the NaN entry as the
# "maximum". Ties resolve to the smallest K, as before.
best_k = k_values[int(np.nanargmax(cv_scores))]
knn = KNeighborsClassifier(n_neighbors=best_k)
knn.fit(X, y)

# Predict on new data.
new_data = vectorize_data(
    [extract_features(flow) for flow in read_pcap('new_traffic.pcap').values()]
)
prediction = knn.predict(new_data)
print(prediction)
|