This repository has been archived on 2025-09-14. You can view files and clone it, but cannot push or open issues or pull requests.
Files
grityu-model-duplication/Knn_test.py

43 lines
1.3 KiB
Python
Raw Permalink Normal View History

2023-03-16 22:42:35 +08:00
# Name:fang xiaoyu
# Time: 2023/3/11 22:05
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
2023-03-16 15:16:42 +00:00
# Read the data
2023-03-16 22:42:35 +08:00
# Load the CSV into a DataFrame; the path is relative to the working directory.
data = pd.read_csv(filepath_or_buffer='sufshark_openvpn_tcp+youdao_header.csv')
2023-03-16 15:16:42 +00:00
# Convert the class labels to numbers
2023-03-16 22:42:35 +08:00
# Encode the string class labels as integers: "VPN" -> 1, "Non-VPN" -> 0.
# Series.map + astype is used instead of Series.replace: replace relies on
# pandas implicitly downcasting the object column to int (deprecated since
# pandas 2.2), whereas this produces an int64 column explicitly. A label that
# is not in the mapping becomes NaN and makes astype raise, rather than being
# passed through silently as a string.
data["class1"] = data["class1"].map({"VPN": 1, "Non-VPN": 0}).astype("int64")
2023-03-16 15:16:42 +00:00
# Split into training and test sets
2023-03-16 22:42:35 +08:00
# Hold out 20% of the rows for testing; random_state=42 fixes the shuffle so
# the split is reproducible between runs.
# NOTE(review): the label is selected by position (last column) — presumably
# that is "class1"; confirm against the CSV header.
features = data.iloc[:, :-1]  # every column except the last
labels = data.iloc[:, -1]  # the last column
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, random_state=42)
2023-03-16 15:16:42 +00:00
# Create the KNN classifier
2023-03-16 22:42:35 +08:00
# k-nearest-neighbours classifier that considers the 3 nearest training samples.
knn = KNeighborsClassifier(n_neighbors=3)
2023-03-16 15:16:42 +00:00
# Train the model
2023-03-16 22:42:35 +08:00
# Fit the classifier on the training split.
knn.fit(X=X_train, y=y_train)
2023-03-16 15:16:42 +00:00
# Evaluate model performance on the test set
2023-03-16 22:42:35 +08:00
# Predict a class for every row of the held-out test split.
y_pred = knn.predict(X=X_test)
2023-03-16 15:16:42 +00:00
# accuracy = accuracy_score(y_test, y_pred)
# print(f"Accuracy: {accuracy}")
# Print the test results
2023-03-16 22:42:35 +08:00
# Print the per-class precision / recall / F1 report for the test predictions.
print(classification_report(y_true=y_test, y_pred=y_pred))
#Accuracy: 0.8200959488272921
# precision recall f1-score support
#
# 0 0.79 0.83 0.81 1767
# 1 0.84 0.81 0.83 1985
#
# accuracy 0.82 3752
# macro avg 0.82 0.82 0.82 3752
2023-03-16 15:16:42 +00:00
# weighted avg 0.82 0.82 0.82 3752