This repository has been archived on 2025-09-14. You can view files and clone it, but cannot push or open issues or pull requests.
Files
grityu-model-duplication/Randomforest_test.py

56 lines
1.7 KiB
Python
Raw Normal View History

2023-03-16 22:42:35 +08:00
# Name:fang xiaoyu
# Time: 2023/3/11 23:18
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix , precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
# Load the dataset.
data = pd.read_csv('sufshark_openvpn_tcp+youdao_header.csv')
# Convert the class names to numeric labels (VPN -> 1, Non-VPN -> 0).
# The explicit integer cast keeps the target numeric even on pandas versions
# where `replace` no longer downcasts an object column automatically; an
# object-dtype target would be rejected by scikit-learn as an unknown label type.
# NOTE(review): assumes "class1" only contains "VPN" / "Non-VPN" - confirm.
data["class1"] = data["class1"].replace({"VPN": 1, "Non-VPN": 0}).astype(int)
# Separate the features from the label column.
X = data.drop('class1', axis=1)
y = data['class1']
# Split into training and test sets (70% / 30%, fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Create the random forest classifier.
# max_features='auto' was deprecated in scikit-learn 1.1 and removed in 1.3;
# for classifiers it was an alias of 'sqrt', so 'sqrt' keeps the same model
# while remaining valid on current releases.
rfc = RandomForestClassifier(n_estimators=100, criterion='gini', max_depth=10, min_samples_split=2, min_samples_leaf=1,
                             max_features='sqrt', bootstrap=True, random_state=42)
# Train the model.
rfc.fit(X_train, y_train)
# Predict on the test set.
y_pred = rfc.predict(X_test)
# Compute the evaluation metrics (accuracy, precision, recall, F1).
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
print('Accuracy:', accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("f1_score:", f1)
print(classification_report(y_test, y_pred))
# Values noted by the original author (presumably from an earlier run):
# Accuracy: 0.8909026297085999
# Precision: 0.8626424391746227
# Recall: 0.9434152913438868
# f1_score: 0.9012226512226512
# Visualize the confusion matrix (rows: actual class, columns: predicted class).
conf_mat = confusion_matrix(y_test, y_pred)
sns.heatmap(conf_mat, annot=True, cmap='Blues', fmt='g')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()