five modes duplication

This commit is contained in:
fangxiaoyu
2023-03-16 22:42:35 +08:00
commit a9f5451c36
34 changed files with 28042 additions and 0 deletions

55
Randomforest_test.py Normal file
View File

@@ -0,0 +1,55 @@
# Name:fang xiaoyu
# Time: 2023/3/11 23:18
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix , precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
# Load the dataset from the CSV file.
data = pd.read_csv('sufshark_openvpn_tcp+youdao_header.csv')
# Convert the class names in 'class1' to numeric labels (VPN -> 1, Non-VPN -> 0).
data["class1"] = data["class1"].replace({"VPN": 1, "Non-VPN": 0})
# Separate the features (every column except 'class1') from the label column.
X = data.drop('class1', axis=1)
y = data['class1']
# Hold out 30% of the rows as the test set; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Create the random forest classifier.
# NOTE: max_features='auto' was deprecated in scikit-learn 1.1 and removed in 1.3.
# For classifiers 'auto' was an alias for 'sqrt', so 'sqrt' builds the same model
# while also working on current scikit-learn releases.
rfc = RandomForestClassifier(
    n_estimators=100,
    criterion='gini',
    max_depth=10,
    min_samples_split=2,
    min_samples_leaf=1,
    max_features='sqrt',
    bootstrap=True,
    random_state=42,
)
# Train the model.
rfc.fit(X_train, y_train)
# Predict the labels of the test set.
y_pred = rfc.predict(X_test)
# Compute the evaluation metrics (precision/recall/F1 use the default positive label, 1).
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
print('Accuracy:', accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("f1_score:", f1)
print(classification_report(y_test, y_pred))
# Values left in the file by the author, presumably from an earlier run:
# Accuracy: 0.8909026297085999
# Precision: 0.8626424391746227
# Recall: 0.9434152913438868
# f1_score: 0.9012226512226512
# Visualize the confusion matrix as an annotated heatmap.
conf_mat = confusion_matrix(y_test, y_pred)
sns.heatmap(conf_mat, annot=True, cmap='Blues', fmt='g')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()