# Name:fang xiaoyu
# Time: 2023/3/11 23:18
#
# Train a random-forest classifier that separates VPN from Non-VPN records
# in a CSV file, print accuracy / precision / recall / F1 and a per-class
# report for a held-out 30% test split, then show the confusion matrix as a
# heatmap.
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# Load the data set.
data = pd.read_csv('sufshark_openvpn_tcp+youdao_header.csv')

# Encode the class names as numeric labels (VPN -> 1, Non-VPN -> 0).
# The explicit astype(int) pins an integer dtype instead of relying on
# replace() implicitly downcasting the object column, which newer pandas
# releases deprecate.
data["class1"] = data["class1"].replace({"VPN": 1, "Non-VPN": 0}).astype(int)

# Separate the features from the label column.
X = data.drop('class1', axis=1)
y = data['class1']

# Split into training and test sets (70% / 30%, fixed seed).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Build the random-forest classifier.
# max_features='sqrt' is what the former 'auto' value meant for classifiers;
# 'auto' was deprecated in scikit-learn 1.1 and removed in 1.3, so spelling
# it out keeps the same model while running on current releases.
rfc = RandomForestClassifier(
    n_estimators=100,
    criterion='gini',
    max_depth=10,
    min_samples_split=2,
    min_samples_leaf=1,
    max_features='sqrt',
    bootstrap=True,
    random_state=42,
)

# Train the model.
rfc.fit(X_train, y_train)

# Predict the test set.
y_pred = rfc.predict(X_test)

# Compute the evaluation metrics on the test set.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print('Accuracy:', accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("f1_score:", f1)
print(classification_report(y_test, y_pred))

# Results recorded by the original author:
# Accuracy: 0.8909026297085999
# Precision: 0.8626424391746227
# Recall: 0.9434152913438868
# f1_score: 0.9012226512226512

# Visualise the confusion matrix.
conf_mat = confusion_matrix(y_test, y_pred)
sns.heatmap(conf_mat, annot=True, cmap='Blues', fmt='g')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()