# Name:fang xiaoyu
# Time: 2023/3/11 22:35
"""Train and evaluate an XGBoost binary classifier for VPN vs. non-VPN traffic.

The script reads a CSV file, maps the ``class1`` column to a binary label
(1 for the string ``'VPN'``, 0 for anything else), trains a gradient-boosted
model with early stopping, and prints accuracy, precision, recall, F1 and a
classification report for a held-out test set.

Early stopping is monitored on a validation split carved out of the training
data, so the test set is used only for the final metrics.
"""
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split

DATA_PATH = 'sufshark_openvpn_tcp+youdao_header.csv'
LABEL_COLUMN = 'class1'
RANDOM_STATE = 42
NUM_BOOST_ROUND = 100
EARLY_STOPPING_ROUNDS = 10

# XGBoost model parameters.
PARAMS = {
    'objective': 'binary:logistic',
    'eval_metric': 'auc',
    'eta': 0.1,
    'max_depth': 6,
    'min_child_weight': 1,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'seed': 42,
}


def _load_dataset(path):
    """Read the CSV at *path* and binarize the label column (1 = 'VPN', else 0)."""
    df = pd.read_csv(path)
    df[LABEL_COLUMN] = np.where(df[LABEL_COLUMN] == 'VPN', 1, 0)
    return df


def _to_dmatrix(frame):
    """Build a DMatrix from *frame*, using every column except the label as a feature."""
    return xgb.DMatrix(
        data=frame.drop([LABEL_COLUMN], axis=1),
        label=frame[LABEL_COLUMN],
    )


def _train(fit_frame, valid_frame):
    """Train a booster on *fit_frame*, early-stopping on *valid_frame*."""
    return xgb.train(
        params=PARAMS,
        dtrain=_to_dmatrix(fit_frame),
        num_boost_round=NUM_BOOST_ROUND,
        early_stopping_rounds=EARLY_STOPPING_ROUNDS,
        evals=[(_to_dmatrix(valid_frame), 'validation')],
    )


def _report(y_true, y_pred_label):
    """Print accuracy, precision, recall, F1 and the classification report."""
    accuracy = accuracy_score(y_true, y_pred_label)
    precision = precision_score(y_true, y_pred_label)
    recall = recall_score(y_true, y_pred_label)
    f1 = f1_score(y_true, y_pred_label)

    print(f"Accuracy: {accuracy}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1-score: {f1}")
    print(classification_report(y_true, y_pred_label))


def main():
    """Run the full load / split / train / evaluate pipeline."""
    df = _load_dataset(DATA_PATH)

    # Outer split: 20% of the rows are held out for the final evaluation.
    train, test = train_test_split(df, test_size=0.2, random_state=RANDOM_STATE)

    # Inner split: early stopping must not look at the test set, otherwise the
    # number of boosting rounds is tuned on the very data used for reporting.
    fit_frame, valid_frame = train_test_split(
        train, test_size=0.2, random_state=RANDOM_STATE
    )

    xgb_model = _train(fit_frame, valid_frame)

    # xgb.train returns the model from the last round, not the best one, so
    # restrict prediction to the trees up to the best iteration.
    y_pred = xgb_model.predict(
        _to_dmatrix(test),
        iteration_range=(0, xgb_model.best_iteration + 1),
    )

    # Convert predicted probabilities to class labels with a 0.5 threshold.
    y_pred_label = np.where(y_pred > 0.5, 1, 0)

    _report(test[LABEL_COLUMN], y_pred_label)


if __name__ == "__main__":
    main()

# Results noted in the original script (presumably from a run in which early
# stopping was monitored on the test set; expect different numbers now):
# Accuracy: 0.9139125799573561
# Precision: 0.8972275334608031
# Recall: 0.9455919395465995
# F1-score: 0.9207750797154771