This repository has been archived on 2025-09-14. You can view files and clone it, but cannot push or open issues or pull requests.
Files
grityu-model-duplication/XGBoost_test.py
2023-03-16 22:42:35 +08:00

68 lines
1.9 KiB
Python

# Name:fang xiaoyu
# Time: 2023/3/11 22:35
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix , precision_score, recall_score, f1_score
import xgboost as xgb
from sklearn.metrics import classification_report
# Load the labelled flow records from the CSV file.
df = pd.read_csv('sufshark_openvpn_tcp+youdao_header.csv')

# Binarise the label column: rows tagged 'VPN' become 1, everything else 0.
df['class1'] = df['class1'].eq('VPN').astype(int)

# Hold out 20% of the rows as the test set (fixed seed for reproducibility).
train, test = train_test_split(df, test_size=0.2, random_state=42)

# Wrap features and labels in XGBoost's DMatrix container; every column
# except the label is passed to the model as a feature.
train_dmatrix = xgb.DMatrix(
    data=train.drop(columns=['class1']),
    label=train['class1'],
)
test_dmatrix = xgb.DMatrix(
    data=test.drop(columns=['class1']),
    label=test['class1'],
)
# Hyper-parameters handed to the XGBoost trainer.
params = {
    # Binary classification; predictions are probabilities of class 1.
    'objective': 'binary:logistic',
    # Metric reported on the evaluation set(s): area under the ROC curve.
    'eval_metric': 'auc',
    # Learning rate (step-size shrinkage applied after each boosting round).
    'eta': 0.1,
    # Maximum depth of each tree.
    'max_depth': 6,
    # Minimum sum of instance weight required in a child node.
    'min_child_weight': 1,
    # Fraction of training rows sampled for each tree.
    'subsample': 0.8,
    # Fraction of feature columns sampled for each tree.
    'colsample_bytree': 0.8,
    # Random seed, so row/column sampling is repeatable.
    'seed': 42,
}
# Train the XGBoost model.
#
# Early stopping needs a held-out set to monitor. Monitoring the test set
# would let the test data choose the number of boosting rounds, so the
# metrics reported on that same test set would be optimistically biased.
# Instead, carve a validation split out of the training rows and keep the
# test set untouched until the final evaluation.
fit_rows, valid_rows = train_test_split(train, test_size=0.2, random_state=42)
fit_dmatrix = xgb.DMatrix(
    data=fit_rows.drop(['class1'], axis=1),
    label=fit_rows['class1'],
)
valid_dmatrix = xgb.DMatrix(
    data=valid_rows.drop(['class1'], axis=1),
    label=valid_rows['class1'],
)

xgb_model = xgb.train(
    params=params,
    dtrain=fit_dmatrix,
    num_boost_round=100,  # upper bound on boosting rounds
    # Stop when the validation metric has not improved for 10 rounds.
    early_stopping_rounds=10,
    evals=[(valid_dmatrix, 'validation')],
)
# Predict class-1 probabilities for the test set.
#
# xgb.train() with early stopping returns the booster from the LAST round,
# not the best one, so a plain predict() would also use the trees added after
# the best iteration. Restrict prediction to the trees up to and including
# the best iteration when that information is available.
best_iteration = getattr(xgb_model, 'best_iteration', None)
if best_iteration is None:
    # No early-stopping information recorded: fall back to all trees.
    y_pred = xgb_model.predict(test_dmatrix)
else:
    y_pred = xgb_model.predict(
        test_dmatrix,
        iteration_range=(0, best_iteration + 1),
    )

# Convert probabilities to hard labels using a 0.5 decision threshold.
y_pred_label = np.where(y_pred > 0.5, 1, 0)

# Score the predictions against the ground-truth test labels.
y_true = test['class1']
accuracy = accuracy_score(y_true, y_pred_label)
precision = precision_score(y_true, y_pred_label)
recall = recall_score(y_true, y_pred_label)
f1 = f1_score(y_true, y_pred_label)
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1-score: {f1}")
print(classification_report(y_true, y_pred_label))

# Scores recorded in the original script (predicting with all trees);
# re-run to refresh them for the current configuration:
# Accuracy: 0.9139125799573561
# Precision: 0.8972275334608031
# Recall: 0.9455919395465995
# F1-score: 0.9207750797154771