# Name: fang xiaoyu
# Time: 2023/3/11 22:28
|
||
|
|
import pandas as pd
|
||
|
|
from sklearn.model_selection import train_test_split
|
||
|
|
from sklearn.tree import DecisionTreeClassifier
|
||
|
|
from sklearn.metrics import accuracy_score
|
||
|
|
from sklearn import preprocessing
|
||
|
|
from sklearn import tree
|
||
|
|
from sklearn.metrics import classification_report
|
||
|
|
|
||
|
|
# Train and evaluate a decision-tree classifier that separates VPN from
# Non-VPN traffic using the features in the CSV file below.

# Column that holds the traffic class, and the integer code for each class name.
LABEL_COLUMN = "class1"
LABEL_CODES = {"VPN": 1, "Non-VPN": 0}

# One seed shared by the train/test split and the tree, so a run can be
# repeated and its metrics compared with a later run.
RANDOM_STATE = 42

# Load the CSV file.
data = pd.read_csv('sufshark_openvpn_tcp+youdao_header.csv')

# Convert the class names to numeric labels. Values that are not in
# LABEL_CODES pass through unchanged (the same result `Series.replace` gave),
# but going through `map` avoids relying on pandas' deprecated silent
# downcasting of the replaced column from object to integer dtype.
data[LABEL_COLUMN] = data[LABEL_COLUMN].map(
    lambda value: LABEL_CODES.get(value, value)
)

# Separate features and label by column NAME rather than by position: the
# encoding above targets LABEL_COLUMN, so the split must use the same column
# even if it is not the last one in the file (otherwise the label would leak
# into the feature matrix).
X = data.drop(columns=[LABEL_COLUMN])
y = data[LABEL_COLUMN]

# Split the dataset: 70% for training, 30% held out for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=RANDOM_STATE
)

# Initialise the classifier and train the model. The seed is fixed because
# scikit-learn permutes the candidate features at every split, so an unseeded
# tree can differ between runs even on identical training data.
clf = DecisionTreeClassifier(criterion="entropy", random_state=RANDOM_STATE)
clf.fit(X_train, y_train)

# Predict the labels of the held-out test set.
y_pred = clf.predict(X_test)

# Evaluate the classifier's performance.
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# Visualise the decision tree.
# NOTE(review): nothing in this script shows or saves the resulting figure, so
# when run non-interactively the plot is presumably never seen - confirm
# whether a show/savefig step is expected here.
tree.plot_tree(clf)
|
||
|
|
|
||
|
|
# Example output from one recorded run (exact figures depend on the random
# train/test split):
#
# Accuracy: 0.8841506751954513
#               precision    recall  f1-score   support
#
#            0       0.89      0.86      0.87      2708
#            1       0.88      0.90      0.89      2920
#
#     accuracy                           0.88      5628
#    macro avg       0.88      0.88      0.88      5628
# weighted avg       0.88      0.88      0.88      5628
|