# Name: fang xiaoyu
# Time: 2023/3/11 22:28
"""Train and evaluate a decision tree that separates VPN from Non-VPN flows.

The script loads a flow-feature CSV, encodes the ``class1`` label column as
1 (VPN) / 0 (Non-VPN), holds out 30% of the rows for testing, fits an
entropy-based decision tree, prints accuracy plus a per-class report, and
finally draws the fitted tree.
"""
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn import preprocessing
from sklearn import tree
from sklearn.metrics import classification_report

CSV_PATH = 'sufshark_openvpn_tcp+youdao_header.csv'
LABEL_COLUMN = "class1"
LABEL_MAP = {"VPN": 1, "Non-VPN": 0}
TEST_SIZE = 0.3
# Fixed seed so the split, the fitted tree and the printed metrics are
# reproducible from one run to the next.
RANDOM_STATE = 42


def main():
    """Run the full load -> encode -> split -> train -> evaluate pipeline.

    Raises:
        ValueError: if the label column holds a value that is neither a key
            of ``LABEL_MAP`` nor an already-encoded 0/1.
    """
    # Load the CSV file.
    data = pd.read_csv(CSV_PATH)

    # Convert the textual class names to numeric labels. Values that are not
    # in LABEL_MAP are passed through unchanged (as ``replace`` did) and then
    # validated, so an unexpected label fails here with a clear message
    # instead of surfacing later inside the classifier.
    encoded = data[LABEL_COLUMN].map(lambda value: LABEL_MAP.get(value, value))
    unexpected = set(encoded.unique()) - set(LABEL_MAP.values())
    if unexpected:
        raise ValueError(
            f"Unexpected values in column {LABEL_COLUMN!r}: "
            f"{sorted(unexpected, key=repr)}"
        )
    data[LABEL_COLUMN] = encoded.astype(int)

    # Separate features and target by column name, so the target is always
    # the column that was just encoded, wherever it sits in the CSV.
    X = data.drop(columns=LABEL_COLUMN)
    y = data[LABEL_COLUMN]

    # Split the dataset; stratifying keeps the VPN / Non-VPN ratio the same
    # in the training and test partitions.
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=TEST_SIZE,
        random_state=RANDOM_STATE,
        stratify=y,
    )

    # Initialise the classifier and train the model.
    clf = DecisionTreeClassifier(criterion="entropy", random_state=RANDOM_STATE)
    clf.fit(X_train, y_train)

    # Predict on the held-out test set.
    y_pred = clf.predict(X_test)

    # Evaluate the classifier.
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print(classification_report(y_test, y_pred))

    # Visualise the decision tree.
    # NOTE(review): this script never shows or saves the figure, so when run
    # non-interactively the drawing is discarded - confirm whether a
    # savefig/show step is wanted.
    tree.plot_tree(clf)


if __name__ == "__main__":
    main()

# Results recorded by the author from an earlier, unseeded run:
# Accuracy: 0.8841506751954513
#               precision    recall  f1-score   support
#
#            0       0.89      0.86      0.87      2708
#            1       0.88      0.90      0.89      2920
#
#     accuracy                           0.88      5628
#    macro avg       0.88      0.88      0.88      5628
# weighted avg       0.88      0.88      0.88      5628