# Name:fang xiaoyu
# Time: 2023/3/10 09:17
"""Run Tranalyzer2 on a capture, load the exported features and evaluate a KNN classifier.

Pipeline (see ``main``):
    1. Run the Tranalyzer2 conversion command on the capture file.
    2. Run the feature-export command and store its stdout in a JSON file.
    3. Load the JSON into a DataFrame and binarize the ``label`` column.
    4. Split 80/20, fit a 5-neighbour KNN model, print accuracy and the
       confusion matrix.
"""

import json
import os  # retained from the original import list; other code may rely on it
import shlex
import subprocess
from typing import List

import pandas as pd

# Tranalyzer2 command template and feature-extraction command template.
# Each "{}" placeholder is filled with a (shell-quoted) file path.
tranalyzer_cmd = "t2 -r {} -w {} -t"
feature_cmd = "t2 -r {} --bidir --tcp --protoid --statsonly --export json"

# Input capture path and intermediate/output file paths.
pcap_file = "20230309_fxy_psiphon_operation.pcapng"
binetflow_file = "capture.binetflow"
features_file = "features.json"


def build_argv(template: str, *paths: str) -> List[str]:
    """Expand a command template into an argument list for ``subprocess``.

    Each path is shell-quoted before substitution and the result is split
    with ``shlex.split``, so a path containing spaces or shell
    metacharacters always ends up as exactly one argument.

    Args:
        template: Command line with one ``{}`` placeholder per path.
        *paths: Values substituted into the placeholders, in order.

    Returns:
        The command as a list of arguments (program name first).
    """
    quoted = [shlex.quote(str(path)) for path in paths]
    return shlex.split(template.format(*quoted))


def convert_capture(pcap_path: str, flow_path: str) -> None:
    """Run the conversion command (pcap file -> binetflow file).

    Raises:
        subprocess.CalledProcessError: if the command exits non-zero.
        FileNotFoundError: if the ``t2`` executable cannot be found.
    """
    subprocess.run(build_argv(tranalyzer_cmd, pcap_path, flow_path), check=True)


def export_features(flow_path: str, json_path: str) -> None:
    """Run the feature-extraction command and save its stdout to *json_path*.

    Raises:
        subprocess.CalledProcessError: if the command exits non-zero.
        FileNotFoundError: if the ``t2`` executable cannot be found.
    """
    # Binary mode: the child process writes raw bytes straight to the file,
    # which replaces the former "> features.json" shell redirection.
    with open(json_path, "wb") as out:
        subprocess.run(build_argv(feature_cmd, flow_path), stdout=out, check=True)


def load_features(json_path: str) -> pd.DataFrame:
    """Read the feature data from the JSON file and return it as a DataFrame."""
    with open(json_path, "r", encoding="utf-8") as f:
        return pd.DataFrame(json.load(f))


def binarize_labels(labels: pd.Series) -> pd.Series:
    """Convert the label column to numeric form: 0 for "normal", 1 otherwise."""
    return labels.ne("normal").astype(int)


def train_and_evaluate(df: pd.DataFrame) -> None:
    """Fit a KNN classifier on *df* and print accuracy and the confusion matrix.

    *df* must contain a ``label`` column; every other column is used as a
    feature.

    NOTE: the train/test split is not seeded, so the printed metrics vary
    from run to run.
    """
    # scikit-learn is imported here rather than at module scope so that the
    # command-building and label helpers stay importable without it.
    from sklearn.metrics import accuracy_score, confusion_matrix
    from sklearn.model_selection import train_test_split
    from sklearn.neighbors import KNeighborsClassifier

    # Split the data set into a training set and a test set (80/20).
    X_train, X_test, y_train, y_test = train_test_split(
        df.drop("label", axis=1), df["label"], test_size=0.2
    )

    # Create the KNN classifier with 5 neighbours.
    knn_model = KNeighborsClassifier(n_neighbors=5)

    # Train the model and predict the test-set labels.
    knn_model.fit(X_train, y_train)
    y_pred = knn_model.predict(X_test)

    # Report accuracy and the confusion matrix.
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


def main() -> None:
    """Run the full pipeline: convert, export features, load, train, evaluate."""
    convert_capture(pcap_file, binetflow_file)
    export_features(binetflow_file, features_file)

    df = load_features(features_file)
    df["label"] = binarize_labels(df["label"])

    train_and_evaluate(df)


if __name__ == "__main__":
    main()

#$ tranalyzer2 -r sample.flow -w sample.features -t templates/plugins/ipfix-allfields.txt