# grityu-model-duplication/test2.py

# Name:fang xiaoyu
# Time: 2023/3/10 09:17
import json
import os
import shlex
import subprocess

import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Tranalyzer2 command templates: the first converts the capture, the second
# extracts features. Each "{}" is filled with a file path by build_argv().
# NOTE(review): the flags are carried over unchanged from the original
# script; confirm them against the installed Tranalyzer2 version.
tranalyzer_cmd = "t2 -r {} -w {} -t"
feature_cmd = "t2 -r {} --bidir --tcp --protoid --statsonly --export json"

# Input capture and the intermediate/output files produced by the pipeline.
pcap_file = "20230309_fxy_psiphon_operation.pcapng"
binetflow_file = "capture.binetflow"
features_file = "features.json"


def build_argv(template: str, *paths: str) -> list:
    """Expand a command template into an argument list.

    Each path is quoted before substitution and the result is split with
    shell-like rules, so a path containing spaces or shell metacharacters
    stays a single argument and is never interpreted by a shell.

    Args:
        template: Command template with one "{}" placeholder per path.
        *paths: File paths substituted into the placeholders, in order.

    Returns:
        The command as a list of arguments suitable for subprocess.run.
    """
    quoted = (shlex.quote(path) for path in paths)
    return shlex.split(template.format(*quoted))


def run_tranalyzer(pcap_path: str, flow_path: str) -> None:
    """Run the conversion command on ``pcap_path``, writing to ``flow_path``.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status (the original script silently ignored such failures).
        FileNotFoundError: If the ``t2`` executable cannot be found.
    """
    subprocess.run(build_argv(tranalyzer_cmd, pcap_path, flow_path), check=True)


def extract_features(flow_path: str, output_path: str) -> None:
    """Run the feature-extraction command and save its stdout to a file.

    The output file is created (or truncated) before the command runs,
    which mirrors the shell redirection used previously.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
        FileNotFoundError: If the ``t2`` executable cannot be found.
    """
    with open(output_path, "wb") as sink:
        subprocess.run(build_argv(feature_cmd, flow_path), stdout=sink, check=True)


def load_features(path: str) -> pd.DataFrame:
    """Read the JSON feature file at ``path`` into a DataFrame."""
    with open(path, "r", encoding="utf-8") as f:
        return pd.DataFrame(json.load(f))


def encode_labels(frame: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``frame`` whose ``label`` column is numeric.

    The value "normal" becomes 0; every other value becomes 1.

    Raises:
        KeyError: If ``frame`` has no ``label`` column.
    """
    if "label" not in frame.columns:
        raise KeyError("feature data has no 'label' column")
    return frame.assign(label=frame["label"].ne("normal").astype("int64"))


def train_and_evaluate(frame: pd.DataFrame, n_neighbors: int = 5,
                       test_size: float = 0.2, random_state=None) -> tuple:
    """Train a KNN classifier on ``frame`` and score it on a held-out split.

    Args:
        frame: Feature table containing a numeric ``label`` column.
        n_neighbors: Number of neighbours used by the classifier.
        test_size: Fraction of rows held out for testing.
        random_state: Seed forwarded to train_test_split; the default of
            None keeps the split unseeded, as in the original script.

    Returns:
        A ``(accuracy, confusion_matrix)`` tuple for the test split.
    """
    # NOTE(review): assumes every non-label column is numeric, which the
    # classifier requires -- confirm against the actual feature export.
    X_train, X_test, y_train, y_test = train_test_split(
        frame.drop("label", axis=1),
        frame["label"],
        test_size=test_size,
        random_state=random_state,
    )

    knn_model = KNeighborsClassifier(n_neighbors=n_neighbors)
    knn_model.fit(X_train, y_train)
    y_pred = knn_model.predict(X_test)

    return accuracy_score(y_test, y_pred), confusion_matrix(y_test, y_pred)


def main() -> None:
    """Convert the capture, extract features, train the model, print metrics."""
    run_tranalyzer(pcap_file, binetflow_file)
    extract_features(binetflow_file, features_file)

    frame = encode_labels(load_features(features_file))
    accuracy, matrix = train_and_evaluate(frame)

    print("Accuracy:", accuracy)
    print("Confusion Matrix:\n", matrix)


if __name__ == "__main__":
    main()


# Reference command (not executed by this script):
#   $ tranalyzer2 -r sample.flow -w sample.features -t templates/plugins/ipfix-allfields.txt