This repository has been archived on 2025-09-14. You can view files and clone it, but cannot push or open issues or pull requests.
Files
grityu-model-duplication/test2.py
2023-03-16 22:42:35 +08:00

49 lines
1.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Name:fang xiaoyu
# Time: 2023/3/10 09:17
import json
import os
import shlex
import subprocess

import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
# Command templates for the two Tranalyzer2 (t2) invocations: the first
# converts the capture into a flow file, the second extracts features as JSON.
# NOTE(review): the flags below are passed through unchanged; in particular
# `-t` is given without a value here -- confirm against the Tranalyzer2 CLI.
tranalyzer_cmd = "t2 -r {} -w {} -t"
feature_cmd = "t2 -r {} --bidir --tcp --protoid --statsonly --export json"
# Input capture file and the intermediate flow file written by the first step.
pcap_file = "20230309_fxy_psiphon_operation.pcapng"
binetflow_file = "capture.binetflow"
# Convert the pcap file to the binetflow file.
# The command is executed as an argument list (no shell). Each path is quoted
# before being substituted into the template so that shlex.split() returns it
# as a single argument even if it contains spaces. check=True raises
# subprocess.CalledProcessError on a non-zero exit status instead of letting
# the script continue with missing or partial output.
subprocess.run(
    shlex.split(
        tranalyzer_cmd.format(shlex.quote(pcap_file), shlex.quote(binetflow_file))
    ),
    check=True,
)
# Extract the features and save the command's stdout to features.json
# (replaces the former shell redirection "> features.json").
with open("features.json", "wb") as features_out:
    subprocess.run(
        shlex.split(feature_cmd.format(shlex.quote(binetflow_file))),
        stdout=features_out,
        check=True,
    )
# Read the extracted feature data from features.json and convert it into a
# DataFrame. The file is decoded explicitly as UTF-8 so the result does not
# depend on the platform's default text encoding.
with open("features.json", "r", encoding="utf-8") as f:
    data = json.load(f)
df = pd.DataFrame(data)
# Encode the label column as integers: 0 for rows labelled "normal",
# 1 for every other value.
df["label"] = (df["label"] != "normal").astype("int64")
# Split the data set into training and test sets, holding out 20% of the rows
# for testing. The seed is fixed so that the split -- and therefore the
# accuracy and confusion matrix reported from it -- is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop("label", axis=1),
    df["label"],
    test_size=0.2,
    random_state=42,
)
# Build a k-nearest-neighbours classifier that votes over 5 neighbours and
# train it on the training split in one step (fit() returns the estimator).
# NOTE(review): presumably every remaining feature column is numeric --
# confirm against the contents of features.json.
knn_model = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
# Predict labels for the held-out test rows.
y_pred = knn_model.predict(X_test)
# Print the evaluation metrics for the test-set predictions: overall accuracy
# followed by the confusion matrix.
print("Accuracy:", accuracy_score(y_test, y_pred))
# The label order is pinned to [0, 1] so the matrix is always 2x2, even when
# one of the two classes is absent from both y_test and y_pred in this split.
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred, labels=[0, 1]))
# Example Tranalyzer2 invocation, kept for reference:
#$ tranalyzer2 -r sample.flow -w sample.features -t templates/plugins/ipfix-allfields.txt