Delete test_2.py
test_2.py (167 deletions)
@@ -1,167 +0,0 @@
# # Name: fang xiaoyu
# # Time: 2023/3/10 23:53
# # Import the required libraries
# import io
# import pandas as pd
# from sklearn.neighbors import KNeighborsClassifier
# from sklearn.model_selection import train_test_split
# from subprocess import run, PIPE
#
# # Analyze the PCAP file with Tranalyzer2 and extract TCP traffic features
# def extract_features(pcap_file):
#     # Define the Tranalyzer2 command-line arguments
#     tranalyzer_args = [
#         "t2build", "-x", "tcp", "--no-tests", "--no-progress",
#         "--tcp-fields", "sip dip sport dport tcp_flags tcp_flags_str bytes",
#         "--histo", "sip dip sport dport", "--top", "sip dip sport dport bytes",
#         "--both-ways", "--export", "csv", "-w", "-"
#     ]
#     # Run the tshark/Tranalyzer2 pipeline and capture the result as CSV
#     result = run(["sudo", "tshark", "-r", pcap_file, "-w", "-", "-F", "pcapng", "-Y", "tcp"],
#                  stdout=PIPE, check=True)
#     result = run(["sudo", "tranalyzer2", "-r", "-", *tranalyzer_args], input=result.stdout, stdout=PIPE, check=True)
#     result_csv = result.stdout.decode()
#     # Parse the CSV and return a feature DataFrame
#     features_df = pd.read_csv(io.StringIO(result_csv))
#     return features_df
#
# # Load the training dataset and extract features
# train_pcap = "20230309_fxy_psiphon_operation.pcapng"
# train_labels = pd.read_csv("/path/to/train_labels.csv")
# train_features = extract_features(train_pcap)
#
# # Merge the labels and features into a single DataFrame
# train_data = pd.merge(train_features, train_labels, on="flow_key")
#
# # Split the dataset into training and test sets
# X_train, X_test, y_train, y_test = train_test_split(train_data.drop(["flow_key", "label"], axis=1), train_data["label"], test_size=0.3)
#
# # Train the KNN model
# knn = KNeighborsClassifier(n_neighbors=5)
# knn.fit(X_train, y_train)
#
# # Evaluate model performance on the test set
# accuracy = knn.score(X_test, y_test)
# print("Accuracy:", accuracy)

'''
import subprocess
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Step 1: Install Tranalyzer2 and required Python modules

# Step 2: Extract features using Tranalyzer2
pcap_file = '20230309_fxy_psiphon_operation.pcapng'
output_file = 'output.csv'
command = f'sudo t2 -r {pcap_file} -w {output_file} -c basic'
subprocess.call(command, shell=True)

# Step 3: Load features into a Pandas dataframe and convert to NumPy array
data = pd.read_csv(output_file)
features = np.array(data)

# Step 4: Prepare the dataset
X = features[:, :-1]
y = features[:, -1]

le = LabelEncoder()
y = le.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# Step 5: Train the KNN model
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

# Step 6: Evaluate the model
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)
'''

import pandas as pd
import numpy as np
import os
import glob
import subprocess
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score


# Extract traffic features from a pcap file
def extract_features(pcap_file):
    # Run the tranalyzer2 command-line tool
    command = f"t2 -r {pcap_file} -w {pcap_file}.csv -f features.csv"
    subprocess.run(command, shell=True)
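    # Note: shell=True with an interpolated filename is fragile if the path
    # contains spaces or shell metacharacters. An argument-list form (same
    # flags as above, assumed unchanged) avoids the shell entirely:
    # subprocess.run(["t2", "-r", pcap_file, "-w", f"{pcap_file}.csv",
    #                 "-f", "features.csv"], check=True)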

    # Read the feature data
    features = pd.read_csv('features.csv', skiprows=6, header=None, delimiter=';', index_col=0)
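    # (Assumption: the '-f features.csv' flag above makes Tranalyzer2 write the
    # selected features to features.csv; if the tool instead writes only to
    # f'{pcap_file}.csv', the path read here would need to match that file.)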

    # Drop unneeded columns
    features.drop([1, 2, 3, 4, 5], axis=1, inplace=True)

    # Rename the columns
    features.columns = ['duration', 'protocol', 'src_ip', 'src_port', 'dst_ip', 'dst_port', 'packets', 'bytes', 'flows',
                        'flags', 'tos', 'class']

    # Drop the class column, since we do not use it
    features.drop(['class'], axis=1, inplace=True)

    return features


# Collect all pcap files
# pcap_files = []
# for file in os.listdir('.'):
#     if file.endswith('.pcap'):
#         pcap_files.append(file)

folder_path = "wcx-抓包-用于模型复现"  # folder name: "wcx captures, for model reproduction"
pcap_files = []
for file in glob.glob(os.path.join(folder_path, "*.pcap")):
    pcap_files.append(file)
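# (If the captures in this folder are .pcapng files, as in the earlier drafts
# above, the glob pattern would need to be "*.pcapng" to match them.)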

# Extract features from every pcap file
features_list = []
for pcap_file in pcap_files:
    features = extract_features(pcap_file)
    features_list.append(features)

# Concatenate all per-file features into one DataFrame
all_features = pd.concat(features_list)

# Standardize the feature data (zero mean, unit variance)
mean = all_features.mean()
std = all_features.std()
normalized_features = (all_features - mean) / std

# Split the standardized feature data for fitting the SVM model
X_train, X_test, y_train, y_test = train_test_split(normalized_features.values, np.zeros(len(normalized_features)),
                                                    test_size=0.2, random_state=42)
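# Note: np.zeros(...) above supplies all-zero placeholder labels. scikit-learn's
# SVC requires at least two classes to fit, so the cross-validation and fit
# below will raise an error until real per-flow labels replace the placeholder.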

clf = SVC()

# Use cross-validation to evaluate model performance
scores = cross_val_score(clf, X_train, y_train, cv=5)
print(f"Cross Validation Scores: {scores}")
print(f"Mean Score: {np.mean(scores)}")
print(f"Std Score: {np.std(scores)}")

# Fit, then test on the held-out set and compute accuracy
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")