Delete test_2.py

2023-03-16 15:00:55 +00:00
parent 8a0493110b
commit 9e686da745
1 changed files with 0 additions and 167 deletions
--- a/test_2.py
+++ b/test_2.py
@@ -1,167 +0,0 @@
-# # Name:fang xiaoyu
-# # Time: 2023/3/10 23:53
-# # 导入所需库
-# import pandas as pd
-# from sklearn.neighbors import KNeighborsClassifier
-# from sklearn.model_selection import train_test_split
-# from subprocess import run
-#
-# # 使用Tranalyzer2分析PCAP文件，并提取TCP流量特征
-# def extract_features(pcap_file):
-#     # 定义Tranalyzer2命令行参数
-#     tranalyzer_args = [
-#         "t2build", "-x", "tcp", "--no-tests", "--no-progress",
-#         "--tcp-fields", "sip dip sport dport tcp_flags tcp_flags_str bytes",
-#         "--histo", "sip dip sport dport", "--top", "sip dip sport dport bytes",
-#         "--both-ways", "--export", "csv", "-w", "-"
-#     ]
-#     # 运行Tranalyzer2命令行，并将结果保存为CSV文件
-#     result = run(["sudo", "tshark", "-r", pcap_file, "-w", "-", "-F", "pcapng", "-Y", "tcp"],
-#                  stdout=PIPE, check=True)
-#     result = run(["sudo", "tranalyzer2", "-r", "-", *tranalyzer_args], input=result.stdout, stdout=PIPE, check=True)
-#     result_csv = result.stdout.decode()
-#     # 解析CSV文件，并返回特征数据框
-#     features_df = pd.read_csv(pd.compat.StringIO(result_csv))
-#     return features_df
-#
-# # 加载训练数据集，并提取特征
-# train_pcap = "20230309_fxy_psiphon_operation.pcapng"
-# train_labels = pd.read_csv("/path/to/train_labels.csv")
-# train_features = extract_features(train_pcap)
-#
-# # 将标签与特征合并为单个数据框
-# train_data = pd.merge(train_features, train_labels, on="flow_key")
-#
-# # 分割数据集为训练集和测试集
-# X_train, X_test, y_train, y_test = train_test_split(train_data.drop(["flow_key", "label"], axis=1), train_data["label"], test_size=0.3)
-#
-# # 训练KNN模型
-# knn = KNeighborsClassifier(n_neighbors=5)
-# knn.fit(X_train, y_train)
-#
-# # 在测试集上评估模型性能
-# accuracy = knn.score(X_test, y_test)
-# print("Accuracy:", accuracy)
-#
-#
-
-'''
-import subprocess
-import pandas as pd
-import numpy as np
-from sklearn.model_selection import train_test_split
-from sklearn.preprocessing import LabelEncoder
-from sklearn.neighbors import KNeighborsClassifier
-from sklearn.metrics import accuracy_score
-
-# Step 1: Install Tranalyzer2 and required Python modules
-
-# Step 2: Extract features using Tranalyzer2
-pcap_file = '20230309_fxy_psiphon_operation.pcapng'
-output_file = 'output.csv'
-command = f'sudo t2 -r {pcap_file} -w {output_file} -c basic'
-subprocess.call(command, shell=True)
-
-# Step 3: Load features into a Pandas dataframe and convert to NumPy array
-data = pd.read_csv(output_file)
-features = np.array(data)
-
-# Step 4: Prepare the dataset
-X = features[:, :-1]
-y = features[:, -1]
-
-le = LabelEncoder()
-y = le.fit_transform(y)
-
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
-
-# Step 5: Train the KNN model
-knn = KNeighborsClassifier(n_neighbors=5)
-knn.fit(X_train, y_train)
-
-# Step 6: Evaluate the model
-y_pred = knn.predict(X_test)
-accuracy = accuracy_score(y_test, y_pred)
-
-print("Accuracy:", accuracy)
-
-'''
-
-import pandas as pd
-import numpy as np
-import os
-import glob
-import subprocess
-from sklearn.model_selection import cross_val_score, train_test_split
-from sklearn.svm import SVC
-from sklearn.metrics import accuracy_score
-
-
-# 提取流量特征
-def extract_features(pcap_file):
-    # 运行tranalyzer2命令行工具
-    command =  "t2 -r {pcap_file} -w {pcap_file}.csv -f features.csv"
-    subprocess.run(command, shell=True)
-
-    # 读取特征数据
-    features = pd.read_csv('features.csv', skiprows=6, header=None, delimiter=';', index_col=0)
-
-    # 删除无用列
-    features.drop([1, 2, 3, 4, 5], axis=1, inplace=True)
-
-    # 重命名列名
-    features.columns = ['duration', 'protocol', 'src_ip', 'src_port', 'dst_ip', 'dst_port', 'packets', 'bytes', 'flows',
-                        'flags', 'tos', 'class']
-
-    # 删除class列，因为我们不需要使用它
-    features.drop(['class'], axis=1, inplace=True)
-
-    return features
-
-
-# 获取所有pcap文件
-# pcap_files = []
-# for file in os.listdir('.'):
-#     if file.endswith('.pcap'):
-#         pcap_files.append(file)
-
-folder_path = "wcx-抓包-用于模型复现"
-pcap_files = []
-for file in glob.glob(os.path.join(folder_path, "*.pcap")):
-    pcap_files.append(file)
-
-# 提取所有pcap文件的特征
-features_list = []
-for pcap_file in pcap_files:
-    features = extract_features(pcap_file)
-    features_list.append(features)
-
-# 将所有特征合并成一个DataFrame
-all_features = pd.concat(features_list)
-
-# 标准化特征数据
-mean = all_features.mean()
-std = all_features.std()
-normalized_features = (all_features - mean) / std
-
-# 将标准化后的特征数据和SVM模型拟合
-X_train, X_test, y_train, y_test = train_test_split(normalized_features.values, np.zeros(len(normalized_features)),
-                                                    test_size=0.2, random_state=42)
-
-clf = SVC()
-
-# 使用交叉验证来评估模型性能
-scores = cross_val_score(clf, X_train, y_train, cv=5)
-print(f"Cross Validation Scores: {scores}")
-print(f"Mean Score: {np.mean(scores)}")
-print(f"Std Score: {np.std(scores)}")
-
-# 在测试集上测试并计算准确率
-clf.fit(X_train, y_train)
-y_pred = clf.predict(X_test)
-accuracy = accuracy_score(y_test, y_pred)
-print(f"Accuracy: {accuracy}")
-
-
-
-