# Recovered from the deletion diff of test_2.py (blob 78c8772).
"""Extract flow features from PCAP captures with Tranalyzer2 and fit an SVM.

Author: fang xiaoyu
Created: 2023/3/10 23:53

Pipeline:
    1. Collect every ``*.pcap`` file in ``PCAP_FOLDER``.
    2. Run the ``t2`` command-line tool on each capture and load the
       resulting ``features.csv`` into a DataFrame.
    3. Standardise the numeric feature columns.
    4. Cross-validate and test an ``sklearn.svm.SVC`` classifier.

Two earlier KNN-based drafts of this script (one commented out, one kept
inside a bare string literal) were dead code and have been removed; they
are still available in version control.
"""

import glob
import os
import subprocess

import numpy as np
import pandas as pd

# Folder that holds the captures used to reproduce the model.
PCAP_FOLDER = "wcx-抓包-用于模型复现"

# File that extract_features() reads back after running t2.
FEATURES_CSV = "features.csv"

# Names given to the columns that remain after the unused leading columns
# are dropped.  "class" is named only so that it can be dropped explicitly.
FEATURE_COLUMNS = [
    "duration", "protocol", "src_ip", "src_port", "dst_ip", "dst_port",
    "packets", "bytes", "flows", "flags", "tos", "class",
]


def build_t2_command(pcap_file, features_csv=FEATURES_CSV):
    """Return the ``t2`` argument vector for one capture file.

    The original code built this command from a plain string that was
    missing its ``f`` prefix, so ``{pcap_file}`` was passed to the tool
    literally.  Returning a list also lets the caller avoid ``shell=True``,
    so paths containing spaces or shell metacharacters are passed intact.

    NOTE(review): the flags are kept exactly as the original wrote them;
    confirm against the Tranalyzer2 documentation that ``-f`` really
    selects the output file read back by ``load_feature_table``.
    """
    pcap_file = os.fspath(pcap_file)
    return ["t2", "-r", pcap_file, "-w", f"{pcap_file}.csv", "-f", features_csv]


def load_feature_table(csv_path=FEATURES_CSV):
    """Load a ``;``-separated feature file into a DataFrame.

    The first six lines of the file are skipped, column 0 becomes the
    index, columns 1-5 are discarded and the remaining twelve columns are
    named after ``FEATURE_COLUMNS``.  The ``class`` column is then dropped
    because the pipeline does not use it.

    Raises:
        ValueError: if the file does not have exactly twelve columns left
            after the leading ones are removed (raised by pandas).
    """
    table = pd.read_csv(csv_path, skiprows=6, header=None, delimiter=";", index_col=0)
    table = table.drop(columns=[1, 2, 3, 4, 5])
    table.columns = FEATURE_COLUMNS
    return table.drop(columns=["class"])


def extract_features(pcap_file):
    """Run ``t2`` on *pcap_file* and return its flow features.

    Args:
        pcap_file: path of the capture to analyse.

    Returns:
        A DataFrame with the columns of ``FEATURE_COLUMNS`` except
        ``class``.

    Raises:
        FileNotFoundError: if the ``t2`` executable cannot be found.
        subprocess.CalledProcessError: if ``t2`` exits with a non-zero
            status.  The original ignored the exit status and could then
            silently read a stale ``features.csv`` from an earlier run.
    """
    subprocess.run(build_t2_command(pcap_file), check=True)
    return load_feature_table(FEATURES_CSV)


def collect_pcap_files(folder_path=PCAP_FOLDER):
    """Return the ``*.pcap`` files directly inside *folder_path*, sorted.

    Sorting makes the row order of the combined feature table, and hence
    the seeded train/test split, reproducible across runs.
    """
    return sorted(glob.glob(os.path.join(folder_path, "*.pcap")))


def standardize(features):
    """Return the z-scored numeric columns of *features*.

    Non-numeric columns (for example the IP address columns) are left out:
    they have no mean or standard deviation and the SVM cannot consume
    them.  Columns whose standard deviation is zero or undefined are
    divided by 1 instead, so they become all zeros rather than NaN/inf.
    """
    numeric = features.select_dtypes(include="number")
    mean = numeric.mean()
    std = numeric.std()
    scale = std.where(std > 0, 1.0)
    return (numeric - mean) / scale


def train_and_evaluate(features, labels):
    """Cross-validate an SVC, test it on a held-out split, print the scores.

    Args:
        features: 2-D array-like of samples.
        labels: 1-D array-like with one label per sample.

    Returns:
        The accuracy on the 20 % test split.

    Raises:
        ValueError: if *labels* does not match *features* in length or
            holds fewer than two distinct classes (a classifier cannot be
            trained on a single class).
    """
    labels = np.asarray(labels)
    if len(labels) != len(features):
        raise ValueError(
            f"got {len(labels)} labels for {len(features)} samples"
        )
    if np.unique(labels).size < 2:
        raise ValueError(
            "labels must contain at least two classes to train a classifier"
        )

    # Imported here so that feature extraction stays usable (and testable)
    # on machines without scikit-learn.
    from sklearn.metrics import accuracy_score
    from sklearn.model_selection import cross_val_score, train_test_split
    from sklearn.svm import SVC

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=42
    )

    clf = SVC()

    # Estimate performance with cross-validation on the training split.
    scores = cross_val_score(clf, X_train, y_train, cv=5)
    print(f"Cross Validation Scores: {scores}")
    print(f"Mean Score: {np.mean(scores)}")
    print(f"Std Score: {np.std(scores)}")

    # Fit on the whole training split and measure accuracy on the test split.
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy}")
    return accuracy


def main(folder_path=PCAP_FOLDER):
    """Run the whole pipeline on the captures found in *folder_path*."""
    pcap_files = collect_pcap_files(folder_path)
    if not pcap_files:
        raise SystemExit(f"No .pcap files found in {folder_path!r}")

    # Extract the features of every capture and merge them into one table.
    all_features = pd.concat([extract_features(path) for path in pcap_files])
    normalized_features = standardize(all_features)

    # TODO: replace with real labels.  The original script used an all-zero
    # vector, which train_and_evaluate() rejects because it is one class.
    labels = np.zeros(len(normalized_features))
    train_and_evaluate(normalized_features.values, labels)


if __name__ == "__main__":
    main()