This repository was archived on 2025-09-14. You can view and clone its files, but you cannot push or open issues or pull requests.
Files
wujiating-detection/cicflow.py
nanwct 8165bf52b6 abc
2022-05-19 14:40:58 +08:00

127 lines
5.3 KiB
Python

import csv
import os
import pandas as pd
import _pickle as pkl
import numpy as np
def merge_csv(input_dir="C:\\Users\\JiaTing\\Desktop\\CICFlowMeter-master\\result\\myData\\OW\\web",
              save_filename="./result/ow_doh_features.csv", truncated_num=5, label=0):
    """Merge every ``.csv`` file in *input_dir* into one labelled feature table.

    All CSV files directly inside *input_dir* are read with pandas and
    concatenated; missing values are filled with ``1e10``. The last column of
    the merged table is dropped. A row is kept only when

    * column 5 equals ``6`` (presumably the IP protocol number, i.e. TCP --
      confirm against the flow-export column layout), and
    * the two counters at positions 1 and 2 after column 7 are each at least
      ``1e-5`` and together at least *truncated_num* (presumably forward and
      backward packet counts -- confirm).

    For every kept row a fixed subset of the columns from index 7 onwards is
    selected (up to 28 values), string values are converted to ``float`` and
    non-finite values are replaced by ``1e7``.

    Args:
        input_dir: Directory scanned (non-recursively) for ``.csv`` files.
        save_filename: Path of the CSV file to write. A pickle of the same
            table is written next to it with the extension swapped to ``.pkl``.
        truncated_num: Minimum sum of the two counters for a row to be kept.
        label: Value stored in the ``label`` column of every kept row.

    Raises:
        ValueError: If *input_dir* contains no ``.csv`` file, or a string
            feature cannot be converted to ``float``.
    """
    frames = [
        pd.read_csv(os.path.join(input_dir, filename))
        for filename in os.listdir(input_dir)
        if filename.endswith(".csv")
    ]
    if not frames:
        # pd.concat would raise an opaque "No objects to concatenate" here.
        raise ValueError("no .csv files found in {!r}".format(input_dir))
    df = pd.concat(frames).fillna(1e10)

    records = []
    for row in df.iloc[:, :-1].values.tolist():
        if row[5] != 6:
            continue
        tail = row[7:]
        # Fixed column subset; the first three entries are tail[0:3], so the
        # counters used by the filter below are tail[1] and tail[2].
        features = (tail[0:3] + tail[5:13] + tail[37:41]
                    + tail[15:23] + tail[24:28] + tail[50:51])
        if features[1] + features[2] < truncated_num or features[1] < 1e-5 or features[2] < 1e-5:
            continue
        for i, feature in enumerate(features):
            if isinstance(feature, str):
                feature = float(feature)
                features[i] = feature
            if not np.isfinite(feature):
                # Report the offending value, then clamp it to a large
                # finite sentinel so the stored features stay finite.
                print(np.isnan(feature), np.isinf(feature), not np.isfinite(feature), feature)
                print(features)
                features[i] = 1e7
        records.append((features, label))

    # One row per kept flow: the feature list is stored as a single cell.
    save_df = pd.DataFrame(records, columns=["features", "label"])
    save_df.to_csv(save_filename)

    # Swap only the extension; a plain str.replace("csv", "pkl") would also
    # rewrite "csv" occurring in directory names.
    pkl_name = os.path.splitext(save_filename)[0] + ".pkl"
    with open(pkl_name, "wb") as f_pkl:
        pkl.dump(save_df, f_pkl)
def merge_all_pkl(result_dir="./result"):
    """Concatenate the per-class ``cw_*_features.pkl`` tables into one dataset.

    Loads the pickled DataFrames for the doh, web, chat, email, streaming,
    file and voip classes from *result_dir*, concatenates them in that order
    (row indices are kept as they are in each input), and writes the result
    to ``all_features.csv`` and ``all_features.pkl`` in the same directory.

    The ``label`` column of each input is used unchanged. An earlier,
    now-disabled step overwrote the labels here (web=1, file=2, voip=3,
    chat=4, email=5, streaming=6); the inputs are presumably expected to
    carry their final labels already -- confirm before relying on it.

    Args:
        result_dir: Directory holding the per-class pickles and receiving
            the merged output files.

    Raises:
        FileNotFoundError: If one of the per-class pickle files is missing.
    """
    # Concatenation order of the per-class tables.
    categories = ("doh", "web", "chat", "email", "streaming", "file", "voip")
    frames = []
    for category in categories:
        pkl_path = os.path.join(result_dir, "cw_{}_features.pkl".format(category))
        # NOTE: unpickling can execute arbitrary code -- only load pickle
        # files produced by this project, never untrusted ones.
        with open(pkl_path, "rb") as f_pkl:
            frames.append(pkl.load(f_pkl))

    df = pd.concat(frames)
    save_filename = os.path.join(result_dir, "all_features.csv")
    df.to_csv(save_filename)

    # Swap only the extension so "csv" elsewhere in the path is left alone.
    pkl_name = os.path.splitext(save_filename)[0] + ".pkl"
    with open(pkl_name, "wb") as f_pkl:
        pkl.dump(df, f_pkl)
if __name__ == '__main__':
    # (flow-CSV input directory, merged feature file) pairs.
    # The position of a pair in this list is used as its class label.
    jobs = [
        ("C:\\Users\\JiaTing\\Desktop\\CICFlowMeter-master\\result\\myData\\doh", "./result/doh_features.csv"),
        ("C:\\Users\\JiaTing\\Desktop\\CICFlowMeter-master\\result\\myData\\web", "./result/web_features.csv"),
        ("C:\\Users\\JiaTing\\Desktop\\CICFlowMeter-master\\result\\ISCX\\File", "./result/file_features.csv"),
        ("C:\\Users\\JiaTing\\Desktop\\CICFlowMeter-master\\result\\ISCX\\Email", "./result/email_features.csv"),
        ("C:\\Users\\JiaTing\\Desktop\\CICFlowMeter-master\\result\\ISCX\\VoIP", "./result/voip_features.csv"),
        ("C:\\Users\\JiaTing\\Desktop\\CICFlowMeter-master\\result\\ISCX\\Chat", "./result/chat_features.csv"),
    ]
    # Threshold passed to merge_csv as truncated_num for every job.
    min_count = 5
    for class_id, (src_dir, dst_csv) in enumerate(jobs):
        print(src_dir)
        print(dst_csv)
        merge_csv(src_dir, dst_csv, min_count, label=class_id)
    # merge_all_pkl()