127 lines
5.3 KiB
Python
127 lines
5.3 KiB
Python
import csv
|
|
import os
|
|
import pandas as pd
|
|
import _pickle as pkl
|
|
import numpy as np
|
|
|
|
|
|
def merge_csv(input_dir="C:\\Users\\JiaTing\\Desktop\\CICFlowMeter-master\\result\\myData\\OW\\web",
              save_filename="./result/ow_doh_features.csv", truncated_num=5, label=0):
    """Merge every CSV in ``input_dir`` into one labelled feature table.

    All ``*.csv`` files directly inside ``input_dir`` are read and
    concatenated, with missing cells filled with ``1e10``.  The last column
    of the concatenated table is ignored.  A row is kept only when

    * column 5 equals ``6`` (presumably the protocol column, 6 = TCP --
      TODO confirm against the CICFlowMeter export layout), and
    * the values at columns 8 and 9 are both at least ``1e-5`` and sum to at
      least ``truncated_num`` (presumably forward/backward packet counts --
      TODO confirm).

    For each kept row a fixed subset of the columns from index 7 onwards is
    selected, string cells are converted to ``float`` and non-finite values
    are replaced with ``1e7``.

    The result is a DataFrame with the columns ``features`` (a list per row)
    and ``label``.  It is written to ``save_filename`` as CSV and, next to
    it, as a pickle with the same base name and a ``.pkl`` extension.

    Args:
        input_dir: Directory containing the CSV files to merge.
        save_filename: Path of the CSV file to write.
        truncated_num: Minimum allowed sum of the values at columns 8 and 9.
        label: Value stored in the ``label`` column of every kept row.

    Raises:
        ValueError: If ``input_dir`` contains no ``.csv`` file.
    """
    # Sort so that the row order of the output does not depend on the
    # arbitrary order in which the OS lists the directory.
    csv_names = sorted(name for name in os.listdir(input_dir) if name.endswith(".csv"))
    if not csv_names:
        raise ValueError("no .csv files found in {!r}".format(input_dir))

    frames = [pd.read_csv(os.path.join(input_dir, name)) for name in csv_names]
    df = pd.concat(frames).fillna(1e10)

    feature_rows = []
    for row in df.iloc[:, :-1].values.tolist():
        if row[5] != 6:
            continue

        raw = row[7:]
        if raw[1] + raw[2] < truncated_num or raw[1] < 1e-5 or raw[2] < 1e-5:
            continue

        features = raw[0:3] + raw[5:13] + raw[37:41] + raw[15:23] + raw[24:28] + raw[50:51]
        for i, feature in enumerate(features):
            if isinstance(feature, str):
                features[i] = float(feature)
            value = float(feature)
            if not np.isfinite(value):
                # Report the offending value, then clamp it to a finite one.
                print(np.isnan(value), np.isinf(value), not np.isfinite(value), value)
                print(features)
                features[i] = 1e7
        feature_rows.append(features)

    # Build the table in one go; object dtype matches what filling a
    # pre-allocated empty frame row by row used to produce.
    save_df = pd.DataFrame(
        {"features": feature_rows, "label": [label] * len(feature_rows)}
    ).astype(object)

    save_df.to_csv(save_filename)

    # Swap only the extension; a plain str.replace("csv", "pkl") would also
    # rewrite any "csv" that appears in a directory name.
    pkl_name = os.path.splitext(save_filename)[0] + ".pkl"
    with open(pkl_name, "wb") as f_pkl:
        pkl.dump(save_df, f_pkl)
|
|
|
|
|
|
def merge_all_pkl():
    """Concatenate the per-category ``cw_*`` pickles into one dataset.

    Reads ``./result/cw_<category>_features.pkl`` for the categories doh,
    web, chat, email, streaming, file and voip (in that order), concatenates
    the loaded DataFrames and writes the result to
    ``./result/all_features.csv`` and ``./result/all_features.pkl``.

    Labels are taken exactly as stored in each pickle; no relabelling is
    done here.

    Raises:
        FileNotFoundError: If one of the input pickles does not exist.
    """
    # The order here is the row order of the merged output.
    categories = ("doh", "web", "chat", "email", "streaming", "file", "voip")

    frames = []
    for category in categories:
        pkl_path = "./result/cw_{}_features.pkl".format(category)
        # NOTE(security): unpickling can execute arbitrary code -- only load
        # pickles produced by this project.
        with open(pkl_path, "rb") as f_pkl:
            frames.append(pkl.load(f_pkl))

    df = pd.concat(frames)

    save_filename = "./result/all_features.csv"
    df.to_csv(save_filename)

    # Swap only the file extension to get the pickle path.
    pkl_name = os.path.splitext(save_filename)[0] + ".pkl"
    with open(pkl_name, "wb") as f_pkl:
        pkl.dump(df, f_pkl)
|
|
|
|
|
|
if __name__ == '__main__':
    # (input directory, output CSV) pairs; a pair's position in the list is
    # used as the label for every row extracted from that directory.
    input_and_output_tuple = [
        ("C:\\Users\\JiaTing\\Desktop\\CICFlowMeter-master\\result\\myData\\doh", "./result/doh_features.csv"),
        ("C:\\Users\\JiaTing\\Desktop\\CICFlowMeter-master\\result\\myData\\web", "./result/web_features.csv"),
        ("C:\\Users\\JiaTing\\Desktop\\CICFlowMeter-master\\result\\ISCX\\File", "./result/file_features.csv"),
        ("C:\\Users\\JiaTing\\Desktop\\CICFlowMeter-master\\result\\ISCX\\Email", "./result/email_features.csv"),
        ("C:\\Users\\JiaTing\\Desktop\\CICFlowMeter-master\\result\\ISCX\\VoIP", "./result/voip_features.csv"),
        ("C:\\Users\\JiaTing\\Desktop\\CICFlowMeter-master\\result\\ISCX\\Chat", "./result/chat_features.csv"),
    ]

    # Same threshold for every directory.
    truncated_num = 5
    for label, (input_dir, save_filename) in enumerate(input_and_output_tuple):
        print(input_dir)
        print(save_filename)
        merge_csv(input_dir, save_filename, truncated_num, label=label)
    # merge_all_pkl()
|