This repository has been archived on 2025-09-14. You can view files and clone it, but cannot push or open issues or pull requests.
Files
dengzeyi-sequenceshield/代码/sequenceShield/cicdos2017/script/parse_meta.py
2022-11-21 12:08:58 +08:00

51 lines
1.3 KiB
Python

import pandas as pd
# Convert the meta file into a label file that the model can use directly.
#
# Each row of the meta CSV is read positionally: the first three columns are
# parsed as integers (high_num, low_num, normal_num) and columns 3..42 are
# carried over unchanged as features. A row is labeled only when at most one
# of the three numbers is non-zero; every other ("mixed") row is dropped.
meta_csv_path = "cicdos2017/dataset/meta.csv"
label_csv_path = "cicdos2017/label/label.csv"

# A progress line is printed whenever the row index is a multiple of this.
PROGRESS_INTERVAL = 20000

# Positional bounds of the feature columns inside a meta row.
FEATURE_START = 3
FEATURE_STOP = 43


def classify_counts(high_num, low_num, normal_num):
    """Return the label for one meta row, or ``None`` for a mixed row.

    Args:
        high_num: integer taken from column 0 of the row.
        low_num: integer taken from column 1 of the row.
        normal_num: integer taken from column 2 of the row.

    Returns:
        ``"high"``, ``"low"`` or ``"normal"`` when the other two numbers are
        zero, otherwise ``None``.
    """
    # NOTE(review): a row whose three numbers are all zero matches this first
    # test and is therefore labeled "high" -- confirm that this is intended.
    if low_num == 0 and normal_num == 0:
        return "high"
    if high_num == 0 and normal_num == 0:
        return "low"
    if high_num == 0 and low_num == 0:
        return "normal"
    return None


def build_label_rows(df):
    """Turn the meta DataFrame into label rows plus per-label counters.

    Args:
        df: DataFrame loaded from the meta CSV.

    Returns:
        A ``(label_rows, counts)`` tuple. ``label_rows`` is a list of lists of
        the form ``[label, feature, feature, ...]`` for every non-mixed row, in
        input order. ``counts`` maps ``"high"``, ``"low"``, ``"normal"`` and
        ``"mixed"`` to the number of rows that fell into each category.

    Raises:
        ValueError: if one of the first three values of a row cannot be
            converted with ``int()`` (for example a NaN).
    """
    counts = {"high": 0, "low": 0, "normal": 0, "mixed": 0}
    label_rows = []
    for index, row in df.iterrows():
        if index % PROGRESS_INTERVAL == 0:
            print("processing index:",index)
        # Use .iloc for positional access: integer keys passed to
        # Series.__getitem__ on a string-labeled row are deprecated and are
        # treated as labels (KeyError) by newer pandas versions.
        high_num = int(row.iloc[0])
        low_num = int(row.iloc[1])
        normal_num = int(row.iloc[2])
        label = classify_counts(high_num, low_num, normal_num)
        if label is None:
            # Mixed rows are discarded outright.
            counts["mixed"] += 1
            continue
        counts[label] += 1
        label_row = [label]
        label_row.extend(row.iloc[FEATURE_START:FEATURE_STOP])
        label_rows.append(label_row)
    return label_rows, counts


def main():
    """Read the meta CSV, print the category counts and write the label CSV."""
    df = pd.read_csv(meta_csv_path)
    label_rows, counts = build_label_rows(df)
    print("mix row num:",counts["mixed"])
    print("high row num:",counts["high"])
    print("low row num:",counts["low"])
    print("normal row num:",counts["normal"])
    # Write to the CSV file; the columns are unnamed, so the header row holds
    # the default integer column labels.
    data = pd.DataFrame(data=label_rows)
    data.to_csv(label_csv_path, index=False,
                encoding="utf-8", sep=',', mode='w', header=True)


if __name__ == "__main__":
    main()