51 lines
1.3 KiB
Python
51 lines
1.3 KiB
Python
import pandas as pd
|
|
|
|
# Convert the meta file into a label file that the model can use directly.
meta_csv_path = "cicdos2017/dataset/meta.csv"
label_csv_path = "cicdos2017/label/label.csv"


def label_for_counts(high_num, low_num, normal_num):
    """Return the label for one row's three counts, or None for a mixed row.

    A row is labelled after the only count that is allowed to be non-zero:
    "high", "low" or "normal". The branches are tested in that order and the
    first match wins. If more than one count is non-zero the row is mixed
    and None is returned.

    NOTE(review): a row whose three counts are all zero matches the first
    branch and is therefore labelled "high" -- confirm this is intended.
    """
    if low_num == 0 and normal_num == 0:
        return "high"
    if high_num == 0 and normal_num == 0:
        return "low"
    if high_num == 0 and low_num == 0:
        return "normal"
    return None


def build_label_rows(df, progress_every=20000):
    """Turn the meta frame into ``[label, feature, ...]`` rows.

    Columns are addressed by position: columns 0, 1 and 2 hold the high,
    low and normal counts, and columns 3 up to (not including) 43 are
    copied through as features.

    Args:
        df: frame as loaded from the meta CSV.
        progress_every: print a progress line whenever the row index is a
            multiple of this value.

    Returns:
        A ``(rows, counts)`` tuple. ``rows`` is a list of lists, each
        starting with the label followed by the feature values; mixed rows
        are dropped. ``counts`` maps "mixed", "high", "low" and "normal" to
        the number of input rows that fell into each bucket.
    """
    counts = {"mixed": 0, "high": 0, "low": 0, "normal": 0}
    rows = []
    for index, row in df.iterrows():
        if index % progress_every == 0:
            print("processing index:", index)

        # Use .iloc for positional access: integer keys passed to
        # Series.__getitem__ are deprecated as positions when the index
        # holds column-name labels.
        label = label_for_counts(
            int(row.iloc[0]), int(row.iloc[1]), int(row.iloc[2])
        )
        if label is None:
            # Mixed rows are discarded outright.
            counts["mixed"] += 1
            continue

        counts[label] += 1
        rows.append([label, *row.iloc[3:43]])
    return rows, counts


def main():
    """Read the meta CSV, label every row, print a summary, write the label CSV."""
    df = pd.read_csv(meta_csv_path)
    label_ls, counts = build_label_rows(df)

    print("mix row num:", counts["mixed"])
    print("high row num:", counts["high"])
    print("low row num:", counts["low"])
    print("normal row num:", counts["normal"])

    # Write to the CSV file. The frame is built from plain lists, so the
    # header row consists of integer column positions.
    data = pd.DataFrame(data=label_ls)
    data.to_csv(label_csv_path, index=False,
                encoding="utf-8", sep=',', mode='w', header=True)


if __name__ == "__main__":
    main()