61 lines
1.7 KiB
Python
61 lines
1.7 KiB
Python
|
|
import pandas as pd
|
||
|
|
# Count the packets exchanged with the .1 and .4 hosts in the test-day capture
|
||
|
|
# -*- coding: utf-8 -*-
|
||
|
|
import pandas as pd
|
||
|
|
import os
|
||
|
|
import re
|
||
|
|
|
||
|
|
|
||
|
|
# Hosts of interest, grouped by role.
DOS2019_FLOWS = {
    'attackers': ['172.16.0.5'],
    'victims': ['192.168.50.1', '192.168.50.4'],
}

# Directory that is scanned for input files.
input_dir = "cicddos2019/input"

# One extra slot on top of 40: the first element is the label.
feature_latitude = 1 + 40
|
||
|
|
|
||
|
|
|
||
|
|
def get_label(srcIP, dstIP):
    """Classify a packet by the pair of hosts it travels between.

    Returns 1 when the packet goes between 172.16.0.5 and 192.168.50.1
    (in either direction), 4 when it goes between 172.16.0.5 and
    192.168.50.4 (in either direction), and 0 for every other pair.
    """
    attacker = "172.16.0.5"
    endpoints = (srcIP, dstIP)
    # Each victim maps to the label of its last address octet.
    for victim, label in (("192.168.50.1", 1), ("192.168.50.4", 4)):
        if endpoints == (attacker, victim) or endpoints == (victim, attacker):
            return label
    return 0
|
||
|
|
|
||
|
|
def _numeric_prefix_key(name):
    """Sort key: files with a leading integer first (by value), others last."""
    prefix = re.match(r"\d+", name)
    if prefix is None:
        return (1, 0, name)
    return (0, int(prefix.group()), name)


def pre_process(dir):
    """Count the rows labelled 1 and 4 across all CSV files in a directory.

    Every file in ``dir`` is read with pandas. For each row the ``Source``
    and ``Destination`` columns are converted to strings; rows where either
    value is not a dotted-quad address are skipped. The remaining rows are
    labelled with ``get_label`` and the totals for label 1 and label 4 are
    printed once all files have been processed.

    Args:
        dir: Path of the directory containing the CSV files. (The name
            shadows the builtin; it is kept so existing callers still work.)

    Returns:
        None. The totals are only reported on stdout.
    """
    # Compiled once instead of being re-evaluated twice per row.
    ipv4_pattern = re.compile(r"^(?:[0-9]{1,3}\.){3}[0-9]{1,3}$")

    number_1 = 0
    number_4 = 0

    # Order by the full leading integer so "10..." sorts after "9...", and
    # names without a numeric prefix no longer raise ValueError.
    files = sorted(os.listdir(dir), key=_numeric_prefix_key)

    for filename in files:
        input_path = os.path.join(dir, filename)
        df = pd.read_csv(input_path, encoding='ISO-8859-1')
        print("------------")
        print("processing file: {} ".format(input_path))

        try:
            sources = df["Source"]
            destinations = df["Destination"]
        except KeyError as err:
            # Without both address columns no row of this file can be labelled.
            print("skipping {}: missing column {}".format(input_path, err))
            continue

        # Walking the two columns directly avoids building a Series per row.
        for raw_src, raw_dst in zip(sources, destinations):
            srcIP = str(raw_src)
            dstIP = str(raw_dst)

            # Both endpoints must look like IPv4 addresses.
            if not ipv4_pattern.match(srcIP) or not ipv4_pattern.match(dstIP):
                continue

            # get label
            label = get_label(srcIP, dstIP)
            if label == 1:
                number_1 += 1
            elif label == 4:
                number_4 += 1

    print("number_1:{},number_4:{}".format(number_1, number_4))
    # Recorded result of a previous run: 0,60294326
|
||
|
|
|
||
|
|
|
||
|
|
# Run the count only when executed as a script, not when imported.
if __name__ == "__main__":
    pre_process(input_dir)