# -*- coding: utf-8 -*-
"""Count the packets exchanged with victims .1 and .4 in the CIC-DDoS2019 test day.

Every CSV file found in ``input_dir`` is scanned and each row whose
Source/Destination pair is (attacker, victim) in either direction is counted
per victim.
"""
import os
import re

import pandas as pd

DOS2019_FLOWS = {'attackers': ['172.16.0.5'], 'victims': ['192.168.50.1', '192.168.50.4']}
input_dir = "cicddos2019/input"
feature_latitude = 40 + 1  # the first element is the label

# Compiled once: these patterns are applied to every row / file name.
_IPV4_RE = re.compile(r"^(?:[0-9]{1,3}\.){3}[0-9]{1,3}$")
_LEADING_INT_RE = re.compile(r"[0-9]+")

_ATTACKER_IP = "172.16.0.5"
# Victim address -> label returned by get_label().
_VICTIM_LABELS = {"192.168.50.1": 1, "192.168.50.4": 4}


def get_label(srcIP, dstIP):
    """Return the victim label for a packet, regardless of its direction.

    Args:
        srcIP: source address of the packet, as a string.
        dstIP: destination address of the packet, as a string.

    Returns:
        1 if the packet travels between 172.16.0.5 and 192.168.50.1,
        4 if it travels between 172.16.0.5 and 192.168.50.4,
        0 for any other pair of addresses.
    """
    if srcIP == _ATTACKER_IP:
        return _VICTIM_LABELS.get(dstIP, 0)
    if dstIP == _ATTACKER_IP:
        return _VICTIM_LABELS.get(srcIP, 0)
    return 0


def _file_order(filename):
    """Sort key: files with a leading number first (numerically), then the rest by name."""
    match = _LEADING_INT_RE.match(filename)
    if match:
        return (0, int(match.group()), filename)
    return (1, 0, filename)


def pre_process(dir):
    """Count the packets labelled 1 and 4 over every CSV file in a directory.

    Files are visited in the numeric order of their leading number (so
    ``10.csv`` comes after ``2.csv``); names without a leading number are
    visited last instead of aborting the run. A file that lacks the
    ``Source`` or ``Destination`` column is reported and skipped. Rows whose
    source or destination is not a dotted-quad IPv4 address are ignored.

    Args:
        dir: path of the directory holding the CSV files. (The name shadows
            the builtin but is kept so keyword callers keep working.)

    Returns:
        A ``(number_1, number_4)`` tuple with the cumulative packet counts
        for victims 192.168.50.1 and 192.168.50.4.
    """
    number_1 = 0
    number_4 = 0

    files = [
        name for name in os.listdir(dir)
        if os.path.isfile(os.path.join(dir, name))
    ]
    files.sort(key=_file_order)

    for filename in files:
        input_path = os.path.join(dir, filename)
        df = pd.read_csv(input_path, encoding='ISO-8859-1')
        print("------------")
        print("processing file: {} ".format(input_path))

        # Check the schema once per file rather than failing on every row.
        if "Source" not in df.columns or "Destination" not in df.columns:
            print("skipping {}: missing Source/Destination column".format(input_path))
            continue

        # Walking the two columns directly is far cheaper than
        # DataFrame.iterrows(), which builds a Series for every row.
        sources = df["Source"].astype(str)
        destinations = df["Destination"].astype(str)
        for src_ip, dst_ip in zip(sources, destinations):
            # Both endpoints must look like IPv4 addresses.
            if not _IPV4_RE.match(src_ip) or not _IPV4_RE.match(dst_ip):
                continue

            label = get_label(src_ip, dst_ip)
            if label == 1:
                number_1 += 1
            elif label == 4:
                number_4 += 1

        # Running totals after each file; a previous full run ended with 0,60294326.
        print("number_1:{},number_4:{}".format(number_1, number_4))

    return number_1, number_4


if __name__ == "__main__":
    pre_process(input_dir)