77 lines
2.5 KiB
Python
77 lines
2.5 KiB
Python
import sys
|
|
import os
|
|
import json
|
|
import pandas as pd
|
|
import numpy as np
|
|
|
|
# Root directory under which the DataSet/ tree is looked up.
# The historical default is kept, but it can be overridden through the
# GRADPROJ_DIR environment variable so the script is not tied to one machine.
# os.path.join(..., "") guarantees a trailing separator, which the plain
# string concatenations elsewhere in this file rely on.
PREFIX_DIR = os.path.join(
    os.environ.get("GRADPROJ_DIR") or "/Users/Leo/Documents/github/GradProj/",
    "",
)
|
|
|
|
def label_dict_build(date, prefix_dir=None):
    """Build the flow-key -> label lookup from a day's ``stream_tag.txt``.

    The file ``DataSet/result/<date>/stream_tag.txt`` is read as a
    whitespace-separated table without a header row.  The first four columns
    form the dictionary key and the fifth column is the value.  main() looks
    keys up as ``(sip, sport, dip, dport)``, so the columns are presumably in
    that order -- confirm against the file format.

    Args:
        date: Name of the per-day result directory.
        prefix_dir: Optional root directory containing ``DataSet/``.  When
            omitted, the module-level ``PREFIX_DIR`` is used.

    Returns:
        dict mapping a 4-tuple of the first four column values (with the
        fourth forced to 443) to the value of the fifth column.  When several
        rows share a key, the last row wins.
    """
    if prefix_dir is None:
        prefix_dir = PREFIX_DIR
    example_label_file = os.path.join(
        prefix_dir, 'DataSet', 'result', date, 'stream_tag.txt'
    )

    # Raw string: '\s' in a normal literal is an invalid escape sequence.
    example_label_df = pd.read_table(example_label_file, sep=r'\s+', header=None)

    # The fourth key field is overwritten with 443 for every row
    # (presumably the HTTPS port -- the recorded value is discarded).
    example_label_df[3] = 443

    # itertuples walks the frame once instead of doing an iloc lookup per row.
    return {
        row[:4]: row[4]
        for row in example_label_df.itertuples(index=False, name=None)
    }
|
|
|
|
# Accumulator filled by main(): label -> set of SAN entry counts seen for it.
app_cert = {}
|
|
|
|
def main():
    """Print, per label, the set of SAN entry counts found in the SSL records.

    The date is taken from ``sys.argv[1]``.  The flow -> label map comes from
    ``label_dict_build(date)``; ``DataSet/result/<date>/ssl_stat.txt`` (under
    ``PREFIX_DIR``) is then read with one JSON object per line.  Records whose
    ``(sip, sport, dip, dport)`` key has no label are skipped.  For every
    labelled record the number of ``;``-separated entries in its ``san``
    field (0 when the field is absent) is added to the module-level
    ``app_cert[label]`` set.

    Output: the number of labelled records, followed by each label and its
    set of SAN counts.

    NOTE: an unfinished feature-matrix / CSV export that used to live here as
    commented-out code (together with its unused locals) was removed as dead
    code.
    """
    if len(sys.argv) < 2:
        sys.exit('usage: ' + sys.argv[0] + ' <date>')
    date = sys.argv[1]

    example_label = label_dict_build(date)
    example_json_file = PREFIX_DIR + 'DataSet/result/' + date + '/ssl_stat.txt'

    matched = 0
    # The context manager closes the file even if a record fails to parse.
    with open(example_json_file, 'r') as example_json_f:
        for line in example_json_f:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            example_json = json.loads(line)

            # Label: skip records that lack a key field or have no known label.
            try:
                flow_key = (
                    example_json['sip'],
                    example_json['sport'],
                    example_json['dip'],
                    example_json['dport'],
                )
                label = example_label[flow_key]
            except (KeyError, TypeError):
                continue

            # Expert feature: number of entries in the SAN field.
            san_count = 0
            if 'san' in example_json:
                san_count = len(example_json['san'].split(';'))

            app_cert.setdefault(label, set()).add(san_count)
            matched += 1

    print(matched)
    for k, v in app_cert.items():
        print(k)
        print(v)
|
|
|
|
# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()