引入FQDN Engine,以支持FQDN Plugin。

This commit is contained in:
zhengchao
2020-09-17 15:20:26 +08:00
parent 34de556665
commit 58daab14ad
8 changed files with 512 additions and 2 deletions

343
src/entry/FQDN_engine.cpp Normal file
View File

@@ -0,0 +1,343 @@
/*
*
* Copyright (c) 2020
* String Algorithms Research Group
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS)
* National Engineering Laboratory for Information Security Technologies (NELIST)
* All rights reserved
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2020-09-01
*
* This code is the exclusive and proprietary property of IIE-CAS and NELIST.
* Usage for direct or indirect commercial advantage is not allowed without
* written permission from the authors.
*
*/
#include "FQDN_engine.h"
#include <math.h>
#include <ctype.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/*************************************************************************************/
#include <nmmintrin.h>
/* popcnt_u64(x): population count of a 64-bit word, mapped to the intrinsic declared in <nmmintrin.h>. */
#define popcnt_u64 _mm_popcnt_u64
/* FOR(i, n): loop with an int counter i running from 0 to n-1; n is evaluated once and converted to int. */
#define FOR(i, n) for(int i=0, _n=(int)(n); i<_n; i++)
/*
 * One block of the rank table; each block covers 256 consecutive hash slots.
 * Together the blocks turn "slot h is occupied" into a dense index (see CHashTrieFQDN::rank).
 */
struct packedRT_t
{
/* One bit per slot (4 x 64 = 256). A bit is set when the full-name hash of some rule maps to the slot. */
unsigned long long bitmap[4];
/* Number of set bits in all blocks that precede this one. */
unsigned int A;
/* B[r]: number of set bits in bitmap[0..r-1] of this block (at most 192, so it fits a byte). */
unsigned char B[4];
};
/*
 * Allocate `size` bytes starting at an address that is a multiple of `align`.
 *
 * Parameters:
 *   size  - number of usable bytes requested; must be non-zero.
 *   align - required alignment in bytes; must be a non-zero power of two.
 * Returns:
 *   The aligned pointer, or NULL when an argument is invalid, when the padded
 *   request would overflow size_t, or when malloc() fails.
 *
 * The pointer returned by malloc() is stored in the pointer-sized slot just
 * below the aligned address, so the block must be released with aligned_free().
 */
void * aligned_malloc(size_t size, size_t align)
{
    /* Reject empty requests and alignments that are not a power of two. */
    if (align == 0 || size == 0 || (align & (align - 1)) != 0)
    {
        return NULL;
    }

    /* Extra bytes: one slot for the bookkeeping pointer plus worst-case alignment slack. */
    const size_t overhead = sizeof(void *) + align - 1;

    /* Without this check a huge `size` wraps around and malloc() is asked for a tiny block. */
    if (size > (size_t)-1 - overhead)
    {
        return NULL;
    }

    void * raw = malloc(overhead + size);
    if (raw == NULL)
    {
        return NULL;
    }

    /* Skip the bookkeeping slot, then round up to the next multiple of align. */
    void * aligned = (void *)(((size_t)raw + overhead) & ~(align - 1));
    ((void **)aligned)[-1] = raw;
    return aligned;
}
/*
 * Release a block whose original malloc() pointer is stored in the
 * pointer-sized slot directly below `aligned_ptr`. NULL is ignored.
 */
void aligned_free(void * aligned_ptr)
{
    if (aligned_ptr == NULL)
    {
        return;
    }
    /* Step back one slot to reach the pointer that malloc() handed out. */
    void ** slot = (void **)aligned_ptr - 1;
    free(*slot);
}
/*************************************************************************************/
/*
 * Internal record for one rule. Records whose hash lands in the same slot
 * are linked through `next` (see CHashTrieFQDN::initialize).
 */
struct domain_impl_t
{
/* Rule id copied from FQDN_rule::id and reported back in FQDN_match::id. */
unsigned int id;
/* Copied from FQDN_rule::is_suffix_match; 0 restricts the rule to matches at offset 0. */
int suf_match;
/* Length of the rule string in bytes. */
unsigned int len;
unsigned long long hash; /* 64-bit hash of the lower-cased rule string ('.' excluded); compared instead of the string itself. */
/* Next record in the same slot's chain, or NULL. */
domain_impl_t * next;
/* Caller-owned tag copied from FQDN_rule::user_tag. */
void * utag;
};
/*
 * Hash-based FQDN matcher. Rules are hashed label by label from the right;
 * a lookup walks the query name from the right and probes the same hashes.
 */
class CHashTrieFQDN
{
public:
CHashTrieFQDN();
~CHashTrieFQDN();
/* Build all tables from the rule array. Returns 1 on success, -1 when n_rule is 0. */
int initialize(const struct FQDN_rule * rules, size_t n_rule);
/* Look up FQDN; fills `results` with up to n_result matches and returns their count, or -1 on bad arguments. */
int search(const char * FQDN, size_t FQDN_len, struct FQDN_match * results, size_t n_result);
protected:
/* Dense index of occupied slot h: the number of occupied slots below h in the rank table. */
unsigned int rank(unsigned int h);
protected:
/* Number of rules. */
unsigned int m_num;
/* One record per rule (array of m_num entries). */
domain_impl_t * m_domains;
/* Number of hash slots; always a power of two so that (m_H-1) is usable as a mask. */
unsigned int m_H;
/* Length of the longest rule; lets search() stop scanning early. */
unsigned int m_max_pat_len;
/* Bit array of m_H bits: set for the hash of every label-boundary suffix of every rule. */
unsigned long long * m_B;
/* Rank table, one block per 256 slots plus one terminating block that holds the total count. */
packedRT_t * m_RT;
/* Chain heads indexed by rank(h); each chain lists the rules whose full hash maps to that slot. */
domain_impl_t ** m_matched;
/* Byte -> tolower(byte) lookup table used to make hashing case-insensitive. */
unsigned char m_case_tab[256];
};
/* 64-bit one, so that shifts by up to 63 positions are carried out in 64-bit arithmetic. */
const unsigned long long ONE=1;
/*
 * Bit-array helpers over an array of 64-bit words: word index is off/64, bit index is off%64.
 * Arguments are fully parenthesized so that expressions such as `c ? a : b` or `x|y`
 * passed as `off` are evaluated as a whole. `off` is evaluated twice: avoid side effects.
 */
#define is_bit_set(tbl, off) ( (tbl)[(off)>>6] & ( ONE<<((off)&63) ) )
#define set_bit(tbl, off) ( (tbl)[(off)>>6] |= ( ONE<<((off)&63) ) )
/* Multiplier of the rolling hash: hash = A*hash + byte, with natural 64-bit wrap-around. */
const unsigned long long A=6364136223846793005LL;
/*
 * Create an empty matcher: no rules, no tables. Every member is given a
 * defined value so that an object that was never initialized (or whose
 * initialize() failed early) holds no indeterminate state.
 */
CHashTrieFQDN::CHashTrieFQDN()
{
	m_num=0;
	m_domains=NULL;
	m_H=0;
	m_max_pat_len=0;
	m_B=NULL;
	m_RT=NULL;
	m_matched=NULL;
	/* Precompute the lower-case mapping of every byte value; used while hashing. */
	for(int c=0; c<256; c++)
	{
		m_case_tab[c]=(unsigned char)tolower(c);
	}
}
/*
 * Release every table owned by the matcher. Members that were never
 * allocated are NULL; delete[] on NULL does nothing.
 */
CHashTrieFQDN::~CHashTrieFQDN()
{
	delete [] m_domains;
	delete [] m_B;
	/* m_RT comes from aligned_malloc(), so it has to go back through aligned_free(). */
	if(m_RT)
	{
		aligned_free(m_RT);
	}
	delete [] m_matched;
}
int CHashTrieFQDN::initialize(const struct FQDN_rule * rules, size_t n_rule)
{
long long mem_bytes=0;
if(n_rule==0) return -1;
m_num=n_rule;
m_domains=new domain_impl_t[m_num];
mem_bytes+=m_num*sizeof(domain_impl_t);
unsigned int N=m_num;
m_max_pat_len=0;
FOR(k, m_num)
{
m_domains[k].id =rules[k].id;
m_domains[k].suf_match=rules[k].is_suffix_match;
m_domains[k].len=rules[k].len;
m_domains[k].next=NULL;
m_domains[k].utag=rules[k].user_tag;
FOR(j, rules[k].len)
{
if(rules[k].FQDN[j]=='.') ++N;
}
if(m_max_pat_len<rules[k].len)
{
m_max_pat_len=rules[k].len;
}
}
unsigned int w=(int)(log10((double)N)/log10(2.0)+0.5)+4;
if(w<8) w=8;
m_H=(1U<<w);
m_B=new unsigned long long[m_H>>6];
mem_bytes+=(m_H>>6)*sizeof(unsigned long long);
FOR(i, (m_H>>6)) m_B[i]=0;
m_RT=(packedRT_t *)aligned_malloc(sizeof(packedRT_t)*((m_H>>8)+1), 64);
mem_bytes+=((m_H>>8)+1)*sizeof(packedRT_t);
FOR(i, (m_H>>8))
{
FOR(j, 4) m_RT[i].bitmap[j]=0;
}
FOR(k, m_num)
{
const unsigned char * pb=(const unsigned char *)rules[k].FQDN;
m_domains[k].hash=0;
for(int j=rules[k].len-1; j>=-1; --j)
{
if(j==-1 || pb[j]=='.')
{
unsigned int h=m_domains[k].hash&(m_H-1);
set_bit(m_B, h);
if(j==-1)
{
int q=h&255;
m_RT[h>>8].bitmap[q>>6]|=(ONE<<(q&63));
}
}
else
{
m_domains[k].hash=A*m_domains[k].hash+m_case_tab[pb[j]];
}
}
}
m_RT[0].A=0;
FOR(i, (m_H>>8))
{
m_RT[i].B[0]=0;
m_RT[i].B[1]= popcnt_u64(m_RT[i].bitmap[0]);
m_RT[i].B[2]= m_RT[i].B[1]+popcnt_u64(m_RT[i].bitmap[1]);
m_RT[i].B[3]= m_RT[i].B[2]+popcnt_u64(m_RT[i].bitmap[2]);
m_RT[i+1].A=m_RT[i].A+m_RT[i].B[3]+popcnt_u64(m_RT[i].bitmap[3]);
}
int tn=m_RT[m_H>>8].A;
m_matched=new domain_impl_t *[tn];
mem_bytes+=tn*sizeof(domain_impl_t *);
FOR(i, tn) m_matched[i]=NULL;
FOR(k, m_num)
{
unsigned int h=m_domains[k].hash&(m_H-1);
unsigned idx=rank(h);
m_domains[k].next=m_matched[idx];
m_matched[idx]=&(m_domains[k]);
}
printf("mem_bytes=%u(MB)\n", mem_bytes/(1U<<20));
return 1;
}
unsigned int CHashTrieFQDN::rank(unsigned int h)
{
int p=(h>>8);
int r=((h&255)>>6);
int s=(h&63);
unsigned long long e=m_RT[p].bitmap[r]&((ONE<<s)-1);
return m_RT[p].A+m_RT[p].B[r]+popcnt_u64(e);
}
/*
 * Look up an FQDN in the rule set.
 *
 * Parameters:
 *   FQDN, FQDN_len - name to search (not required to be NUL-terminated); compared case-insensitively.
 *   results        - output array with room for n_result entries.
 *   n_result       - capacity of results; must be non-zero.
 * Returns:
 *   -1 on invalid arguments or when no rules are loaded, otherwise the number of
 *   entries written to results (0 when nothing matches).
 *
 * Candidates are tried from the longest matching suffix to the shortest, and only
 * matches that share the offset of the first hit are reported.
 * A rule with suf_match==0 is only reported when it covers the whole name.
 * Matching compares length and 64-bit hash; the rule text itself is not compared.
 */
int CHashTrieFQDN::search(const char * FQDN, size_t FQDN_len, struct FQDN_match * results, size_t n_result)
{
	if(m_num==0 || FQDN_len==0 || FQDN==NULL || results==NULL || n_result==0) return -1;

	/*
	 * Upper bound on recorded label boundaries. A DNS name of at most 253 bytes has at
	 * most 127 labels; the previous fixed size of 16 was written without a bounds check.
	 */
	enum { MAX_LEVELS=128 };
	unsigned long long HASH[MAX_LEVELS]; /* hash of the suffix that ends at each recorded boundary */
	unsigned int P[MAX_LEVELS];          /* start offset of that suffix inside FQDN */

	int match_num=0;
	const unsigned char * pb=(const unsigned char *)FQDN;
	int t=0;
	unsigned long long hash=0;

	/* Pass 1: walk from the last byte to the first and record each boundary whose suffix is known. */
	for(int j=FQDN_len-1; j>=-1; --j)
	{
		if(j==-1 || pb[j]=='.')
		{
			unsigned int h=hash&(m_H-1);
			/* No rule has this suffix at a label boundary, so no longer suffix can match either. */
			if(is_bit_set(m_B, h)==0) break;
			/* Never write past the candidate arrays, whatever the input looks like. */
			if(t==MAX_LEVELS) break;
			HASH[t]=hash;
			P[t]=j+1;
			++t;
		}
		else if((size_t)j+m_max_pat_len<FQDN_len) break; /* suffix already longer than the longest rule */
		else
		{
			hash=A*hash+m_case_tab[pb[j]];
		}
	}

	/* Pass 2: check the candidates, longest suffix first. */
	for(--t; t>=0; t--)
	{
		unsigned int h=HASH[t]&(m_H-1);
		int q=h&255;
		if(m_RT[h>>8].bitmap[q>>6]&(ONE<<(q&63)))
		{
			unsigned idx=rank(h);
			for(domain_impl_t * pt=m_matched[idx]; pt!=NULL; pt=pt->next)
			{
				/* Exact-match rules only apply to the whole name (offset 0). */
				if(P[t]!=0 && pt->suf_match==0) continue;
				if(pt->len+P[t]==FQDN_len && pt->hash==HASH[t])
				{
					/* A hit at a shorter suffix after earlier hits: the longest match is complete. */
					if(match_num>0 && P[t]!=results[match_num-1].offset) return match_num;
					results[match_num].id=pt->id;
					results[match_num].offset=P[t];
					results[match_num].user_tag=pt->utag;
					++match_num;
					if((size_t)match_num==n_result) return match_num;
				}
			}
		}
	}
	return match_num;
}
/*************************************************************************************/
/* Definition of the opaque handle exposed through the C API: a thin wrapper around one matcher object. */
struct FQDN_engine
{
/* The matcher that holds all rule tables. */
CHashTrieFQDN ht;
};
/*
 * Create an engine and load the given rules into it.
 * Returns the new engine, or NULL when the matcher reports an initialization error.
 */
struct FQDN_engine * FQDN_engine_new(const struct FQDN_rule * rules, size_t n_rule)
{
	struct FQDN_engine * engine=new struct FQDN_engine;
	const bool built=(engine->ht.initialize(rules, n_rule)>=0);
	if(built)
	{
		return engine;
	}
	/* Initialization failed: discard the half-built engine. */
	delete engine;
	return NULL;
}
/*
 * C entry point for a lookup: forwards to the matcher of `instance`.
 * Returns -1 when instance is NULL, otherwise whatever the matcher returns.
 */
int FQDN_engine_search(struct FQDN_engine * instance, const char * FQDN, size_t FQDN_len, struct FQDN_match * results, size_t n_result)
{
	return (instance!=NULL) ? instance->ht.search(FQDN, FQDN_len, results, n_result) : -1;
}
/* Destroy an engine created by FQDN_engine_new(); a NULL instance is accepted. */
void FQDN_engine_free(struct FQDN_engine * instance)
{
	/* delete on a null pointer does nothing, so no explicit check is required. */
	delete instance;
}

View File

@@ -1392,6 +1392,11 @@ int add_digest_rule(struct Maat_table_schema* table, struct db_digest_rule* db_r
scanner->gie_update_q_size++; scanner->gie_update_q_size++;
return 0; return 0;
} }
/*
 * Placeholder for adding one FQDN rule to the scanner; not implemented yet.
 * Always returns -1 so that callers do not account for a rule that was never added.
 * (The body used to be empty, i.e. a non-void function without a return value.)
 *
 * NOTE(review): the rule parameter is declared as db_digest_rule*, which looks like a
 * leftover from add_digest_rule; confirm the intended type when implementing this.
 */
int add_fqdn_rule(struct Maat_table_schema* table, struct db_digest_rule* db_rule, struct Maat_scanner *scanner,void* logger)
{
	(void)table;
	(void)db_rule;
	(void)scanner;
	(void)logger;
	return -1;
}
int del_region_rule(struct Maat_table_schema* table, int region_id, int group_id, int rule_type, struct Maat_scanner *maat_scanner, void* logger) int del_region_rule(struct Maat_table_schema* table, int region_id, int group_id, int rule_type, struct Maat_scanner *maat_scanner, void* logger)
{ {
int i=0; int i=0;
@@ -1430,6 +1435,10 @@ int del_region_rule(struct Maat_table_schema* table, int region_id, int group_id
table_rt=Maat_table_runtime_get(maat_scanner->table_rt_mgr, table->table_id); table_rt=Maat_table_runtime_get(maat_scanner->table_rt_mgr, table->table_id);
Maat_table_runtime_digest_del(table_rt, region->expr_id_lb); Maat_table_runtime_digest_del(table_rt, region->expr_id_lb);
maat_scanner->gie_update_q_size++; maat_scanner->gie_update_q_size++;
break;
case TABLE_TYPE_FQDN:
case TABLE_TYPE_FQDN_PLUGIN:
break; break;
default: default:
assert(0); assert(0);
@@ -2163,6 +2172,65 @@ error_out:
digest_rule=NULL; digest_rule=NULL;
return; return;
} }
/*
 * Parse one row of an FQDN table and apply it to the scanner.
 *
 * Expected row layout (tab separated):
 *   region_id  group_id  is_suffix_match  fqdn  is_valid
 * A row with is_valid==FALSE removes the rule, any other row adds it.
 * Parse errors and failed add/delete operations are counted in table->udpate_err_cnt.
 *
 * NOTE(review): "%s" is unbounded; this relies on table_line being shorter than
 * MAX_TABLE_LINE_SIZE - confirm that the caller guarantees it.
 */
void update_fqdn_rule(struct Maat_table_schema* table, const char* table_line, struct Maat_scanner *scanner, void* logger)
{
	struct Maat_table_runtime* table_rt=Maat_table_runtime_get(scanner->table_rt_mgr, table->table_id);
	struct db_fqdn_rule* fqdn_rule=ALLOC(struct db_fqdn_rule, 1);
	int ret=0;
	char fqdn_buff[MAX_TABLE_LINE_SIZE]={'\0'};
	/* Five conversions for five arguments; every numeric field of db_fqdn_rule is an int, hence %d. */
	ret=sscanf(table_line,"%d\t%d\t%d\t%s\t%d", &(fqdn_rule->region_id),
												&(fqdn_rule->group_id),
												&(fqdn_rule->is_suffix_match),
												fqdn_buff,
												&(fqdn_rule->is_valid));
	/* Borrowed pointer into the stack buffer; cleared again before the rule is freed. */
	fqdn_rule->fqdn=fqdn_buff;
	if(ret!=5)
	{
		MESA_handle_runtime_log(logger,RLOG_LV_FATAL,maat_module ,
			"update error, invalid format of fqdn table %s:%s"
			,table->table_name[table->updating_name],table_line);
		table->udpate_err_cnt++;
		goto error_out;
	}
	if(fqdn_rule->is_valid==FALSE)
	{
		//The rule_type argument is passed as 0 here, the same way the digest tables do it.
		ret=del_region_rule(table, fqdn_rule->region_id, fqdn_rule->group_id, 0, scanner, logger);
		if(ret<0)
		{
			table->udpate_err_cnt++;
		}
		else
		{
			table_rt->origin_rule_num--;
		}
	}
	else
	{
		//NOTE(review): add_fqdn_rule is currently declared with a db_digest_rule* parameter; its signature has to accept db_fqdn_rule*.
		ret=add_fqdn_rule(table, fqdn_rule, scanner, logger);
		if(ret<0)
		{
			MESA_handle_runtime_log(logger,RLOG_LV_INFO,maat_module ,
				"duplicate config of fqdn table %s config_id=%d"
				,table->table_name[table->updating_name],fqdn_rule->region_id);
			table->udpate_err_cnt++;
		}
		else
		{
			table_rt->origin_rule_num++;
		}
	}
error_out:
	fqdn_rule->fqdn=NULL;
	free(fqdn_rule);
	fqdn_rule=NULL;
	return;
}
void update_plugin_table(struct Maat_table_schema* table_schema, const char* row, Maat_scanner* scanner, const struct rule_tag* tags, int n_tags, void* logger) void update_plugin_table(struct Maat_table_schema* table_schema, const char* row, Maat_scanner* scanner, const struct rule_tag* tags, int n_tags, void* logger)
{ {

View File

@@ -491,6 +491,8 @@ struct Maat_table_manager* Maat_table_manager_create(const char* table_info_path
map_register(string2int_map,"compile", TABLE_TYPE_COMPILE); map_register(string2int_map,"compile", TABLE_TYPE_COMPILE);
map_register(string2int_map,"plugin", TABLE_TYPE_PLUGIN); map_register(string2int_map,"plugin", TABLE_TYPE_PLUGIN);
map_register(string2int_map,"ip_plugin", TABLE_TYPE_IP_PLUGIN); map_register(string2int_map,"ip_plugin", TABLE_TYPE_IP_PLUGIN);
map_register(string2int_map,"fqdn", TABLE_TYPE_FQDN);
map_register(string2int_map,"fqdn_plugin", TABLE_TYPE_FQDN_PLUGIN);
map_register(string2int_map,"intval", TABLE_TYPE_INTERVAL); map_register(string2int_map,"intval", TABLE_TYPE_INTERVAL);
map_register(string2int_map,"interval", TABLE_TYPE_INTERVAL); map_register(string2int_map,"interval", TABLE_TYPE_INTERVAL);
map_register(string2int_map,"intval_plus", TABLE_TYPE_INTERVAL_PLUS); map_register(string2int_map,"intval_plus", TABLE_TYPE_INTERVAL_PLUS);
@@ -616,6 +618,9 @@ struct Maat_table_manager* Maat_table_manager_create(const char* table_info_path
{ {
p->compile.user_region_encoding=USER_REGION_ENCODE_NONE; p->compile.user_region_encoding=USER_REGION_ENCODE_NONE;
} }
break
case TABLE_TYPE_FQDN:
default: default:
break; break;
} }

View File

@@ -357,6 +357,10 @@ void Maat_table_runtime_digest_del(struct Maat_table_runtime* table_rt, int expr
,NULL); ,NULL);
MESA_lqueue_join_tail(table_rt->similar.update_q,&digest_rule, sizeof(void*)); MESA_lqueue_join_tail(table_rt->similar.update_q,&digest_rule, sizeof(void*));
return; return;
}
void Maat_table_runtime_fqdn_add(struct Maat_table_runtime* table_rt, int expr_id, const char* fqdn, int is_suffix_match, void* tag)
{
} }
int Maat_table_runtime_digest_batch_udpate(struct Maat_table_runtime* table_rt) int Maat_table_runtime_digest_batch_udpate(struct Maat_table_runtime* table_rt)
{ {

View File

@@ -0,0 +1,69 @@
/*
*
* Copyright (c) 2020
* String Algorithms Research Group
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS)
* National Engineering Laboratory for Information Security Technologies (NELIST)
* All rights reserved
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2020-09-01
*
* This code is the exclusive and proprietary property of IIE-CAS and NELIST.
* Usage for direct or indirect commercial advantage is not allowed without
* written permission from the authors.
*
*/
#ifndef H_FQDN_ENGINE_H
#define H_FQDN_ENGINE_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stddef.h>
/* One rule handed to FQDN_engine_new(). */
struct FQDN_rule
{
/* Caller-chosen identifier; reported back in FQDN_match::id. */
unsigned int id;
int is_suffix_match; /* is_suffix_match==0: exact match; is_suffix_match==1: longest suffix matching. */
/* Length of FQDN in bytes. */
size_t len;
char * FQDN; /* Non-ASCII character is allowed. */
void * user_tag; /* A transparent user tag for convenient accessing, the caller is responsible for its memory management. */
};
/* Opaque engine handle. */
struct FQDN_engine;
/*
 *Function:
 *	Build an engine from an array of rules
 *Parameters:
 *	rules[in]: Array of rules
 *	n_rule[in]: Number of elements in rules; must be greater than 0
 * Return:
 *	The new engine, or NULL on failure (e.g. n_rule==0).
 *	Release it with FQDN_engine_free().
 */
struct FQDN_engine * FQDN_engine_new(const struct FQDN_rule * rules, size_t n_rule);
/* One search hit. */
struct FQDN_match
{
/* id of the matched rule (FQDN_rule::id). */
unsigned int id;
unsigned int offset; /* Start of the matched suffix inside the searched FQDN; offset==0 means the rule covers the whole FQDN, offset>0 is only produced by suffix-match rules. */
/* user_tag of the matched rule (FQDN_rule::user_tag). */
void * user_tag;
};
/*
 *Function:
 *	Search FQDN in the rule base
 *Parameters:
 *	instance[in]: Instance of FQDN engine
 *	FQDN[in]: FQDN for search
 *	FQDN_len[in]: Length of FQDN
 *	results[out]: An array to store matched FQDNs
 *	n_result[in]: Number of elements in the result array
 * Return:
 *	0: No matched FQDN;
 *	>0: Number of matched FQDNs which were stored in results;
 *	<0: Error (instance or FQDN is NULL, FQDN_len is 0 or n_result is 0).
 */
int FQDN_engine_search(struct FQDN_engine * instance, const char * FQDN, size_t FQDN_len, struct FQDN_match * results, size_t n_result);
/*
 *Function:
 *	Destroy an engine created by FQDN_engine_new
 *Parameters:
 *	instance[in]: Instance of FQDN engine; NULL is accepted and ignored
 */
void FQDN_engine_free(struct FQDN_engine * instance);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -16,6 +16,7 @@
#include "stream_fuzzy_hash.h" #include "stream_fuzzy_hash.h"
#include "gram_index_engine.h" #include "gram_index_engine.h"
#include "alignment_int64.h" #include "alignment_int64.h"
#include "FQDN_engine.h"
#include <pthread.h> #include <pthread.h>
#include <iconv.h> #include <iconv.h>
#include <openssl/md5.h> #include <openssl/md5.h>
@@ -78,6 +79,15 @@ struct db_digest_rule
short confidence_degree; short confidence_degree;
int is_valid; int is_valid;
}; };
/* One parsed row of an FQDN table; fields are listed in the column order of the row. */
struct db_fqdn_rule
{
int region_id;
int group_id;
/* Copied from the third column of the row; presumably the same flag as FQDN_rule::is_suffix_match - confirm. */
int is_suffix_match;
/* Borrowed pointer to the FQDN text; the parser points it at a local buffer, so it is not owned by this struct. */
char* fqdn;
/* FALSE marks the row as a deletion of the rule. */
int is_valid;
};
struct Maat_rule_head struct Maat_rule_head
{ {
int config_id; int config_id;

View File

@@ -36,7 +36,9 @@ enum MAAT_TABLE_TYPE
TABLE_TYPE_COMPILE, TABLE_TYPE_COMPILE,
TABLE_TYPE_PLUGIN, TABLE_TYPE_PLUGIN,
TABLE_TYPE_IP_PLUGIN, TABLE_TYPE_IP_PLUGIN,
TABLE_TYPE_INTERVAL_PLUS TABLE_TYPE_INTERVAL_PLUS,
TABLE_TYPE_FQDN,
TABLE_TYPE_FQDN_PLUGIN
}; };
struct compile_ex_data_idx struct compile_ex_data_idx

View File

@@ -4,6 +4,7 @@
#include "IPMatcher.h" #include "IPMatcher.h"
#include "gram_index_engine.h" #include "gram_index_engine.h"
#include "FQDN_engine.h"
#include "alignment_int64.h" #include "alignment_int64.h"
#include "dynamic_array.h" #include "dynamic_array.h"
#include <MESA/MESA_htable.h> #include <MESA/MESA_htable.h>
@@ -14,7 +15,14 @@ struct similar_runtime
GIE_handle_t* gie_handle; GIE_handle_t* gie_handle;
MESA_lqueue_head update_q; MESA_lqueue_head update_q;
}; };
/* Per-table runtime state for fqdn and fqdn_plugin tables. */
struct fqdn_runtime
{
/* NOTE(review): presumably the engine currently used for lookups - no user is visible yet, confirm. */
struct FQDN_engine* fqdn_engine;
/* NOTE(review): presumably the previous engine, kept until it can be released - confirm. */
struct FQDN_engine* old_fqdn_engine;
struct EX_data_rt* ex_data_rt; //for fqdn_plugin ONLY
struct Maat_garbage_bin* bin;
/* NOTE(review): presumably set when rules changed and the engine must be rebuilt - confirm. */
int changed_flag;
};
struct plugin_runtime struct plugin_runtime
{ {
struct EX_data_rt* ex_data_rt; struct EX_data_rt* ex_data_rt;
@@ -51,6 +59,7 @@ struct Maat_table_runtime
union union
{ {
struct similar_runtime similar; //for digest and similarity struct similar_runtime similar; //for digest and similarity
struct fqdn_runtime fqdn;//for fqdn and fqdn_plugin
struct plugin_runtime plugin; struct plugin_runtime plugin;
struct ip_plugin_runtime ip_plugin; struct ip_plugin_runtime ip_plugin;
struct expr_runtime expr; struct expr_runtime expr;