ipmatcher rule_id -> long long & scanner engine centralization

This commit is contained in:
liuwentan
2023-03-01 09:32:36 +08:00
parent a6fb2b6fdd
commit 1566a30002
63 changed files with 4695 additions and 115 deletions

View File

@@ -5,5 +5,10 @@ include_directories(${PROJECT_SOURCE_DIR}/deps)
include_directories(${PROJECT_SOURCE_DIR}/include)
include_directories(${PROJECT_SOURCE_DIR}/src/inc_internal)
add_library(adapter-static adapter_hs.cpp bool_matcher.cpp fqdn_engine.cpp)
target_link_libraries(adapter-static hyperscan_static hyperscan_runtime_static ipmatcher-static)
add_subdirectory(ip_matcher/IntervalIndex)
add_library(adapter-static adapter_hs/adapter_hs.cpp bool_matcher/bool_matcher.cpp
fqdn_engine/fqdn_engine.cpp ip_matcher/ip_matcher.cpp ip_matcher/ipv4_match.cpp
ip_matcher/ipv6_match.cpp flag_matcher/flag_matcher.cpp interval_matcher/cgranges.c
interval_matcher/interval_matcher.cpp)
target_link_libraries(adapter-static hyperscan_static hyperscan_runtime_static interval_index_static)

View File

@@ -20,7 +20,7 @@
#include "uthash/utarray.h"
#include "uthash/uthash.h"
#include "maat_utils.h"
#include "bool_matcher.h"
#include "../bool_matcher/bool_matcher.h"
pid_t hs_gettid()
{

View File

@@ -0,0 +1,85 @@
/*
* @Author: Yang Yubo yangyubo@geedgenetworks.com
* @Date: 2022-12-19 22:10:26
* @LastEditors: Yang Yubo yangyubo@geedgenetworks.com
* @LastEditTime: 2022-12-19 22:13:57
* @FilePath: /flag_matcher/src/flag_matcher.cpp
* @Description: Here is the implementation of flag matcher.
*/
#include <malloc.h>
#include <string.h>
#include <assert.h>
#include "flag_matcher.h"
#define ALLOC(type, number) (type *)calloc(sizeof(type), number)
#define FREE(p) {free(p);p=NULL;}
/* Internal matcher state: a privately owned copy of the caller's rule array. */
struct flag_matcher
{
    struct flag_rule *rule_table; /* heap array holding `number` rules; freed by flag_matcher_free() */
    size_t number;                /* rule count; size_t so the size_t n_rule passed to
                                     flag_matcher_new() is stored without truncation */
};
/**
 * Build a flag matcher holding its own copy of `rule[0..n_rule)`.
 *
 * The caller keeps ownership of `rule` and may release it after this call;
 * `user_tag` pointers are copied verbatim and remain the caller's responsibility.
 *
 * @return a new matcher, or NULL when `rule` is NULL, `n_rule` is 0,
 *         or memory could not be allocated.
 */
struct flag_matcher *flag_matcher_new(struct flag_rule *rule, size_t n_rule)
{
    if (!rule || !n_rule)
        return NULL;

    struct flag_matcher *flag_matcher = ALLOC(struct flag_matcher, 1);
    if (!flag_matcher)
        return NULL; /* report failure instead of relying on assert(), which vanishes under NDEBUG */

    flag_matcher->rule_table = ALLOC(struct flag_rule, n_rule);
    if (!flag_matcher->rule_table)
    {
        free(flag_matcher); /* do not leak the half-built matcher */
        return NULL;
    }

    memcpy(flag_matcher->rule_table, rule, sizeof(struct flag_rule) * n_rule);
    flag_matcher->number = n_rule;
    return flag_matcher;
}
/* Release a matcher created by flag_matcher_new(); passing NULL is a no-op. */
void flag_matcher_free(struct flag_matcher *flag_matcher)
{
    if (flag_matcher != NULL)
    {
        /* the rule table is the matcher's private copy, so it goes first */
        free(flag_matcher->rule_table);
        flag_matcher->rule_table = NULL;
        free(flag_matcher);
    }
}
int flag_matcher_match(struct flag_matcher *flag_matcher, uint64_t flag, struct flag_result *result, size_t n_result)
{
if (!flag_matcher || !result)
{
return -1;
}
uint32_t result_number = 0;
uint32_t rules_number = flag_matcher->number;
for (uint32_t i = 0; i < rules_number; i ++)
{
if (!((flag ^ flag_matcher->rule_table[i].flag) & flag_matcher->rule_table[i].mask))
{
result[result_number].rule_id = flag_matcher->rule_table[i].rule_id;
result[result_number ++].user_tag = flag_matcher->rule_table[i].user_tag;
if (result_number >= n_result)
{
return result_number;
}
}
}
return result_number;
}

View File

@@ -0,0 +1,77 @@
/*
* @Author: Yang Yubo yangyubo@geedgenetworks.com
* @Date: 2022-12-19
* @LastEditors: Yang Yubo yangyubo@geedgenetworks.com
* @LastEditTime: 2022-12-19 22:14:10
* @FilePath: /flag_matcher/include/flag_matcher.h
*/
#ifndef FLAG_MATCHER_H
#define FLAG_MATCHER_H
#include <stdint.h>
#include <stddef.h>
#ifdef __cplusplus
extern "C"
{
#endif
/* One matching rule. As implemented in flag_matcher_match(), a candidate value
   matches when it equals `flag` on every bit that is set in `mask`; bits that
   are clear in `mask` are ignored. */
struct flag_rule
{
uint64_t flag; // expected bit values (only bits selected by mask are compared)
uint64_t mask; // selects which bits of flag take part in the comparison
uint64_t rule_id; // unique for a rule;
/* A transparent user tag for convenient accessing,
the caller is responsible for its memory management. */
void *user_tag;
};
// if matched, return id and tag;
struct flag_result
{
uint64_t rule_id; // unique for a rule;
void *user_tag;
};
/* forward declaration;
The internal structure
is not open to the outside */
struct flag_matcher;
/**
* @description: build a flag matcher; the rule array is copied, so the caller may release it afterwards;
* @param {struct flag_rule} *rule: array of rules, must not be NULL;
* @param {size_t} n_rule: number of rules in the array, must be greater than 0;
* @return {struct flag_matcher*}: the new matcher; NULL if the build failed (including NULL rule or n_rule == 0).
*/
struct flag_matcher *flag_matcher_new(struct flag_rule *rule, size_t n_rule);
/**
* @description: match a flag value; on return, result holds the matched rules;
* @param {struct flag_matcher} *flag_matcher: a matcher built by flag_matcher_new();
* @param {uint64_t} flag: the value to match;
* @param {struct flag_result} *result: result array, memory allocated by the caller;
* @param {size_t} n_result: capacity of the result array, i.e. the MAX number of matches reported;
* @return {int}: the number of matched rules written to result, which may be 0; -1 if flag_matcher or result is NULL;
*/
int flag_matcher_match(struct flag_matcher *flag_matcher, uint64_t flag, struct flag_result *result, size_t n_result);
/**
* @description: destroy a flag matcher after use;
* @param {struct flag_matcher} *flag_matcher: the matcher to free; NULL is accepted and ignored;
* @return {void}
*/
void flag_matcher_free(struct flag_matcher *flag_matcher);
#ifdef __cplusplus
}
#endif
#endif // FLAG_MATCHER_H

View File

@@ -0,0 +1,305 @@
#include <stdio.h>
#include <assert.h>
#include <malloc.h>
#include "cgranges.h"
/**************
* Radix sort *
**************/
#define RS_MIN_SIZE 64
#define RS_MAX_BITS 8
/*
 * KRADIX_SORT_INIT(name, rstype_t, rskey, sizeof_key)
 *
 * Instantiates three sorting functions for arrays of `rstype_t`, ordered by
 * the unsigned integer key `rskey(element)`:
 *   rs_insertsort_<name>(beg, end)       - insertion sort of [beg, end)
 *   rs_sort_<name>(beg, end, n_bits, s)  - one in-place radix pass on the
 *                                          n_bits key bits starting at bit s,
 *                                          then recursion on each bucket with
 *                                          the next lower group of bits
 *   radix_sort_<name>(beg, end)          - entry point
 * `sizeof_key` is the key width in bytes: the first pass starts at bit
 * (sizeof_key - 1) * RS_MAX_BITS. Ranges or buckets of at most RS_MIN_SIZE
 * elements are finished with insertion sort.
 * No comments are placed inside the macro body because every line ends in a
 * line continuation.
 */
#define KRADIX_SORT_INIT(name, rstype_t, rskey, sizeof_key) \
typedef struct { \
rstype_t *b, *e; \
} rsbucket_##name##_t; \
void rs_insertsort_##name(rstype_t *beg, rstype_t *end) \
{ \
rstype_t *i; \
for (i = beg + 1; i < end; ++i) \
if (rskey(*i) < rskey(*(i - 1))) { \
rstype_t *j, tmp = *i; \
for (j = i; j > beg && rskey(tmp) < rskey(*(j-1)); --j) \
*j = *(j - 1); \
*j = tmp; \
} \
} \
void rs_sort_##name(rstype_t *beg, rstype_t *end, int n_bits, int s) \
{ \
rstype_t *i; \
int size = 1<<n_bits, m = size - 1; \
rsbucket_##name##_t *k, b[1<<RS_MAX_BITS], *be = b + size; \
assert(n_bits <= RS_MAX_BITS); \
for (k = b; k != be; ++k) k->b = k->e = beg; \
for (i = beg; i != end; ++i) ++b[rskey(*i)>>s&m].e; \
for (k = b + 1; k != be; ++k) \
k->e += (k-1)->e - beg, k->b = (k-1)->e; \
for (k = b; k != be;) { \
if (k->b != k->e) { \
rsbucket_##name##_t *l; \
if ((l = b + (rskey(*k->b)>>s&m)) != k) { \
rstype_t tmp = *k->b, swap; \
do { \
swap = tmp; tmp = *l->b; *l->b++ = swap; \
l = b + (rskey(tmp)>>s&m); \
} while (l != k); \
*k->b++ = tmp; \
} else ++k->b; \
} else ++k; \
} \
for (b->b = beg, k = b + 1; k != be; ++k) k->b = (k-1)->e; \
if (s) { \
s = s > n_bits? s - n_bits : 0; \
for (k = b; k != be; ++k) \
if (k->e - k->b > RS_MIN_SIZE) rs_sort_##name(k->b, k->e, n_bits, s); \
else if (k->e - k->b > 1) rs_insertsort_##name(k->b, k->e); \
} \
} \
void radix_sort_##name(rstype_t *beg, rstype_t *end) \
{ \
if (end - beg <= RS_MIN_SIZE) rs_insertsort_##name(beg, end); \
else rs_sort_##name(beg, end, RS_MAX_BITS, (sizeof_key - 1) * RS_MAX_BITS); \
}
/*********************
* Convenient macros *
*********************/
#ifndef kroundup32
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
#endif
#define CALLOC(type, len) ((type*)calloc((len), sizeof(type)))
#define REALLOC(ptr, len) ((ptr) = (__typeof__(ptr))realloc((ptr), (len) * sizeof(*(ptr))))
#define EXPAND(a, m) do { \
(m) = (m)? (m) + ((m)>>1) : 16; \
REALLOC((a), (m)); \
} while (0)
/********************
* Basic operations *
********************/
#define cr_intv_key(r) ((r).x)
KRADIX_SORT_INIT(cr_intv, cr_intv_t, cr_intv_key, 8)
/* Allocate an empty, zero-initialised interval set; returns NULL if calloc fails. */
cgranges_t *cr_init(void)
{
    return CALLOC(cgranges_t, 1);
}
/* Free an interval set and its interval array; a NULL set is ignored. */
void cr_destroy(cgranges_t *cr)
{
    if (cr != NULL) {
        /* the array is only released when it is non-NULL and holds intervals */
        if (cr->r != NULL && cr->n_r != 0)
            free(cr->r);
        free(cr);
    }
}
// int32_t cr_add_ctg(cgranges_t *cr, const char *ctg, int32_t len)
// {
// int absent;
// khint_t k;
// strhash_t *h = (strhash_t*)cr->hc;
// k = kh_put(str, h, ctg, &absent);
// if (absent) {
// cr_ctg_t *p;
// if (cr->n_ctg == cr->m_ctg)
// EXPAND(cr->ctg, cr->m_ctg);
// kh_val(h, k) = cr->n_ctg;
// p = &cr->ctg[cr->n_ctg++];
// p->name = strdup(ctg);
// kh_key(h, k) = p->name;
// p->len = len;
// p->n = 0, p->off = -1;
// }
// if (len > cr->ctg[kh_val(h, k)].len)
// cr->ctg[kh_val(h, k)].len = len;
// return kh_val(h, k);
// }
// int32_t cr_get_ctg(const cgranges_t *cr, const char *ctg)
// {
// khint_t k;
// strhash_t *h = (strhash_t*)cr->hc;
// k = kh_get(str, h, ctg);
// return k == kh_end(h)? -1 : kh_val(h, k);
// }
/*
 * Append the interval [st, en) with its label to the set.
 *
 * The set must be (re)indexed with cr_index() before it is queried.
 * Returns a pointer to the stored interval, or 0 when `cr` is NULL,
 * `st > en`, or the interval array could not be grown. On a failed
 * growth the set is left unchanged and still valid.
 */
cr_intv_t *cr_add(cgranges_t *cr, uint64_t st, uint64_t en, user_label_t label)
{
    cr_intv_t *p;

    if (cr == 0 || st > en) return 0;

    if (cr->n_r == cr->m_r) {
        /* same growth policy as EXPAND (x1.5, starting at 16), but the old
           array is kept when realloc fails instead of being overwritten with NULL */
        int64_t new_m = cr->m_r ? cr->m_r + (cr->m_r >> 1) : 16;
        cr_intv_t *grown = (cr_intv_t*)realloc(cr->r, (size_t)new_m * sizeof(*grown));
        if (grown == 0) return 0;
        cr->r = grown;
        cr->m_r = new_m;
    }

    p = &cr->r[cr->n_r++];
    p->x = st;
    p->real_y = en;
    p->label = label;
    if (cr->len < en)
        cr->len = en; /* track the largest end position seen */
    return p;
}
/* Sort the stored intervals by start position (field x) with the generated radix sort. */
void cr_sort(cgranges_t *cr)
{
    if (cr->n_r != 0)
        radix_sort_cr_intv(cr->r, cr->r + cr->n_r);
}
/*
 * Return 1 when the intervals are in non-decreasing order of start position,
 * 0 otherwise. An empty or single-element set counts as sorted.
 */
int32_t cr_is_sorted(const cgranges_t *cr)
{
    int64_t i; /* signed, like n_r, so the comparison below has no sign mismatch */

    for (i = 1; i < cr->n_r; ++i)
        if (cr->r[i-1].x > cr->r[i].x)
            return 0;
    return 1;
}
/************
* Indexing *
************/
/*
 * Build the search index over the sorted array a[0..n) in place.
 *
 * The array is treated as an implicit binary tree: even positions are leaves
 * (level 0) and a node at level k sits at positions (2^k - 1) + j * 2^(k+1),
 * with children x = 2^(k-1) positions to its left and right. For every node,
 * field y is set to the largest real_y among the node and the children that
 * were combined into it.
 *
 * Returns the level of the topmost node processed (k - 1 after the loop),
 * or -1 when n <= 0.
 */
int64_t cr_index1(cr_intv_t *a, int64_t n)
{
int64_t i, last_i;
uint64_t last;
int64_t k;
if (n <= 0) return -1;
// level 0: a leaf's y is its own end; last_i/last remember the final leaf visited
for (i = 0; i < n; i += 2) last_i = i, last = a[i].y = (a[i].real_y);
for (k = 1; 1LL<<k <= n; ++k) {
// x: distance to a child at this level; i0: first node of the level; step: distance between nodes
int64_t x = 1LL<<(k-1), i0 = (x<<1) - 1, step = x<<2;
for (i = i0; i < n; i += step) {
uint64_t el = a[i - x].y;
// when the right child lies beyond the array, `last` is used in its place
uint64_t er = i + x < n? a[i + x].y : last;
uint64_t e = a[i].real_y;
e = e > el? e : el;
e = e > er? e : er;
a[i].y = e;
}
// move last_i to its parent position at level k and fold that node's y into `last` if it is inside the array
last_i = last_i>>k&1? last_i - x : last_i + x;
if (last_i < n && a[last_i].y > last)
last = a[last_i].y;
}
return k - 1;
}
/* Make the set queryable: sort it by start position if needed, then build the
   index and record the root level in cr->root_k. */
void cr_index(cgranges_t *cr)
{
    int32_t already_sorted = cr_is_sorted(cr);

    if (already_sorted == 0)
        cr_sort(cr);
    cr->root_k = cr_index1(cr->r, cr->n_r);
}
/*********
* Query *
*********/
// int64_t cr_min_start_int(const cgranges_t *cr, int32_t ctg_id, int32_t st) // find the smallest i such that cr_st(&r[i]) >= st
// {
// int64_t left, right;
// const cr_ctg_t *c;
// const cr_intv_t *r;
// if (ctg_id < 0 || ctg_id >= cr->n_ctg) return -1;
// c = &cr->ctg[ctg_id];
// r = &cr->r[c->off];
// if (c->n == 0) return -1;
// left = 0, right = c->n;
// while (right > left) {
// int64_t mid = left + ((right - left) >> 1);
// if (cr_st(&r[mid]) >= st) right = mid;
// else left = mid + 1;
// }
// assert(left == right);
// return left == c->n? -1 : c->off + left;
// }
/* One entry of the explicit traversal stack used by cr_overlap_int(). */
typedef struct {
int64_t x; // array position of the node
int64_t k, w; // k: level of the node; w: 0 until the left child has been pushed, then 1
} istack_t;
/*
 * Find every stored interval that overlaps the query [st, en).
 *
 * cr    : interval set; cr_index() must have been called after the last cr_add().
 * b_    : in/out pointer to a heap array receiving the positions (indices into
 *         cr->r) of the overlapping intervals; grown with realloc as needed,
 *         so *b_ may start as NULL and must be released by the caller with free().
 * m_b_  : in/out capacity of *b_, in elements.
 *
 * Returns the number of positions written. An empty or never-indexed set
 * returns 0 and leaves *b_ and *m_b_ untouched.
 */
int64_t cr_overlap_int(const cgranges_t *cr, uint64_t st, uint64_t en, int64_t **b_, int64_t *m_b_)
{
    int32_t t = 0;
    const cr_intv_t *r;
    int64_t *b, m_b, n = 0;
    istack_t stack[64], *p;

    /* Nothing to search. Without this guard root_k == -1 would give a
       negative shift count below and cr->r could be NULL. */
    if (cr == 0 || cr->r == 0 || cr->n_r <= 0 || cr->root_k < 0)
        return 0;

    b = *b_, m_b = *m_b_;
    r = cr->r;
    p = &stack[t++];
    p->k = cr->root_k, p->x = (1LL<<p->k) - 1, p->w = 0; // push the root onto the stack
    while (t) { // stack is not empty
        istack_t z = stack[--t];
        if (z.k <= 3) { // the subtree is no larger than (1<<(z.k+1))-1; do a linear scan
            int64_t i, i0 = z.x >> z.k << z.k, i1 = i0 + (1LL<<(z.k+1)) - 1;
            if (i1 >= cr->n_r) i1 = cr->n_r;
            for (i = i0; i < i1 && cr_st(&r[i]) < en; ++i)
                if (st < cr_en(&r[i])) {
                    if (n == m_b) EXPAND(b, m_b);
                    b[n++] = i;
                }
        } else if (z.w == 0) { // left child not processed yet
            int64_t y = z.x - (1LL<<(z.k-1));
            p = &stack[t++];
            p->k = z.k, p->x = z.x, p->w = 1; // revisit this node after its left subtree
            if (y >= cr->n_r || r[y].y > st) { // descend only if the left subtree can reach past st
                p = &stack[t++];
                p->k = z.k - 1, p->x = y, p->w = 0; // push the left child
            }
        } else if (z.x < cr->n_r && cr_st(&r[z.x]) < en) {
            if (st < cr_en(&r[z.x])) { // z.x overlaps the query; write it to the output array
                if (n == m_b) EXPAND(b, m_b);
                b[n++] = z.x;
            }
            p = &stack[t++];
            p->k = z.k - 1, p->x = z.x + (1LL<<(z.k-1)), p->w = 0; // push the right child
        }
    }
    *b_ = b, *m_b_ = m_b;
    return n;
}
// int64_t cr_contain_int(const cgranges_t *cr, int32_t ctg_id, int32_t st, int32_t en, int64_t **b_, int64_t *m_b_)
// {
// int64_t n = 0, i, s, e, *b = *b_, m_b = *m_b_;
// s = cr_min_start_int(cr, ctg_id, st);
// if (s < 0) return 0;
// e = cr->ctg[ctg_id].off + cr->ctg[ctg_id].n;
// for (i = s; i < e; ++i) {
// const cr_intv_t *r = &cr->r[i];
// if (cr_st(r) >= en) break;
// if (cr_st(r) >= st && cr_en(r) <= en) {
// if (n == m_b) EXPAND(b, m_b);
// b[n++] = i;
// }
// }
// *b_ = b, *m_b_ = m_b;
// return n;
// }
// int64_t cr_min_start(const cgranges_t *cr, const char *ctg, int32_t st)
// {
// return cr_min_start_int(cr, cr_get_ctg(cr, ctg), st);
// }
/* Public entry point for overlap queries; forwards all arguments unchanged to
   cr_overlap_int() and returns its result. */
int64_t cr_overlap(const cgranges_t *cr, uint64_t st, uint64_t en, int64_t **b_, int64_t *m_b_)
{
return cr_overlap_int(cr, st, en, b_, m_b_);
}
// int64_t cr_contain(const cgranges_t *cr, const char *ctg, int32_t st, int32_t en, int64_t **b_, int64_t *m_b_)
// {
// return cr_contain_int(cr, cr_get_ctg(cr, ctg), st, en, b_, m_b_);
// }

View File

@@ -0,0 +1,103 @@
/* The MIT License
Copyright (c) 2019 Dana-Farber Cancer Institute
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
#ifndef CRANGES_H
#define CRANGES_H
#include <stdint.h>
#include "interval_matcher.h"
typedef struct interval_result user_label_t;
typedef struct { // a contig
char *name; // name of the contig
int32_t len; // max length seen in data
int32_t root_k;
int64_t n, off; // sum of lengths of previous contigs
} cr_ctg_t;
typedef struct { // an interval
// Earlier layout, kept for reference:
// uint64_t x; // prior to cr_index(), x = ctg_id<<32|start_pos; after: x = start_pos<<32|end_pos
// uint32_t y:31, rev:1;
uint64_t x; // start position; also the sort key (see cr_intv_key in cgranges.c)
uint64_t y; // index data written by cr_index(): largest end position within this node's subtree
uint64_t real_y; // end position of this interval as passed to cr_add()
user_label_t label; // caller payload stored by cr_add() and read back through cr_label()
} cr_intv_t;
// An interval set: a growable array of intervals plus the index root level.
typedef struct {
int64_t n_r, m_r; // number of intervals stored and allocated capacity of r
cr_intv_t *r; // list of intervals (of size _n_r_)
// int32_t n_ctg, m_ctg; // number and max number of contigs
// cr_ctg_t ctg; // list of contigs (of size _n_ctg_)
// void *hc; // dictionary for converting contig names to integers
uint64_t len; // largest end position passed to cr_add() so far
int64_t root_k; // level of the index root set by cr_index(); -1 when the set is empty
} cgranges_t;
#ifdef __cplusplus
extern "C" {
#endif
// retrieve start and end positions from a cr_intv_t object
// static inline int32_t cr_st(const cr_intv_t *r) { return (int32_t)(r->x>>32); }
// static inline int32_t cr_en(const cr_intv_t *r) { return (int32_t)r->x; }
static inline uint64_t cr_st(const cr_intv_t *r) { return r->x; }
static inline uint64_t cr_en(const cr_intv_t *r) { return r->real_y; }
static inline uint64_t cr_start(const cgranges_t *cr, int64_t i) { return cr_st(&cr->r[i]); }
static inline uint64_t cr_end(const cgranges_t *cr, int64_t i) { return cr_en(&cr->r[i]); }
static inline user_label_t cr_label(const cgranges_t *cr, int64_t i) { return cr->r[i].label; }
// Initialize an empty interval set (NULL on allocation failure)
cgranges_t *cr_init(void);
// Deallocate a set; NULL is accepted
void cr_destroy(cgranges_t *cr);
// Add the interval [st, en) with a caller label; returns 0 when st > en
// cr_intv_t *cr_add(cgranges_t *cr, const char *ctg, int32_t st, int32_t en, user_label_t label);
cr_intv_t *cr_add(cgranges_t *cr, uint64_t st, uint64_t en, user_label_t label);
// Sort and index intervals; call after the last cr_add() and before any query
void cr_index(cgranges_t *cr);
// int64_t cr_overlap(const cgranges_t *cr, const char *ctg, int32_t st, int32_t en, int64_t **b_, int64_t *m_b_);
// int64_t cr_contain(const cgranges_t *cr, const char *ctg, int32_t st, int32_t en, int64_t **b_, int64_t *m_b_);
// Report positions of intervals overlapping [st, en) into *b_ (realloc'ed as needed; caller frees)
int64_t cr_overlap(const cgranges_t *cr, uint64_t st, uint64_t en, int64_t **b_, int64_t *m_b_);
// NOTE(review): cr_contain, cr_add_ctg and cr_get_ctg below are only declared;
// their definitions in cgranges.c are commented out, so any call fails at link
// time. Confirm whether these declarations should be removed.
int64_t cr_contain(const cgranges_t *cr, const char *ctg, int32_t st, int32_t en, int64_t **b_, int64_t *m_b_);
// Add a contig and length. Call this for desired contig ordering. _len_ can be 0.
int32_t cr_add_ctg(cgranges_t *cr, const char *ctg, int32_t len);
// Get the contig ID given its name
int32_t cr_get_ctg(const cgranges_t *cr, const char *ctg);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,86 @@
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include "interval_matcher.h"
#include "cgranges.h"
#ifndef MIN
#define MIN(a,b) ((a)>(b) ? (b) : (a))
#endif
struct interval_matcher
{
cgranges_t *cr;
size_t n_rule;
};
struct interval_matcher *interval_matcher_new(struct interval_rule *rule, size_t n_rule)
{
if (!rule || !n_rule)
{
return NULL;
}
struct interval_matcher *matcher = (struct interval_matcher *)calloc(sizeof(struct interval_matcher), 1);
user_label_t label;
matcher->cr = cr_init();
matcher->n_rule = n_rule;
for(size_t i = 0; i < n_rule; i ++)
{
label = rule[i].result;
cr_add(matcher->cr, rule[i].start, rule[i].end + 1, label);
}
cr_index(matcher->cr);
if (matcher->cr->root_k == -1)
{
return NULL;
}
return matcher;
}
/* Destroy a matcher built by interval_matcher_new(); NULL is accepted and ignored. */
void interval_matcher_free(struct interval_matcher *interval_matcher)
{
    if (interval_matcher != NULL)
    {
        cr_destroy(interval_matcher->cr);
        free(interval_matcher);
    }
}
/**
 * Report the rules whose interval contains `target`.
 *
 * @param result    caller-allocated array receiving the matched rule results
 * @param n_result  capacity of `result`; at most this many matches are written
 * @return the number of entries written (possibly 0), or -1 when
 *         `interval_matcher` or `result` is NULL, `n_result` is 0, or
 *         `target` is UINT64_MAX (target + 1 would wrap).
 */
int interval_matcher_match(struct interval_matcher *interval_matcher, uint64_t target, struct interval_result *result, size_t n_result)
{
    if (interval_matcher == NULL || result == NULL || n_result == 0 || target == (uint64_t)(-1))
    {
        return -1;
    }

    int64_t *b = NULL, max_b = 0;
    int n_out = 0;
    /* a single point is queried as the half-open range [target, target + 1) */
    int64_t n = cr_overlap(interval_matcher->cr, target, target + 1, &b, &max_b);

    if (n > 0 && b != NULL)
    {
        /* The count is returned as int, so cap it at INT_MAX rather than
           UINT32_MAX to keep the return value from turning negative. */
        size_t limit = n_result < (size_t)INT_MAX ? n_result : (size_t)INT_MAX;
        size_t n_copy = (uint64_t)n < (uint64_t)limit ? (size_t)n : limit;

        for (size_t i = 0; i < n_copy; i++)
        {
            result[i] = interval_matcher->cr->r[b[i]].label;
        }
        n_out = (int)n_copy;
    }

    free(b); /* released on every path; free(NULL) is a no-op */
    return n_out;
}

View File

@@ -0,0 +1,72 @@
/*
* @Author: Yang Yubo yangyubo@geedgenetworks.com
* @Date: 2023-1-18
* @LastEditors: Yang Yubo yangyubo@geedgenetworks.com
* @FilePath: /interval_matcher/include/interval_matcher.h
*/
#ifndef INTERVAL_MATCHER_H
#define INTERVAL_MATCHER_H
#include <stdint.h>
#include <stddef.h>
#ifdef __cplusplus
extern "C"
{
#endif
// if matched, return id and tag;
struct interval_result
{
uint64_t rule_id;
/* A transparent user tag for convenient accessing,
the caller is responsible for its memory management. */
void *user_tag;
};
/* One rule: the closed range [start, end] and the result reported for any
   target inside it. */
struct interval_rule
{
uint64_t start; // interval's start (inclusive)
uint64_t end; // interval's end (inclusive); the max is ((uint64_t)(-1) - 1)
struct interval_result result; // copied into the output of interval_matcher_match() on a hit
};
/* forward declaration;
The internal structure
is not open to the outside */
struct interval_matcher;
/**
* @description: to build a interval_matcher for matching;
* @param {struct interval_rule} *rule: it's a array for rules;
* @param {size_t} n_rule: it's the number of rules;
* @return {struct interval_matcher*}: if NULL, build failed!
*/
struct interval_matcher *interval_matcher_new(struct interval_rule *rule, size_t n_rule);
/**
* @description: destroy an interval_matcher after use;
* @param {struct interval_matcher} *interval_matcher: the matcher to free; NULL is accepted and ignored;
* @return {void}
*/
void interval_matcher_free(struct interval_matcher *interval_matcher);
/**
* @description: match a value; on return, result holds the rules whose interval contains target;
* @param {struct interval_matcher} *interval_matcher: a matcher built by interval_matcher_new();
* @param {uint64_t} target: the value to match, the max is ((uint64_t)(-1) - 1);
* @param {struct interval_result} *result: result array, memory allocated by the caller;
* @param {size_t} n_result: capacity of the result array, i.e. the MAX number of matches reported; must be greater than 0;
* @return {int}: the number of matched rules written to result, which may be 0; -1 on an invalid parameter;
*/
int interval_matcher_match(struct interval_matcher *interval_matcher, uint64_t target, struct interval_result *result, size_t n_result);
#ifdef __cplusplus
}
#endif
#endif // INTERVAL_MATCHER_H

View File

@@ -0,0 +1,430 @@
#include <math.h>
#include <string.h>
#include <memory.h>
#include <time.h>
#include "ACEI.h"
// Create an empty index over the value domain [0, 2^eRange).
// No interval data exists until PreProcessing() is called. Every pointer and
// size member starts out as NULL/0 so that destroying an object that was
// never (or unsuccessfully) pre-processed does not free indeterminate pointers.
ACEI::ACEI(unsigned int eRange):Exp2Size(32)
{
    // r is the largest indexable value. It is a 32-bit quantity, so any
    // eRange of 32 or more saturates to 0xFFFFFFFF; this also avoids an
    // out-of-range shift for very large eRange.
    r = (eRange >= 32) ? 0xFFFFFFFFu
                       : (u_int)(((unsigned long long)1 << eRange) - 1);

    n = 0;
    L = 0;
    H = 0;
    h = 0;
    w = 0;
    SNum = 0;
    SCEINum = 0;
    pExp2 = NULL;
    pTopCEIs = NULL;
    ppBotCEIs = NULL;
    pTopSrlCEIs = NULL;
    ppBotSrlCEIs = NULL;

    HQI.ppQImatrix = NULL;
    HQI.MAXROWS = 0;
    HQI.ROWLEN = 0;
    HQI.rowID = 0;
    HQI.colID = 0;
    HQI.cnt = 0;
}
// Release everything owned by the index by delegating to ruin().
ACEI::~ACEI()
{
ruin();
}
long long ACEI::PreProcessing(const std::vector<u_int> &a, const std::vector<u_int> &b)
{
if (a.size() != b.size())
return -1;
initQIMatrix();
int i;
for (i = 0; i < (int)a.size(); i++)
{
HQI.ppQImatrix[HQI.rowID][HQI.colID].l = a[i];//be careful
HQI.ppQImatrix[HQI.rowID][HQI.colID].r = b[i]+1;//<2F>ڲ<EFBFBD><DAB2><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʾΪ[, )
HQI.ppQImatrix[HQI.rowID][HQI.colID].index = i;
++HQI.cnt;
//adjust QI matrix begin
HQI.colID = (HQI.colID + 1) % HQI.ROWLEN;
if(HQI.colID == 0)
{
++HQI.rowID;
if(HQI.rowID < HQI.MAXROWS)
{
HQI.ppQImatrix[HQI.rowID] = (_QueryInterval*)malloc(sizeof(_QueryInterval)*HQI.ROWLEN);
}
else
{
printf("Query Interval matrix run over, reminder discard !!!\n");
return 1;
}
}
//adjust end
}
n = HQI.cnt;
init();
return 0;
}
// Public lookup: writes up to `size` interval indices for `key` into `result`
// and returns how many were written. Forwards directly to searchX().
int ACEI::Find(unsigned int key, unsigned int * result, unsigned int size)
{
return searchX(key, result, size);
}
//input & output
// Reset the query-interval matrix: a table of up to 10000 rows with 1024
// intervals per row, of which only the first row is allocated here.
void ACEI::initQIMatrix()
{
    HQI.cnt = 0;
    HQI.colID = 0;
    HQI.rowID = 0;
    HQI.ROWLEN = 1024;
    HQI.MAXROWS = 10000;

    HQI.ppQImatrix = (_QueryInterval**)malloc(HQI.MAXROWS * sizeof(_QueryInterval*));
    HQI.ppQImatrix[0] = (_QueryInterval*)malloc(HQI.ROWLEN * sizeof(_QueryInterval));
}
// Free every allocated matrix row (from rowID down to row 0), then the row
// table itself, and reset the fill counters.
void ACEI::ruinQIMatirc()
{
    if (HQI.ppQImatrix != NULL)
    {
        int row = HQI.rowID;
        while (row >= 0)
        {
            free(HQI.ppQImatrix[row]);
            --row;
        }
        free(HQI.ppQImatrix);
        HQI.ppQImatrix = NULL;
    }
    HQI.cnt = 0;
    HQI.colID = 0;
    HQI.rowID = 0;
}
//init & ruin
// Size and build the search structures from the intervals held in HQI.
// Steps: fill the power-of-two table, choose the heights H and h from the
// interval count n, the average interval length w and the range r, allocate
// the top/bottom CEI tables, insert every interval, then pack the linked
// lists into arrays via serial().
// Returns 0 on success, -1 when pTopCEIs or ppBotCEIs cannot be allocated.
int ACEI::init()
{
u_int i,j,k;
// pExp2[i] == 2^i for i in [0, Exp2Size)
// NOTE(review): this malloc result is not checked before use.
pExp2 = (unsigned long*)malloc(sizeof(unsigned long)*Exp2Size);
for(i = 0; i < Exp2Size; i++) pExp2[i] = (unsigned long)pow((double)2, (double)i);
w = getAvrgRng();
// NOTE(review): 4*r is evaluated in unsigned int before the comparison and
// wraps when r is 0xFFFFFFFF - confirm this is intended.
if(n*w>4*r)
{
H = (u_int)ceil((double)log(sqrt((double)w/4))/log((double)2));
h = (u_int)ceil((double)log(sqrt((double)n*w/4/r))/log((double)2));
if(H <= h) H = h+1;
}
else
{
// NOTE(review): divides by n, so n must be non-zero here.
H = (u_int)ceil((double)log(sqrt((double)r/3/n+w/6))/log((double)2));
h = 0;
if(H <= h) H = h+1;
}
// L = 2^H is the segment length; SNum segments cover [0, r];
// each segment owns SCEINum = 2^(h+1) top-level slots.
L = pExp2[H];
SNum = r/L + 1;
SCEINum = pExp2[h+1];
pTopCEIs = (_CEI*)malloc(sizeof(_CEI)*SNum*SCEINum);
if(!pTopCEIs)
{
printf("Malloc error !!! for pTopCEIs\n");
return -1;
}
memset(pTopCEIs, 0, SNum*SCEINum*sizeof(_CEI));
// one bottom-table pointer per segment; the tables are allocated on demand in addBtmCEIs()
ppBotCEIs = (_CEI**)malloc(sizeof(_CEI*) * SNum);
if(!ppBotCEIs)
{
printf("Malloc error !!! for ppBotCEIs\n");
return -1;
}
memset(ppBotCEIs, 0, SNum*sizeof(_CEI*));
// insert every loaded interval; entry k lives at row k / ROWLEN, column k % ROWLEN
for (k = 0; k < HQI.cnt; k++)
{
i = k / HQI.ROWLEN;
j = k % HQI.ROWLEN;
addIntvl(HQI.ppQImatrix[i][j].l, HQI.ppQImatrix[i][j].r, &(HQI.ppQImatrix[i][j]));
}
serial();
return 0;
}
void ACEI::ruin()
{
u_int i,j;
free(pExp2);
//releaseCEIs(&pTopCEIs, SNum*SCEINum);
//for (i = 0; i < SNum; i++)
//{
// if (ppBotCEIs[i])
// {
// releaseCEIs(&(ppBotCEIs[i]), L);
// }
//}
//free(ppBotCEIs);
for (i = 0; i < SNum*SCEINum; i++)
if(pTopSrlCEIs[i].head)
free(pTopSrlCEIs[i].head);
free(pTopSrlCEIs);
for (i = 0; i < SNum; i++)
if(ppBotSrlCEIs[i])
{
for (j = 0; j < L; j++)
if(ppBotSrlCEIs[i][j].head)
free(ppBotSrlCEIs[i][j].head);
free(ppBotSrlCEIs[i]);
}
free(ppBotSrlCEIs);
ruinQIMatirc();
}
// Free an array of lCEIsNum linked-list CEIs: every list node first, then the
// array itself; *ppIIDSet is set to NULL afterwards.
void ACEI::releaseCEIs(_CEI **ppIIDSet, u_int lCEIsNum)
{
    _CEI *sets = *ppIIDSet;

    for (u_int idx = 0; idx < lCEIsNum; idx++)
    {
        _IID *node = sets[idx].head;
        while (node != NULL)
        {
            _IID *following = node->next;
            free(node);
            node = following;
        }
    }
    free(sets);
    *ppIIDSet = NULL;
}
//search
int ACEI::searchX(unsigned int x, unsigned int * result, unsigned int size)
{
std::vector<u_int> vecui;
u_int sID, uID, dID, lx;
u_int i;
_SrlIID *pIID;
lx = x;
//if(lx >= r) return vecui;
if(lx > r)
return 0;
sID = lx >> H;
dID = lx - (sID << H);
uID = (dID >> (H - h)) + pExp2[h];
//lx = (u_int)floor(x);
//sID = lx / L;
//dID = lx % L;
//uID = dID / pExp2[H-h] + pExp2[h];
//if (fpQueryRslt) fprintf(fpQueryRslt, "%10.6lf\tin ", x);
//else printf("%10.6lf\tin ", x);
unsigned int n = 0;
if(ppBotSrlCEIs[sID] && ppBotSrlCEIs[sID][dID].head != NULL)
{
for (pIID = ppBotSrlCEIs[sID][dID].head; pIID->iid != NULL; pIID++)
{
if(n >= size)
{
return n;
}
result[n++] = pIID->iid->index;
}
}
for (i = 0; i <= h; i++)
{
if (pTopSrlCEIs[sID*SCEINum+uID].head != NULL)
{
for (pIID = pTopSrlCEIs[sID*SCEINum+uID].head; pIID->iid != NULL; pIID++)
{
if(n >= size)
{
return n;
}
result[n++] = pIID->iid->index;
}
}
uID >>= 1;
}
return n;
}
//insert
//void insertIntvl(u_int l, u_int r)
//{
// _QueryInterval * pQI = (_QueryInterval*)malloc(sizeof(_QueryInterval));
// MemCst += sizeof(_QueryInterval);//for stat.
// pQI->l = l;
// pQI->r = r;
// pQI->next = NULL;
// if (HQI.head != NULL)
// {
// HQI.tail->next = pQI;
// HQI.tail = pQI;
// }
// else
// {
// HQI.head = HQI.tail = pQI;
// }
// HQI.len ++;
// addIntvl(l, r, pQI);
//}
// Register the half-open interval [ll, lr) for query interval pQI.
// Whole segments covered by the interval are recorded in slot 1 of the
// segment's top-level table; the partial pieces at either end are handed to
// addFrgmnt().
// NOTE(review): the original comment on this line was lost to an encoding
// error; it appears to have described the left-closed/right-open convention -
// confirm against the original source.
void ACEI::addIntvl(u_int ll, u_int lr, _QueryInterval* pQI)
{
unsigned long long lft = ll;
unsigned long long rght = lr;
// lr == 0 is treated as the end value 2^32 (the stored end b+1 wraps to 0 when b is 0xFFFFFFFF)
if (!lr)
rght += ((unsigned long long)1 << 32);
u_int sLID, sRID;
u_int i;
// sLID: first segment boundary at or after the start; sRID: last boundary at or before the end
sLID = (u_int)ceil((double)lft/L);
sRID = (u_int)floor((double)rght/L);
// no boundary inside the interval: it lies within the single segment sRID
if (sLID > sRID) addFrgmnt(ll, lr, sRID, pQI);
else
{
if (sLID < sRID)
{
// segments sLID .. sRID-1 are covered completely
for(i = sLID; i < sRID; i++) addID(&(pTopCEIs[i*SCEINum+1]), pQI);
}
// leading piece before the first boundary, trailing piece after the last one
if (lft < (unsigned long long)sLID * L) addFrgmnt(ll, sLID*L, sLID-1, pQI);
if (rght > (unsigned long long)sRID * L) addFrgmnt(sRID*L, lr, sRID, pQI);
}
}
// Register the piece [ll, lr) that lies inside segment sID.
// The piece is reduced to offsets within the segment; unit-aligned runs
// (unit = 2^(H-h)) go to the segment's top-level slots, while unaligned
// remainders go to the bottom-level cells via addBtmCEIs().
// NOTE(review): the original comment on this line was lost to an encoding
// error; it appears to have discussed lr being 0 - confirm against the
// original source.
void ACEI::addFrgmnt(u_int ll, u_int lr, u_int sID, _QueryInterval* pQI)
{
u_int lft = ll % L;
u_int rght = lr % L;
u_int sLID, sRID;
u_int i,k;
// an end offset of 0 means the piece runs to the end of the segment
if(!rght) rght+= L;
// sLID/sRID: first and last unit boundary inside the piece
sLID = (u_int)ceil((double)lft/pExp2[H-h]);
sRID = (u_int)floor((double)rght/pExp2[H-h]);
// no unit boundary inside: the whole piece goes to the bottom level
if (sLID > sRID) addBtmCEIs(lft, rght, sID, pQI);
else
{
if (lft < sLID * pExp2[H-h]) addBtmCEIs(lft, sLID*pExp2[H-h], sID, pQI);
if (rght > sRID * pExp2[H-h]) addBtmCEIs(sRID*pExp2[H-h], rght, sID, pQI);
if (sLID < sRID)
{
// shift to slot numbers of the lowest top level, which start at 2^h
sLID += pExp2[h];
sRID += pExp2[h];
// i: width (in units) of the block currently represented by sLID; k: units still to cover
i = 1;
k = sRID - sLID;
while(1)
{
// move up one level while the block is aligned (even slot) and the doubled block still fits
if (sLID % 2 == 0 && i*2 <= k)
{
sLID /= 2;
i *= 2;
}
else
{
// record the block, then restart from the first uncovered unit
addID(&(pTopCEIs[sID*SCEINum+sLID]), pQI);
sLID = sRID + i - k;
k = k - i;
i = 1;
if (k == 0) return;
}
}
}
}
return;
}
// Record pQI in the bottom-level cells [ll, lr) of segment sID, creating the
// segment's zeroed table of L cells the first time a non-empty range needs it.
void ACEI::addBtmCEIs(u_int ll, u_int lr, u_int sID, _QueryInterval *pQI)
{
    if (ll < lr && ppBotCEIs[sID] == NULL)
    {
        ppBotCEIs[sID] = (_CEI*)malloc(sizeof(_CEI) * L);
        memset(ppBotCEIs[sID], 0, sizeof(_CEI)*L);
    }

    for (u_int cell = ll; cell < lr; cell++)
    {
        addID(&ppBotCEIs[sID][cell], pQI);
    }
}
// Append a new list node referring to pQI at the tail of pCEI's linked list.
void ACEI::addID(_CEI* pCEI, _QueryInterval* pQI)
{
    _IID *node = (_IID*)malloc(sizeof(_IID));
    node->next = NULL;
    node->iid = pQI;

    if (pCEI->head == NULL)
        pCEI->head = node;       // first node of an empty list
    else
        pCEI->tail->next = node; // link behind the current tail
    pCEI->tail = node;
}
//ultlity
double ACEI::getAvrgRng()
{
double d = 0;
u_int i,j,k;
if (HQI.cnt == 0)
{
printf("!!! NO QueryISet loaded when calc w\n");
return 0;
}
for (k = 0; k < HQI.cnt; k++)
{
i = k / HQI.ROWLEN;
j = k % HQI.ROWLEN;
d += HQI.ppQImatrix[i][j].r - HQI.ppQImatrix[i][j].l;
}
return d / n;
}
//serialization
// Pack pCEI's linked list into a freshly allocated array stored in
// pSrlCEI->head. The array has one extra entry whose iid is NULL to mark the
// end. pCEI must not be NULL.
void ACEI::serializeCEI(_CEI *pCEI, _SrlCEI* pSrlCEI)
{
    // first pass: count the nodes
    u_int count = 0;
    for (_IID *node = pCEI->head; node != NULL; node = node->next)
        count++;

    pSrlCEI->head = (_SrlIID*)malloc(sizeof(_SrlIID)*(count+1));

    // second pass: copy the interval pointers in list order
    u_int pos = 0;
    for (_IID *node = pCEI->head; node != NULL; node = node->next)
    {
        pSrlCEI->head[pos].iid = node->iid;
        pos++;
    }
    pSrlCEI->head[count].iid = NULL; // terminator
}
// Build the packed bottom-level tables: for every segment that has a
// linked-list table, allocate a table of L packed cells and serialize each
// non-empty cell into it.
void ACEI::serialBtmCEIs()
{
    ppBotSrlCEIs = (_SrlCEI**)malloc(sizeof(_SrlCEI*)*SNum);
    memset(ppBotSrlCEIs, 0, sizeof(_SrlCEI*)*SNum);

    for (u_int seg = 0; seg < SNum; seg++)
    {
        if (!ppBotCEIs[seg])
            continue; // segment has no bottom-level entries

        ppBotSrlCEIs[seg] = (_SrlCEI*)malloc(sizeof(_SrlCEI)*L);
        memset(ppBotSrlCEIs[seg], 0, sizeof(_SrlCEI)*L);

        for (u_int cell = 0; cell < L; cell++)
        {
            if (ppBotCEIs[seg][cell].head)
            {
                serializeCEI(&ppBotCEIs[seg][cell], &ppBotSrlCEIs[seg][cell]);
            }
            else
            {
                ppBotSrlCEIs[seg][cell].head = NULL;
            }
        }
    }
}
void ACEI::serialTopCEIs()
{
u_int i;
pTopSrlCEIs = (_SrlCEI*)malloc(sizeof(_SrlCEI)*SNum*SCEINum);
memset(pTopSrlCEIs, 0, sizeof(_SrlCEI)*SNum*SCEINum);
for (i = 0; i < SNum*SCEINum; i++)
{
if (pTopCEIs[i].head)
serializeCEI(&(pTopCEIs[i]), &(pTopSrlCEIs[i]));
}
//free(pTopCEIs);
}
// Convert the linked-list tables into their packed form, then release the
// linked-list tables (top table, every bottom table, and the bottom pointer array).
void ACEI::serial()
{
    serialTopCEIs();
    serialBtmCEIs();

    releaseCEIs(&pTopCEIs, SNum*SCEINum);

    for (u_int seg = 0; seg < SNum; seg++)
    {
        if (ppBotCEIs[seg])
        {
            releaseCEIs(&ppBotCEIs[seg], L);
        }
    }
    free(ppBotCEIs);
}

View File

@@ -0,0 +1,135 @@
/************************************************************************/
/*
AdvanceCEI 0621.2007 v1.1
Yao Qiu-lin (yaoqiulin@software.ict.ac.cn)
Changes (reconstructed in English; the original non-ASCII text was corrupted by an encoding error):
1. Converted to an object-oriented form.
2. The file-based interface was replaced by an in-memory interface.
Note: PreProcessing should not, in principle, be used to add query intervals incrementally,
because doing so would break the integrity of the current data structure
(an insertIntvl function for adding a QueryInterval is mentioned in the original note).
To add query intervals, rebuild the index from the new, complete set of intervals.
*/
/************************************************************************/
#ifndef _ADVANCECEI_YAOQIULIN_20080509_
#define _ADVANCECEI_YAOQIULIN_20080509_
#include "IntervalIndex.h"
#include <stdlib.h>
#include <stdio.h>
#include <vector>
//#define u_int unsigned int
typedef unsigned int u_int;
typedef struct _QueryInterval_
{
u_int l;
u_int r;
u_int index;
} _QueryInterval;
typedef struct _HQueryI_
{
_QueryInterval ** ppQImatrix;
u_int MAXROWS;
u_int ROWLEN;
u_int rowID;//next available
u_int colID;//next available
u_int cnt;
}_HQueryI;
typedef struct _IID_ //<2F><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ݽṹ<DDBD><E1B9B9><EFBFBD><EFBFBD><EFBFBD>ٿռ<D5BC><E4A3AC><EFBFBD><EFBFBD>Ч<EFBFBD><D0A7><EFBFBD><EFBFBD><EFBFBD>õĻ<C3B5>
{
_QueryInterval * iid;
struct _IID_ * next;
}_IID;
typedef struct _IIDSet_
{
_IID *head;
_IID *tail;
} _IIDSet;
typedef _IIDSet _CEI;
//pack type
typedef struct _SrlIID_
{
_QueryInterval * iid;
}_SrlIID;
typedef struct _SrlIIDSet_
{
_SrlIID *head;
}_SrlIIDSet;
typedef _SrlIIDSet _SrlCEI;
// Interval index over unsigned 32-bit values: PreProcessing() loads a set of
// closed intervals and Find() reports the indices recorded for a key.
// NOTE(review): the original non-ASCII comments in this class were corrupted
// by an encoding error; the comments below are written from the behaviour
// visible in ACEI.cpp.
class ACEI : public CIntervalIndex
{
public:
// value domain is [0, 2^eRange)
ACEI(u_int eRange = 32);
virtual ~ACEI();
// load the closed intervals [ a[i], b[i] ] and build the index; interval i is reported as index i
virtual long long PreProcessing(const std::vector<unsigned int>& a, const std::vector<unsigned int>& b);
// write up to `size` indices of the intervals recorded for key into result; returns the count
virtual int Find(unsigned int key, unsigned int * result, unsigned int size);
private:
//init & ruin
void initQIMatrix();
void ruinQIMatirc();
int init();
void ruin();
void releaseCEIs(_CEI **ppIIDSet, u_int lCEIsNum);
//search
int searchX(unsigned int x, unsigned int * result, unsigned int size);// NOTE(review): garbled original mentions double; x is an unsigned int here
//insert
//void insertIntvl(u_int l, u_int r)
void addIntvl(u_int ll, u_int lr, _QueryInterval* pQI);// registers the half-open interval [ll, lr)
void addFrgmnt(u_int ll, u_int lr, u_int sID, _QueryInterval* pQI);
void addBtmCEIs(u_int ll, u_int lr, u_int sID, _QueryInterval *pQI);
void addID(_CEI* pCEI, _QueryInterval* pQI);
//ultlity
double getAvrgRng();
//serialization
void serializeCEI(_CEI *pCEI, _SrlCEI* pSrlCEI);//suppose pCEI not NULL
void serialBtmCEIs();
void serialTopCEIs();
void serial();
private:
//input data
_HQueryI HQI; // matrix holding the loaded query intervals
//model data
u_int n; // number of loaded intervals (set from HQI.cnt)
u_int r; // largest value of the indexed domain
u_int L;//L=2^H, Segment Len
u_int H;// log2 of the segment length
u_int h;// number of top-level tree levels minus one (searchX walks h+1 levels)
double w; // average interval length, from getAvrgRng()
u_int SNum;//Segment Num
u_int SCEINum;// top-level slots per segment, 2^(h+1)
unsigned long *pExp2;// table of powers of two, pExp2[i] == 2^i
const u_int Exp2Size;// number of entries in pExp2
_CEI *pTopCEIs;// linked-list top-level table, SNum*SCEINum slots; released by serial()
_CEI **ppBotCEIs;// per-segment linked-list bottom tables of L cells each; released by serial()
//for serial
_SrlCEI *pTopSrlCEIs;// packed top-level table used by searchX()
_SrlCEI **ppBotSrlCEIs;// packed per-segment bottom tables of L cells each, used by searchX()
};
#endif

View File

@@ -0,0 +1,159 @@
/*
*
* Copyright (c) 2008--2012
* String Matching Group, Lab for Intelligent Information Processing Technology,
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS).
* All rights reserved.
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2012-07-10
*
* This code is the exclusive and proprietary property of IIE-CAS. Usage for direct
* or indirect commercial advantage is not allowed without written permission from
* the authors.
*
*/
//#define DEBUG_CEI
#include "CEI.h"
#include <algorithm>
#include <iterator>
#include <cstdlib>
#include <cassert>
using namespace std;
/*
 * Rounds n up to the nearest power of two; 0 and 1 both map to 1.
 *
 * The largest power of two an unsigned int can hold is its top bit. For any
 * n above that value no exact answer exists, so the result saturates at the
 * top bit. Previously N was shifted out to 0 in that case and the loop never
 * terminated.
 */
unsigned int RoundToPowerOfTwo(unsigned int n)
{
    const unsigned int top_bit = ~(~0u >> 1);   // e.g. 0x80000000 for 32-bit
    unsigned int N = 1;
    while (N < n && N != top_bit)
    {
        N <<= 1;
    }
    return N;
}
/*
 * Creates an empty index over the value range [0, r) split into segments of
 * L values. Both r and L are rounded up to a power of two.
 *
 * NOTE(review): the offset table needs 2*r+1 entries and (r<<1) is computed
 * in unsigned int, so in practice r must stay below 2^31 (and memory use
 * grows linearly with r). PreProcessing() also appears to assume L <= r --
 * confirm with the callers.
 *
 * The offset table is value-initialised to zero so that Find() on an index
 * that has not been preprocessed yet sees only empty lists instead of
 * reading uninitialised offsets.
 */
CEI::CEI(unsigned int r, unsigned int L)
{
    if (r == 0 || (r & (r - 1)) != 0)
    {
        r = RoundToPowerOfTwo(r);
    }
    if (L == 0 || (L & (L - 1)) != 0)
    {
        L = RoundToPowerOfTwo(L);
    }
    m_r = r;
    m_L = L;
    m_pIDList = new unsigned int[(r << 1) + 1]();   // "()" => all offsets start at 0
    m_pIDArray = NULL;
}
// Releases the offset table and the flattened ID array.
CEI::~CEI()
{
    // delete[] on a null pointer is a no-op, so no explicit checks are needed.
    delete [] m_pIDArray;
    delete [] m_pIDList;
}
/*
 * Builds the index from closed intervals [a[i], b[i]].
 *
 * Each interval is converted to the half-open form [a[i], b[i]+1) and
 * decomposed into the slots of the per-segment structure: segments covered
 * entirely are recorded in the segment's top slot, the ragged ends are
 * handled by BestPartition(). The per-slot lists are then flattened into
 * m_pIDArray with m_pIDList holding the prefix offsets.
 *
 * Intervals that are empty after the conversion are skipped; this includes
 * b[i] == UINT_MAX, whose upper bound wraps to 0. Only the first
 * min(a.size(), b.size()) pairs are used.
 *
 * Returns the number of bytes held by the two index arrays.
 */
long long CEI::PreProcessing(const std::vector<unsigned int>& a, const std::vector<unsigned int>& b)
{
    std::vector< std::vector<unsigned int> > IDList(m_r << 1);

    // Never read past the shorter of the two input vectors.
    const unsigned int total = (unsigned int)std::min(a.size(), b.size());
    for (unsigned int i = 0; i < total; i++)
    {
        const unsigned int lo = a[i];
        const unsigned int hi = b[i] + 1;            // half-open upper bound
        if (lo >= hi) continue;

        const unsigned int inf = (lo + m_L - 1) & -m_L;   // first segment boundary >= lo
        const unsigned int sup = hi & -m_L;               // last segment boundary <= hi
        if (inf > sup)
        {
            // The interval lies strictly inside one segment.
            this->BestPartition(i, lo, hi, IDList);
        }
        else
        {
            for (unsigned int p = inf; p < sup; p += m_L)
            {
                IDList[(p << 1) + 1].push_back(i);        // whole segment covered
            }
            if (lo < inf) this->BestPartition(i, lo, inf, IDList);
            if (sup < hi) this->BestPartition(i, sup, hi, IDList);
        }
    }

    m_pIDList[0] = 0;
    for (unsigned int i = 0; i < (unsigned int)IDList.size(); i++)
    {
        m_pIDList[i + 1] = m_pIDList[i] + (unsigned int)IDList[i].size();
    }

    // Drop the array of a previous call before building the new one.
    delete [] m_pIDArray;
    m_pIDArray = NULL;
    m_pIDArray = new unsigned int[m_pIDList[2 * m_r]];
    for (unsigned int i = 0; i < (unsigned int)IDList.size(); i++)
    {
        std::copy(IDList[i].begin(), IDList[i].end(), m_pIDArray + m_pIDList[i]);
    }

    // Done in 64-bit so the element count cannot wrap before the multiply.
    long long iMemBytes = (long long)sizeof(unsigned int) * (2LL * m_r + 1 + m_pIDList[2 * m_r]);

#ifdef DEBUG_CEI
    printf("CEI membyte=%5.3lf (MB).\n", (double)iMemBytes/(1u<<20));
#endif

    return iMemBytes;
}
/*
 * Registers interval `id` for the half-open range [a, b), which must lie
 * inside one segment. The range is cut greedily into aligned power-of-two
 * pieces; each piece is recorded in the slot of the segment's implicit
 * binary tree that covers it.
 */
void CEI::BestPartition(unsigned int id, unsigned int a, unsigned int b, std::vector< std::vector<unsigned int> >& IDList)
{
    const unsigned int segBase = (a & -m_L) << 1;     // first slot of this segment
    for (unsigned int pos = a; pos < b; )
    {
        unsigned int node = (pos & (m_L - 1)) + m_L;  // leaf slot for pos
        unsigned int span = 1;                        // values covered by node
        // Climb while pos is the left edge of the parent and the parent fits.
        for (; (node & 1) == 0 && pos + (span << 1) <= b; span <<= 1)
        {
            node >>= 1;
        }
        IDList[segBase + node].push_back(id);
        pos += span;
    }
}
/*
 * qsort() comparator for unsigned int values, ascending.
 *
 * Returns a negative, zero or positive value. The previous implementation
 * returned the unsigned difference converted to int, which has the wrong
 * sign whenever the two values differ by more than INT_MAX
 * (e.g. 0 versus 0x80000001).
 */
static int compare(const void * a, const void * b)
{
    const unsigned int lhs = *(const unsigned int *)a;
    const unsigned int rhs = *(const unsigned int *)b;
    return (lhs > rhs) - (lhs < rhs);
}
/*
 * Collects the IDs of the intervals containing `key`.
 *
 * Walks from the leaf slot of `key` up to the top slot of its segment and
 * copies every ID found on the way into `result`, stopping once `size`
 * entries have been written. The collected IDs are sorted with compare()
 * before returning. Returns the number of IDs written.
 */
int CEI::Find(unsigned int key, unsigned int * result, unsigned int size)
{
    unsigned int found = 0;
    const unsigned int segBase = (key & -m_L) << 1;
    for (unsigned int node = (key & (m_L - 1)) + m_L; node != 0; node >>= 1)
    {
        const unsigned int slot = segBase + node;
        const unsigned int last = m_pIDList[slot + 1];
        for (unsigned int pos = m_pIDList[slot]; pos < last; ++pos)
        {
            if (found >= size)
            {
                // Output buffer is full: sort what we have and stop.
                qsort(result, found, sizeof(unsigned int), compare);
                return found;
            }
            result[found++] = m_pIDArray[pos];
        }
    }
    qsort(result, found, sizeof(unsigned int), compare);
    return found;
}

View File

@@ -0,0 +1,47 @@
/*
*
* Copyright (c) 2008--2012
* String Matching Group, Lab for Intelligent Information Processing Technology,
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS).
* All rights reserved.
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2012-07-10
*
* This code is the exclusive and proprietary property of IIE-CAS. Usage for direct
* or indirect commercial advantage is not allowed without written permission from
* the authors.
*
*/
#ifndef H_CEI_CPP_H
#define H_CEI_CPP_H
#include <limits.h>
#include "IntervalIndex.h"
#include <vector>
// Interval index over the value range [0, r), organised as segments of L
// values. Each segment owns 2*L slots; a point query walks the slots from
// the key's leaf up to the segment's top slot (see CEI::Find).
//
// The class owns two raw arrays and declares no copy constructor or
// assignment operator, so instances must not be copied.
class CEI : public CIntervalIndex
{
public:
// r: size of the value range, L: segment size. Both are rounded up to a
// power of two by the constructor.
CEI(unsigned int r=65536, unsigned int L=64);
virtual ~CEI();
// Builds the index from closed intervals [ a[i], b[i] ]; returns the bytes used.
virtual long long PreProcessing(const std::vector<unsigned int>& a, const std::vector<unsigned int>& b);
// Writes up to `size` sorted interval indexes containing `key` into `result`
// and returns how many were written.
virtual int Find(unsigned int key, unsigned int * result, unsigned int size);
private:
// Decomposes the half-open range [a, b) inside one segment into slots.
void BestPartition(unsigned int id, unsigned int a, unsigned int b, std::vector< std::vector<unsigned int> >& IDList);
private:
unsigned int m_r; // maximum range of integer
unsigned int m_L; // segment size for each CEI virtual construct
unsigned int * m_pIDList; // 2*r+1 prefix offsets: slot c owns m_pIDArray[m_pIDList[c] .. m_pIDList[c+1])
unsigned int * m_pIDArray; // interval IDs of all slots, concatenated; NULL until PreProcessing()
};
#endif

View File

@@ -0,0 +1,7 @@
# Static library bundling the interval-index implementations
# (32-bit integer, IP-mask, port and 128-bit/IPv6 variants).
add_library(interval_index_static
  ACEI.cpp
  CEI.cpp
  DirectIndex.cpp
  FastIntervalIndex.cpp
  Int128IntervalIndex.cpp
  IntervalIndex.cpp
  IntervalTree.cpp
  IPMaskIndex.cpp
  NaiveIntervalIndex.cpp
  NaiveIntervalIndex2.cpp
  PortIndex.cpp
  SuccinctHash.cpp
  sigmastar_tools.cpp
)

# _GNU_SOURCE is only needed while compiling these sources, so it stays
# PRIVATE to the target instead of being a directory-wide definition.
target_compile_definitions(interval_index_static PRIVATE _GNU_SOURCE)

# -fPIC is a compiler option, not a preprocessor definition. Let CMake emit
# the correct position-independent-code flag for the active toolchain.
set_target_properties(interval_index_static PROPERTIES
  POSITION_INDEPENDENT_CODE ON
)

View File

@@ -0,0 +1,159 @@
/*
*
* Copyright (c) 2008--2012
* Advanced Algorithm and Algorithm Engineering Group
* National Engineering Laboratory for Information Security Technologies (NELIST)
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS)
* All rights reserved
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2012-11-27
*
* This code is the exclusive and proprietary property of NELIST and IIE-CAS.
* Usage for direct or indirect commercial advantage is not allowed without
* written permission from the authors.
*
*/
#include "DirectIndex.h"
#include <algorithm>
#include <map>
using namespace std;
//#define DEBUG_DIRECT_INDEX
/*
 * Creates an empty index.
 *
 * The key window starts out empty (m_min_key > m_max_key), so Find() returns
 * 0 for every key until PreProcessing() has succeeded. Previously these
 * members were left uninitialised, which made Find() on an unbuilt or
 * rejected index undefined.
 */
CDirectIndex::CDirectIndex()
    : m_theta(0),
      m_min_key(1),
      m_max_key(0),
      m_keys(NULL),
      m_ptr(NULL),
      m_values(NULL)
{
}
// Releases the key, offset and value arrays.
CDirectIndex::~CDirectIndex()
{
    // delete[] accepts null pointers, so the arrays can be released unconditionally.
    delete [] m_values;
    delete [] m_ptr;
    delete [] m_keys;
}
/*
closed interval: [a[i], b[i]] such that a[i]<=b[i]
*/
long long CDirectIndex::PreProcessing(const vector<unsigned int>& a, const vector<unsigned int>& b)
{
long long mem_bytes=0;
unsigned long long count=0;
for(unsigned int i=0; i<a.size(); i++)
{
count+=((unsigned long long)b[i]-a[i]+1);
}
if(count>2*a.size()) return -1;
map<unsigned int, unsigned int> M;
for(unsigned int i=0; i<a.size(); i++)
{
for(unsigned long long t=a[i]; t<=b[i]; t++)
{
M[(unsigned int)t]++;
}
}
m_keys =new unsigned int[M.size()];
m_ptr =new unsigned int[M.size()+1];
m_values=new unsigned int[(unsigned int)count];
mem_bytes+=sizeof(unsigned int)*(2*M.size()+(unsigned int)count);
m_ptr[0]=0;
map<unsigned int, unsigned int>::const_iterator it=M.begin();
for(unsigned int k=0; k<M.size(); k++, ++it)
{
m_keys[k]=it->first;
m_ptr[k+1]=m_ptr[k]+it->second;
}
for(unsigned int i=0; i<a.size(); i++)
{
for(unsigned long long t=a[i]; t<=b[i]; t++)
{
unsigned int k=(unsigned int)(lower_bound(m_keys, m_keys+M.size(), (unsigned int)t)-m_keys);
m_values[m_ptr[k]++]=i;
}
}
for(int k=(int)M.size(); k>0; k--)
{
m_ptr[k]=m_ptr[k-1];
}
m_ptr[0]=0;
m_min_key=m_keys[0];
m_max_key=m_keys[M.size()-1];
for(unsigned int k=0; k<M.size(); k++)
{
m_keys[k]-=m_min_key;
}
unsigned long long ONE=1;
unsigned int theta=0;
while((ONE<<(theta+16))<=m_keys[M.size()-1]) theta++;
m_theta=theta;
m_L[0]=0;
for(unsigned int i=1; i<65536; i++)
{
m_L[i]=(unsigned int)(lower_bound(m_keys, m_keys+M.size(), i*(1U<<theta))-m_keys);
}
m_L[65536]=(unsigned int)M.size();
mem_bytes+=sizeof(m_L);
M.clear();
#ifdef DEBUG_DIRECT_INDEX
printf("Direct Index membyte=%5.3lf (MB).\n", (double)mem_bytes/(1u<<20));
#endif
return mem_bytes;
}
int CDirectIndex::Find(unsigned int key, unsigned int * result, unsigned int size)
{
if(key<m_min_key || key>m_max_key)
return 0;
key-=m_min_key;
unsigned int k=key>>m_theta;
int l=m_L[k], h=(int)m_L[k+1]-1;
if(h<l)
return 0;
while(l<=h)
{
int m=(l+h)/2;
if(key<m_keys[m]) h=m-1;
else l=m+1;
}
unsigned int n = 0;
if(h<(int)m_L[k] || m_keys[h]!=key)
{
return 0;
}
else
{
int s = m_ptr[h+1] - m_ptr[h];
unsigned int * values = m_values+m_ptr[h];
for(int i = 0; i < s; i++)
{
if(n >= size)
{
return n;
}
result[n++] = values[i];
}
}
return n;
}

View File

@@ -0,0 +1,44 @@
/*
*
* Copyright (c) 2008--2012
* Advanced Algorithm and Algorithm Engineering Group
* National Engineering Laboratory for Information Security Technologies (NELIST)
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS)
* All rights reserved
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2012-11-27
*
* This code is the exclusive and proprietary property of NELIST and IIE-CAS.
* Usage for direct or indirect commercial advantage is not allowed without
* written permission from the authors.
*
*/
#ifndef H_DIRECT_INDEX_CPP_H
#define H_DIRECT_INDEX_CPP_H
#include "IntervalIndex.h"
// Per-key ("direct") index: every key covered by an interval has its own
// entry. PreProcessing() rejects inputs whose intervals cover more than two
// keys each on average, so this is meant for sets of (almost) single values.
//
// The class owns raw arrays and declares no copy constructor or assignment
// operator, so instances must not be copied.
class CDirectIndex : public CIntervalIndex
{
public:
CDirectIndex();
virtual ~CDirectIndex();
// Returns the bytes used, or -1 when the input is not suitable for this index.
virtual long long PreProcessing(const std::vector<unsigned int>& a, const std::vector<unsigned int>& b);
// Writes up to `size` interval indexes containing `key` into `result`.
virtual int Find(unsigned int key, unsigned int * result, unsigned int size);
private:
unsigned int m_L[65537]; // jump table: bucket k spans key positions [m_L[k], m_L[k+1])
unsigned int m_theta; // bucket number of a relative key is (key >> m_theta)
unsigned int m_min_key; // smallest indexed key (absolute)
unsigned int m_max_key; // largest indexed key (absolute)
unsigned int * m_keys; // sorted distinct keys, stored relative to m_min_key
unsigned int * m_ptr; // values of key position k are m_values[m_ptr[k] .. m_ptr[k+1])
unsigned int * m_values; // interval indexes, grouped by key
};
#endif

View File

@@ -0,0 +1,141 @@
/*
*
* Copyright (c) 2008--2012
* String Matching Group, Lab for Intelligent Information Processing Technology,
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS).
* All rights reserved.
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2012-07-10
*
* This code is the exclusive and proprietary property of IIE-CAS. Usage for direct
* or indirect commercial advantage is not allowed without written permission from
* the authors.
*
*/
//#define DEBUG_FAST_INTERVAL_INDEX
#include "FastIntervalIndex.h"
#include "NaiveIntervalIndex.h"
#include "NaiveIntervalIndex2.h"
#include <climits>
#include <algorithm>
#include <set>
#include <cassert>
using namespace std;
/*
 * Creates an empty index for the range [0, 2^r) partitioned into blocks of
 * 2^l values. One sub-index slot is reserved per block; all slots start
 * out NULL.
 */
CFastIntervalIndex::CFastIntervalIndex(unsigned int r, unsigned int l)
{
    assert(r>=l);
    m_r = r;
    m_l = l;

    const unsigned int slots = 1u << (r - l);
    m_pIndexer = new CIntervalIndex*[slots];
    for (CIntervalIndex ** slot = m_pIndexer; slot != m_pIndexer + slots; ++slot)
    {
        *slot = NULL;
    }
}
// Destroys every per-block sub-index and then the slot table itself.
CFastIntervalIndex::~CFastIntervalIndex()
{
    const unsigned int slots = 1u << (m_r - m_l);
    for (unsigned int idx = 0; idx < slots; ++idx)
    {
        delete m_pIndexer[idx];      // deleting a null slot is a no-op
    }
    delete [] m_pIndexer;
}
long long CFastIntervalIndex::PreProcessing(const vector<unsigned int>& a, const vector<unsigned int>& b)
{
vector<unsigned int> A=a, B=b;
long long iMemBytes=0;
m_IDs.resize(1u<<(m_r-m_l));
for(int i=0; i<(int)A.size(); i++)
{
assert(A[i]<=B[i]);
if(B[i]==UINT_MAX)
{
this->m_IndexForMaxInt.push_back(i);
--B[i];
}
B[i]++; // now A[i], B[i] is half closed interval.
if(A[i]>=B[i]) continue;
unsigned int p, q;
if(m_l==32) // in this case, the bit shift operator will mod 32 to 0.
{
p=0;
q=1;
}
else
{
p=A[i]>>m_l;
q= (B[i]&((1u<<m_l)-1))==0 ? (B[i]>>m_l) : (B[i]>>m_l)+1;
}
for(unsigned int j=p; j<q; j++) m_IDs[j].push_back(i);
}
for(int i=0, n=1<<(m_r-m_l); i<n; i++)
{
if(m_IDs[i].size()==0) continue;
vector<unsigned int> C, D;
for(int j=0; j<(int)m_IDs[i].size(); j++)
{
int id=m_IDs[i][j];
C.push_back(A[id]);
D.push_back(B[id]-1);
}
this->m_pIndexer[i]=new CNaiveIntervalIndex2;
iMemBytes+=this->m_pIndexer[i]->PreProcessing(C, D);
}
iMemBytes+=(1<<(m_r-m_l))*(int)sizeof(CIntervalIndex *);
#ifdef DEBUG_FAST_INTERVAL_INDEX
printf("Fast Interval Index membyte=%5.3lf (MB).\n", (double)iMemBytes/(1u<<20));
#endif
return iMemBytes;
}
int CFastIntervalIndex::Find(unsigned int key, unsigned int * result, unsigned int size)
{
unsigned int n = 0;
int s = 0;
if(key==UINT_MAX)
{
s = m_IndexForMaxInt.size();
for(int i = 0; i < s; i++)
{
if (n >= size)
{
return n;
}
result[n++] = m_IndexForMaxInt[i];
}
}
else
{
vector<unsigned int> v;
unsigned int t=(m_l<32) ? (key>>m_l) : 0;
CIntervalIndex * pIndexer=this->m_pIndexer[t];
if(pIndexer!=NULL)
{
int k = pIndexer->Find(key, result, size);
int i = 0;
for( i=0; i < k; i++)
result[i]=m_IDs[t][result[i]];
n = i;
}
}
return n;
}

View File

@@ -0,0 +1,42 @@
/*
*
* Copyright (c) 2008--2012
* String Matching Group, Lab for Intelligent Information Processing Technology,
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS).
* All rights reserved.
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2012-07-10
*
* This code is the exclusive and proprietary property of IIE-CAS. Usage for direct
* or indirect commercial advantage is not allowed without written permission from
* the authors.
*
*/
#ifndef H_FAST_INTERVAL_INDEX_CPP_H
#define H_FAST_INTERVAL_INDEX_CPP_H
#include "IntervalIndex.h"
// Two-level interval index: the value range is cut into fixed-size blocks and
// each non-empty block owns a sub-index (see PreProcessing).
//
// The class owns the sub-indexes through raw pointers and declares no copy
// constructor or assignment operator, so instances must not be copied.
class CFastIntervalIndex : public CIntervalIndex
{
public:
// the range is [0, 2^r), and we partition it into intervals of size 2^l
CFastIntervalIndex(unsigned int r=32, unsigned int l=16);
virtual ~CFastIntervalIndex();
// Builds the per-block sub-indexes from closed intervals [ a[i], b[i] ].
virtual long long PreProcessing(const std::vector<unsigned int>& a, const std::vector<unsigned int>& b);
// Writes up to `size` interval indexes containing `key` into `result`.
virtual int Find(unsigned int key, unsigned int * result, unsigned int size);
private:
std::vector<unsigned int> m_IndexForMaxInt; // intervals whose upper bound is UINT_MAX
unsigned int m_r; // log2 of the value range
unsigned int m_l; // log2 of the block size
CIntervalIndex ** m_pIndexer; // 2^(r-l) slots, one sub-index per block (NULL when empty)
std::vector< std::vector<unsigned int> > m_IDs; // per block: local interval number -> caller's interval number
};
#endif

View File

@@ -0,0 +1,149 @@
/*
*
* Copyright (c) 2008-2016
* String Algorithms Research Group
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS)
* National Engineering Laboratory for Information Security Technologies (NELIST)
* All rights reserved
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2016-05-31
*
* This code is the exclusive and proprietary property of IIE-CAS and NELIST.
* Usage for direct or indirect commercial advantage is not allowed without
* written permission from the authors.
*
*/
#include "IPMaskIndex.h"
#include <stdio.h>
using namespace std;
//#define DEBUG_IPMASK_INDEX
/*
 * Tells whether every interval [a[i], b[i]] is either a single value
 * (a[i] == b[i]) or an aligned block of exactly 256 values
 * (low byte of a[i] is 0 and b[i] == a[i] + 255).
 * Returns true for an empty input.
 */
bool is_8bit_ipmask(const vector<unsigned int>& a, const vector<unsigned int>& b)
{
    const unsigned int total = a.size();
    for (unsigned int idx = 0; idx < total; ++idx)
    {
        const unsigned int lo = a[idx];
        const unsigned int hi = b[idx];
        if (lo == hi)
        {
            continue;                                   // single value
        }
        if ((lo & 0xFF) == 0 && hi == lo + 255)
        {
            continue;                                   // aligned 256-value block
        }
        return false;
    }
    return true;
}
// Creates an empty index: no hash, no value array, not in single-value mode.
CIPMaskIndex::CIPMaskIndex()
    : m_is_single(false),
      m_values(NULL),
      m_ip_hash(NULL)
{
}
// Releases the single-value hash and the block value array.
CIPMaskIndex::~CIPMaskIndex()
{
    // Both delete forms accept null pointers.
    delete m_ip_hash;
    delete [] m_values;
}
/*
 * Builds the index from closed intervals [a[i], b[i]] such that a[i]<=b[i].
 *
 * Two kinds of interval are indexed:
 *   - single values (a[i] == b[i]) go into a CSuccinctHash;
 *   - aligned 256-value blocks (low byte of a[i] is 0, b[i] == a[i]+255)
 *     go into a table keyed by the upper 24 bits, with m_bitmap marking
 *     which of the 2^24 keys have at least one entry.
 * Intervals of any other shape are ignored; callers are expected to check
 * the input with is_8bit_ipmask() first.
 *
 * State from a previous call is released first, and m_bitmap is cleared
 * before bits are OR-ed into it (it used to be accumulated on top of
 * uninitialised memory).
 *
 * Returns the bytes used, or -1 for empty input, for b shorter than a, or
 * when the hash could not be built.
 */
long long CIPMaskIndex::PreProcessing(const vector<unsigned int>& a, const vector<unsigned int>& b)
{
    if (a.size() == 0 || b.size() < a.size()) return -1;

    // Release whatever an earlier call built.
    delete [] m_values;
    m_values = NULL;
    delete m_ip_hash;
    m_ip_hash = NULL;

    long long mem_bytes = 0;

    unsigned int J = 0;
    for (unsigned int i = 0; i < a.size(); i++)
    {
        if (a[i] == b[i]) J++;
    }
    m_is_single = (J == a.size());

    if (J > 0)
    {
        unsigned int * keys   = new unsigned int[J];
        unsigned int * values = new unsigned int[J];
        J = 0;
        for (unsigned int i = 0; i < a.size(); i++)
        {
            if (a[i] == b[i])
            {
                keys[J] = a[i];
                values[J] = i;
                J++;
            }
        }
        m_ip_hash = new CSuccinctHash;
        long long ret = m_ip_hash->init(keys, values, J);
        delete [] keys;
        delete [] values;
        if (ret < 0)
        {
            delete m_ip_hash;
            m_ip_hash = NULL;
            return -1;
        }
        mem_bytes += ret;
    }

    // Only single values: the block table is not needed (Find() returns early).
    if (m_is_single) return mem_bytes;

    for (unsigned int i = 0; i < sizeof(m_bitmap); i++) m_bitmap[i] = 0;
    for (unsigned int i = 0; i <= (1U << 24); i++) m_L[i] = 0;

    // Count the blocks per 24-bit prefix ...
    for (unsigned int i = 0; i < a.size(); i++)
    {
        if ((a[i] & 0xFF) == 0 && b[i] == a[i] + 255) // aligned 256-value block
        {
            m_L[a[i] >> 8]++;
        }
    }
    // ... turn the counts into inclusive prefix sums ...
    for (unsigned int i = 1; i <= (1U << 24); i++) m_L[i] += m_L[i - 1];

    m_values = new unsigned int[m_L[1 << 24]];
    mem_bytes += sizeof(unsigned int) * m_L[1 << 24] + sizeof(m_L) + sizeof(m_bitmap);

    // ... and fill each group from its end, which leaves m_L[k] at the start
    // offset of prefix k.
    for (unsigned int i = 0; i < a.size(); i++)
    {
        if ((a[i] & 0xFF) == 0 && b[i] == a[i] + 255)
        {
            m_values[--m_L[a[i] >> 8]] = i;
        }
    }

    for (unsigned int i = 0; i < (1 << 24); i++)
    {
        if (m_L[i] < m_L[i + 1]) m_bitmap[i >> 3] |= (1U << (i & 7));
    }

#ifdef DEBUG_IPMASK_INDEX
    printf("IPMask Index membyte=%5.3lf (MB).\n", (double)mem_bytes/(1U<<20));
#endif

    return mem_bytes;
}
/*
 * Reports the intervals containing `key`.
 *
 * Single-value matches come from the hash first; unless the index holds
 * only single values, matches among the 256-value blocks are appended from
 * the table keyed by the upper 24 bits of `key`. At most `size` entries are
 * written to `result`.
 *
 * Returns the number of entries written, or -1 if the hash lookup failed.
 * When the block table was never built (PreProcessing() not called, or it
 * failed), only the hash result is returned instead of reading the
 * uninitialised tables.
 */
int CIPMaskIndex::Find(unsigned int key, unsigned int * result, unsigned int size)
{
    int ret = 0;
    if (m_ip_hash != NULL)
    {
        ret = m_ip_hash->find(key, result, size);
        if (ret < 0) return -1;
    }
    if (m_is_single) return ret;
    if (m_values == NULL) return ret;          // block table not built

    result += ret;
    size -= ret;

    key >>= 8;                                  // 24-bit block prefix
    if ((m_bitmap[key >> 3] & (1U << (key & 7))) == 0) return ret;

    unsigned int n = m_L[key + 1] - m_L[key];
    if (n > size) n = size;
    const unsigned int * p = m_values + m_L[key];
    for (unsigned int i = 0; i < n; i++) *result++ = *p++;
    ret += n;
    return ret;
}

View File

@@ -0,0 +1,49 @@
/*
*
* Copyright (c) 2008-2016
* String Algorithms Research Group
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS)
* National Engineering Laboratory for Information Security Technologies (NELIST)
* All rights reserved
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2016-05-31
*
* This code is the exclusive and proprietary property of IIE-CAS and NELIST.
* Usage for direct or indirect commercial advantage is not allowed without
* written permission from the authors.
*
*/
/* Index for IP fields whose rules are all either a single IP or an 8-bit masked block (256 consecutive IPs, low byte 0..255). */
#ifndef H_IPMASK_INDEX_CPP_H
#define H_IPMASK_INDEX_CPP_H
#include "IntervalIndex.h"
#include "SuccinctHash.h"
#include <vector>
using namespace std;
bool is_8bit_ipmask(const vector<unsigned int>& a, const vector<unsigned int>& b);
// Index for interval sets in which every interval is a single value or an
// aligned block of 256 values (see is_8bit_ipmask).
//
// The object embeds a 2 MiB bitmap (1<<21 bytes) and a table of (1<<24)+1
// unsigned ints, so it is far too large for the stack and should be
// allocated with new. It owns raw pointers and declares no copy constructor
// or assignment operator, so instances must not be copied.
class CIPMaskIndex : public CIntervalIndex
{
public:
CIPMaskIndex();
virtual ~CIPMaskIndex();
// Returns the bytes used, or -1 on empty input or hash construction failure.
virtual long long PreProcessing(const std::vector<unsigned int>& a, const std::vector<unsigned int>& b);
// Writes up to `size` interval indexes containing `key` into `result`;
// returns the count, or -1 if the hash lookup failed.
virtual int Find(unsigned int key, unsigned int * result, unsigned int size);
private:
bool m_is_single; // true when every interval is a single value (hash only)
unsigned char m_bitmap[1U<<21]; // one bit per 24-bit prefix: set when the prefix has block entries
unsigned int m_L[(1U<<24)+1]; // entries of prefix k are m_values[m_L[k] .. m_L[k+1])
unsigned int * m_values; // interval indexes of the 256-value blocks, grouped by prefix
CSuccinctHash * m_ip_hash; // hash of the single-value intervals; NULL when there are none
};
#endif

View File

@@ -0,0 +1,352 @@
/*
*
* Copyright (c) 2008-2016
* String Algorithms Research Group
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS)
* National Engineering Laboratory for Information Security Technologies (NELIST)
* All rights reserved
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2016-06-03
*
* This code is the exclusive and proprietary property of IIE-CAS and NELIST.
* Usage for direct or indirect commercial advantage is not allowed without
* written permission from the authors.
*
*/
#include "Int128IntervalIndex.h"
#include <algorithm>
#include <set>
#include <cassert>
#include <limits.h>
using namespace std;
//#define DEBUG_INT128_INTERVAL_INDEX
// Relational operators for uint128_t. I[1] is the more significant word,
// so it decides unless both high words are equal.

// lhs < rhs
bool operator<(const uint128_t& lhs, const uint128_t& rhs)
{
    if (lhs.I[1] != rhs.I[1])
    {
        return lhs.I[1] < rhs.I[1];
    }
    return lhs.I[0] < rhs.I[0];
}

// lhs > rhs
bool operator>(const uint128_t& lhs, const uint128_t& rhs)
{
    if (lhs.I[1] != rhs.I[1])
    {
        return lhs.I[1] > rhs.I[1];
    }
    return lhs.I[0] > rhs.I[0];
}

// lhs >= rhs
bool operator>=(const uint128_t& lhs, const uint128_t& rhs)
{
    if (lhs.I[1] != rhs.I[1])
    {
        return lhs.I[1] > rhs.I[1];
    }
    return lhs.I[0] >= rhs.I[0];
}

// lhs == rhs
bool operator==(const uint128_t& lhs, const uint128_t& rhs)
{
    return lhs.I[0] == rhs.I[0] && lhs.I[1] == rhs.I[1];
}

// lhs != rhs
bool operator!=(const uint128_t& lhs, const uint128_t& rhs)
{
    return lhs.I[0] != rhs.I[0] || lhs.I[1] != rhs.I[1];
}
/*
 * Returns lhs - rhs, computed modulo 2^128.
 *
 * The low words are subtracted with ordinary unsigned wrap-around; a borrow
 * is taken from the high word when the low word of lhs is smaller than that
 * of rhs. The previous borrow path computed ULONG_MAX - (l - h), which is
 * one less than the correct low word, and ULONG_MAX is only 32 bits wide on
 * ABIs where long is 32-bit.
 */
uint128_t operator-(const uint128_t& lhs, const uint128_t& rhs)
{
    uint128_t diff;
    const unsigned long long borrow = (lhs.I[0] < rhs.I[0]) ? 1ULL : 0ULL;
    diff.I[0] = lhs.I[0] - rhs.I[0];              // wraps modulo 2^64 on borrow
    diff.I[1] = lhs.I[1] - rhs.I[1] - borrow;
    return diff;
}
// Folds a 128-bit value into 32 bits by XOR-ing its four 32-bit words.
unsigned int ipv6_hash(const uint128_t * ip)
{
    const unsigned long long folded = ip->I[1] ^ ip->I[0];
    const unsigned int low  = (unsigned int)(folded & 0xFFFFFFFF);
    const unsigned int high = (unsigned int)(folded >> 32);
    return low ^ high;
}
/*
 * Creates an empty index with no arrays allocated.
 *
 * m_is_single is given a defined value as well: Find() branches on it, and
 * PreProcessing() returns before setting it when the input is empty, so it
 * used to be read uninitialised in those cases.
 */
CInt128IntervalIndex::CInt128IntervalIndex()
{
    m_is_single = false;
    m_array = NULL;
    m_iEndPointsNum = 0;
    m_pEndPoints = NULL;
    m_pIDPtr = NULL;
    m_pIDList = NULL;
    m_IndexForMaxInt = NULL;
    m_IndexForWholeInterval = NULL;
}
// Releases every array owned by the index.
CInt128IntervalIndex::~CInt128IntervalIndex()
{
    // delete[] on a null pointer does nothing, so no guards are required.
    delete [] m_array;
    delete [] m_pEndPoints;
    delete [] m_pIDList;
    delete [] m_pIDPtr;
    delete [] m_IndexForMaxInt;
    delete [] m_IndexForWholeInterval;
}
long long CInt128IntervalIndex::PreProcessing(const vector<uint128_t>& a, const vector<uint128_t>& b)
{
if(a.size()==0) return -1;
m_is_single=true;
for(unsigned int i=0; i<a.size(); i++)
{
if(a[i]!=b[i])
{
m_is_single=false;
break;
}
}
if(m_is_single)
{
return process_single(a);
}
else
{
return process_interval(a, b);
}
}
/*
 * Builds the single-value representation: a copy of all values in m_array
 * plus a hash from ipv6_hash(value) to the value's position.
 * Returns the bytes used, or -1 if the hash could not be built.
 */
long long CInt128IntervalIndex::process_single(const vector<uint128_t>& a)
{
    const unsigned int total = a.size();

    unsigned int * hashes    = new unsigned int[total];
    unsigned int * positions = new unsigned int[total];
    m_array = new uint128_t[total];
    const long long arrayBytes = (2*sizeof(unsigned int)+sizeof(uint128_t))*total;

    for (unsigned int idx = 0; idx < total; ++idx)
    {
        m_array[idx]   = a[idx];
        positions[idx] = idx;
        hashes[idx]    = ipv6_hash(&a[idx]);
    }

    // The temporary key/value arrays are released once the hash is built.
    const long long hashBytes = m_ip_hash.init(hashes, positions, total);
    delete [] positions;
    delete [] hashes;

    return (hashBytes < 0) ? -1 : arrayBytes + hashBytes;
}
long long CInt128IntervalIndex::process_interval(const vector<uint128_t>& a, const vector<uint128_t>& b)
{
vector<uint128_t> A=a, B=b;
long long iMemBytes=0;
set<uint128_t> s;
vector<unsigned int> IndexForMaxInt;
vector<unsigned int> IndexForWholeInterval;
for(int i=0, n=(int)A.size(); i<n; i++)
{
if(A[i]>B[i]) continue;
if(B[i].is_all_ones())
{
IndexForMaxInt.push_back(i);
--B[i];
}
++B[i]; // now A[i], B[i] is half closed interval.
if(A[i]>=B[i]) continue;
if(A[i].is_all_zeros() && B[i].is_all_ones())
{
IndexForWholeInterval.push_back(i);
continue;
}
s.insert(A[i]);
s.insert(B[i]);
}
m_IndexForWholeInterval=new unsigned int[IndexForWholeInterval.size()+1];
m_IndexForWholeInterval[0]=IndexForWholeInterval.size();
copy(IndexForWholeInterval.begin(), IndexForWholeInterval.end(), m_IndexForWholeInterval+1);
iMemBytes+=(long long)(sizeof(unsigned int)*(IndexForWholeInterval.size()+1));
m_IndexForMaxInt=new unsigned int[IndexForMaxInt.size()+1];
m_IndexForMaxInt[0]=IndexForMaxInt.size();
copy(IndexForMaxInt.begin(), IndexForMaxInt.end(), m_IndexForMaxInt+1);
iMemBytes+=(long long)(sizeof(unsigned int)*(IndexForMaxInt.size()+1));
this->m_iEndPointsNum=(long long)s.size();
this->m_pEndPoints=new uint128_t[m_iEndPointsNum];
copy(s.begin(), s.end(), m_pEndPoints);
iMemBytes+=(long long)(m_iEndPointsNum*sizeof(uint128_t));
vector<long long> count(m_iEndPointsNum, 0);
for(int i=0, n=(int)A.size(); i<n; i++)
{
if(A[i]>=B[i] || (A[i].is_all_zeros() && B[i].is_all_ones())) continue;
long long l=(int)(lower_bound(m_pEndPoints, m_pEndPoints+m_iEndPointsNum, A[i])-m_pEndPoints);
long long h=(int)(lower_bound(m_pEndPoints, m_pEndPoints+m_iEndPointsNum, B[i])-m_pEndPoints);
assert(m_pEndPoints[l]==A[i] && m_pEndPoints[h]==B[i]);
for(long long j=l; j<h; j++) count[j]++;
}
m_pIDPtr=new long long[m_iEndPointsNum+1];
m_pIDPtr[0]=0;
for(long long i=0; i<m_iEndPointsNum; i++)
{
m_pIDPtr[i+1]=m_pIDPtr[i]+count[i];
}
iMemBytes+=(long long)((m_iEndPointsNum+1)*sizeof(unsigned int));
m_pIDList=new unsigned int[m_pIDPtr[m_iEndPointsNum]];
for(int i=0, n=(int)A.size(); i<n; i++)
{
if(A[i]>=B[i] || (A[i].is_all_zeros() && B[i].is_all_ones())) continue;
long long l=(int)(lower_bound(m_pEndPoints, m_pEndPoints+m_iEndPointsNum, A[i])-m_pEndPoints);
long long h=(int)(lower_bound(m_pEndPoints, m_pEndPoints+m_iEndPointsNum, B[i])-m_pEndPoints);
assert(m_pEndPoints[l]==A[i] && m_pEndPoints[h]==B[i]);
for(long long j=l; j<h; j++)
{
m_pIDList[m_pIDPtr[j]++]=i;
}
}
iMemBytes+=(long long)(m_pIDPtr[m_iEndPointsNum]*sizeof(unsigned int));
for(long long i=0; i<m_iEndPointsNum; i++)
{
m_pIDPtr[i]-=count[i];
}
int k=0;
unsigned int t[4] = {0,0,0,0};
for(unsigned int i=0; i<65536; i++)
{
t[3] = i;
uint128_t x(t);
x = x<<112;
while(k<m_iEndPointsNum && x>=m_pEndPoints[k])
k++;
m_L[i] = (k>=1)? k-1 : 0;
}
m_L[65536]=m_iEndPointsNum-1;
iMemBytes+=(long long)sizeof(m_L);
#ifdef DEBUG_INT128_INTERVAL_INDEX
printf("Int128 Interval Index membyte=%5.3lf (MB).\n", (double)iMemBytes/(1u<<20));
#endif
A.clear();
B.clear();
s.clear();
IndexForMaxInt.clear();
return iMemBytes;
}
// Dispatches the lookup to the representation chosen by PreProcessing().
int CInt128IntervalIndex::Find(const uint128_t * key, unsigned int * result, unsigned int size)
{
    return m_is_single ? Find_single(key, result, size)
                       : Find_interval(key, result, size);
}
/*
 * Lookup in the single-value representation.
 *
 * The hash is keyed by the 32-bit fold of the value, so it returns
 * candidates; each candidate is compared against the stored 128-bit value
 * and only real matches are kept, compacted to the front of `result`.
 * Returns the number of matches.
 */
int CInt128IntervalIndex::Find_single(const uint128_t * key, unsigned int * result, unsigned int size)
{
    const unsigned int hash = ipv6_hash(key);
    const int candidates = m_ip_hash.find(hash, result, size);

    int kept = 0;
    for (int c = 0; c < candidates; ++c)
    {
        const unsigned int id = result[c];
        if (*key == m_array[id])
        {
            result[kept++] = id;
        }
    }
    return kept;
}
/*
 * Lookup in the end-point representation.
 *
 * The all-ones key is answered from m_IndexForMaxInt alone. For any other
 * key the whole-range intervals are reported first, then the jump table and
 * a binary search locate the last end point <= key and the intervals
 * covering that elementary range are appended. At most `size` entries are
 * written to `result`.
 *
 * Returns the number of entries written; 0 when the interval tables were
 * never built (previously that case dereferenced NULL pointers).
 */
int CInt128IntervalIndex::Find_interval(const uint128_t * key, unsigned int * result, unsigned int size)
{
    if (m_IndexForMaxInt == NULL || m_IndexForWholeInterval == NULL)
    {
        return 0;                               // interval tables not built
    }

    if (key->is_all_ones())
    {
        unsigned int s = m_IndexForMaxInt[0];   // element 0 is the count
        if (s > size) s = size;
        for (unsigned int i = 1; i <= s; i++) *result++ = m_IndexForMaxInt[i];
        return s;
    }

    unsigned int s = m_IndexForWholeInterval[0];
    if (s > size) s = size;
    for (unsigned int i = 1; i <= s; i++)
    {
        *result++ = m_IndexForWholeInterval[i];
    }
    size -= s;

    // Top 16 bits of the key select the jump-table bucket.
    uint128_t t = (*key)>>112;
    unsigned int k = t.I[0]&0xffff;

    // Binary search in [m_L[k], m_L[k+1]] for the last end point <= key.
    long long l = m_L[k], h = m_L[k+1];
    long long m = 0;
    while (l <= h && m < m_iEndPointsNum)
    {
        m = (l+h)>>1;
        if (*key >= m_pEndPoints[m]) l = m+1;
        else h = m-1;
    }

    if (h >= m_L[k] && h < m_iEndPointsNum)
    {
        long long n = m_pIDPtr[h+1] - m_pIDPtr[h];
        if (n > size) n = size;
        unsigned int * id_list = m_pIDList + m_pIDPtr[h];
        for (unsigned int i = 0; i < n; i++) *result++ = *id_list++;
        s += n;
    }
    return s;
}

View File

@@ -0,0 +1,149 @@
/*
*
* Copyright (c) 2008-2016
* String Algorithms Research Group
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS)
* National Engineering Laboratory for Information Security Technologies (NELIST)
* All rights reserved
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2016-06-03
*
* This code is the exclusive and proprietary property of IIE-CAS and NELIST.
* Usage for direct or indirect commercial advantage is not allowed without
* written permission from the authors.
*
*/
#ifndef H_INT128_INTERVAL_INDEX_CPP_H
#define H_INT128_INTERVAL_INDEX_CPP_H
#include "SuccinctHash.h"
#include <vector>
using namespace std;
/*
 * Minimal unsigned 128-bit integer made of two 64-bit words.
 *
 * Changes relative to the earlier version:
 *   - a default-constructed value is zero instead of indeterminate;
 *   - operator<< / operator>> are defined for every shift count. The old
 *     code evaluated a 64-bit shift by 64 whenever the count was 0, 64 or
 *     another multiple of 64 (undefined behaviour) and wrote outside I[]
 *     for counts of 192 and above. Counts <= 0 now return the value
 *     unchanged, counts >= 128 return zero.
 */
struct uint128_t
{
    unsigned long long I[2];   // I[1] = high 64 bits, I[0] = low 64 bits

    // Builds the value from four 32-bit words given most-significant first
    // (a[0] is the top word, a[3] the bottom word). Zero when a is NULL.
    uint128_t(const unsigned int * a=NULL)
    {
        if (a != NULL)
        {
            I[0] = a[2];
            I[0] = (I[0]<<32)|a[3];
            I[1] = a[0];
            I[1] = (I[1]<<32)|a[1];
        }
        else
        {
            I[0] = 0;
            I[1] = 0;
        }
    }

    uint128_t& operator=(const uint128_t& rhs)
    {
        for (int i = 0; i < 2; i++) I[i] = rhs.I[i];
        return *this;
    }

    // Pre-decrement, wrapping from 0 to the all-ones value.
    uint128_t& operator--()
    {
        if (I[0] == 0) I[1]--;      // borrow from the high word
        I[0]--;
        return *this;
    }

    // Pre-increment, wrapping from the all-ones value to 0.
    uint128_t& operator++()
    {
        I[0]++;
        if (I[0] == 0) I[1]++;      // carry into the high word
        return *this;
    }

    // Logical left shift by n bits.
    uint128_t operator<<(int n) const
    {
        uint128_t t = *this;
        if (n <= 0) return t;
        if (n >= 128)
        {
            t.I[0] = 0;
            t.I[1] = 0;
        }
        else if (n >= 64)
        {
            t.I[1] = t.I[0] << (n - 64);
            t.I[0] = 0;
        }
        else
        {
            // 1 <= n <= 63, so both shift counts are in range.
            t.I[1] = (t.I[1] << n) | (t.I[0] >> (64 - n));
            t.I[0] <<= n;
        }
        return t;
    }

    // Logical right shift by n bits.
    uint128_t operator>>(int n) const
    {
        uint128_t t = *this;
        if (n <= 0) return t;
        if (n >= 128)
        {
            t.I[0] = 0;
            t.I[1] = 0;
        }
        else if (n >= 64)
        {
            t.I[0] = t.I[1] >> (n - 64);
            t.I[1] = 0;
        }
        else
        {
            // 1 <= n <= 63, so both shift counts are in range.
            t.I[0] = (t.I[0] >> n) | (t.I[1] << (64 - n));
            t.I[1] >>= n;
        }
        return t;
    }

    // ORs the complement of a 128-bit mask into this value.
    // NOTE(review): the mask words are combined least-significant first
    // (mask[0] is the bottom word), the opposite of the constructor's word
    // order -- confirm that callers really pass the mask in that layout.
    void ornot(unsigned int * mask)
    {
        unsigned long long m = mask[1];
        m = (m<<32)|mask[0];
        unsigned long long n = mask[3];
        n = (n<<32)|mask[2];
        I[0] |= ~m;
        I[1] |= ~n;
    }

    bool is_all_zeros() const
    {
        return (I[0] == 0)&&(I[1] == 0);
    }

    bool is_all_ones() const
    {
        return ((~I[0])==0)&&((~I[1])==0);
    }
};
bool operator<(const uint128_t& lhs, const uint128_t& rhs);
bool operator>(const uint128_t& lhs, const uint128_t& rhs);
bool operator==(const uint128_t& lhs, const uint128_t& rhs);
bool operator!=(const uint128_t& lhs, const uint128_t& rhs);
bool operator>=(const uint128_t& lhs, const uint128_t& rhs);
uint128_t operator-(const uint128_t& lhs, const uint128_t& rhs);
class CInt128IntervalIndex
{
public:
CInt128IntervalIndex();
~CInt128IntervalIndex();
long long PreProcessing(const vector<uint128_t>& a, const vector<uint128_t>& b);
int Find(const uint128_t * key, unsigned int * result, unsigned int size);
private:
long long process_single(const vector<uint128_t>& a);
int Find_single(const uint128_t * key, unsigned int * result, unsigned int size);
long long process_interval(const vector<uint128_t>& a, const vector<uint128_t>& b);
int Find_interval(const uint128_t * key, unsigned int * result, unsigned int size);
private:
bool m_is_single;
CSuccinctHash m_ip_hash;
uint128_t * m_array;
unsigned int * m_IndexForMaxInt;
long long m_iEndPointsNum;
uint128_t * m_pEndPoints;
long long * m_pIDPtr;
unsigned int * m_pIDList;
int m_L[65537];
unsigned int * m_IndexForWholeInterval;
};
#endif

View File

@@ -0,0 +1,25 @@
/*
*
* Copyright (c) 2008--2012
* String Matching Group, Lab for Intelligent Information Processing Technology,
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS).
* All rights reserved.
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2012-07-10
*
* This code is the exclusive and proprietary property of IIE-CAS. Usage for direct
* or indirect commercial advantage is not allowed without written permission from
* the authors.
*
*/
#include "IntervalIndex.h"
// The interface holds no state, so construction has nothing to do.
CIntervalIndex::CIntervalIndex() {}

// Defined out of line; virtual so that concrete indexes can be destroyed
// through a CIntervalIndex pointer.
CIntervalIndex::~CIntervalIndex() {}

View File

@@ -0,0 +1,40 @@
/*
*
* Copyright (c) 2008--2012
* String Matching Group, Lab for Intelligent Information Processing Technology,
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS).
* All rights reserved.
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2012-07-10
*
* This code is the exclusive and proprietary property of IIE-CAS. Usage for direct
* or indirect commercial advantage is not allowed without written permission from
* the authors.
*
*/
#ifndef H_INTERVAL_INDEX_CPP_H
#define H_INTERVAL_INDEX_CPP_H
#include <vector>
// Abstract interface shared by the interval indexes over unsigned int keys:
// build once from a set of closed intervals, then answer point queries.
class CIntervalIndex
{
public:
CIntervalIndex();
virtual ~CIntervalIndex();
/*
Builds the index. Interval i is the closed interval [ a[i] , b[i] ] such
that a[i]<=b[i]. The implementations in this library return the number of
bytes used by the index, or a negative value when the index was not built.
*/
virtual long long PreProcessing(const std::vector<unsigned int>& a, const std::vector<unsigned int>& b)=0;
/*
report the indexes of intervals that contain the key.
At most `size` indexes are written to `result`; the return value is the
number written.
*/
virtual int Find(unsigned int key, unsigned int * result, unsigned int size)=0;//changed by luyuhai, 2015.11.09
};
#endif

View File

@@ -0,0 +1,202 @@
/*
*
* Copyright (c) 2008--2012
* String Matching Group, Lab for Intelligent Information Processing Technology,
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS).
* All rights reserved.
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2012-07-10
*
* This code is the exclusive and proprietary property of IIE-CAS. Usage for direct
* or indirect commercial advantage is not allowed without written permission from
* the authors.
*
*/
//#define DEBUG_INTERVAL_TREE
#include "IntervalTree.h"
#include <climits>
#include <queue>
#include <algorithm>
#include <iterator>
#include <set>
#include <cassert>
using namespace std;
// Start with an empty tree: no root and a node counter of zero.
// The tree itself is built later by PreProcessing().
CIntervalTree::CIntervalTree()
    : m_pstRoot(NULL),
      m_uiNodeNum(0)
{
}
// Release every node of the tree with a breadth-first walk (no recursion),
// decrementing the node counter so the final assert can detect a mismatch
// between allocated and freed nodes.
CIntervalTree::~CIntervalTree()
{
    if (m_pstRoot != NULL)
    {
        queue<stIntervalNode *> pending;
        pending.push(m_pstRoot);
        while (!pending.empty())
        {
            stIntervalNode * node = pending.front();
            pending.pop();
            // Queue the children before the node itself is destroyed.
            if (node->lchild != NULL) pending.push(node->lchild);
            if (node->rchild != NULL) pending.push(node->rchild);
            delete node;
            --m_uiNodeNum;
        }
        assert(m_uiNodeNum == 0);
    }
}
// Recursively build a balanced binary tree over the sorted endpoints a[0..n-1].
// Every endpoint becomes the separator of one inner node; every empty range
// becomes a leaf (separator 0, no children). Each created node is counted in
// m_uiNodeNum, so n endpoints produce 2*n+1 nodes.
CIntervalTree::stIntervalNode * CIntervalTree::BuildBalancedTree(unsigned int a[], unsigned int n)
{
    stIntervalNode * node = new stIntervalNode;
    ++m_uiNodeNum;

    const bool empty_range = (n == 0);
    node->isleaf = empty_range;
    if (empty_range)
    {
        node->seperator = 0;
        node->lchild = NULL;
        node->rchild = NULL;
        return node;
    }

    // Split at the (lower) median: a[0..mid-1] goes left, a[mid+1..n-1] goes right.
    const unsigned int mid = (n - 1) / 2;
    node->seperator = a[mid];
    node->lchild = BuildBalancedTree(a, mid);
    node->rchild = BuildBalancedTree(a + mid + 1, n - mid - 1);
    return node;
}
/*
Attach interval `id`, given as the half-open range [a, b), to the tree nodes
whose key range it covers completely.
pstCurrNode : node to start from.
inf, sup    : bounds of the key range represented by pstCurrNode; the caller
              must guarantee inf<=a && b<=sup.
Also adds sizeof(unsigned int) to m_iMemBytes for every id stored.
*/
void CIntervalTree::AddInterval(stIntervalNode * pstCurrNode, unsigned int inf, unsigned int sup,
unsigned int a, unsigned int b, unsigned int id)
{
assert(inf<=a && b<=sup);
while(1)
{
assert(pstCurrNode!=NULL);
if(a==inf && b==sup)
{
// The interval spans this node's whole range: record the id here and stop.
pstCurrNode->ids.push_back(id);
this->m_iMemBytes+=sizeof(unsigned int);
return;
}
else if(a>=pstCurrNode->seperator)
{
// Interval lies entirely at or above the separator: continue in the right subtree.
inf=pstCurrNode->seperator;
pstCurrNode=pstCurrNode->rchild;
}
else if(b<=pstCurrNode->seperator)
{
// Interval lies entirely below the separator: continue in the left subtree.
sup=pstCurrNode->seperator;
pstCurrNode=pstCurrNode->lchild;
}
else
{
// Interval straddles the separator: split it there and insert both halves.
this->AddInterval(pstCurrNode->lchild, inf, pstCurrNode->seperator, a, pstCurrNode->seperator, id);
this->AddInterval(pstCurrNode->rchild, pstCurrNode->seperator, sup, pstCurrNode->seperator, b, id);
return;
}
}
}
long long CIntervalTree::PreProcessing(const vector<unsigned int>& a, const vector<unsigned int>& b)
{
vector<unsigned int> A=a, B=b;
this->m_iMemBytes=0;
set<unsigned int> s;
for(int i=0, n=(int)A.size(); i<n; i++)
{
assert(A[i]<=B[i]);
if(B[i]==UINT_MAX)
{
this->m_IndexForMaxInt.push_back(i);
--B[i];
}
B[i]++; // now A[i], B[i] is half closed interval.
if(A[i]>=B[i]) continue;
s.insert(A[i]);
s.insert(B[i]);
}
this->m_iMemBytes+=(long long)(sizeof(unsigned int)*this->m_IndexForMaxInt.size());
vector<unsigned int> endpoints;
copy(s.begin(), s.end(), back_inserter(endpoints));
this->m_pstRoot=this->BuildBalancedTree(&endpoints[0], (unsigned int)endpoints.size());
assert(this->m_uiNodeNum==2*(unsigned int)endpoints.size()+1);
this->m_iMemBytes+=sizeof(stIntervalNode)*m_uiNodeNum;
for(int i=0, n=(int)A.size(); i<n; i++)
{
if(A[i]>=B[i]) continue;
this->AddInterval(m_pstRoot, 0, UINT_MAX, A[i], B[i], i);
}
#ifdef DEBUG_INTERVAL_TREE
printf("Interval Tree membyte=%5.3lf (MB).\n", (double)m_iMemBytes/(1u<<20));
#endif
return m_iMemBytes;
}
/*
qsort() comparator for unsigned int values, ascending.
Returns a negative, zero or positive value when *a is less than, equal to or
greater than *b. The result is derived from comparisons rather than from a
subtraction: an unsigned difference converted to int has the wrong sign once
the two values differ by 2^31 or more.
*/
static int compare(const void * a, const void * b)
{
    const unsigned int lhs = *(const unsigned int *)a;
    const unsigned int rhs = *(const unsigned int *)b;
    return (lhs > rhs) - (lhs < rhs);
}
int CIntervalTree::Find(unsigned int key, unsigned int * result, unsigned int size)
{
unsigned int n = 0;
int s = 0;
if(key==UINT_MAX)
{
s = m_IndexForMaxInt.size();
for(int i = 0; i < s; i++)
{
if (n >= size)
{
return n;
}
result[n++] = m_IndexForMaxInt[i];
}
}
else
{
stIntervalNode * pstCurrNode=this->m_pstRoot;
while(true)
{
vector<unsigned int>::iterator it = pstCurrNode->ids.begin();
while(it != pstCurrNode->ids.end())
{
if(n >= size)
{
qsort(result, n, sizeof(unsigned int), compare);
return n;
}
result[n++] = *it;
it++;
}
if(pstCurrNode->isleaf) break;
pstCurrNode= (key<pstCurrNode->seperator) ? pstCurrNode->lchild : pstCurrNode->rchild;
}
qsort(result, n, sizeof(unsigned int), compare);
}
return n;
}

View File

@@ -0,0 +1,56 @@
/*
*
* Copyright (c) 2008--2012
* String Matching Group, Lab for Intelligent Information Processing Technology,
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS).
* All rights reserved.
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2012-07-10
*
* This code is the exclusive and proprietary property of IIE-CAS. Usage for direct
* or indirect commercial advantage is not allowed without written permission from
* the authors.
*
*/
#ifndef H_INTERVAL_TREE_CPP_H
#define H_INTERVAL_TREE_CPP_H
#include "IntervalIndex.h"
/*
Interval index backed by a balanced binary tree over the interval endpoints.
Each interval id is stored on the nodes whose key range the interval covers
completely; a lookup collects the ids found on one root-to-leaf path.
*/
class CIntervalTree : public CIntervalIndex
{
public:
/* One tree node. */
struct stIntervalNode
{
bool isleaf;                    // true for the childless nodes that terminate a path
unsigned int seperator;         // keys below this value go to lchild, all others to rchild
std::vector<unsigned int> ids;  // ids of the intervals attached to this node
stIntervalNode * lchild;
stIntervalNode * rchild;
};
public:
CIntervalTree();
virtual ~CIntervalTree();
/* Build the tree from closed intervals [a[i], b[i]]; returns the memory estimate in bytes. */
virtual long long PreProcessing(const std::vector<unsigned int>& a, const std::vector<unsigned int>& b);
/* Write up to `size` ids of intervals containing key into result; returns the number written. */
virtual int Find(unsigned int key, unsigned int * result, unsigned int size);
private:
/* Build a balanced tree whose inner nodes hold the sorted endpoints a[0..n-1]. */
stIntervalNode * BuildBalancedTree(unsigned int a[], unsigned int n);
/* Attach interval id, given as the range [a, b), below pstCurrNode, which represents [inf, sup). */
void AddInterval(stIntervalNode * pstCurrNode, unsigned int inf, unsigned int sup,
unsigned int a, unsigned int b, unsigned int id);
private:
stIntervalNode * m_pstRoot;                  // root of the tree, NULL until PreProcessing() runs
unsigned int m_uiNodeNum;                    // number of nodes currently allocated
long long m_iMemBytes;                       // running memory estimate, reset by PreProcessing()
std::vector<unsigned int> m_IndexForMaxInt;  // ids of the intervals whose upper bound is UINT_MAX
};
#endif

View File

@@ -0,0 +1,225 @@
/*
*
* Copyright (c) 2008--2012
* String Matching Group, Lab for Intelligent Information Processing Technology,
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS).
* All rights reserved.
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2014-11-19
*
* This code is the exclusive and proprietary property of IIE-CAS. Usage for direct
* or indirect commercial advantage is not allowed without written permission from
* the authors.
*
*/
//#define DEBUG_NAIVE_INTERVAL_INDEX
#include "NaiveIntervalIndex.h"
#include <climits>
#include <algorithm>
#include <set>
#include <cassert>
using namespace std;
// Create an empty index: every owned table pointer starts as NULL and the
// endpoint count as zero. The m_L directory is filled by PreProcessing().
CNaiveIntervalIndex::CNaiveIntervalIndex()
    : m_pDirectIndexer(NULL),
      m_IndexForMaxInt(NULL),
      m_iEndPointsNum(0),
      m_pEndPoints(NULL),
      m_pIDPtr(NULL),
      m_pIDList(NULL),
      m_IndexForWholeInterval(NULL)
{
}
// Free the delegate index and every table allocated by PreProcessing().
// delete / delete[] on a NULL pointer is a no-op, so no guards are needed.
CNaiveIntervalIndex::~CNaiveIntervalIndex()
{
    delete m_pDirectIndexer;
    delete [] m_pEndPoints;
    delete [] m_pIDList;
    delete [] m_pIDPtr;
    delete [] m_IndexForMaxInt;
    delete [] m_IndexForWholeInterval;
}
/*
Build the index from closed intervals [a[i], b[i]] and return the memory
estimate in bytes.

A CDirectIndex is tried first; if its PreProcessing() reports success (>=0)
all later queries are delegated to it. Otherwise the sorted distinct
endpoints are stored in m_pEndPoints and, for every segment between two
neighbouring endpoints, the list of covering interval ids is precomputed.
*/
long long CNaiveIntervalIndex::PreProcessing(const vector<unsigned int>& a, const vector<unsigned int>& b)
{
// First choice: let CDirectIndex handle the rule set.
CDirectIndex * instance=new CDirectIndex;
long long ret=instance->PreProcessing(a, b);
if(ret>=0)
{
m_pDirectIndexer=instance;
return ret;
}
delete instance;
// Fallback: build the endpoint tables below.
vector<unsigned int> A=a, B=b;
long long iMemBytes=0;
set<unsigned int> s;
vector<unsigned int> IndexForMaxInt;
vector<unsigned int> IndexForWholeInterval;
for(int i=0, n=(int)A.size(); i<n; i++)
{
if(A[i]>B[i]) continue;
if(B[i]==UINT_MAX)
{
// Upper bound cannot be incremented: remember the id for the key==UINT_MAX lookup.
IndexForMaxInt.push_back(i);
--B[i];
}
B[i]++; // now A[i], B[i] is half closed interval.
if(A[i]>=B[i]) continue;
if(A[i]==0 && B[i]==UINT_MAX)
{
// Covers every key below UINT_MAX: kept in a separate list instead of every segment.
IndexForWholeInterval.push_back(i);
continue;
}
s.insert(A[i]);
s.insert(B[i]);
}
// Both special lists are stored as arrays whose element [0] holds the count.
m_IndexForWholeInterval=new unsigned int[IndexForWholeInterval.size()+1];
m_IndexForWholeInterval[0]=IndexForWholeInterval.size();
copy(IndexForWholeInterval.begin(), IndexForWholeInterval.end(), m_IndexForWholeInterval+1);
iMemBytes+=(long long)(sizeof(unsigned int)*(IndexForWholeInterval.size()+1));
m_IndexForMaxInt=new unsigned int[IndexForMaxInt.size()+1];
m_IndexForMaxInt[0]=IndexForMaxInt.size();
copy(IndexForMaxInt.begin(), IndexForMaxInt.end(), m_IndexForMaxInt+1);
iMemBytes+=(long long)(sizeof(unsigned int)*(IndexForMaxInt.size()+1));
// Sorted distinct endpoints (std::set iterates in ascending order).
this->m_iEndPointsNum=(int)s.size();
this->m_pEndPoints=new unsigned int[m_iEndPointsNum];
copy(s.begin(), s.end(), m_pEndPoints);
iMemBytes+=(long long)(m_iEndPointsNum*sizeof(unsigned int));
// Pass 1: count[j] = number of intervals covering the segment that starts at endpoint j.
vector<unsigned int> count(m_iEndPointsNum, 0);
for(int i=0, n=(int)A.size(); i<n; i++)
{
if(A[i]>=B[i] || (A[i] == 0 && B[i] == UINT_MAX)) continue;
int l=(int)(lower_bound(m_pEndPoints, m_pEndPoints+m_iEndPointsNum, A[i])-m_pEndPoints);
int h=(int)(lower_bound(m_pEndPoints, m_pEndPoints+m_iEndPointsNum, B[i])-m_pEndPoints);
assert(m_pEndPoints[l]==A[i] && m_pEndPoints[h]==B[i]);
for(int j=l; j<h; j++) count[j]++;
}
// m_pIDPtr becomes the prefix sum of count: segment j owns m_pIDList[m_pIDPtr[j] .. m_pIDPtr[j+1]).
m_pIDPtr=new unsigned int[m_iEndPointsNum+1];
m_pIDPtr[0]=0;
for(int i=0; i<m_iEndPointsNum; i++)
{
m_pIDPtr[i+1]=m_pIDPtr[i]+count[i];
}
iMemBytes+=(long long)((m_iEndPointsNum+1)*sizeof(unsigned int));
// Pass 2: fill the id lists, using m_pIDPtr[j] as the write cursor of segment j.
m_pIDList=new unsigned int[m_pIDPtr[m_iEndPointsNum]];
for(int i=0, n=(int)A.size(); i<n; i++)
{
if(A[i]>=B[i] || (A[i] == 0 && B[i] == UINT_MAX)) continue;
int l=(int)(lower_bound(m_pEndPoints, m_pEndPoints+m_iEndPointsNum, A[i])-m_pEndPoints);
int h=(int)(lower_bound(m_pEndPoints, m_pEndPoints+m_iEndPointsNum, B[i])-m_pEndPoints);
assert(m_pEndPoints[l]==A[i] && m_pEndPoints[h]==B[i]);
for(int j=l; j<h; j++)
{
m_pIDList[m_pIDPtr[j]++]=i;
}
}
iMemBytes+=(long long)(m_pIDPtr[m_iEndPointsNum]*sizeof(unsigned int));
// The cursors were advanced by count[i] during pass 2; move them back to the segment starts.
for(int i=0; i<m_iEndPointsNum; i++)
{
m_pIDPtr[i]-=count[i];
}
// Directory over the upper 16 bits of the key: m_L[i] is the index of the last
// endpoint <= (i<<16), or 0 when there is none. Find() uses m_L[k]..m_L[k+1]
// as the bounds of its binary search.
int k=0;
for(unsigned int i=0; i<65536; i++)
{
unsigned int x=(i<<16);
while(k<m_iEndPointsNum && x>=m_pEndPoints[k]) k++;
m_L[i] = (k>=1)? k-1 : 0;
}
m_L[65536]=m_iEndPointsNum-1;
iMemBytes+=(long long)sizeof(m_L);
#ifdef DEBUG_NAIVE_INTERVAL_INDEX
printf("Naive Interval Index membyte=%5.3lf (MB).\n", (double)iMemBytes/(1u<<20));
#endif
A.clear();
B.clear();
s.clear();
IndexForMaxInt.clear();
return iMemBytes;
}
/*
Report the ids of the intervals containing key.
result : caller-provided output array with room for `size` ids.
return : number of ids written (never more than size).
NOTE(review): the tables read here are only allocated by PreProcessing();
calling Find() on an object that was never preprocessed dereferences NULL.
*/
int CNaiveIntervalIndex::Find(unsigned int key, unsigned int * result, unsigned int size)
{
// Delegate when PreProcessing() selected the direct index.
if(m_pDirectIndexer!=NULL) return m_pDirectIndexer->Find(key, result, size);
if(key==UINT_MAX)
{
// Element [0] of the array is the number of ids that follow.
unsigned int s=m_IndexForMaxInt[0];
if(s>size) s=size;
for(unsigned int i=1; i<=s; i++) *result++=m_IndexForMaxInt[i];
return s;
}
else
{
// Intervals covering the whole key range are reported first.
unsigned int s=m_IndexForWholeInterval[0];
if(s>size) s=size;
for(unsigned int i=1; i<=s; i++)
{
*result++=m_IndexForWholeInterval[i];
}
size-=s; // remaining capacity for the segment ids
// The upper 16 bits of the key select the endpoint sub-range to search.
unsigned int k=(key>>16);
int l=m_L[k], h=m_L[k+1];
int m=0;
// Binary search; afterwards h is the index of the last endpoint <= key
// (or m_L[k]-1 when no endpoint in the sub-range qualifies).
while(l<=h && m<m_iEndPointsNum)
{
m=(l+h)>>1;
if(key>=m_pEndPoints[m]) l=m+1;
else h=m-1;
}
if(h>=m_L[k] && h<m_iEndPointsNum)
{
// Copy the precomputed id list of the segment starting at endpoint h.
unsigned int n=m_pIDPtr[h+1]-m_pIDPtr[h];
if(n>size) n=size;
unsigned int * id_list=m_pIDList+m_pIDPtr[h];
for(unsigned int i=0; i<n; i++) *result++=*id_list++;
s+=n;
}
return s;
}
}

View File

@@ -0,0 +1,45 @@
/*
*
* Copyright (c) 2008--2012
* String Matching Group, Lab for Intelligent Information Processing Technology,
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS).
* All rights reserved.
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2012-07-10
*
* This code is the exclusive and proprietary property of IIE-CAS. Usage for direct
* or indirect commercial advantage is not allowed without written permission from
* the authors.
*
*/
#ifndef H_NAIVE_INTERVAL_INDEX_CPP_H
#define H_NAIVE_INTERVAL_INDEX_CPP_H
#include "IntervalIndex.h"
#include "DirectIndex.h"
/*
Interval index that precomputes, for every segment between two neighbouring
interval endpoints, the list of interval ids covering that segment.
PreProcessing() first tries a CDirectIndex and only builds these tables when
that attempt reports failure.
*/
class CNaiveIntervalIndex : public CIntervalIndex
{
public:
CNaiveIntervalIndex();
virtual ~CNaiveIntervalIndex();
/* Build the index from closed intervals [a[i], b[i]]; returns the memory estimate in bytes. */
virtual long long PreProcessing(const std::vector<unsigned int>& a, const std::vector<unsigned int>& b);
/* Write up to `size` ids of intervals containing key into result; returns the number written. */
virtual int Find(unsigned int key, unsigned int * result, unsigned int size);
private:
CDirectIndex * m_pDirectIndexer;        // delegate index, non-NULL when it accepted the rule set
unsigned int * m_IndexForMaxInt;        // [0]=count, then ids of intervals whose upper bound is UINT_MAX
int m_iEndPointsNum;                    // number of entries in m_pEndPoints
unsigned int * m_pEndPoints;            // sorted distinct interval endpoints
unsigned int * m_pIDPtr;                // m_pIDPtr[j]..m_pIDPtr[j+1] delimit segment j inside m_pIDList
unsigned int * m_pIDList;               // concatenated id lists of all segments
int m_L[65537];                         // directory indexed by the upper 16 bits of a key
unsigned int * m_IndexForWholeInterval; // [0]=count, then ids of intervals stored outside the segment tables
};
#endif

View File

@@ -0,0 +1,179 @@
/*
*
* Copyright (c) 2008--2012
* String Matching Group, Lab for Intelligent Information Processing Technology,
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS).
* All rights reserved.
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2012-07-10
*
* This code is the exclusive and proprietary property of IIE-CAS. Usage for direct
* or indirect commercial advantage is not allowed without written permission from
* the authors.
*
*/
//#define DEBUG_NAIVE_INTERVAL_INDEX2
#include "NaiveIntervalIndex2.h"
#include <climits>
#include <algorithm>
#include <iterator>
#include <set>
#include <cassert>
using namespace std;
// Create an empty index: tree size 1 (no inner nodes) and no tables allocated.
CNaiveIntervalIndex2::CNaiveIntervalIndex2()
    : m_N(1),
      m_pEndPoints(NULL),
      m_pIDList(NULL)
{
}
// Free the endpoint array and the per-segment id lists.
// delete[] on a NULL pointer is a no-op, so no guards are needed.
CNaiveIntervalIndex2::~CNaiveIntervalIndex2()
{
    delete [] m_pEndPoints;
    delete [] m_pIDList;
}
/*
Build the index from closed intervals [a[i], b[i]] and return the memory
estimate in bytes.

The sorted distinct endpoints are laid out as an implicit binary search tree
in m_pEndPoints (root at index 1, children of node p at 2p and 2p+1). A
descent ends at an index in [m_N, 2*m_N), which identifies one segment
between neighbouring endpoints; m_pIDList[index-m_N] holds the ids of the
intervals covering that segment.
*/
long long CNaiveIntervalIndex2::PreProcessing(const vector<unsigned int>& a, const vector<unsigned int>& b)
{
vector<unsigned int> A=a, B=b;
long long iMemBytes=0;
set<unsigned int> s;
for(int i=0, n=(int)A.size(); i<n; i++)
{
assert(A[i]<=B[i]);
if(B[i]==UINT_MAX)
{
// Upper bound cannot be incremented: remember the id for the key==UINT_MAX lookup.
this->m_IndexForMaxInt.push_back(i);
--B[i];
}
B[i]++; // now A[i], B[i] is half closed interval.
if(A[i]>=B[i]) continue;
s.insert(A[i]);
s.insert(B[i]);
}
iMemBytes+=(long long)(sizeof(unsigned int)*this->m_IndexForMaxInt.size());
// m_N = smallest power of two strictly greater than the number of endpoints.
int M=(int)s.size();
this->m_N=1;
while(m_N<=M) m_N<<=1;
this->m_pEndPoints=new unsigned int[m_N];
this->m_pIDList=new vector<unsigned int>[m_N];
// Sorted endpoints, padded with UINT_MAX up to m_N-1 entries.
vector<unsigned int> v;
copy(s.begin(), s.end(), back_inserter(v));
for(int i=M; i<m_N-1; i++) v.push_back(UINT_MAX);
// Write the endpoints level by level: stride d selects the nodes of one tree level.
int k=1;
for(int d=m_N>>1; d>0; d>>=1)
{
for(int j=1; d*j<m_N; j+=2)
{
this->m_pEndPoints[k++]=v[d*j-1];
}
}
assert(k==m_N);
iMemBytes+=m_N*(int)(sizeof(unsigned int)+sizeof(vector<unsigned int>));
for(int i=0, n=(int)A.size(); i<n; i++)
{
if(A[i]>=B[i]) continue;
// p = segment slot reached when descending with the lower bound.
int p =1;
while(p<m_N)
{
if(A[i]<m_pEndPoints[p])
{
p<<=1;
}
else
{
p=(p<<1)+1;
}
}
// q = segment slot reached when descending with the (exclusive) upper bound.
int q =1;
while(q<m_N)
{
if(B[i]<m_pEndPoints[q])
{
q<<=1;
}
else
{
q=(q<<1)+1;
}
}
// The interval covers the segments p .. q-1.
for(int j=p; j<q; j++) this->m_pIDList[j-m_N].push_back(i);
iMemBytes+=(q-p)*(int)sizeof(unsigned int);
}
#ifdef DEBUG_NAIVE_INTERVAL_INDEX2
printf("Naive Interval Index-2 membyte=%5.3lf (MB).\n", (double)iMemBytes/(1u<<20));
#endif
return iMemBytes;
}
int CNaiveIntervalIndex2::Find(unsigned int key, unsigned int * result, unsigned int size)
{
unsigned int n = 0;
int s = 0;
if(key==UINT_MAX)
{
s = m_IndexForMaxInt.size();
for(int i = 0; i < s; i++)
{
if(n >= size)
{
return n;
}
result[n++] = m_IndexForMaxInt[i];
}
}
else
{
int i=1;
while(i<m_N)
{
if(key<m_pEndPoints[i])
{
i<<=1;
}
else
{
i=(i<<1)+1;
}
}
s = m_pIDList[i - m_N].size();
for(int j = 0; j < s; j++)
{
if(n >= size)
{
return n;
}
result[n++] = m_pIDList[i - m_N][j];
}
}
return n;
}

View File

@@ -0,0 +1,40 @@
/*
*
* Copyright (c) 2008--2012
* String Matching Group, Lab for Intelligent Information Processing Technology,
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS).
* All rights reserved.
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2012-07-10
*
* This code is the exclusive and proprietary property of IIE-CAS. Usage for direct
* or indirect commercial advantage is not allowed without written permission from
* the authors.
*
*/
#ifndef H_NAIVE_INTERVAL_INDEX_2_CPP_H
#define H_NAIVE_INTERVAL_INDEX_2_CPP_H
#include "IntervalIndex.h"
/*
Interval index that stores the sorted interval endpoints as an implicit
binary search tree and keeps one id list per segment between neighbouring
endpoints.
*/
class CNaiveIntervalIndex2 : public CIntervalIndex
{
public:
CNaiveIntervalIndex2();
virtual ~CNaiveIntervalIndex2();
/* Build the index from closed intervals [a[i], b[i]]; returns the memory estimate in bytes. */
virtual long long PreProcessing(const std::vector<unsigned int>& a, const std::vector<unsigned int>& b);
/* Write up to `size` ids of intervals containing key into result; returns the number written. */
virtual int Find(unsigned int key, unsigned int * result, unsigned int size);
private:
std::vector<unsigned int> m_IndexForMaxInt;  // ids of the intervals whose upper bound is UINT_MAX
int m_N;                                     // power of two: number of segment slots (tree nodes are 1..m_N-1)
unsigned int * m_pEndPoints;                 // endpoints in implicit-tree order, index 0 unused
std::vector<unsigned int> * m_pIDList;       // m_N id lists, one per segment slot
};
#endif

View File

@@ -0,0 +1,81 @@
/*
*
* Copyright (c) 2008-2016
* String Algorithms Research Group
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS)
* National Engineering Laboratory for Information Security Technologies (NELIST)
* All rights reserved
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2016-05-31
*
* This code is the exclusive and proprietary property of IIE-CAS and NELIST.
* Usage for direct or indirect commercial advantage is not allowed without
* written permission from the authors.
*
*/
#include "PortIndex.h"
#include <stdlib.h>
#include <string.h>
using namespace std;
//#define DEBUG_PORT_INDEX
// Create an empty index: no value table and an all-zero presence bitmap,
// so Find() reports no match until PreProcessing() has run.
CPortIndex::CPortIndex()
    : m_values(NULL)
{
    memset(m_bitmap, 0, sizeof(m_bitmap));
}
// Free the id table; delete[] on a NULL pointer is a no-op.
CPortIndex::~CPortIndex()
{
    delete [] m_values;
}
/*
closed interval: [a[i], b[i]] such that a[i]<=b[i]
*/
long long CPortIndex::PreProcessing(const vector<unsigned int>& a, const vector<unsigned int>& b)
{
long long mem_bytes=0;
for(unsigned int i=0; i<=65536; i++) m_L[i]=0;
for(unsigned int i=0; i<a.size(); i++)
{
for(unsigned int t=a[i]; t<=b[i]; t++) m_L[t]++;
}
for(unsigned int i=1; i<=65536; i++) m_L[i]+=m_L[i-1];
m_values=new unsigned int[m_L[65536]];
mem_bytes+=sizeof(unsigned int)*m_L[65536]+sizeof(m_L)+sizeof(m_bitmap);
for(unsigned int i=0; i<a.size(); i++)
{
for(unsigned int t=a[i]; t<=b[i]; t++) m_values[--m_L[t]]=i;
}
for(unsigned int i=0; i<65536; i++)
{
if(m_L[i]<m_L[i+1]) m_bitmap[i>>3]|=(1U<<(i&7));
}
#ifdef DEBUG_PORT_INDEX
printf("Port Index membyte=%5.3lf (MB).\n", (double)mem_bytes/(1U<<20));
#endif
return mem_bytes;
}
/*
Report the ids of the intervals containing port `key`.

key    : port to look up; values above 65535 cannot match and yield 0
         (m_bitmap and m_L only cover ports 0..65535).
result : caller-provided output array with room for `size` ids.
size   : capacity of result; at most this many ids are written.
return : number of ids written.
*/
int CPortIndex::Find(unsigned int key, unsigned int * result, unsigned int size)
{
    if (key > 65535) return 0;

    // Fast rejection: bit `key` is set only when at least one interval covers it.
    if ((m_bitmap[key>>3] & (1U << (key & 7))) == 0) return 0;

    unsigned int n = m_L[key+1] - m_L[key];
    if (n > size) n = size;

    const unsigned int * p = m_values + m_L[key];
    for (unsigned int i = 0; i < n; i++) *result++ = *p++;
    return n;
}

View File

@@ -0,0 +1,40 @@
/*
*
* Copyright (c) 2008-2016
* String Algorithms Research Group
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS)
* National Engineering Laboratory for Information Security Technologies (NELIST)
* All rights reserved
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2016-05-31
*
* This code is the exclusive and proprietary property of IIE-CAS and NELIST.
* Usage for direct or indirect commercial advantage is not allowed without
* written permission from the authors.
*
*/
#ifndef H_PORT_INDEX_CPP_H
#define H_PORT_INDEX_CPP_H
#include "IntervalIndex.h"
/*
Interval index specialised for 16-bit keys (ports): for every key the list of
matching interval ids is precomputed, so a lookup is a bitmap test plus a copy.
*/
class CPortIndex : public CIntervalIndex
{
public:
CPortIndex();
virtual ~CPortIndex();
/* Build the index from closed intervals [a[i], b[i]]; returns the memory estimate in bytes. */
virtual long long PreProcessing(const std::vector<unsigned int>& a, const std::vector<unsigned int>& b);
/* Write up to `size` ids of intervals containing key into result; returns the number written. */
virtual int Find(unsigned int key, unsigned int * result, unsigned int size);
private:
unsigned char m_bitmap[8192];  // one bit per key 0..65535: set when the key has at least one id
unsigned int m_L[65537];       // m_L[k]..m_L[k+1] delimit the ids of key k inside m_values
unsigned int * m_values;       // concatenated id lists of all keys
};
#endif

View File

@@ -0,0 +1,156 @@
/*
*
* Copyright (c) 2008--2015
* String Algorithms Research Group
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS)
* National Engineering Laboratory for Information Security Technologies (NELIST)
* All rights reserved
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2015-12-9
*
* This code is the exclusive and proprietary property of IIE-CAS and NELIST.
* Usage for direct or indirect commercial advantage is not allowed without
* written permission from the authors.
*
*/
#include "SuccinctHash.h"
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
static const unsigned long long ONE=1;
static const unsigned int B=22695477;
inline unsigned int myhash(unsigned int key)
{
//return key;
unsigned int h=0;
FOR(j, 4)
{
h=h*B+(key&255);
key>>=8;
}
return h;
}
// Create an empty table: nothing is allocated until init() is called.
CSuccinctHash::CSuccinctHash()
    : m_RT(NULL),
      m_kv_ptr(NULL),
      m_kv_array(NULL)
{
}
// Release the rank table (allocated with aligned_malloc) and the two arrays
// allocated with new[]. delete[] on a NULL pointer is a no-op.
CSuccinctHash::~CSuccinctHash()
{
    if (m_RT != NULL)
    {
        aligned_free(m_RT);
    }
    delete [] m_kv_array;
    delete [] m_kv_ptr;
}
/*
Build the hash table from num (key, value) pairs and return a memory estimate
in bytes.

Layout: the 2^m_hash_bits hash slots are represented by one bit each, grouped
into 256-bit blocks (packedRT_t). rank() turns a slot number into the index of
that slot among the occupied slots; m_kv_ptr[idx]..m_kv_ptr[idx+1] then
delimit the pairs of that slot inside m_kv_array (stored as key, value, key,
value, ...).

NOTE(review): num==0 feeds log10(0) into an int conversion, the result of
aligned_malloc() is not checked, and a second call would overwrite the
pointers of the first one -- confirm callers never do any of these.
*/
long long CSuccinctHash::init(unsigned int keys[], unsigned int values[], unsigned int num)
{
// Number of hash bits: floor(log2(num)) + 4, but at least 8 (one full 256-bit block).
m_hash_bits=(int)(log10((double)num)/log10(2.0))+4;
if(m_hash_bits<8) m_hash_bits=8;
//printf("m_hash_bits=%d\n", m_hash_bits);
// One block per 256 slots, plus one extra block that only receives the final running count A.
m_RT=(packedRT_t *)aligned_malloc(sizeof(packedRT_t)*((1U<<(m_hash_bits-8))+1), 64);
FOR(i, (1U<<(m_hash_bits-8)))
{
FOR(j, 4) m_RT[i].bitmap[j]=0;
}
// Pass 1: mark every occupied slot in the bitmaps.
FOR(i, num)
{
unsigned int h=myhash(keys[i]);
h&=((1U<<m_hash_bits)-1);
int q=h&255;
m_RT[h>>8].bitmap[q>>6]|=(ONE<<(q&63));
}
// Precompute the rank helpers: A = set bits in all earlier blocks,
// B[j] = set bits in the first j 64-bit words of this block.
m_RT[0].A=0;
FOR(i, (1U<<(m_hash_bits-8)))
{
m_RT[i].B[0]=0;
m_RT[i].B[1]= popcnt_u64(m_RT[i].bitmap[0]);
m_RT[i].B[2]= m_RT[i].B[1]+popcnt_u64(m_RT[i].bitmap[1]);
m_RT[i].B[3]= m_RT[i].B[2]+popcnt_u64(m_RT[i].bitmap[2]);
m_RT[i+1].A=m_RT[i].A+m_RT[i].B[3]+popcnt_u64(m_RT[i].bitmap[3]);
}
// tn = total number of occupied slots.
int tn=m_RT[(1U<<(m_hash_bits-8))].A;
m_kv_ptr=new unsigned int[tn+1];
FOR(i, tn+1) m_kv_ptr[i]=0;
// Pass 2: count the pairs per occupied slot ...
FOR(i, num)
{
unsigned int h=myhash(keys[i]);
h&=((1U<<m_hash_bits)-1);
unsigned int idx=rank(h);
m_kv_ptr[idx]++;
}
// ... and turn the counts into end offsets (inclusive prefix sum).
FOR(i, tn) m_kv_ptr[i+1]+=m_kv_ptr[i];
m_kv_array=new unsigned int[2*num];
// Pass 3: place each pair at the end of its slot's range, moving the offset
// down; afterwards m_kv_ptr[idx] is the start offset of slot idx.
FOR(i, num)
{
unsigned int h=myhash(keys[i]);
h&=((1U<<m_hash_bits)-1);
unsigned int idx=rank(h);
unsigned int j=--m_kv_ptr[idx];
m_kv_array[2*j]=keys[i];
m_kv_array[2*j+1]=values[i];
}
long long mem_bytes=(1U<<(m_hash_bits-3))*sizeof(char)*5/4+(tn+1+2*num)*sizeof(unsigned int);
return mem_bytes;
}
unsigned int CSuccinctHash::rank(unsigned int h)
{
int p=(h>>8);
int r=((h&255)>>6);
int s=(h&63);
unsigned long long e=m_RT[p].bitmap[r]&((ONE<<s)-1);
return m_RT[p].A+m_RT[p].B[r]+popcnt_u64(e);
}
// Look up key and copy the values stored for it into value[].
// value : caller-provided output array with room for `size` entries.
// Returns the number of values written (at most size); 0 when the key's
// hash slot is empty or holds no pair with this key.
int CSuccinctHash::find(unsigned int key, unsigned int * value, unsigned int size)
{
    const unsigned int h = myhash(key) & ((1U << m_hash_bits) - 1);
    const int q = h & 255;

    // Empty slot: the key cannot be present.
    if ((m_RT[h >> 8].bitmap[q >> 6] & (ONE << (q & 63))) == 0)
    {
        return 0;
    }

    unsigned int found = 0;
    const unsigned int slot = rank(h);
    const unsigned int last = m_kv_ptr[slot + 1];
    for (unsigned int j = m_kv_ptr[slot]; j < last; j++)
    {
        // Pairs are stored as (key, value); skip pairs of colliding keys.
        if (m_kv_array[2 * j] != key) continue;
        if (found == size) return found;
        *value++ = m_kv_array[2 * j + 1];
        found++;
    }
    return found;
}

View File

@@ -0,0 +1,42 @@
/*
*
* Copyright (c) 2008--2015
* String Algorithms Research Group
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS)
* National Engineering Laboratory for Information Security Technologies (NELIST)
* All rights reserved
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2015-12-9
*
* This code is the exclusive and proprietary property of IIE-CAS and NELIST.
* Usage for direct or indirect commercial advantage is not allowed without
* written permission from the authors.
*
*/
#ifndef H_SUCCINCT_HASH_CPP_H
#define H_SUCCINCT_HASH_CPP_H
#include "sigmastar_tools.h"
/*
Static hash table from unsigned int keys to unsigned int values. Occupied
hash slots are tracked in a bitmap with precomputed rank counters, so only
occupied slots consume an entry in the offset table.
*/
class CSuccinctHash
{
public:
CSuccinctHash();
~CSuccinctHash();
/* Build the table from num (keys[i], values[i]) pairs; returns a memory estimate in bytes. */
long long init(unsigned int keys[], unsigned int values[], unsigned int num);
/* Copy up to `size` values stored for key into value[]; returns the number written. */
int find(unsigned int key, unsigned int * value, unsigned int size);
protected:
/* Number of occupied slots that precede slot h. */
unsigned int rank(unsigned int h);
protected:
unsigned int m_hash_bits;   // the table has 2^m_hash_bits slots; set by init()
packedRT_t * m_RT;          // slot bitmap with rank counters, one entry per 256 slots
unsigned int * m_kv_ptr;    // m_kv_ptr[i]..m_kv_ptr[i+1] delimit the pairs of the i-th occupied slot
unsigned int * m_kv_array;  // pairs stored as key, value, key, value, ...
};
#endif // H_SUCCINCT_HASH_CPP_H

View File

@@ -0,0 +1,72 @@
#include "sigmastar_tools.h"
#include <stdlib.h>
#include <stdio.h>
/*
Allocate `size` bytes whose address is a multiple of `align`.

size  : number of usable bytes; 0 yields NULL.
align : required alignment, a non-zero power of two. Values smaller than
        sizeof(void *) are raised to sizeof(void *) so that the bookkeeping
        pointer stored in front of the block is itself properly aligned.
return: the aligned pointer, or NULL on invalid arguments, on size overflow
        or when malloc() fails.

The address returned by malloc() is stored in the pointer-sized slot directly
in front of the returned block; release the block with aligned_free() only.
*/
void * aligned_malloc(size_t size, size_t align)
{
    if (align == 0 || size == 0)
    {
        return NULL;
    }
    /* Error if align is not a power of two. */
    if (align & (align - 1))
    {
        return NULL;
    }
    if (align < sizeof(void *))
    {
        align = sizeof(void *);
    }

    /* Room for the saved pointer plus the worst-case alignment padding. */
    const size_t overhead = sizeof(void *) + align - 1;
    /* Reject requests whose total size would wrap around size_t. */
    if (size > (size_t)-1 - overhead)
    {
        return NULL;
    }

    void * raw = malloc(overhead + size);
    if (!raw)
    {
        return NULL;
    }

    void * aligned = (void *)(((size_t)raw + overhead) & ~(align - 1));
    ((void **)aligned)[-1] = raw;
    return aligned;
}
/*
Release a block obtained from aligned_malloc(). NULL is ignored.
The pointer originally returned by malloc() is read from the slot directly
in front of the aligned block and handed to free().
*/
void aligned_free(void * aligned_ptr)
{
    if (aligned_ptr == NULL)
    {
        return;
    }
    void ** saved = (void **)aligned_ptr - 1;
    free(*saved);
}
/* popcnt_u16[v] = number of set bits in the 16-bit value v. All zero until initialize_sigmastar_tools() has filled it. */
unsigned char popcnt_u16[65536];
/* Count the set bits of x (0..32). */
unsigned char popcnt_u32(unsigned int x)
{
    unsigned char bits = 0;
    /* x & (x - 1) clears the lowest set bit, so the loop runs once per set bit. */
    for (; x != 0; x &= x - 1)
    {
        ++bits;
    }
    return bits;
}
#ifndef USE_SSE_INSTR
/*
Portable population count for 64-bit values, compiled only when
USE_SSE_INSTR is not defined. Sums the popcnt_u16 table entries of the four
16-bit chunks of x, so the table must have been filled by
initialize_sigmastar_tools() beforehand.
*/
int popcnt_u64(unsigned long long x)
{
    int total = 0;
    for (int shift = 0; shift < 64; shift += 16)
    {
        total += popcnt_u16[(x >> shift) & 0xFFFF];
    }
    return total;
}
#endif
/*
Fill the popcnt_u16 lookup table with the bit count of every 16-bit value.
Always returns 1.
*/
int initialize_sigmastar_tools()
{
    unsigned int value = 0;
    while (value < 65536)
    {
        popcnt_u16[value] = popcnt_u32(value);
        ++value;
    }
    return 1;
}

View File

@@ -0,0 +1,56 @@
/*
*
* Copyright (c) 2008-2016
* String Algorithms Research Group
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS)
* National Engineering Laboratory for Information Security Technologies (NELIST)
* All rights reserved
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2016-05-31
*
* This code is the exclusive and proprietary property of IIE-CAS and NELIST.
* Usage for direct or indirect commercial advantage is not allowed without
* written permission from the authors.
*
*/
#ifndef H_SIGMA_STAR_TOOLS_H
#define H_SIGMA_STAR_TOOLS_H
#include <stdlib.h>
#include <vector>
#include <string>
#include <map>
//#include "./StringMatch/include/string_matching.h"
using namespace std;
//#define DEBUG_PARTITION
int initialize_sigmastar_tools();
/*
One 256-bit block of a slot bitmap together with the counters needed to
compute the rank of a bit (see CSuccinctHash::rank / CSuccinctHash::init).
*/
struct packedRT_t
{
unsigned long long bitmap[4];  /* 4 x 64 = 256 occupancy bits */
unsigned int A;                /* number of set bits in all preceding blocks */
unsigned char B[4];            /* B[j] = number of set bits in bitmap[0..j-1] of this block */
};
void * aligned_malloc(size_t size, size_t align);
void aligned_free(void * aligned_ptr);
#if (defined __linux__) && (defined __SSE4_2__)
#define USE_SSE_INSTR
#endif
#ifdef USE_SSE_INSTR
#include <nmmintrin.h>
#define popcnt_u64 _mm_popcnt_u64
#else
int popcnt_u64(unsigned long long x);
#endif
#define FOR(i, n) for(int i=0, _n=(int)(n); i<_n; i++)
#endif

View File

@@ -0,0 +1,184 @@
#include <unistd.h>
#include <sys/syscall.h>
#include <list>
#include <map>
#include "ip_matcher.h"
#include "rule_match.h"
#include "ipv4_match.h"
#include "ipv6_match.h"
using namespace std;
// Return the kernel thread id of the calling thread (SYS_gettid system call).
pid_t ip_matcher_gettid()
{
    const long tid = syscall(SYS_gettid);
    return (pid_t)tid;
}
// Format "<name>(<thread id>)" into a per-thread static buffer and return it.
// The returned pointer stays valid until the same thread calls this function
// again; names that do not fit into the 64-byte buffer are truncated.
static const char *ip_matcher_module_name_str(const char *name)
{
    static __thread char buffer[64];
    const pid_t tid = ip_matcher_gettid();
    snprintf(buffer, sizeof(buffer), "%s(%d)", name, tid);
    return buffer;
}
#define MODULE_IP_MATCHER ip_matcher_module_name_str("maat.ip_matcher")
int ipmatcher_VERSION_2020_05_13 = 0;
/*
Scanner handle returned by ip_matcher_new(): one rule matcher per address
family plus the log handle used for error reporting.
*/
struct ip_matcher
{
CRuleMatch * ipv4_matcher;   // matcher for the IPv4 rules, NULL when there are none
CRuleMatch * ipv6_matcher;   // matcher for the IPv6 rules, NULL when there are none
struct log_handle *logger;   // log handle passed to ip_matcher_new()
#ifdef RULESCAN_DEBUG
//for test
double search_time;
int search_cnt;
#endif
};
// Create an empty rule matcher for the given address family.
// Returns NULL for any value other than IPv4 or IPv6.
CRuleMatch * new_rule_matcher(enum IP_TYPE type)
{
    switch (type)
    {
    case IPv4:
        return new CIPv4Match();
    case IPv6:
        return new CIPv6Match();
    default:
        return NULL;
    }
}
/*
Build an IP scanner from an array of rules.

rules    : array of rule_num rules; must not be NULL.
rule_num : number of rules; must not be 0.
mem_use  : optional output, receives the memory consumed by the scanner in
           bytes; may be NULL.
logger   : log handle used for error reporting; stored in the scanner.
return   : the new scanner, or NULL on invalid arguments or when one of the
           per-family matchers fails to initialize. Release the scanner with
           ip_matcher_free().

Rules are grouped by address family and keyed by rule_id, so a later rule
with the same id and family replaces an earlier one. On failure everything
built so far is released before returning.
*/
struct ip_matcher * ip_matcher_new(struct ip_rule * rules, size_t rule_num,
                                   size_t * mem_use, struct log_handle *logger)
{
    if(rules == NULL || rule_num == 0)
    {
        log_error(logger, MODULE_IP_MATCHER,
                  "[%s:%d]: ip_matcher_new() failed, for param is wrong!",
                  __FILE__, __LINE__);
        return NULL;
    }

    long long mem_bytes = sizeof(struct ip_matcher);
    struct ip_matcher * matcher = new struct ip_matcher;
    matcher->ipv4_matcher = NULL;
    matcher->ipv6_matcher = NULL;
    matcher->logger = logger;

    // Split the rules by address family.
    map<long long, struct ip_rule> ipv4_rules;
    map<long long, struct ip_rule> ipv6_rules;
    for(size_t i = 0; i < rule_num; i++)
    {
        long long id = rules[i].rule_id;
        if(rules[i].type == IPv4)
            ipv4_rules[id] = rules[i];
        if(rules[i].type == IPv6)
            ipv6_rules[id] = rules[i];
    }

    // Build the IPv4 scanner.
    if(ipv4_rules.size() != 0)
    {
        CRuleMatch * v4_matcher = new CIPv4Match;
        long long ret = v4_matcher->initialize(ipv4_rules);
        if(ret < 0)
        {
            log_error(logger, MODULE_IP_MATCHER,
                      "[%s:%d]: ip_matcher_new() failed, for initializing the ipv4 matcher failed!",
                      __FILE__, __LINE__);
            delete v4_matcher;
            delete matcher;
            return NULL;
        }
        mem_bytes += ret;
        matcher->ipv4_matcher = v4_matcher;
    }

    // Build the IPv6 scanner.
    if(ipv6_rules.size() != 0)
    {
        CRuleMatch * v6_matcher = new CIPv6Match;
        long long ret = v6_matcher->initialize(ipv6_rules);
        if(ret < 0)
        {
            log_error(logger, MODULE_IP_MATCHER,
                      "[%s:%d]: ip_matcher_new() failed, for initializing the ipv6 matcher failed!",
                      __FILE__, __LINE__);
            delete v6_matcher;
            // Also drop the IPv4 matcher that was already attached (delete on NULL is a no-op).
            delete matcher->ipv4_matcher;
            delete matcher;
            return NULL;
        }
        mem_bytes += ret;
        matcher->ipv6_matcher = v6_matcher;
    }

    if(mem_use != NULL)
    {
        *mem_use = mem_bytes;
    }
    return matcher;
}
/*
Scan one IP address against the rules of a scanner.

matcher : scanner created by ip_matcher_new().
data    : address to scan; data->type selects the IPv4 or IPv6 matcher.
result  : caller-provided output array with room for `size` entries.
size    : capacity of result.
return  : number of results written by the selected matcher, or -1 on error
          (NULL argument, no matcher for the address family, or a failure
          reported by search_rule()).

A NULL matcher is rejected without logging, because the log handle lives
inside the matcher itself.
*/
int ip_matcher_match(struct ip_matcher* matcher, struct ip_data* data,
                     struct scan_result* result, size_t size)
{
    if(matcher == NULL)
    {
        return -1;
    }
    if(data == NULL || result == NULL)
    {
        log_error(matcher->logger, MODULE_IP_MATCHER,
                  "[%s:%d]: ip_matcher_match() failed, for param is NULL!",
                  __FILE__, __LINE__);
        return -1;
    }

    // Pick the matcher of the address family of the input.
    CRuleMatch * tmp_matcher = NULL;
    if(data->type == IPv4)
    {
        tmp_matcher = matcher->ipv4_matcher;
    }
    else if(data->type == IPv6)
    {
        tmp_matcher = matcher->ipv6_matcher;
    }
    if(tmp_matcher == NULL)
    {
        log_error(matcher->logger, MODULE_IP_MATCHER,
                  "[%s:%d]: ip_matcher_match() failed, for can't find the right rule_matcher!",
                  __FILE__, __LINE__);
        return -1;
    }

    int ret = tmp_matcher->search_rule(data, result, size);
    if(ret < 0)
    {
        log_error(matcher->logger, MODULE_IP_MATCHER,
                  "[%s:%d]: ip_matcher_match() failed, for the value returned by search_rule() is wrong!",
                  __FILE__, __LINE__);
        return -1;
    }
    return ret;
}
// Destroy a scanner created by ip_matcher_new(), including both per-family
// matchers. A NULL scanner is ignored.
void ip_matcher_free(struct ip_matcher* matcher)
{
    if (matcher != NULL)
    {
        // delete on a NULL pointer is a no-op, so absent matchers need no check.
        delete matcher->ipv4_matcher;
        delete matcher->ipv6_matcher;
        delete matcher;
    }
}

View File

@@ -0,0 +1,122 @@
/*
*
* Copyright (c) 2020
* String Algorithms Research Group
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS)
* National Engineering Laboratory for Information Security Technologies (NELIST)
* All rights reserved
*
* Written by: LU YUHAI (luyuhai@iie.ac.cn)
* Last modification: 2020-04-20
*
* This code is the exclusive and proprietary property of IIE-CAS and NELIST.
* Usage for direct or indirect commercial advantage is not allowed without
* written permission from the authors.
*
*/
#ifndef H_IP_MATCHER_H
#define H_IP_MATCHER_H
#include <stddef.h>
#include "../../deps/log/log.h"
#ifdef __cplusplus
extern "C"
{
#endif
/* Address family of a rule or of the data to scan. */
enum IP_TYPE
{
IPv4=4,
IPv6=6
};
/* Address range of a single IPv4 rule */
struct ipv4_range
{
unsigned int start_ip; /* lower bound of the IP range */
unsigned int end_ip; /* upper bound of the IP range */
};
/* Address range of a single IPv6 rule */
struct ipv6_range
{
unsigned int start_ip[4]; /* lower bound of the IP range, stored in Big-Endian order */
unsigned int end_ip[4]; /* upper bound of the IP range, stored in Big-Endian order */
};
/* Generic IP rule type */
struct ip_rule
{
enum IP_TYPE type; /* rule type: ipv4 or ipv6 */
long long rule_id; /* rule ID */
void* user_tag; /* user-defined data, returned together with the match result on a hit */
union
{
struct ipv4_range ipv4_rule; /* address range of a single IPv4 rule */
struct ipv6_range ipv6_rule; /* address range of a single IPv6 rule */
};
};
/* Generic type for the data to be scanned */
struct ip_data
{
enum IP_TYPE type; /* data type: ipv4 or ipv6 */
union /* type decides whether the data is read as ipv4 or ipv6 */
{
unsigned int ipv4; /* ipv4 data */
unsigned int ipv6[4]; /* ipv6 data, stored in Big-Endian order */
};
};
/* One entry of the scan result */
struct scan_result
{
long long rule_id; /* ID of the matched rule */
void * tag; /* user-defined data of the matched rule, returned on a hit */
};
struct ip_matcher;
/*
	Function: build an IP scanner from the given rules
	Parameters:
		rules[in]: array of IP rules
		rule_num[in]: number of rules in the array
		mem_use[out]: memory consumed by the scanner
		logger[in]: log handle used for error reporting
	Return value:
		the IP scanner; a NULL pointer means that building the scanner failed
*/
struct ip_matcher* ip_matcher_new(struct ip_rule * rules, size_t rule_num,
size_t * mem_use, struct log_handle *logger);
/*
Purpose: scan one ip address with an ip scanner.
Parameters:
matcher[in]: the ip scanner
data[in]: the ip data to be scanned
result[in]: caller-provided array in which the hits are returned
size[in]: capacity of the `result` array
Return value:
number of hits (<= size); a return value of -1 indicates an error
(Comment rewritten in English from a mis-encoded original -- verify against the implementation.)
*/
int ip_matcher_match(struct ip_matcher* matcher, struct ip_data * data,
struct scan_result* result, size_t size);
/*
Purpose: destroy an ip scanner.
Parameters:
matcher[in]: pointer to the ip scanner to destroy
(Comment rewritten in English from a mis-encoded original -- verify against the implementation.)
*/
void ip_matcher_free(struct ip_matcher* matcher);
#ifdef __cplusplus
}
#endif
#endif /* !defined(H_IP_MATCHER_H) */

View File

@@ -0,0 +1,114 @@
/*
*
* Copyright (c)2020
* String Algorithms Research Group
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS)
* National Engineering Laboratory for Information Security Technologies (NELIST)
* All rights reserved
*
* Written by: LIU YUHAI (luyuhai@iie.ac.cn)
* Last modification: 2020-04-16
*
* This code is the exclusive and proprietary property of IIE-CAS and NELIST.
* Usage for direct or indirect commercial advantage is not allowed without
* written permission from the authors.
*
*/
#include "ipv4_match.h"
#include "IntervalIndex/NaiveIntervalIndex.h"
#include "IntervalIndex/IPMaskIndex.h"
#include "IntervalIndex/PortIndex.h"
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <algorithm>
using namespace std;
//#define DEBUG_IPV4_MATCH
// Strict-weak ordering for std::sort: an IPv4 rule sorts earlier when its
// address range (end_ip - start_ip) is narrower.
bool cmp(ipv4_rule_t a, ipv4_rule_t b)
{
    unsigned int width_b = b.rule.end_ip - b.rule.start_ip;
    unsigned int width_a = a.rule.end_ip - a.rule.start_ip;

    if (width_a < width_b)
    {
        return true;
    }
    return false;
}
// Creates an empty matcher: no rules and no index until initialize() is called.
CIPv4Match::CIPv4Match()
    : m_rnum(0),
      m_rules(NULL),
      m_pIndexer(NULL)
{
}
// Releases the rule array and the interval index allocated by initialize().
CIPv4Match::~CIPv4Match()
{
    // delete / delete[] on a NULL pointer is a no-op, so no explicit checks are needed.
    delete m_pIndexer;
    delete [] m_rules;
}
long long CIPv4Match::initialize(const map<long long, struct ip_rule>& rules)
{
m_rnum=rules.size();
if(m_rnum==0) return 0;
long long mem_bytes=0;
m_rules = new ipv4_rule_t[m_rnum];
mem_bytes+=(sizeof(struct ipv4_range)+sizeof(unsigned int)+sizeof(void *))*m_rnum;
vector<unsigned int> a, b;
unsigned int i=0;
for(map<long long, struct ip_rule>::const_iterator it=rules.begin(); it!=rules.end(); ++it)
{
struct ipv4_range arule = it->second.ipv4_rule;
m_rules[i].rule = arule;
m_rules[i].rule_id = it->first;
m_rules[i++].tag = it->second.user_tag;
}
//<2F><><EFBFBD>ݵ<EFBFBD><DDB5><EFBFBD>ip<69>ķ<EFBFBD>Χ<EFBFBD><CEA7>С<EFBFBD><D0A1><EFBFBD><EFBFBD>
sort(&m_rules[0], &m_rules[m_rnum],cmp);
for(unsigned int i = 0; i < m_rnum; i++)
{
a.push_back(m_rules[i].rule.start_ip);
b.push_back(m_rules[i].rule.end_ip);;
}
m_pIndexer = new CNaiveIntervalIndex;
mem_bytes+=sizeof(CNaiveIntervalIndex);
int ret=m_pIndexer->PreProcessing(a, b);
if(ret<0) return -1;
mem_bytes+=ret;
a.clear();
b.clear();
return mem_bytes;
}
int CIPv4Match::search_rule(const struct ip_data * data, struct scan_result * presult, unsigned int size)
{
unsigned int hit_num=0;
unsigned int m_v[size];
int ret=m_pIndexer->Find(data->ipv4, m_v, size);
if(ret<0) return -1;
sort(&m_v[0], &m_v[ret]);
for(int i = 0; i < ret; i++)
{
if(hit_num == size)
return hit_num;
unsigned int index = m_v[i];
presult[hit_num].rule_id = m_rules[index].rule_id;
presult[hit_num++].tag = m_rules[index].tag;
}
return hit_num;
}

View File

@@ -0,0 +1,49 @@
/*
*
* Copyright (c) 2008-2016
* String Algorithms Research Group
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS)
* National Engineering Laboratory for Information Security Technologies (NELIST)
* All rights reserved
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2016-05-31
*
* This code is the exclusive and proprietary property of IIE-CAS and NELIST.
* Usage for direct or indirect commercial advantage is not allowed without
* written permission from the authors.
*
*/
#ifndef H_IPV4_MATCH_H
#define H_IPV4_MATCH_H
#include "rule_match.h"
#include "IntervalIndex/IntervalIndex.h"
#include "ip_matcher.h"
/* One IPv4 rule as stored inside CIPv4Match. */
typedef struct _ipv4_rule_t{
struct ipv4_range rule; /* address range of the rule (start_ip / end_ip are read by the matcher) */
void * tag; /* user tag, copied from ip_rule.user_tag */
long long rule_id; /* rule id, taken from the key of the rule map */
}ipv4_rule_t;
/* IPv4 implementation of the CRuleMatch interface, backed by an interval index. */
class CIPv4Match : public CRuleMatch
{
public:
CIPv4Match();
~CIPv4Match();
/* Builds the rule array and the interval index from `rules`.
   Returns the accounted memory in bytes, 0 for an empty rule table, -1 if index pre-processing fails. */
virtual long long initialize(const map<long long, struct ip_rule>& rules);
/* Looks up data->ipv4 and writes up to `size` hits into presult.
   Returns the number of hits written, or -1 if the index lookup fails. */
virtual int search_rule(const struct ip_data * data, struct scan_result * presult, unsigned int size);
private:
unsigned int m_rnum; /* number of entries in m_rules */
ipv4_rule_t * m_rules; /* rule array, sorted by range width in initialize(); owned by this object */
CIntervalIndex * m_pIndexer; /* interval index over the rules' start_ip/end_ip; owned by this object */
};
#endif

View File

@@ -0,0 +1,127 @@
/*
*
* Copyright (c) 2008-2016
* String Algorithms Research Group
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS)
* National Engineering Laboratory for Information Security Technologies (NELIST)
* All rights reserved
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2016-06-03
*
* This code is the exclusive and proprietary property of IIE-CAS and NELIST.
* Usage for direct or indirect commercial advantage is not allowed without
* written permission from the authors.
*
*/
#include "ipv6_match.h"
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <algorithm>
#include "IntervalIndex/Int128IntervalIndex.h"
using namespace std;
//#define DEBUG_IPV6_MATCH
// Strict-weak ordering for std::sort: an IPv6 rule sorts earlier when its
// address range (end_ip - start_ip, computed as 128-bit values) is narrower.
bool cmp(ipv6_rule_t a, ipv6_rule_t b)
{
    uint128_t first_lo(a.rule.start_ip);
    uint128_t first_hi(a.rule.end_ip);
    uint128_t second_lo(b.rule.start_ip);
    uint128_t second_hi(b.rule.end_ip);

    // Width of each of the two ipv6 ranges.
    uint128_t first_width = first_hi - first_lo;
    uint128_t second_width = second_hi - second_lo;

    if (first_width < second_width)
    {
        return true;
    }
    return false;
}
// Creates an empty matcher: no rules and no index until initialize() is called.
CIPv6Match::CIPv6Match()
    : m_rnum(0),
      m_rules(NULL),
      m_ipv6Indexer(NULL)
{
}
// Releases the interval index and the rule array allocated by initialize().
CIPv6Match::~CIPv6Match()
{
    // delete / delete[] on a NULL pointer is a no-op, so no explicit checks are needed.
    delete m_ipv6Indexer;
    delete [] m_rules;
}
long long CIPv6Match::initialize(const map<long long, struct ip_rule>& rules)
{
m_rnum=rules.size();
if(m_rnum==0) return 0;
long long mem_bytes=0;
m_rules = new ipv6_rule_t[m_rnum];
mem_bytes+=(sizeof(struct ipv6_range)+sizeof(unsigned int)+sizeof(void *))*m_rnum;
unsigned int i=0;
for(map<long long, struct ip_rule>::const_iterator it=rules.begin(); it!=rules.end(); ++it)
{
struct ipv6_range arule = it->second.ipv6_rule;;
m_rules[i].rule = arule;
m_rules[i].rule_id = it->first;
m_rules[i++].tag = it->second.user_tag;
}
//<2F><><EFBFBD>ݵ<EFBFBD><DDB5><EFBFBD>ip<69>ķ<EFBFBD>Χ<EFBFBD><CEA7>С<EFBFBD><D0A1><EFBFBD><EFBFBD>
sort(&m_rules[0], &m_rules[m_rnum],cmp);
vector<uint128_t> A, B;
for(i = 0; i < m_rnum; i++)
{
uint128_t a(m_rules[i].rule.start_ip);
uint128_t b(m_rules[i].rule.end_ip);
A.push_back(a);
B.push_back(b);
}
m_ipv6Indexer = new CInt128IntervalIndex;
mem_bytes+=sizeof(CInt128IntervalIndex);
long long ret = m_ipv6Indexer->PreProcessing(A, B);
if(ret<0) return -1;
mem_bytes+=ret;
return mem_bytes;
}
int CIPv6Match::search_rule(const struct ip_data * data, struct scan_result * presult, unsigned int size)
{
if(m_rnum==0) return 0;
unsigned int hit_num=0;
unsigned int m_v[size];
uint128_t key(data->ipv6);
int ret=m_ipv6Indexer->Find(&key, m_v, size);
if(ret<0) return -1;
sort(&m_v[0], &m_v[ret]);
for(int i = 0; i < ret; i++)
{
if(hit_num == size)
return hit_num;
unsigned int index = m_v[i];
presult[hit_num].rule_id = m_rules[index].rule_id;
presult[hit_num++].tag = m_rules[index].tag;
}
return hit_num;
}

View File

@@ -0,0 +1,50 @@
/*
*
* Copyright (c) 2008-2016
* String Algorithms Research Group
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS)
* National Engineering Laboratory for Information Security Technologies (NELIST)
* All rights reserved
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2016-05-31
*
* This code is the exclusive and proprietary property of IIE-CAS and NELIST.
* Usage for direct or indirect commercial advantage is not allowed without
* written permission from the authors.
*
*/
#ifndef H_IPV6_MATCH_H
#define H_IPV6_MATCH_H
#include "rule_match.h"
#include "IntervalIndex/PortIndex.h"
#include "IntervalIndex/Int128IntervalIndex.h"
#include "ip_matcher.h"
/* One IPv6 rule as stored inside CIPv6Match. */
typedef struct _ipv6_rule_t{
struct ipv6_range rule; /* address range of the rule (start_ip / end_ip are read by the matcher) */
long long rule_id; /* rule id, taken from the key of the rule map */
void * tag; /* user tag, copied from ip_rule.user_tag */
}ipv6_rule_t;
/* IPv6 implementation of the CRuleMatch interface, backed by a 128-bit interval index. */
class CIPv6Match : public CRuleMatch
{
public:
CIPv6Match();
~CIPv6Match();
/* Builds the rule array and the interval index from `rules`.
   Returns the accounted memory in bytes, 0 for an empty rule table, -1 if index pre-processing fails. */
virtual long long initialize(const map<long long, struct ip_rule>& rules);
/* Looks up data->ipv6 and writes up to `size` hits into presult.
   Returns the number of hits written, 0 when no rules are loaded, or -1 if the index lookup fails. */
virtual int search_rule(const struct ip_data * data, struct scan_result * presult, unsigned int size);
private:
unsigned int m_rnum; /* number of entries in m_rules */
ipv6_rule_t * m_rules; /* rule array, sorted by range width in initialize(); owned by this object */
CInt128IntervalIndex * m_ipv6Indexer; /* interval index over the rules' start_ip/end_ip; owned by this object */
};
#endif

View File

@@ -0,0 +1,53 @@
#ifndef H_RULE_MATCH_H
#define H_RULE_MATCH_H
#include "ip_matcher.h"
#include <set>
#include <map>
#include <vector>
#include <queue>
#include <stdlib.h>
using namespace std;
/* Portable mutex wrapper macros. Note that every non-Linux build takes the
   first branch and therefore requires the Win32 headers. */
#ifndef __linux__
#include <windows.h>
#include <process.h>
/************* mutex (use CRITICAL_SECTION in windows) ***************/
#define THREAD_MUTEX CRITICAL_SECTION
#define INITIALIZE_MUTEX(mutex) InitializeCriticalSection(mutex)
#define DESTROY_MUTEX DeleteCriticalSection
#define LOCK_MUTEX EnterCriticalSection
#define UNLOCK_MUTEX LeaveCriticalSection
/**********************************************************************/
#else
#include <pthread.h>
#include <unistd.h>
/************** mutex (use pthread_mutex_t in Linux) ***************/
#define THREAD_MUTEX pthread_mutex_t
#define INITIALIZE_MUTEX(mutex) pthread_mutex_init(mutex, NULL)
#define DESTROY_MUTEX pthread_mutex_destroy
#define LOCK_MUTEX pthread_mutex_lock
#define UNLOCK_MUTEX pthread_mutex_unlock
/**********************************************************************/
#endif
/* Abstract interface of a rule matcher; CIPv4Match and CIPv6Match implement it. */
class CRuleMatch
{
public:
CRuleMatch()
{
}
/* Virtual so that a matcher can be destroyed through a CRuleMatch pointer. */
virtual ~CRuleMatch()
{
}
/* Builds the matcher from a map of rule id -> rule.
   The visible implementations return the accounted memory in bytes, 0 for an empty map, and -1 on failure. */
virtual long long initialize(const map<long long, struct ip_rule>& rules)=0;
/* Scans `data` and writes up to `size` hits into presult.
   The visible implementations return the number of hits written, or -1 on failure. */
virtual int search_rule(const struct ip_data * data, struct scan_result * presult, unsigned int size)=0;
};
#endif

View File

@@ -24,6 +24,12 @@ include_directories(/opt/MESA/include/MESA/)
include_directories(${PROJECT_SOURCE_DIR}/include/)
include_directories(${PROJECT_SOURCE_DIR}/deps/)
include_directories(${PROJECT_SOURCE_DIR}/scanner)
include_directories(${PROJECT_SOURCE_DIR}/scanner/adapter_hs)
include_directories(${PROJECT_SOURCE_DIR}/scanner/fqdn_engine)
include_directories(${PROJECT_SOURCE_DIR}/scanner/bool_matcher)
include_directories(${PROJECT_SOURCE_DIR}/scanner/ip_matcher)
include_directories(${PROJECT_SOURCE_DIR}/scanner/flag_matcher)
include_directories(${PROJECT_SOURCE_DIR}/scanner/interval_matcher)
include_directories(${PROJECT_SOURCE_DIR}/src/inc_internal)
# Static Library Output
@@ -34,10 +40,7 @@ set_target_properties(maat_frame_static PROPERTIES CLEAN_DIRECT_OUTPUT 1)
target_link_libraries(maat_frame_static hiredis-static ${MAAT_DEPEND_DYN_LIB})
target_link_libraries(maat_frame_static igraph-static)
target_link_libraries(maat_frame_static ipmatcher-static)
target_link_libraries(maat_frame_static adapter-static)
target_link_libraries(maat_frame_static flagmatcher-static)
target_link_libraries(maat_frame_static intervalmatcher-static)
# Shared Library Output
add_library(maat_frame_shared SHARED ${MAAT_SRC} ${LIB_SOURCE_FILES})
@@ -50,10 +53,7 @@ set_target_properties(maat_frame_shared PROPERTIES LINK_FLAGS "-Wl,--version-scr
target_link_libraries(maat_frame_shared hiredis-static ${MAAT_DEPEND_DYN_LIB})
target_link_libraries(maat_frame_shared igraph-static ${MAAT_DEPEND_DYN_LIB})
target_link_libraries(maat_frame_shared ipmatcher-static)
target_link_libraries(maat_frame_shared adapter-static)
target_link_libraries(maat_frame_shared flagmatcher-static)
target_link_libraries(maat_frame_shared intervalmatcher-static)
# install
set(CMAKE_INSTALL_PREFIX /opt/MESA/)

View File

@@ -27,7 +27,7 @@ extern "C"
#include "hiredis/hiredis.h"
#include "uthash/uthash.h"
#include "maat_command.h"
#include "IPMatcher.h"
//#include "ip_matcher.h"
#include "maat.h"
#include "maat_kv.h"
#include "maat_table.h"
@@ -150,7 +150,7 @@ struct foreign_key {
//rm= Redis Maat
struct serial_rule {
enum maat_operation op;//0: delete, 1: add.
unsigned long rule_id;
long long rule_id;
long long timeout; // absolute unix time.
char table_name[NAME_MAX];
char *table_line;
@@ -304,7 +304,7 @@ void maat_cmd_get_foreign_conts(redisContext *c, struct serial_rule *rule_list,
void maat_cmd_rewrite_table_line_with_foreign(struct serial_rule *s_rule);
void maat_cmd_set_serial_rule(struct serial_rule *rule, enum maat_operation op,
unsigned long rule_id, const char *table_name,
long long rule_id, const char *table_name,
const char *line, long long timeout);
#ifdef __cplusplus

View File

@@ -26,6 +26,7 @@
#include "maat_redis_monitor.h"
#include "maat_compile.h"
#include "alignment.h"
#include "ip_matcher.h"
#include "maat_garbage_collection.h"
#include "maat_group.h"
#include "maat_expr.h"

View File

@@ -143,7 +143,7 @@ redisContext *get_redis_ctx_for_write(struct maat *maat_instance)
}
void maat_cmd_set_serial_rule(struct serial_rule *rule, enum maat_operation op,
unsigned long rule_id, const char *table_name,
long long rule_id, const char *table_name,
const char *line, long long timeout)
{
memset(rule, 0, sizeof(struct serial_rule));
@@ -313,7 +313,7 @@ int maat_cmd_set_line(struct maat *maat_instance, const struct maat_cmd_line *li
int table_id = table_manager_get_table_id(maat_instance->tbl_mgr, line_rule->table_name);
if (table_id < 0) {
log_error(maat_instance->logger, MODULE_MAAT_COMMAND,
"Command set line id %d failed: unknown table %s",
"Command set line id %lld failed: unknown table %s",
line_rule->rule_id, line_rule->table_name);
FREE(s_rule);
return -1;
@@ -322,7 +322,7 @@ int maat_cmd_set_line(struct maat *maat_instance, const struct maat_cmd_line *li
int valid_column = table_manager_get_valid_column(maat_instance->tbl_mgr, table_id);
if (valid_column < 0) {
log_error(maat_instance->logger, MODULE_MAAT_COMMAND,
"Command set line id %d failed: table %s is not a plugin or ip_plugin table",
"Command set line id %lld failed: table %s is not a plugin or ip_plugin table",
line_rule->rule_id, line_rule->table_name);
FREE(s_rule);
return -1;

View File

@@ -14,7 +14,7 @@
#include "log/log.h"
#include "maat_utils.h"
#include "maat_ex_data.h"
#include "IPMatcher.h"
#include "ip_matcher.h"
#include "maat_ip.h"
#include "maat_rule.h"
#include "maat_compile.h"
@@ -460,7 +460,7 @@ int ip_runtime_commit(void *ip_runtime, const char *table_name)
log_info(ip_rt->logger, MODULE_IP,
"table[%s] committing %zu ip rules for rebuilding ip_matcher engine",
table_name, rule_cnt);
new_ip_matcher = ip_matcher_new(rules, rule_cnt, &mem_used);
new_ip_matcher = ip_matcher_new(rules, rule_cnt, &mem_used, ip_rt->logger);
if (NULL == new_ip_matcher) {
log_error(ip_rt->logger, MODULE_IP,
"table[%s] rebuild ip_matcher engine failed when update %zu ip rules",

View File

@@ -14,7 +14,7 @@
#include "maat_utils.h"
#include "maat_ip_plugin.h"
#include "maat_ex_data.h"
#include "IPMatcher.h"
#include "ip_matcher.h"
#include "maat_rule.h"
#include "maat.h"
#include "maat_garbage_collection.h"
@@ -206,7 +206,7 @@ ip_plugin_rule_new(const char *line, struct ip_plugin_schema *schema,
schema->table_id, line);
goto error;
}
ip_plugin_rule->rule_id = atoi(line + column_offset);
ip_plugin_rule->rule_id = atoll(line + column_offset);
ret = get_column_pos(line, schema->ip_type_column, &column_offset, &column_len);
if (ret < 0) {
@@ -383,7 +383,6 @@ int ip_plugin_runtime_update(void *ip_plugin_runtime, void *ip_plugin_schema,
return -1;
}
//struct ip_plugin_item *ip_plugin_item = NULL;
struct ip_rule *ip_plugin_rule = NULL;
struct ip_plugin_schema *schema = (struct ip_plugin_schema *)ip_plugin_schema;
struct ip_plugin_runtime *ip_plugin_rt = (struct ip_plugin_runtime *)ip_plugin_runtime;
@@ -462,7 +461,7 @@ int ip_plugin_runtime_commit(void *ip_plugin_runtime, const char *table_name)
log_info(ip_plugin_rt->logger, MODULE_IP_PLUGIN,
"table[%s] committing %zu ip_plugin rules for rebuilding ip_matcher engine",
table_name, rule_cnt);
new_ip_matcher = ip_matcher_new(rules, rule_cnt, &mem_used);
new_ip_matcher = ip_matcher_new(rules, rule_cnt, &mem_used, ip_plugin_rt->logger);
if (NULL == new_ip_matcher) {
log_error(ip_plugin_rt->logger, MODULE_IP_PLUGIN,
"table[%s] rebuild ip_matcher engine failed when update %zu ip_plugin rules",

View File

@@ -40,12 +40,12 @@ const char *foreign_key_prefix = "__FILE_";
const char *mr_op_str[] = {"DEL", "ADD", "RENEW_TIMEOUT"};
char *get_foreign_cont_filename(const char *table_name, int rule_id,
char *get_foreign_cont_filename(const char *table_name, long long rule_id,
const char *foreign_key, const char *dir)
{
char buffer[512] = {0};
snprintf(buffer, sizeof(buffer),"%s/%s-%d-%s", dir,
snprintf(buffer, sizeof(buffer),"%s/%s-%lld-%s", dir,
table_name, rule_id, foreign_key);
char *filename = ALLOC(char, strlen(buffer) + 1);
memcpy(filename, buffer, strlen(buffer));
@@ -363,12 +363,12 @@ int get_inc_key_list(long long instance_version, long long target_version,
int i = 0;
int j = 0;
char op_str[4] = {0};
char op_str[256] = {0}; // reply->element[i]->str length less than 256
struct serial_rule *s_rule = ALLOC(struct serial_rule, reply->elements);
for (i = 0, j = 0; i < (int)reply->elements; i++) {
assert(reply->element[i]->type == REDIS_REPLY_STRING);
int ret = sscanf(reply->element[i]->str, "%[^,],%[^,],%lu",
int ret = sscanf(reply->element[i]->str, "%[^,],%[^,],%lld",
op_str, s_rule[j].table_name, &(s_rule[j].rule_id));
if (ret != 3 || s_rule[i].rule_id < 0) {
log_error(logger, MODULE_REDIS_MONITOR,

View File

@@ -25,6 +25,7 @@
#include "maat_table.h"
#include "maat_compile.h"
#include "maat_plugin.h"
#include "ip_matcher.h"
#include "alignment.h"
#include "maat_garbage_collection.h"

View File

@@ -2,8 +2,17 @@ VERS_3.0{
global:
extern "C" {
*MAAT_FRAME_VERSION_*;
*maat_*;
*bool_matcher_*;
maat_options*;
maat_new;
maat_free;
maat_table*;
maat_compile_table*;
maat_plugin_table*;
maat_ip_plugin_table*;
maat_fqdn_plugin_table*;
maat_bool_plugin_table*;
maat_scan*;
maat_state_*;
};
local: *;
};

View File

@@ -2,6 +2,8 @@ include_directories(${PROJECT_SOURCE_DIR}/include)
include_directories(${PROJECT_SOURCE_DIR}/src/inc_internal)
include_directories(${PROJECT_SOURCE_DIR}/deps)
include_directories(${PROJECT_SOURCE_DIR}/scanner)
include_directories(${PROJECT_SOURCE_DIR}/scanner/adapter_hs)
include_directories(${PROJECT_SOURCE_DIR}/scanner/ip_matcher)
add_executable(maat_api_gtest maat_api_gtest.cpp)
target_link_libraries(maat_api_gtest maat_frame_shared gtest_static)

View File

@@ -4,7 +4,7 @@
#include "maat_rule.h"
#include "maat_utils.h"
#include "maat_command.h"
#include "IPMatcher.h"
#include "ip_matcher.h"
#include "json2iris.h"
#include "log/log.h"
#include "maat_config_monitor.h"
@@ -125,6 +125,11 @@ TEST_F(MaatFlagScan, hitMultiCompile) {
EXPECT_EQ(results[0], 194);
EXPECT_EQ(results[1], 192);
// memset(results, 0, sizeof(results));
// ret = maat_scan_flag(g_maat_instance, flag_table_id, 0, flag_scan_data, results,
// ARRAY_SIZE, &n_hit_result, &state);
// EXPECT_EQ(ret, MAAT_SCAN_OK);
struct maat_hit_path hit_path[HIT_PATH_SIZE] = {0};
int n_read = 0;
n_read = maat_state_get_hit_paths(g_maat_instance, &state, hit_path, HIT_PATH_SIZE);
@@ -1352,25 +1357,60 @@ TEST_F(Policy, CompileEXData) {
ASSERT_GT(table_id, 0);
int ex_data_counter = 0;
int ret = maat_compile_table_ex_schema_register(g_maat_instance, "COMPILE",
compile_ex_param_new,
compile_ex_param_free,
compile_ex_param_dup,
0, &ex_data_counter);
ASSERT_TRUE(ret >= 0);
int compile_table_id = maat_table_get_id(g_maat_instance, "COMPILE");
int ret = maat_compile_table_ex_schema_register(g_maat_instance, compile_table_id,
compile_ex_param_new,
compile_ex_param_free,
compile_ex_param_dup,
0, &ex_data_counter);
EXPECT_EQ(ret, 0);
EXPECT_EQ(ex_data_counter, 1);
ret = maat_scan_string(g_maat_instance, table_id, 0, url, strlen(url),
results, ARRAY_SIZE, &n_hit_result, &state);
EXPECT_EQ(ret, MAAT_SCAN_HIT);
EXPECT_EQ(results[0], 141);
void *ex_data=Maat_rule_get_ex_data(g_feather, result, ex_param_idx);
ASSERT_TRUE(ex_data!=NULL);
struct rule_ex_param* param=(struct rule_ex_param*)ex_data;
void *ex_data = maat_compile_table_get_ex_data(g_maat_instance, compile_table_id, results[0]);
ASSERT_TRUE(ex_data != NULL);
struct rule_ex_param *param = (struct rule_ex_param *)ex_data;
EXPECT_EQ(param->id, 7799);
str_unescape(param->name);
EXPECT_EQ(strcmp(param->name, expect_name),0);
compile_ex_param_free(0, NULL, NULL, &ex_data, 0, NULL);
Maat_clean_status(&mid);
compile_ex_param_free(compile_table_id, &ex_data, 0, NULL);
maat_state_free(&state);
}
#endif
#if 0
// Two-step scan sharing one maat_state: a string scan on MAIL_ADDR that is
// expected to half-hit, followed by an IPv4 scan on IP_CONFIG that is expected
// to complete the hit with result id 153.
TEST_F(Policy, SubGroup) {
long long results[ARRAY_SIZE] = {0};
size_t n_hit_result = 0;
struct maat_state *state = NULL;
const char *scan_data = "ceshi6@mailhost.cn";
uint32_t sip;
// inet_pton stores the address in network byte order.
inet_pton(AF_INET,"10.0.6.205", &sip);
int table_id = maat_table_get_id(g_maat_instance, "MAIL_ADDR");
ASSERT_GT(table_id, 0);
// Step 1: the mail address alone is expected to give only a half hit.
int ret = maat_scan_string(g_maat_instance, table_id, 0, scan_data, strlen(scan_data),
results, ARRAY_SIZE, &n_hit_result, &state);
EXPECT_EQ(ret, MAAT_SCAN_HALF_HIT);
table_id = maat_table_get_id(g_maat_instance, "IP_CONFIG");
ASSERT_GT(table_id, 0);
// Step 2: scanning the source ip with the same state is expected to produce the full hit.
ret = maat_scan_ipv4(g_maat_instance, table_id, 0, sip, results, ARRAY_SIZE,
&n_hit_result, &state);
EXPECT_EQ(ret, MAAT_SCAN_HIT);
EXPECT_EQ(results[0], 153);
maat_state_free(&state);
}
#endif
class MaatCmdTest : public testing::Test

View File

@@ -41,14 +41,18 @@
"table_type": "ip_plus",
"table_content": {
"addr_type": "ipv4",
"src_ip": "192.168.40.88",
"mask_src_ip": "255.255.255.255",
"src_port": "0",
"mask_src_port": "65535",
"dst_ip": "0.0.0.0",
"mask_dst_ip": "255.255.255.255",
"dst_port": "0",
"mask_dst_port": "65535",
"saddr_format": "mask",
"src_ip1": "192.168.40.88",
"src_ip2": "255.255.255.255",
"sport_format": "range",
"src_port1": "0",
"src_port2": "65535",
"daddr_format": "mask",
"dst_ip1": "0.0.0.0",
"dst_ip2": "255.255.255.255",
"dport_format": "range",
"dst_port1": "0",
"dst_port2": "65535",
"protocol": 6,
"direction": "double"
}
@@ -196,14 +200,18 @@
"table_type": "ip_plus",
"table_content": {
"addr_type": "ipv4",
"src_ip": "10.0.6.201",
"mask_src_ip": "255.255.0.0",
"src_port": "0",
"mask_src_port": "65535",
"dst_ip": "0.0.0.0",
"mask_dst_ip": "255.255.255.255",
"dst_port": "0",
"mask_dst_port": "65535",
"saddr_format": "mask",
"src_ip1": "10.0.6.201",
"src_ip2": "255.255.0.0",
"sport_format": "range",
"src_port1": "0",
"src_port2": "65535",
"daddr_format": "mask",
"dst_ip1": "0.0.0.0",
"dst_ip2": "255.255.255.255",
"dport_format": "range",
"dst_port1": "0",
"dst_port2": "65535",
"protocol": 6,
"direction": "double"
}
@@ -213,14 +221,18 @@
"table_type": "ip_plus",
"table_content": {
"addr_type": "ipv6",
"src_ip": "2001:da8:205:1::101",
"mask_src_ip": "ffff:ffff:ffff:ffff:ffff:ffff:ffff:0000",
"src_port": "0",
"mask_src_port": "65535",
"dst_ip": "0::0",
"mask_dst_ip": "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff",
"dst_port": "0",
"mask_dst_port": "65535",
"saddr_format": "mask",
"src_ip1": "2001:da8:205:1::101",
"src_ip2": "ffff:ffff:ffff:ffff:ffff:ffff:ffff:0000",
"sport_format": "range",
"src_port1": "0",
"src_port2": "65535",
"daddr_format": "mask",
"dst_ip1": "0::0",
"dst_ip2": "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff",
"dport_format": "range",
"dst_port1": "0",
"dst_port2": "65535",
"protocol": 6,
"direction": "double"
}
@@ -2044,15 +2056,19 @@
"table_content": {
"protocol": 0,
"addr_type": "ipv6",
"direction": "double",
"src_ip": "::",
"dst_ip": "2620:100:3000::",
"src_port": "0",
"dst_port": "0",
"mask_src_port": "65535",
"mask_src_ip": "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff",
"mask_dst_port": "65535",
"mask_dst_ip": "ffff:ffff:ff00:0000:0000:0000:0000:0000"
"saddr_format": "mask",
"src_ip1": "::",
"src_ip2": "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff",
"sport_format": "range",
"src_port1": "0",
"src_port2": "0",
"daddr_format": "mask",
"dst_ip1": "2620:100:3000::",
"dst_ip2": "ffff:ffff:ff00:0000:0000:0000:0000:0000",
"dport_format": "range",
"dst_port1": "0",
"dst_port2": "65535",
"direction": "double"
}
}
]

42
vendor/CMakeLists.txt vendored
View File

@@ -58,22 +58,6 @@ add_dependencies(hyperscan_runtime_static hyperscan)
set_property(TARGET hyperscan_runtime_static PROPERTY IMPORTED_LOCATION ${VENDOR_BUILD}/lib64/libhs_runtime.a)
set_property(TARGET hyperscan_runtime_static PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${VENDOR_BUILD}/include)
# ipmatcher-1.1
ExternalProject_Add(ipmatcher PREFIX ipmatcher
URL ${CMAKE_CURRENT_SOURCE_DIR}/ipmatcher-v1.1.zip
CONFIGURE_COMMAND ""
BUILD_COMMAND make
INSTALL_COMMAND make DESTDIR=<INSTALL_DIR> install
BUILD_IN_SOURCE 1)
ExternalProject_Get_Property(ipmatcher INSTALL_DIR)
file(MAKE_DIRECTORY ${INSTALL_DIR}/include)
add_library(ipmatcher-static STATIC IMPORTED GLOBAL)
add_dependencies(ipmatcher-static ipmatcher)
set_property(TARGET ipmatcher-static PROPERTY IMPORTED_LOCATION ${INSTALL_DIR}/lib/ipmatcher.a)
set_property(TARGET ipmatcher-static PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${INSTALL_DIR}/include)
# hiredis-1.1.0
ExternalProject_Add(hiredis PREFIX hiredis
URL ${CMAKE_CURRENT_SOURCE_DIR}/hiredis-1.1.0.tar.gz
@@ -108,29 +92,3 @@ add_library(igraph-static STATIC IMPORTED GLOBAL)
add_dependencies(igraph-static igraph)
set_property(TARGET igraph-static PROPERTY IMPORTED_LOCATION ${INSTALL_DIR}/lib/libigraph.a)
set_property(TARGET igraph-static PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${INSTALL_DIR}/include)
#flag-matcher
ExternalProject_Add(flagmatcher PREFIX flagmatcher
URL ${CMAKE_CURRENT_SOURCE_DIR}/flag-matcher-1.0.1.tar.gz
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${VENDOR_BUILD} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_C_FLAGS="-fPIC")
ExternalProject_Get_Property(flagmatcher INSTALL_DIR)
file(MAKE_DIRECTORY ${VENDOR_BUILD}/include)
add_library(flagmatcher-static STATIC IMPORTED GLOBAL)
add_dependencies(flagmatcher-static flagmatcher)
set_property(TARGET flagmatcher-static PROPERTY IMPORTED_LOCATION ${VENDOR_BUILD}/lib/libflag_matcher.a)
set_property(TARGET flagmatcher-static PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${VENDOR_BUILD}/include)
#interval-matcher
ExternalProject_Add(intervalmatcher PREFIX intervalmatcher
URL ${CMAKE_CURRENT_SOURCE_DIR}/interval-matcher-1.0.1.tar.gz
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${VENDOR_BUILD} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_C_FLAGS="-fPIC")
ExternalProject_Get_Property(intervalmatcher INSTALL_DIR)
file(MAKE_DIRECTORY ${VENDOR_BUILD}/include)
add_library(intervalmatcher-static STATIC IMPORTED GLOBAL)
add_dependencies(intervalmatcher-static flagmatcher)
set_property(TARGET intervalmatcher-static PROPERTY IMPORTED_LOCATION ${VENDOR_BUILD}/lib/libinterval_matcher.a)
set_property(TARGET intervalmatcher-static PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${VENDOR_BUILD}/include)

Binary file not shown.

Binary file not shown.

Binary file not shown.