ipmatcher rule_id -> long long & scanner engine centralization

This commit is contained in:
liuwentan
2023-03-01 09:32:36 +08:00
parent a6fb2b6fdd
commit 1566a30002
63 changed files with 4695 additions and 115 deletions

View File

@@ -0,0 +1,430 @@
#include <math.h>
#include <string.h>
#include <memory.h>
#include <time.h>
#include "ACEI.h"
/*
 * Construct an empty index over the key domain [0, 2^eRange).
 *
 * eRange: number of significant bits of a key (32 by default, see ACEI.h).
 *
 * Every pointer and counter is put into a defined "empty" state here so that
 * the destructor (which calls ruin()) is safe even when PreProcessing() was
 * never called: ruin() then only sees NULL pointers and zero-length loops.
 */
ACEI::ACEI(unsigned int eRange):Exp2Size(32)
{
	// Largest representable key, 2^eRange - 1 (0xFFFFFFFF for eRange == 32).
	r = (u_int)(((unsigned long long)1 << eRange) - 1);

	n = 0;
	L = 0;
	H = 0;
	h = 0;
	w = 0;
	SNum = 0;
	SCEINum = 0;

	pExp2 = NULL;
	pTopCEIs = NULL;
	ppBotCEIs = NULL;
	pTopSrlCEIs = NULL;
	ppBotSrlCEIs = NULL;

	HQI.ppQImatrix = NULL;
	HQI.MAXROWS = 0;
	HQI.ROWLEN = 0;
	HQI.rowID = 0;
	HQI.colID = 0;
	HQI.cnt = 0;
}
/* Destructor: hands all owned storage back through ruin(). */
ACEI::~ACEI()
{
	this->ruin();
}
long long ACEI::PreProcessing(const std::vector<u_int> &a, const std::vector<u_int> &b)
{
if (a.size() != b.size())
return -1;
initQIMatrix();
int i;
for (i = 0; i < (int)a.size(); i++)
{
HQI.ppQImatrix[HQI.rowID][HQI.colID].l = a[i];//be careful
HQI.ppQImatrix[HQI.rowID][HQI.colID].r = b[i]+1;//<2F>ڲ<EFBFBD><DAB2><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʾΪ[, )
HQI.ppQImatrix[HQI.rowID][HQI.colID].index = i;
++HQI.cnt;
//adjust QI matrix begin
HQI.colID = (HQI.colID + 1) % HQI.ROWLEN;
if(HQI.colID == 0)
{
++HQI.rowID;
if(HQI.rowID < HQI.MAXROWS)
{
HQI.ppQImatrix[HQI.rowID] = (_QueryInterval*)malloc(sizeof(_QueryInterval)*HQI.ROWLEN);
}
else
{
printf("Query Interval matrix run over, reminder discard !!!\n");
return 1;
}
}
//adjust end
}
n = HQI.cnt;
init();
return 0;
}
int ACEI::Find(unsigned int key, unsigned int * result, unsigned int size)
{
return searchX(key, result, size);
}
//input & output
void ACEI::initQIMatrix()
{
HQI.MAXROWS = 10000;
HQI.ROWLEN = 1024;
HQI.ppQImatrix = (_QueryInterval**)malloc(sizeof(_QueryInterval*)*HQI.MAXROWS);
HQI.ppQImatrix[0] = (_QueryInterval*)malloc(sizeof(_QueryInterval)*HQI.ROWLEN);
HQI.rowID = 0;
HQI.colID = 0;
HQI.cnt = 0;
}
/*
 * Release the input-interval matrix (rows 0..rowID and the row table) and
 * reset the fill cursors.
 *
 * rowID can equal MAXROWS when loading stopped because the matrix was full;
 * in that case there is no row at that index, so the number of rows to free
 * is clamped to MAXROWS to avoid reading past the row table.
 */
void ACEI::ruinQIMatirc()
{
	if (HQI.ppQImatrix)
	{
		u_int rows = HQI.rowID + 1;
		if (rows > HQI.MAXROWS)
			rows = HQI.MAXROWS;
		for (u_int i = 0; i < rows; i++)
			free(HQI.ppQImatrix[i]);
		free(HQI.ppQImatrix);
		HQI.ppQImatrix = NULL;
	}
	HQI.rowID = 0;
	HQI.colID = 0;
	HQI.cnt = 0;
}
//init & ruin
int ACEI::init()
{
u_int i,j,k;
pExp2 = (unsigned long*)malloc(sizeof(unsigned long)*Exp2Size);
for(i = 0; i < Exp2Size; i++) pExp2[i] = (unsigned long)pow((double)2, (double)i);
w = getAvrgRng();
if(n*w>4*r)
{
H = (u_int)ceil((double)log(sqrt((double)w/4))/log((double)2));
h = (u_int)ceil((double)log(sqrt((double)n*w/4/r))/log((double)2));
if(H <= h) H = h+1;
}
else
{
H = (u_int)ceil((double)log(sqrt((double)r/3/n+w/6))/log((double)2));
h = 0;
if(H <= h) H = h+1;
}
L = pExp2[H];
SNum = r/L + 1;
SCEINum = pExp2[h+1];
pTopCEIs = (_CEI*)malloc(sizeof(_CEI)*SNum*SCEINum);
if(!pTopCEIs)
{
printf("Malloc error !!! for pTopCEIs\n");
return -1;
}
memset(pTopCEIs, 0, SNum*SCEINum*sizeof(_CEI));
ppBotCEIs = (_CEI**)malloc(sizeof(_CEI*) * SNum);
if(!ppBotCEIs)
{
printf("Malloc error !!! for ppBotCEIs\n");
return -1;
}
memset(ppBotCEIs, 0, SNum*sizeof(_CEI*));
for (k = 0; k < HQI.cnt; k++)
{
i = k / HQI.ROWLEN;
j = k % HQI.ROWLEN;
addIntvl(HQI.ppQImatrix[i][j].l, HQI.ppQImatrix[i][j].r, &(HQI.ppQImatrix[i][j]));
}
serial();
return 0;
}
void ACEI::ruin()
{
u_int i,j;
free(pExp2);
//releaseCEIs(&pTopCEIs, SNum*SCEINum);
//for (i = 0; i < SNum; i++)
//{
// if (ppBotCEIs[i])
// {
// releaseCEIs(&(ppBotCEIs[i]), L);
// }
//}
//free(ppBotCEIs);
for (i = 0; i < SNum*SCEINum; i++)
if(pTopSrlCEIs[i].head)
free(pTopSrlCEIs[i].head);
free(pTopSrlCEIs);
for (i = 0; i < SNum; i++)
if(ppBotSrlCEIs[i])
{
for (j = 0; j < L; j++)
if(ppBotSrlCEIs[i][j].head)
free(ppBotSrlCEIs[i][j].head);
free(ppBotSrlCEIs[i]);
}
free(ppBotSrlCEIs);
ruinQIMatirc();
}
/*
 * Free an array of lCEIsNum linked-list CEIs: every list node of every entry,
 * then the array itself. *ppIIDSet is set to NULL afterwards.
 */
void ACEI::releaseCEIs(_CEI **ppIIDSet, u_int lCEIsNum)
{
	_CEI *sets = *ppIIDSet;
	for (u_int idx = 0; idx < lCEIsNum; ++idx)
	{
		_IID *node = sets[idx].head;
		while (node != NULL)
		{
			_IID *following = node->next;
			free(node);
			node = following;
		}
	}
	free(sets);
	*ppIIDSet = NULL;
}
//search
int ACEI::searchX(unsigned int x, unsigned int * result, unsigned int size)
{
std::vector<u_int> vecui;
u_int sID, uID, dID, lx;
u_int i;
_SrlIID *pIID;
lx = x;
//if(lx >= r) return vecui;
if(lx > r)
return 0;
sID = lx >> H;
dID = lx - (sID << H);
uID = (dID >> (H - h)) + pExp2[h];
//lx = (u_int)floor(x);
//sID = lx / L;
//dID = lx % L;
//uID = dID / pExp2[H-h] + pExp2[h];
//if (fpQueryRslt) fprintf(fpQueryRslt, "%10.6lf\tin ", x);
//else printf("%10.6lf\tin ", x);
unsigned int n = 0;
if(ppBotSrlCEIs[sID] && ppBotSrlCEIs[sID][dID].head != NULL)
{
for (pIID = ppBotSrlCEIs[sID][dID].head; pIID->iid != NULL; pIID++)
{
if(n >= size)
{
return n;
}
result[n++] = pIID->iid->index;
}
}
for (i = 0; i <= h; i++)
{
if (pTopSrlCEIs[sID*SCEINum+uID].head != NULL)
{
for (pIID = pTopSrlCEIs[sID*SCEINum+uID].head; pIID->iid != NULL; pIID++)
{
if(n >= size)
{
return n;
}
result[n++] = pIID->iid->index;
}
}
uID >>= 1;
}
return n;
}
//insert
//void insertIntvl(u_int l, u_int r)
//{
// _QueryInterval * pQI = (_QueryInterval*)malloc(sizeof(_QueryInterval));
// MemCst += sizeof(_QueryInterval);//for stat.
// pQI->l = l;
// pQI->r = r;
// pQI->next = NULL;
// if (HQI.head != NULL)
// {
// HQI.tail->next = pQI;
// HQI.tail = pQI;
// }
// else
// {
// HQI.head = HQI.tail = pQI;
// }
// HQI.len ++;
// addIntvl(l, r, pQI);
//}
// Register one interval, given as the half-open range [ll, lr), in the CEI tables.
// pQI is the stored interval record that every created list node will point to.
// The range is cut at segment boundaries (segment length L): segments that are
// covered completely are recorded in node 1 of that segment's top-level table,
// and the partial pieces at either end are passed on to addFrgmnt().
void ACEI::addIntvl(u_int ll, u_int lr, _QueryInterval* pQI)// lr == 0 is treated as 2^32 (see below)
{
unsigned long long lft = ll;
unsigned long long rght = lr;
// A right end of 0 is what b+1 wraps to for b == 0xFFFFFFFF; widen it to 2^32.
if (!lr)
rght += ((unsigned long long)1 << 32);
u_int sLID, sRID;
u_int i;
// First segment boundary at or after the left end, last one at or before the right end.
sLID = (u_int)ceil((double)lft/L);
sRID = (u_int)floor((double)rght/L);
// No boundary inside the range: the whole interval lies within segment sRID.
if (sLID > sRID) addFrgmnt(ll, lr, sRID, pQI);
else
{
if (sLID < sRID)
{
// Segments sLID .. sRID-1 are fully covered.
for(i = sLID; i < sRID; i++) addID(&(pTopCEIs[i*SCEINum+1]), pQI);
}
// Leading partial piece, located in segment sLID-1.
if (lft < (unsigned long long)sLID * L) addFrgmnt(ll, sLID*L, sLID-1, pQI);
// Trailing partial piece, located in segment sRID.
if (rght > (unsigned long long)sRID * L) addFrgmnt(sRID*L, lr, sRID, pQI);
}
}
// Register the piece [ll, lr) of an interval that lies inside the single segment sID.
// Offsets are taken modulo L; the piece is then cut at multiples of 2^(H-h):
// unaligned ends go to the bottom-level table through addBtmCEIs(), and the
// aligned middle part is recorded in top-level nodes of segment sID.
void ACEI::addFrgmnt(u_int ll, u_int lr, u_int sID, _QueryInterval* pQI)// lr may be a multiple of L (including 0); it then means the end of the segment
{
u_int lft = ll % L;
u_int rght = lr % L;
u_int sLID, sRID;
u_int i,k;
// A right offset of 0 means the piece runs to the end of the segment.
if(!rght) rght+= L;
// First and last multiple of 2^(H-h) inside the piece, as block numbers.
sLID = (u_int)ceil((double)lft/pExp2[H-h]);
sRID = (u_int)floor((double)rght/pExp2[H-h]);
// No block boundary inside the piece: everything goes to the bottom level.
if (sLID > sRID) addBtmCEIs(lft, rght, sID, pQI);
else
{
if (lft < sLID * pExp2[H-h]) addBtmCEIs(lft, sLID*pExp2[H-h], sID, pQI);
if (rght > sRID * pExp2[H-h]) addBtmCEIs(sRID*pExp2[H-h], rght, sID, pQI);
if (sLID < sRID)
{
// Turn block numbers into node ids of the segment's top-level table
// (searchX() starts its walk at node id block + 2^h).
sLID += pExp2[h];
sRID += pExp2[h];
// i: number of blocks covered by the node currently considered,
// k: number of blocks still to be recorded.
i = 1;
k = sRID - sLID;
while(1)
{
// Move to the parent node while the node id is even and the doubled
// span still fits into the remaining k blocks.
if (sLID % 2 == 0 && i*2 <= k)
{
sLID /= 2;
i *= 2;
}
else
{
// Record the interval in this node, then restart at the first
// block that is not yet covered.
addID(&(pTopCEIs[sID*SCEINum+sLID]), pQI);
sLID = sRID + i - k;
k = k - i;
i = 1;
if (k == 0) return;
}
}
}
}
return;
}
/*
 * Record the interval pQI in the bottom-level list of every offset in
 * [ll, lr) of segment sID. The segment's bottom table (L zeroed entries) is
 * allocated on first use.
 */
void ACEI::addBtmCEIs(u_int ll, u_int lr, u_int sID, _QueryInterval *pQI)
{
	if (ll < lr && ppBotCEIs[sID] == NULL)
	{
		const size_t bytes = sizeof(_CEI) * L;
		ppBotCEIs[sID] = (_CEI*)malloc(bytes);
		memset(ppBotCEIs[sID], 0, bytes);
	}
	u_int off = ll;
	while (off < lr)
	{
		addID(&(ppBotCEIs[sID][off]), pQI);
		++off;
	}
}
/* Append a new node referring to pQI at the tail of the list held by pCEI. */
void ACEI::addID(_CEI* pCEI, _QueryInterval* pQI)
{
	_IID *node = (_IID*)malloc(sizeof(_IID));
	node->next = NULL;
	node->iid = pQI;

	if (pCEI->head == NULL)
		pCEI->head = node;          // first element of an empty list
	else
		pCEI->tail->next = node;    // link behind the current tail
	pCEI->tail = node;
}
//utility
double ACEI::getAvrgRng()
{
double d = 0;
u_int i,j,k;
if (HQI.cnt == 0)
{
printf("!!! NO QueryISet loaded when calc w\n");
return 0;
}
for (k = 0; k < HQI.cnt; k++)
{
i = k / HQI.ROWLEN;
j = k % HQI.ROWLEN;
d += HQI.ppQImatrix[i][j].r - HQI.ppQImatrix[i][j].l;
}
return d / n;
}
//serialization
/*
 * Pack the linked list of pCEI into a freshly allocated array stored in
 * pSrlCEI->head. The array has one extra trailing entry whose iid is NULL and
 * serves as the end marker. pCEI must not be NULL; its list is left untouched.
 */
void ACEI::serializeCEI(_CEI *pCEI, _SrlCEI* pSrlCEI)
{
	u_int count = 0;
	for (const _IID *p = pCEI->head; p != NULL; p = p->next)
		++count;

	_SrlIID *packed = (_SrlIID*)malloc(sizeof(_SrlIID)*(count+1));
	pSrlCEI->head = packed;

	u_int pos = 0;
	for (const _IID *p = pCEI->head; p != NULL; p = p->next)
		packed[pos++].iid = p->iid;

	packed[count].iid = NULL;
}
void ACEI::serialBtmCEIs()
{
u_int i,j;
ppBotSrlCEIs = (_SrlCEI**)malloc(sizeof(_SrlCEI*)*SNum);
memset(ppBotSrlCEIs, 0, sizeof(_SrlCEI*)*SNum);
for (i = 0; i < SNum; i++)
{
if (ppBotCEIs[i])
{
ppBotSrlCEIs[i] = (_SrlCEI*)malloc(sizeof(_SrlCEI)*L);
memset(ppBotSrlCEIs[i], 0, sizeof(_SrlCEI)*L);
for (j = 0; j < L; j++)
if (ppBotCEIs[i][j].head)
serializeCEI(&(ppBotCEIs[i][j]), &(ppBotSrlCEIs[i][j]));
else
ppBotSrlCEIs[i][j].head = NULL;
//free(ppBotCEIs[i]);
}
}
//free(ppBotCEIs);
}
/*
 * Build the packed top-level table: one zeroed entry per (segment, node) slot,
 * with every non-empty linked list serialized into its slot.
 */
void ACEI::serialTopCEIs()
{
	const size_t bytes = sizeof(_SrlCEI)*SNum*SCEINum;
	pTopSrlCEIs = (_SrlCEI*)malloc(bytes);
	memset(pTopSrlCEIs, 0, bytes);

	u_int slot = 0;
	while (slot < SNum*SCEINum)
	{
		if (pTopCEIs[slot].head)
			serializeCEI(&(pTopCEIs[slot]), &(pTopSrlCEIs[slot]));
		++slot;
	}
}
/*
 * Convert the linked-list CEIs into their packed array form and then release
 * the linked-list form (top table, every allocated bottom table and the
 * bottom row table).
 */
void ACEI::serial()
{
	serialTopCEIs();
	serialBtmCEIs();

	releaseCEIs(&pTopCEIs, SNum*SCEINum);

	for (u_int seg = 0; seg < SNum; ++seg)
	{
		if (!ppBotCEIs[seg])
			continue;
		releaseCEIs(&(ppBotCEIs[seg]), L);
	}
	free(ppBotCEIs);
}

View File

@@ -0,0 +1,135 @@
/************************************************************************/
/*
AdvanceCEI 0621.2007 v1.1
Yao Qiu-lin (yaoqiulin@software.ict.ac.cn)
Modifications:
1. Converted to an object-oriented (class based) form.
2. The file based interface was replaced by an in-memory interface.
Note: in principle no further query intervals should be added after PreProcessing, because that would break the optimality of the data structure that has been built,
even though inserting individual QueryIntervals through insertIntvl was supported in practical use.
If query intervals have to be added, rebuild the index from the new, complete interval set.
(Translated from a mis-encoded GBK comment; the wording is a best-effort reconstruction.)
*/
/************************************************************************/
#ifndef _ADVANCECEI_YAOQIULIN_20080509_
#define _ADVANCECEI_YAOQIULIN_20080509_
#include "IntervalIndex.h"
#include <stdlib.h>
#include <stdio.h>
#include <vector>
//#define u_int unsigned int
typedef unsigned int u_int;
// One input interval as stored by ACEI::PreProcessing: l is the left end,
// r is the right end plus one (half-open form), index is the position of the
// interval in the input vectors and is what lookups report.
typedef struct _QueryInterval_
{
u_int l;
u_int r;
u_int index;
} _QueryInterval;
// Growable store for the input intervals: a table of MAXROWS row pointers,
// each row holding ROWLEN intervals; rows are allocated as they are needed.
typedef struct _HQueryI_
{
_QueryInterval ** ppQImatrix;
u_int MAXROWS;
u_int ROWLEN;
u_int rowID;// row of the next free cell
u_int colID;// column of the next free cell
u_int cnt;// number of intervals stored so far
}_HQueryI;
// Node of a singly linked list of interval references (used while building).
typedef struct _IID_ // NOTE(review): original comment was mis-encoded; it appears to suggest a more compact structure could be used here - confirm
{
_QueryInterval * iid;
struct _IID_ * next;
}_IID;
// Linked list of interval references with head and tail for O(1) append.
typedef struct _IIDSet_
{
_IID *head;
_IID *tail;
} _IIDSet;
typedef _IIDSet _CEI;
//pack type
// Packed (array) form of a list element; an element with iid == NULL marks the end.
typedef struct _SrlIID_
{
_QueryInterval * iid;
}_SrlIID;
// Packed list: pointer to the first element of a NULL-terminated array, or NULL when empty.
typedef struct _SrlIIDSet_
{
_SrlIID *head;
}_SrlIIDSet;
typedef _SrlIIDSet _SrlCEI;
// Interval index built from segment tables: PreProcessing() loads a set of
// closed intervals, Find() reports the indexes of the intervals stored for a key.
// NOTE(review): the class owns raw malloc'ed storage and declares no copy
// constructor / assignment operator, so copying an instance would free the
// same buffers twice - confirm instances are never copied.
class ACEI : public CIntervalIndex
{
public:
// Keys lie in [0, 2^eRange).
ACEI(u_int eRange = 32);
virtual ~ACEI();
// Loads the intervals [ a[i], b[i] ] (both ends inclusive) and builds the index.
// Returns -1 when a and b differ in length.
virtual long long PreProcessing(const std::vector<unsigned int>& a, const std::vector<unsigned int>& b);
// Writes the indexes of the intervals found for key into result (at most size
// entries) and returns the number written.
virtual int Find(unsigned int key, unsigned int * result, unsigned int size);
private:
//init & ruin
void initQIMatrix();
void ruinQIMatirc();
int init();
void ruin();
void releaseCEIs(_CEI **ppIIDSet, u_int lCEIsNum);
//search
int searchX(unsigned int x, unsigned int * result, unsigned int size);// NOTE(review): original comment mis-encoded; it mentions double vs. u_int for the key type - confirm
//insert
//void insertIntvl(u_int l, u_int r)
void addIntvl(u_int ll, u_int lr, _QueryInterval* pQI);// interval is half-open [ll, lr); lr == 0 stands for 2^32
void addFrgmnt(u_int ll, u_int lr, u_int sID, _QueryInterval* pQI);
void addBtmCEIs(u_int ll, u_int lr, u_int sID, _QueryInterval *pQI);
void addID(_CEI* pCEI, _QueryInterval* pQI);
//utility
double getAvrgRng();
//serialization
void serializeCEI(_CEI *pCEI, _SrlCEI* pSrlCEI);//suppose pCEI not NULL
void serialBtmCEIs();
void serialTopCEIs();
void serial();
private:
//input data
_HQueryI HQI; // the loaded input intervals
//model data
u_int n; // number of loaded intervals
u_int r; // largest key, 2^eRange - 1
u_int L;//L=2^H, Segment Len
u_int H;// log2 of the segment length
u_int h;// number of tree levels below the root in a segment's top-level table
double w; // average interval length
u_int SNum;//Segment Num
u_int SCEINum;// top-level slots per segment, 2^(h+1)
unsigned long *pExp2;// table of powers of two, Exp2Size entries
const u_int Exp2Size;
_CEI *pTopCEIs;// linked-list top table, SNum*SCEINum entries (build phase only)
_CEI **ppBotCEIs;// per segment: linked-list bottom table of L entries, or NULL (build phase only)
//for serial
_SrlCEI *pTopSrlCEIs;// packed top table used by lookups
_SrlCEI **ppBotSrlCEIs;// per segment: packed bottom table of L entries, or NULL
};
#endif

View File

@@ -0,0 +1,159 @@
/*
*
* Copyright (c) 2008--2012
* String Matching Group, Lab for Intelligent Information Processing Technology,
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS).
* All rights reserved.
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2012-07-10
*
* This code is the exclusive and proprietary property of IIE-CAS. Usage for direct
* or indirect commercial advantage is not allowed without written permission from
* the authors.
*
*/
//#define DEBUG_CEI
#include "CEI.h"
#include <algorithm>
#include <iterator>
#include <cstdlib>
#include <cassert>
using namespace std;
/*
 * Smallest power of two that is >= n (1 for n == 0).
 *
 * The largest power of two an unsigned int can hold is 2^31; any n above that
 * is clamped to 2^31. Without the clamp the doubling below would wrap to 0 and
 * never terminate.
 */
unsigned int RoundToPowerOfTwo(unsigned int n)
{
	const unsigned int kMaxPow2 = 1u << 31;
	if (n >= kMaxPow2)
		return kMaxPow2;
	unsigned int N = 1;
	while (N < n) N <<= 1;
	return N;
}
/*
 * r: key range; keys handled by the index lie in [0, r). Rounded up to a power of two.
 * L: segment size. Rounded up to a power of two and limited to r.
 *
 * r must be no greater than 2^31.
 * NOTE(review): the offset table has 2*r+1 entries and 2*r is computed in
 * 32 bits, so r == 2^31 itself wraps - the practical limit looks like 2^30; confirm.
 */
CEI::CEI(unsigned int r, unsigned int L)
{
	if(r==0 || (r&(r-1))!=0)
	{
		r=RoundToPowerOfTwo(r);
	}
	if(L==0 || (L&(L-1))!=0)
	{
		L=RoundToPowerOfTwo(L);
	}
	// A segment longer than the whole range would push every node index
	// (up to 2*L-1) past the 2*r slots of the tables.
	if(L>r) L=r;
	m_r=r;
	m_L=L;
	// Value-initialized: all offsets are 0 (every list empty) until PreProcessing runs.
	m_pIDList=new unsigned int[(r<<1)+1]();
	m_pIDArray=NULL;
}
/* Releases the offset table and the flattened ID array (delete[] accepts NULL). */
CEI::~CEI()
{
	delete [] m_pIDList;
	delete [] m_pIDArray;
}
/*
 * Build the index from the closed intervals [a[i], b[i]].
 *
 * Each interval is converted to half-open form, cut at multiples of the
 * segment size m_L, and its id is stored in per-node lists; the lists are then
 * flattened into m_pIDArray with m_pIDList holding the start offsets.
 *
 * Only keys in [0, m_r) can be represented: intervals are clipped to that
 * range and intervals that start at or beyond m_r (or have a[i] > b[i]) are
 * skipped. Returns the number of bytes used by the tables, or -1 when a and b
 * differ in length.
 */
long long CEI::PreProcessing(const std::vector<unsigned int>& a, const std::vector<unsigned int>& b)
{
	if(a.size()!=b.size()) return -1;

	std::vector< std::vector<unsigned int> > IDList(m_r<<1);
	for(size_t i=0; i<a.size(); i++)
	{
		if(a[i]>b[i] || a[i]>=m_r) continue;
		const unsigned int lo=a[i];
		// Exclusive right end, formed in 64 bits so b[i]==UINT_MAX cannot wrap,
		// then clipped to the representable range.
		const unsigned long long end64=(unsigned long long)b[i]+1;
		const unsigned int hi=(end64>m_r) ? m_r : (unsigned int)end64;
		const unsigned int id=(unsigned int)i;

		const unsigned int inf=(lo+m_L-1)&-m_L; // first segment boundary >= lo
		const unsigned int sup=hi&-m_L;         // last segment boundary <= hi
		if(inf>sup)
		{
			this->BestPartition(id, lo, hi, IDList);
		}
		else
		{
			// Fully covered segments go to node 1 of the segment.
			for(unsigned int p=inf; p<sup; p+=m_L)
			{
				IDList[(p<<1)+1].push_back(id);
			}
			if(lo<inf) this->BestPartition(id, lo, inf, IDList);
			if(sup<hi) this->BestPartition(id, sup, hi, IDList);
		}
	}

	// Prefix sums: list c occupies m_pIDArray[m_pIDList[c] .. m_pIDList[c+1]).
	m_pIDList[0]=0;
	for(size_t c=0; c<IDList.size(); c++)
	{
		m_pIDList[c+1]=m_pIDList[c]+(unsigned int)IDList[c].size();
	}

	// Drop the array of a previous run before allocating the new one.
	delete [] m_pIDArray;
	m_pIDArray=NULL;
	m_pIDArray=new unsigned int[m_pIDList[2*m_r]];
	for(size_t c=0; c<IDList.size(); c++)
	{
		std::copy(IDList[c].begin(), IDList[c].end(), m_pIDArray+m_pIDList[c]);
	}

	long long iMemBytes=sizeof(unsigned int)*(2*m_r+1+m_pIDList[2*m_r]);
#ifdef DEBUG_CEI
	printf("CEI membyte=%5.3lf (MB).\n", (double)iMemBytes/(1u<<20));
#endif
	return iMemBytes;
}
/*
 * Record id for the range [a, b), which must lie inside one segment.
 * The range is covered greedily from the left: at each position the node is
 * moved towards the root for as long as its id is even and the doubled span
 * still ends at or before b; id is then appended to that node's list.
 */
void CEI::BestPartition(unsigned int id, unsigned int a, unsigned int b, std::vector< std::vector<unsigned int> >& IDList)
{
	const unsigned int base=(a&-m_L)<<1;   // first list slot of the segment
	unsigned int pos=a;
	while(pos<b)
	{
		unsigned int node=(pos&(m_L-1))+m_L;  // leaf node for this position
		unsigned int span=1;                  // keys covered by the node
		for(;;)
		{
			if((node&1)!=0) break;
			if(pos+(span<<1)>b) break;
			node>>=1;
			span<<=1;
		}
		IDList[base+node].push_back(id);
		pos+=span;
	}
}
/*
 * qsort comparator for unsigned int, ascending.
 * Returns -1 / 0 / 1. The values are compared directly: subtracting them and
 * converting the unsigned difference to int gives the wrong sign whenever the
 * two values are more than INT_MAX apart.
 */
static int compare(const void * a, const void * b)
{
	const unsigned int x = *(const unsigned int *)a;
	const unsigned int y = *(const unsigned int *)b;
	return (x > y) - (x < y);
}
int CEI::Find(unsigned int key, unsigned int * result, unsigned int size)
{
unsigned int g=(key&-m_L)<<1;
unsigned int n = 0;
for(unsigned int t=(key&(m_L-1))+m_L; t; t>>=1)
{
unsigned int c=g+t;
if(m_pIDList[c]<m_pIDList[c+1])
{
int s = m_pIDList[c+1] - m_pIDList[c];
unsigned int * id_array = m_pIDArray + m_pIDList[c];
for(int i = 0; i < s; i++)
{
if(n >= size)
{
qsort(result, n, sizeof(unsigned int), compare);
return n;
}
result[n++] = id_array[i];
}
}
}
qsort(result, n, sizeof(unsigned int), compare);
return n;
}

View File

@@ -0,0 +1,47 @@
/*
*
* Copyright (c) 2008--2012
* String Matching Group, Lab for Intelligent Information Processing Technology,
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS).
* All rights reserved.
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2012-07-10
*
* This code is the exclusive and proprietary property of IIE-CAS. Usage for direct
* or indirect commercial advantage is not allowed without written permission from
* the authors.
*
*/
#ifndef H_CEI_CPP_H
#define H_CEI_CPP_H
#include <limits.h>
#include "IntervalIndex.h"
#include <vector>
// Interval index for keys in [0, r): PreProcessing() stores interval ids in
// per-segment node lists, Find() gathers the lists on the path of a key.
// NOTE(review): owns raw new[] arrays and declares no copy operations; copying
// an instance would delete the arrays twice - confirm instances are never copied.
class CEI : public CIntervalIndex
{
public:
// r and L are rounded up to powers of two by the constructor.
CEI(unsigned int r=65536, unsigned int L=64);
virtual ~CEI();
// Builds the index from the closed intervals [a[i], b[i]]; returns the table size in bytes.
virtual long long PreProcessing(const std::vector<unsigned int>& a, const std::vector<unsigned int>& b);
// Writes the sorted ids of the intervals containing key into result (at most size); returns the count.
virtual int Find(unsigned int key, unsigned int * result, unsigned int size);
private:
// Records id for a range [a, b) that lies within one segment.
void BestPartition(unsigned int id, unsigned int a, unsigned int b, std::vector< std::vector<unsigned int> >& IDList);
private:
unsigned int m_r; // maximum range of integer
unsigned int m_L; // segment size for each CEI virtual construct
unsigned int * m_pIDList; // 2*m_r+1 start offsets into m_pIDArray, one list per node slot
unsigned int * m_pIDArray; // all id lists, concatenated; NULL until PreProcessing ran
};
#endif

View File

@@ -0,0 +1,7 @@
# Interval-index algorithms, built as a static archive that can be linked into
# shared objects.
add_definitions(-D_GNU_SOURCE)

# STATIC is spelled out so the target matches its name even when the project
# sets BUILD_SHARED_LIBS.
add_library(interval_index_static STATIC ACEI.cpp CEI.cpp DirectIndex.cpp FastIntervalIndex.cpp
Int128IntervalIndex.cpp IntervalIndex.cpp IntervalTree.cpp IPMaskIndex.cpp
NaiveIntervalIndex.cpp NaiveIntervalIndex2.cpp PortIndex.cpp SuccinctHash.cpp
sigmastar_tools.cpp)

# -fPIC is a code-generation option, not a preprocessor definition; request it
# through the target property so CMake emits the right flag for the toolchain.
set_target_properties(interval_index_static PROPERTIES POSITION_INDEPENDENT_CODE ON)

View File

@@ -0,0 +1,159 @@
/*
*
* Copyright (c) 2008--2012
* Advanced Algorithm and Algorithm Engineering Group
* National Engineering Laboratory for Information Security Technologies (NELIST)
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS)
* All rights reserved
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2012-11-27
*
* This code is the exclusive and proprietary property of NELIST and IIE-CAS.
* Usage for direct or indirect commercial advantage is not allowed without
* written permission from the authors.
*
*/
#include "DirectIndex.h"
#include <algorithm>
#include <map>
using namespace std;
//#define DEBUG_DIRECT_INDEX
/*
 * Creates an empty index. All members get defined values: the key window is
 * set to the empty range (min > max) so that Find() rejects every key until
 * PreProcessing() has built the tables.
 */
CDirectIndex::CDirectIndex()
{
	m_keys=NULL;
	m_ptr=NULL;
	m_values=NULL;
	m_theta=0;
	m_min_key=~0u;
	m_max_key=0;
	for(unsigned int i=0; i<sizeof(m_L)/sizeof(m_L[0]); i++) m_L[i]=0;
}
/* Releases the key, offset and value arrays (delete[] accepts NULL). */
CDirectIndex::~CDirectIndex()
{
	delete [] m_values;
	delete [] m_ptr;
	delete [] m_keys;
}
/*
closed interval: [a[i], b[i]] such that a[i]<=b[i]
*/
/*
 * Build a per-key table from the closed intervals [a[i], b[i]].
 *
 * Every key covered by some interval gets the list of covering interval ids.
 * This is only done when the intervals are short: the total number of covered
 * (key, interval) pairs may not exceed 2 * a.size().
 *
 * Returns the memory used in bytes, or -1 when the input is empty, a and b
 * differ in length, some a[i] > b[i], or the intervals are too long for this
 * index.
 */
long long CDirectIndex::PreProcessing(const vector<unsigned int>& a, const vector<unsigned int>& b)
{
	// An empty key set has no first/last key to build the window from.
	if(a.empty() || a.size()!=b.size()) return -1;

	long long mem_bytes=0;
	unsigned long long count=0;
	for(unsigned int i=0; i<a.size(); i++)
	{
		if(a[i]>b[i]) return -1;
		count+=((unsigned long long)b[i]-a[i]+1);
		if(count>2*a.size()) return -1;
	}

	// Number of intervals covering each key.
	map<unsigned int, unsigned int> M;
	for(unsigned int i=0; i<a.size(); i++)
	{
		for(unsigned long long t=a[i]; t<=b[i]; t++)
		{
			M[(unsigned int)t]++;
		}
	}

	// Release the tables of a previous run before building new ones.
	delete [] m_keys;
	delete [] m_ptr;
	delete [] m_values;
	m_keys=NULL;
	m_ptr=NULL;
	m_values=NULL;

	m_keys  =new unsigned int[M.size()];
	m_ptr   =new unsigned int[M.size()+1];
	m_values=new unsigned int[(unsigned int)count];
	mem_bytes+=sizeof(unsigned int)*(2*M.size()+(unsigned int)count);

	// m_ptr[k] = start of key k's id list (prefix sums of the counts).
	m_ptr[0]=0;
	map<unsigned int, unsigned int>::const_iterator it=M.begin();
	for(unsigned int k=0; k<M.size(); k++, ++it)
	{
		m_keys[k]=it->first;
		m_ptr[k+1]=m_ptr[k]+it->second;
	}

	// Fill the lists; m_ptr[k] is used as a write cursor and ends up at the
	// start of list k+1 ...
	for(unsigned int i=0; i<a.size(); i++)
	{
		for(unsigned long long t=a[i]; t<=b[i]; t++)
		{
			unsigned int k=(unsigned int)(lower_bound(m_keys, m_keys+M.size(), (unsigned int)t)-m_keys);
			m_values[m_ptr[k]++]=i;
		}
	}
	// ... so shifting the table by one slot restores the start offsets.
	for(int k=(int)M.size(); k>0; k--)
	{
		m_ptr[k]=m_ptr[k-1];
	}
	m_ptr[0]=0;

	// Store keys relative to the smallest one.
	m_min_key=m_keys[0];
	m_max_key=m_keys[M.size()-1];
	for(unsigned int k=0; k<M.size(); k++)
	{
		m_keys[k]-=m_min_key;
	}

	// Smallest shift so that (largest relative key >> theta) fits in 16 bits.
	unsigned long long ONE=1;
	unsigned int theta=0;
	while((ONE<<(theta+16))<=m_keys[M.size()-1]) theta++;
	m_theta=theta;

	// m_L[i] = first position in m_keys whose key falls into bucket i or later.
	m_L[0]=0;
	for(unsigned int i=1; i<65536; i++)
	{
		m_L[i]=(unsigned int)(lower_bound(m_keys, m_keys+M.size(), i*(1U<<theta))-m_keys);
	}
	m_L[65536]=(unsigned int)M.size();
	mem_bytes+=sizeof(m_L);
	M.clear();
#ifdef DEBUG_DIRECT_INDEX
	printf("Direct Index membyte=%5.3lf (MB).\n", (double)mem_bytes/(1u<<20));
#endif
	return mem_bytes;
}
int CDirectIndex::Find(unsigned int key, unsigned int * result, unsigned int size)
{
if(key<m_min_key || key>m_max_key)
return 0;
key-=m_min_key;
unsigned int k=key>>m_theta;
int l=m_L[k], h=(int)m_L[k+1]-1;
if(h<l)
return 0;
while(l<=h)
{
int m=(l+h)/2;
if(key<m_keys[m]) h=m-1;
else l=m+1;
}
unsigned int n = 0;
if(h<(int)m_L[k] || m_keys[h]!=key)
{
return 0;
}
else
{
int s = m_ptr[h+1] - m_ptr[h];
unsigned int * values = m_values+m_ptr[h];
for(int i = 0; i < s; i++)
{
if(n >= size)
{
return n;
}
result[n++] = values[i];
}
}
return n;
}

View File

@@ -0,0 +1,44 @@
/*
*
* Copyright (c) 2008--2012
* Advanced Algorithm and Algorithm Engineering Group
* National Engineering Laboratory for Information Security Technologies (NELIST)
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS)
* All rights reserved
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2012-11-27
*
* This code is the exclusive and proprietary property of NELIST and IIE-CAS.
* Usage for direct or indirect commercial advantage is not allowed without
* written permission from the authors.
*
*/
#ifndef H_DIRECT_INDEX_CPP_H
#define H_DIRECT_INDEX_CPP_H
#include "IntervalIndex.h"
// Per-key lookup table for sets of short intervals: every covered key is
// stored explicitly together with the ids of the intervals covering it.
// NOTE(review): owns raw new[] arrays and declares no copy operations - confirm
// instances are never copied.
class CDirectIndex : public CIntervalIndex
{
public:
CDirectIndex();
virtual ~CDirectIndex();
// Builds the tables from the closed intervals [a[i], b[i]]; returns the memory
// used in bytes, or -1 when the intervals cover more than 2*a.size() keys in total.
virtual long long PreProcessing(const std::vector<unsigned int>& a, const std::vector<unsigned int>& b);
// Writes the ids of the intervals covering key into result (at most size); returns the count.
virtual int Find(unsigned int key, unsigned int * result, unsigned int size);
private:
unsigned int m_L[65537]; // bucket table: m_L[i] is the first position in m_keys for bucket i
unsigned int m_theta; // shift that maps a relative key to its bucket
unsigned int m_min_key; // smallest covered key
unsigned int m_max_key; // largest covered key
unsigned int * m_keys; // covered keys, sorted, stored relative to m_min_key
unsigned int * m_ptr; // m_ptr[k] .. m_ptr[k+1] delimit key k's ids in m_values
unsigned int * m_values; // interval ids, grouped by key
};
#endif

View File

@@ -0,0 +1,141 @@
/*
*
* Copyright (c) 2008--2012
* String Matching Group, Lab for Intelligent Information Processing Technology,
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS).
* All rights reserved.
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2012-07-10
*
* This code is the exclusive and proprietary property of IIE-CAS. Usage for direct
* or indirect commercial advantage is not allowed without written permission from
* the authors.
*
*/
//#define DEBUG_FAST_INTERVAL_INDEX
#include "FastIntervalIndex.h"
#include "NaiveIntervalIndex.h"
#include "NaiveIntervalIndex2.h"
#include <climits>
#include <algorithm>
#include <set>
#include <cassert>
using namespace std;
/*
 * Prepare an index for keys in [0, 2^r) split into blocks of 2^l keys:
 * allocates the table of 2^(r-l) per-block indexer pointers, all NULL.
 */
CFastIntervalIndex::CFastIntervalIndex(unsigned int r, unsigned int l)
{
	assert(r>=l);
	m_r=r;
	m_l=l;
	m_pIndexer=new CIntervalIndex*[1u<<(r-l)];
	unsigned int slot=0;
	while(slot<(1u<<(r-l)))
	{
		m_pIndexer[slot]=NULL;
		++slot;
	}
}
/* Destroys every per-block indexer that was created, then the pointer table. */
CFastIntervalIndex::~CFastIntervalIndex()
{
	const unsigned int slots=1u<<(m_r-m_l);
	for(unsigned int slot=0; slot<slots; ++slot)
	{
		CIntervalIndex * indexer=m_pIndexer[slot];
		if(indexer==NULL) continue;
		delete indexer;
	}
	delete [] m_pIndexer;
}
/*
 * Distribute the closed intervals [a[i], b[i]] over the blocks of 2^m_l keys
 * they touch and build one CNaiveIntervalIndex2 per non-empty block.
 *
 * Intervals ending at UINT_MAX are additionally remembered in
 * m_IndexForMaxInt and shortened by one key, because the half-open right end
 * b+1 would not fit into an unsigned int.
 * Returns the accumulated memory estimate in bytes.
 */
long long CFastIntervalIndex::PreProcessing(const vector<unsigned int>& a, const vector<unsigned int>& b)
{
	vector<unsigned int> lower=a, upper=b;   // upper becomes the exclusive right end
	long long iMemBytes=0;
	m_IDs.resize(1u<<(m_r-m_l));

	for(int i=0; i<(int)lower.size(); i++)
	{
		assert(lower[i]<=upper[i]);
		if(upper[i]==UINT_MAX)
		{
			this->m_IndexForMaxInt.push_back(i);
			--upper[i];
		}
		++upper[i]; // now lower[i], upper[i] is half closed interval.
		if(lower[i]>=upper[i]) continue;

		unsigned int firstBlock=0;
		unsigned int endBlock=1;
		if(m_l!=32) // for m_l==32 a shift by 32 is not usable; there is a single block.
		{
			firstBlock=lower[i]>>m_l;
			endBlock=upper[i]>>m_l;
			if((upper[i]&((1u<<m_l)-1))!=0) endBlock+=1;
		}
		for(unsigned int blk=firstBlock; blk<endBlock; blk++) m_IDs[blk].push_back(i);
	}

	for(int blk=0, n=1<<(m_r-m_l); blk<n; blk++)
	{
		const vector<unsigned int>& ids=m_IDs[blk];
		if(ids.size()==0) continue;
		// Closed intervals of this block, in the order of m_IDs[blk].
		vector<unsigned int> C, D;
		for(int j=0; j<(int)ids.size(); j++)
		{
			const int id=ids[j];
			C.push_back(lower[id]);
			D.push_back(upper[id]-1);
		}
		this->m_pIndexer[blk]=new CNaiveIntervalIndex2;
		iMemBytes+=this->m_pIndexer[blk]->PreProcessing(C, D);
	}
	iMemBytes+=(1<<(m_r-m_l))*(int)sizeof(CIntervalIndex *);
#ifdef DEBUG_FAST_INTERVAL_INDEX
	printf("Fast Interval Index membyte=%5.3lf (MB).\n", (double)iMemBytes/(1u<<20));
#endif
	return iMemBytes;
}
int CFastIntervalIndex::Find(unsigned int key, unsigned int * result, unsigned int size)
{
unsigned int n = 0;
int s = 0;
if(key==UINT_MAX)
{
s = m_IndexForMaxInt.size();
for(int i = 0; i < s; i++)
{
if (n >= size)
{
return n;
}
result[n++] = m_IndexForMaxInt[i];
}
}
else
{
vector<unsigned int> v;
unsigned int t=(m_l<32) ? (key>>m_l) : 0;
CIntervalIndex * pIndexer=this->m_pIndexer[t];
if(pIndexer!=NULL)
{
int k = pIndexer->Find(key, result, size);
int i = 0;
for( i=0; i < k; i++)
result[i]=m_IDs[t][result[i]];
n = i;
}
}
return n;
}

View File

@@ -0,0 +1,42 @@
/*
*
* Copyright (c) 2008--2012
* String Matching Group, Lab for Intelligent Information Processing Technology,
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS).
* All rights reserved.
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2012-07-10
*
* This code is the exclusive and proprietary property of IIE-CAS. Usage for direct
* or indirect commercial advantage is not allowed without written permission from
* the authors.
*
*/
#ifndef H_FAST_INTERVAL_INDEX_CPP_H
#define H_FAST_INTERVAL_INDEX_CPP_H
#include "IntervalIndex.h"
// Two-level interval index: the key range is cut into equally sized blocks and
// each non-empty block gets its own sub-index.
// NOTE(review): owns the raw m_pIndexer table and declares no copy operations -
// confirm instances are never copied.
class CFastIntervalIndex : public CIntervalIndex
{
public:
// the range is [0, 2^r), and we partition it into intervals of size 2^l
CFastIntervalIndex(unsigned int r=32, unsigned int l=16);
virtual ~CFastIntervalIndex();
// Builds the per-block sub-indexes from the closed intervals [a[i], b[i]];
// returns the accumulated memory estimate in bytes.
virtual long long PreProcessing(const std::vector<unsigned int>& a, const std::vector<unsigned int>& b);
// Writes the ids of the intervals containing key into result (at most size); returns the count.
virtual int Find(unsigned int key, unsigned int * result, unsigned int size);
private:
std::vector<unsigned int> m_IndexForMaxInt; // ids of the intervals whose right end is UINT_MAX
unsigned int m_r; // log2 of the key range
unsigned int m_l; // log2 of the block size
CIntervalIndex ** m_pIndexer; // one sub-index per block, NULL for blocks without intervals
std::vector< std::vector<unsigned int> > m_IDs; // per block: original ids, indexed by the sub-index's local id
};
#endif

View File

@@ -0,0 +1,149 @@
/*
*
* Copyright (c) 2008-2016
* String Algorithms Research Group
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS)
* National Engineering Laboratory for Information Security Technologies (NELIST)
* All rights reserved
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2016-05-31
*
* This code is the exclusive and proprietary property of IIE-CAS and NELIST.
* Usage for direct or indirect commercial advantage is not allowed without
* written permission from the authors.
*
*/
#include "IPMaskIndex.h"
#include <stdio.h>
using namespace std;
//#define DEBUG_IPMASK_INDEX
/*
 * True when every interval [a[i], b[i]] is either a single value
 * (a[i] == b[i]) or an aligned block of 256 values (low 8 bits of a[i] are
 * zero and b[i] == a[i] + 255). An empty set qualifies.
 */
bool is_8bit_ipmask(const vector<unsigned int>& a, const vector<unsigned int>& b)
{
	unsigned int idx=0;
	while(idx<a.size())
	{
		const bool single=(a[idx]==b[idx]);
		const bool block256=((a[idx]&0xFF)==0 && b[idx]==a[idx]+255);
		if(!single && !block256)
		{
			return false;
		}
		++idx;
	}
	return true;
}
/*
 * Creates an empty index. The bucket bitmap and the offset table are cleared
 * here because PreProcessing() only ever ORs bits into the bitmap and Find()
 * reads both tables.
 */
CIPMaskIndex::CIPMaskIndex()
{
	m_values=NULL;
	m_ip_hash=NULL;
	m_is_single=false;
	for(unsigned int i=0; i<sizeof(m_bitmap); i++) m_bitmap[i]=0;
	for(unsigned int i=0; i<sizeof(m_L)/sizeof(m_L[0]); i++) m_L[i]=0;
}
/* Releases the value array and the single-IP hash (delete accepts NULL). */
CIPMaskIndex::~CIPMaskIndex()
{
	delete m_ip_hash;
	delete [] m_values;
}
/*
closed interval: [a[i], b[i]] such that a[i]<=b[i]
*/
/*
 * Build the index from closed intervals [a[i], b[i]] that are either single
 * values (a[i] == b[i]) or aligned 256-value blocks (see is_8bit_ipmask()).
 *
 * Single values go into a CSuccinctHash; blocks are bucketed by their upper
 * 24 bits with a counting sort (m_L holds bucket start offsets, m_values the
 * interval ids, m_bitmap one bit per non-empty bucket). Intervals of any other
 * shape are not indexed.
 *
 * Returns the memory used in bytes, or -1 when the input is empty, a and b
 * differ in length, or the hash cannot be built.
 */
long long CIPMaskIndex::PreProcessing(const vector<unsigned int>& a, const vector<unsigned int>& b)
{
	if(a.size()==0 || a.size()!=b.size()) return -1;

	// Start from a clean state: drop the tables of a previous run and clear
	// the bitmap, which is only ever OR-ed into below.
	if(m_values!=NULL)
	{
		delete [] m_values;
		m_values=NULL;
	}
	if(m_ip_hash!=NULL)
	{
		delete m_ip_hash;
		m_ip_hash=NULL;
	}
	for(unsigned int i=0; i<sizeof(m_bitmap); i++) m_bitmap[i]=0;

	long long mem_bytes=0;
	unsigned int J=0;
	for(unsigned int i=0; i<a.size(); i++)
	{
		if(a[i]==b[i]) J++;
	}
	m_is_single=(J==a.size());

	if(J>0)
	{
		unsigned int * keys  =new unsigned int[J];
		unsigned int * values=new unsigned int[J];
		J=0;
		for(unsigned int i=0; i<a.size(); i++)
		{
			if(a[i]==b[i])
			{
				keys[J]=a[i];
				values[J]=i;
				J++;
			}
		}
		m_ip_hash=new CSuccinctHash;
		long long ret=m_ip_hash->init(keys, values, J);
		delete [] keys;
		delete [] values;
		if(ret<0)
		{
			delete m_ip_hash;
			m_ip_hash=NULL;
			return -1;
		}
		mem_bytes+=ret;
	}
	if(m_is_single) return mem_bytes;

	// Counting sort of the 256-value blocks by bucket (a[i]>>8).
	for(unsigned int i=0; i<=(1U<<24); i++) m_L[i]=0;
	for(unsigned int i=0; i<a.size(); i++)
	{
		if((a[i]&0xFF)==0 && b[i]==a[i]+255) // aligned 256-value block
		{
			m_L[a[i]>>8]++;
		}
	}
	// Inclusive prefix sums: m_L[i] = number of blocks in buckets 0..i.
	for(unsigned int i=1; i<=(1U<<24); i++) m_L[i]+=m_L[i-1];
	m_values=new unsigned int[m_L[1<<24]];
	mem_bytes+=sizeof(unsigned int)*m_L[1<<24]+sizeof(m_L)+sizeof(m_bitmap);
	// Filling backwards turns each m_L[i] into the start offset of bucket i.
	for(unsigned int i=0; i<a.size(); i++)
	{
		if((a[i]&0xFF)==0 && b[i]==a[i]+255)
		{
			m_values[--m_L[a[i]>>8]]=i;
		}
	}
	for(unsigned int i=0; i<(1<<24); i++)
	{
		if(m_L[i]<m_L[i+1]) m_bitmap[i>>3]|=(1U<<(i&7));
	}
#ifdef DEBUG_IPMASK_INDEX
	printf("IPMask Index membyte=%5.3lf (MB).\n", (double)mem_bytes/(1U<<20));
#endif
	return mem_bytes;
}
/*
 * Report the ids of the intervals containing key: first the single-value
 * matches from the hash, then the ids of the 256-value blocks registered for
 * the key's upper 24 bits. At most size ids are written to result.
 * Returns the number of ids written, or -1 when the hash lookup fails.
 */
int CIPMaskIndex::Find(unsigned int key, unsigned int * result, unsigned int size)
{
	int ret=0;
	if(m_ip_hash!=NULL)
	{
		ret=m_ip_hash->find(key, result, size);
		if(ret<0) return -1;
	}
	// m_values stays NULL unless the block tables were built; without them the
	// bitmap and offsets must not be consulted.
	if(m_is_single || m_values==NULL) return ret;
	// Buffer already full: nothing more can be stored.
	if((unsigned int)ret>=size) return ret;

	result+=ret;
	size-=ret;
	key>>=8;
	if((m_bitmap[key>>3]&(1U<<(key&7)))==0) return ret;

	unsigned int n=m_L[key+1]-m_L[key];
	if(n>size) n=size;
	const unsigned int * p=m_values+m_L[key];
	for(unsigned int i=0; i<n; i++) result[i]=p[i];
	return ret+(int)n;
}

View File

@@ -0,0 +1,49 @@
/*
*
* Copyright (c) 2008-2016
* String Algorithms Research Group
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS)
* National Engineering Laboratory for Information Security Technologies (NELIST)
* All rights reserved
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2016-05-31
*
* This code is the exclusive and proprietary property of IIE-CAS and NELIST.
* Usage for direct or indirect commercial advantage is not allowed without
* written permission from the authors.
*
*/
/* Index specialised for IP fields: every interval is either a single IP or an 8-bit masked IP block (translated from a mis-encoded comment; best-effort reconstruction) */
#ifndef H_IPMASK_INDEX_CPP_H
#define H_IPMASK_INDEX_CPP_H
#include "IntervalIndex.h"
#include "SuccinctHash.h"
#include <vector>
// NOTE(review): a using-directive in a header leaks into every includer; it is
// kept because the declaration below (and possibly other files) relies on it.
using namespace std;
// True when every interval [a[i], b[i]] is a single value or an aligned block of 256 values.
bool is_8bit_ipmask(const vector<unsigned int>& a, const vector<unsigned int>& b);
// Index for interval sets that consist only of single values and aligned
// 256-value blocks.
// NOTE(review): the two fixed tables make each instance roughly 66 MB
// (2 MB bitmap + 64 MB offsets) - confirm instances are heap allocated.
class CIPMaskIndex : public CIntervalIndex
{
public:
CIPMaskIndex();
virtual ~CIPMaskIndex();
// Builds the index from the closed intervals [a[i], b[i]]; returns the memory
// used in bytes, or -1 for empty input or when the hash cannot be built.
virtual long long PreProcessing(const std::vector<unsigned int>& a, const std::vector<unsigned int>& b);
// Writes the ids of the intervals containing key into result (at most size); returns the count.
virtual int Find(unsigned int key, unsigned int * result, unsigned int size);
private:
bool m_is_single; // true when every interval is a single value
unsigned char m_bitmap[1U<<21]; // one bit per 24-bit bucket: set when the bucket has blocks
unsigned int m_L[(1U<<24)+1]; // start offset of each bucket's ids in m_values
unsigned int * m_values; // ids of the 256-value blocks, grouped by bucket
CSuccinctHash * m_ip_hash; // single values -> ids, NULL when there are none
};
#endif

View File

@@ -0,0 +1,352 @@
/*
*
* Copyright (c) 2008-2016
* String Algorithms Research Group
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS)
* National Engineering Laboratory for Information Security Technologies (NELIST)
* All rights reserved
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2016-06-03
*
* This code is the exclusive and proprietary property of IIE-CAS and NELIST.
* Usage for direct or indirect commercial advantage is not allowed without
* written permission from the authors.
*
*/
#include "Int128IntervalIndex.h"
#include <algorithm>
#include <set>
#include <cassert>
#include <limits.h>
using namespace std;
//#define DEBUG_INT128_INTERVAL_INDEX
// Strict less-than on 128-bit values: the high word I[1] decides, and the low
// word I[0] is only consulted to break ties.
bool operator<(const uint128_t& lhs, const uint128_t& rhs)
{
    if(lhs.I[1]!=rhs.I[1])
    {
        return lhs.I[1]<rhs.I[1];
    }
    return lhs.I[0]<rhs.I[0];
}
// Greater-than, expressed through operator< with the operands swapped.
bool operator>(const uint128_t& lhs, const uint128_t& rhs)
{
    const bool rhs_is_smaller=(rhs<lhs);
    return rhs_is_smaller;
}
// Greater-or-equal: true exactly when lhs is not strictly less than rhs.
bool operator>=(const uint128_t& lhs, const uint128_t& rhs)
{
    if(lhs<rhs)
    {
        return false;
    }
    return true;
}
// Equality: both 64-bit words must match.
bool operator==(const uint128_t& lhs, const uint128_t& rhs)
{
    return lhs.I[0]==rhs.I[0] && lhs.I[1]==rhs.I[1];
}
// Inequality, defined as the negation of operator==.
bool operator!=(const uint128_t& lhs, const uint128_t& rhs)
{
    if(lhs==rhs)
    {
        return false;
    }
    return true;
}
// Computes a-b, where a is lhs and b is rhs.
// 128-bit subtraction lhs - rhs, modulo 2^128.
//
// The low words are subtracted with ordinary unsigned wrap-around, which is
// already the correct low word of the result; a borrow is taken from the high
// word whenever the low word of lhs is smaller than the low word of rhs.
// (The previous borrow path computed ULONG_MAX - (l - h), which is one less
// than the wrapped difference and also depended on the width of unsigned long.)
uint128_t operator-(const uint128_t& lhs, const uint128_t& rhs)
{
    const unsigned long long borrow = (lhs.I[0] < rhs.I[0]) ? 1 : 0;
    const unsigned long long lo = lhs.I[0] - rhs.I[0];          // wraps modulo 2^64
    const unsigned long long hi = lhs.I[1] - rhs.I[1] - borrow; // wraps modulo 2^64

    // Repack as four 32-bit words, most significant first, which is the layout
    // the uint128_t(const unsigned int *) constructor expects.
    unsigned int words[4];
    words[3] = (unsigned int)(lo & 0xFFFFFFFF);
    words[2] = (unsigned int)((lo>>32) & 0xFFFFFFFF);
    words[1] = (unsigned int)(hi & 0xFFFFFFFF);
    words[0] = (unsigned int)((hi>>32) & 0xFFFFFFFF);
    uint128_t result(words);
    return result;
}
// Folds a 128-bit value into 32 bits: XOR the two 64-bit words together, then
// XOR the upper and lower halves of that result.
unsigned int ipv6_hash(const uint128_t * ip)
{
    const unsigned long long folded=ip->I[0]^ip->I[1];
    const unsigned int low_half=(unsigned int)(folded&0xFFFFFFFF);
    const unsigned int high_half=(unsigned int)(folded>>32);
    return low_half^high_half;
}
// Creates an empty index: every owned table starts as NULL and the endpoint
// count as zero, so the destructor is safe even if PreProcessing never runs.
CInt128IntervalIndex::CInt128IntervalIndex()
{
    // Previously left uninitialised; PreProcessing returns early on empty input
    // without setting it, so give it a defined value here.
    m_is_single=false;
    m_array=NULL;
    m_iEndPointsNum=0;
    m_pEndPoints=NULL;
    m_pIDPtr=NULL;
    m_pIDList=NULL;
    m_IndexForMaxInt=NULL;
    m_IndexForWholeInterval=NULL;
}
// Releases every table the index allocated. delete[] on a null pointer is a
// no-op, so tables that were never built need no explicit check.
CInt128IntervalIndex::~CInt128IntervalIndex()
{
    delete [] m_array;
    delete [] m_pEndPoints;
    delete [] m_pIDList;
    delete [] m_pIDPtr;
    delete [] m_IndexForMaxInt;
    delete [] m_IndexForWholeInterval;
}
// Builds the index from the closed intervals [a[i], b[i]].
//
// If every interval is a single point (a[i]==b[i]) the hash-based structure is
// built via process_single; otherwise the endpoint-based structure is built
// via process_interval.
//
// Returns the value reported by the chosen builder (a memory estimate in
// bytes, or -1 if it failed), or -1 when `a` is empty or `b` has fewer
// elements than `a` (b[i] would otherwise be read out of bounds).
long long CInt128IntervalIndex::PreProcessing(const vector<uint128_t>& a, const vector<uint128_t>& b)
{
    if(a.size()==0) return -1;
    if(b.size()<a.size()) return -1;

    m_is_single=true;
    for(unsigned int i=0; i<a.size(); i++)
    {
        if(a[i]!=b[i])
        {
            m_is_single=false;
            break;
        }
    }
    return m_is_single ? process_single(a) : process_interval(a, b);
}
// Builds the point-lookup structure: keeps a copy of every value in m_array
// and registers (ipv6_hash(value) -> position) pairs in m_ip_hash.
// Returns the byte count of the temporary and permanent arrays plus whatever
// m_ip_hash.init reports, or -1 if m_ip_hash.init returns a negative value.
long long CInt128IntervalIndex::process_single(const vector<uint128_t>& a)
{
    const unsigned int count=a.size();
    unsigned int * hashed_keys=new unsigned int[count];
    unsigned int * positions=new unsigned int[count];
    m_array=new uint128_t[count];

    for(unsigned int idx=0; idx<count; ++idx)
    {
        m_array[idx]=a[idx];
        hashed_keys[idx]=ipv6_hash(&a[idx]);
        positions[idx]=idx;
    }

    const long long hash_bytes=m_ip_hash.init(hashed_keys, positions, count);
    delete [] positions;
    delete [] hashed_keys;

    if(hash_bytes<0)
    {
        return -1;
    }
    const long long array_bytes=(long long)((2*sizeof(unsigned int)+sizeof(uint128_t))*count);
    return array_bytes+hash_bytes;
}
// Builds the endpoint-based structure for general intervals [a[i], b[i]].
//
// Steps:
//   1. Convert each closed interval to half-open form [A, B). Intervals whose
//      upper bound is the all-ones value are also recorded in m_IndexForMaxInt
//      (B cannot be incremented past all-ones). Intervals that end up spanning
//      [0, all-ones) go to m_IndexForWholeInterval and are kept out of the
//      endpoint table.
//   2. Sort the distinct endpoints into m_pEndPoints.
//   3. For every elementary segment [endpoint[j], endpoint[j+1]) store the ids
//      of the intervals covering it (m_pIDPtr = offsets, m_pIDList = ids).
//   4. Fill m_L, a 65536-entry jump table keyed by the top 16 bits, that
//      narrows the binary search done at lookup time.
//
// Both arrays start with their element count in slot 0.
// Returns the estimated number of bytes used by the structure.
long long CInt128IntervalIndex::process_interval(const vector<uint128_t>& a, const vector<uint128_t>& b)
{
    vector<uint128_t> A=a, B=b;
    long long iMemBytes=0;
    set<uint128_t> s;
    vector<unsigned int> IndexForMaxInt;
    vector<unsigned int> IndexForWholeInterval;

    for(int i=0, n=(int)A.size(); i<n; i++)
    {
        if(A[i]>B[i]) continue;
        if(B[i].is_all_ones())
        {
            IndexForMaxInt.push_back(i);
            --B[i];
        }
        ++B[i]; // now A[i], B[i] is half closed interval.
        if(A[i]>=B[i]) continue;
        if(A[i].is_all_zeros() && B[i].is_all_ones())
        {
            IndexForWholeInterval.push_back(i);
            continue;
        }
        s.insert(A[i]);
        s.insert(B[i]);
    }

    m_IndexForWholeInterval=new unsigned int[IndexForWholeInterval.size()+1];
    m_IndexForWholeInterval[0]=IndexForWholeInterval.size();
    copy(IndexForWholeInterval.begin(), IndexForWholeInterval.end(), m_IndexForWholeInterval+1);
    iMemBytes+=(long long)(sizeof(unsigned int)*(IndexForWholeInterval.size()+1));

    m_IndexForMaxInt=new unsigned int[IndexForMaxInt.size()+1];
    m_IndexForMaxInt[0]=IndexForMaxInt.size();
    copy(IndexForMaxInt.begin(), IndexForMaxInt.end(), m_IndexForMaxInt+1);
    iMemBytes+=(long long)(sizeof(unsigned int)*(IndexForMaxInt.size()+1));

    this->m_iEndPointsNum=(long long)s.size();
    this->m_pEndPoints=new uint128_t[m_iEndPointsNum];
    copy(s.begin(), s.end(), m_pEndPoints);
    iMemBytes+=(long long)(m_iEndPointsNum*sizeof(uint128_t));

    // count[j] = number of intervals covering segment j.
    vector<long long> count(m_iEndPointsNum, 0);
    for(int i=0, n=(int)A.size(); i<n; i++)
    {
        if(A[i]>=B[i] || (A[i].is_all_zeros() && B[i].is_all_ones())) continue;
        long long l=(int)(lower_bound(m_pEndPoints, m_pEndPoints+m_iEndPointsNum, A[i])-m_pEndPoints);
        long long h=(int)(lower_bound(m_pEndPoints, m_pEndPoints+m_iEndPointsNum, B[i])-m_pEndPoints);
        assert(m_pEndPoints[l]==A[i] && m_pEndPoints[h]==B[i]);
        for(long long j=l; j<h; j++) count[j]++;
    }

    // Prefix sums: m_pIDPtr[j] is where segment j's ids start in m_pIDList.
    m_pIDPtr=new long long[m_iEndPointsNum+1];
    m_pIDPtr[0]=0;
    for(long long i=0; i<m_iEndPointsNum; i++)
    {
        m_pIDPtr[i+1]=m_pIDPtr[i]+count[i];
    }
    // m_pIDPtr holds long long entries, so account for that element size.
    iMemBytes+=(long long)((m_iEndPointsNum+1)*sizeof(long long));

    m_pIDList=new unsigned int[m_pIDPtr[m_iEndPointsNum]];
    for(int i=0, n=(int)A.size(); i<n; i++)
    {
        if(A[i]>=B[i] || (A[i].is_all_zeros() && B[i].is_all_ones())) continue;
        long long l=(int)(lower_bound(m_pEndPoints, m_pEndPoints+m_iEndPointsNum, A[i])-m_pEndPoints);
        long long h=(int)(lower_bound(m_pEndPoints, m_pEndPoints+m_iEndPointsNum, B[i])-m_pEndPoints);
        assert(m_pEndPoints[l]==A[i] && m_pEndPoints[h]==B[i]);
        for(long long j=l; j<h; j++)
        {
            // m_pIDPtr[j] doubles as the write cursor; it is rewound below.
            m_pIDList[m_pIDPtr[j]++]=i;
        }
    }
    iMemBytes+=(long long)(m_pIDPtr[m_iEndPointsNum]*sizeof(unsigned int));
    for(long long i=0; i<m_iEndPointsNum; i++)
    {
        m_pIDPtr[i]-=count[i]; // restore the start offsets
    }

    // m_L[i] = index of the last endpoint <= (i << 112), or 0 if there is none.
    int k=0;
    unsigned int t[4] = {0,0,0,0};
    for(unsigned int i=0; i<65536; i++)
    {
        t[3] = i;
        uint128_t x(t);
        x = x<<112;
        while(k<m_iEndPointsNum && x>=m_pEndPoints[k])
            k++;
        m_L[i] = (k>=1)? k-1 : 0;
    }
    m_L[65536]=m_iEndPointsNum-1;
    iMemBytes+=(long long)sizeof(m_L);
#ifdef DEBUG_INT128_INTERVAL_INDEX
    printf("Int128 Interval Index membyte=%5.3lf (MB).\n", (double)iMemBytes/(1u<<20));
#endif
    return iMemBytes;
}
int CInt128IntervalIndex::Find(const uint128_t * key, unsigned int * result, unsigned int size)
{
if(m_is_single)
{
return Find_single(key, result, size);
}
else
{
return Find_interval(key, result, size);
}
}
// Point lookup: asks m_ip_hash for candidates under the 32-bit hash of `key`,
// then compacts `result` in place so that only positions whose stored 128-bit
// value equals `key` remain. Returns the number of ids kept.
int CInt128IntervalIndex::Find_single(const uint128_t * key, unsigned int * result, unsigned int size)
{
    const unsigned int hashed=ipv6_hash(key);
    const int candidates=m_ip_hash.find(hashed, result, size);
    int kept=0;
    for(int idx=0; idx<candidates; ++idx)
    {
        const unsigned int position=result[idx];
        if(*key==m_array[position])
        {
            result[kept++]=position;
        }
    }
    return kept;
}
int CInt128IntervalIndex::Find_interval(const uint128_t * key, unsigned int * result, unsigned int size)
{
if(key->is_all_ones())
{
unsigned int s=m_IndexForMaxInt[0];
if(s>size) s=size;
for(unsigned int i=1; i<=s; i++) *result++=m_IndexForMaxInt[i];
return s;
}
else
{
unsigned int s=m_IndexForWholeInterval[0];
if(s>size) s=size;
for(unsigned int i=1; i<=s; i++)
{
*result++=m_IndexForWholeInterval[i];
}
size-=s;
uint128_t t = (*key)>>112;
unsigned int k = t.I[0]&0xffff;
long long l=m_L[k], h=m_L[k+1];
long long m=0;
while(l<=h && m<m_iEndPointsNum)
{
m=(l+h)>>1;
if(*key>=m_pEndPoints[m]) l=m+1;
else h=m-1;
}
if(h>=m_L[k] && h<m_iEndPointsNum)
{
long long n=m_pIDPtr[h+1]-m_pIDPtr[h];
if(n>size) n=size;
unsigned int * id_list=m_pIDList+m_pIDPtr[h];
for(unsigned int i=0; i<n; i++) *result++=*id_list++;
s+=n;
}
return s;
}
}

View File

@@ -0,0 +1,149 @@
/*
*
* Copyright (c) 2008-2016
* String Algorithms Research Group
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS)
* National Engineering Laboratory for Information Security Technologies (NELIST)
* All rights reserved
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2016-06-03
*
* This code is the exclusive and proprietary property of IIE-CAS and NELIST.
* Usage for direct or indirect commercial advantage is not allowed without
* written permission from the authors.
*
*/
#ifndef H_INT128_INTERVAL_INDEX_CPP_H
#define H_INT128_INTERVAL_INDEX_CPP_H
#include "SuccinctHash.h"
#include <vector>
using namespace std;
// Minimal unsigned 128-bit integer stored as two 64-bit words.
struct uint128_t
{
    unsigned long long I[2]; // I[1] holds the high 64 bits, I[0] the low 64 bits

    // Builds the value from four 32-bit words given most significant first
    // (a[0] is the top word, a[3] the bottom word). With no argument (or NULL)
    // the value is zero; it was previously left uninitialised.
    uint128_t(const unsigned int * a=NULL)
    {
        if(a!=NULL)
        {
            I[0] = a[2];
            I[0] = (I[0]<<32)|a[3];
            I[1] = a[0];
            I[1] = (I[1]<<32)|a[1];
        }
        else
        {
            I[0] = 0;
            I[1] = 0;
        }
    }

    uint128_t& operator=(const uint128_t& rhs)
    {
        for(int i=0; i<2; i++) I[i]=rhs.I[i];
        return *this;
    }

    // Pre-decrement with borrow from the high word; wraps from 0 to all-ones.
    uint128_t& operator--()
    {
        if(I[0]==0) I[1]--;
        I[0]--;
        return *this;
    }

    // Pre-increment with carry into the high word; wraps from all-ones to 0.
    uint128_t& operator++()
    {
        I[0]++;
        if(I[0]==0) I[1]++;
        return *this;
    }

    // Logical left shift by n bits.
    // n <= 0 returns the value unchanged; n >= 128 returns zero. Each case is
    // handled separately so no 64-bit word is ever shifted by 64 or more
    // (undefined behaviour in C++), and nothing outside I[0..1] is touched.
    uint128_t operator<<(int n) const
    {
        uint128_t t = *this;
        if(n<=0) return t;
        if(n>=128)
        {
            t.I[0]=0;
            t.I[1]=0;
        }
        else if(n>=64)
        {
            t.I[1]=t.I[0]<<(n-64);
            t.I[0]=0;
        }
        else
        {
            t.I[1]=(t.I[1]<<n)|(t.I[0]>>(64-n));
            t.I[0]<<=n;
        }
        return t;
    }

    // Logical right shift by n bits; same edge-case handling as operator<<.
    uint128_t operator>>(int n) const
    {
        uint128_t t = *this;
        if(n<=0) return t;
        if(n>=128)
        {
            t.I[0]=0;
            t.I[1]=0;
        }
        else if(n>=64)
        {
            t.I[0]=t.I[1]>>(n-64);
            t.I[1]=0;
        }
        else
        {
            t.I[0]=(t.I[0]>>n)|(t.I[1]<<(64-n));
            t.I[1]>>=n;
        }
        return t;
    }

    // ORs the bitwise complement of a 128-bit mask into this value.
    // The mask is read as mask[1]:mask[0] for the low word and mask[3]:mask[2]
    // for the high word.
    // NOTE(review): this word order differs from the constructor's (a[0] most
    // significant) - confirm that callers pass the mask in this layout.
    void ornot(unsigned int * mask)
    {
        unsigned long long m = mask[1];
        m = (m<<32)|mask[0];
        unsigned long long n = mask[3];
        n = (n<<32)|mask[2];
        I[0] |= ~m;
        I[1] |= ~n;
    }

    bool is_all_zeros() const
    {
        return (I[0] == 0)&&(I[1] == 0);
    }

    bool is_all_ones() const
    {
        return ((~I[0])==0)&&((~I[1])==0);
    }
};
// Comparison and subtraction operators for uint128_t, declared here and
// defined out of line.
bool operator<(const uint128_t& lhs, const uint128_t& rhs);
bool operator>(const uint128_t& lhs, const uint128_t& rhs);
bool operator==(const uint128_t& lhs, const uint128_t& rhs);
bool operator!=(const uint128_t& lhs, const uint128_t& rhs);
bool operator>=(const uint128_t& lhs, const uint128_t& rhs);
uint128_t operator-(const uint128_t& lhs, const uint128_t& rhs);
// Index over 128-bit keys: given closed intervals [a[i], b[i]], reports the
// indexes i of the intervals that contain a queried key.
class CInt128IntervalIndex
{
public:
CInt128IntervalIndex();
~CInt128IntervalIndex();
// Builds the index; dispatches to process_single when every interval is a
// single point, otherwise to process_interval.
long long PreProcessing(const vector<uint128_t>& a, const vector<uint128_t>& b);
// Writes up to `size` matching interval indexes into `result` and returns
// how many were written.
int Find(const uint128_t * key, unsigned int * result, unsigned int size);
private:
// Point-only mode: hash-based build and lookup.
long long process_single(const vector<uint128_t>& a);
int Find_single(const uint128_t * key, unsigned int * result, unsigned int size);
// General mode: sorted-endpoint build and lookup.
long long process_interval(const vector<uint128_t>& a, const vector<uint128_t>& b);
int Find_interval(const uint128_t * key, unsigned int * result, unsigned int size);
private:
// Selects which of the two lookup modes Find uses; set by PreProcessing.
bool m_is_single;
// Point mode: hash from ipv6_hash(value) to positions in m_array.
CSuccinctHash m_ip_hash;
// Point mode: copy of the input values, used to confirm hash candidates.
uint128_t * m_array;
// Interval mode: [0] = count, then the ids of intervals whose upper bound is all-ones.
unsigned int * m_IndexForMaxInt;
// Interval mode: number of entries in m_pEndPoints.
long long m_iEndPointsNum;
// Interval mode: sorted distinct endpoints of the half-open intervals.
uint128_t * m_pEndPoints;
// Interval mode: m_pIDPtr[j]..m_pIDPtr[j+1] delimits segment j's ids in m_pIDList.
long long * m_pIDPtr;
unsigned int * m_pIDList;
// Interval mode: jump table keyed by the top 16 bits of the key.
int m_L[65537];
// Interval mode: [0] = count, then the ids of intervals covering the whole range.
unsigned int * m_IndexForWholeInterval;
};
#endif

View File

@@ -0,0 +1,25 @@
/*
*
* Copyright (c) 2008--2012
* String Matching Group, Lab for Intelligent Information Processing Technology,
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS).
* All rights reserved.
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2012-07-10
*
* This code is the exclusive and proprietary property of IIE-CAS. Usage for direct
* or indirect commercial advantage is not allowed without written permission from
* the authors.
*
*/
#include "IntervalIndex.h"
// The interface class has no state of its own, so there is nothing to set up.
CIntervalIndex::CIntervalIndex()
{
}
// Out-of-line definition of the virtual destructor; nothing to release here.
CIntervalIndex::~CIntervalIndex()
{
}

View File

@@ -0,0 +1,40 @@
/*
*
* Copyright (c) 2008--2012
* String Matching Group, Lab for Intelligent Information Processing Technology,
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS).
* All rights reserved.
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2012-07-10
*
* This code is the exclusive and proprietary property of IIE-CAS. Usage for direct
* or indirect commercial advantage is not allowed without written permission from
* the authors.
*
*/
#ifndef H_INTERVAL_INDEX_CPP_H
#define H_INTERVAL_INDEX_CPP_H
#include <vector>
// Abstract interface for indexes over a set of closed intervals of 32-bit
// unsigned integers.
class CIntervalIndex
{
public:
CIntervalIndex();
virtual ~CIntervalIndex();
/*
Build the index from the closed intervals [ a[i] , b[i] ] such that a[i]<=b[i].
The position i of an interval is the identifier later reported by Find.
*/
virtual long long PreProcessing(const std::vector<unsigned int>& a, const std::vector<unsigned int>& b)=0;
/*
Report the indexes of intervals that contain the key.
result: caller-provided output buffer; size: its capacity in elements.
*/
virtual int Find(unsigned int key, unsigned int * result, unsigned int size)=0;//changed by luyuhai, 2015.11.09
};
#endif

View File

@@ -0,0 +1,202 @@
/*
*
* Copyright (c) 2008--2012
* String Matching Group, Lab for Intelligent Information Processing Technology,
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS).
* All rights reserved.
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2012-07-10
*
* This code is the exclusive and proprietary property of IIE-CAS. Usage for direct
* or indirect commercial advantage is not allowed without written permission from
* the authors.
*
*/
//#define DEBUG_INTERVAL_TREE
#include "IntervalTree.h"
#include <climits>
#include <queue>
#include <algorithm>
#include <iterator>
#include <set>
#include <cassert>
using namespace std;
// Starts with no tree: a null root and a node counter of zero.
CIntervalTree::CIntervalTree()
{
    m_uiNodeNum=0;
    m_pstRoot=NULL;
}
// Frees every node with a breadth-first walk and checks that the node counter
// returns to zero. Nothing happens when no tree was built.
CIntervalTree::~CIntervalTree()
{
    if(m_pstRoot!=NULL)
    {
        queue<stIntervalNode *> pending;
        pending.push(m_pstRoot);
        while(!pending.empty())
        {
            stIntervalNode * node=pending.front();
            pending.pop();
            // Queue the children before the node itself is destroyed.
            if(node->lchild!=NULL) pending.push(node->lchild);
            if(node->rchild!=NULL) pending.push(node->rchild);
            delete node;
            --m_uiNodeNum;
        }
        assert(m_uiNodeNum==0);
    }
}
// Recursively builds a balanced binary tree over the sorted array a[0..n-1].
// An empty range yields a leaf; otherwise the middle element becomes the
// separator and the two halves become the children. Every created node is
// counted in m_uiNodeNum.
CIntervalTree::stIntervalNode * CIntervalTree::BuildBalancedTree(unsigned int a[], unsigned int n)
{
    stIntervalNode * node=new stIntervalNode;
    ++m_uiNodeNum;

    if(n==0)
    {
        node->isleaf=true;
        node->seperator=0;
        node->lchild=NULL;
        node->rchild=NULL;
        return node;
    }

    const unsigned int mid=(n-1)/2;
    node->isleaf=false;
    node->seperator=a[mid];
    node->lchild=BuildBalancedTree(a, mid);
    node->rchild=BuildBalancedTree(a+mid+1, n-mid-1);
    return node;
}
// Registers interval id `id`, covering the half-open range [a, b), in the
// subtree rooted at pstCurrNode, whose own range is [inf, sup).
// The walk descends while the interval lies entirely on one side of the
// separator, stores the id on the node whose range it matches exactly, and
// splits into two recursive calls when the interval straddles the separator.
void CIntervalTree::AddInterval(stIntervalNode * pstCurrNode, unsigned int inf, unsigned int sup,
unsigned int a, unsigned int b, unsigned int id)
{
    assert(inf<=a && b<=sup);
    for(;;)
    {
        assert(pstCurrNode!=NULL);
        if(a==inf && b==sup)
        {
            pstCurrNode->ids.push_back(id);
            m_iMemBytes+=sizeof(unsigned int);
            return;
        }

        const unsigned int sep=pstCurrNode->seperator;
        if(a>=sep)
        {
            // Entirely to the right of the separator.
            inf=sep;
            pstCurrNode=pstCurrNode->rchild;
            continue;
        }
        if(b<=sep)
        {
            // Entirely to the left of the separator.
            sup=sep;
            pstCurrNode=pstCurrNode->lchild;
            continue;
        }

        // Straddles the separator: split at it and handle each half.
        AddInterval(pstCurrNode->lchild, inf, sep, a, sep, id);
        AddInterval(pstCurrNode->rchild, sep, sup, sep, b, id);
        return;
    }
}
long long CIntervalTree::PreProcessing(const vector<unsigned int>& a, const vector<unsigned int>& b)
{
vector<unsigned int> A=a, B=b;
this->m_iMemBytes=0;
set<unsigned int> s;
for(int i=0, n=(int)A.size(); i<n; i++)
{
assert(A[i]<=B[i]);
if(B[i]==UINT_MAX)
{
this->m_IndexForMaxInt.push_back(i);
--B[i];
}
B[i]++; // now A[i], B[i] is half closed interval.
if(A[i]>=B[i]) continue;
s.insert(A[i]);
s.insert(B[i]);
}
this->m_iMemBytes+=(long long)(sizeof(unsigned int)*this->m_IndexForMaxInt.size());
vector<unsigned int> endpoints;
copy(s.begin(), s.end(), back_inserter(endpoints));
this->m_pstRoot=this->BuildBalancedTree(&endpoints[0], (unsigned int)endpoints.size());
assert(this->m_uiNodeNum==2*(unsigned int)endpoints.size()+1);
this->m_iMemBytes+=sizeof(stIntervalNode)*m_uiNodeNum;
for(int i=0, n=(int)A.size(); i<n; i++)
{
if(A[i]>=B[i]) continue;
this->AddInterval(m_pstRoot, 0, UINT_MAX, A[i], B[i], i);
}
#ifdef DEBUG_INTERVAL_TREE
printf("Interval Tree membyte=%5.3lf (MB).\n", (double)m_iMemBytes/(1u<<20));
#endif
return m_iMemBytes;
}
// qsort comparator for unsigned int values, ascending.
// Returns a negative, zero or positive value as *a is less than, equal to or
// greater than *b. The result comes from comparisons rather than subtraction:
// an unsigned difference converted to int has the wrong sign whenever the two
// values differ by 2^31 or more.
static int compare(const void * a, const void * b)
{
    const unsigned int x=*(const unsigned int *)a;
    const unsigned int y=*(const unsigned int *)b;
    return (x>y)-(x<y);
}
int CIntervalTree::Find(unsigned int key, unsigned int * result, unsigned int size)
{
unsigned int n = 0;
int s = 0;
if(key==UINT_MAX)
{
s = m_IndexForMaxInt.size();
for(int i = 0; i < s; i++)
{
if (n >= size)
{
return n;
}
result[n++] = m_IndexForMaxInt[i];
}
}
else
{
stIntervalNode * pstCurrNode=this->m_pstRoot;
while(true)
{
vector<unsigned int>::iterator it = pstCurrNode->ids.begin();
while(it != pstCurrNode->ids.end())
{
if(n >= size)
{
qsort(result, n, sizeof(unsigned int), compare);
return n;
}
result[n++] = *it;
it++;
}
if(pstCurrNode->isleaf) break;
pstCurrNode= (key<pstCurrNode->seperator) ? pstCurrNode->lchild : pstCurrNode->rchild;
}
qsort(result, n, sizeof(unsigned int), compare);
}
return n;
}

View File

@@ -0,0 +1,56 @@
/*
*
* Copyright (c) 2008--2012
* String Matching Group, Lab for Intelligent Information Processing Technology,
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS).
* All rights reserved.
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2012-07-10
*
* This code is the exclusive and proprietary property of IIE-CAS. Usage for direct
* or indirect commercial advantage is not allowed without written permission from
* the authors.
*
*/
#ifndef H_INTERVAL_TREE_CPP_H
#define H_INTERVAL_TREE_CPP_H
#include "IntervalIndex.h"
// CIntervalIndex implementation backed by a balanced binary tree over the
// interval endpoints; interval ids are stored on the tree nodes.
class CIntervalTree : public CIntervalIndex
{
public:
// One tree node. Leaves carry no separator and have no children.
struct stIntervalNode
{
bool isleaf;
// Endpoint value that splits this node's range between the two children.
unsigned int seperator;
// Ids of the intervals stored on this node.
std::vector<unsigned int> ids;
stIntervalNode * lchild;
stIntervalNode * rchild;
};
public:
CIntervalTree();
virtual ~CIntervalTree();
// Builds the tree from the closed intervals [a[i], b[i]].
virtual long long PreProcessing(const std::vector<unsigned int>& a, const std::vector<unsigned int>& b);
// Writes the ids of intervals containing `key` into `result` (capacity `size`).
virtual int Find(unsigned int key, unsigned int * result, unsigned int size);
private:
// Builds a balanced tree over the sorted array a[0..n-1].
stIntervalNode * BuildBalancedTree(unsigned int a[], unsigned int n);
// Inserts interval `id` = [a, b) below pstCurrNode, whose range is [inf, sup).
void AddInterval(stIntervalNode * pstCurrNode, unsigned int inf, unsigned int sup,
unsigned int a, unsigned int b, unsigned int id);
private:
stIntervalNode * m_pstRoot;
// Number of nodes currently allocated.
unsigned int m_uiNodeNum;
// Running memory estimate in bytes, reset at the start of PreProcessing.
long long m_iMemBytes;
// Ids of intervals whose upper bound is UINT_MAX.
std::vector<unsigned int> m_IndexForMaxInt;
};
#endif

View File

@@ -0,0 +1,225 @@
/*
*
* Copyright (c) 2008--2012
* String Matching Group, Lab for Intelligent Information Processing Technology,
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS).
* All rights reserved.
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2014-11-19
*
* This code is the exclusive and proprietary property of IIE-CAS. Usage for direct
* or indirect commercial advantage is not allowed without written permission from
* the authors.
*
*/
//#define DEBUG_NAIVE_INTERVAL_INDEX
#include "NaiveIntervalIndex.h"
#include <climits>
#include <algorithm>
#include <set>
#include <cassert>
using namespace std;
// Starts empty: no delegate index, no tables, zero endpoints.
CNaiveIntervalIndex::CNaiveIntervalIndex()
{
    m_iEndPointsNum=0;
    m_pDirectIndexer=NULL;
    m_pEndPoints=NULL;
    m_pIDPtr=NULL;
    m_pIDList=NULL;
    m_IndexForMaxInt=NULL;
    m_IndexForWholeInterval=NULL;
}
// Releases the delegate index and every table. delete and delete[] ignore
// null pointers, so members that were never allocated need no check.
CNaiveIntervalIndex::~CNaiveIntervalIndex()
{
    delete m_pDirectIndexer;
    delete [] m_pEndPoints;
    delete [] m_pIDList;
    delete [] m_pIDPtr;
    delete [] m_IndexForMaxInt;
    delete [] m_IndexForWholeInterval;
}
// Builds the index from the closed intervals [a[i], b[i]].
// A CDirectIndex is tried first; if its PreProcessing reports success
// (ret>=0) it is kept as the delegate and its result is returned. Otherwise
// the sorted-endpoint structure below is built and its estimated size in
// bytes is returned.
long long CNaiveIntervalIndex::PreProcessing(const vector<unsigned int>& a, const vector<unsigned int>& b)
{
CDirectIndex * instance=new CDirectIndex;
long long ret=instance->PreProcessing(a, b);
if(ret>=0)
{
m_pDirectIndexer=instance;
return ret;
}
delete instance;
vector<unsigned int> A=a, B=b;
long long iMemBytes=0;
set<unsigned int> s;
vector<unsigned int> IndexForMaxInt;
vector<unsigned int> IndexForWholeInterval;
// Pass 1: convert to half-open intervals and collect the distinct endpoints.
for(int i=0, n=(int)A.size(); i<n; i++)
{
if(A[i]>B[i]) continue;
// B cannot be incremented past UINT_MAX: remember the id separately and
// let the interval end at UINT_MAX (exclusive) instead.
if(B[i]==UINT_MAX)
{
IndexForMaxInt.push_back(i);
--B[i];
}
B[i]++; // now A[i], B[i] is half closed interval.
if(A[i]>=B[i]) continue;
// Intervals spanning [0, UINT_MAX) are kept out of the endpoint table.
if(A[i]==0 && B[i]==UINT_MAX)
{
IndexForWholeInterval.push_back(i);
continue;
}
s.insert(A[i]);
s.insert(B[i]);
}
// Both id arrays store their element count in slot 0.
m_IndexForWholeInterval=new unsigned int[IndexForWholeInterval.size()+1];
m_IndexForWholeInterval[0]=IndexForWholeInterval.size();
copy(IndexForWholeInterval.begin(), IndexForWholeInterval.end(), m_IndexForWholeInterval+1);
iMemBytes+=(long long)(sizeof(unsigned int)*(IndexForWholeInterval.size()+1));
m_IndexForMaxInt=new unsigned int[IndexForMaxInt.size()+1];
m_IndexForMaxInt[0]=IndexForMaxInt.size();
copy(IndexForMaxInt.begin(), IndexForMaxInt.end(), m_IndexForMaxInt+1);
iMemBytes+=(long long)(sizeof(unsigned int)*(IndexForMaxInt.size()+1));
this->m_iEndPointsNum=(int)s.size();
this->m_pEndPoints=new unsigned int[m_iEndPointsNum];
copy(s.begin(), s.end(), m_pEndPoints);
iMemBytes+=(long long)(m_iEndPointsNum*sizeof(unsigned int));
// Pass 2: count[j] = number of intervals covering segment [endpoint j, endpoint j+1).
vector<unsigned int> count(m_iEndPointsNum, 0);
for(int i=0, n=(int)A.size(); i<n; i++)
{
if(A[i]>=B[i] || (A[i] == 0 && B[i] == UINT_MAX)) continue;
int l=(int)(lower_bound(m_pEndPoints, m_pEndPoints+m_iEndPointsNum, A[i])-m_pEndPoints);
int h=(int)(lower_bound(m_pEndPoints, m_pEndPoints+m_iEndPointsNum, B[i])-m_pEndPoints);
assert(m_pEndPoints[l]==A[i] && m_pEndPoints[h]==B[i]);
for(int j=l; j<h; j++) count[j]++;
}
// Prefix sums: m_pIDPtr[j] is the start of segment j's ids in m_pIDList.
m_pIDPtr=new unsigned int[m_iEndPointsNum+1];
m_pIDPtr[0]=0;
for(int i=0; i<m_iEndPointsNum; i++)
{
m_pIDPtr[i+1]=m_pIDPtr[i]+count[i];
}
iMemBytes+=(long long)((m_iEndPointsNum+1)*sizeof(unsigned int));
// Pass 3: fill the id lists, using m_pIDPtr[j] as a write cursor.
m_pIDList=new unsigned int[m_pIDPtr[m_iEndPointsNum]];
for(int i=0, n=(int)A.size(); i<n; i++)
{
if(A[i]>=B[i] || (A[i] == 0 && B[i] == UINT_MAX)) continue;
int l=(int)(lower_bound(m_pEndPoints, m_pEndPoints+m_iEndPointsNum, A[i])-m_pEndPoints);
int h=(int)(lower_bound(m_pEndPoints, m_pEndPoints+m_iEndPointsNum, B[i])-m_pEndPoints);
assert(m_pEndPoints[l]==A[i] && m_pEndPoints[h]==B[i]);
for(int j=l; j<h; j++)
{
m_pIDList[m_pIDPtr[j]++]=i;
}
}
iMemBytes+=(long long)(m_pIDPtr[m_iEndPointsNum]*sizeof(unsigned int));
// Rewind the cursors so m_pIDPtr holds the start offsets again.
for(int i=0; i<m_iEndPointsNum; i++)
{
m_pIDPtr[i]-=count[i];
}
// Jump table: m_L[i] is the index of the last endpoint <= (i<<16), or 0 if
// there is none; it narrows the binary search done by Find.
int k=0;
for(unsigned int i=0; i<65536; i++)
{
unsigned int x=(i<<16);
while(k<m_iEndPointsNum && x>=m_pEndPoints[k]) k++;
m_L[i] = (k>=1)? k-1 : 0;
}
m_L[65536]=m_iEndPointsNum-1;
iMemBytes+=(long long)sizeof(m_L);
#ifdef DEBUG_NAIVE_INTERVAL_INDEX
printf("Naive Interval Index membyte=%5.3lf (MB).\n", (double)iMemBytes/(1u<<20));
#endif
A.clear();
B.clear();
s.clear();
IndexForMaxInt.clear();
return iMemBytes;
}
int CNaiveIntervalIndex::Find(unsigned int key, unsigned int * result, unsigned int size)
{
if(m_pDirectIndexer!=NULL) return m_pDirectIndexer->Find(key, result, size);
if(key==UINT_MAX)
{
unsigned int s=m_IndexForMaxInt[0];
if(s>size) s=size;
for(unsigned int i=1; i<=s; i++) *result++=m_IndexForMaxInt[i];
return s;
}
else
{
unsigned int s=m_IndexForWholeInterval[0];
if(s>size) s=size;
for(unsigned int i=1; i<=s; i++)
{
*result++=m_IndexForWholeInterval[i];
}
size-=s;
unsigned int k=(key>>16);
int l=m_L[k], h=m_L[k+1];
int m=0;
while(l<=h && m<m_iEndPointsNum)
{
m=(l+h)>>1;
if(key>=m_pEndPoints[m]) l=m+1;
else h=m-1;
}
if(h>=m_L[k] && h<m_iEndPointsNum)
{
unsigned int n=m_pIDPtr[h+1]-m_pIDPtr[h];
if(n>size) n=size;
unsigned int * id_list=m_pIDList+m_pIDPtr[h];
for(unsigned int i=0; i<n; i++) *result++=*id_list++;
s+=n;
}
return s;
}
}

View File

@@ -0,0 +1,45 @@
/*
*
* Copyright (c) 2008--2012
* String Matching Group, Lab for Intelligent Information Processing Technology,
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS).
* All rights reserved.
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2012-07-10
*
* This code is the exclusive and proprietary property of IIE-CAS. Usage for direct
* or indirect commercial advantage is not allowed without written permission from
* the authors.
*
*/
#ifndef H_NAIVE_INTERVAL_INDEX_CPP_H
#define H_NAIVE_INTERVAL_INDEX_CPP_H
#include "IntervalIndex.h"
#include "DirectIndex.h"
// CIntervalIndex implementation based on a sorted endpoint table with per-
// segment id lists; it delegates to a CDirectIndex when that index accepts
// the input.
class CNaiveIntervalIndex : public CIntervalIndex
{
public:
CNaiveIntervalIndex();
virtual ~CNaiveIntervalIndex();
// Builds the index from the closed intervals [a[i], b[i]].
virtual long long PreProcessing(const std::vector<unsigned int>& a, const std::vector<unsigned int>& b);
// Writes the ids of intervals containing `key` into `result` (capacity `size`).
virtual int Find(unsigned int key, unsigned int * result, unsigned int size);
private:
// Non-NULL when the direct index accepted the input; Find then forwards to it.
CDirectIndex * m_pDirectIndexer;
// [0] = count, then the ids of intervals whose upper bound is UINT_MAX.
unsigned int * m_IndexForMaxInt;
// Number of entries in m_pEndPoints.
int m_iEndPointsNum;
// Sorted distinct endpoints of the half-open intervals.
unsigned int * m_pEndPoints;
// m_pIDPtr[j]..m_pIDPtr[j+1] delimits segment j's ids in m_pIDList.
unsigned int * m_pIDPtr;
unsigned int * m_pIDList;
// Jump table keyed by the top 16 bits of the key.
int m_L[65537];
// [0] = count, then the ids of intervals covering the whole range.
unsigned int * m_IndexForWholeInterval;
};
#endif

View File

@@ -0,0 +1,179 @@
/*
*
* Copyright (c) 2008--2012
* String Matching Group, Lab for Intelligent Information Processing Technology,
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS).
* All rights reserved.
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2012-07-10
*
* This code is the exclusive and proprietary property of IIE-CAS. Usage for direct
* or indirect commercial advantage is not allowed without written permission from
* the authors.
*
*/
//#define DEBUG_NAIVE_INTERVAL_INDEX2
#include "NaiveIntervalIndex2.h"
#include <climits>
#include <algorithm>
#include <iterator>
#include <set>
#include <cassert>
using namespace std;
// Starts empty: tree size 1 (no internal nodes) and no tables allocated.
CNaiveIntervalIndex2::CNaiveIntervalIndex2()
{
    m_pIDList=NULL;
    m_pEndPoints=NULL;
    m_N=1;
}
// Releases the endpoint array and the per-leaf id lists; delete[] ignores
// null pointers, so no check is needed when nothing was built.
CNaiveIntervalIndex2::~CNaiveIntervalIndex2()
{
    delete [] m_pEndPoints;
    delete [] m_pIDList;
}
// Builds the index from the closed intervals [a[i], b[i]].
// The distinct endpoints are laid out as an implicit complete binary search
// tree in m_pEndPoints (children of node p are 2p and 2p+1); the m_N leaf
// positions m_N..2*m_N-1 correspond to the elementary segments, and
// m_pIDList[leaf-m_N] holds the ids of the intervals covering that segment.
// Returns the estimated memory use in bytes.
long long CNaiveIntervalIndex2::PreProcessing(const vector<unsigned int>& a, const vector<unsigned int>& b)
{
vector<unsigned int> A=a, B=b;
long long iMemBytes=0;
set<unsigned int> s;
// Convert to half-open intervals and collect the distinct endpoints.
for(int i=0, n=(int)A.size(); i<n; i++)
{
assert(A[i]<=B[i]);
// B cannot be incremented past UINT_MAX: remember the id separately.
if(B[i]==UINT_MAX)
{
this->m_IndexForMaxInt.push_back(i);
--B[i];
}
B[i]++; // now A[i], B[i] is half closed interval.
if(A[i]>=B[i]) continue;
s.insert(A[i]);
s.insert(B[i]);
}
iMemBytes+=(long long)(sizeof(unsigned int)*this->m_IndexForMaxInt.size());
// m_N = smallest power of two strictly greater than the endpoint count.
int M=(int)s.size();
this->m_N=1;
while(m_N<=M) m_N<<=1;
this->m_pEndPoints=new unsigned int[m_N];
this->m_pIDList=new vector<unsigned int>[m_N];
vector<unsigned int> v;
copy(s.begin(), s.end(), back_inserter(v));
// Pad the sorted endpoints with UINT_MAX up to m_N-1 entries.
for(int i=M; i<m_N-1; i++) v.push_back(UINT_MAX);
// Write the sorted values level by level: slot 1 is the root, slot 0 is unused.
int k=1;
for(int d=m_N>>1; d>0; d>>=1)
{
for(int j=1; d*j<m_N; j+=2)
{
this->m_pEndPoints[k++]=v[d*j-1];
}
}
assert(k==m_N);
iMemBytes+=m_N*(int)(sizeof(unsigned int)+sizeof(vector<unsigned int>));
// For each interval, descend the tree for A[i] and for B[i] to find the leaf
// positions p and q, then attach the id to every leaf in [p, q).
for(int i=0, n=(int)A.size(); i<n; i++)
{
if(A[i]>=B[i]) continue;
int p =1;
while(p<m_N)
{
if(A[i]<m_pEndPoints[p])
{
p<<=1;
}
else
{
p=(p<<1)+1;
}
}
int q =1;
while(q<m_N)
{
if(B[i]<m_pEndPoints[q])
{
q<<=1;
}
else
{
q=(q<<1)+1;
}
}
for(int j=p; j<q; j++) this->m_pIDList[j-m_N].push_back(i);
iMemBytes+=(q-p)*(int)sizeof(unsigned int);
}
#ifdef DEBUG_NAIVE_INTERVAL_INDEX2
printf("Naive Interval Index-2 membyte=%5.3lf (MB).\n", (double)iMemBytes/(1u<<20));
#endif
return iMemBytes;
}
// Reports the ids of the intervals containing `key` into `result` (capacity
// `size`) and returns how many were written.
// key == UINT_MAX is answered from m_IndexForMaxInt; any other key descends
// the implicit tree in m_pEndPoints to a leaf and reports that leaf's id list.
int CNaiveIntervalIndex2::Find(unsigned int key, unsigned int * result, unsigned int size)
{
    // Pick the id list to report from.
    const std::vector<unsigned int> * ids=&m_IndexForMaxInt;
    if(key!=UINT_MAX)
    {
        int node=1;
        while(node<m_N)
        {
            // Left child when key is below the separator, right child otherwise.
            node=(key<m_pEndPoints[node]) ? (node<<1) : ((node<<1)+1);
        }
        ids=&m_pIDList[node-m_N];
    }

    // Copy at most `size` ids.
    const int total=ids->size();
    unsigned int written=0;
    for(int j=0; j<total && written<size; ++j)
    {
        result[written++]=(*ids)[j];
    }
    return written;
}

View File

@@ -0,0 +1,40 @@
/*
*
* Copyright (c) 2008--2012
* String Matching Group, Lab for Intelligent Information Processing Technology,
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS).
* All rights reserved.
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2012-07-10
*
* This code is the exclusive and proprietary property of IIE-CAS. Usage for direct
* or indirect commercial advantage is not allowed without written permission from
* the authors.
*
*/
#ifndef H_NAIVE_INTERVAL_INDEX_2_CPP_H
#define H_NAIVE_INTERVAL_INDEX_2_CPP_H
#include "IntervalIndex.h"
// CIntervalIndex implementation that stores the interval endpoints as an
// implicit complete binary search tree with one id list per leaf.
class CNaiveIntervalIndex2 : public CIntervalIndex
{
public:
CNaiveIntervalIndex2();
virtual ~CNaiveIntervalIndex2();
// Builds the index from the closed intervals [a[i], b[i]].
virtual long long PreProcessing(const std::vector<unsigned int>& a, const std::vector<unsigned int>& b);
// Writes the ids of intervals containing `key` into `result` (capacity `size`).
virtual int Find(unsigned int key, unsigned int * result, unsigned int size);
private:
// Ids of intervals whose upper bound is UINT_MAX.
std::vector<unsigned int> m_IndexForMaxInt;
// Number of leaves; a power of two (1 until PreProcessing runs).
int m_N;
// Tree nodes in level order; index 1 is the root, index 0 is unused.
unsigned int * m_pEndPoints;
// m_N id lists, one per leaf.
std::vector<unsigned int> * m_pIDList;
};
#endif

View File

@@ -0,0 +1,81 @@
/*
*
* Copyright (c) 2008-2016
* String Algorithms Research Group
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS)
* National Engineering Laboratory for Information Security Technologies (NELIST)
* All rights reserved
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2016-05-31
*
* This code is the exclusive and proprietary property of IIE-CAS and NELIST.
* Usage for direct or indirect commercial advantage is not allowed without
* written permission from the authors.
*
*/
#include "PortIndex.h"
#include <stdlib.h>
#include <string.h>
using namespace std;
//#define DEBUG_PORT_INDEX
// Starts empty: every bitmap bit cleared and no value table allocated.
CPortIndex::CPortIndex()
{
    memset(m_bitmap, 0, sizeof(m_bitmap));
    m_values=NULL;
}
// Releases the value table; delete[] on a null pointer does nothing.
CPortIndex::~CPortIndex()
{
    delete [] m_values;
}
/*
closed interval: [a[i], b[i]] such that a[i]<=b[i]
*/
// Builds a direct lookup table for 16-bit keys from the closed intervals
// [a[i], b[i]].
//
// Layout after the call: for every key t in 0..65535, the ids of the
// intervals containing t are m_values[m_L[t]] .. m_values[m_L[t+1]-1], and
// bit t of m_bitmap is set exactly when that range is non-empty.
//
// Returns the estimated memory use in bytes, or -1 without changing the index
// when the input cannot be indexed: `b` shorter than `a`, or a non-empty
// interval whose upper bound exceeds 65535 (it would index past the end of
// m_L, and an upper bound of UINT_MAX would never terminate the fill loop).
long long CPortIndex::PreProcessing(const vector<unsigned int>& a, const vector<unsigned int>& b)
{
    if(b.size()<a.size()) return -1;
    for(unsigned int i=0; i<a.size(); i++)
    {
        if(a[i]<=b[i] && b[i]>65535) return -1;
    }

    // Start from a clean state so a repeated call neither leaks the previous
    // table nor keeps stale bitmap bits.
    delete [] m_values;
    m_values=NULL;
    memset(m_bitmap, 0, sizeof(m_bitmap));

    long long mem_bytes=0;
    // Counting pass: m_L[t] = number of intervals containing t.
    for(unsigned int i=0; i<=65536; i++) m_L[i]=0;
    for(unsigned int i=0; i<a.size(); i++)
    {
        for(unsigned int t=a[i]; t<=b[i]; t++) m_L[t]++;
    }
    // Inclusive prefix sums: m_L[t] = end offset of key t's id range.
    for(unsigned int i=1; i<=65536; i++) m_L[i]+=m_L[i-1];
    m_values=new unsigned int[m_L[65536]];
    mem_bytes+=sizeof(unsigned int)*m_L[65536]+sizeof(m_L)+sizeof(m_bitmap);
    // Fill pass: filling each range backwards turns the end offsets into start offsets.
    for(unsigned int i=0; i<a.size(); i++)
    {
        for(unsigned int t=a[i]; t<=b[i]; t++) m_values[--m_L[t]]=i;
    }
    for(unsigned int i=0; i<65536; i++)
    {
        if(m_L[i]<m_L[i+1]) m_bitmap[i>>3]|=(1U<<(i&7));
    }
#ifdef DEBUG_PORT_INDEX
    printf("Port Index membyte=%5.3lf (MB).\n", (double)mem_bytes/(1U<<20));
#endif
    return mem_bytes;
}
// Reports the ids of the intervals containing `key` into `result` (capacity
// `size`) and returns how many were written.
// Keys above 65535 cannot match anything; they are rejected up front because
// they would otherwise index past the end of m_bitmap and m_L.
int CPortIndex::Find(unsigned int key, unsigned int * result, unsigned int size)
{
    if(key>65535) return 0;
    // Quick reject: the bitmap bit is set only for keys with at least one id.
    if((m_bitmap[key>>3]&(1U<<(key&7)))==0) return 0;
    unsigned int n=m_L[key+1]-m_L[key];
    if(n>size) n=size;
    const unsigned int * p=m_values+m_L[key];
    for(unsigned int i=0; i<n; i++) *result++=*p++;
    return n;
}

View File

@@ -0,0 +1,40 @@
/*
*
* Copyright (c) 2008-2016
* String Algorithms Research Group
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS)
* National Engineering Laboratory for Information Security Technologies (NELIST)
* All rights reserved
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2016-05-31
*
* This code is the exclusive and proprietary property of IIE-CAS and NELIST.
* Usage for direct or indirect commercial advantage is not allowed without
* written permission from the authors.
*
*/
#ifndef H_PORT_INDEX_CPP_H
#define H_PORT_INDEX_CPP_H
#include "IntervalIndex.h"
// CIntervalIndex implementation for 16-bit keys: a direct table with one id
// range per key value in 0..65535.
class CPortIndex : public CIntervalIndex
{
public:
CPortIndex();
virtual ~CPortIndex();
// Builds the table from the closed intervals [a[i], b[i]].
virtual long long PreProcessing(const std::vector<unsigned int>& a, const std::vector<unsigned int>& b);
// Writes the ids of intervals containing `key` into `result` (capacity `size`).
virtual int Find(unsigned int key, unsigned int * result, unsigned int size);
private:
// One bit per key (8192 bytes = 65536 bits); set when the key has at least one id.
unsigned char m_bitmap[8192];
// m_L[t]..m_L[t+1] delimits the ids of key t inside m_values.
unsigned int m_L[65537];
unsigned int * m_values;
};
#endif

View File

@@ -0,0 +1,156 @@
/*
*
* Copyright (c) 2008--2015
* String Algorithms Research Group
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS)
* National Engineering Laboratory for Information Security Technologies (NELIST)
* All rights reserved
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2015-12-9
*
* This code is the exclusive and proprietary property of IIE-CAS and NELIST.
* Usage for direct or indirect commercial advantage is not allowed without
* written permission from the authors.
*
*/
#include "SuccinctHash.h"
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
// 64-bit one, used to build bit masks with shifts of up to 63.
static const unsigned long long ONE=1;
// Multiplier of the polynomial hash below.
static const unsigned int B=22695477;
// Hashes a 32-bit key by folding its four bytes, least significant first,
// into h = h*B + byte (unsigned arithmetic, so it wraps modulo 2^32).
// NOTE(review): FOR is a macro defined outside this file's visible part;
// presumably FOR(j, 4) runs the body four times - confirm in SuccinctHash.h.
inline unsigned int myhash(unsigned int key)
{
//return key;
unsigned int h=0;
FOR(j, 4)
{
h=h*B+(key&255);
key>>=8;
}
return h;
}
// Starts with no tables allocated; init() builds them.
CSuccinctHash::CSuccinctHash()
    : m_RT(NULL)
    , m_kv_ptr(NULL)
    , m_kv_array(NULL)
{
}
// Releases the rank directory and both key/value tables.
CSuccinctHash::~CSuccinctHash()
{
    // m_RT came from aligned_malloc(), so it must go back through aligned_free().
    if (m_RT)
        aligned_free(m_RT);

    // delete[] on a null pointer is a no-op, so no explicit checks are needed.
    delete [] m_kv_array;
    delete [] m_kv_ptr;
}
long long CSuccinctHash::init(unsigned int keys[], unsigned int values[], unsigned int num)
{
m_hash_bits=(int)(log10((double)num)/log10(2.0))+4;
if(m_hash_bits<8) m_hash_bits=8;
//printf("m_hash_bits=%d\n", m_hash_bits);
m_RT=(packedRT_t *)aligned_malloc(sizeof(packedRT_t)*((1U<<(m_hash_bits-8))+1), 64);
FOR(i, (1U<<(m_hash_bits-8)))
{
FOR(j, 4) m_RT[i].bitmap[j]=0;
}
FOR(i, num)
{
unsigned int h=myhash(keys[i]);
h&=((1U<<m_hash_bits)-1);
int q=h&255;
m_RT[h>>8].bitmap[q>>6]|=(ONE<<(q&63));
}
m_RT[0].A=0;
FOR(i, (1U<<(m_hash_bits-8)))
{
m_RT[i].B[0]=0;
m_RT[i].B[1]= popcnt_u64(m_RT[i].bitmap[0]);
m_RT[i].B[2]= m_RT[i].B[1]+popcnt_u64(m_RT[i].bitmap[1]);
m_RT[i].B[3]= m_RT[i].B[2]+popcnt_u64(m_RT[i].bitmap[2]);
m_RT[i+1].A=m_RT[i].A+m_RT[i].B[3]+popcnt_u64(m_RT[i].bitmap[3]);
}
int tn=m_RT[(1U<<(m_hash_bits-8))].A;
m_kv_ptr=new unsigned int[tn+1];
FOR(i, tn+1) m_kv_ptr[i]=0;
FOR(i, num)
{
unsigned int h=myhash(keys[i]);
h&=((1U<<m_hash_bits)-1);
unsigned int idx=rank(h);
m_kv_ptr[idx]++;
}
FOR(i, tn) m_kv_ptr[i+1]+=m_kv_ptr[i];
m_kv_array=new unsigned int[2*num];
FOR(i, num)
{
unsigned int h=myhash(keys[i]);
h&=((1U<<m_hash_bits)-1);
unsigned int idx=rank(h);
unsigned int j=--m_kv_ptr[idx];
m_kv_array[2*j]=keys[i];
m_kv_array[2*j+1]=values[i];
}
long long mem_bytes=(1U<<(m_hash_bits-3))*sizeof(char)*5/4+(tn+1+2*num)*sizeof(unsigned int);
return mem_bytes;
}
unsigned int CSuccinctHash::rank(unsigned int h)
{
int p=(h>>8);
int r=((h&255)>>6);
int s=(h&63);
unsigned long long e=m_RT[p].bitmap[r]&((ONE<<s)-1);
return m_RT[p].A+m_RT[p].B[r]+popcnt_u64(e);
}
// Collects the values stored under `key` into `value` (at most `size` of them)
// and returns how many were written.
int CSuccinctHash::find(unsigned int key, unsigned int * value, unsigned int size)
{
    const unsigned int slot=myhash(key)&((1U<<m_hash_bits)-1);
    const unsigned int bit=slot&255;

    // A clear bitmap bit means no key hashed to this slot.
    if((m_RT[slot>>8].bitmap[bit>>6]&(ONE<<(bit&63)))==0) return 0;

    const unsigned int bucket=rank(slot);
    unsigned int found=0;
    // Pairs of this slot live at [m_kv_ptr[bucket], m_kv_ptr[bucket+1]) as key,value.
    for(unsigned int pos=m_kv_ptr[bucket]; pos<m_kv_ptr[bucket+1]; pos++)
    {
        if(m_kv_array[2*pos]!=key) continue;   // different key sharing the slot
        if(found==size) break;                 // output buffer is full
        value[found++]=m_kv_array[2*pos+1];
    }
    return found;
}

View File

@@ -0,0 +1,42 @@
/*
*
* Copyright (c) 2008--2015
* String Algorithms Research Group
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS)
* National Engineering Laboratory for Information Security Technologies (NELIST)
* All rights reserved
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2015-12-9
*
* This code is the exclusive and proprietary property of IIE-CAS and NELIST.
* Usage for direct or indirect commercial advantage is not allowed without
* written permission from the authors.
*
*/
#ifndef H_SUCCINCT_HASH_CPP_H
#define H_SUCCINCT_HASH_CPP_H
#include "sigmastar_tools.h"
// Static multimap from 32-bit keys to 32-bit values: init() builds it once from
// parallel arrays, find() retrieves all values stored under a key.
// NOTE(review): the class owns raw buffers released in its destructor but declares
// no copy constructor/assignment; copying an instance would presumably double-free — confirm no caller copies it.
class CSuccinctHash
{
public:
CSuccinctHash();
~CSuccinctHash();
// Builds the table from `num` (keys[i], values[i]) pairs; returns an estimate of the bytes used.
long long init(unsigned int keys[], unsigned int values[], unsigned int num);
// Writes up to `size` values stored under `key` into `value`; returns the number written.
int find(unsigned int key, unsigned int * value, unsigned int size);
protected:
// Number of occupied hash slots that precede slot h.
unsigned int rank(unsigned int h);
protected:
// Number of hash bits kept after masking; set by init().
unsigned int m_hash_bits;
// Rank directory: one packedRT_t per 256 hash slots (allocated with aligned_malloc).
packedRT_t * m_RT;
// Start offset (in pairs) of each occupied slot's range inside m_kv_array.
unsigned int * m_kv_ptr;
// Interleaved key,value pairs: element 2*j is a key, 2*j+1 its value.
unsigned int * m_kv_array;
};
#endif // H_SUCCINCT_HASH_CPP_H

View File

@@ -0,0 +1,72 @@
#include "sigmastar_tools.h"
#include <stdlib.h>
#include <stdio.h>
/**
 * Allocate `size` bytes whose address is a multiple of `align`.
 *
 * The block is over-allocated with malloc(); the pointer malloc() returned is
 * stashed in the void* slot just below the aligned address so aligned_free()
 * can recover it.
 *
 * @param size   number of usable bytes requested; must be non-zero.
 * @param align  required alignment; must be a non-zero power of two.
 * @return the aligned pointer, or NULL on invalid arguments, on size overflow,
 *         or when malloc() fails. Release only with aligned_free().
 */
void * aligned_malloc(size_t size, size_t align)
{
    /* align must be a non-zero power of two, and empty requests are rejected. */
    if (align == 0 || (align & (align - 1)) != 0 || size == 0)
    {
        return NULL;
    }

    /* Room for the stashed pointer plus worst-case alignment padding. */
    const size_t overhead = sizeof(void *) + align - 1;

    /* Refuse requests whose total would wrap around size_t; otherwise malloc()
       would be handed a tiny size and the caller would overrun the block. */
    if (size > (size_t)-1 - overhead)
    {
        return NULL;
    }

    void * malloc_ptr = malloc(overhead + size);
    if (!malloc_ptr)
    {
        return NULL;
    }

    /* Skip the pointer slot, then round up to the next multiple of align. */
    void * aligned_ptr = (void *) (((size_t)malloc_ptr + overhead) & ~(align - 1));
    ((void **) aligned_ptr) [-1] = malloc_ptr;
    return aligned_ptr;
}
// Releases a block obtained from aligned_malloc(); a null pointer is ignored.
void aligned_free(void * aligned_ptr)
{
    if (!aligned_ptr)
        return;

    // The pointer originally returned by malloc() sits one void* below the aligned address.
    void ** stash = (void **) aligned_ptr - 1;
    free(*stash);
}
// Lookup table: popcnt_u16[v] is the number of set bits in the 16-bit value v.
// Zero-initialised at start-up and populated by initialize_sigmastar_tools().
unsigned char popcnt_u16[65536];
// Counts the set bits of x by examining one bit per iteration.
unsigned char popcnt_u32(unsigned int x)
{
    unsigned char bits = 0;
    for (; x != 0; x >>= 1)
    {
        bits += (unsigned char)(x & 1U);
    }
    return bits;
}
#ifndef USE_SSE_INSTR
// Portable 64-bit population count used when the SSE4.2 intrinsic is unavailable.
// Computed arithmetically (parallel bit summation) instead of through the
// popcnt_u16 table, so the result is correct even if
// initialize_sigmastar_tools() has not been called yet.
int popcnt_u64(unsigned long long x)
{
    x = x - ((x >> 1) & 0x5555555555555555ULL);                           // 2-bit partial sums
    x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL); // 4-bit partial sums
    x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0FULL;                           // per-byte sums
    return (int)((x * 0x0101010101010101ULL) >> 56);                      // add all bytes into the top byte
}
#endif
// Fills popcnt_u16 so that entry v holds the number of set bits of v. Always returns 1.
int initialize_sigmastar_tools()
{
    unsigned int v = 0;
    do
    {
        popcnt_u16[v] = popcnt_u32(v);
    } while (++v < 65536);
    return 1;
}

View File

@@ -0,0 +1,56 @@
/*
*
* Copyright (c) 2008-2016
* String Algorithms Research Group
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS)
* National Engineering Laboratory for Information Security Technologies (NELIST)
* All rights reserved
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2016-05-31
*
* This code is the exclusive and proprietary property of IIE-CAS and NELIST.
* Usage for direct or indirect commercial advantage is not allowed without
* written permission from the authors.
*
*/
#ifndef H_SIGMA_STAR_TOOLS_H
#define H_SIGMA_STAR_TOOLS_H
#include <stdlib.h>
#include <vector>
#include <string>
#include <map>
//#include "./StringMatch/include/string_matching.h"
using namespace std;
//#define DEBUG_PARTITION
// Fills the 16-bit popcount lookup table (popcnt_u16); returns 1.
int initialize_sigmastar_tools();
// One rank-directory block covering 256 consecutive hash slots.
struct packedRT_t
{
// Occupancy bitmap: bit (q&63) of word (q>>6) is set when slot q of this block is in use.
unsigned long long bitmap[4];
// Number of set bits in all preceding blocks.
unsigned int A;
// B[r] = number of set bits in bitmap[0..r-1] of this block (B[0] is 0).
unsigned char B[4];
};
// Allocates `size` bytes at an address that is a multiple of `align` (a non-zero power of two).
// Returns a null pointer on invalid arguments or allocation failure.
void * aligned_malloc(size_t size, size_t align);
// Releases a block returned by aligned_malloc(); a null pointer is ignored.
void aligned_free(void * aligned_ptr);
// Use the hardware popcount only on Linux builds compiled with SSE4.2 enabled.
#if (defined __linux__) && (defined __SSE4_2__)
#define USE_SSE_INSTR
#endif
#ifdef USE_SSE_INSTR
#include <nmmintrin.h>
// popcnt_u64 maps directly onto the _mm_popcnt_u64 intrinsic.
#define popcnt_u64 _mm_popcnt_u64
#else
// Software fallback, defined out of line.
int popcnt_u64(unsigned long long x);
#endif
// FOR(i, n): loops an int counter i over 0 .. n-1. The bound n is evaluated once and
// cast to int, so bounds above INT_MAX are not supported.
#define FOR(i, n) for(int i=0, _n=(int)(n); i<_n; i++)
#endif