This repository has been archived on 2025-09-14. You can view files and clone it, but cannot push or open issues or pull requests.
Files
tango-maat/scanner/ip_matcher/IntervalIndex/FastIntervalIndex.cpp

142 lines
2.9 KiB
C++

/*
*
* Copyright (c) 2008--2012
* String Matching Group, Lab for Intelligent Information Processing Technology,
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS).
* All rights reserved.
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2012-07-10
*
* This code is the exclusive and proprietary property of IIE-CAS. Usage for direct
* or indirect commercial advantage is not allowed without written permission from
* the authors.
*
*/
//#define DEBUG_FAST_INTERVAL_INDEX
#include "FastIntervalIndex.h"
#include "NaiveIntervalIndex.h"
#include "NaiveIntervalIndex2.h"
#include <climits>
#include <algorithm>
#include <set>
#include <cassert>
using namespace std;
/*
 * Creates an empty index whose first level is a table of 2^(r-l) slots.
 * r and l are stored as given; every slot starts out NULL and is only
 * populated later by PreProcessing().
 *
 * r must not be smaller than l (checked with assert).
 */
CFastIntervalIndex::CFastIntervalIndex(unsigned int r, unsigned int l)
{
	assert(l<=r);
	m_r=r;
	m_l=l;

	const unsigned int nSlots=1u<<(r-l);
	// The trailing "()" value-initializes the array, so every slot is a null pointer.
	m_pIndexer=new CIntervalIndex*[nSlots]();
}
/*
 * Releases every second-level indexer held in the slot table and then the
 * table itself.
 */
CFastIntervalIndex::~CFastIntervalIndex()
{
	const unsigned int nSlots=1u<<(m_r-m_l);
	for(unsigned int slot=0; slot<nSlots; ++slot)
	{
		// delete on a null pointer is a no-op, so empty slots need no special case.
		delete m_pIndexer[slot];
	}
	delete [] m_pIndexer;
}
long long CFastIntervalIndex::PreProcessing(const vector<unsigned int>& a, const vector<unsigned int>& b)
{
vector<unsigned int> A=a, B=b;
long long iMemBytes=0;
m_IDs.resize(1u<<(m_r-m_l));
for(int i=0; i<(int)A.size(); i++)
{
assert(A[i]<=B[i]);
if(B[i]==UINT_MAX)
{
this->m_IndexForMaxInt.push_back(i);
--B[i];
}
B[i]++; // now A[i], B[i] is half closed interval.
if(A[i]>=B[i]) continue;
unsigned int p, q;
if(m_l==32) // in this case, the bit shift operator will mod 32 to 0.
{
p=0;
q=1;
}
else
{
p=A[i]>>m_l;
q= (B[i]&((1u<<m_l)-1))==0 ? (B[i]>>m_l) : (B[i]>>m_l)+1;
}
for(unsigned int j=p; j<q; j++) m_IDs[j].push_back(i);
}
for(int i=0, n=1<<(m_r-m_l); i<n; i++)
{
if(m_IDs[i].size()==0) continue;
vector<unsigned int> C, D;
for(int j=0; j<(int)m_IDs[i].size(); j++)
{
int id=m_IDs[i][j];
C.push_back(A[id]);
D.push_back(B[id]-1);
}
this->m_pIndexer[i]=new CNaiveIntervalIndex2;
iMemBytes+=this->m_pIndexer[i]->PreProcessing(C, D);
}
iMemBytes+=(1<<(m_r-m_l))*(int)sizeof(CIntervalIndex *);
#ifdef DEBUG_FAST_INTERVAL_INDEX
printf("Fast Interval Index membyte=%5.3lf (MB).\n", (double)iMemBytes/(1u<<20));
#endif
return iMemBytes;
}
int CFastIntervalIndex::Find(unsigned int key, unsigned int * result, unsigned int size)
{
unsigned int n = 0;
int s = 0;
if(key==UINT_MAX)
{
s = m_IndexForMaxInt.size();
for(int i = 0; i < s; i++)
{
if (n >= size)
{
return n;
}
result[n++] = m_IndexForMaxInt[i];
}
}
else
{
vector<unsigned int> v;
unsigned int t=(m_l<32) ? (key>>m_l) : 0;
CIntervalIndex * pIndexer=this->m_pIndexer[t];
if(pIndexer!=NULL)
{
int k = pIndexer->Find(key, result, size);
int i = 0;
for( i=0; i < k; i++)
result[i]=m_IDs[t][result[i]];
n = i;
}
}
return n;
}