This repository has been archived on 2025-09-14. You can view files and clone it, but cannot push or open issues or pull requests.
Files
tango-maat/scanner/ip_matcher/IntervalIndex/DirectIndex.cpp

160 lines
3.2 KiB
C++
Raw Normal View History

/*
*
* Copyright (c) 2008--2012
* Advanced Algorithm and Algorithm Engineering Group
* National Engineering Laboratory for Information Security Technologies (NELIST)
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS)
* All rights reserved
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2012-11-27
*
* This code is the exclusive and proprietary property of NELIST and IIE-CAS.
* Usage for direct or indirect commercial advantage is not allowed without
* written permission from the authors.
*
*/
#include "DirectIndex.h"
#include <algorithm>
#include <map>
using namespace std;
//#define DEBUG_DIRECT_INDEX
/*
 * Constructs an empty, unbuilt index.
 *
 * The three arrays are NULL until PreProcessing() succeeds. The key range is
 * initialised as empty (m_min_key > m_max_key) so that Find() rejects every
 * key on an index that was never built, instead of reading indeterminate
 * members.
 */
CDirectIndex::CDirectIndex()
{
	m_keys=NULL;
	m_ptr=NULL;
	m_values=NULL;
	// Empty range: no key satisfies m_min_key <= key <= m_max_key.
	m_min_key=1;
	m_max_key=0;
	m_theta=0;
}
/*
 * Releases the key, offset and value arrays owned by the index.
 *
 * NOTE(review): the class owns raw arrays; unless the header disables copying,
 * a copied CDirectIndex would free them twice -- confirm against DirectIndex.h.
 */
CDirectIndex::~CDirectIndex()
{
	// delete[] on a null pointer is a no-op, so no NULL guards are required.
	delete [] m_keys;
	delete [] m_ptr;
	delete [] m_values;
}
/*
closed interval: [a[i], b[i]] such that a[i]<=b[i]
*/
long long CDirectIndex::PreProcessing(const vector<unsigned int>& a, const vector<unsigned int>& b)
{
long long mem_bytes=0;
unsigned long long count=0;
for(unsigned int i=0; i<a.size(); i++)
{
count+=((unsigned long long)b[i]-a[i]+1);
}
if(count>2*a.size()) return -1;
map<unsigned int, unsigned int> M;
for(unsigned int i=0; i<a.size(); i++)
{
for(unsigned long long t=a[i]; t<=b[i]; t++)
{
M[(unsigned int)t]++;
}
}
m_keys =new unsigned int[M.size()];
m_ptr =new unsigned int[M.size()+1];
m_values=new unsigned int[(unsigned int)count];
mem_bytes+=sizeof(unsigned int)*(2*M.size()+(unsigned int)count);
m_ptr[0]=0;
map<unsigned int, unsigned int>::const_iterator it=M.begin();
for(unsigned int k=0; k<M.size(); k++, ++it)
{
m_keys[k]=it->first;
m_ptr[k+1]=m_ptr[k]+it->second;
}
for(unsigned int i=0; i<a.size(); i++)
{
for(unsigned long long t=a[i]; t<=b[i]; t++)
{
unsigned int k=(unsigned int)(lower_bound(m_keys, m_keys+M.size(), (unsigned int)t)-m_keys);
m_values[m_ptr[k]++]=i;
}
}
for(int k=(int)M.size(); k>0; k--)
{
m_ptr[k]=m_ptr[k-1];
}
m_ptr[0]=0;
m_min_key=m_keys[0];
m_max_key=m_keys[M.size()-1];
for(unsigned int k=0; k<M.size(); k++)
{
m_keys[k]-=m_min_key;
}
unsigned long long ONE=1;
unsigned int theta=0;
while((ONE<<(theta+16))<=m_keys[M.size()-1]) theta++;
m_theta=theta;
m_L[0]=0;
for(unsigned int i=1; i<65536; i++)
{
m_L[i]=(unsigned int)(lower_bound(m_keys, m_keys+M.size(), i*(1U<<theta))-m_keys);
}
m_L[65536]=(unsigned int)M.size();
mem_bytes+=sizeof(m_L);
M.clear();
#ifdef DEBUG_DIRECT_INDEX
printf("Direct Index membyte=%5.3lf (MB).\n", (double)mem_bytes/(1u<<20));
#endif
return mem_bytes;
}
/*
 * Looks up the ids of all intervals that contain key.
 *
 * Parameters:
 *   key    - the point to look up
 *   result - output buffer receiving the interval ids
 *   size   - capacity of result, in elements
 *
 * Returns the number of ids written to result; at most size, so the list is
 * truncated when the buffer is too small. Returns 0 when no interval contains
 * key or when the index has not been built.
 */
int CDirectIndex::Find(unsigned int key, unsigned int * result, unsigned int size)
{
	// Nothing to search before a successful PreProcessing().
	if(m_keys==NULL)
		return 0;
	if(key<m_min_key || key>m_max_key)
		return 0;

	// Keys are stored relative to the smallest key.
	key-=m_min_key;

	// The top bits of the shifted key select a bucket; m_L gives its bounds
	// as a half-open range of positions in m_keys.
	const unsigned int bucket=key>>m_theta;
	const unsigned int * first=m_keys+m_L[bucket];
	const unsigned int * last =m_keys+m_L[bucket+1];

	// Keys are sorted and distinct, so an exact match is at lower_bound.
	const unsigned int * pos=std::lower_bound(first, last, key);
	if(pos==last || *pos!=key)
		return 0;

	// m_ptr[idx]..m_ptr[idx+1] delimits this key's ids inside m_values.
	const size_t idx=(size_t)(pos-m_keys);
	const unsigned int available=m_ptr[idx+1]-m_ptr[idx];
	const unsigned int n=std::min(available, size);
	const unsigned int * values=m_values+m_ptr[idx];
	std::copy(values, values+n, result);
	return (int)n;
}