This repository has been archived on 2025-09-14. You can view files and clone it, but cannot push or open issues or pull requests.
Files
tango-maat/scanner/ip_matcher/IntervalIndex/SuccinctHash.cpp

158 lines
3.2 KiB
C++
Raw Normal View History

/*
*
* Copyright (c) 2008--2015
* String Algorithms Research Group
* Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS)
* National Engineering Laboratory for Information Security Technologies (NELIST)
* All rights reserved
*
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
* Last modification: 2015-12-9
*
* This code is the exclusive and proprietary property of IIE-CAS and NELIST.
* Usage for direct or indirect commercial advantage is not allowed without
* written permission from the authors.
*
*/
#include "SuccinctHash.h"
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
// 64-bit constant one. Shifting this (rather than the int literal 1) keeps the
// single-bit masks applied to the 64-bit bitmap words in 64-bit arithmetic.
static const unsigned long long ONE = 1ULL;
// Multiplier used by myhash() when folding the bytes of a key together.
static const unsigned int B = 22695477U;
inline unsigned int myhash(unsigned int key)
{
//return key;
unsigned int h=0;
FOR(j, 4)
{
h=h*B+(key&255);
key>>=8;
}
return h;
}
/*
 * Construct an empty table. No storage is allocated here; the three buffers
 * stay NULL until init() builds them, and the destructor relies on that to
 * know what has to be released.
 */
CSuccinctHash::CSuccinctHash()
{
    m_RT = NULL;
    m_kv_array = NULL;
    m_kv_ptr = NULL;
    // find() reads m_hash_bits to build its mask, so give it a definite value
    // instead of leaving it indeterminate until init() is called.
    m_hash_bits = 0;
}
/*
 * Release the rank table and the two key/value arrays, if they were allocated.
 */
CSuccinctHash::~CSuccinctHash()
{
    // aligned_free is declared outside this file and its handling of NULL is
    // not visible here, so the explicit guard is kept.
    if (m_RT != NULL) aligned_free(m_RT);

    // delete [] on a null pointer does nothing, so no guards are needed.
    delete [] m_kv_array;
    delete [] m_kv_ptr;
}
long long CSuccinctHash::init(unsigned int keys[], unsigned int values[], unsigned int num)
{
m_hash_bits=(int)(log10((double)num)/log10(2.0))+4;
if(m_hash_bits<8) m_hash_bits=8;
//printf("m_hash_bits=%d\n", m_hash_bits);
m_RT=(packedRT_t *)aligned_malloc(sizeof(packedRT_t)*((1U<<(m_hash_bits-8))+1), 64);
FOR(i, (1U<<(m_hash_bits-8)))
{
FOR(j, 4) m_RT[i].bitmap[j]=0;
}
FOR(i, num)
{
unsigned int h=myhash(keys[i]);
h&=((1U<<m_hash_bits)-1);
int q=h&255;
m_RT[h>>8].bitmap[q>>6]|=(ONE<<(q&63));
}
m_RT[0].A=0;
FOR(i, (1U<<(m_hash_bits-8)))
{
m_RT[i].B[0]=0;
m_RT[i].B[1]= popcnt_u64(m_RT[i].bitmap[0]);
m_RT[i].B[2]= m_RT[i].B[1]+popcnt_u64(m_RT[i].bitmap[1]);
m_RT[i].B[3]= m_RT[i].B[2]+popcnt_u64(m_RT[i].bitmap[2]);
m_RT[i+1].A=m_RT[i].A+m_RT[i].B[3]+popcnt_u64(m_RT[i].bitmap[3]);
}
int tn=m_RT[(1U<<(m_hash_bits-8))].A;
m_kv_ptr=new unsigned int[tn+2]();
FOR(i, tn+1) m_kv_ptr[i]=0;
FOR(i, num)
{
unsigned int h=myhash(keys[i]);
h&=((1U<<m_hash_bits)-1);
unsigned int idx=rank(h);
m_kv_ptr[idx]++;
}
FOR(i, tn+1) m_kv_ptr[i+1]+=m_kv_ptr[i];
m_kv_array=new unsigned int[2*num];
FOR(i, num)
{
unsigned int h=myhash(keys[i]);
h&=((1U<<m_hash_bits)-1);
unsigned int idx=rank(h);
unsigned int j=--m_kv_ptr[idx];
m_kv_array[2*j]=keys[i];
m_kv_array[2*j+1]=values[i];
}
long long mem_bytes=(1U<<(m_hash_bits-3))*sizeof(char)*5/4+(tn+1+2*num)*sizeof(unsigned int);
return mem_bytes;
}
unsigned int CSuccinctHash::rank(unsigned int h)
{
int p=(h>>8);
int r=((h&255)>>6);
int s=(h&63);
unsigned long long e=m_RT[p].bitmap[r]&((ONE<<s)-1);
return m_RT[p].A+m_RT[p].B[r]+popcnt_u64(e);
}
/*
 * Look up key and copy the values stored under it into value[].
 *
 * Parameters:
 *   key   - key to search for
 *   value - output buffer with room for at least size elements
 *   size  - capacity of value[]; at most this many values are written
 *
 * Returns the number of values written. Returns 0 when the key's hash slot is
 * empty, when no stored key equals key, or when the table has not been built.
 */
int CSuccinctHash::find(unsigned int key, unsigned int * value, unsigned int size)
{
    // Before init() the buffers are NULL; report "not found" instead of
    // dereferencing them.
    if (m_RT == NULL || m_kv_ptr == NULL || m_kv_array == NULL) return 0;

    const unsigned int h = myhash(key) & ((1U << m_hash_bits) - 1);
    const unsigned int q = h & 255;

    // The bitmap bit says whether any key at all hashes to this slot.
    if ((m_RT[h >> 8].bitmap[q >> 6] & (ONE << (q & 63))) == 0) return 0;

    unsigned int found = 0;
    const unsigned int idx = rank(h);
    const size_t last = m_kv_ptr[idx + 1];

    // size_t positions keep 2*pos from wrapping in 32-bit arithmetic.
    for (size_t pos = m_kv_ptr[idx]; pos < last; ++pos)
    {
        // Different keys can share a slot; skip the ones that do not match.
        if (m_kv_array[2 * pos] != key) continue;

        // Output buffer is full: stop before writing past its end.
        if (found == size) return found;

        *value++ = m_kv_array[2 * pos + 1];
        ++found;
    }
    return found;
}