/* * * Copyright (c) 2008-2016 * String Algorithms Research Group * Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS) * National Engineering Laboratory for Information Security Technologies (NELIST) * All rights reserved * * Written by: LIU YANBING (liuyanbing@iie.ac.cn) * Last modification: 2016-06-03 * * This code is the exclusive and proprietary property of IIE-CAS and NELIST. * Usage for direct or indirect commercial advantage is not allowed without * written permission from the authors. * */ /* NOTE(review): this copy of the file looks damaged: line breaks are collapsed and text following some less-than signs is missing (include directives without a header name, vector without an element type, fused comparison operators, cut-off loop headers). As laid out here every line comment also swallows the rest of its physical line. Restore the original from version control before building or changing logic. */ #include "Int128IntervalIndex.h" #include #include #include #include using namespace std; //#define DEBUG_INT128_INTERVAL_INDEX /* Ordering operators for uint128_t. operator< walks I[1] then I[0] looking for the first differing word. NOTE(review): the text from the end of operator< up to the return b statement is fragmentary in this copy; the other operators cannot be read reliably. */ bool operator<(const uint128_t& lhs, const uint128_t& rhs) { int i=1; while(i>=0 && lhs.I[i]==rhs.I[i]) --i; return (i>=0 && lhs.I[i](const uint128_t& lhs, const uint128_t& rhs) { return rhs=(const uint128_t& lhs, const uint128_t& rhs) { return !(lhs>32) & 0xFFFFFFFF; a[1] = I[1] & 0xFFFFFFFF; a[0] = (I[1]>>32) & 0xFFFFFFFF; uint128_t b(a); return b; } /* Reduces a 128-bit value to 32 bits: XORs ip->I[0] with ip->I[1], then XORs the low 32 bits of that value with its high 32 bits. */ unsigned int ipv6_hash(const uint128_t * ip) { unsigned long long I = (ip->I[0])^(ip->I[1]); return (I & 0xFFFFFFFF)^(I >> 32); } /* Constructor: sets every array pointer to NULL and m_iEndPointsNum to 0. NOTE(review): m_is_single is not assigned here although Find() branches on it; confirm the header initializes it or that PreProcessing always runs first. */ CInt128IntervalIndex::CInt128IntervalIndex() { m_array=NULL; m_iEndPointsNum=0; m_pEndPoints=NULL; m_pIDPtr=NULL; m_pIDList=NULL; m_IndexForMaxInt=NULL; m_IndexForWholeInterval = NULL; } /* Destructor: releases each member array with delete [] when it is not NULL. */ CInt128IntervalIndex::~CInt128IntervalIndex() { if(m_array!=NULL) delete [] m_array; if(this->m_pEndPoints!=NULL) { delete [] this->m_pEndPoints; } if(this->m_pIDList!=NULL) { delete [] this->m_pIDList; } if(this->m_pIDPtr!=NULL) { delete [] this->m_pIDPtr; } if(this->m_IndexForMaxInt!=NULL) { delete [] this->m_IndexForMaxInt; } if(this->m_IndexForWholeInterval != NULL) { delete [] this->m_IndexForWholeInterval; } } /* Builds the index from the parallel vectors a and b. Returns -1 when a is empty. Sets m_is_single to true only if a[i] equals b[i] for every i, then returns the result of process_single(a) or process_interval(a, b) accordingly. NOTE(review): b is indexed up to a.size() with no check that b is at least as long. */ long long CInt128IntervalIndex::PreProcessing(const vector& a, const vector& b) { if (a.size() == 0) { return -1; } m_is_single = true; for (size_t i = 0; i < a.size(); i++) { if (a[i] != b[i]) { m_is_single = false; break; } } if (m_is_single) { 
return process_single(a); } else { return process_interval(a, b); } } /* Path used when every a[i] equals b[i]: copies a into m_array and hands ipv6_hash(a[i]) as key with i as value to m_ip_hash.init. Returns -1 if init returns a negative value, otherwise the byte tally plus the value returned by init. The tally includes the temporary keys and values arrays, which are freed before returning. */ long long CInt128IntervalIndex::process_single(const vector& a) { long long mem_bytes = 0; unsigned int num = a.size(); unsigned int *keys = new unsigned int[num]; unsigned int *values = new unsigned int[num]; m_array = new uint128_t[num]; mem_bytes += (2 * sizeof(unsigned int) + sizeof(uint128_t)) * num; for (unsigned int i = 0; i < num; i++) { keys[i] = ipv6_hash(&a[i]); values[i] = i; m_array[i] = a[i]; } long long ret = m_ip_hash.init(keys, values, num); delete [] keys; delete [] values; if(ret<0) return -1; mem_bytes += ret; return mem_bytes; } /* Path used when at least one pair differs. Visible steps: copies a and b, gathers end points in the set s, records indices in IndexForMaxInt when B[i] is all ones and in IndexForWholeInterval when A[i] is all zeros and the adjusted B[i] is all ones, stores both lists in member arrays with the element count at slot 0, and copies the ordered end points from s into m_pEndPoints. Returns iMemBytes. NOTE(review): several loop headers are garbled in this copy, presumably along with the code that fills m_pIDPtr and m_pIDList. */ long long CInt128IntervalIndex::process_interval(const vector& a, const vector& b) { vector A = a, B = b; long long iMemBytes = 0; set s; vector IndexForMaxInt; vector IndexForWholeInterval; for (int i = 0, n = (int)A.size(); i B[i]) continue; if (B[i].is_all_ones()) { IndexForMaxInt.push_back(i); --B[i]; } ++B[i]; // now A[i], B[i] is half closed interval. if (A[i] >= B[i]) continue; if (A[i].is_all_zeros() && B[i].is_all_ones()) { IndexForWholeInterval.push_back(i); continue; } s.insert(A[i]); s.insert(B[i]); } m_IndexForWholeInterval=new unsigned int[IndexForWholeInterval.size()+1]; m_IndexForWholeInterval[0]=IndexForWholeInterval.size(); copy(IndexForWholeInterval.begin(), IndexForWholeInterval.end(), m_IndexForWholeInterval+1); iMemBytes+=(long long)(sizeof(unsigned int)*(IndexForWholeInterval.size()+1)); m_IndexForMaxInt=new unsigned int[IndexForMaxInt.size()+1]; m_IndexForMaxInt[0]=IndexForMaxInt.size(); copy(IndexForMaxInt.begin(), IndexForMaxInt.end(), m_IndexForMaxInt+1); iMemBytes+=(long long)(sizeof(unsigned int)*(IndexForMaxInt.size()+1)); this->m_iEndPointsNum=(long long)s.size(); this->m_pEndPoints=new uint128_t[m_iEndPointsNum]; copy(s.begin(), s.end(), m_pEndPoints); iMemBytes+=(long long)(m_iEndPointsNum*sizeof(uint128_t)); vector count(m_iEndPointsNum, 0); for(int i=0, n=(int)A.size(); i=B[i] || (A[i].is_all_zeros() && 
B[i].is_all_ones())) continue; long long l=(int)(lower_bound(m_pEndPoints, m_pEndPoints+m_iEndPointsNum, A[i])-m_pEndPoints); long long h=(int)(lower_bound(m_pEndPoints, m_pEndPoints+m_iEndPointsNum, B[i])-m_pEndPoints); assert(m_pEndPoints[l]==A[i] && m_pEndPoints[h]==B[i]); for(long long j=l; j=B[i] || (A[i].is_all_zeros() && B[i].is_all_ones())) continue; long long l=(int)(lower_bound(m_pEndPoints, m_pEndPoints+m_iEndPointsNum, A[i])-m_pEndPoints); long long h=(int)(lower_bound(m_pEndPoints, m_pEndPoints+m_iEndPointsNum, B[i])-m_pEndPoints); assert(m_pEndPoints[l]==A[i] && m_pEndPoints[h]==B[i]); for(long long j=l; j=m_pEndPoints[k]) k++; m_L[i] = (k>=1)? k-1 : 0; } m_L[65536]=m_iEndPointsNum-1; iMemBytes+=(long long)sizeof(m_L); #ifdef DEBUG_INT128_INTERVAL_INDEX printf("Int128 Interval Index membyte=%5.3lf (MB).\n", (double)iMemBytes/(1u<<20)); #endif A.clear(); B.clear(); s.clear(); IndexForMaxInt.clear(); return iMemBytes; } /* Lookup entry point: forwards to Find_single when m_is_single is set, otherwise to Find_interval, and returns that result. */ int CInt128IntervalIndex::Find(const uint128_t *key, unsigned int *result, unsigned int size) { if (m_is_single) { return Find_single(key, result, size); } else { return Find_interval(key, result, size); } } /* Asks m_ip_hash.find for candidates under ipv6_hash(key), then keeps only the candidates whose m_array entry equals *key, packing them at the front of result. Returns how many were kept. */ int CInt128IntervalIndex::Find_single(const uint128_t *key, unsigned int *result, unsigned int size) { unsigned int h = ipv6_hash(key); int ret = m_ip_hash.find(h, result, size); int j=0; for (int i = 0; i < ret; i++) { if(*key == m_array[result[i]]) result[j++] = result[i]; } return j; } /* Interval lookup. An all-ones key returns up to size ids taken from m_IndexForMaxInt. Any other key first receives the ids in m_IndexForWholeInterval, then a binary search over m_pEndPoints runs between m_L[k] and m_L[k+1], where k is the low 16 bits of (*key)>>112. NOTE(review): the end of this function is missing from this copy. */ int CInt128IntervalIndex::Find_interval(const uint128_t *key, unsigned int *result, unsigned int size) { if (key->is_all_ones()) { unsigned int s = m_IndexForMaxInt[0]; if (s > size) s = size; for(unsigned int i = 1; i <= s; i++) *result++=m_IndexForMaxInt[i]; return s; } else { unsigned int s = m_IndexForWholeInterval[0]; if (s > size) s = size; for (unsigned int i = 1; i <= s; i++) { *result++=m_IndexForWholeInterval[i]; } size-=s; uint128_t t = (*key)>>112; unsigned int k = t.I[0]&0xffff; long long l=m_L[k], h=m_L[k+1]; long long m=0; while (l <= 
h && m < m_iEndPointsNum) { m = (l+h)>>1; if (*key >= m_pEndPoints[m]) { l = m+1; } else { h = m-1; } } /* ids for slot h occupy m_pIDList from offset m_pIDPtr[h] up to m_pIDPtr[h+1] */ if (h >= m_L[k] && h < m_iEndPointsNum) { long long n = m_pIDPtr[h+1] - m_pIDPtr[h]; if (n > size) n = size; unsigned int *id_list = m_pIDList + m_pIDPtr[h]; for(unsigned int i=0; i