/* * Copyright (c) 2012-2022, Jyri J. Virkki * All rights reserved. * * This file is under BSD license. See LICENSE file. */ /* * Refer to bloom.h for documentation on the public interfaces. */ #include #include #include #include #include #include #include #include #include #include #include "bloom.h" #include "murmurhash2.h" #define MAKESTRING(n) STRING(n) #define STRING(n) #n #define BLOOM_MAGIC "libbloom2" #define BLOOM_VERSION_MAJOR 2 #define BLOOM_VERSION_MINOR 0 inline static int test_bit_set_bit(unsigned char * buf, unsigned long int bit, int set_bit) { unsigned long int byte = bit >> 3; unsigned char c = buf[byte]; // expensive memory access unsigned char mask = 1 << (bit % 8ul); if (c & mask) { return 1; } else { if (set_bit) { buf[byte] = c | mask; } return 0; } } static int bloom_check_add(struct bloom * bloom, const void * buffer, int len, int add) { if (bloom->ready == 0) { printf("bloom at %p not initialized!\n", (void *)bloom); return -1; } unsigned char hits = 0; unsigned int a = murmurhash2(buffer, len, 0x9747b28c); unsigned int b = murmurhash2(buffer, len, a); unsigned long int x; unsigned long int i; for (i = 0; i < bloom->hashes; i++) { x = (a + b*i) % bloom->bits; if (test_bit_set_bit(bloom->bf, x, add)) { hits++; } else if (!add) { // Don't care about the presence of all the bits. Just our own. return 0; } } if (hits == bloom->hashes) { return 1; // 1 == element already in (or collision) } return 0; } // DEPRECATED - Please migrate to bloom_init2. int bloom_init(struct bloom * bloom, int entries, double error) { return bloom_init2(bloom, (unsigned int)entries, error); } int bloom_init2(struct bloom * bloom, unsigned int entries, double error) { if (sizeof(unsigned long int) < 8) { printf("error: libbloom will not function correctly because\n"); printf("sizeof(unsigned long int) == %ld\n", sizeof(unsigned long int)); exit(1); } memset(bloom, 0, sizeof(struct bloom)); if (entries < 1000 || error <= 0 || error >= 1) { return 1; } bloom->entries = entries; bloom->error = error; double num = -log(bloom->error); double denom = 0.480453013918201; // ln(2)^2 bloom->bpe = (num / denom); long double dentries = (long double)entries; long double allbits = dentries * bloom->bpe; bloom->bits = (unsigned long int)allbits; if (bloom->bits % 8) { bloom->bytes = (bloom->bits / 8) + 1; } else { bloom->bytes = bloom->bits / 8; } bloom->hashes = (unsigned char)ceil(0.693147180559945 * bloom->bpe); // ln(2) bloom->bf = (unsigned char *)calloc(bloom->bytes, sizeof(unsigned char)); if (bloom->bf == NULL) { // LCOV_EXCL_START return 1; } // LCOV_EXCL_STOP bloom->ready = 1; bloom->major = BLOOM_VERSION_MAJOR; bloom->minor = BLOOM_VERSION_MINOR; return 0; } int bloom_check(struct bloom * bloom, const void * buffer, int len) { return bloom_check_add(bloom, buffer, len, 0); } int bloom_add(struct bloom * bloom, const void * buffer, int len) { return bloom_check_add(bloom, buffer, len, 1); } void bloom_print(struct bloom * bloom) { printf("bloom at %p\n", (void *)bloom); if (!bloom->ready) { printf(" *** NOT READY ***\n"); } printf(" ->version = %d.%d\n", bloom->major, bloom->minor); printf(" ->entries = %u\n", bloom->entries); printf(" ->error = %f\n", bloom->error); printf(" ->bits = %lu\n", bloom->bits); printf(" ->bits per elem = %f\n", bloom->bpe); printf(" ->bytes = %lu", bloom->bytes); unsigned int KB = bloom->bytes / 1024; unsigned int MB = KB / 1024; printf(" (%u KB, %u MB)\n", KB, MB); printf(" ->hash functions = %d\n", bloom->hashes); } void bloom_free(struct bloom * bloom) { if (bloom->ready) { free(bloom->bf); } bloom->ready = 0; } int bloom_reset(struct bloom * bloom) { if (!bloom->ready) return 1; memset(bloom->bf, 0, bloom->bytes); return 0; } int bloom_save(struct bloom * bloom, char * filename) { if (filename == NULL || filename[0] == 0) { return 1; } int fd = open(filename, O_WRONLY | O_CREAT, 0644); if (fd < 0) { return 1; } ssize_t out = write(fd, BLOOM_MAGIC, strlen(BLOOM_MAGIC)); if (out != strlen(BLOOM_MAGIC)) { goto save_error; } // LCOV_EXCL_LINE uint16_t size = sizeof(struct bloom); out = write(fd, &size, sizeof(uint16_t)); if (out != sizeof(uint16_t)) { goto save_error; } // LCOV_EXCL_LINE out = write(fd, bloom, sizeof(struct bloom)); if (out != sizeof(struct bloom)) { goto save_error; } // LCOV_EXCL_LINE out = write(fd, bloom->bf, bloom->bytes); if (out != bloom->bytes) { goto save_error; } // LCOV_EXCL_LINE close(fd); return 0; // LCOV_EXCL_START save_error: close(fd); return 1; // LCOV_EXCL_STOP } int bloom_load(struct bloom * bloom, char * filename) { int rv = 0; if (filename == NULL || filename[0] == 0) { return 1; } if (bloom == NULL) { return 2; } memset(bloom, 0, sizeof(struct bloom)); int fd = open(filename, O_RDONLY); if (fd < 0) { return 3; } char line[30]; memset(line, 0, 30); ssize_t in = read(fd, line, strlen(BLOOM_MAGIC)); if (in != strlen(BLOOM_MAGIC)) { rv = 4; goto load_error; } if (strncmp(line, BLOOM_MAGIC, strlen(BLOOM_MAGIC))) { rv = 5; goto load_error; } uint16_t size; in = read(fd, &size, sizeof(uint16_t)); if (in != sizeof(uint16_t)) { rv = 6; goto load_error; } if (size != sizeof(struct bloom)) { rv = 7; goto load_error; } in = read(fd, bloom, sizeof(struct bloom)); if (in != sizeof(struct bloom)) { rv = 8; goto load_error; } bloom->bf = NULL; if (bloom->major != BLOOM_VERSION_MAJOR) { rv = 9; goto load_error; } bloom->bf = (unsigned char *)malloc(bloom->bytes); if (bloom->bf == NULL) { rv = 10; goto load_error; } // LCOV_EXCL_LINE in = read(fd, bloom->bf, bloom->bytes); if (in != bloom->bytes) { rv = 11; free(bloom->bf); bloom->bf = NULL; goto load_error; } close(fd); return rv; load_error: close(fd); bloom->ready = 0; return rv; } int bloom_merge(struct bloom * bloom_dest, struct bloom * bloom_src) { if (bloom_dest->ready == 0) { printf("bloom at %p not initialized!\n", (void *)bloom_dest); return -1; } if (bloom_src->ready == 0) { printf("bloom at %p not initialized!\n", (void *)bloom_src); return -1; } if (bloom_dest->entries != bloom_src->entries) { return 1; } if (bloom_dest->error != bloom_src->error) { return 1; } if (bloom_dest->major != bloom_src->major) { return 1; } if (bloom_dest->minor != bloom_src->minor) { return 1; } // Not really possible if properly used but check anyway to avoid the // possibility of buffer overruns. if (bloom_dest->bytes != bloom_src->bytes) { return 1; // LCOV_EXCL_LINE } unsigned long int p; for (p = 0; p < bloom_dest->bytes; p++) { bloom_dest->bf[p] |= bloom_src->bf[p]; } return 0; } const char * bloom_version() { return MAKESTRING(BLOOM_VERSION); }