[PATCH] Add bloom filter to optimize expr_matcher performance
This commit is contained in:
334
deps/bloom/bloom.c
vendored
Normal file
334
deps/bloom/bloom.c
vendored
Normal file
@@ -0,0 +1,334 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2012-2022, Jyri J. Virkki
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This file is under BSD license. See LICENSE file.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Refer to bloom.h for documentation on the public interfaces.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
#include "bloom.h"
|
||||||
|
#include "murmurhash2.h"
|
||||||
|
|
||||||
|
#define MAKESTRING(n) STRING(n)
|
||||||
|
#define STRING(n) #n
|
||||||
|
#define BLOOM_MAGIC "libbloom2"
|
||||||
|
|
||||||
|
#define BLOOM_VERSION_MAJOR 2
|
||||||
|
#define BLOOM_VERSION_MINOR 0
|
||||||
|
|
||||||
|
/*
 * Test bit number `bit` of the bit array `buf`, optionally setting it.
 *
 * Returns 1 when the bit was already set and 0 when it was clear. If the
 * bit was clear and `set_bit` is non-zero, it is set before returning.
 */
inline static int test_bit_set_bit(unsigned char * buf,
                                   unsigned long int bit, int set_bit)
{
  unsigned char * slot = &buf[bit >> 3];   // byte that holds the bit
  unsigned char current = *slot;           // expensive memory access
  unsigned char mask = 1 << (bit % 8ul);

  if ((current & mask) != 0) {
    return 1;
  }

  if (set_bit) {
    *slot = current | mask;
  }
  return 0;
}
|
||||||
|
|
||||||
|
|
||||||
|
static int bloom_check_add(struct bloom * bloom,
|
||||||
|
const void * buffer, int len, int add)
|
||||||
|
{
|
||||||
|
if (bloom->ready == 0) {
|
||||||
|
printf("bloom at %p not initialized!\n", (void *)bloom);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned char hits = 0;
|
||||||
|
unsigned int a = murmurhash2(buffer, len, 0x9747b28c);
|
||||||
|
unsigned int b = murmurhash2(buffer, len, a);
|
||||||
|
unsigned long int x;
|
||||||
|
unsigned long int i;
|
||||||
|
|
||||||
|
for (i = 0; i < bloom->hashes; i++) {
|
||||||
|
x = (a + b*i) % bloom->bits;
|
||||||
|
if (test_bit_set_bit(bloom->bf, x, add)) {
|
||||||
|
hits++;
|
||||||
|
} else if (!add) {
|
||||||
|
// Don't care about the presence of all the bits. Just our own.
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (hits == bloom->hashes) {
|
||||||
|
return 1; // 1 == element already in (or collision)
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// DEPRECATED - Please migrate to bloom_init2.
//
// Compatibility wrapper taking a signed entry count. A negative count is
// passed on as 0 so that bloom_init2() rejects it (zeroing the struct and
// returning 1) instead of being converted to a huge unsigned value that
// would pass the minimum-size check and request an enormous bit array.
//
// Returns whatever bloom_init2() returns: 0 on success, 1 on failure.
int bloom_init(struct bloom * bloom, int entries, double error)
{
  unsigned int count = (entries < 0) ? 0u : (unsigned int)entries;
  return bloom_init2(bloom, count, error);
}
|
||||||
|
|
||||||
|
|
||||||
|
int bloom_init2(struct bloom * bloom, unsigned int entries, double error)
|
||||||
|
{
|
||||||
|
if (sizeof(unsigned long int) < 8) {
|
||||||
|
printf("error: libbloom will not function correctly because\n");
|
||||||
|
printf("sizeof(unsigned long int) == %ld\n", sizeof(unsigned long int));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
memset(bloom, 0, sizeof(struct bloom));
|
||||||
|
|
||||||
|
if (entries < 1000 || error <= 0 || error >= 1) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
bloom->entries = entries;
|
||||||
|
bloom->error = error;
|
||||||
|
|
||||||
|
double num = -log(bloom->error);
|
||||||
|
double denom = 0.480453013918201; // ln(2)^2
|
||||||
|
bloom->bpe = (num / denom);
|
||||||
|
|
||||||
|
long double dentries = (long double)entries;
|
||||||
|
long double allbits = dentries * bloom->bpe;
|
||||||
|
bloom->bits = (unsigned long int)allbits;
|
||||||
|
|
||||||
|
if (bloom->bits % 8) {
|
||||||
|
bloom->bytes = (bloom->bits / 8) + 1;
|
||||||
|
} else {
|
||||||
|
bloom->bytes = bloom->bits / 8;
|
||||||
|
}
|
||||||
|
|
||||||
|
bloom->hashes = (unsigned char)ceil(0.693147180559945 * bloom->bpe); // ln(2)
|
||||||
|
|
||||||
|
bloom->bf = (unsigned char *)calloc(bloom->bytes, sizeof(unsigned char));
|
||||||
|
if (bloom->bf == NULL) { // LCOV_EXCL_START
|
||||||
|
return 1;
|
||||||
|
} // LCOV_EXCL_STOP
|
||||||
|
|
||||||
|
bloom->ready = 1;
|
||||||
|
|
||||||
|
bloom->major = BLOOM_VERSION_MAJOR;
|
||||||
|
bloom->minor = BLOOM_VERSION_MINOR;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Query-only entry point: runs the shared probe with adding disabled, so
 * the filter is never modified. Returns the result of bloom_check_add().
 */
int bloom_check(struct bloom * bloom, const void * buffer, int len)
{
  const int add = 0;
  return bloom_check_add(bloom, buffer, len, add);
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Insert entry point: runs the shared probe with adding enabled, so every
 * probed bit ends up set. Returns the result of bloom_check_add().
 */
int bloom_add(struct bloom * bloom, const void * buffer, int len)
{
  const int add = 1;
  return bloom_check_add(bloom, buffer, len, add);
}
|
||||||
|
|
||||||
|
|
||||||
|
void bloom_print(struct bloom * bloom)
|
||||||
|
{
|
||||||
|
printf("bloom at %p\n", (void *)bloom);
|
||||||
|
if (!bloom->ready) { printf(" *** NOT READY ***\n"); }
|
||||||
|
printf(" ->version = %d.%d\n", bloom->major, bloom->minor);
|
||||||
|
printf(" ->entries = %u\n", bloom->entries);
|
||||||
|
printf(" ->error = %f\n", bloom->error);
|
||||||
|
printf(" ->bits = %lu\n", bloom->bits);
|
||||||
|
printf(" ->bits per elem = %f\n", bloom->bpe);
|
||||||
|
printf(" ->bytes = %lu", bloom->bytes);
|
||||||
|
unsigned int KB = bloom->bytes / 1024;
|
||||||
|
unsigned int MB = KB / 1024;
|
||||||
|
printf(" (%u KB, %u MB)\n", KB, MB);
|
||||||
|
printf(" ->hash functions = %d\n", bloom->hashes);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void bloom_free(struct bloom * bloom)
|
||||||
|
{
|
||||||
|
if (bloom->ready) {
|
||||||
|
free(bloom->bf);
|
||||||
|
}
|
||||||
|
bloom->ready = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int bloom_reset(struct bloom * bloom)
|
||||||
|
{
|
||||||
|
if (!bloom->ready) return 1;
|
||||||
|
memset(bloom->bf, 0, bloom->bytes);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int bloom_save(struct bloom * bloom, char * filename)
|
||||||
|
{
|
||||||
|
if (filename == NULL || filename[0] == 0) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int fd = open(filename, O_WRONLY | O_CREAT, 0644);
|
||||||
|
if (fd < 0) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
ssize_t out = write(fd, BLOOM_MAGIC, strlen(BLOOM_MAGIC));
|
||||||
|
if (out != strlen(BLOOM_MAGIC)) { goto save_error; } // LCOV_EXCL_LINE
|
||||||
|
|
||||||
|
uint16_t size = sizeof(struct bloom);
|
||||||
|
out = write(fd, &size, sizeof(uint16_t));
|
||||||
|
if (out != sizeof(uint16_t)) { goto save_error; } // LCOV_EXCL_LINE
|
||||||
|
|
||||||
|
out = write(fd, bloom, sizeof(struct bloom));
|
||||||
|
if (out != sizeof(struct bloom)) { goto save_error; } // LCOV_EXCL_LINE
|
||||||
|
|
||||||
|
out = write(fd, bloom->bf, bloom->bytes);
|
||||||
|
if (out != bloom->bytes) { goto save_error; } // LCOV_EXCL_LINE
|
||||||
|
|
||||||
|
close(fd);
|
||||||
|
return 0;
|
||||||
|
// LCOV_EXCL_START
|
||||||
|
save_error:
|
||||||
|
close(fd);
|
||||||
|
return 1;
|
||||||
|
// LCOV_EXCL_STOP
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int bloom_load(struct bloom * bloom, char * filename)
|
||||||
|
{
|
||||||
|
int rv = 0;
|
||||||
|
|
||||||
|
if (filename == NULL || filename[0] == 0) { return 1; }
|
||||||
|
if (bloom == NULL) { return 2; }
|
||||||
|
|
||||||
|
memset(bloom, 0, sizeof(struct bloom));
|
||||||
|
|
||||||
|
int fd = open(filename, O_RDONLY);
|
||||||
|
if (fd < 0) { return 3; }
|
||||||
|
|
||||||
|
char line[30];
|
||||||
|
memset(line, 0, 30);
|
||||||
|
ssize_t in = read(fd, line, strlen(BLOOM_MAGIC));
|
||||||
|
|
||||||
|
if (in != strlen(BLOOM_MAGIC)) {
|
||||||
|
rv = 4;
|
||||||
|
goto load_error;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (strncmp(line, BLOOM_MAGIC, strlen(BLOOM_MAGIC))) {
|
||||||
|
rv = 5;
|
||||||
|
goto load_error;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint16_t size;
|
||||||
|
in = read(fd, &size, sizeof(uint16_t));
|
||||||
|
if (in != sizeof(uint16_t)) {
|
||||||
|
rv = 6;
|
||||||
|
goto load_error;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (size != sizeof(struct bloom)) {
|
||||||
|
rv = 7;
|
||||||
|
goto load_error;
|
||||||
|
}
|
||||||
|
|
||||||
|
in = read(fd, bloom, sizeof(struct bloom));
|
||||||
|
if (in != sizeof(struct bloom)) {
|
||||||
|
rv = 8;
|
||||||
|
goto load_error;
|
||||||
|
}
|
||||||
|
|
||||||
|
bloom->bf = NULL;
|
||||||
|
if (bloom->major != BLOOM_VERSION_MAJOR) {
|
||||||
|
rv = 9;
|
||||||
|
goto load_error;
|
||||||
|
}
|
||||||
|
|
||||||
|
bloom->bf = (unsigned char *)malloc(bloom->bytes);
|
||||||
|
if (bloom->bf == NULL) { rv = 10; goto load_error; } // LCOV_EXCL_LINE
|
||||||
|
|
||||||
|
in = read(fd, bloom->bf, bloom->bytes);
|
||||||
|
if (in != bloom->bytes) {
|
||||||
|
rv = 11;
|
||||||
|
free(bloom->bf);
|
||||||
|
bloom->bf = NULL;
|
||||||
|
goto load_error;
|
||||||
|
}
|
||||||
|
|
||||||
|
close(fd);
|
||||||
|
return rv;
|
||||||
|
|
||||||
|
load_error:
|
||||||
|
close(fd);
|
||||||
|
bloom->ready = 0;
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int bloom_merge(struct bloom * bloom_dest, struct bloom * bloom_src)
|
||||||
|
{
|
||||||
|
if (bloom_dest->ready == 0) {
|
||||||
|
printf("bloom at %p not initialized!\n", (void *)bloom_dest);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (bloom_src->ready == 0) {
|
||||||
|
printf("bloom at %p not initialized!\n", (void *)bloom_src);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (bloom_dest->entries != bloom_src->entries) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (bloom_dest->error != bloom_src->error) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (bloom_dest->major != bloom_src->major) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (bloom_dest->minor != bloom_src->minor) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Not really possible if properly used but check anyway to avoid the
|
||||||
|
// possibility of buffer overruns.
|
||||||
|
if (bloom_dest->bytes != bloom_src->bytes) {
|
||||||
|
return 1; // LCOV_EXCL_LINE
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned long int p;
|
||||||
|
for (p = 0; p < bloom_dest->bytes; p++) {
|
||||||
|
bloom_dest->bf[p] |= bloom_src->bf[p];
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
const char * bloom_version()
|
||||||
|
{
|
||||||
|
return MAKESTRING(BLOOM_VERSION);
|
||||||
|
}
|
||||||
241
deps/bloom/bloom.h
vendored
Normal file
241
deps/bloom/bloom.h
vendored
Normal file
@@ -0,0 +1,241 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2012-2022, Jyri J. Virkki
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This file is under BSD license. See LICENSE file.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _BLOOM_H
|
||||||
|
#define _BLOOM_H
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#define NULL_BLOOM_FILTER { 0, 0, 0, 0, 0.0, 0, 0, 0, 0.0, NULL }
|
||||||
|
|
||||||
|
#define ENTRIES_T unsigned int
|
||||||
|
#define BYTES_T unsigned long int
|
||||||
|
#define BITS_T unsigned long int
|
||||||
|
|
||||||
|
|
||||||
|
/** ***************************************************************************
 * State of a single bloom filter. The caller allocates the struct and
 * passes it to the functions below. Before any other use it must be set up
 * with bloom_init2() (or the deprecated bloom_init()), or filled in by
 * bloom_load().
 *
 * The struct is written to disk as-is by bloom_save(), so the order and
 * types of the fields are part of the saved file format.
 *
 */
struct bloom
{
  /* Public, read-only for client code: clients may read these values but
   * MUST NOT modify any of them. */
  unsigned int entries;      /* entry count given at initialization */
  unsigned long bits;        /* size of the bit array, in bits */
  unsigned long bytes;       /* size of the bit array, in bytes */
  unsigned char hashes;      /* number of bit positions probed per element */
  double error;              /* error probability given at initialization */

  /* Private to the implementation: may change incompatibly or go away at
   * any moment. Client code MUST NOT access or rely on these. */
  unsigned char ready;       /* non-zero once the filter is usable */
  unsigned char major;       /* library major version that created it */
  unsigned char minor;       /* library minor version that created it */
  double bpe;                /* bits per element */
  unsigned char * bf;        /* the bit array itself, `bytes` long */
};
|
||||||
|
|
||||||
|
|
||||||
|
/** ***************************************************************************
|
||||||
|
* Initialize the bloom filter for use.
|
||||||
|
*
|
||||||
|
* The filter is initialized with a bit field and number of hash functions
|
||||||
|
* according to the computations from the wikipedia entry:
|
||||||
|
* http://en.wikipedia.org/wiki/Bloom_filter
|
||||||
|
*
|
||||||
|
* Optimal number of bits is:
|
||||||
|
 *     bits = -(entries * ln(error)) / ln(2)^2
|
||||||
|
*
|
||||||
|
* Optimal number of hash functions is:
|
||||||
|
* hashes = bpe * ln(2)
|
||||||
|
*
|
||||||
|
* Parameters:
|
||||||
|
* -----------
|
||||||
|
* bloom - Pointer to an allocated struct bloom (see above).
|
||||||
|
* entries - The expected number of entries which will be inserted.
|
||||||
|
* Must be at least 1000 (in practice, likely much larger).
|
||||||
|
* error - Probability of collision (as long as entries are not
|
||||||
|
* exceeded).
|
||||||
|
*
|
||||||
|
* Return:
|
||||||
|
* -------
|
||||||
|
* 0 - on success
|
||||||
|
* 1 - on failure
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
int bloom_init2(struct bloom * bloom, unsigned int entries, double error);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* DEPRECATED.
|
||||||
|
* Kept for compatibility with libbloom v.1. To be removed in v3.0.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
int bloom_init(struct bloom * bloom, int entries, double error);
|
||||||
|
|
||||||
|
|
||||||
|
/** ***************************************************************************
|
||||||
|
* Check if the given element is in the bloom filter. Remember this may
|
||||||
|
* return false positive if a collision occurred.
|
||||||
|
*
|
||||||
|
* Parameters:
|
||||||
|
* -----------
|
||||||
|
* bloom - Pointer to an allocated struct bloom (see above).
|
||||||
|
* buffer - Pointer to buffer containing element to check.
|
||||||
|
* len - Size of 'buffer'.
|
||||||
|
*
|
||||||
|
* Return:
|
||||||
|
* -------
|
||||||
|
* 0 - element is not present
|
||||||
|
* 1 - element is present (or false positive due to collision)
|
||||||
|
* -1 - bloom not initialized
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
int bloom_check(struct bloom * bloom, const void * buffer, int len);
|
||||||
|
|
||||||
|
|
||||||
|
/** ***************************************************************************
|
||||||
|
* Add the given element to the bloom filter.
|
||||||
|
* The return code indicates if the element (or a collision) was already in,
|
||||||
|
* so for the common check+add use case, no need to call check separately.
|
||||||
|
*
|
||||||
|
* Parameters:
|
||||||
|
* -----------
|
||||||
|
* bloom - Pointer to an allocated struct bloom (see above).
|
||||||
|
* buffer - Pointer to buffer containing element to add.
|
||||||
|
* len - Size of 'buffer'.
|
||||||
|
*
|
||||||
|
* Return:
|
||||||
|
* -------
|
||||||
|
* 0 - element was not present and was added
|
||||||
|
* 1 - element (or a collision) had already been added previously
|
||||||
|
* -1 - bloom not initialized
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
int bloom_add(struct bloom * bloom, const void * buffer, int len);
|
||||||
|
|
||||||
|
|
||||||
|
/** ***************************************************************************
|
||||||
|
* Print (to stdout) info about this bloom filter. Debugging aid.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
void bloom_print(struct bloom * bloom);
|
||||||
|
|
||||||
|
|
||||||
|
/** ***************************************************************************
|
||||||
|
* Deallocate internal storage.
|
||||||
|
*
|
||||||
|
* Upon return, the bloom struct is no longer usable. You may call bloom_init
|
||||||
|
* again on the same struct to reinitialize it again.
|
||||||
|
*
|
||||||
|
* Parameters:
|
||||||
|
* -----------
|
||||||
|
* bloom - Pointer to an allocated struct bloom (see above).
|
||||||
|
*
|
||||||
|
* Return: none
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
void bloom_free(struct bloom * bloom);
|
||||||
|
|
||||||
|
|
||||||
|
/** ***************************************************************************
|
||||||
|
* Erase internal storage.
|
||||||
|
*
|
||||||
|
* Erases all elements. Upon return, the bloom struct returns to its initial
|
||||||
|
* (initialized) state.
|
||||||
|
*
|
||||||
|
* Parameters:
|
||||||
|
* -----------
|
||||||
|
* bloom - Pointer to an allocated struct bloom (see above).
|
||||||
|
*
|
||||||
|
* Return:
|
||||||
|
* 0 - on success
|
||||||
|
* 1 - on failure
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
int bloom_reset(struct bloom * bloom);
|
||||||
|
|
||||||
|
|
||||||
|
/** ***************************************************************************
|
||||||
|
* Save a bloom filter to a file.
|
||||||
|
*
|
||||||
|
* Parameters:
|
||||||
|
* -----------
|
||||||
|
* bloom - Pointer to an allocated struct bloom (see above).
|
||||||
|
* filename - Create (or overwrite) bloom data to this file.
|
||||||
|
*
|
||||||
|
* Return:
|
||||||
|
* 0 - on success
|
||||||
|
* 1 - on failure
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
int bloom_save(struct bloom * bloom, char * filename);
|
||||||
|
|
||||||
|
|
||||||
|
/** ***************************************************************************
|
||||||
|
* Load a bloom filter from a file.
|
||||||
|
*
|
||||||
|
* This functions loads a file previously saved with bloom_save().
|
||||||
|
*
|
||||||
|
* Parameters:
|
||||||
|
* -----------
|
||||||
|
* bloom - Pointer to an allocated struct bloom (see above).
|
||||||
|
* filename - Load bloom filter data from this file.
|
||||||
|
*
|
||||||
|
* Return:
|
||||||
|
* 0 - on success
|
||||||
|
* > 0 - on failure
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
int bloom_load(struct bloom * bloom, char * filename);
|
||||||
|
|
||||||
|
|
||||||
|
/** ***************************************************************************
|
||||||
|
* Merge two compatible bloom filters.
|
||||||
|
*
|
||||||
|
* On success, bloom_dest will contain all elements of bloom_src in addition
|
||||||
|
* to its own. The bloom_src bloom filter is never modified.
|
||||||
|
*
|
||||||
|
* Both bloom_dest and bloom_src must be initialized and both must have
|
||||||
|
* identical parameters.
|
||||||
|
*
|
||||||
|
* Parameters:
|
||||||
|
* -----------
|
||||||
|
* bloom_dest - will contain the merged elements from bloom_src
|
||||||
|
* bloom_src - its elements will be merged into bloom_dest
|
||||||
|
*
|
||||||
|
* Return:
|
||||||
|
* -------
|
||||||
|
* 0 - on success
|
||||||
|
* 1 - incompatible bloom filters
|
||||||
|
* -1 - bloom not initialized
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
int bloom_merge(struct bloom * bloom_dest, struct bloom * bloom_src);
|
||||||
|
|
||||||
|
|
||||||
|
/** ***************************************************************************
|
||||||
|
* Returns version string compiled into library.
|
||||||
|
*
|
||||||
|
* Return: version string
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
const char * bloom_version();
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
64
deps/bloom/murmurhash2.c
vendored
Normal file
64
deps/bloom/murmurhash2.c
vendored
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
//-----------------------------------------------------------------------------
// MurmurHash2, by Austin Appleby

// Note - This code makes one assumption about how your machine behaves -

// 1. sizeof(int) == 4

// And it has a few limitations -

// 1. It will not work incrementally.
// 2. It will not produce the same results on little-endian and big-endian
//    machines.

// Hash the `len` bytes at `key` with the given `seed` and return the 32-bit
// result. The input is consumed four bytes at a time in native byte order;
// the remaining 0-3 bytes are mixed in afterwards. The key may start at any
// address: each 4-byte group is copied byte by byte into a local word, so no
// misaligned or type-punned load is performed.
unsigned int murmurhash2(const void * key, int len, const unsigned int seed)
{
  // 'm' and 'r' are mixing constants generated offline.
  // They're not really 'magic', they just happen to work well.

  const unsigned int m = 0x5bd1e995;
  const int r = 24;

  // Initialize the hash to a 'random' value

  unsigned int h = seed ^ len;

  // Mix 4 bytes at a time into the hash

  const unsigned char * data = (const unsigned char *)key;

  while (len >= 4)
  {
    // Assemble the next word in native byte order through unsigned char
    // access, which is valid for any alignment of `data`.
    unsigned int k = 0;
    unsigned char * kbytes = (unsigned char *)&k;
    kbytes[0] = data[0];
    kbytes[1] = data[1];
    kbytes[2] = data[2];
    kbytes[3] = data[3];

    k *= m;
    k ^= k >> r;
    k *= m;

    h *= m;
    h ^= k;

    data += 4;
    len -= 4;
  }

  // Handle the last few bytes of the input array

  switch (len)
  {
  case 3:
    h ^= data[2] << 16;
    /* fallthrough */
  case 2:
    h ^= data[1] << 8;
    /* fallthrough */
  case 1:
    h ^= data[0];
    h *= m;
    break;
  default:
    break;
  }

  // Do a few final mixes of the hash to ensure the last few
  // bytes are well-incorporated.

  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}
|
||||||
6
deps/bloom/murmurhash2.h
vendored
Normal file
6
deps/bloom/murmurhash2.h
vendored
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
#ifndef _BLOOM_MURMURHASH2
|
||||||
|
#define _BLOOM_MURMURHASH2
|
||||||
|
|
||||||
|
unsigned int murmurhash2(const void * key, int len, const unsigned int seed);
|
||||||
|
|
||||||
|
#endif
|
||||||
@@ -7,9 +7,13 @@ include_directories(${PROJECT_SOURCE_DIR}/src/inc_internal)
|
|||||||
|
|
||||||
add_subdirectory(ip_matcher/IntervalIndex)
|
add_subdirectory(ip_matcher/IntervalIndex)
|
||||||
|
|
||||||
add_library(adapter-static bool_matcher/bool_matcher.cpp expr_matcher/expr_matcher.cpp
|
set(SCANNER_SRC bool_matcher/bool_matcher.cpp expr_matcher/expr_matcher.cpp
|
||||||
expr_matcher/adapter_hs/adapter_hs.cpp expr_matcher/adapter_rs/adapter_rs.cpp
|
expr_matcher/adapter_hs/adapter_hs.cpp expr_matcher/adapter_rs/adapter_rs.cpp
|
||||||
fqdn_engine/fqdn_engine.cpp ip_matcher/ip_matcher.cpp ip_matcher/ipv4_match.cpp
|
fqdn_engine/fqdn_engine.cpp ip_matcher/ip_matcher.cpp ip_matcher/ipv4_match.cpp
|
||||||
ip_matcher/ipv6_match.cpp flag_matcher/flag_matcher.cpp interval_matcher/cgranges.c
|
ip_matcher/ipv6_match.cpp flag_matcher/flag_matcher.cpp interval_matcher/cgranges.c
|
||||||
interval_matcher/interval_matcher.cpp ipport_matcher/ipport_matcher.cpp)
|
interval_matcher/interval_matcher.cpp ipport_matcher/ipport_matcher.cpp)
|
||||||
|
|
||||||
|
set(LIB_SOURCE_FILES ${PROJECT_SOURCE_DIR}/deps/bloom/bloom.c ${PROJECT_SOURCE_DIR}/deps/bloom/murmurhash2.c)
|
||||||
|
|
||||||
|
add_library(adapter-static ${SCANNER_SRC} ${LIB_SOURCE_FILES})
|
||||||
target_link_libraries(adapter-static hyperscan_static hyperscan_runtime_static rulescan_static interval_index_static)
|
target_link_libraries(adapter-static hyperscan_static hyperscan_runtime_static rulescan_static interval_index_static)
|
||||||
@@ -18,6 +18,7 @@
|
|||||||
|
|
||||||
#include "adapter_hs.h"
|
#include "adapter_hs.h"
|
||||||
#include "uthash/uthash.h"
|
#include "uthash/uthash.h"
|
||||||
|
#include "bloom/bloom.h"
|
||||||
#include "maat_utils.h"
|
#include "maat_utils.h"
|
||||||
#include "../../bool_matcher/bool_matcher.h"
|
#include "../../bool_matcher/bool_matcher.h"
|
||||||
|
|
||||||
@@ -66,6 +67,7 @@ struct hs_lit_engine {
|
|||||||
size_t n_thread;
|
size_t n_thread;
|
||||||
hs_database_t *hs_db;
|
hs_database_t *hs_db;
|
||||||
hs_scratch_t **hs_scratches;
|
hs_scratch_t **hs_scratches;
|
||||||
|
struct bloom **blooms;
|
||||||
struct hs_lit_stream **streams;
|
struct hs_lit_stream **streams;
|
||||||
struct pattern_attribute *ref_pat_attr;
|
struct pattern_attribute *ref_pat_attr;
|
||||||
struct log_handle *logger;
|
struct log_handle *logger;
|
||||||
@@ -76,6 +78,7 @@ struct hs_regex_engine {
|
|||||||
size_t n_thread;
|
size_t n_thread;
|
||||||
hs_database_t *hs_db;
|
hs_database_t *hs_db;
|
||||||
hs_scratch_t **hs_scratches;
|
hs_scratch_t **hs_scratches;
|
||||||
|
struct bloom **blooms;
|
||||||
struct hs_regex_stream **streams;
|
struct hs_regex_stream **streams;
|
||||||
struct pattern_attribute *ref_pat_attr;
|
struct pattern_attribute *ref_pat_attr;
|
||||||
struct log_handle *logger;
|
struct log_handle *logger;
|
||||||
@@ -163,6 +166,16 @@ void hs_lit_engine_free(void *hs_lit_engine)
|
|||||||
hs_lit_inst->hs_db = NULL;
|
hs_lit_inst->hs_db = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (hs_lit_inst->blooms != NULL) {
|
||||||
|
for (i = 0; i < hs_lit_inst->n_thread; i++) {
|
||||||
|
if (hs_lit_inst->blooms[i] != NULL) {
|
||||||
|
bloom_free(hs_lit_inst->blooms[i]);
|
||||||
|
FREE(hs_lit_inst->blooms[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
FREE(hs_lit_inst->blooms);
|
||||||
|
}
|
||||||
|
|
||||||
if (hs_lit_inst->hs_scratches != NULL) {
|
if (hs_lit_inst->hs_scratches != NULL) {
|
||||||
for (i = 0; i < hs_lit_inst->n_thread; i++) {
|
for (i = 0; i < hs_lit_inst->n_thread; i++) {
|
||||||
if (hs_lit_inst->hs_scratches[i] != NULL) {
|
if (hs_lit_inst->hs_scratches[i] != NULL) {
|
||||||
@@ -197,6 +210,12 @@ void *hs_lit_engine_new(struct expr_rule *rules, size_t n_rule,
|
|||||||
hs_lit_inst->hs_db = (hs_database_t *)hs_lit_db;
|
hs_lit_inst->hs_db = (hs_database_t *)hs_lit_db;
|
||||||
hs_lit_inst->logger = logger;
|
hs_lit_inst->logger = logger;
|
||||||
hs_lit_inst->ref_pat_attr = pat_attr;
|
hs_lit_inst->ref_pat_attr = pat_attr;
|
||||||
|
hs_lit_inst->blooms = ALLOC(struct bloom *, n_thread);
|
||||||
|
for (size_t i = 0; i < n_thread; i++) {
|
||||||
|
hs_lit_inst->blooms[i] = ALLOC(struct bloom, 1);
|
||||||
|
bloom_init2(hs_lit_inst->blooms[i], 1024, 0.001);
|
||||||
|
}
|
||||||
|
|
||||||
hs_lit_inst->hs_scratches = ALLOC(hs_scratch_t *, n_thread);
|
hs_lit_inst->hs_scratches = ALLOC(hs_scratch_t *, n_thread);
|
||||||
int ret = hs_alloc_scratches((hs_database_t *)hs_lit_db, hs_lit_inst->hs_scratches,
|
int ret = hs_alloc_scratches((hs_database_t *)hs_lit_db, hs_lit_inst->hs_scratches,
|
||||||
n_thread, logger);
|
n_thread, logger);
|
||||||
@@ -228,6 +247,23 @@ static int matched_event_cb(unsigned int id, unsigned long long from,
|
|||||||
unsigned long long pattern_id = id;
|
unsigned long long pattern_id = id;
|
||||||
struct matched_pattern *matched_pat = (struct matched_pattern *)ctx;
|
struct matched_pattern *matched_pat = (struct matched_pattern *)ctx;
|
||||||
|
|
||||||
|
unsigned long long *tmp_pat_id = NULL;
|
||||||
|
if (utarray_len(matched_pat->pattern_ids) < (MAX_HIT_PATTERN_NUM / 10)) {
|
||||||
|
for (size_t i = 0; i < utarray_len(matched_pat->pattern_ids); i++) {
|
||||||
|
tmp_pat_id = (unsigned long long *)utarray_eltptr(matched_pat->pattern_ids, i);
|
||||||
|
if (*tmp_pat_id == pattern_id) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (bloom_check(matched_pat->ref_bloom, (char *)&pattern_id,
|
||||||
|
sizeof(unsigned long long)) == 1) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
bloom_add(matched_pat->ref_bloom, (char *)&pattern_id,
|
||||||
|
sizeof(unsigned long long));
|
||||||
|
}
|
||||||
|
|
||||||
if (utarray_len(matched_pat->pattern_ids) >= MAX_HIT_PATTERN_NUM) {
|
if (utarray_len(matched_pat->pattern_ids) >= MAX_HIT_PATTERN_NUM) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -302,6 +338,7 @@ void *hs_lit_stream_open(void *hs_lit_engine, int thread_id)
|
|||||||
lit_stream->thread_id = thread_id;
|
lit_stream->thread_id = thread_id;
|
||||||
lit_stream->ref_hs_rt = hs_lit_inst;
|
lit_stream->ref_hs_rt = hs_lit_inst;
|
||||||
lit_stream->matched_pat = ALLOC(struct matched_pattern, 1);
|
lit_stream->matched_pat = ALLOC(struct matched_pattern, 1);
|
||||||
|
lit_stream->matched_pat->ref_bloom = hs_lit_inst->blooms[thread_id];
|
||||||
lit_stream->matched_pat->ref_pat_attr = hs_lit_inst->ref_pat_attr;
|
lit_stream->matched_pat->ref_pat_attr = hs_lit_inst->ref_pat_attr;
|
||||||
utarray_new(lit_stream->matched_pat->pattern_ids, &ut_hs_pattern_id_icd);
|
utarray_new(lit_stream->matched_pat->pattern_ids, &ut_hs_pattern_id_icd);
|
||||||
utarray_reserve(lit_stream->matched_pat->pattern_ids, MAX_HIT_PATTERN_NUM);
|
utarray_reserve(lit_stream->matched_pat->pattern_ids, MAX_HIT_PATTERN_NUM);
|
||||||
@@ -343,6 +380,7 @@ void hs_lit_stream_close(void *hs_lit_stream)
|
|||||||
/* stream->hs_rt point to hs_instance->hs_rt which will call free
|
/* stream->hs_rt point to hs_instance->hs_rt which will call free
|
||||||
same as hs_attr */
|
same as hs_attr */
|
||||||
stream->ref_hs_rt = NULL;
|
stream->ref_hs_rt = NULL;
|
||||||
|
stream->matched_pat->ref_bloom = NULL;
|
||||||
stream->matched_pat->ref_pat_attr = NULL;
|
stream->matched_pat->ref_pat_attr = NULL;
|
||||||
|
|
||||||
if (stream->matched_pat->pattern_ids != NULL) {
|
if (stream->matched_pat->pattern_ids != NULL) {
|
||||||
@@ -399,6 +437,7 @@ static int gather_hit_pattern_id(struct matched_pattern *matched_pat,
|
|||||||
|
|
||||||
*n_pattern_id = array_index;
|
*n_pattern_id = array_index;
|
||||||
utarray_clear(matched_pat->pattern_ids);
|
utarray_clear(matched_pat->pattern_ids);
|
||||||
|
bloom_reset(matched_pat->ref_bloom);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -479,6 +518,16 @@ void hs_regex_engine_free(void *hs_regex_engine)
|
|||||||
hs_regex_inst->hs_db = NULL;
|
hs_regex_inst->hs_db = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (hs_regex_inst->blooms != NULL) {
|
||||||
|
for (i = 0; i < hs_regex_inst->n_thread; i++) {
|
||||||
|
if (hs_regex_inst->blooms[i] != NULL) {
|
||||||
|
bloom_free(hs_regex_inst->blooms[i]);
|
||||||
|
FREE(hs_regex_inst->blooms[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
FREE(hs_regex_inst->blooms);
|
||||||
|
}
|
||||||
|
|
||||||
if (hs_regex_inst->hs_scratches != NULL) {
|
if (hs_regex_inst->hs_scratches != NULL) {
|
||||||
for (i = 0; i < hs_regex_inst->n_thread; i++) {
|
for (i = 0; i < hs_regex_inst->n_thread; i++) {
|
||||||
if (hs_regex_inst->hs_scratches[i] != NULL) {
|
if (hs_regex_inst->hs_scratches[i] != NULL) {
|
||||||
@@ -513,8 +562,13 @@ void *hs_regex_engine_new(struct expr_rule *rules, size_t n_rule,
|
|||||||
hs_regex_inst->hs_db = (hs_database_t *)hs_regex_db;
|
hs_regex_inst->hs_db = (hs_database_t *)hs_regex_db;
|
||||||
hs_regex_inst->ref_pat_attr = pat_attr;
|
hs_regex_inst->ref_pat_attr = pat_attr;
|
||||||
hs_regex_inst->logger = logger;
|
hs_regex_inst->logger = logger;
|
||||||
hs_regex_inst->hs_scratches = ALLOC(hs_scratch_t *, n_thread);
|
hs_regex_inst->blooms = ALLOC(struct bloom *, n_thread);
|
||||||
|
for (size_t i = 0; i < n_thread; i++) {
|
||||||
|
hs_regex_inst->blooms[i] = ALLOC(struct bloom, 1);
|
||||||
|
bloom_init2(hs_regex_inst->blooms[i], 1024, 0.001);
|
||||||
|
}
|
||||||
|
|
||||||
|
hs_regex_inst->hs_scratches = ALLOC(hs_scratch_t *, n_thread);
|
||||||
int ret = hs_alloc_scratches((hs_database_t *)hs_regex_db,
|
int ret = hs_alloc_scratches((hs_database_t *)hs_regex_db,
|
||||||
hs_regex_inst->hs_scratches,
|
hs_regex_inst->hs_scratches,
|
||||||
n_thread, logger);
|
n_thread, logger);
|
||||||
@@ -570,6 +624,7 @@ void hs_regex_stream_close(void *hs_regex_stream)
|
|||||||
/* stream->hs_rt point to hs_instance->hs_rt which will call free
|
/* stream->hs_rt point to hs_instance->hs_rt which will call free
|
||||||
same as hs_attr */
|
same as hs_attr */
|
||||||
stream->ref_hs_rt = NULL;
|
stream->ref_hs_rt = NULL;
|
||||||
|
stream->matched_pat->ref_bloom = NULL;
|
||||||
stream->matched_pat->ref_pat_attr = NULL;
|
stream->matched_pat->ref_pat_attr = NULL;
|
||||||
|
|
||||||
if (stream->matched_pat->pattern_ids != NULL) {
|
if (stream->matched_pat->pattern_ids != NULL) {
|
||||||
@@ -595,6 +650,7 @@ void *hs_regex_stream_open(void *hs_regex_engine, int thread_id)
|
|||||||
regex_stream->thread_id = thread_id;
|
regex_stream->thread_id = thread_id;
|
||||||
regex_stream->ref_hs_rt = hs_regex_inst;
|
regex_stream->ref_hs_rt = hs_regex_inst;
|
||||||
regex_stream->matched_pat = ALLOC(struct matched_pattern, 1);
|
regex_stream->matched_pat = ALLOC(struct matched_pattern, 1);
|
||||||
|
regex_stream->matched_pat->ref_bloom = hs_regex_inst->blooms[thread_id];
|
||||||
regex_stream->matched_pat->ref_pat_attr = hs_regex_inst->ref_pat_attr;
|
regex_stream->matched_pat->ref_pat_attr = hs_regex_inst->ref_pat_attr;
|
||||||
utarray_new(regex_stream->matched_pat->pattern_ids, &ut_hs_pattern_id_icd);
|
utarray_new(regex_stream->matched_pat->pattern_ids, &ut_hs_pattern_id_icd);
|
||||||
utarray_reserve(regex_stream->matched_pat->pattern_ids, MAX_HIT_PATTERN_NUM);
|
utarray_reserve(regex_stream->matched_pat->pattern_ids, MAX_HIT_PATTERN_NUM);
|
||||||
|
|||||||
@@ -63,7 +63,8 @@ struct rs_regex_stream {
|
|||||||
struct rs_lit_engine {
|
struct rs_lit_engine {
|
||||||
size_t n_thread;
|
size_t n_thread;
|
||||||
rs_database_t *rs_db;
|
rs_database_t *rs_db;
|
||||||
struct rs_lit_stream **streams; /* per thread */
|
struct bloom **blooms;
|
||||||
|
struct rs_lit_stream **streams; /* per thread */
|
||||||
struct pattern_attribute *ref_pat_attr;
|
struct pattern_attribute *ref_pat_attr;
|
||||||
struct log_handle *logger;
|
struct log_handle *logger;
|
||||||
};
|
};
|
||||||
@@ -72,7 +73,8 @@ struct rs_lit_engine {
|
|||||||
struct rs_regex_engine {
|
struct rs_regex_engine {
|
||||||
size_t n_thread;
|
size_t n_thread;
|
||||||
rs_database_t *rs_db;
|
rs_database_t *rs_db;
|
||||||
struct rs_regex_stream **streams; /* per thread */
|
struct bloom **blooms;
|
||||||
|
struct rs_regex_stream **streams; /* per thread */
|
||||||
struct pattern_attribute *ref_pat_attr;
|
struct pattern_attribute *ref_pat_attr;
|
||||||
struct log_handle *logger;
|
struct log_handle *logger;
|
||||||
};
|
};
|
||||||
@@ -187,6 +189,23 @@ static int matched_event_cb(unsigned int id, int pos_offset, int from, int to,
|
|||||||
unsigned long long pattern_id = id;
|
unsigned long long pattern_id = id;
|
||||||
struct matched_pattern *matched_pat = (struct matched_pattern *)ctx;
|
struct matched_pattern *matched_pat = (struct matched_pattern *)ctx;
|
||||||
|
|
||||||
|
unsigned long long *tmp_pat_id = NULL;
|
||||||
|
if (utarray_len(matched_pat->pattern_ids) < (MAX_HIT_PATTERN_NUM / 10)) {
|
||||||
|
for (size_t i = 0; i < utarray_len(matched_pat->pattern_ids); i++) {
|
||||||
|
tmp_pat_id = (unsigned long long *)utarray_eltptr(matched_pat->pattern_ids, i);
|
||||||
|
if (*tmp_pat_id == pattern_id) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (bloom_check(matched_pat->ref_bloom, (char *)&pattern_id,
|
||||||
|
sizeof(unsigned long long)) == 1) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
bloom_add(matched_pat->ref_bloom, (char *)&pattern_id,
|
||||||
|
sizeof(unsigned long long));
|
||||||
|
}
|
||||||
|
|
||||||
if (utarray_len(matched_pat->pattern_ids) >= MAX_HIT_PATTERN_NUM) {
|
if (utarray_len(matched_pat->pattern_ids) >= MAX_HIT_PATTERN_NUM) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -264,6 +283,7 @@ static int gather_hit_pattern_id(struct matched_pattern *matched_pat,
|
|||||||
|
|
||||||
*n_pattern_id = array_index;
|
*n_pattern_id = array_index;
|
||||||
utarray_clear(matched_pat->pattern_ids);
|
utarray_clear(matched_pat->pattern_ids);
|
||||||
|
bloom_reset(matched_pat->ref_bloom);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -281,6 +301,16 @@ void rs_lit_engine_free(void *rs_lit_engine)
|
|||||||
rs_lit_inst->rs_db = NULL;
|
rs_lit_inst->rs_db = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (rs_lit_inst->blooms != NULL) {
|
||||||
|
for (size_t i = 0; i < rs_lit_inst->n_thread; i++) {
|
||||||
|
if (rs_lit_inst->blooms[i] != NULL) {
|
||||||
|
bloom_free(rs_lit_inst->blooms[i]);
|
||||||
|
FREE(rs_lit_inst->blooms[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
FREE(rs_lit_inst->blooms);
|
||||||
|
}
|
||||||
|
|
||||||
if (rs_lit_inst->streams != NULL) {
|
if (rs_lit_inst->streams != NULL) {
|
||||||
for (size_t i = 0; i < rs_lit_inst->n_thread; i++) {
|
for (size_t i = 0; i < rs_lit_inst->n_thread; i++) {
|
||||||
if (rs_lit_inst->streams[i] != NULL) {
|
if (rs_lit_inst->streams[i] != NULL) {
|
||||||
@@ -306,8 +336,14 @@ void *rs_lit_engine_new(struct expr_rule *rules, size_t n_rule,
|
|||||||
rs_lit_inst->rs_db = (rs_database_t *)rs_lit_db;
|
rs_lit_inst->rs_db = (rs_database_t *)rs_lit_db;
|
||||||
rs_lit_inst->ref_pat_attr = pat_attr;
|
rs_lit_inst->ref_pat_attr = pat_attr;
|
||||||
rs_lit_inst->logger = logger;
|
rs_lit_inst->logger = logger;
|
||||||
rs_lit_inst->streams = ALLOC(struct rs_lit_stream *, n_thread);
|
|
||||||
|
|
||||||
|
rs_lit_inst->blooms = ALLOC(struct bloom *, n_thread);
|
||||||
|
for (size_t i = 0; i < n_thread; i++) {
|
||||||
|
rs_lit_inst->blooms[i] = ALLOC(struct bloom, 1);
|
||||||
|
bloom_init2(rs_lit_inst->blooms[i], 1024, 0.001);
|
||||||
|
}
|
||||||
|
|
||||||
|
rs_lit_inst->streams = ALLOC(struct rs_lit_stream *, n_thread);
|
||||||
for (size_t i = 0; i < n_thread; i++) {
|
for (size_t i = 0; i < n_thread; i++) {
|
||||||
rs_lit_inst->streams[i] = (struct rs_lit_stream *)rs_lit_stream_open(rs_lit_inst, i);
|
rs_lit_inst->streams[i] = (struct rs_lit_stream *)rs_lit_stream_open(rs_lit_inst, i);
|
||||||
}
|
}
|
||||||
@@ -354,6 +390,7 @@ void *rs_lit_stream_open(void *rs_lit_engine, int thread_id)
|
|||||||
lit_stream->thread_id = thread_id;
|
lit_stream->thread_id = thread_id;
|
||||||
lit_stream->ref_rs_rt = rs_lit_inst;
|
lit_stream->ref_rs_rt = rs_lit_inst;
|
||||||
lit_stream->matched_pat = ALLOC(struct matched_pattern, 1);
|
lit_stream->matched_pat = ALLOC(struct matched_pattern, 1);
|
||||||
|
lit_stream->matched_pat->ref_bloom = rs_lit_inst->blooms[thread_id];
|
||||||
lit_stream->matched_pat->ref_pat_attr = rs_lit_inst->ref_pat_attr;
|
lit_stream->matched_pat->ref_pat_attr = rs_lit_inst->ref_pat_attr;
|
||||||
utarray_new(lit_stream->matched_pat->pattern_ids, &ut_rs_pattern_id_icd);
|
utarray_new(lit_stream->matched_pat->pattern_ids, &ut_rs_pattern_id_icd);
|
||||||
utarray_reserve(lit_stream->matched_pat->pattern_ids, MAX_HIT_PATTERN_NUM);
|
utarray_reserve(lit_stream->matched_pat->pattern_ids, MAX_HIT_PATTERN_NUM);
|
||||||
@@ -387,6 +424,7 @@ void rs_lit_stream_close(void *rs_lit_stream)
|
|||||||
/* rs_stream->rs_rt point to rs_instance->rs_rt which will call free
|
/* rs_stream->rs_rt point to rs_instance->rs_rt which will call free
|
||||||
same as rs_attr */
|
same as rs_attr */
|
||||||
lit_stream->ref_rs_rt = NULL;
|
lit_stream->ref_rs_rt = NULL;
|
||||||
|
lit_stream->matched_pat->ref_bloom = NULL;
|
||||||
lit_stream->matched_pat->ref_pat_attr = NULL;
|
lit_stream->matched_pat->ref_pat_attr = NULL;
|
||||||
|
|
||||||
if (lit_stream->matched_pat->pattern_ids != NULL) {
|
if (lit_stream->matched_pat->pattern_ids != NULL) {
|
||||||
@@ -434,6 +472,16 @@ void rs_regex_engine_free(void *rs_regex_engine)
|
|||||||
rs_regex_inst->rs_db = NULL;
|
rs_regex_inst->rs_db = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (rs_regex_inst->blooms != NULL) {
|
||||||
|
for (size_t i = 0; i < rs_regex_inst->n_thread; i++) {
|
||||||
|
if (rs_regex_inst->blooms[i] != NULL) {
|
||||||
|
bloom_free(rs_regex_inst->blooms[i]);
|
||||||
|
FREE(rs_regex_inst->blooms[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
FREE(rs_regex_inst->blooms);
|
||||||
|
}
|
||||||
|
|
||||||
if (rs_regex_inst->streams != NULL) {
|
if (rs_regex_inst->streams != NULL) {
|
||||||
for (size_t i = 0; i < rs_regex_inst->n_thread; i++) {
|
for (size_t i = 0; i < rs_regex_inst->n_thread; i++) {
|
||||||
if (rs_regex_inst->streams[i] != NULL) {
|
if (rs_regex_inst->streams[i] != NULL) {
|
||||||
@@ -459,8 +507,14 @@ void *rs_regex_engine_new(struct expr_rule *rules, size_t n_rule,
|
|||||||
rs_regex_inst->rs_db = (rs_database_t *)rs_regex_db;
|
rs_regex_inst->rs_db = (rs_database_t *)rs_regex_db;
|
||||||
rs_regex_inst->ref_pat_attr = pat_attr;
|
rs_regex_inst->ref_pat_attr = pat_attr;
|
||||||
rs_regex_inst->logger = logger;
|
rs_regex_inst->logger = logger;
|
||||||
|
|
||||||
|
rs_regex_inst->blooms = ALLOC(struct bloom *, n_thread);
|
||||||
|
for (size_t i = 0; i < n_thread; i++) {
|
||||||
|
rs_regex_inst->blooms[i] = ALLOC(struct bloom, 1);
|
||||||
|
bloom_init2(rs_regex_inst->blooms[i], 1024, 0.001);
|
||||||
|
}
|
||||||
|
|
||||||
rs_regex_inst->streams = ALLOC(struct rs_regex_stream *, n_thread);
|
rs_regex_inst->streams = ALLOC(struct rs_regex_stream *, n_thread);
|
||||||
|
|
||||||
for (size_t i = 0; i < n_thread; i++) {
|
for (size_t i = 0; i < n_thread; i++) {
|
||||||
rs_regex_inst->streams[i] = (struct rs_regex_stream *)rs_regex_stream_open(rs_regex_inst, i);
|
rs_regex_inst->streams[i] = (struct rs_regex_stream *)rs_regex_stream_open(rs_regex_inst, i);
|
||||||
}
|
}
|
||||||
@@ -507,6 +561,7 @@ void *rs_regex_stream_open(void *rs_regex_engine, int thread_id)
|
|||||||
regex_stream->thread_id = thread_id;
|
regex_stream->thread_id = thread_id;
|
||||||
regex_stream->ref_rs_rt = rs_regex_inst;
|
regex_stream->ref_rs_rt = rs_regex_inst;
|
||||||
regex_stream->matched_pat = ALLOC(struct matched_pattern, 1);
|
regex_stream->matched_pat = ALLOC(struct matched_pattern, 1);
|
||||||
|
regex_stream->matched_pat->ref_bloom = rs_regex_inst->blooms[thread_id];
|
||||||
regex_stream->matched_pat->ref_pat_attr = rs_regex_inst->ref_pat_attr;
|
regex_stream->matched_pat->ref_pat_attr = rs_regex_inst->ref_pat_attr;
|
||||||
utarray_new(regex_stream->matched_pat->pattern_ids, &ut_rs_pattern_id_icd);
|
utarray_new(regex_stream->matched_pat->pattern_ids, &ut_rs_pattern_id_icd);
|
||||||
utarray_reserve(regex_stream->matched_pat->pattern_ids, MAX_HIT_PATTERN_NUM);
|
utarray_reserve(regex_stream->matched_pat->pattern_ids, MAX_HIT_PATTERN_NUM);
|
||||||
@@ -540,6 +595,7 @@ void rs_regex_stream_close(void *rs_regex_stream)
|
|||||||
/* rs_stream->rs_rt point to rs_instance->rs_rt which will call free
|
/* rs_stream->rs_rt point to rs_instance->rs_rt which will call free
|
||||||
same as rs_attr */
|
same as rs_attr */
|
||||||
regex_stream->ref_rs_rt = NULL;
|
regex_stream->ref_rs_rt = NULL;
|
||||||
|
regex_stream->matched_pat->ref_bloom = NULL;
|
||||||
regex_stream->matched_pat->ref_pat_attr = NULL;
|
regex_stream->matched_pat->ref_pat_attr = NULL;
|
||||||
|
|
||||||
if (regex_stream->matched_pat->pattern_ids != NULL) {
|
if (regex_stream->matched_pat->pattern_ids != NULL) {
|
||||||
|
|||||||
@@ -13,6 +13,7 @@
|
|||||||
#include <sys/syscall.h>
|
#include <sys/syscall.h>
|
||||||
|
|
||||||
#include "log/log.h"
|
#include "log/log.h"
|
||||||
|
#include "bloom/bloom.h"
|
||||||
#include "maat_utils.h"
|
#include "maat_utils.h"
|
||||||
#include "../bool_matcher/bool_matcher.h"
|
#include "../bool_matcher/bool_matcher.h"
|
||||||
#include "expr_matcher_inc.h"
|
#include "expr_matcher_inc.h"
|
||||||
@@ -409,7 +410,7 @@ static int expr_matcher_bool_matcher_match(struct bool_matcher *bm, struct bool_
|
|||||||
unsigned long long unique_pat_ids[n_hit_pattern];
|
unsigned long long unique_pat_ids[n_hit_pattern];
|
||||||
size_t n_unique_pat_id = 0;
|
size_t n_unique_pat_id = 0;
|
||||||
|
|
||||||
qsort(hit_pattern_ids, n_hit_pattern, sizeof(unsigned long long *), compare_pattern_id);
|
qsort(hit_pattern_ids, n_hit_pattern, sizeof(unsigned long long), compare_pattern_id);
|
||||||
|
|
||||||
for (size_t i = 0; i < n_hit_pattern; i++) {
|
for (size_t i = 0; i < n_hit_pattern; i++) {
|
||||||
tmp_pat_id = hit_pattern_ids[i];
|
tmp_pat_id = hit_pattern_ids[i];
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ extern "C"
|
|||||||
|
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include "uthash/utarray.h"
|
#include "uthash/utarray.h"
|
||||||
|
#include "bloom/bloom.h"
|
||||||
#include "expr_matcher.h"
|
#include "expr_matcher.h"
|
||||||
|
|
||||||
#define MAX_HIT_PATTERN_NUM 1024
|
#define MAX_HIT_PATTERN_NUM 1024
|
||||||
@@ -36,6 +37,7 @@ struct pattern_attribute {
|
|||||||
|
|
||||||
struct matched_pattern {
|
struct matched_pattern {
|
||||||
UT_array *pattern_ids;
|
UT_array *pattern_ids;
|
||||||
|
struct bloom *ref_bloom;
|
||||||
struct pattern_attribute *ref_pat_attr;
|
struct pattern_attribute *ref_pat_attr;
|
||||||
size_t scan_data_len;
|
size_t scan_data_len;
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -1894,7 +1894,7 @@ static void compile_state_cache_hit_not_groups(struct compile_state *compile_sta
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (n_hit_group_id != 0) {
|
if (n_hit_group_id != 0) {
|
||||||
qsort(hit_group_ids, n_hit_group_id, sizeof(long long *), compare_group_id);
|
qsort(hit_group_ids, n_hit_group_id, sizeof(long long), compare_group_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct table_group *tbl_group = NULL;
|
struct table_group *tbl_group = NULL;
|
||||||
@@ -2426,7 +2426,7 @@ int compile_runtime_match(struct compile_runtime *compile_rt, long long *compile
|
|||||||
(void **)compile_items,
|
(void **)compile_items,
|
||||||
compile_ids_size);
|
compile_ids_size);
|
||||||
if (bool_match_ret > 0) {
|
if (bool_match_ret > 0) {
|
||||||
qsort(compile_items, bool_match_ret, sizeof(struct compile_item *),
|
qsort(compile_items, bool_match_ret, sizeof(struct compile_item *),
|
||||||
compare_compile_item);
|
compare_compile_item);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user