From e5c9d7a2a006312e1548fd9cd96e7692b12b5d37 Mon Sep 17 00:00:00 2001 From: zhengchao Date: Tue, 25 Oct 2022 13:14:48 +0800 Subject: [PATCH] Hyperscan adapter is in progress. --- inc/Maat_command.h | 4 +- inc/Maat_rule.h | 4 +- src/entry/Maat_api.cpp | 4 +- src/entry/Maat_rule.cpp | 123 +- src/entry/Maat_table_schema.cpp | 2 +- src/entry/hyperscan_adapter.cpp | 22 + src/inc_internal/Maat_rule_internal.h | 2 +- src/inc_internal/Maat_table_runtime.h | 8 + src/inc_internal/Maat_table_schema.h | 4 +- src/inc_internal/hyperscan_adapter.h | 33 + src/inc_internal/view_only/hyperscan/hs.h | 51 + .../view_only/hyperscan/hs_common.h | 596 ++++++++ .../view_only/hyperscan/hs_compile.h | 1224 +++++++++++++++++ .../view_only/hyperscan/hs_runtime.h | 621 +++++++++ 14 files changed, 2681 insertions(+), 17 deletions(-) create mode 100644 src/entry/hyperscan_adapter.cpp create mode 100644 src/inc_internal/hyperscan_adapter.h create mode 100644 src/inc_internal/view_only/hyperscan/hs.h create mode 100644 src/inc_internal/view_only/hyperscan/hs_common.h create mode 100644 src/inc_internal/view_only/hyperscan/hs_compile.h create mode 100644 src/inc_internal/view_only/hyperscan/hs_runtime.h diff --git a/inc/Maat_command.h b/inc/Maat_command.h index 72c728a..3b783a4 100644 --- a/inc/Maat_command.h +++ b/inc/Maat_command.h @@ -34,8 +34,8 @@ enum MAAT_EXPR_TYPE enum MAAT_MATCH_METHOD { MATCH_METHOD_SUB=0, - MATCH_METHOD_RIGHT, - MATCH_METHOD_LEFT, + MATCH_METHOD_SUFFIX, + MATCH_METHOD_PREFIX, MATCH_METHOD_COMPLETE }; diff --git a/inc/Maat_rule.h b/inc/Maat_rule.h index 0a1c096..ab54f22 100644 --- a/inc/Maat_rule.h +++ b/inc/Maat_rule.h @@ -5,8 +5,8 @@ * Her feather was the measure that determined whether the souls (considered * to reside in the heart) of the departed would reach the paradise of afterlife * successfully. 
-* Author: zhengchao, MESA -* Version 2020-06-13 version 3.0.0 +* Author: contact@zhengchao.io +* Version 2022-10-24 version 4.0.0 ********************************************************* */ #ifndef H_MAAT_RULE_H_INCLUDE diff --git a/src/entry/Maat_api.cpp b/src/entry/Maat_api.cpp index 970283a..ce226bd 100644 --- a/src/entry/Maat_api.cpp +++ b/src/entry/Maat_api.cpp @@ -1418,7 +1418,7 @@ int Maat_full_scan_string_detail(Maat_feather_t feather, int table_id, scan_result_t *region_result=NULL; struct Maat_table_schema *p_table=NULL; - struct expr_table_schema* expr_desc=NULL; + struct string_table_schema* expr_desc=NULL; struct timespec start,end; Maat_scanner* my_scanner=NULL; @@ -2022,7 +2022,7 @@ stream_para_t Maat_stream_scan_string_start(Maat_feather_t feather,int table_id, return NULL; } - struct expr_table_schema* expr_desc=&(p_table->expr); + struct string_table_schema* expr_desc=&(p_table->expr); struct _stream_para_t* sp=ALLOC(struct _stream_para_t ,1); scanner=_feather->scanner; sp->feather=_feather; diff --git a/src/entry/Maat_rule.cpp b/src/entry/Maat_rule.cpp index fd568a9..3124cce 100644 --- a/src/entry/Maat_rule.cpp +++ b/src/entry/Maat_rule.cpp @@ -95,8 +95,8 @@ int is_valid_match_method(enum MAAT_MATCH_METHOD match_method) switch(match_method) { case MATCH_METHOD_SUB: - case MATCH_METHOD_RIGHT: - case MATCH_METHOD_LEFT: + case MATCH_METHOD_SUFFIX: + case MATCH_METHOD_PREFIX: case MATCH_METHOD_COMPLETE: return 1; default: @@ -626,10 +626,10 @@ scan_rule_t* create_rs_str_rule(unsigned int sub_type,enum MAAT_MATCH_METHOD mat case MATCH_METHOD_COMPLETE: p_rule->string_rule.match_mode=1; break; - case MATCH_METHOD_LEFT: + case MATCH_METHOD_PREFIX: p_rule->string_rule.l_offset=-2; break; - case MATCH_METHOD_RIGHT: + case MATCH_METHOD_SUFFIX: p_rule->string_rule.r_offset=-2; break; case MATCH_METHOD_SUB: @@ -1079,14 +1079,172 @@ void Maat_region_inner_add_expr_id(struct Maat_region_inner* region, int expr_id return; }
+/* Release a hs_expression together with the pattern buffers it owns. */
+static void expr_ng_free_hs_expression(struct hs_expression *expr)
+{
+	size_t k=0;
+	if(expr==NULL)
+	{
+		return;
+	}
+	for(k=0; k<expr->n_sub_pattern; k++)
+	{
+		//NOTE(review): pattern buffers come from _maat_strdup()/ALLOC() and are assumed to be free()-compatible -- confirm.
+		free(expr->sub_patterns[k].pattern);
+	}
+	free(expr);
+}
+
+/*
+ * Translate one expr/expr_plus table row into a hs_expression and register the
+ * row's region with the group hierarchy.
+ *
+ * table   - schema of the table being updated.
+ * db_rule - parsed row; district and keywords are passed to str_unescape() and
+ *           strtok_r_esc(), which presumably rewrite them in place.
+ * scanner - provides the district ids and the hierarchy (scanner->hier).
+ * logger  - runtime log handle.
+ *
+ * Returns 0 on success, -1 if the row is malformed or the hierarchy rejects the region.
+ */
+int add_expr_ng_rule(struct Maat_table_schema* table, struct db_expr_rule_t* db_rule, struct Maat_scanner *scanner, void* logger)
+{
+	//sub_patterns[] is a flexible array member, so its storage is allocated together with the header.
+	const size_t expr_size=sizeof(struct hs_expression)+MAAT_MAX_EXPR_ITEM_NUM*sizeof(struct hs_pattern);
+	struct hs_expression *hs_expr=NULL;
+	struct hs_pattern *sub_pattern=NULL;
+	struct Maat_region_inner* user_tag=NULL;
+	int district_id=-1;
+	int i=0, ret=0, hex_len=0;
+	size_t k=0;
+	char *p=NULL, *saveptr=NULL;
+	char *tmp=NULL, *tmp_pattern=NULL, *binary=NULL;
+
+	if(table->table_type==TABLE_TYPE_EXPR_PLUS)
+	{
+		assert(strlen(db_rule->district)>0);
+		str_unescape(db_rule->district);
+		district_id=get_district_id(scanner, db_rule->district);
+	}
+	hs_expr=(struct hs_expression*)ALLOC(char, expr_size);
+	memset(hs_expr, 0, expr_size);
+	hs_expr->type=db_rule->expr_type;
+	switch(db_rule->expr_type)
+	{
+		case EXPR_TYPE_AND:
+		case EXPR_TYPE_OFFSET:
+			for(i=0, p=db_rule->keywords; ; i++,p=NULL)
+			{
+				tmp=strtok_r_esc(p, '&', &saveptr);
+				if(tmp==NULL)
+				{
+					break;
+				}
+				if(i>=MAAT_MAX_EXPR_ITEM_NUM)
+				{
+					MESA_handle_runtime_log(logger,RLOG_LV_FATAL,maat_module ,
+						"Table %s region cfg %d too many expr.", table->table_name[table->updating_name], db_rule->region_id);
+					goto error_out;
+				}
+				if(strlen(tmp)==0) continue;
+				//Empty tokens are skipped, so the slot index is n_sub_pattern, not the token index i.
+				sub_pattern=&(hs_expr->sub_patterns[hs_expr->n_sub_pattern]);
+				tmp_pattern=tmp;
+				if(db_rule->expr_type==EXPR_TYPE_OFFSET)
+				{
+					//Expected token layout: "<start>-<end>:<pattern>"; both offsets must parse.
+					tmp_pattern=(char*)memchr(tmp, ':', strlen(tmp));
+					if(tmp_pattern==NULL
+						|| sscanf(tmp, "%d-%d:", &(sub_pattern->start_offset),&(sub_pattern->end_offset))!=2)
+					{
+						MESA_handle_runtime_log(logger,RLOG_LV_FATAL,maat_module ,
+							"Table %s region cfg %d invalid offset keyword format.", table->table_name[table->updating_name], db_rule->region_id);
+						goto error_out;
+					}
+					tmp_pattern++;//skip ':'
+				}
+				else
+				{
+					sub_pattern->start_offset=sub_pattern->end_offset=-1;
+				}
+				sub_pattern->pattern=_maat_strdup(tmp_pattern);
+				str_unescape(sub_pattern->pattern);
+				hs_expr->n_sub_pattern++;
+			}
+			break;
+		case EXPR_TYPE_REGEX:
+		case EXPR_TYPE_STRING:
+			sub_pattern=&(hs_expr->sub_patterns[0]);
+			switch(db_rule->match_method)
+			{
+				case MATCH_METHOD_COMPLETE:
+					sub_pattern->start_offset=-2;
+					sub_pattern->end_offset=-2;
+					break;
+				case MATCH_METHOD_PREFIX:
+					sub_pattern->start_offset=-2;
+					sub_pattern->end_offset=-1;
+					break;
+				case MATCH_METHOD_SUFFIX:
+					sub_pattern->start_offset=-1;
+					sub_pattern->end_offset=-2;
+					break;
+				case MATCH_METHOD_SUB:
+					sub_pattern->start_offset=-1;
+					sub_pattern->end_offset=-1;
+					break;
+				default:
+					assert(0);
+					break;
+			}
+			//The whole keyword is the single pattern here; tmp is only set by the tokenizer loop above.
+			sub_pattern->pattern=_maat_strdup(db_rule->keywords);
+			str_unescape(sub_pattern->pattern);
+			hs_expr->n_sub_pattern=1;
+			break;
+		default:
+			break;
+	}
+	for(k=0; k<hs_expr->n_sub_pattern; k++)
+	{
+		sub_pattern=&(hs_expr->sub_patterns[k]);
+		sub_pattern->pattern_len=strlen(sub_pattern->pattern);
+		if(db_rule->is_hexbin)
+		{
+			//Decoded bytes may contain '\0', so pattern_len (not strlen) is the authoritative length afterwards.
+			hex_len=(int)sub_pattern->pattern_len;
+			binary=ALLOC(char, hex_len+1);
+			//NOTE(review): assumes hex2bin(hex, hex_len, binary, size) returns the decoded length -- confirm.
+			sub_pattern->pattern_len=(size_t)hex2bin(sub_pattern->pattern, hex_len, binary, hex_len+1);
+			free(sub_pattern->pattern);
+			sub_pattern->pattern=binary;
+		}
+	}
+	//Registration comes last so that no parse failure above happens after user_tag was handed to the hierarchy.
+	user_tag=Maat_region_inner_new(db_rule->group_id, db_rule->region_id, table->table_id, district_id);
+	ret=Maat_hierarchy_add_region_to_group(scanner->hier, db_rule->group_id, db_rule->region_id, table->table_id, user_tag);
+	if(ret!=0)
+	{
+		goto error_out;
+	}
+	hs_expr->user_tag=user_tag;
+	//TODO(review): nothing visible in this change consumes hs_expr yet, so it is released here rather than leaked;
+	//user_tag is left alone on success on the assumption that the hierarchy now owns it -- confirm.
+	expr_ng_free_hs_expression(hs_expr);
+	return 0;
+error_out:
+	expr_ng_free_hs_expression(hs_expr);
+	if(user_tag) Maat_region_inner_free(user_tag);
+	return -1;
+}
-int add_expr_rule(struct Maat_table_schema* table,struct db_str_rule_t* db_rule,struct Maat_scanner *scanner,void* logger) +int add_expr_rule(struct Maat_table_schema* table,struct db_expr_rule_t* db_rule,struct Maat_scanner *scanner,void* logger) { unsigned int i=0,j=0; char* p=NULL,*saveptr=NULL,*region_string=NULL; int region_str_len=0,ret=0,k=0; int expr_id=0,district_id=-1; - struct expr_table_schema* expr_desc=&(table->expr); + struct string_table_schema* expr_desc=&(table->expr); scan_rule_t*p_rule=NULL; enum MAAT_CHARSET dst_charset=CHARSET_NONE; @@ -1573,7 +1731,7 @@ void update_group2group_rule(struct Maat_table_schema* table, const char* table_ } void update_expr_rule(struct Maat_table_schema* table,const char* table_line,struct Maat_scanner *scanner,void* logger) { - struct db_str_rule_t*
maat_str_rule=ALLOC(struct db_str_rule_t, 1); + struct db_expr_rule_t* maat_str_rule=ALLOC(struct db_expr_rule_t, 1); int ret=0,db_hexbin=0,rule_type=0; struct Maat_table_runtime* table_rt=Maat_table_runtime_get(scanner->table_rt_mgr, table->table_id); switch(table->table_type) diff --git a/src/entry/Maat_table_schema.cpp b/src/entry/Maat_table_schema.cpp index aa51285..60191a1 100644 --- a/src/entry/Maat_table_schema.cpp +++ b/src/entry/Maat_table_schema.cpp @@ -66,7 +66,7 @@ int read_expr_table_info(const char* line, struct Maat_table_schema* table, stru int j=0, ret[4]={0}; char table_type[16], src_charset[256], dst_charset[256], merge[4], quick_str_scan[32]={0}; char *token=NULL, *sub_token=NULL, *saveptr; - struct expr_table_schema* p=&(table->expr); + struct string_table_schema* p=&(table->expr); sscanf(line, "%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s",&(table->table_id), table->table_name[0], table_type, diff --git a/src/entry/hyperscan_adapter.cpp b/src/entry/hyperscan_adapter.cpp new file mode 100644 index 0000000..007b7f3 --- /dev/null +++ b/src/entry/hyperscan_adapter.cpp @@ -0,0 +1,27 @@ +#include +#include +struct hs_adapter +{ + hs_database_t *hs_pure_literal_db; + hs_database_t *hs_regex_db; + struct bool_matcher *logical_matcher; + int mode; + int n_thread; + hs_scratch_t *scratchs[]; +}; +
+/* Not implemented yet: no adapter is built, so NULL is returned. The parameter list mirrors the prototype in hyperscan_adapter.h. */
+struct hs_adapter *hs_adapter_new(const struct hs_expression ** exprs, size_t n_expr, int scan_mode, int n_thread)
+{
+	return NULL;
+}
+/* Not implemented yet: hs_adapter_new() never allocates, so there is nothing to release. */
+void hs_adpter_free(struct hs_adapter *adapter)
+{
+
+}
+/* Not implemented yet: returns 0 as a placeholder so the function no longer falls off its end. */
+int hs_adapter_scan(struct hs_adapter *adapter, int thread_id, const char* data, unsigned int length, void **matched_tags, size_t n_tag)
+{
+	return 0;
+}
diff --git a/src/inc_internal/Maat_rule_internal.h b/src/inc_internal/Maat_rule_internal.h index 96b05e9..6340c6c 100644 --- a/src/inc_internal/Maat_rule_internal.h +++ b/src/inc_internal/Maat_rule_internal.h @@ -34,7 +34,7 @@ typedef void* rule_scanner_t; -struct db_str_rule_t +struct db_expr_rule_t { int region_id; int group_id; diff --git
a/src/inc_internal/Maat_table_runtime.h b/src/inc_internal/Maat_table_runtime.h index 4568fed..4cbea03 100644 --- a/src/inc_internal/Maat_table_runtime.h +++ b/src/inc_internal/Maat_table_runtime.h @@ -38,6 +38,14 @@ struct ip_plugin_runtime size_t mem_use_by_ip_matcher; int changed_flag; }; +struct expr_ng_runtime +{ + struct EX_data_rt* ex_data_rt; + hs_database_t *hs_pure_literal_db; + hs_database_t *hs_regex_db; + struct bool_matcher *logical_matcher; + int changed_flag; +}; struct expr_runtime { long long expr_rule_cnt; //expr_type=0,1,3 diff --git a/src/inc_internal/Maat_table_schema.h b/src/inc_internal/Maat_table_schema.h index 05f7daa..5238b8b 100644 --- a/src/inc_internal/Maat_table_schema.h +++ b/src/inc_internal/Maat_table_schema.h @@ -82,7 +82,7 @@ struct compile_table_schema struct compile_ex_data_idx ex_desc[MAX_COMPILE_EX_DATA_NUM]; }; -struct expr_table_schema +struct string_table_schema { enum MAAT_CHARSET src_charset; enum MAAT_CHARSET dst_charset[MAX_CHARSET_NUM]; @@ -170,7 +170,7 @@ struct Maat_table_schema union { struct compile_table_schema compile; - struct expr_table_schema expr; + struct string_table_schema expr; struct plugin_table_schema plugin; struct ip_plugin_table_schema ip_plugin; struct fqdn_plugin_table_schema fqdn_plugin; diff --git a/src/inc_internal/hyperscan_adapter.h b/src/inc_internal/hyperscan_adapter.h new file mode 100644 index 0000000..9aadd53 --- /dev/null +++ b/src/inc_internal/hyperscan_adapter.h @@ -0,0 +1,33 @@ +#pragma once +#include +#ifdef __cplusplus +extern "C" +{ +#endif + +#define HSA_MAX_SUB_STRING_NUM 8 +struct hs_pattern +{ + char *pattern; + size_t pattern_len; + int start_offset; //-1: not specified; -2: the match must start at the beginning of the input + int end_offset; //-1: not specified; -2: the match must end at the end of the input +}; + +struct hs_expression +{ + void *user_tag; + enum MAAT_EXPR_TYPE type; + size_t n_sub_pattern; + struct hs_pattern sub_patterns[]; +}; +struct hs_adapter; +struct hs_adapter
*hs_adapter_new(const struct hs_expression ** exprs, size_t n_expr, int scan_mode, int n_thread); +void hs_adpter_free(struct hs_adapter *adapter); + +int hs_adapter_scan(struct hs_adapter *adapter, int thread_id, const char* data, unsigned int length, void **matched_tags, size_t n_tag); + +#ifdef __cplusplus +} +#endif + diff --git a/src/inc_internal/view_only/hyperscan/hs.h b/src/inc_internal/view_only/hyperscan/hs.h new file mode 100644 index 0000000..2fe5d24 --- /dev/null +++ b/src/inc_internal/view_only/hyperscan/hs.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2015-2020, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef HS_H_ +#define HS_H_ + +/** + * @file + * @brief The complete Hyperscan API definition. + * + * Hyperscan is a high speed regular expression engine. + * + * This header includes both the Hyperscan compiler and runtime components. See + * the individual component headers for documentation. + */ + +/* The current Hyperscan version information. */ + +#define HS_MAJOR 5 +#define HS_MINOR 4 +#define HS_PATCH 0 + +#include "hs_compile.h" +#include "hs_runtime.h" + +#endif /* HS_H_ */ diff --git a/src/inc_internal/view_only/hyperscan/hs_common.h b/src/inc_internal/view_only/hyperscan/hs_common.h new file mode 100644 index 0000000..93dc1fe --- /dev/null +++ b/src/inc_internal/view_only/hyperscan/hs_common.h @@ -0,0 +1,596 @@ +/* + * Copyright (c) 2015-2019, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef HS_COMMON_H_ +#define HS_COMMON_H_ + +#if defined(_WIN32) +#define HS_CDECL __cdecl +#else +#define HS_CDECL +#endif +#include + +/** + * @file + * @brief The Hyperscan common API definition. + * + * Hyperscan is a high speed regular expression engine. + * + * This header contains functions available to both the Hyperscan compiler and + * runtime. + */ + +#ifdef __cplusplus +extern "C" +{ +#endif + +struct hs_database; + +/** + * A Hyperscan pattern database. + * + * Generated by one of the Hyperscan compiler functions: + * - @ref hs_compile() + * - @ref hs_compile_multi() + * - @ref hs_compile_ext_multi() + */ +typedef struct hs_database hs_database_t; + +/** + * A type for errors returned by Hyperscan functions. + */ +typedef int hs_error_t; + +/** + * Free a compiled pattern database. + * + * The free callback set by @ref hs_set_database_allocator() (or @ref + * hs_set_allocator()) will be used by this function. + * + * @param db + * A compiled pattern database. 
NULL may also be safely provided, in which + * case the function does nothing. + * + * @return + * @ref HS_SUCCESS on success, other values on failure. + */ +hs_error_t HS_CDECL hs_free_database(hs_database_t *db); + +/** + * Serialize a pattern database to a stream of bytes. + * + * The allocator callback set by @ref hs_set_misc_allocator() (or @ref + * hs_set_allocator()) will be used by this function. + * + * @param db + * A compiled pattern database. + * + * @param bytes + * On success, a pointer to an array of bytes will be returned here. + * These bytes can be subsequently relocated or written to disk. The + * caller is responsible for freeing this block. + * + * @param length + * On success, the number of bytes in the generated byte array will be + * returned here. + * + * @return + * @ref HS_SUCCESS on success, @ref HS_NOMEM if the byte array cannot be + * allocated, other values may be returned if errors are detected. + */ +hs_error_t HS_CDECL hs_serialize_database(const hs_database_t *db, char **bytes, + size_t *length); + +/** + * Reconstruct a pattern database from a stream of bytes previously generated + * by @ref hs_serialize_database(). + * + * This function will allocate sufficient space for the database using the + * allocator set with @ref hs_set_database_allocator() (or @ref + * hs_set_allocator()); to use a pre-allocated region of memory, use the @ref + * hs_deserialize_database_at() function. + * + * @param bytes + * A byte array generated by @ref hs_serialize_database() representing a + * compiled pattern database. + * + * @param length + * The length of the byte array generated by @ref hs_serialize_database(). + * This should be the same value as that returned by @ref + * hs_serialize_database(). + * + * @param db + * On success, a pointer to a newly allocated @ref hs_database_t will be + * returned here. This database can then be used for scanning, and + * eventually freed by the caller using @ref hs_free_database(). 
+ * + * @return + * @ref HS_SUCCESS on success, other values on failure. + */ +hs_error_t HS_CDECL hs_deserialize_database(const char *bytes, + const size_t length, + hs_database_t **db); + +/** + * Reconstruct a pattern database from a stream of bytes previously generated + * by @ref hs_serialize_database() at a given memory location. + * + * This function (unlike @ref hs_deserialize_database()) will write the + * reconstructed database to the memory location given in the @p db parameter. + * The amount of space required at this location can be determined with the + * @ref hs_serialized_database_size() function. + * + * @param bytes + * A byte array generated by @ref hs_serialize_database() representing a + * compiled pattern database. + * + * @param length + * The length of the byte array generated by @ref hs_serialize_database(). + * This should be the same value as that returned by @ref + * hs_serialize_database(). + * + * @param db + * Pointer to an 8-byte aligned block of memory of sufficient size to hold + * the deserialized database. On success, the reconstructed database will + * be written to this location. This database can then be used for pattern + * matching. The user is responsible for freeing this memory; the @ref + * hs_free_database() call should not be used. + * + * @return + * @ref HS_SUCCESS on success, other values on failure. + */ +hs_error_t HS_CDECL hs_deserialize_database_at(const char *bytes, + const size_t length, + hs_database_t *db); + +/** + * Provides the size of the stream state allocated by a single stream opened + * against the given database. + * + * @param database + * Pointer to a compiled (streaming mode) pattern database. + * + * @param stream_size + * On success, the size in bytes of an individual stream opened against the + * given database is placed in this parameter. + * + * @return + * @ref HS_SUCCESS on success, other values on failure. 
+ */ +hs_error_t HS_CDECL hs_stream_size(const hs_database_t *database, + size_t *stream_size); + +/** + * Provides the size of the given database in bytes. + * + * @param database + * Pointer to compiled pattern database. + * + * @param database_size + * On success, the size of the compiled database in bytes is placed in this + * parameter. + * + * @return + * @ref HS_SUCCESS on success, other values on failure. + */ +hs_error_t HS_CDECL hs_database_size(const hs_database_t *database, + size_t *database_size); + +/** + * Utility function for reporting the size that would be required by a + * database if it were deserialized. + * + * This can be used to allocate a shared memory region or other "special" + * allocation prior to deserializing with the @ref hs_deserialize_database_at() + * function. + * + * @param bytes + * Pointer to a byte array generated by @ref hs_serialize_database() + * representing a compiled pattern database. + * + * @param length + * The length of the byte array generated by @ref hs_serialize_database(). + * This should be the same value as that returned by @ref + * hs_serialize_database(). + * + * @param deserialized_size + * On success, the size of the compiled database that would be generated + * by @ref hs_deserialize_database_at() is returned here. + * + * @return + * @ref HS_SUCCESS on success, other values on failure. + */ +hs_error_t HS_CDECL hs_serialized_database_size(const char *bytes, + const size_t length, + size_t *deserialized_size); + +/** + * Utility function providing information about a database. + * + * @param database + * Pointer to a compiled database. + * + * @param info + * On success, a string containing the version and platform information for + * the supplied database is placed in the parameter. The string is + * allocated using the allocator supplied in @ref hs_set_misc_allocator() + * (or malloc() if no allocator was set) and should be freed by the caller. 
+ * + * @return + * @ref HS_SUCCESS on success, other values on failure. + */ +hs_error_t HS_CDECL hs_database_info(const hs_database_t *database, + char **info); + +/** + * Utility function providing information about a serialized database. + * + * @param bytes + * Pointer to a serialized database. + * + * @param length + * Length in bytes of the serialized database. + * + * @param info + * On success, a string containing the version and platform information + * for the supplied serialized database is placed in the parameter. The + * string is allocated using the allocator supplied in @ref + * hs_set_misc_allocator() (or malloc() if no allocator was set) and + * should be freed by the caller. + * + * @return + * @ref HS_SUCCESS on success, other values on failure. + */ +hs_error_t HS_CDECL hs_serialized_database_info(const char *bytes, + size_t length, char **info); + +/** + * The type of the callback function that will be used by Hyperscan to allocate + * more memory at runtime as required, for example in @ref hs_open_stream() to + * allocate stream state. + * + * If Hyperscan is to be used in a multi-threaded, or similarly concurrent + * environment, the allocation function will need to be re-entrant, or + * similarly safe for concurrent use. + * + * @param size + * The number of bytes to allocate. + * @return + * A pointer to the region of memory allocated, or NULL on error. + */ +typedef void *(HS_CDECL *hs_alloc_t)(size_t size); + +/** + * The type of the callback function that will be used by Hyperscan to free + * memory regions previously allocated using the @ref hs_alloc_t function. + * + * @param ptr + * The region of memory to be freed. + */ +typedef void (HS_CDECL *hs_free_t)(void *ptr); + +/** + * Set the allocate and free functions used by Hyperscan for allocating + * memory at runtime for stream state, scratch space, database bytecode, + * and various other data structure returned by the Hyperscan API. 
+ * + * The function is equivalent to calling @ref hs_set_stream_allocator(), + * @ref hs_set_scratch_allocator(), @ref hs_set_database_allocator() and + * @ref hs_set_misc_allocator() with the provided parameters. + * + * This call will override any previous allocators that have been set. + * + * Note: there is no way to change the allocator used for temporary objects + * created during the various compile calls (@ref hs_compile(), @ref + * hs_compile_multi(), @ref hs_compile_ext_multi()). + * + * @param alloc_func + * A callback function pointer that allocates memory. This function must + * return memory suitably aligned for the largest representable data type + * on this platform. + * + * @param free_func + * A callback function pointer that frees allocated memory. + * + * @return + * @ref HS_SUCCESS on success, other values on failure. + */ +hs_error_t HS_CDECL hs_set_allocator(hs_alloc_t alloc_func, + hs_free_t free_func); + +/** + * Set the allocate and free functions used by Hyperscan for allocating memory + * for database bytecode produced by the compile calls (@ref hs_compile(), @ref + * hs_compile_multi(), @ref hs_compile_ext_multi()) and by database + * deserialization (@ref hs_deserialize_database()). + * + * If no database allocation functions are set, or if NULL is used in place of + * both parameters, then memory allocation will default to standard methods + * (such as the system malloc() and free() calls). + * + * This call will override any previous database allocators that have been set. + * + * Note: the database allocator may also be set by calling @ref + * hs_set_allocator(). + * + * Note: there is no way to change how temporary objects created during the + * various compile calls (@ref hs_compile(), @ref hs_compile_multi(), @ref + * hs_compile_ext_multi()) are allocated. + * + * @param alloc_func + * A callback function pointer that allocates memory. 
This function must + * return memory suitably aligned for the largest representable data type + * on this platform. + * + * @param free_func + * A callback function pointer that frees allocated memory. + * + * @return + * @ref HS_SUCCESS on success, other values on failure. + */ +hs_error_t HS_CDECL hs_set_database_allocator(hs_alloc_t alloc_func, + hs_free_t free_func); + +/** + * Set the allocate and free functions used by Hyperscan for allocating memory + * for items returned by the Hyperscan API such as @ref hs_compile_error_t, @ref + * hs_expr_info_t and serialized databases. + * + * If no misc allocation functions are set, or if NULL is used in place of both + * parameters, then memory allocation will default to standard methods (such as + * the system malloc() and free() calls). + * + * This call will override any previous misc allocators that have been set. + * + * Note: the misc allocator may also be set by calling @ref hs_set_allocator(). + * + * @param alloc_func + * A callback function pointer that allocates memory. This function must + * return memory suitably aligned for the largest representable data type + * on this platform. + * + * @param free_func + * A callback function pointer that frees allocated memory. + * + * @return + * @ref HS_SUCCESS on success, other values on failure. + */ +hs_error_t HS_CDECL hs_set_misc_allocator(hs_alloc_t alloc_func, + hs_free_t free_func); + +/** + * Set the allocate and free functions used by Hyperscan for allocating memory + * for scratch space by @ref hs_alloc_scratch() and @ref hs_clone_scratch(). + * + * If no scratch allocation functions are set, or if NULL is used in place of + * both parameters, then memory allocation will default to standard methods + * (such as the system malloc() and free() calls). + * + * This call will override any previous scratch allocators that have been set. + * + * Note: the scratch allocator may also be set by calling @ref + * hs_set_allocator(). 
+ * + * @param alloc_func + * A callback function pointer that allocates memory. This function must + * return memory suitably aligned for the largest representable data type + * on this platform. + * + * @param free_func + * A callback function pointer that frees allocated memory. + * + * @return + * @ref HS_SUCCESS on success, other values on failure. + */ +hs_error_t HS_CDECL hs_set_scratch_allocator(hs_alloc_t alloc_func, + hs_free_t free_func); + +/** + * Set the allocate and free functions used by Hyperscan for allocating memory + * for stream state by @ref hs_open_stream(). + * + * If no stream allocation functions are set, or if NULL is used in place of + * both parameters, then memory allocation will default to standard methods + * (such as the system malloc() and free() calls). + * + * This call will override any previous stream allocators that have been set. + * + * Note: the stream allocator may also be set by calling @ref + * hs_set_allocator(). + * + * @param alloc_func + * A callback function pointer that allocates memory. This function must + * return memory suitably aligned for the largest representable data type + * on this platform. + * + * @param free_func + * A callback function pointer that frees allocated memory. + * + * @return + * @ref HS_SUCCESS on success, other values on failure. + */ +hs_error_t HS_CDECL hs_set_stream_allocator(hs_alloc_t alloc_func, + hs_free_t free_func); + +/** + * Utility function for identifying this release version. + * + * @return + * A string containing the version number of this release build and the + * date of the build. It is allocated statically, so it does not need to + * be freed by the caller. + */ +const char * HS_CDECL hs_version(void); + +/** + * Utility function to test the current system architecture. + * + * Hyperscan requires the Supplemental Streaming SIMD Extensions 3 instruction + * set. 
This function can be called on any x86 platform to determine if the + * system provides the required instruction set. + * + * This function does not test for more advanced features if Hyperscan has + * been built for a more specific architecture, for example the AVX2 + * instruction set. + * + * @return + * @ref HS_SUCCESS on success, @ref HS_ARCH_ERROR if system does not + * support Hyperscan. + */ +hs_error_t HS_CDECL hs_valid_platform(void); + +/** + * @defgroup HS_ERROR hs_error_t values + * + * @{ + */ + +/** + * The engine completed normally. + */ +#define HS_SUCCESS 0 + +/** + * A parameter passed to this function was invalid. + * + * This error is only returned in cases where the function can detect an + * invalid parameter -- it cannot be relied upon to detect (for example) + * pointers to freed memory or other invalid data. + */ +#define HS_INVALID (-1) + +/** + * A memory allocation failed. + */ +#define HS_NOMEM (-2) + +/** + * The engine was terminated by callback. + * + * This return value indicates that the target buffer was partially scanned, + * but that the callback function requested that scanning cease after a match + * was located. + */ +#define HS_SCAN_TERMINATED (-3) + +/** + * The pattern compiler failed, and the @ref hs_compile_error_t should be + * inspected for more detail. + */ +#define HS_COMPILER_ERROR (-4) + +/** + * The given database was built for a different version of Hyperscan. + */ +#define HS_DB_VERSION_ERROR (-5) + +/** + * The given database was built for a different platform (i.e., CPU type). + */ +#define HS_DB_PLATFORM_ERROR (-6) + +/** + * The given database was built for a different mode of operation. This error + * is returned when streaming calls are used with a block or vectored database + * and vice versa. + */ +#define HS_DB_MODE_ERROR (-7) + +/** + * A parameter passed to this function was not correctly aligned. 
+ */ +#define HS_BAD_ALIGN (-8) + +/** + * The memory allocator (either malloc() or the allocator set with @ref + * hs_set_allocator()) did not correctly return memory suitably aligned for the + * largest representable data type on this platform. + */ +#define HS_BAD_ALLOC (-9) + +/** + * The scratch region was already in use. + * + * This error is returned when Hyperscan is able to detect that the scratch + * region given is already in use by another Hyperscan API call. + * + * A separate scratch region, allocated with @ref hs_alloc_scratch() or @ref + * hs_clone_scratch(), is required for every concurrent caller of the Hyperscan + * API. + * + * For example, this error might be returned when @ref hs_scan() has been + * called inside a callback delivered by a currently-executing @ref hs_scan() + * call using the same scratch region. + * + * Note: Not all concurrent uses of scratch regions may be detected. This error + * is intended as a best-effort debugging tool, not a guarantee. + */ +#define HS_SCRATCH_IN_USE (-10) + +/** + * Unsupported CPU architecture. + * + * This error is returned when Hyperscan is able to detect that the current + * system does not support the required instruction set. + * + * At a minimum, Hyperscan requires Supplemental Streaming SIMD Extensions 3 + * (SSSE3). + */ +#define HS_ARCH_ERROR (-11) + +/** + * Provided buffer was too small. + * + * This error indicates that there was insufficient space in the buffer. The + * call should be repeated with a larger provided buffer. + * + * Note: in this situation, it is normal for the amount of space required to be + * returned in the same manner as the used space would have been returned if the + * call was successful. + */ +#define HS_INSUFFICIENT_SPACE (-12) + +/** + * Unexpected internal error. + * + * This error indicates that there was unexpected matching behavior. This + * could be related to invalid usage of stream and scratch space or invalid memory + * operations by users.
+ * + */ +#define HS_UNKNOWN_ERROR (-13) + +/** @} */ + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* HS_COMMON_H_ */ diff --git a/src/inc_internal/view_only/hyperscan/hs_compile.h b/src/inc_internal/view_only/hyperscan/hs_compile.h new file mode 100644 index 0000000..b318c29 --- /dev/null +++ b/src/inc_internal/view_only/hyperscan/hs_compile.h @@ -0,0 +1,1224 @@ +/* + * Copyright (c) 2015-2020, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef HS_COMPILE_H_ +#define HS_COMPILE_H_ + +/** + * @file + * @brief The Hyperscan compiler API definition. + * + * Hyperscan is a high speed regular expression engine. + * + * This header contains functions for compiling regular expressions into + * Hyperscan databases that can be used by the Hyperscan runtime. + */ + +#include "hs_common.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + +/** + * A type containing error details that is returned by the compile calls (@ref + * hs_compile(), @ref hs_compile_multi() and @ref hs_compile_ext_multi()) on + * failure. The caller may inspect the values returned in this type to + * determine the cause of failure. + * + * Common errors generated during the compile process include: + * + * - *Invalid parameter* + * + * An invalid argument was specified in the compile call. + * + * - *Unrecognised flag* + * + * An unrecognised value was passed in the flags argument. + * + * - *Pattern matches empty buffer* + * + * By default, Hyperscan only supports patterns that will *always* + * consume at least one byte of input. Patterns that do not have this + * property (such as `/(abc)?/`) will produce this error unless + * the @ref HS_FLAG_ALLOWEMPTY flag is supplied. Note that such + * patterns will produce a match for *every* byte when scanned. + * + * - *Embedded anchors not supported* + * + * Hyperscan only supports the use of anchor meta-characters (such as + * `^` and `$`) in patterns where they could *only* match + * at the start or end of a buffer. A pattern containing an embedded + * anchor, such as `/abc^def/`, can never match, as there is no + * way for `abc` to precede the start of the data stream. + * + * - *Bounded repeat is too large* + * + * The pattern contains a repeated construct with very large finite + * bounds. + * + * - *Unsupported component type* + * + * An unsupported PCRE construct was used in the pattern. 
+ * + * - *Unable to generate bytecode* + * + * This error indicates that Hyperscan was unable to compile a pattern + * that is syntactically valid. The most common cause is a pattern that is + * very long and complex or contains a large repeated subpattern. + * + * - *Unable to allocate memory* + * + * The library was unable to allocate temporary storage used during + * compilation time. + * + * - *Allocator returned misaligned memory* + * + * The memory allocator (either malloc() or the allocator set with @ref + * hs_set_allocator()) did not correctly return memory suitably aligned + * for the largest representable data type on this platform. + * + * - *Internal error* + * + * An unexpected error occurred: if this error is reported, please contact + * the Hyperscan team with a description of the situation. + */ +typedef struct hs_compile_error { + /** + * A human-readable error message describing the error. + */ + char *message; + + /** + * The zero-based number of the expression that caused the error (if this + * can be determined). If the error is not specific to an expression, then + * this value will be less than zero. + */ + int expression; +} hs_compile_error_t; + +/** + * A type containing information on the target platform which may optionally be + * provided to the compile calls (@ref hs_compile(), @ref hs_compile_multi(), + * @ref hs_compile_ext_multi()). + * + * A hs_platform_info structure may be populated for the current platform by + * using the @ref hs_populate_platform() call. + */ +typedef struct hs_platform_info { + /** + * Information about the target platform which may be used to guide the + * optimisation process of the compile. + * + * Use of this field does not limit the processors that the resulting + * database can run on, but may impact the performance of the resulting + * database. 
+ */ + unsigned int tune; + + /** + * Relevant CPU features available on the target platform. + * + * This value may be produced by combining HS_CPU_FEATURES_* flags (such as + * @ref HS_CPU_FEATURES_AVX2). Multiple CPU features may be or'ed together + * to produce the value. + */ + unsigned long long cpu_features; + + /** + * Reserved for future use. + */ + unsigned long long reserved1; + + /** + * Reserved for future use. + */ + unsigned long long reserved2; +} hs_platform_info_t; + +/** + * A type containing information related to an expression that is returned by + * @ref hs_expression_info() or @ref hs_expression_ext_info(). + */ +typedef struct hs_expr_info { + /** + * The minimum length in bytes of a match for the pattern. + * + * Note: in some cases when using advanced features to suppress matches + * (such as extended parameters or the @ref HS_FLAG_SINGLEMATCH flag) this + * may represent a conservative lower bound for the true minimum length of + * a match. + */ + unsigned int min_width; + + /** + * The maximum length in bytes of a match for the pattern. If the pattern + * has an unbounded maximum length, this will be set to the maximum value + * of an unsigned int (UINT_MAX). + * + * Note: in some cases when using advanced features to suppress matches + * (such as extended parameters or the @ref HS_FLAG_SINGLEMATCH flag) this + * may represent a conservative upper bound for the true maximum length of + * a match. + */ + unsigned int max_width; + + /** + * Whether this expression can produce matches that are not returned in + * order, such as those produced by assertions. Zero if false, non-zero if + * true. + */ + char unordered_matches; + + /** + * Whether this expression can produce matches at end of data (EOD). In + * streaming mode, EOD matches are raised during @ref hs_close_stream(), + * since it is only when @ref hs_close_stream() is called that the EOD + * location is known. Zero if false, non-zero if true.
+ * + * Note: trailing `\b` word boundary assertions may also result in EOD + * matches as end-of-data can act as a word boundary. + */ + char matches_at_eod; + + /** + * Whether this expression can *only* produce matches at end of data (EOD). + * In streaming mode, all matches for this expression are raised during + * @ref hs_close_stream(). Zero if false, non-zero if true. + */ + char matches_only_at_eod; +} hs_expr_info_t; + +/** + * A structure containing additional parameters related to an expression, + * passed in at build time to @ref hs_compile_ext_multi() or @ref + * hs_expression_ext_info. + * + * These parameters allow the set of matches produced by a pattern to be + * constrained at compile time, rather than relying on the application to + * process unwanted matches at runtime. + */ +typedef struct hs_expr_ext { + /** + * Flags governing which parts of this structure are to be used by the + * compiler. See @ref HS_EXT_FLAG. + */ + unsigned long long flags; + + /** + * The minimum end offset in the data stream at which this expression + * should match successfully. To use this parameter, set the + * @ref HS_EXT_FLAG_MIN_OFFSET flag in the hs_expr_ext::flags field. + */ + unsigned long long min_offset; + + /** + * The maximum end offset in the data stream at which this expression + * should match successfully. To use this parameter, set the + * @ref HS_EXT_FLAG_MAX_OFFSET flag in the hs_expr_ext::flags field. + */ + unsigned long long max_offset; + + /** + * The minimum match length (from start to end) required to successfully + * match this expression. To use this parameter, set the + * @ref HS_EXT_FLAG_MIN_LENGTH flag in the hs_expr_ext::flags field. + */ + unsigned long long min_length; + + /** + * Allow patterns to approximately match within this edit distance. To use + * this parameter, set the @ref HS_EXT_FLAG_EDIT_DISTANCE flag in the + * hs_expr_ext::flags field. 
+ */ + unsigned edit_distance; + + /** + * Allow patterns to approximately match within this Hamming distance. To + * use this parameter, set the @ref HS_EXT_FLAG_HAMMING_DISTANCE flag in the + * hs_expr_ext::flags field. + */ + unsigned hamming_distance; +} hs_expr_ext_t; + +/** + * @defgroup HS_EXT_FLAG hs_expr_ext_t flags + * + * These flags are used in @ref hs_expr_ext_t::flags to indicate which fields + * are used. + * + * @{ + */ + +/** Flag indicating that the hs_expr_ext::min_offset field is used. */ +#define HS_EXT_FLAG_MIN_OFFSET 1ULL + +/** Flag indicating that the hs_expr_ext::max_offset field is used. */ +#define HS_EXT_FLAG_MAX_OFFSET 2ULL + +/** Flag indicating that the hs_expr_ext::min_length field is used. */ +#define HS_EXT_FLAG_MIN_LENGTH 4ULL + +/** Flag indicating that the hs_expr_ext::edit_distance field is used. */ +#define HS_EXT_FLAG_EDIT_DISTANCE 8ULL + +/** Flag indicating that the hs_expr_ext::hamming_distance field is used. */ +#define HS_EXT_FLAG_HAMMING_DISTANCE 16ULL + +/** @} */ + +/** + * The basic regular expression compiler. + * + * This is the function call with which an expression is compiled into a + * Hyperscan database which can be passed to the runtime functions (such as + * @ref hs_scan(), @ref hs_open_stream(), etc.) + * + * @param expression + * The NULL-terminated expression to parse. Note that this string must + * represent ONLY the pattern to be matched, with no delimiters or flags; + * any global flags should be specified with the @p flags argument. For + * example, the expression `/abc?def/i` should be compiled by providing + * `abc?def` as the @p expression, and @ref HS_FLAG_CASELESS as the @a + * flags. + * + * @param flags + * Flags which modify the behaviour of the expression. Multiple flags may + * be used by ORing them together. Valid values are: + * - HS_FLAG_CASELESS - Matching will be performed case-insensitively. + * - HS_FLAG_DOTALL - Matching a `.` will not exclude newlines. 
+ * - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data. + * - HS_FLAG_SINGLEMATCH - Only one match will be generated for the + * expression per stream. + * - HS_FLAG_ALLOWEMPTY - Allow expressions which can match against an + * empty string, such as `.*`. + * - HS_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters. + * - HS_FLAG_UCP - Use Unicode properties for character classes. + * - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode. + * - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset + * when a match is found. + * - HS_FLAG_COMBINATION - Parse the expression in logical combination + * syntax. + * - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for + * the sub-expressions in logical combinations. + * + * @param mode + * Compiler mode flags that affect the database as a whole. One of @ref + * HS_MODE_STREAM or @ref HS_MODE_BLOCK or @ref HS_MODE_VECTORED must be + * supplied, to select between the generation of a streaming, block or + * vectored database. In addition, other flags (beginning with HS_MODE_) + * may be supplied to enable specific features. See @ref HS_MODE_FLAG for + * more details. + * + * @param platform + * If not NULL, the platform structure is used to determine the target + * platform for the database. If NULL, a database suitable for running + * on the current host platform is produced. + * + * @param db + * On success, a pointer to the generated database will be returned in + * this parameter, or NULL on failure. The caller is responsible for + * deallocating the buffer using the @ref hs_free_database() function. + * + * @param error + * If the compile fails, a pointer to a @ref hs_compile_error_t will be + * returned, providing details of the error condition. The caller is + * responsible for deallocating the buffer using the @ref + * hs_free_compile_error() function. 
+ * + * @return + * @ref HS_SUCCESS is returned on successful compilation; @ref + * HS_COMPILER_ERROR on failure, with details provided in the error + * parameter. + */ +hs_error_t HS_CDECL hs_compile(const char *expression, unsigned int flags, + unsigned int mode, + const hs_platform_info_t *platform, + hs_database_t **db, hs_compile_error_t **error); + +/** + * The multiple regular expression compiler. + * + * This is the function call with which a set of expressions is compiled into a + * database which can be passed to the runtime functions (such as @ref + * hs_scan(), @ref hs_open_stream(), etc.) Each expression can be labelled with + * a unique integer which is passed into the match callback to identify the + * pattern that has matched. + * + * @param expressions + * Array of NULL-terminated expressions to compile. Note that (as for @ref + * hs_compile()) these strings must contain only the pattern to be + * matched, with no delimiters or flags. For example, the expression + * `/abc?def/i` should be compiled by providing `abc?def` as the first + * string in the @p expressions array, and @ref HS_FLAG_CASELESS as the + * first value in the @p flags array. + * + * @param flags + * Array of flags which modify the behaviour of each expression. Multiple + * flags may be used by ORing them together. Specifying the NULL pointer + * in place of an array will set the flags value for all patterns to zero. + * Valid values are: + * - HS_FLAG_CASELESS - Matching will be performed case-insensitively. + * - HS_FLAG_DOTALL - Matching a `.` will not exclude newlines. + * - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data. + * - HS_FLAG_SINGLEMATCH - Only one match will be generated by patterns + * with this match id per stream. + * - HS_FLAG_ALLOWEMPTY - Allow expressions which can match against an + * empty string, such as `.*`. + * - HS_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters. 
+ * - HS_FLAG_UCP - Use Unicode properties for character classes. + * - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode. + * - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset + * when a match is found. + * - HS_FLAG_COMBINATION - Parse the expression in logical combination + * syntax. + * - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for + * the sub-expressions in logical combinations. + * + * @param ids + * An array of integers specifying the ID number to be associated with the + * corresponding pattern in the expressions array. Specifying the NULL + * pointer in place of an array will set the ID value for all patterns to + * zero. + * + * @param elements + * The number of elements in the input arrays. + * + * @param mode + * Compiler mode flags that affect the database as a whole. One of @ref + * HS_MODE_STREAM or @ref HS_MODE_BLOCK or @ref HS_MODE_VECTORED must be + * supplied, to select between the generation of a streaming, block or + * vectored database. In addition, other flags (beginning with HS_MODE_) + * may be supplied to enable specific features. See @ref HS_MODE_FLAG for + * more details. + * + * @param platform + * If not NULL, the platform structure is used to determine the target + * platform for the database. If NULL, a database suitable for running + * on the current host platform is produced. + * + * @param db + * On success, a pointer to the generated database will be returned in + * this parameter, or NULL on failure. The caller is responsible for + * deallocating the buffer using the @ref hs_free_database() function. + * + * @param error + * If the compile fails, a pointer to a @ref hs_compile_error_t will be + * returned, providing details of the error condition. The caller is + * responsible for deallocating the buffer using the @ref + * hs_free_compile_error() function. 
+ * + * @return + * @ref HS_SUCCESS is returned on successful compilation; @ref + * HS_COMPILER_ERROR on failure, with details provided in the @p error + * parameter. + * + */ +hs_error_t HS_CDECL hs_compile_multi(const char *const *expressions, + const unsigned int *flags, + const unsigned int *ids, + unsigned int elements, unsigned int mode, + const hs_platform_info_t *platform, + hs_database_t **db, + hs_compile_error_t **error); + +/** + * The multiple regular expression compiler with extended parameter support. + * + * This function call compiles a group of expressions into a database in the + * same way as @ref hs_compile_multi(), but allows additional parameters to be + * specified via an @ref hs_expr_ext_t structure per expression. + * + * @param expressions + * Array of NULL-terminated expressions to compile. Note that (as for @ref + * hs_compile()) these strings must contain only the pattern to be + * matched, with no delimiters or flags. For example, the expression + * `/abc?def/i` should be compiled by providing `abc?def` as the first + * string in the @p expressions array, and @ref HS_FLAG_CASELESS as the + * first value in the @p flags array. + * + * @param flags + * Array of flags which modify the behaviour of each expression. Multiple + * flags may be used by ORing them together. Specifying the NULL pointer + * in place of an array will set the flags value for all patterns to zero. + * Valid values are: + * - HS_FLAG_CASELESS - Matching will be performed case-insensitively. + * - HS_FLAG_DOTALL - Matching a `.` will not exclude newlines. + * - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data. + * - HS_FLAG_SINGLEMATCH - Only one match will be generated by patterns + * with this match id per stream. + * - HS_FLAG_ALLOWEMPTY - Allow expressions which can match against an + * empty string, such as `.*`. + * - HS_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters. 
+ * - HS_FLAG_UCP - Use Unicode properties for character classes. + * - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode. + * - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset + * when a match is found. + * - HS_FLAG_COMBINATION - Parse the expression in logical combination + * syntax. + * - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for + * the sub-expressions in logical combinations. + * + * @param ids + * An array of integers specifying the ID number to be associated with the + * corresponding pattern in the expressions array. Specifying the NULL + * pointer in place of an array will set the ID value for all patterns to + * zero. + * + * @param ext + * An array of pointers to filled @ref hs_expr_ext_t structures that + * define extended behaviour for each pattern. NULL may be specified if no + * extended behaviour is needed for an individual pattern, or in place of + * the whole array if it is not needed for any expressions. Memory used by + * these structures must be both allocated and freed by the caller. + * + * @param elements + * The number of elements in the input arrays. + * + * @param mode + * Compiler mode flags that affect the database as a whole. One of @ref + * HS_MODE_STREAM, @ref HS_MODE_BLOCK or @ref HS_MODE_VECTORED must be + * supplied, to select between the generation of a streaming, block or + * vectored database. In addition, other flags (beginning with HS_MODE_) + * may be supplied to enable specific features. See @ref HS_MODE_FLAG for + * more details. + * + * @param platform + * If not NULL, the platform structure is used to determine the target + * platform for the database. If NULL, a database suitable for running + * on the current host platform is produced. + * + * @param db + * On success, a pointer to the generated database will be returned in + * this parameter, or NULL on failure. The caller is responsible for + * deallocating the buffer using the @ref hs_free_database() function. 
+ * + * @param error + * If the compile fails, a pointer to a @ref hs_compile_error_t will be + * returned, providing details of the error condition. The caller is + * responsible for deallocating the buffer using the @ref + * hs_free_compile_error() function. + * + * @return + * @ref HS_SUCCESS is returned on successful compilation; @ref + * HS_COMPILER_ERROR on failure, with details provided in the @p error + * parameter. + * + */ +hs_error_t HS_CDECL hs_compile_ext_multi(const char *const *expressions, + const unsigned int *flags, + const unsigned int *ids, + const hs_expr_ext_t *const *ext, + unsigned int elements, unsigned int mode, + const hs_platform_info_t *platform, + hs_database_t **db, hs_compile_error_t **error); + +/** + * The basic pure literal expression compiler. + * + * This is the function call with which a pure literal expression (not a + * common regular expression) is compiled into a Hyperscan database which + * can be passed to the runtime functions (such as @ref hs_scan(), + * @ref hs_open_stream(), etc.) + * + * @param expression + * The NULL-terminated expression to parse. Note that this string must + * represent ONLY the pattern to be matched, with no delimiters or flags; + * any global flags should be specified with the @p flags argument. For + * example, the expression `/abc?def/i` should be compiled by providing + * `abc?def` as the @p expression, and @ref HS_FLAG_CASELESS as the @a + * flags. Meanwhile, the string content shall be fully parsed in a literal + * sense without any regular grammars. For example, the @p expression + * `abc?` simply means a char sequence of `a`, `b`, `c`, and `?`. The `?` + * here doesn't mean 0 or 1 quantifier under regular semantics. + * + * @param flags + * Flags which modify the behaviour of the expression. Multiple flags may + * be used by ORing them together. Compared to @ref hs_compile(), fewer + * valid values are provided: + * - HS_FLAG_CASELESS - Matching will be performed case-insensitively. 
+ * - HS_FLAG_SINGLEMATCH - Only one match will be generated for the + * expression per stream. + * - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset + * when a match is found. + * + * @param len + * The length of the text content of the pure literal expression. As the + * text content indicated by @p expression is treated as single character + * one by one, the special terminating character `\0` should be allowed + * to appear in expression, and not treated as a terminator for a string. + * Thus, the end of a pure literal expression cannot be indicated by + * identifying `\0`, but by counting to the expression length. + * + * @param mode + * Compiler mode flags that affect the database as a whole. One of @ref + * HS_MODE_STREAM or @ref HS_MODE_BLOCK or @ref HS_MODE_VECTORED must be + * supplied, to select between the generation of a streaming, block or + * vectored database. In addition, other flags (beginning with HS_MODE_) + * may be supplied to enable specific features. See @ref HS_MODE_FLAG for + * more details. + * + * @param platform + * If not NULL, the platform structure is used to determine the target + * platform for the database. If NULL, a database suitable for running + * on the current host platform is produced. + * + * @param db + * On success, a pointer to the generated database will be returned in + * this parameter, or NULL on failure. The caller is responsible for + * deallocating the buffer using the @ref hs_free_database() function. + * + * @param error + * If the compile fails, a pointer to a @ref hs_compile_error_t will be + * returned, providing details of the error condition. The caller is + * responsible for deallocating the buffer using the @ref + * hs_free_compile_error() function. + * + * @return + * @ref HS_SUCCESS is returned on successful compilation; @ref + * HS_COMPILER_ERROR on failure, with details provided in the error + * parameter. 
+ */ +hs_error_t HS_CDECL hs_compile_lit(const char *expression, unsigned flags, + const size_t len, unsigned mode, + const hs_platform_info_t *platform, + hs_database_t **db, + hs_compile_error_t **error); +/** + * The multiple pure literal expression compiler. + * + * This is the function call with which a set of pure literal expressions is + * compiled into a database which can be passed to the runtime functions (such + * as @ref hs_scan(), @ref hs_open_stream(), etc.) Each expression can be + * labelled with a unique integer which is passed into the match callback to + * identify the pattern that has matched. + * + * @param expressions + * Array of pure literal expressions to compile, one per pattern. Each + * string must represent ONLY the pattern to be matched, with no delimiters + * or flags; any flags should be specified in the @p flags array. For + * example, the expression `/abc?def/i` should be compiled by providing + * `abc?def` in the @p expressions array and @ref HS_FLAG_CASELESS in the + * @p flags array. The string content is parsed in a purely literal sense, + * without any regular expression grammar. For example, the expression + * `abc?` simply means a char sequence of `a`, `b`, `c`, and `?`. The `?` + * here doesn't mean 0 or 1 quantifier under regular semantics. + * + * @param flags + * Array of flags which modify the behaviour of each expression. Multiple + * flags may be used by ORing them together. Specifying the NULL pointer + * in place of an array will set the flags value for all patterns to zero. + * Compared to @ref hs_compile_multi(), fewer valid values are provided: + * - HS_FLAG_CASELESS - Matching will be performed case-insensitively. + * - HS_FLAG_SINGLEMATCH - Only one match will be generated for the + * expression per stream. + * - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset + * when a match is found.
+ * + * @param ids + * An array of integers specifying the ID number to be associated with the + * corresponding pattern in the expressions array. Specifying the NULL + * pointer in place of an array will set the ID value for all patterns to + * zero. + * + * @param lens + * Array of lengths of the text content of each pure literal expression. + * As the text content indicated by @p expression is treated as single + * character one by one, the special terminating character `\0` should be + * allowed to appear in expression, and not treated as a terminator for a + * string. Thus, the end of a pure literal expression cannot be indicated + * by identifying `\0`, but by counting to the expression length. + * + * @param elements + * The number of elements in the input arrays. + * + * @param mode + * Compiler mode flags that affect the database as a whole. One of @ref + * HS_MODE_STREAM or @ref HS_MODE_BLOCK or @ref HS_MODE_VECTORED must be + * supplied, to select between the generation of a streaming, block or + * vectored database. In addition, other flags (beginning with HS_MODE_) + * may be supplied to enable specific features. See @ref HS_MODE_FLAG for + * more details. + * + * @param platform + * If not NULL, the platform structure is used to determine the target + * platform for the database. If NULL, a database suitable for running + * on the current host platform is produced. + * + * @param db + * On success, a pointer to the generated database will be returned in + * this parameter, or NULL on failure. The caller is responsible for + * deallocating the buffer using the @ref hs_free_database() function. + * + * @param error + * If the compile fails, a pointer to a @ref hs_compile_error_t will be + * returned, providing details of the error condition. The caller is + * responsible for deallocating the buffer using the @ref + * hs_free_compile_error() function. 
+ * + * @return + * @ref HS_SUCCESS is returned on successful compilation; @ref + * HS_COMPILER_ERROR on failure, with details provided in the error + * parameter. + */ +hs_error_t HS_CDECL hs_compile_lit_multi(const char * const *expressions, + const unsigned *flags, + const unsigned *ids, + const size_t *lens, + unsigned elements, unsigned mode, + const hs_platform_info_t *platform, + hs_database_t **db, + hs_compile_error_t **error); + +/** + * Free an error structure generated by @ref hs_compile(), @ref + * hs_compile_multi() or @ref hs_compile_ext_multi(). + * + * @param error + * The @ref hs_compile_error_t to be freed. NULL may also be safely + * provided. + * + * @return + * @ref HS_SUCCESS on success, other values on failure. + */ +hs_error_t HS_CDECL hs_free_compile_error(hs_compile_error_t *error); + +/** + * Utility function providing information about a regular expression. The + * information provided in @ref hs_expr_info_t includes the minimum and maximum + * width of a pattern match. + * + * Note: successful analysis of an expression with this function does not imply + * that compilation of the same expression (via @ref hs_compile(), @ref + * hs_compile_multi() or @ref hs_compile_ext_multi()) would succeed. This + * function may return @ref HS_SUCCESS for regular expressions that Hyperscan + * cannot compile. + * + * Note: some per-pattern flags (such as @ref HS_FLAG_ALLOWEMPTY, @ref + * HS_FLAG_SOM_LEFTMOST) are accepted by this call, but as they do not affect + * the properties returned in the @ref hs_expr_info_t structure, they will not + * affect the outcome of this function. + * + * @param expression + * The NULL-terminated expression to parse. Note that this string must + * represent ONLY the pattern to be matched, with no delimiters or flags; + * any global flags should be specified with the @p flags argument. 
For + * example, the expression `/abc?def/i` should be compiled by providing + * `abc?def` as the @p expression, and @ref HS_FLAG_CASELESS as the @a + * flags. + * + * @param flags + * Flags which modify the behaviour of the expression. Multiple flags may + * be used by ORing them together. Valid values are: + * - HS_FLAG_CASELESS - Matching will be performed case-insensitively. + * - HS_FLAG_DOTALL - Matching a `.` will not exclude newlines. + * - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data. + * - HS_FLAG_SINGLEMATCH - Only one match will be generated by the + * expression per stream. + * - HS_FLAG_ALLOWEMPTY - Allow expressions which can match against an + * empty string, such as `.*`. + * - HS_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters. + * - HS_FLAG_UCP - Use Unicode properties for character classes. + * - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode. + * - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset + * when a match is found. + * - HS_FLAG_COMBINATION - Parse the expression in logical combination + * syntax. + * - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for + * the sub-expressions in logical combinations. + * + * @param info + * On success, a pointer to the pattern information will be returned in + * this parameter, or NULL on failure. This structure is allocated using + * the allocator supplied in @ref hs_set_allocator() (or malloc() if no + * allocator was set) and should be freed by the caller. + * + * @param error + * If the call fails, a pointer to a @ref hs_compile_error_t will be + * returned, providing details of the error condition. The caller is + * responsible for deallocating the buffer using the @ref + * hs_free_compile_error() function. + * + * @return + * @ref HS_SUCCESS is returned on successful compilation; @ref + * HS_COMPILER_ERROR on failure, with details provided in the error + * parameter. 
+ */ +hs_error_t HS_CDECL hs_expression_info(const char *expression, + unsigned int flags, + hs_expr_info_t **info, + hs_compile_error_t **error); + +/** + * Utility function providing information about a regular expression, with + * extended parameter support. The information provided in @ref hs_expr_info_t + * includes the minimum and maximum width of a pattern match. + * + * Note: successful analysis of an expression with this function does not imply + * that compilation of the same expression (via @ref hs_compile(), @ref + * hs_compile_multi() or @ref hs_compile_ext_multi()) would succeed. This + * function may return @ref HS_SUCCESS for regular expressions that Hyperscan + * cannot compile. + * + * Note: some per-pattern flags (such as @ref HS_FLAG_ALLOWEMPTY, @ref + * HS_FLAG_SOM_LEFTMOST) are accepted by this call, but as they do not affect + * the properties returned in the @ref hs_expr_info_t structure, they will not + * affect the outcome of this function. + * + * @param expression + * The NULL-terminated expression to parse. Note that this string must + * represent ONLY the pattern to be matched, with no delimiters or flags; + * any global flags should be specified with the @p flags argument. For + * example, the expression `/abc?def/i` should be compiled by providing + * `abc?def` as the @p expression, and @ref HS_FLAG_CASELESS as the @a + * flags. + * + * @param flags + * Flags which modify the behaviour of the expression. Multiple flags may + * be used by ORing them together. Valid values are: + * - HS_FLAG_CASELESS - Matching will be performed case-insensitively. + * - HS_FLAG_DOTALL - Matching a `.` will not exclude newlines. + * - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data. + * - HS_FLAG_SINGLEMATCH - Only one match will be generated by the + * expression per stream. + * - HS_FLAG_ALLOWEMPTY - Allow expressions which can match against an + * empty string, such as `.*`. 
+ * - HS_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters. + * - HS_FLAG_UCP - Use Unicode properties for character classes. + * - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode. + * - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset + * when a match is found. + * - HS_FLAG_COMBINATION - Parse the expression in logical combination + * syntax. + * - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for + * the sub-expressions in logical combinations. + * + * @param ext + * A pointer to a filled @ref hs_expr_ext_t structure that defines + * extended behaviour for this pattern. NULL may be specified if no + * extended parameters are needed. + * + * @param info + * On success, a pointer to the pattern information will be returned in + * this parameter, or NULL on failure. This structure is allocated using + * the allocator supplied in @ref hs_set_allocator() (or malloc() if no + * allocator was set) and should be freed by the caller. + * + * @param error + * If the call fails, a pointer to a @ref hs_compile_error_t will be + * returned, providing details of the error condition. The caller is + * responsible for deallocating the buffer using the @ref + * hs_free_compile_error() function. + * + * @return + * @ref HS_SUCCESS is returned on successful compilation; @ref + * HS_COMPILER_ERROR on failure, with details provided in the error + * parameter. + */ +hs_error_t HS_CDECL hs_expression_ext_info(const char *expression, + unsigned int flags, + const hs_expr_ext_t *ext, + hs_expr_info_t **info, + hs_compile_error_t **error); + +/** + * Populates the platform information based on the current host. + * + * @param platform + * On success, the pointed to structure is populated based on the current + * host. + * + * @return + * @ref HS_SUCCESS on success, other values on failure. 
+ */ +hs_error_t HS_CDECL hs_populate_platform(hs_platform_info_t *platform); + +/** + * @defgroup HS_PATTERN_FLAG Pattern flags + * + * @{ + */ + +/** + * Compile flag: Set case-insensitive matching. + * + * This flag sets the expression to be matched case-insensitively by default. + * The expression may still use PCRE tokens (notably `(?i)` and + * `(?-i)`) to switch case-insensitive matching on and off. + */ +#define HS_FLAG_CASELESS 1 + +/** + * Compile flag: Matching a `.` will not exclude newlines. + * + * This flag sets any instances of the `.` token to match newline characters as + * well as all other characters. The PCRE specification states that the `.` + * token does not match newline characters by default, so without this flag the + * `.` token will not cross line boundaries. + */ +#define HS_FLAG_DOTALL 2 + +/** + * Compile flag: Set multi-line anchoring. + * + * This flag instructs the expression to make the `^` and `$` tokens match + * newline characters as well as the start and end of the stream. If this flag + * is not specified, the `^` token will only ever match at the start of a + * stream, and the `$` token will only ever match at the end of a stream within + * the guidelines of the PCRE specification. + */ +#define HS_FLAG_MULTILINE 4 + +/** + * Compile flag: Set single-match only mode. + * + * This flag sets the expression's match ID to match at most once. In streaming + * mode, this means that the expression will return only a single match over + * the lifetime of the stream, rather than reporting every match as per + * standard Hyperscan semantics. In block mode or vectored mode, only the first + * match for each invocation of @ref hs_scan() or @ref hs_scan_vector() will be + * returned. + * + * If multiple expressions in the database share the same match ID, then they + * either must all specify @ref HS_FLAG_SINGLEMATCH or none of them specify + * @ref HS_FLAG_SINGLEMATCH. 
If a group of expressions sharing a match ID + * specify the flag, then at most one match with the match ID will be generated + * per stream. + * + * Note: The use of this flag in combination with @ref HS_FLAG_SOM_LEFTMOST + * is not currently supported. + */ +#define HS_FLAG_SINGLEMATCH 8 + +/** + * Compile flag: Allow expressions that can match against empty buffers. + * + * This flag instructs the compiler to allow expressions that can match against + * empty buffers, such as `.?`, `.*`, `(a|)`. Since Hyperscan can return every + * possible match for an expression, such expressions generally execute very + * slowly; the default behaviour is to return an error when an attempt to + * compile one is made. Using this flag will force the compiler to allow such + * an expression. + */ +#define HS_FLAG_ALLOWEMPTY 16 + +/** + * Compile flag: Enable UTF-8 mode for this expression. + * + * This flag instructs Hyperscan to treat the pattern as a sequence of UTF-8 + * characters. The results of scanning invalid UTF-8 sequences with a Hyperscan + * library that has been compiled with one or more patterns using this flag are + * undefined. + */ +#define HS_FLAG_UTF8 32 + +/** + * Compile flag: Enable Unicode property support for this expression. + * + * This flag instructs Hyperscan to use Unicode properties, rather than the + * default ASCII interpretations, for character mnemonics like `\w` and `\s` as + * well as the POSIX character classes. It is only meaningful in conjunction + * with @ref HS_FLAG_UTF8. + */ +#define HS_FLAG_UCP 64 + +/** + * Compile flag: Enable prefiltering mode for this expression. + * + * This flag instructs Hyperscan to compile an "approximate" version of this + * pattern for use in a prefiltering application, even if Hyperscan does not + * support the pattern in normal operation. + * + * The set of matches returned when this flag is used is guaranteed to be a + * superset of the matches specified by the non-prefiltering expression. 
+ * + * If the pattern contains pattern constructs not supported by Hyperscan (such + * as zero-width assertions, back-references or conditional references) these + * constructs will be replaced internally with broader constructs that may + * match more often. + * + * Furthermore, in prefiltering mode Hyperscan may simplify a pattern that + * would otherwise return a "Pattern too large" error at compile time, or for + * performance reasons (subject to the matching guarantee above). + * + * It is generally expected that the application will subsequently confirm + * prefilter matches with another regular expression matcher that can provide + * exact matches for the pattern. + * + * Note: The use of this flag in combination with @ref HS_FLAG_SOM_LEFTMOST + * is not currently supported. + */ +#define HS_FLAG_PREFILTER 128 + +/** + * Compile flag: Enable leftmost start of match reporting. + * + * This flag instructs Hyperscan to report the leftmost possible start of match + * offset when a match is reported for this expression. (By default, no start + * of match is returned.) + * + * For all the 3 modes, enabling this behaviour may reduce performance. And + * particularly, it may increase stream state requirements in streaming mode. + */ +#define HS_FLAG_SOM_LEFTMOST 256 + +/** + * Compile flag: Logical combination. + * + * This flag instructs Hyperscan to parse this expression as logical + * combination syntax. + * Logical constraints consist of operands, operators and parentheses. + * The operands are expression indices, and operators can be + * '!'(NOT), '&'(AND) or '|'(OR). + * For example: + * (101&102&103)|(104&!105) + * ((301|302)&303)&(304|305) + */ +#define HS_FLAG_COMBINATION 512 + +/** + * Compile flag: Don't do any match reporting. + * + * This flag instructs Hyperscan to ignore match reporting for this expression. + * It is designed to be used on the sub-expressions in logical combinations. 
+ */ +#define HS_FLAG_QUIET 1024 + +/** @} */ + +/** + * @defgroup HS_CPU_FEATURES_FLAG CPU feature support flags + * + * @{ + */ + +/** + * CPU features flag - Intel(R) Advanced Vector Extensions 2 (Intel(R) AVX2) + * + * Setting this flag indicates that the target platform supports AVX2 + * instructions. + */ +#define HS_CPU_FEATURES_AVX2 (1ULL << 2) + +/** + * CPU features flag - Intel(R) Advanced Vector Extensions 512 (Intel(R) AVX512) + * + * Setting this flag indicates that the target platform supports AVX512 + * instructions, specifically AVX-512BW. Using AVX512 implies the use of AVX2. + */ +#define HS_CPU_FEATURES_AVX512 (1ULL << 3) + +/** + * CPU features flag - Intel(R) Advanced Vector Extensions 512 + * Vector Byte Manipulation Instructions (Intel(R) AVX512VBMI) + * + * Setting this flag indicates that the target platform supports AVX512VBMI + * instructions. Using AVX512VBMI implies the use of AVX512. + */ +#define HS_CPU_FEATURES_AVX512VBMI (1ULL << 4) + +/** @} */ + +/** + * @defgroup HS_TUNE_FLAG Tuning flags + * + * @{ + */ + +/** + * Tuning Parameter - Generic + * + * This indicates that the compiled database should not be tuned for any + * particular target platform. + */ +#define HS_TUNE_FAMILY_GENERIC 0 + +/** + * Tuning Parameter - Intel(R) microarchitecture code name Sandy Bridge + * + * This indicates that the compiled database should be tuned for the + * Sandy Bridge microarchitecture. + */ +#define HS_TUNE_FAMILY_SNB 1 + +/** + * Tuning Parameter - Intel(R) microarchitecture code name Ivy Bridge + * + * This indicates that the compiled database should be tuned for the + * Ivy Bridge microarchitecture. + */ +#define HS_TUNE_FAMILY_IVB 2 + +/** + * Tuning Parameter - Intel(R) microarchitecture code name Haswell + * + * This indicates that the compiled database should be tuned for the + * Haswell microarchitecture. 
+ */ +#define HS_TUNE_FAMILY_HSW 3 + +/** + * Tuning Parameter - Intel(R) microarchitecture code name Silvermont + * + * This indicates that the compiled database should be tuned for the + * Silvermont microarchitecture. + */ +#define HS_TUNE_FAMILY_SLM 4 + +/** + * Tuning Parameter - Intel(R) microarchitecture code name Broadwell + * + * This indicates that the compiled database should be tuned for the + * Broadwell microarchitecture. + */ +#define HS_TUNE_FAMILY_BDW 5 + +/** + * Tuning Parameter - Intel(R) microarchitecture code name Skylake + * + * This indicates that the compiled database should be tuned for the + * Skylake microarchitecture. + */ +#define HS_TUNE_FAMILY_SKL 6 + +/** + * Tuning Parameter - Intel(R) microarchitecture code name Skylake Server + * + * This indicates that the compiled database should be tuned for the + * Skylake Server microarchitecture. + */ +#define HS_TUNE_FAMILY_SKX 7 + +/** + * Tuning Parameter - Intel(R) microarchitecture code name Goldmont + * + * This indicates that the compiled database should be tuned for the + * Goldmont microarchitecture. + */ +#define HS_TUNE_FAMILY_GLM 8 + +/** + * Tuning Parameter - Intel(R) microarchitecture code name Icelake + * + * This indicates that the compiled database should be tuned for the + * Icelake microarchitecture. + */ +#define HS_TUNE_FAMILY_ICL 9 + +/** + * Tuning Parameter - Intel(R) microarchitecture code name Icelake Server + * + * This indicates that the compiled database should be tuned for the + * Icelake Server microarchitecture. + */ +#define HS_TUNE_FAMILY_ICX 10 + +/** @} */ + +/** + * @defgroup HS_MODE_FLAG Compile mode flags + * + * The mode flags are used as values for the mode parameter of the various + * compile calls (@ref hs_compile(), @ref hs_compile_multi() and @ref + * hs_compile_ext_multi()). 
+ * + * A mode value can be built by ORing these flag values together; the only + * required flag is one of @ref HS_MODE_BLOCK, @ref HS_MODE_STREAM or @ref + * HS_MODE_VECTORED. Other flags may be added to enable support for additional + * features. + * + * @{ + */ + +/** + * Compiler mode flag: Block scan (non-streaming) database. + */ +#define HS_MODE_BLOCK 1 + +/** + * Compiler mode flag: Alias for @ref HS_MODE_BLOCK. + */ +#define HS_MODE_NOSTREAM 1 + +/** + * Compiler mode flag: Streaming database. + */ +#define HS_MODE_STREAM 2 + +/** + * Compiler mode flag: Vectored scanning database. + */ +#define HS_MODE_VECTORED 4 + +/** + * Compiler mode flag: use full precision to track start of match offsets in + * stream state. + * + * This mode will use the most stream state per pattern, but will always return + * an accurate start of match offset regardless of how far back in the past it + * was found. + * + * One of the SOM_HORIZON modes must be selected to use the @ref + * HS_FLAG_SOM_LEFTMOST expression flag. + */ +#define HS_MODE_SOM_HORIZON_LARGE (1U << 24) + +/** + * Compiler mode flag: use medium precision to track start of match offsets in + * stream state. + * + * This mode will use less stream state than @ref HS_MODE_SOM_HORIZON_LARGE and + * will limit start of match accuracy to offsets within 2^32 bytes of the + * end of match offset reported. + * + * One of the SOM_HORIZON modes must be selected to use the @ref + * HS_FLAG_SOM_LEFTMOST expression flag. + */ +#define HS_MODE_SOM_HORIZON_MEDIUM (1U << 25) + +/** + * Compiler mode flag: use limited precision to track start of match offsets in + * stream state. + * + * This mode will use less stream state than @ref HS_MODE_SOM_HORIZON_LARGE and + * will limit start of match accuracy to offsets within 2^16 bytes of the + * end of match offset reported. + * + * One of the SOM_HORIZON modes must be selected to use the @ref + * HS_FLAG_SOM_LEFTMOST expression flag. 
+ */ +#define HS_MODE_SOM_HORIZON_SMALL (1U << 26) + +/** @} */ + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* HS_COMPILE_H_ */ diff --git a/src/inc_internal/view_only/hyperscan/hs_runtime.h b/src/inc_internal/view_only/hyperscan/hs_runtime.h new file mode 100644 index 0000000..6d34b6c --- /dev/null +++ b/src/inc_internal/view_only/hyperscan/hs_runtime.h @@ -0,0 +1,621 @@ +/* + * Copyright (c) 2015-2018, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef HS_RUNTIME_H_ +#define HS_RUNTIME_H_ + +#include <stdlib.h> + +/** + * @file + * @brief The Hyperscan runtime API definition. + * + * Hyperscan is a high speed regular expression engine. + * + * This header contains functions for using compiled Hyperscan databases for + * scanning data at runtime. + */ + +#include "hs_common.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + +/** + * Definition of the stream identifier type. + */ +struct hs_stream; + +/** + * The stream identifier returned by @ref hs_open_stream(). + */ +typedef struct hs_stream hs_stream_t; + +struct hs_scratch; + +/** + * A Hyperscan scratch space. + */ +typedef struct hs_scratch hs_scratch_t; + +/** + * Definition of the match event callback function type. + * + * A callback function matching the defined type must be provided by the + * application calling the @ref hs_scan(), @ref hs_scan_vector() or @ref + * hs_scan_stream() functions (or other streaming calls which can produce + * matches). + * + * This callback function will be invoked whenever a match is located in the + * target data during the execution of a scan. The details of the match are + * passed in as parameters to the callback function, and the callback function + * should return a value indicating whether or not matching should continue on + * the target data. If no callbacks are desired from a scan call, NULL may be + * provided in order to suppress match production. + * + * This callback function should not attempt to call Hyperscan API functions on + * the same stream nor should it attempt to reuse the scratch space allocated + * for the API calls that caused it to be triggered. Making another call to the + * Hyperscan library with completely independent parameters should work (for + * example, scanning a different database in a new stream and with new scratch + * space), but reusing data structures like stream state and/or scratch space + * will produce undefined behavior. 
+ * + * @param id + * The ID number of the expression that matched. If the expression was a + * single expression compiled with @ref hs_compile(), this value will be + * zero. + * + * @param from + * - If a start of match flag is enabled for the current pattern, this + * argument will be set to the start of match for the pattern assuming + * that that start of match value lies within the current 'start of match + * horizon' chosen by one of the SOM_HORIZON mode flags. + + * - If the start of match value lies outside this horizon (possible only + * when the SOM_HORIZON value is not @ref HS_MODE_SOM_HORIZON_LARGE), + * the @p from value will be set to @ref HS_OFFSET_PAST_HORIZON. + + * - This argument will be set to zero if the Start of Match flag is not + * enabled for the given pattern. + * + * @param to + * The offset after the last byte that matches the expression. + * + * @param flags + * This is provided for future use and is unused at present. + * + * @param context + * The pointer supplied by the user to the @ref hs_scan(), @ref + * hs_scan_vector() or @ref hs_scan_stream() function. + * + * @return + * Non-zero if the matching should cease, else zero. If scanning is + * performed in streaming mode and a non-zero value is returned, any + * subsequent calls to @ref hs_scan_stream() for that stream will + * immediately return with @ref HS_SCAN_TERMINATED. + */ +typedef int (HS_CDECL *match_event_handler)(unsigned int id, + unsigned long long from, + unsigned long long to, + unsigned int flags, + void *context); + +/** + * Open and initialise a stream. + * + * @param db + * A compiled pattern database. + * + * @param flags + * Flags modifying the behaviour of the stream. This parameter is provided + * for future use and is unused at present. + * + * @param stream + * On success, a pointer to the generated @ref hs_stream_t will be + * returned; NULL on failure. + * + * @return + * @ref HS_SUCCESS on success, other values on failure. 
+ */ +hs_error_t HS_CDECL hs_open_stream(const hs_database_t *db, unsigned int flags, + hs_stream_t **stream); + +/** + * Write data to be scanned to the opened stream. + * + * This is the function call in which the actual pattern matching takes place + * as data is written to the stream. Matches will be returned via the @ref + * match_event_handler callback supplied. + * + * @param id + * The stream ID (returned by @ref hs_open_stream()) to which the data + * will be written. + * + * @param data + * Pointer to the data to be scanned. + * + * @param length + * The number of bytes to scan. + * + * @param flags + * Flags modifying the behaviour of the stream. This parameter is provided + * for future use and is unused at present. + * + * @param scratch + * A per-thread scratch space allocated by @ref hs_alloc_scratch(). + * + * @param onEvent + * Pointer to a match event callback function. If a NULL pointer is given, + * no matches will be returned. + * + * @param ctxt + * The user defined pointer which will be passed to the callback function + * when a match occurs. + * + * @return + * Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the + * match callback indicated that scanning should stop; other values on + * error. + */ +hs_error_t HS_CDECL hs_scan_stream(hs_stream_t *id, const char *data, + unsigned int length, unsigned int flags, + hs_scratch_t *scratch, + match_event_handler onEvent, void *ctxt); + +/** + * Close a stream. + * + * This function completes matching on the given stream and frees the memory + * associated with the stream state. After this call, the stream pointed to by + * @p id is invalid and can no longer be used. To reuse the stream state after + * completion, rather than closing it, the @ref hs_reset_stream function can be + * used. + * + * This function must be called for any stream created with @ref + * hs_open_stream(), even if scanning has been terminated by a non-zero return + * from the match callback function. 
+ * + * Note: This operation may result in matches being returned (via calls to the + * match event callback) for expressions anchored to the end of the data stream + * (for example, via the use of the `$` meta-character). If these matches are + * not desired, NULL may be provided as the @ref match_event_handler callback. + * + * If NULL is provided as the @ref match_event_handler callback, it is + * permissible to provide a NULL scratch. + * + * @param id + * The stream ID returned by @ref hs_open_stream(). + * + * @param scratch + * A per-thread scratch space allocated by @ref hs_alloc_scratch(). This is + * allowed to be NULL only if the @p onEvent callback is also NULL. + * + * @param onEvent + * Pointer to a match event callback function. If a NULL pointer is given, + * no matches will be returned. + * + * @param ctxt + * The user defined pointer which will be passed to the callback function + * when a match occurs. + * + * @return + * Returns @ref HS_SUCCESS on success, other values on failure. + */ +hs_error_t HS_CDECL hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch, + match_event_handler onEvent, void *ctxt); + +/** + * Reset a stream to an initial state. + * + * Conceptually, this is equivalent to performing @ref hs_close_stream() on the + * given stream, followed by a @ref hs_open_stream(). This new stream replaces + * the original stream in memory, avoiding the overhead of freeing the old + * stream and allocating the new one. + * + * Note: This operation may result in matches being returned (via calls to the + * match event callback) for expressions anchored to the end of the original + * data stream (for example, via the use of the `$` meta-character). If these + * matches are not desired, NULL may be provided as the @ref match_event_handler + * callback. + * + * Note: the stream will also be tied to the same database. + * + * @param id + * The stream (as created by @ref hs_open_stream()) to be replaced. 
+ * + * @param flags + * Flags modifying the behaviour of the stream. This parameter is provided + * for future use and is unused at present. + * + * @param scratch + * A per-thread scratch space allocated by @ref hs_alloc_scratch(). This is + * allowed to be NULL only if the @p onEvent callback is also NULL. + * + * @param onEvent + * Pointer to a match event callback function. If a NULL pointer is given, + * no matches will be returned. + * + * @param context + * The user defined pointer which will be passed to the callback function + * when a match occurs. + * + * @return + * @ref HS_SUCCESS on success, other values on failure. + */ +hs_error_t HS_CDECL hs_reset_stream(hs_stream_t *id, unsigned int flags, + hs_scratch_t *scratch, + match_event_handler onEvent, void *context); + +/** + * Duplicate the given stream. The new stream will have the same state as the + * original including the current stream offset. + * + * @param to_id + * On success, a pointer to the new, copied @ref hs_stream_t will be + * returned; NULL on failure. + * + * @param from_id + * The stream (as created by @ref hs_open_stream()) to be copied. + * + * @return + * @ref HS_SUCCESS on success, other values on failure. + */ +hs_error_t HS_CDECL hs_copy_stream(hs_stream_t **to_id, + const hs_stream_t *from_id); + +/** + * Duplicate the given 'from' stream state onto the 'to' stream. The 'to' stream + * will first be reset (reporting any EOD matches if a non-NULL @p onEvent + * callback handler is provided). + * + * Note: the 'to' stream and the 'from' stream must be open against the same + * database. + * + * @param to_id + * On success, a pointer to the new, copied @ref hs_stream_t will be + * returned; NULL on failure. + * + * @param from_id + * The stream (as created by @ref hs_open_stream()) to be copied. + * + * @param scratch + * A per-thread scratch space allocated by @ref hs_alloc_scratch(). This is + * allowed to be NULL only if the @p onEvent callback is also NULL. 
+ * + * @param onEvent + * Pointer to a match event callback function. If a NULL pointer is given, + * no matches will be returned. + * + * @param context + * The user defined pointer which will be passed to the callback function + * when a match occurs. + * + * @return + * @ref HS_SUCCESS on success, other values on failure. + */ +hs_error_t HS_CDECL hs_reset_and_copy_stream(hs_stream_t *to_id, + const hs_stream_t *from_id, + hs_scratch_t *scratch, + match_event_handler onEvent, + void *context); + +/** + * Creates a compressed representation of the provided stream in the buffer + * provided. This compressed representation can be converted back into a stream + * state by using @ref hs_expand_stream() or @ref hs_reset_and_expand_stream(). + * The size of the compressed representation will be placed into @p used_space. + * + * If there is not sufficient space in the buffer to hold the compressed + * representation, @ref HS_INSUFFICIENT_SPACE will be returned and @p used_space + * will be populated with the amount of space required. + * + * Note: this function does not close the provided stream, you may continue to + * use the stream or to free it with @ref hs_close_stream(). + * + * @param stream + * The stream (as created by @ref hs_open_stream()) to be compressed. + * + * @param buf + * Buffer to write the compressed representation into. Note: if the call is + * just being used to determine the amount of space required, it is allowed + * to pass NULL here and @p buf_space as 0. + * + * @param buf_space + * The number of bytes in @p buf. If buf_space is too small, the call will + * fail with @ref HS_INSUFFICIENT_SPACE. + * + * @param used_space + * Pointer to where the amount of used space will be written to. The used + * buffer space is always less than or equal to @p buf_space. If the call + * fails with @ref HS_INSUFFICIENT_SPACE, this pointer will be used to + * write out the amount of buffer space required. 
+ * + * @return + * @ref HS_SUCCESS on success, @ref HS_INSUFFICIENT_SPACE if the provided + * buffer is too small. + */ +hs_error_t HS_CDECL hs_compress_stream(const hs_stream_t *stream, char *buf, + size_t buf_space, size_t *used_space); + +/** + * Decompresses a compressed representation created by @ref hs_compress_stream() + * into a new stream. + * + * Note: @p buf must correspond to a complete compressed representation created + * by @ref hs_compress_stream() of a stream that was opened against @p db. It is + * not always possible to detect misuse of this API and behaviour is undefined + * if these properties are not satisfied. + * + * @param db + * The compiled pattern database that the compressed stream was opened + * against. + * + * @param stream + * On success, a pointer to the expanded @ref hs_stream_t will be + * returned; NULL on failure. + * + * @param buf + * A compressed representation of a stream. These compressed forms are + * created by @ref hs_compress_stream(). + * + * @param buf_size + * The size in bytes of the compressed representation. + * + * @return + * @ref HS_SUCCESS on success, other values on failure. + */ +hs_error_t HS_CDECL hs_expand_stream(const hs_database_t *db, + hs_stream_t **stream, const char *buf, + size_t buf_size); + +/** + * Decompresses a compressed representation created by @ref hs_compress_stream() + * on top of the 'to' stream. The 'to' stream will first be reset (reporting + * any EOD matches if a non-NULL @p onEvent callback handler is provided). + * + * Note: the 'to' stream must be opened against the same database as the + * compressed stream. + * + * Note: @p buf must correspond to a complete compressed representation created + * by @ref hs_compress_stream() of a stream that was opened against @p db. It is + * not always possible to detect misuse of this API and behaviour is undefined + * if these properties are not satisfied. + * + * @param to_stream + * A pointer to a valid stream state. 
It is reset and then overwritten + * with the expanded stream state. + * + * @param buf + * A compressed representation of a stream. These compressed forms are + * created by @ref hs_compress_stream(). + * + * @param buf_size + * The size in bytes of the compressed representation. + * + * @param scratch + * A per-thread scratch space allocated by @ref hs_alloc_scratch(). This is + * allowed to be NULL only if the @p onEvent callback is also NULL. + * + * @param onEvent + * Pointer to a match event callback function. If a NULL pointer is given, + * no matches will be returned. + * + * @param context + * The user defined pointer which will be passed to the callback function + * when a match occurs. + * + * @return + * @ref HS_SUCCESS on success, other values on failure. + */ +hs_error_t HS_CDECL hs_reset_and_expand_stream(hs_stream_t *to_stream, + const char *buf, size_t buf_size, + hs_scratch_t *scratch, + match_event_handler onEvent, + void *context); + +/** + * The block (non-streaming) regular expression scanner. + * + * This is the function call in which the actual pattern matching takes place + * for block-mode pattern databases. + * + * @param db + * A compiled pattern database. + * + * @param data + * Pointer to the data to be scanned. + * + * @param length + * The number of bytes to scan. + * + * @param flags + * Flags modifying the behaviour of this function. This parameter is + * provided for future use and is unused at present. + * + * @param scratch + * A per-thread scratch space allocated by @ref hs_alloc_scratch() for this + * database. + * + * @param onEvent + * Pointer to a match event callback function. If a NULL pointer is given, + * no matches will be returned. + * + * @param context + * The user defined pointer which will be passed to the callback function. + * + * @return + * Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the + * match callback indicated that scanning should stop; other values on + * error.
+ */ +hs_error_t HS_CDECL hs_scan(const hs_database_t *db, const char *data, + unsigned int length, unsigned int flags, + hs_scratch_t *scratch, match_event_handler onEvent, + void *context); + +/** + * The vectored regular expression scanner. + * + * This is the function call in which the actual pattern matching takes place + * for vectoring-mode pattern databases. + * + * @param db + * A compiled pattern database. + * + * @param data + * An array of pointers to the data blocks to be scanned. + * + * @param length + * An array of lengths (in bytes) of each data block to scan. + * + * @param count + * Number of data blocks to scan. This should correspond to the size of + * the @p data and @p length arrays. + * + * @param flags + * Flags modifying the behaviour of this function. This parameter is + * provided for future use and is unused at present. + * + * @param scratch + * A per-thread scratch space allocated by @ref hs_alloc_scratch() for + * this database. + * + * @param onEvent + * Pointer to a match event callback function. If a NULL pointer is given, + * no matches will be returned. + * + * @param context + * The user defined pointer which will be passed to the callback function. + * + * @return + * Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the match + * callback indicated that scanning should stop; other values on error. + */ +hs_error_t HS_CDECL hs_scan_vector(const hs_database_t *db, + const char *const *data, + const unsigned int *length, + unsigned int count, unsigned int flags, + hs_scratch_t *scratch, + match_event_handler onEvent, void *context); + +/** + * Allocate a "scratch" space for use by Hyperscan. + * + * This is required for runtime use, and one scratch space per thread, or + * concurrent caller, is required. Any allocator callback set by @ref + * hs_set_scratch_allocator() or @ref hs_set_allocator() will be used by this + * function. + * + * @param db + * The database, as produced by @ref hs_compile().
+ * + * @param scratch + * On first allocation, a pointer to NULL should be provided so a new + * scratch can be allocated. If a scratch block has been previously + * allocated, then a pointer to it should be passed back in to see if it + * is valid for this database block. If a new scratch block is required, + * the original will be freed and the new one returned, otherwise the + * previous scratch block will be returned. On success, the scratch block + * will be suitable for use with the provided database in addition to any + * databases that the original scratch space was suitable for. + * + * @return + * @ref HS_SUCCESS on successful allocation; @ref HS_NOMEM if the + * allocation fails. Other errors may be returned if invalid parameters + * are specified. + */ +hs_error_t HS_CDECL hs_alloc_scratch(const hs_database_t *db, + hs_scratch_t **scratch); + +/** + * Allocate a scratch space that is a clone of an existing scratch space. + * + * This is useful when multiple concurrent threads will be using the same set + * of compiled databases, and another scratch space is required. Any allocator + * callback set by @ref hs_set_scratch_allocator() or @ref hs_set_allocator() + * will be used by this function. + * + * @param src + * The existing @ref hs_scratch_t to be cloned. + * + * @param dest + * A pointer to the new scratch space will be returned here. + * + * @return + * @ref HS_SUCCESS on success; @ref HS_NOMEM if the allocation fails. + * Other errors may be returned if invalid parameters are specified. + */ +hs_error_t HS_CDECL hs_clone_scratch(const hs_scratch_t *src, + hs_scratch_t **dest); + +/** + * Provides the size of the given scratch space. + * + * @param scratch + * A per-thread scratch space allocated by @ref hs_alloc_scratch() or @ref + * hs_clone_scratch(). + * + * @param scratch_size + * On success, the size of the scratch space in bytes is placed in this + * parameter. + * + * @return + * @ref HS_SUCCESS on success, other values on failure.
+ */ +hs_error_t HS_CDECL hs_scratch_size(const hs_scratch_t *scratch, + size_t *scratch_size); + +/** + * Free a scratch block previously allocated by @ref hs_alloc_scratch() or @ref + * hs_clone_scratch(). + * + * The free callback set by @ref hs_set_scratch_allocator() or @ref + * hs_set_allocator() will be used by this function. + * + * @param scratch + * The scratch block to be freed. NULL may also be safely provided. + * + * @return + * @ref HS_SUCCESS on success, other values on failure. + */ +hs_error_t HS_CDECL hs_free_scratch(hs_scratch_t *scratch); + +/** + * Callback 'from' return value, indicating that the start of this match was + * too early to be tracked with the requested SOM_HORIZON precision. + */ +#define HS_OFFSET_PAST_HORIZON (~0ULL) + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* HS_RUNTIME_H_ */