/* ********************************************************************************************** * File: maat_group.cpp * Description: * Authors: Liu wentan * Date: 2022-10-31 * Copyright: (c) 2018-2022 Geedge Networks, Inc. All rights reserved. *********************************************************************************************** */ #include #include #include "log/log.h" #include "maat_group.h" #include "maat_utils.h" #include "uthash/uthash.h" #include "igraph/igraph.h" #include "maat_kv.h" #define MODULE_GROUP module_name_str("maat.group") struct group2group_item { long long group_id; long long super_group_id; }; struct group2group_schema { int group_id_column; int super_group_id_column; int table_id;//ugly struct table_manager *ref_tbl_mgr; }; struct maat_group { igraph_integer_t vertex_id; long long group_id; int ref_by_super_group_cnt; int ref_by_sub_group_cnt; size_t top_group_cnt; long long *top_group_ids; UT_hash_handle hh_group_id; UT_hash_handle hh_vertex_id; }; struct maat_group_topology { struct maat_group *hash_group_by_id; //key: group_id, value: struct maat_group *. struct maat_group *hash_group_by_vertex; //key: vetex_id, value: struct maat_group *. Multimap (Items with multiple keys). igraph_t group_graph; igraph_integer_t grp_vertex_id_generator; struct log_handle *logger; }; struct group2group_runtime { struct maat_group_topology *group_topo; struct maat_group_topology *updating_group_topo; long long version; long long rule_num; long long update_err_cnt; int updating_flag; struct maat_garbage_bin *ref_garbage_bin; struct log_handle *logger; }; void *group2group_schema_new(cJSON *json, struct table_manager *tbl_mgr, const char *table_name, struct log_handle *logger) { struct group2group_schema *g2g_schema = ALLOC(struct group2group_schema, 1); cJSON *custom_item = NULL; cJSON *item = cJSON_GetObjectItem(json, "table_id"); if (item != NULL && item->type == cJSON_Number) { g2g_schema->table_id = item->valueint; } else { log_error(logger, MODULE_GROUP, "[%s:%d] table %s has no table_id column", table_name); goto error; } item = cJSON_GetObjectItem(json, "custom"); if (item == NULL || item->type != cJSON_Object) { log_error(logger, MODULE_GROUP, "[%s:%d] table %s has no custom column", __FUNCTION__, __LINE__, table_name); goto error; } custom_item = cJSON_GetObjectItem(item, "group_id"); if (custom_item != NULL && custom_item->type == cJSON_Number) { g2g_schema->group_id_column = custom_item->valueint; } else { log_error(logger, MODULE_GROUP, "[%s:%d] table %s has no group_id column", table_name); goto error; } custom_item = cJSON_GetObjectItem(item, "super_group_id"); if (custom_item != NULL && custom_item->type == cJSON_Number) { g2g_schema->super_group_id_column = custom_item->valueint; } else { log_error(logger, MODULE_GROUP, "[%s:%d] table %s has no super_group_id column", table_name); goto error; } g2g_schema->ref_tbl_mgr = tbl_mgr; return g2g_schema; error: FREE(g2g_schema); return NULL; } void group2group_schema_free(void *g2g_schema) { FREE(g2g_schema); } void group_vertex_free(struct maat_group *group) { FREE(group->top_group_ids); FREE(group); } struct maat_group_topology *maat_group_topology_new(struct log_handle *logger) { struct maat_group_topology *group_topo = ALLOC(struct maat_group_topology, 1); UNUSED int ret = 0; group_topo->hash_group_by_id = NULL; group_topo->hash_group_by_vertex = NULL; ret = igraph_empty(&group_topo->group_graph, 0, IGRAPH_DIRECTED); assert(ret == IGRAPH_SUCCESS); group_topo->logger = logger; return group_topo; } void maat_group_topology_free(struct maat_group_topology *group_topo) { struct maat_group *group = NULL, *tmp_group = NULL; HASH_CLEAR(hh_vertex_id, group_topo->hash_group_by_vertex);//No need group memory clean up. HASH_ITER(hh_group_id, group_topo->hash_group_by_id, group, tmp_group) { HASH_DELETE(hh_group_id, group_topo->hash_group_by_id, group); group_vertex_free(group); } assert(group_topo->hash_group_by_id == NULL); igraph_destroy(&group_topo->group_graph); FREE(group_topo); } struct maat_group *maat_group_clone(struct maat_group *group) { struct maat_group *group_copy = ALLOC(struct maat_group, 1); group_copy->group_id = group->group_id; group_copy->vertex_id = group->vertex_id; group_copy->ref_by_sub_group_cnt = group->ref_by_sub_group_cnt; group_copy->ref_by_super_group_cnt = group->ref_by_super_group_cnt; group_copy->top_group_cnt = group->top_group_cnt; if (group_copy->top_group_cnt > 0) { group_copy->top_group_ids = ALLOC(long long, group_copy->top_group_cnt); memcpy(group_copy->top_group_ids, group->top_group_ids, group_copy->top_group_cnt * sizeof(long long)); } return group_copy; } struct maat_group_topology *maat_group_topology_clone(struct maat_group_topology *group_topo) { if (NULL == group_topo) { return NULL; } struct maat_group_topology *group_topo_copy = ALLOC(struct maat_group_topology, 1); struct maat_group *group = NULL, *tmp_group = NULL; HASH_ITER(hh_group_id, group_topo->hash_group_by_id, group, tmp_group) { struct maat_group *group_copy = maat_group_clone(group); HASH_ADD(hh_group_id, group_topo_copy->hash_group_by_id, group_id, sizeof(group_copy->group_id), group_copy); HASH_ADD(hh_vertex_id, group_topo_copy->hash_group_by_vertex, vertex_id, sizeof(group_copy->vertex_id), group_copy); } igraph_copy(&(group_topo_copy->group_graph), &(group_topo->group_graph)); group_topo_copy->grp_vertex_id_generator = group_topo->grp_vertex_id_generator; group_topo_copy->logger = group_topo->logger; return group_topo_copy; } void *group2group_runtime_new(void *g2g_schema, size_t max_thread_num, struct maat_garbage_bin *garbage_bin, struct log_handle *logger) { if (NULL == g2g_schema) { return NULL; } struct group2group_runtime *g2g_rt = ALLOC(struct group2group_runtime, 1); g2g_rt->group_topo = maat_group_topology_new(logger); g2g_rt->ref_garbage_bin = garbage_bin; g2g_rt->logger = logger; return g2g_rt; } void group2group_runtime_free(void *g2g_runtime) { if (NULL == g2g_runtime) { return; } struct group2group_runtime *g2g_rt = (struct group2group_runtime *)g2g_runtime; if (g2g_rt->group_topo != NULL) { maat_group_topology_free(g2g_rt->group_topo); g2g_rt->group_topo = NULL; } if (g2g_rt->updating_group_topo != NULL) { maat_group_topology_free(g2g_rt->updating_group_topo); g2g_rt->updating_group_topo = NULL; } FREE(g2g_rt); } struct group2group_item * group2group_item_new(const char *line, struct group2group_schema *g2g_schema, const char *table_name, struct log_handle *logger) { size_t column_offset = 0; size_t column_len = 0; struct group2group_item *g2g_item = ALLOC(struct group2group_item, 1); int ret = get_column_pos(line, g2g_schema->group_id_column, &column_offset, &column_len); if (ret < 0) { log_error(logger, MODULE_GROUP, "[%s:%d] group2group table:%s line:%s has no group_id", __FUNCTION__, __LINE__, table_name, line); goto error; } g2g_item->group_id = atoll(line + column_offset); ret = get_column_pos(line, g2g_schema->super_group_id_column, &column_offset, &column_len); if (ret < 0) { log_error(logger, MODULE_GROUP, "[%s:%d] group2group table:%s line:%s has no super_group_id", __FUNCTION__, __LINE__, table_name, line); goto error; } g2g_item->super_group_id = atoll(line + column_offset); return g2g_item; error: FREE(g2g_item); return NULL; } void group2group_item_free(struct group2group_item *g2g_item) { FREE(g2g_item); } size_t print_igraph_vector(igraph_vector_t *v, char *buff, size_t sz) { long int i; int printed = 0; for (i = 0; i < igraph_vector_size(v); i++) { printed += snprintf(buff + printed, sz - printed, " %li", (long int) VECTOR(*v)[i]); } return printed; } struct maat_group *group_topology_add_group(struct maat_group_topology *group_topo, long long group_id) { assert(group_topo != NULL); struct maat_group *group = ALLOC(struct maat_group, 1); group->group_id = group_id; group->vertex_id = group_topo->grp_vertex_id_generator++; assert(igraph_vcount(&group_topo->group_graph)==group->vertex_id); igraph_add_vertices(&group_topo->group_graph, 1, NULL); //Add 1 vertice. HASH_ADD(hh_group_id, group_topo->hash_group_by_id, group_id, sizeof(group->group_id), group); HASH_ADD(hh_vertex_id, group_topo->hash_group_by_vertex, vertex_id, sizeof(group->vertex_id), group); return group; } void group_topology_remove_group(struct maat_group_topology *group_topo, struct maat_group *group) { if (NULL == group_topo || NULL == group) { return; } igraph_vector_t v; char buff[4096] = {0}; assert(group->ref_by_super_group_cnt == 0); igraph_vector_init(&v, 8); igraph_neighbors(&group_topo->group_graph, &v, group->vertex_id, IGRAPH_ALL); if (igraph_vector_size(&v) > 0) { print_igraph_vector(&v, buff, sizeof(buff)); log_error(group_topo->logger, MODULE_GROUP, "[%s:%d] Del group %d exception, still reached by %s.", __FUNCTION__, __LINE__, group->vertex_id, buff); assert(0); } igraph_vector_destroy(&v); assert(group->top_group_ids==NULL); //We should not call igraph_delete_vertices, because this is function changes the ids of the vertices. //igraph_delete_vertices(&hier->group_graph, igraph_vss_1(group->vertex_id)); HASH_DELETE(hh_group_id, group_topo->hash_group_by_id, group); HASH_DELETE(hh_vertex_id, group_topo->hash_group_by_vertex, group); group_vertex_free(group); } struct maat_group *group_topology_find_group(struct maat_group_topology *group_topo, long long group_id) { if (NULL == group_topo || group_id < 0) { return NULL; } struct maat_group *group = NULL; HASH_FIND(hh_group_id, group_topo->hash_group_by_id, &group_id, sizeof(group_id), group); return group; } int group_topology_add_group_to_group(struct maat_group_topology *group_topo, long long group_id, long long super_group_id) { if (NULL == group_topo) { return -1; } struct maat_group *group = group_topology_find_group(group_topo, group_id); if (NULL == group) { group = group_topology_add_group(group_topo, group_id); } struct maat_group *super_group = group_topology_find_group(group_topo, super_group_id); if (NULL == super_group) { super_group = group_topology_add_group(group_topo, super_group_id); } igraph_integer_t edge_id; int ret = igraph_get_eid(&group_topo->group_graph, &edge_id, group->vertex_id, super_group->vertex_id, IGRAPH_DIRECTED, /*error*/ 0); //No duplicated edges between two groups. if (edge_id > 0) { log_error(group_topo->logger, MODULE_GROUP, "[%s:%d] Add group %d to group %d failed, relation already exisited.", __FUNCTION__, __LINE__, group->group_id, super_group->group_id); ret = -1; } else { igraph_add_edge(&group_topo->group_graph, group->vertex_id, super_group->vertex_id); group->ref_by_super_group_cnt++; super_group->ref_by_sub_group_cnt++; ret = 0; } return ret; } int group_topology_remove_group_from_group(struct maat_group_topology *group_topo, long long group_id, long long super_group_id) { if (NULL == group_topo) { return -1; } //No hash write operation, LOCK protection is unnecessary. struct maat_group *group = group_topology_find_group(group_topo, group_id); if (NULL == group) { log_error(group_topo->logger, MODULE_GROUP, "[%s:%d] Del group %d from group %d failed, group %d not exisited.", __FUNCTION__, __LINE__, group_id, super_group_id, group_id); return -1; } struct maat_group *super_group = group_topology_find_group(group_topo, super_group_id); if (NULL == super_group) { log_error(group_topo->logger, MODULE_GROUP, "[%s:%d] Del group %d from group %d failed, superior group %d not exisited.", __FUNCTION__, __LINE__, group_id, super_group_id, super_group_id); return -1; } igraph_es_t es; igraph_integer_t edge_num_before = 0, edge_num_after = 0; edge_num_before = igraph_ecount(&group_topo->group_graph); // The edges between the given pairs of vertices will be included in the edge selection. //The vertex pairs must be given as the arguments of the function call, the third argument //is the first vertex of the first edge, the fourth argument is the second vertex of the //first edge, the fifth is the first vertex of the second edge and so on. The last element //of the argument list must be -1 to denote the end of the argument list. //https://igraph.org/c/doc/igraph-Iterators.html#igraph_es_pairs_small int ret = igraph_es_pairs_small(&es, IGRAPH_DIRECTED, group->vertex_id, super_group->vertex_id, -1); assert(ret==IGRAPH_SUCCESS); // ignore no such edge to abort(). igraph_set_error_handler(igraph_error_handler_ignore); ret = igraph_delete_edges(&group_topo->group_graph, es); edge_num_after = igraph_ecount(&group_topo->group_graph); igraph_es_destroy(&es); if (ret != IGRAPH_SUCCESS || edge_num_before - edge_num_after != 1) { assert(0); return -1; } group->ref_by_super_group_cnt--; super_group->ref_by_sub_group_cnt--; return 0; } static size_t effective_vertices_count(igraph_vector_t *vids) { size_t i = 0; int tmp_vid = 0; for (i = 0; i < (size_t)igraph_vector_size(vids); i++) { tmp_vid = (int) VECTOR(*vids)[i]; if (tmp_vid < 0) { break; } } return i; } int group_topology_build_top_groups(struct maat_group_topology *group_topo) { if (NULL == group_topo) { return -1; } struct maat_group *group = NULL, *tmp = NULL; struct maat_group *super_group = NULL; int tmp_vid = 0; size_t top_group_cnt = 0; long long *temp_group_ids = NULL; igraph_bool_t is_dag; igraph_is_dag(&(group_topo->group_graph), &is_dag); if (!is_dag) { log_error(group_topo->logger, MODULE_GROUP, "[%s:%d] Sub group cycle detected!", __FUNCTION__, __LINE__); return -1; } igraph_integer_t group_graph_vcount = igraph_vcount(&group_topo->group_graph); igraph_vector_t dfs_vids; igraph_vector_init(&dfs_vids, group_graph_vcount); HASH_ITER (hh_group_id, group_topo->hash_group_by_id, group, tmp) { top_group_cnt = 0; temp_group_ids = NULL; //Orphan, Not reference by any one, free it. if (0 == group->ref_by_super_group_cnt && 0 == group->ref_by_sub_group_cnt) { FREE(group->top_group_ids); group_topology_remove_group(group_topo, group); continue; } //A group is referenced by superior groups. if (group->ref_by_super_group_cnt > 0) { igraph_vector_t *vids = &dfs_vids; igraph_dfs(&group_topo->group_graph, group->vertex_id, IGRAPH_OUT, 0, vids, NULL, NULL, NULL, NULL, NULL, NULL); temp_group_ids = ALLOC(long long, effective_vertices_count(vids)); for (size_t i = 0; i < (size_t)igraph_vector_size(vids); i++) { tmp_vid = (int)VECTOR(*vids)[i]; if (tmp_vid < 0) { break; } HASH_FIND(hh_vertex_id, group_topo->hash_group_by_vertex, &tmp_vid, sizeof(tmp_vid), super_group); temp_group_ids[top_group_cnt] = super_group->group_id; top_group_cnt++; } } FREE(group->top_group_ids); group->top_group_cnt = top_group_cnt; if (top_group_cnt > 0) { group->top_group_ids = ALLOC(long long, group->top_group_cnt); memcpy(group->top_group_ids, temp_group_ids, sizeof(long long)*group->top_group_cnt); } if (temp_group_ids != NULL) { FREE(temp_group_ids); } } igraph_vector_destroy(&dfs_vids); return 0; } int group2group_runtime_update(void *g2g_runtime, void *g2g_schema, const char *table_name, const char *line, int valid_column) { if (NULL == g2g_runtime || NULL == g2g_schema || NULL == line) { return -1; } int ret = -1; struct group2group_schema *schema = (struct group2group_schema *)g2g_schema; struct group2group_runtime *g2g_rt = (struct group2group_runtime *)g2g_runtime; int is_valid = get_column_value(line, valid_column); if (is_valid < 0) { g2g_rt->update_err_cnt++; return -1; } struct group2group_item *g2g_item = group2group_item_new(line, schema, table_name, g2g_rt->logger); if (NULL == g2g_item) { g2g_rt->update_err_cnt++; return -1; } if (0 == g2g_rt->updating_flag) { assert(g2g_rt->updating_group_topo == NULL); g2g_rt->updating_group_topo = maat_group_topology_clone(g2g_rt->group_topo); g2g_rt->updating_flag = 1; } if (0 == is_valid) { //delete ret = group_topology_remove_group_from_group(g2g_rt->updating_group_topo, g2g_item->group_id, g2g_item->super_group_id); if (0 == ret) { g2g_rt->rule_num--; } else { g2g_rt->update_err_cnt++; } } else { //add ret = group_topology_add_group_to_group(g2g_rt->updating_group_topo, g2g_item->group_id, g2g_item->super_group_id); if (0 == ret) { g2g_rt->rule_num++; } else { g2g_rt->update_err_cnt++; } } group2group_item_free(g2g_item); return ret; } void garbage_maat_group_topology_free(void *data, void *arg) { struct maat_group_topology *group_topo = (struct maat_group_topology *)data; maat_group_topology_free(group_topo); } int group2group_runtime_commit(void *g2g_runtime, const char *table_name, long long maat_rt_version) { if (NULL == g2g_runtime) { return -1; } struct group2group_runtime *g2g_rt = (struct group2group_runtime *)g2g_runtime; if (0 == g2g_rt->updating_flag) { return 0; } int ret = group_topology_build_top_groups(g2g_rt->updating_group_topo); if (ret < 0) { log_error(g2g_rt->logger, MODULE_GROUP, "[%s:%d] table[%s] group2group runtime commit failed", __FUNCTION__, __LINE__, table_name); return -1; } struct maat_group_topology *old_group_topo = g2g_rt->group_topo; g2g_rt->group_topo = g2g_rt->updating_group_topo; g2g_rt->updating_group_topo = NULL; g2g_rt->updating_flag = 0; maat_garbage_bagging(g2g_rt->ref_garbage_bin, old_group_topo, NULL, garbage_maat_group_topology_free); g2g_rt->version = maat_rt_version; log_info(g2g_rt->logger, MODULE_GROUP, "table[%s] commit %zu g2g rules and rebuild top_groups completed, version:%lld", table_name, g2g_rt->rule_num, g2g_rt->version); return 0; } long long group2group_runtime_rule_count(void *g2g_runtime) { if (NULL == g2g_runtime) { return 0; } struct group2group_runtime *g2g_rt = (struct group2group_runtime *)g2g_runtime; return g2g_rt->rule_num; } long long group2group_runtime_update_err_count(void *g2g_runtime) { if (NULL == g2g_runtime) { return 0; } struct group2group_runtime *g2g_rt = (struct group2group_runtime *)g2g_runtime; return g2g_rt->update_err_cnt; } int group2group_runtime_get_top_groups(void *g2g_runtime, long long *group_ids, size_t n_group_ids, long long *top_group_ids) { if (NULL == g2g_runtime || NULL == group_ids || 0 == n_group_ids) { return -1; } size_t top_group_index = 0; struct group2group_runtime *g2g_rt = (struct group2group_runtime *)g2g_runtime; for (size_t i = 0; i < n_group_ids; i++) { struct maat_group *group = group_topology_find_group(g2g_rt->group_topo, group_ids[i]); if (!group) { continue; } for (size_t j = 0; j < group->top_group_cnt; j++) { top_group_ids[top_group_index++] = group->top_group_ids[j]; } } return top_group_index; }