#include "monitor_kernel.h"

/*
 * NOTE(review): the names of the system headers included here were lost when
 * this file was extracted (only bare "#include" tokens survived). The headers
 * below are the ones this file uses directly; reconcile them with the
 * original include list before merging.
 */
#include <linux/sched/signal.h> /* do_each_thread(), while_each_thread() */
#include <linux/sched/task.h>   /* get_task_struct(), put_task_struct() */
#include <linux/string.h>       /* strscpy_pad(), memset() */

/*
 * True when @task is in uninterruptible sleep, is not frozen and is not
 * flagged TASK_NOLOAD. The argument is parenthesized so that any pointer
 * expression can be passed safely.
 */
#define __task_contributes_to_load(task)                          \
  ((READ_ONCE((task)->__state) & TASK_UNINTERRUPTIBLE) != 0 &&    \
   ((task)->flags & PF_FROZEN) == 0 &&                            \
   (READ_ONCE((task)->__state) & TASK_NOLOAD) == 0)

/**
 * @brief Build a kernel_watch_arg from a user supplied watch_arg.
 *
 * @param kptr: kernel space address that maps the watched variable
 * @param warg: watch_arg received from user space
 * @param k_watch_arg: kernel_watch_arg to fill
 * @return unsigned char always 0
 */
static unsigned char w_arg2k_w_arg(void *kptr, watch_arg warg,
                                   kernel_watch_arg *k_watch_arg) {
  k_watch_arg->task_id = warg.task_id;
  /*
   * Bound the copy by the destination array itself. The previous
   * strncpy(..., MAX_NAME_LEN + 1) followed by name[MAX_NAME_LEN + 1] = '\0'
   * stored the terminator one byte past the region strncpy was allowed to
   * fill. strscpy_pad() always NUL-terminates and zero-fills the tail.
   * Truncation of an over-long name is accepted, so the result is ignored.
   */
  strscpy_pad(k_watch_arg->name, warg.name, sizeof(k_watch_arg->name));
  k_watch_arg->ptr = warg.ptr;
  k_watch_arg->kptr = kptr;
  k_watch_arg->length_byte = warg.length_byte;
  k_watch_arg->threshold = warg.threshold;
  k_watch_arg->is_unsigned = warg.is_unsigned;
  k_watch_arg->above_threshold = warg.above_threshold;
  k_watch_arg->true_value = 0;
  return 0;
}

/**
 * @brief Read an integer of @size bytes from @ptr and widen it to long long.
 *
 * @param ptr: address of the integer, may be NULL
 * @param size: width in bytes, one of 1, 2, 4 or 8
 * @param isUnsigned: non-zero to read the value as unsigned
 * @return long long the value read; 0 when @ptr is NULL or @size is not
 *         supported
 */
static long long convert_to_longlong(void *ptr, int size, char isUnsigned) {
  if (!ptr)
    return 0;

  /*
   * Each branch is cast to long long on its own. Without the casts the
   * conditional operator converts both operands to a common type first; for
   * the 32-bit case that type is unsigned int, which turned negative signed
   * values into large positive ones.
   */
  switch (size) {
  case 1: /* 8-bit; "signed char" because plain char may be unsigned */
    return isUnsigned ? (long long)*(unsigned char *)ptr
                      : (long long)*(signed char *)ptr;
  case 2: /* 16-bit */
    return isUnsigned ? (long long)*(unsigned short *)ptr
                      : (long long)*(short *)ptr;
  case 4: /* 32-bit */
    return isUnsigned ? (long long)*(unsigned int *)ptr
                      : (long long)*(int *)ptr;
  case 8: /* 64-bit; unsigned values above LLONG_MAX wrap, as before */
    return isUnsigned ? (long long)*(unsigned long long *)ptr
                      : *(long long *)ptr;
  default:
    return 0;
  }
}

/**
 * @brief Copy the reportable fields of a kernel_watch_arg into a threshold
 *        record.
 *
 * @param k_watch_arg: source
 * @param threshold: destination record
 */
static void k_w_arg2threshold(kernel_watch_arg *k_watch_arg,
                              threshold *threshold) {
  threshold->task_id = k_watch_arg->task_id;
  /* Same bounded copy as w_arg2k_w_arg(): never write past name[]. */
  strscpy_pad(threshold->name, k_watch_arg->name, sizeof(threshold->name));
  threshold->ptr = k_watch_arg->ptr;
  threshold->threshold = k_watch_arg->threshold;
  /* value captured by the timer callback when the threshold was crossed */
  threshold->true_value = k_watch_arg->true_value;
}

/**
 * @brief Initialise the radix tree and the spinlock of an mm_tree.
 *
 * @param mm_tree: tree to initialise
 */
static void init_mm_tree(mm_tree *mm_tree) {
  INIT_RADIX_TREE(&mm_tree->mm_tree, GFP_ATOMIC);
  spin_lock_init(&mm_tree->mm_tree_lock);
}

/**
 * @brief Initialise and allocate one variant buffer.
 *
 * Also (re)initialises the global mm_tree_struct, so the tree is reset once
 * per buffer that is set up through this function.
 *
 * @param buf_size: size handed to init_diag_variant_buffer()
 * @param buffer: buffer to set up
 * @return int result of alloc_diag_variant_buffer()
 */
static int init_buffer(unsigned int buf_size,
                       struct diag_variant_buffer *buffer) {
  init_mm_tree(&mm_tree_struct);
  init_diag_variant_buffer(buffer, buf_size);
  return alloc_diag_variant_buffer(buffer);
}

/** @brief Set up load_monitor_variant_buffer. */
static int init_global_buffer(void) {
  return init_buffer(VARIABLE_MONITOR_BUFFER_SIZE,
                     &load_monitor_variant_buffer);
}

/** @brief Set up stand_alone_buffer. */
static int init_sa_buffer(void) {
  return init_buffer(STAND_ALONE_BUFFER_SIZE, &stand_alone_buffer);
}

/**
 * @brief Collect task info: brief | kernel stack | proc chains | raw stack.
 *
 * The brief and the kernel stack are always collected. The process chain and
 * the raw stack are skipped when the brief marks the task as a system task.
 *
 * @param p: task to inspect
 * @param tsk_info: record to fill
 */
static void diag_tsk(struct task_struct *p, variable_monitor_task *tsk_info) {
  diag_task_brief(p, &tsk_info->task);
  diag_task_kern_stack(p, &tsk_info->kern_stack);

  if (tsk_info->task.sys_task == 1) /* system task: nothing more to dump */
    return;

  dump_proc_chains_argv(1, p, &mm_tree_struct, &tsk_info->proc_chains);
  diag_task_raw_stack(p, &tsk_info->raw_stack);
}

/**
 * @brief Append one task record to @buffer, taking the buffer lock.
 *
 * @param tsk_info: record to write
 * @param flags: storage for the saved interrupt flags
 * @param buffer: destination buffer
 */
static void push_tskinfo_2_buffer_orig(variable_monitor_task *tsk_info,
                                       unsigned long *flags,
                                       struct diag_variant_buffer *buffer) {
  diag_variant_buffer_spin_lock(buffer, *flags);
  diag_variant_buffer_reserve(buffer, sizeof(variable_monitor_task));
  diag_variant_buffer_write_nolock(buffer, tsk_info,
                                   sizeof(variable_monitor_task));
  diag_variant_buffer_seal(buffer);
  diag_variant_buffer_spin_unlock(buffer, *flags);
}

/**
 * @brief Append one task record to stand_alone_buffer.
 *
 * @param tsk_info: record to write
 * @param flags: storage for the saved interrupt flags
 */
static void push_tskinfo_2_sa_buffer(variable_monitor_task *tsk_info,
                                     unsigned long *flags) {
  push_tskinfo_2_buffer_orig(tsk_info, flags, &stand_alone_buffer);
}

/**
 * @brief Append a user or system task record to the global buffer.
 *
 * Does not lock: every caller in this file holds the
 * load_monitor_variant_buffer lock around the call. System tasks are written
 * as the variable_monitor_task_system layout.
 *
 * @param tsk_info: record to write
 * @param flags: unused, kept for signature parity with the other push helpers
 */
static void push_tskinfo_22_buffer(variable_monitor_task *tsk_info,
                                   unsigned long *flags) {
  variable_monitor_task_system *tsk_info_system;

  if (tsk_info->task.sys_task == 1) {
    tsk_info_system = (variable_monitor_task_system *)tsk_info;
    tsk_info_system->et_type = VARIABLE_MONITOR_TASK_TYPE_SYSTEM;
    diag_variant_buffer_reserve(&load_monitor_variant_buffer,
                                sizeof(variable_monitor_task_system));
    diag_variant_buffer_write_nolock(&load_monitor_variant_buffer,
                                     tsk_info_system,
                                     sizeof(variable_monitor_task_system));
    diag_variant_buffer_seal(&load_monitor_variant_buffer);
    return;
  }

  diag_variant_buffer_reserve(&load_monitor_variant_buffer,
                              sizeof(variable_monitor_task));
  diag_variant_buffer_write_nolock(&load_monitor_variant_buffer, tsk_info,
                                   sizeof(variable_monitor_task));
  diag_variant_buffer_seal(&load_monitor_variant_buffer);
}

/**
 * @brief Stop every watch and reset kernel_wtimer_list / kernel_wtimer_num.
 */
static void clear_all_watch(void) {
  printk(KERN_INFO "clear all watch variable\n");
  cancel_all_hrTimer();      /* stop the timers */
  cancel_destory_all_work(); /* stop and destroy the work items */
  free_all_page_list();      /* unmap and release the pages */
  kernel_wtimer_num = 0;
  memset(kernel_wtimer_list, 0, sizeof(kernel_wtimer_list));
}

/**
 * @brief Write a variable_monitor_record for the thresholds that fired and
 *        optionally log it.
 *
 * The record lives in static storage.
 * NOTE(review): nothing here serialises concurrent callers while the record
 * is being filled - confirm that timers cannot fire concurrently.
 *
 * @param k_watch_timer: timer whose threshold_buffer lists the fired watches
 * @param is_print: non-zero to also print the record with printk
 */
static void diag_vm_record(kernel_watch_timer *k_watch_timer,
                           unsigned char is_print) {
  static variable_monitor_record vm_record;
  kernel_watch_arg *kwarg;
  unsigned long flags;
  unsigned long event_id = get_cycles();
  int i;

  vm_record.id = event_id;
  vm_record.et_type = VARIABLE_MONITOR_RECORD_TYPE;
  vm_record.tv = k_watch_timer->tv;
  vm_record.threshold_over_count = k_watch_timer->threshold_over_count;

  /* threshold_buffer[] holds indexes into k_watch_args[] */
  for (i = 0; i < vm_record.threshold_over_count; i++) {
    kwarg = &k_watch_timer->k_watch_args[k_watch_timer->threshold_buffer[i]];
    k_w_arg2threshold(kwarg, &vm_record.threshold_record[i]);
  }

  rcu_read_lock();
  diag_variant_buffer_spin_lock(&load_monitor_variant_buffer, flags);
  diag_variant_buffer_reserve(&load_monitor_variant_buffer,
                              sizeof(variable_monitor_record));
  diag_variant_buffer_write_nolock(&load_monitor_variant_buffer, &vm_record,
                                   sizeof(variable_monitor_record));
  diag_variant_buffer_seal(&load_monitor_variant_buffer);
  diag_variant_buffer_spin_unlock(&load_monitor_variant_buffer, flags);
  rcu_read_unlock();

  if (!is_print)
    return;

  printk(KERN_INFO "-----------variable monitor----------\n");
  printk(KERN_INFO "threshold exceeded, Timestamp %lld, Delay %lld:\n",
         vm_record.tv, ktime_get_real() - vm_record.tv);
  for (i = 0; i < vm_record.threshold_over_count; i++) {
    printk(KERN_INFO
           "\t: pid: %d, name: %s, ptr: %p, threshold:%lld, true_value:%lld\n",
           vm_record.threshold_record[i].task_id,
           vm_record.threshold_record[i].name,
           vm_record.threshold_record[i].ptr,
           vm_record.threshold_record[i].threshold,
           vm_record.threshold_record[i].true_value);
  }
  printk(KERN_INFO "-------------------------------------\n");
}

/**
 * @brief Dump every thread of the thread group that @tgid belongs to into
 *        the global buffer.
 *
 * Returns silently when the task cannot be found.
 *
 * @param tgid: id looked up through orig_find_task_by_vpid()
 */
void diag_task_by_tgid(pid_t tgid) {
  static variable_monitor_task tsk_info;
  struct task_struct *tsk = NULL;
  struct task_struct *thread;
  unsigned long flags;
  unsigned long event_id;

  rcu_read_lock();
  if (orig_find_task_by_vpid)
    tsk = orig_find_task_by_vpid(tgid);
  if (!tsk) {
    rcu_read_unlock();
    return;
  }

  diag_variant_buffer_spin_lock(&load_monitor_variant_buffer, flags);
  event_id = get_cycles();
  /*
   * do { } while_each_thread() visits the starting task first and then the
   * rest of the group. A bare while_each_thread() loop advances before the
   * first iteration, so it skipped the starting task and never ran for a
   * single-threaded process. The body must also dump the thread being
   * visited, not the starting task on every pass.
   */
  thread = tsk;
  do {
    tsk_info.et_type = VARIABLE_MONITOR_TASK_TYPE;
    tsk_info.id = event_id;
    tsk_info.tv = ktime_get_real();
    diag_tsk(thread, &tsk_info);
    push_tskinfo_22_buffer(&tsk_info, &flags);
  } while_each_thread(tsk, thread);
  diag_variant_buffer_spin_unlock(&load_monitor_variant_buffer, flags);
  rcu_read_unlock();
}

/**
 * @brief Dump every thread in the system that is running, contributes to
 *        load, or has any TASK_IDLE bit set, into the global buffer.
 */
void diag_task_all(void) {
  static variable_monitor_task tsk_info;
  struct task_struct *g, *p; /* g: thread group leader; p: thread */
  unsigned long flags;
  unsigned long event_id;

  pr_info("diag_task_all, tv %lld\n", ktime_get_real());
  event_id = get_cycles();

  rcu_read_lock();
  diag_variant_buffer_spin_lock(&load_monitor_variant_buffer, flags);
  do_each_thread(g, p) {
    if (READ_ONCE(p->__state) == TASK_RUNNING ||
        __task_contributes_to_load(p) ||
        ((READ_ONCE(p->__state) & TASK_IDLE) != 0)) {
      tsk_info.et_type = VARIABLE_MONITOR_TASK_TYPE;
      tsk_info.id = event_id;
      tsk_info.tv = ktime_get_real();
      diag_tsk(p, &tsk_info);
      push_tskinfo_22_buffer(&tsk_info, &flags);
    }
  }
  while_each_thread(g, p);
  diag_variant_buffer_spin_unlock(&load_monitor_variant_buffer, flags);
  rcu_read_unlock();

  pr_info("diag_task_all, finish tv %lld\n", ktime_get_real());
}

/**
 * @brief Dump all tasks and the threshold record for a timer that fired.
 *
 * Does nothing when no threshold was exceeded.
 *
 * @param k_watch_timer: timer that detected the threshold crossing
 */
void diag_task_info(kernel_watch_timer *k_watch_timer) {
  if (k_watch_timer->threshold_over_count <= 0)
    return;

  diag_task_all();
  diag_vm_record(k_watch_timer, 1);
}

/**
 * @brief Work queue entry point for diag_task_info().
 *
 * @param work: the wk member embedded in a kernel_watch_timer
 */
void diag_task_info_work(struct work_struct *work) {
  kernel_watch_timer *k_watch_timer =
      container_of(work, kernel_watch_timer, wk);

  diag_task_info(k_watch_timer);
}

/**
 * @brief Module-wide initialisation: orig_X symbols, then both buffers.
 *
 * @return int 0 on success; the init_orig_fun() error, or -1 when a buffer
 *         cannot be allocated
 */
int monitor_init(void) {
  int ret;

  ret = init_orig_fun();
  if (ret)
    return ret;

  ret = init_global_buffer();
  if (ret)
    return -1;

  ret = init_sa_buffer();
  if (ret) {
    /* do not leak the global buffer that was just allocated */
    destroy_diag_variant_buffer(&load_monitor_variant_buffer);
    return -1;
  }

  return 0;
}

/**
 * @brief Module exit: clear every watch and free both buffers.
 */
void monitor_exit(void) {
  clear_all_watch();

  destroy_diag_variant_buffer(&load_monitor_variant_buffer);
  /* monitor_init() allocates this one too; it was never released before */
  destroy_diag_variant_buffer(&stand_alone_buffer);

  printk(KERN_INFO "clear all buffer\n");
}

/**
 * @brief Start watching one user space variable.
 *
 * NOTE(review): when no timer is available the address obtained from
 * convert_user_space_ptr() is not released here - confirm whether that
 * helper pins resources that need to be undone on this path.
 *
 * @param warg: uapi watch_arg
 * @return int 0 on success, -EINVAL for an unsupported length, -EACCES when
 *         the user address cannot be translated, -1 when all timers are full
 */
int start_watch_variable(watch_arg warg) {
  void *kptr;
  kernel_watch_timer *timer = NULL;
  kernel_watch_arg k_watch_arg;

  /*
   * Validate the width first so that an invalid request is rejected before
   * the user address is translated.
   */
  if (warg.length_byte != 1 && warg.length_byte != 2 &&
      warg.length_byte != 4 && warg.length_byte != 8) {
    printk(KERN_ERR "Invalid length %d\n", warg.length_byte);
    return -EINVAL;
  }

  /* user space address to kernel space address */
  kptr = convert_user_space_ptr(warg.task_id, (unsigned long)warg.ptr);
  if (kptr == NULL) {
    printk(KERN_ERR "Cannot access user space\n");
    return -EACCES;
  }

  w_arg2k_w_arg(kptr, warg, &k_watch_arg);

  timer = get_timer(warg.time_ns);
  if (timer == NULL) {
    printk(KERN_ERR "No timer available, ALL timer is full\n");
    return -1;
  }
  INIT_WORK(&timer->wk, diag_task_info_work);

  printk(KERN_INFO "Convert ptr to kptr: %p\n", kptr);
  printk(KERN_INFO "Associated timer: %p , there are already %d variables, "
                   "timer period %lld.\n",
         timer, timer->sentinel, timer->time_ns);

  TIMER_CANCEL(timer); /* stop the timer while its watch list changes */
  timer_add_watch(timer, k_watch_arg);
  TIMER_START(timer);

  printk(KERN_INFO "Start watching var: %s\n", warg.name);
  return 0;
}

/**
 * @brief Re-initialise the work item of every timer in kernel_wtimer_list.
 */
void init_work_all_hrTimer(void) {
  int i;

  for (i = 0; i < kernel_wtimer_num; i++)
    INIT_WORK(&kernel_wtimer_list[i].wk, diag_task_info_work);
}

/**
 * @brief Remove every watch that belongs to @pid.
 *
 * All timers and work items are stopped while the watch list changes and are
 * restarted afterwards.
 *
 * @param pid: owner of the watches to remove
 */
void clear_watch(pid_t pid) {
  printk(KERN_INFO "Clear pid: %d's watch variable\n", pid);

  cancel_all_hrTimer();
  cancel_all_work();
  del_all_kwarg_by_pid(pid); /* drop the pid's watch arguments */
  free_page_list(pid);       /* free the pid's pages */
  init_work_all_hrTimer();
  start_all_hrTimer();
}

/**
 * @brief hrtimer callback: compare every watched variable with its threshold.
 *
 * When at least one threshold is crossed the dump is taken synchronously from
 * this callback (the work item set up for each timer is not queued here) and
 * the timer is pushed out by dump_reset_sec seconds; otherwise the timer
 * keeps its normal period.
 *
 * @param timer: hr_timer member embedded in a kernel_watch_timer
 * @return enum hrtimer_restart always HRTIMER_RESTART
 */
enum hrtimer_restart check_variable_cb(struct hrtimer *timer) {
  kernel_watch_timer *k_watch_timer =
      container_of(timer, kernel_watch_timer, hr_timer);
  kernel_watch_arg *kwarg;
  ktime_t interval;
  int i;
  int j = 0; /* number of thresholds crossed in this pass */

  for (i = 0; i < k_watch_timer->sentinel; i++) {
    kwarg = &k_watch_timer->k_watch_args[i];
    if (!read_and_compare(kwarg->kptr, kwarg->length_byte,
                          kwarg->above_threshold, kwarg->is_unsigned,
                          kwarg->threshold))
      continue;

    kwarg->true_value = convert_to_longlong(kwarg->kptr, kwarg->length_byte,
                                            kwarg->is_unsigned);
    k_watch_timer->threshold_buffer[j] = i;
    j++;
  }

  if (j > 0) {
    k_watch_timer->threshold_over_count = j;
    k_watch_timer->tv = ktime_get_real();
    pr_info("threshold reached, tv %lld\n", k_watch_timer->tv);
    diag_task_info(k_watch_timer);
    /* back off after a dump */
    interval = ktime_set(dump_reset_sec, 0);
  } else {
    /* keep the configured frequency */
    interval = k_watch_timer->kt;
  }

  hrtimer_forward(timer, timer->base->get_time(), interval);
  return HRTIMER_RESTART;
}

/**
 * @brief For test only: set up a perf event for the task with the given pid.
 *
 * @param id: pid looked up through orig_find_task_by_vpid()
 * @return int 0 on success, -EINVAL when the task cannot be found
 */
int diag_pid(int id) {
  struct task_struct *tsk = NULL;
  pid_t pid = (pid_t)id;

  pr_info("diag_pid\n");

  rcu_read_lock();
  if (orig_find_task_by_vpid)
    tsk = orig_find_task_by_vpid(pid);
  if (!tsk) {
    rcu_read_unlock();
    return -EINVAL;
  }
  /*
   * Pin the task before leaving the RCU section: it is used below, after
   * rcu_read_unlock(), where it could otherwise be freed under us.
   */
  get_task_struct(tsk);
  rcu_read_unlock();

  pr_info("diag_pid: %d\n", tsk->pid);
  /*
   * NOTE(review): assumes setup_perf_event_for_task() takes its own reference
   * if it keeps the task pointer after returning - confirm.
   */
  setup_perf_event_for_task(tsk);
  put_task_struct(tsk);

  return 0;
}

/**
 * @brief For test only: dump every thread of a thread group into
 *        stand_alone_buffer.
 *
 * @param id: id looked up through orig_find_task_by_vpid()
 * @return int 0 on success, -EINVAL when the task cannot be found
 */
int diag_tgid(int id) {
  static variable_monitor_task tsk_info = {0};
  struct task_struct *tsk = NULL;
  struct task_struct *thread;
  unsigned long flags;
  unsigned long event_id = get_cycles();
  pid_t tgid = (pid_t)id;

  rcu_read_lock();
  if (orig_find_task_by_vpid)
    tsk = orig_find_task_by_vpid(tgid);
  if (!tsk) {
    rcu_read_unlock();
    return -EINVAL;
  }

  /*
   * Walk the group inside the RCU section that looked the task up, as
   * diag_task_by_tgid() does; the walk used to run after rcu_read_unlock()
   * on a task that was no longer protected. The do/while form also visits
   * the starting task, which the bare while_each_thread() loop skipped.
   */
  thread = tsk;
  do {
    tsk_info.et_type = VARIABLE_MONITOR_TASK_TYPE;
    tsk_info.id = event_id;
    /* was copied from a never-written static record, i.e. always 0 */
    tsk_info.tv = ktime_get_real();
    diag_tsk(thread, &tsk_info);
    push_tskinfo_2_sa_buffer(&tsk_info, &flags);
  } while_each_thread(tsk, thread);
  rcu_read_unlock();

  return 0;
}