/*
 * This repository has been archived on 2025-09-14. You can view files and
 * clone it, but cannot push or open issues or pull requests.
 *
 * File: zhangyang-variable-monitor/source/module/monitor_kernel_lib.c
 * (696 lines, 19 KiB, C)
 */

#include "monitor_kernel.h"
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/stacktrace.h>
// #include <linux/sched/task.h>
// #include <linux/sched/mm.h>
/*
 * Evaluates to non-zero when @task has TASK_UNINTERRUPTIBLE set in its state,
 * does not have PF_FROZEN set in its flags, and does not have TASK_NOLOAD set
 * in its state.
 *
 * The argument is parenthesized so that any pointer expression (for example
 * `p + 1` or `&obj`) expands correctly. Note that @task is evaluated more
 * than once, so it must not have side effects.
 */
#define __task_contributes_to_load(task)                         \
  ((READ_ONCE((task)->__state) & TASK_UNINTERRUPTIBLE) != 0 &&   \
   ((task)->flags & PF_FROZEN) == 0 &&                           \
   (READ_ONCE((task)->__state) & TASK_NOLOAD) == 0)
/**
 * @brief Fill a kernel_watch_arg from a user-supplied watch_arg.
 *
 * Every field of @k_watch_arg visible here is overwritten; true_value is
 * reset to 0.
 *
 * @param kptr: kernel space address that corresponds to warg.ptr
 * @param warg: watch_arg received from user space (passed by value)
 * @param k_watch_arg: destination kernel_watch_arg
 * @return unsigned char always 0
 */
static unsigned char w_arg2k_w_arg(void *kptr, watch_arg warg,
                                   kernel_watch_arg *k_watch_arg) {
  k_watch_arg->task_id = warg.task_id;

  /*
   * Copy at most MAX_NAME_LEN characters and terminate at index
   * MAX_NAME_LEN. The previous code copied MAX_NAME_LEN + 1 bytes and then
   * wrote the terminator at index MAX_NAME_LEN + 1, one byte further than
   * the copy itself could reach.
   * NOTE(review): assumes name[] holds at least MAX_NAME_LEN + 1 bytes --
   * confirm against the struct definitions.
   */
  strncpy(k_watch_arg->name, warg.name, MAX_NAME_LEN);
  k_watch_arg->name[MAX_NAME_LEN] = '\0';

  k_watch_arg->ptr = warg.ptr;   /* original user space address */
  k_watch_arg->kptr = kptr;      /* address the kernel side reads from */
  k_watch_arg->length_byte = warg.length_byte;
  k_watch_arg->threshold = warg.threshold;
  k_watch_arg->is_unsigned = warg.is_unsigned;
  k_watch_arg->above_threshold = warg.above_threshold;
  k_watch_arg->true_value = 0;
  return 0;
}
/**
 * @brief Read an integer of @size bytes stored at @ptr and widen it to
 * long long.
 *
 * Each signedness is handled in its own return statement. Mixing the signed
 * and unsigned reads in a single `?:` expression gives the expression the
 * unsigned type (usual arithmetic conversions), which turned negative 32-bit
 * values into large positive ones. `signed char` is spelled out because the
 * signedness of plain `char` is implementation-defined.
 *
 * @param ptr: address to read from; NULL yields 0
 * @param size: width of the value in bytes (1, 2, 4 or 8); any other value
 *              yields 0
 * @param isUnsigned: non-zero to interpret the stored value as unsigned
 * @return long long the widened value
 */
static long long convert_to_longlong(void *ptr, int size, char isUnsigned) {
  if (!ptr) {
    return 0;
  }

  switch (size) {
  case 1: /* 8-bit integer */
    if (isUnsigned) {
      return *(unsigned char *)ptr;
    }
    return *(signed char *)ptr;
  case 2: /* 16-bit integer */
    if (isUnsigned) {
      return *(unsigned short *)ptr;
    }
    return *(short *)ptr;
  case 4: /* 32-bit integer */
    if (isUnsigned) {
      return *(unsigned int *)ptr;
    }
    return *(int *)ptr;
  case 8: /* 64-bit integer */
    if (isUnsigned) {
      /* values above LLONG_MAX do not fit; same conversion as before */
      return (long long)*(unsigned long long *)ptr;
    }
    return *(long long *)ptr;
  default: /* unsupported width */
    return 0;
  }
}
/**
 * @brief Copy the reporting fields of a kernel_watch_arg into a threshold
 * record.
 *
 * Copies task_id, name, ptr, threshold and true_value.
 *
 * @param k_watch_arg: source watch argument
 * @param threshold: destination record
 */
static void k_w_arg2threshold(kernel_watch_arg *k_watch_arg,
                              threshold *threshold) {
  threshold->task_id = k_watch_arg->task_id;

  /*
   * Copy at most MAX_NAME_LEN characters and terminate at index
   * MAX_NAME_LEN. The previous code wrote the terminator at index
   * MAX_NAME_LEN + 1, one byte past what the copy could fill.
   * NOTE(review): assumes name[] holds at least MAX_NAME_LEN + 1 bytes --
   * confirm against the struct definitions.
   */
  strncpy(threshold->name, k_watch_arg->name, MAX_NAME_LEN);
  threshold->name[MAX_NAME_LEN] = '\0';

  threshold->ptr = k_watch_arg->ptr;
  threshold->threshold = k_watch_arg->threshold;
  /* value captured when the threshold check fired */
  threshold->true_value = k_watch_arg->true_value;
}
/**
 * @brief Initialise an mm_tree: its spin lock and its radix tree
 * (GFP_ATOMIC allocation mask).
 *
 * @param mm_tree: tree to initialise
 */
static void init_mm_tree(mm_tree *mm_tree) {
  spin_lock_init(&mm_tree->mm_tree_lock);
  INIT_RADIX_TREE(&mm_tree->mm_tree, GFP_ATOMIC);
}
/**
 * @brief Initialise and allocate a variant buffer.
 *
 * Also (re)initialises the file-level mm_tree_struct on every call.
 *
 * @param buf_size: size passed to init_diag_variant_buffer()
 * @param buffer: buffer to set up
 * @return int result of alloc_diag_variant_buffer()
 */
static int init_buffer(unsigned int buf_size,
                       struct diag_variant_buffer *buffer) {
  init_mm_tree(&mm_tree_struct);
  init_diag_variant_buffer(buffer, buf_size);
  return alloc_diag_variant_buffer(buffer);
}
/**
 * @brief Set up load_monitor_variant_buffer with
 * VARIABLE_MONITOR_BUFFER_SIZE.
 *
 * @return int result of init_buffer()
 */
static int init_global_buffer(void) {
  int rc = init_buffer(VARIABLE_MONITOR_BUFFER_SIZE,
                       &load_monitor_variant_buffer);

  return rc;
}
/**
 * @brief Set up stand_alone_buffer with STAND_ALONE_BUFFER_SIZE.
 *
 * @return int result of init_buffer()
 */
static int init_sa_buffer(void) {
  int rc = init_buffer(STAND_ALONE_BUFFER_SIZE, &stand_alone_buffer);

  return rc;
}
/**
 * @brief Collect diagnostic data for one task into @tsk_info.
 *
 * Always records the task brief, the kernel stack and the process chain.
 * The raw stack is recorded only when the brief does not mark the task as a
 * system task (task.sys_task != 1).
 *
 * @param p: task to inspect
 * @param tsk_info: record that receives the collected data
 */
static void diag_tsk(struct task_struct *p, variable_monitor_task *tsk_info) {
  int is_system_task;

  diag_task_brief(p, &tsk_info->task);
  /* decided right after the brief, before the other collectors run */
  is_system_task = (tsk_info->task.sys_task == 1);

  diag_task_kern_stack(p, &tsk_info->kern_stack);
  dump_proc_chains_argv(1, p, &mm_tree_struct, &tsk_info->proc_chains);

  if (!is_system_task) {
    diag_task_raw_stack(p, &tsk_info->raw_stack);
  }
}
/**
 * @brief Append one full variable_monitor_task record to @buffer.
 *
 * Takes the buffer lock around the reserve / write / seal sequence.
 *
 * @param tsk_info: record to append
 * @param flags: storage handed to the buffer lock/unlock macros
 * @param buffer: destination buffer
 */
static void push_tskinfo_2_buffer_orig(variable_monitor_task *tsk_info,
                                       unsigned long *flags,
                                       struct diag_variant_buffer *buffer) {
  diag_variant_buffer_spin_lock(buffer, *flags);

  diag_variant_buffer_reserve(buffer, sizeof(*tsk_info));
  diag_variant_buffer_write_nolock(buffer, tsk_info, sizeof(*tsk_info));
  diag_variant_buffer_seal(buffer);

  diag_variant_buffer_spin_unlock(buffer, *flags);
}
/**
* @brief push task info to global buffer
*
* @param tsk_info
* @param flags
*/
// static void push_tskinfo_2_buffer(variable_monitor_task *tsk_info,
// unsigned long *flags) {
// push_tskinfo_2_buffer_orig(tsk_info, flags, &load_monitor_variant_buffer);
// }
/**
 * @brief Append one task record to stand_alone_buffer.
 *
 * Thin wrapper around push_tskinfo_2_buffer_orig().
 *
 * @param tsk_info: record to append
 * @param flags: storage handed to the buffer lock/unlock macros
 */
static void push_tskinfo_2_sa_buffer(variable_monitor_task *tsk_info,
                                     unsigned long *flags) {
  push_tskinfo_2_buffer_orig(tsk_info, flags, &stand_alone_buffer);
}
/**
 * @brief Append a task record to @buffer, using the shorter system-task
 * layout when the task is a system task.
 *
 * When task.sys_task == 1 the record is viewed as a
 * variable_monitor_task_system, its et_type is set to
 * VARIABLE_MONITOR_TASK_TYPE_SYSTEM, and only
 * sizeof(variable_monitor_task_system) bytes are written. Otherwise the full
 * variable_monitor_task is written.
 *
 * This function does not take the buffer lock itself; the callers in this
 * file hold it around the call.
 *
 * @param tsk_info: record to append (et_type may be rewritten, see above)
 * @param flags: unused here
 * @param buffer: destination buffer
 */
static void push_tskinfo_22_buffer_orig(variable_monitor_task *tsk_info,
                                        unsigned long *flags,
                                        struct diag_variant_buffer *buffer) {
  size_t record_len = sizeof(variable_monitor_task);

  if (tsk_info->task.sys_task == 1) {
    ((variable_monitor_task_system *)tsk_info)->et_type =
        VARIABLE_MONITOR_TASK_TYPE_SYSTEM;
    record_len = sizeof(variable_monitor_task_system);
  }

  diag_variant_buffer_reserve(buffer, record_len);
  diag_variant_buffer_write_nolock(buffer, tsk_info, record_len);
  diag_variant_buffer_seal(buffer);
}
/**
 * @brief Append a user/system task record to load_monitor_variant_buffer.
 *
 * Thin wrapper around push_tskinfo_22_buffer_orig().
 *
 * @param tsk_info: record to append
 * @param flags: forwarded unchanged
 */
static void push_tskinfo_22_buffer(variable_monitor_task *tsk_info,
                                   unsigned long *flags) {
  push_tskinfo_22_buffer_orig(tsk_info, flags, &load_monitor_variant_buffer);
}
// static void push_tskinfo_22_sa_buffer(variable_monitor_task *tsk_info,
// unsigned long *flags) {
// push_tskinfo_22_buffer_orig(tsk_info, flags, &stand_alone_buffer);
// }
/// @brief Stop every watch and reset kernel_wtimer_list / kernel_wtimer_num.
///
/// The calls below run in a fixed order: timers, then work items, then the
/// page list, and only afterwards is the timer table zeroed.
static void clear_all_watch(void) {
printk(KERN_INFO "clear all watch variable\n");
// cancel all hrtimers first (helper defined elsewhere)
cancel_all_hrTimer();
// then cancel and destroy the work items (helper defined elsewhere)
cancel_destory_all_work();
// presumably unmaps and releases the pages mapped for watched variables --
// helper defined elsewhere, confirm there
free_all_page_list();
// finally forget every timer: count back to 0 and the table zeroed
kernel_wtimer_num = 0;
memset(kernel_wtimer_list, 0, sizeof(kernel_wtimer_list));
}
/**
 * @brief Write one "threshold exceeded" record to
 * load_monitor_variant_buffer and optionally log it.
 *
 * The record carries the timer's timestamp and one threshold entry for each
 * index stored in k_watch_timer->threshold_buffer.
 *
 * @param k_watch_timer: timer whose thresholds were exceeded
 * @param is_print: non-zero to also print the record with printk
 */
static void diag_vm_record(kernel_watch_timer *k_watch_timer,
                           unsigned char is_print) {
  /* taken first so the printed delay covers everything done in here */
  unsigned long long start_time = ktime_get_real();
  static variable_monitor_record vm_record;
  unsigned long event_id = get_cycles();
  unsigned long flags;
  int n;

  /* fill the record header */
  vm_record.id = event_id;
  vm_record.et_type = VARIABLE_MONITOR_RECORD_TYPE;
  vm_record.tv = k_watch_timer->tv;
  vm_record.threshold_over_count = k_watch_timer->threshold_over_count;

  /* one threshold entry per exceeded watch argument */
  for (n = 0; n < vm_record.threshold_over_count; n++) {
    k_w_arg2threshold(
        &k_watch_timer->k_watch_args[k_watch_timer->threshold_buffer[n]],
        &vm_record.threshold_record[n]);
  }

  /* publish the record */
  rcu_read_lock();
  diag_variant_buffer_spin_lock(&load_monitor_variant_buffer, flags);
  diag_variant_buffer_reserve(&load_monitor_variant_buffer,
                              sizeof(variable_monitor_record));
  diag_variant_buffer_write_nolock(&load_monitor_variant_buffer, &vm_record,
                                   sizeof(variable_monitor_record));
  diag_variant_buffer_seal(&load_monitor_variant_buffer);
  diag_variant_buffer_spin_unlock(&load_monitor_variant_buffer, flags);
  rcu_read_unlock();

  if (!is_print) {
    return;
  }

  printk(KERN_INFO "-----------variable monitor----------\n");
  printk(KERN_INFO
         "threshold exceeded, Timestamp %lld, Stack finish Delay %lld:\n",
         vm_record.tv, start_time - vm_record.tv);
  for (n = 0; n < vm_record.threshold_over_count; n++) {
    printk(KERN_INFO
           "\t: pid: %d, name: %s, ptr: %p, threshold:%lld, true_value:%lld\n",
           vm_record.threshold_record[n].task_id,
           vm_record.threshold_record[n].name,
           vm_record.threshold_record[n].ptr,
           vm_record.threshold_record[n].threshold,
           vm_record.threshold_record[n].true_value);
  }
  printk(KERN_INFO "-------------------------------------\n");
}
/**
 * @brief Record every thread of the task found for @tgid into @buffer.
 *
 * The task is looked up with orig_find_task_by_vpid(). Starting from that
 * task, each thread in its thread ring is dumped with diag_tsk() and pushed
 * to @buffer. Nothing is written when the lookup function is unavailable or
 * the task is not found.
 *
 * The walk uses `do { } while_each_thread()` so that the starting task is
 * included. Used as a loop head, while_each_thread() advances before the
 * first iteration, which skipped the starting task and did nothing at all
 * for a single-threaded process. The body also dumps `thread`, the task
 * being visited, rather than the starting task on every iteration.
 *
 * @param tgid: id handed to orig_find_task_by_vpid()
 * @param buffer: destination buffer; locked for the duration of the walk
 */
void diag_task_by_tgid_orig(pid_t tgid, struct diag_variant_buffer *buffer) {
  static variable_monitor_task tsk_info;
  struct task_struct *tsk = NULL;
  struct task_struct *thread;
  unsigned long flags;
  unsigned long event_id;

  rcu_read_lock();
  if (orig_find_task_by_vpid)
    tsk = orig_find_task_by_vpid(tgid);
  if (!tsk) {
    rcu_read_unlock();
    return;
  }

  diag_variant_buffer_spin_lock(buffer, flags);
  /* one id shared by all records produced by this call */
  event_id = get_cycles();
  thread = tsk;
  do {
    tsk_info.et_type = VARIABLE_MONITOR_TASK_TYPE;
    tsk_info.id = event_id;
    tsk_info.tv = ktime_get_real();
    diag_tsk(thread, &tsk_info);
    push_tskinfo_22_buffer_orig(&tsk_info, &flags, buffer); // push to buffer
  } while_each_thread(tsk, thread);
  diag_variant_buffer_spin_unlock(buffer, flags);
  rcu_read_unlock();
}
/**
 * @brief Record the task identified by @tgid into
 * load_monitor_variant_buffer.
 *
 * @param tgid: id forwarded to diag_task_by_tgid_orig()
 */
void diag_task_by_tgid(pid_t tgid) {
  diag_task_by_tgid_orig(tgid, &load_monitor_variant_buffer);
}
/**
 * @brief Record the task identified by @tgid into stand_alone_buffer.
 *
 * @param tgid: id forwarded to diag_task_by_tgid_orig()
 */
void diag_task_sa_by_tgid(pid_t tgid) {
  diag_task_by_tgid_orig(tgid, &stand_alone_buffer);
}
/**
 * @brief Record every selected thread in the system into
 * load_monitor_variant_buffer.
 *
 * Walks all threads with do_each_thread()/while_each_thread() while holding
 * rcu_read_lock() and the buffer lock. A thread is recorded when its state
 * is TASK_RUNNING, when __task_contributes_to_load() is true for it, or when
 * its state has any TASK_IDLE bit set. All records produced by one call
 * share the same event id.
 *
 * NOTE(review): `(state & TASK_IDLE) != 0` matches a task with any bit of
 * the TASK_IDLE mask set, not only state == TASK_IDLE -- confirm that this
 * is the intended filter.
 */
void diag_task_all(void) {
  static variable_monitor_task tsk_info;
  unsigned long event_id = get_cycles();
  struct task_struct *g, *p; // g: task group; p: task
  unsigned long flags;

  rcu_read_lock();
  diag_variant_buffer_spin_lock(&load_monitor_variant_buffer, flags);
  do_each_thread(g, p) {
    /*
     * The state is written concurrently by the scheduler, so every read
     * goes through READ_ONCE(), matching the other state reads here.
     */
    if (READ_ONCE(p->__state) == TASK_RUNNING ||
        __task_contributes_to_load(p) ||
        ((READ_ONCE(p->__state) & TASK_IDLE) != 0)) {
      tsk_info.et_type = VARIABLE_MONITOR_TASK_TYPE;
      tsk_info.id = event_id;
      tsk_info.tv = ktime_get_real();
      diag_tsk(p, &tsk_info);
      push_tskinfo_22_buffer(&tsk_info, &flags); // push to buffer
    }
  }
  while_each_thread(g, p);
  diag_variant_buffer_spin_unlock(&load_monitor_variant_buffer, flags);
  rcu_read_unlock();
}
/**
 * @brief Entry point for dumping task information after a threshold hit.
 *
 * Returns immediately when no threshold is recorded on @k_watch_timer.
 * Otherwise dumps either every task (sample_all set) or only the tasks that
 * own an exceeded watch argument, then writes and prints the summary record.
 *
 * @param k_watch_timer: timer that detected the exceeded thresholds
 */
void diag_task(kernel_watch_timer *k_watch_timer) {
  kernel_watch_arg *hit;
  int n;

  if (k_watch_timer->threshold_over_count <= 0) {
    return; /* nothing exceeded */
  }

  pr_info("diag_stack, tv %lld\n", ktime_get_real());

  if (!sample_all) {
    /* only the tasks whose variables crossed their threshold */
    for (n = 0; n < k_watch_timer->threshold_over_count; n++) {
      hit = &k_watch_timer->k_watch_args[k_watch_timer->threshold_buffer[n]];
      diag_task_by_tgid_orig(hit->task_id, &load_monitor_variant_buffer);
    }
  } else {
    diag_task_all();
  }

  pr_info("diag_stack, finish tv %lld\n", ktime_get_real());
  diag_vm_record(k_watch_timer, 1);
}
/**
* @brief diag task, for work queue
*
* @param work
*/
void diag_task_info_work(struct work_struct *work) {
kernel_watch_timer *k_watch_timer =
container_of(work, kernel_watch_timer, wk);
diag_task(k_watch_timer);
}
/**
* @brief all module function init. orig_X | buffer
*
* @return int
*/
int monitor_init(void) {
int ret = 0;
ret = init_orig_fun(); // init orig_X
if (ret)
return ret;
ret = init_global_buffer(); // 256M
if (ret)
return -1;
ret = init_sa_buffer(); // 50M
if (ret)
return -1;
return 0;
}
/**
* @brief monitor exit: clear all watch and free buffer
*
*/
void monitor_exit(void) {
// clear all watch
clear_all_watch();
// free buffer
destroy_diag_variant_buffer(&load_monitor_variant_buffer);
printk(KERN_INFO "clear all buffer\n");
}
/**
 * @brief Start watching one user-space variable.
 *
 * Steps: validate the requested width, translate the user address to a
 * kernel address, build the kernel_watch_arg, obtain a timer for the
 * requested period, attach the watch to it and (re)start the timer.
 *
 * The width is validated before the address is translated, so a request
 * with an invalid width is rejected without calling
 * convert_user_space_ptr() at all.
 *
 * NOTE(review): when get_timer() fails, whatever convert_user_space_ptr()
 * set up for this request is not undone here; no single-request release
 * helper is visible in this file.
 *
 * @param warg: uapi watch_arg
 * @return int 0 on success, -EINVAL for an unsupported length, -EACCES if
 *             the user address cannot be translated, -1 if no timer is
 *             available
 */
int start_watch_variable(watch_arg warg) {
  void *kptr;
  kernel_watch_timer *timer = NULL;
  kernel_watch_arg k_watch_arg;

  // check length first: only 1, 2, 4 and 8 byte variables are supported
  if (warg.length_byte != 1 && warg.length_byte != 2 &&
      warg.length_byte != 4 && warg.length_byte != 8) {
    printk(KERN_ERR "Invalid length %d\n", warg.length_byte);
    return -EINVAL;
  }

  // user space address to kernel space address
  kptr = convert_user_space_ptr(warg.task_id, (unsigned long)warg.ptr);
  if (kptr == NULL) {
    printk(KERN_ERR "Cannot access user space\n");
    return -EACCES;
  }

  // k_watch_arg init
  w_arg2k_w_arg(kptr, warg, &k_watch_arg);

  timer = get_timer(warg.time_ns); // get a usable timer
  if (timer == NULL) {
    printk(KERN_ERR "No timer available, ALL timer is full\n");
    return -1;
  }
  INIT_WORK(&timer->wk, diag_task_info_work);

  printk(KERN_INFO "Convert ptr to kptr: %p\n", kptr);
  printk(KERN_INFO "Associated timer: %p , there are already %d variables, "
                   "timer period %lld.\n",
         timer, timer->sentinel, timer->time_ns);

  TIMER_CANCEL(timer); // stop the timer while its watch list changes
  timer_add_watch(timer, k_watch_arg);
  TIMER_START(timer);

  /*
   * Log the kernel-side copy of the name: w_arg2k_w_arg() NUL-terminates
   * it, whereas warg.name comes straight from the caller.
   */
  printk(KERN_INFO "Start watching var: %s\n", k_watch_arg.name);
  return 0;
}
/**
 * @brief Re-initialise the work item of every timer currently in use.
 *
 * Covers the first kernel_wtimer_num entries of kernel_wtimer_list and binds
 * each `wk` to diag_task_info_work().
 */
void init_work_all_hrTimer(void) {
  int idx;

  for (idx = 0; idx < kernel_wtimer_num; idx++) {
    INIT_WORK(&kernel_wtimer_list[idx].wk, diag_task_info_work);
  }
}
/**
 * @brief Remove every watch that belongs to one pid.
 *
 * All timers and work items are stopped first, the pid's watch arguments
 * and pages are removed, then the work items are re-initialised and the
 * timers started again. The call order below is significant.
 *
 * @param pid: process whose watches are removed
 */
void clear_watch(pid_t pid) {
printk(KERN_INFO "Clear pid: %d's watch variable\n", pid);
cancel_all_hrTimer(); // stop all timers before the watch lists change
cancel_all_work(); // and cancel the work items (helper defined elsewhere)
del_all_kwarg_by_pid(pid); // delete all kwarg with pid
free_page_list(pid); // free page with pid
init_work_all_hrTimer(); // rebind every timer's work item
start_all_hrTimer(); // restart timer
}
#include <linux/module.h>
#include <linux/smp.h>
/**
 * @brief Callback passed to smp_call_function_single(): record the task
 * that is `current` on the CPU running this function into
 * stand_alone_buffer.
 *
 * @param info: unused
 */
static void ipi_test(void *info) {
  static variable_monitor_task tsk_info;
  struct diag_variant_buffer *buffer = &stand_alone_buffer;
  struct task_struct *tsk = current;
  unsigned long flags;
  unsigned long event_id;

  rcu_read_lock();
  diag_variant_buffer_spin_lock(buffer, flags);

  event_id = get_cycles();
  tsk_info.et_type = VARIABLE_MONITOR_TASK_TYPE;
  tsk_info.id = event_id;
  tsk_info.tv = ktime_get_real();
  pr_info("diag_tsk tv %lld\n", tsk_info.tv);

  diag_tsk(tsk, &tsk_info);
  push_tskinfo_22_buffer_orig(&tsk_info, &flags, buffer); // push to buffer

  diag_variant_buffer_spin_unlock(buffer, flags);
  rcu_read_unlock();
}
/**
 * @brief hrtimer callback: check every watched variable of this timer.
 *
 * For each watch argument whose read_and_compare() check fires, the current
 * value is stored in true_value and the argument's index is appended to
 * threshold_buffer. If at least one fired, the count and timestamp are
 * stored on the timer, a task dump is triggered, and the timer is pushed
 * out by dump_reset_sec seconds. Otherwise the timer keeps its normal
 * period.
 *
 * The target task's CPU is now read inside the RCU read-side section; the
 * task pointer is not touched after rcu_read_unlock().
 *
 * NOTE(review): the dump target is a hard-coded PID (2636), which looks
 * like test scaffolding.
 * NOTE(review): smp_call_function_single() with wait == 1 is documented as
 * not callable from a hardware interrupt handler or with interrupts
 * disabled; confirm the context this callback runs in, or switch back to
 * the queue_work() path kept below.
 *
 * @param timer: the hr_timer embedded in a kernel_watch_timer
 * @return enum hrtimer_restart always HRTIMER_RESTART
 */
enum hrtimer_restart check_variable_cb(struct hrtimer *timer) {
  kernel_watch_timer *k_watch_timer =
      container_of(timer, kernel_watch_timer, hr_timer);
  kernel_watch_arg *kwarg;
  int i = 0, j = 0;

  // check all watched kernel_watch_arg
  for (i = 0; i < k_watch_timer->sentinel; i++) {
    kwarg = &k_watch_timer->k_watch_args[i];
    if (read_and_compare(kwarg->kptr, kwarg->length_byte,
                         kwarg->above_threshold, kwarg->is_unsigned,
                         kwarg->threshold)) {
      kwarg->true_value = convert_to_longlong(kwarg->kptr, kwarg->length_byte,
                                              kwarg->is_unsigned);
      k_watch_timer->threshold_buffer[j] = i;
      j++;
    }
  }

  if (j > 0) { // at least one threshold reached
    pid_t pid = (pid_t)2636;
    struct task_struct *tsk = NULL;
    int cpu = -1;

    k_watch_timer->threshold_over_count = j;
    k_watch_timer->tv = ktime_get_real();
    pr_info("threshold reached, tv %lld\n", k_watch_timer->tv);

    // highpri_wq
    // queue_work(system_highpri_wq, &k_watch_timer->wk);

    /* look the task up and read its CPU while still under RCU */
    rcu_read_lock();
    if (orig_find_task_by_vpid)
      tsk = orig_find_task_by_vpid(pid);
    if (tsk)
      cpu = task_cpu(tsk);
    rcu_read_unlock();

    if (cpu >= 0) {
      smp_call_function_single(cpu, ipi_test, NULL, 1);
    }

    // diag_task(k_watch_timer);
    // orig_raise_softirq(MY_SOFTIRQ); // for swirq test

    // restart timer after dump_reset_sec sec
    hrtimer_forward(timer, timer->base->get_time(),
                    ktime_set(dump_reset_sec, 0));
  } else {
    // keep frequency
    hrtimer_forward(timer, timer->base->get_time(), k_watch_timer->kt);
  }
  return HRTIMER_RESTART; // restart timer
}
/**
* @brief for test only
*
* @param id
* @return int
*/
int diag_pid(int id) {
pr_info("diag_pid\n");
struct task_struct *tsk;
int ret;
// unsigned long flags;
// unsigned long event_id = get_cycles();
// static variable_monitor_task tsk_info = {0};
// static variable_monitor_record vm_record = {0};
pid_t pid = (pid_t)id;
rcu_read_lock();
tsk = NULL;
if (orig_find_task_by_vpid)
tsk = orig_find_task_by_vpid(pid);
if (!tsk) {
ret = -EINVAL;
rcu_read_unlock();
return ret;
}
rcu_read_unlock();
int cpu = task_cpu(tsk);
pr_info("diag_pid: %d, cpu %d\n", tsk->pid, cpu);
// smp_call_function_single(cpu, ipi_test, NULL, 1); // 让 CPU2 执行 print_str()
// get_task_struct(tsk); // count +1
// tsk_info.et_type = VARIABLE_MONITOR_TASK_TYPE;
// tsk_info.id = event_id;
// tsk_info.tv = vm_record.tv;
// diag_tsk(tsk, &tsk_info);
// printk(KERN_INFO "pid: %d, name: %s\n", tsk->pid, tsk->comm);
// setup_perf_event_for_task(tsk); // setup perf event for task
// put_task_struct(tsk); // count -1
// push_tskinfo_2_sa_buffer(&tsk_info, &flags); // push to buffer
return 0;
}
/**
* @brief for test only
*
* @param id
* @return int
*/
int diag_tgid(int id) {
struct task_struct *tsk;
int ret;
unsigned long flags;
unsigned long event_id = get_cycles();
static variable_monitor_task tsk_info = {0};
static variable_monitor_record vm_record = {0};
pid_t tgid = (pid_t)id;
rcu_read_lock();
tsk = NULL;
if (orig_find_task_by_vpid)
tsk = orig_find_task_by_vpid(tgid);
if (!tsk) {
ret = -EINVAL;
rcu_read_unlock();
return ret;
}
rcu_read_unlock();
struct task_struct *thread = tsk;
while_each_thread(tsk, thread) {
get_task_struct(thread); // count +1
tsk_info.et_type = VARIABLE_MONITOR_TASK_TYPE;
tsk_info.id = event_id;
tsk_info.tv = vm_record.tv;
diag_tsk(thread, &tsk_info);
put_task_struct(thread); // count -1
push_tskinfo_2_sa_buffer(&tsk_info, &flags); // push to buffer
}
return 0;
}