// zhangyang-variable-monitor/source/module/monitor_trace.c
#include "monitor_trace.h"
2023-11-19 20:30:48 -05:00
2023-11-16 13:17:49 +08:00
#include <asm/processor.h>
#include <asm/ptrace.h>
2023-11-23 10:25:25 +08:00
#include <asm/syscall.h> // for syscall_get_nr
2023-11-16 13:17:49 +08:00
#include <linux/irq.h>
2023-11-19 20:30:48 -05:00
#include <linux/rcupdate.h>
2023-11-23 10:25:25 +08:00
#include <linux/sched/mm.h> // for get_task_mm
2023-11-16 13:17:49 +08:00
#include <linux/syscalls.h>
#include <linux/tracehook.h>
mm_tree mm_tree_struct;
struct diag_variant_buffer load_monitor_variant_buffer;

typedef struct {
struct rcu_head rcu_head;
pid_t pid;
struct mm_struct *mm;
char cgroup_buf[256];
char argv[256];
} mm_info;
struct stack_trace {
unsigned int nr_entries, max_entries;
unsigned long *entries;
int skip; /* input argument: How many entries to skip */
};
struct stack_frame_user {
const void __user *next_fp;
unsigned long ret_addr;
};
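/*
 * stack_frame_user mirrors the x86-64 frame-pointer layout: at each frame
 * the saved caller %rbp sits at offset 0 and the return address at offset 8.
 * A sketch of the walk the functions below perform (it assumes the target
 * binary was built with frame pointers, i.e. without -fomit-frame-pointer):
 *
 *   fp = regs->bp;
 *   while (frame looks valid) {
 *     read { next_fp, ret_addr } from fp;
 *     record ret_addr;
 *     fp = next_fp;
 *   }
 */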
static inline int diag_get_task_type(struct task_struct *tsk) {
  if (orig_get_task_type)
    return orig_get_task_type(&tsk->se);

  return 0;
}
static inline int orig_diag_cgroup_name(struct cgroup *cgrp, char *buf,
size_t buflen) {
if (orig_kernfs_name && cgrp && cgrp->kn) {
return orig_kernfs_name(cgrp->kn, buf, buflen);
} else {
return 0;
}
}
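/**
 * @brief Look up the cached mm_info for an mm in the module's radix tree.
 *        Returns NULL when mm is NULL or was never registered.
 */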
static inline mm_info *find_mm_info(mm_tree *mm_tree, struct mm_struct *mm) {
  mm_info *info;

  if (mm == NULL)
    return NULL;

  info = radix_tree_lookup(&mm_tree->mm_tree, (unsigned long)mm);
  return info;
}
static void __diag_cgroup_name(struct task_struct *tsk, char *buf,
unsigned int count, int cgroup) {
int cgroup_id = cpuacct_cgrp_id;
memset(buf, 0, count);
if (cgroup == 1) {
cgroup_id = cpuset_cgrp_id;
}
if (tsk && tsk->cgroups && tsk->cgroups->subsys &&
tsk->cgroups->subsys[cgroup_id] &&
tsk->cgroups->subsys[cgroup_id]->cgroup) {
orig_diag_cgroup_name(tsk->cgroups->subsys[cgroup_id]->cgroup, buf, count);
}
}
static void diag_cgroup_name(struct task_struct *tsk, char *buf,
unsigned int count, int cgroup) {
__diag_cgroup_name(tsk, buf, count, cgroup);
}
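/*
 * Copy one stack frame from current's user stack. pagefault_disable() plus
 * __copy_from_user_inatomic() makes the copy non-blocking: if the page is
 * not resident the copy fails instead of sleeping, which keeps this usable
 * from atomic context. Returns 1 on success, 0 on failure.
 */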
static int copy_stack_frame(const void __user *fp,
                            struct stack_frame_user *frame) {
  int ret;
  ret = 1;
  pagefault_disable();

  if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
    ret = 0;

  pagefault_enable();
  return ret;
}
static int copy_stack_frame_remote(struct task_struct *tsk,
                                   const void __user *fp,
                                   struct stack_frame_user *frame) {
  int ret;
  struct mm_struct *mm;
  mm = get_task_mm(tsk);
  if (!mm) {
    printk("copy_stack_frame_remote %d get_task_mm fail\n", tsk->pid);
    return 0;
  }

  ret = orig_access_remote_vm(mm, (unsigned long)fp, frame, sizeof(*frame), 0);

  printk("copy_stack_frame_remote %d ret:%d\n", tsk->pid, ret);

  mmput(mm);
  // access_remote_vm() returns the number of bytes read; a short read means
  // we did not get a complete frame, so treat it as failure.
  return ret == sizeof(*frame);
}
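/*
 * Walk the user-mode frame-pointer chain of another task. Unlike the
 * current-task variant below, every frame is fetched through
 * access_remote_vm(), which may sleep, hence the in_atomic() and
 * irqs_disabled() guards at the top.
 */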
static inline void save_stack_trace_user_remote(struct task_struct *tsk,
                                                struct stack_trace *trace) {
  const struct pt_regs *regs = task_pt_regs(tsk);
  const void __user *fp = (const void __user *)regs->bp;
  int count = 0;

  if (in_atomic()) {
    printk("save_stack_trace_user_remote %d in_atomic\n", tsk->pid);
    return;
  }
  if (irqs_disabled()) {
    printk("save_stack_trace_user_remote %d irqs_disabled\n", tsk->pid);
    return;
  }
  if (trace->nr_entries < trace->max_entries)
    trace->entries[trace->nr_entries++] = regs->ip;
  while (trace->nr_entries < trace->max_entries) {
    struct stack_frame_user frame;
    frame.next_fp = NULL;
    frame.ret_addr = 0;
    if (!copy_stack_frame_remote(tsk, fp, &frame)) {
      printk("save_stack_trace_user_remote %d copy_stack_frame_remote fail\n",
             tsk->pid);
      break;
    }
    if ((unsigned long)fp < regs->sp) {
      printk("save_stack_trace_user_remote %d fp < sp count:%d\n", tsk->pid,
             count);
      break; // fp below sp means we walked past the bottom of the stack
    }
    // A non-zero return address marks a valid frame: record it.
    if (frame.ret_addr) {
      trace->entries[trace->nr_entries++] = frame.ret_addr;
      printk("save_stack_trace_user_remote %d ret_addr:%lx\n", tsk->pid,
             frame.ret_addr);
    } else {
      printk("save_stack_trace_user_remote %d no ret_addr\n", tsk->pid);
      break;
    }

    // fp pointing at itself means we reached the bottom of the stack.
    if (fp == frame.next_fp) {
      printk("save_stack_trace_user_remote %d fp == next_fp\n", tsk->pid);
      break;
    }
    fp = frame.next_fp; // otherwise keep walking down the chain
    count++;
    /*
     * Cap the number of frames walked so a corrupted user stack cannot
     * spin this loop long enough to trigger a hardlockup.
     */
    if (count >= trace->max_entries || count >= 100)
      break;
  }
}
static inline void __save_stack_trace_user(struct stack_trace *trace) {
  const struct pt_regs *regs = task_pt_regs(current);
  const void __user *fp = (const void __user *)regs->bp;
  int count = 0;
  if (trace->nr_entries < trace->max_entries)
    trace->entries[trace->nr_entries++] = regs->ip;
  while (trace->nr_entries < trace->max_entries) {
    struct stack_frame_user frame;
    frame.next_fp = NULL;
    frame.ret_addr = 0;

    if (!copy_stack_frame(fp, &frame))
      break;
    if ((unsigned long)fp < regs->sp)
      break;
    if (frame.ret_addr) {
      trace->entries[trace->nr_entries++] = frame.ret_addr;
    }

    if (fp == frame.next_fp)
      break;

    fp = frame.next_fp;
    count++;
    /*
     * Cap the number of frames walked so a corrupted user stack cannot
     * spin this loop long enough to trigger a hardlockup.
     */
    if (count >= trace->max_entries || count >= 100)
      break;
  }
}
static void perfect_save_stack_trace_user(struct stack_trace *trace) {
  /*
   * Trace user stack if we are not a kernel thread
   */
  if (current->mm) {
    __save_stack_trace_user(trace);
  }
  if (trace->nr_entries < trace->max_entries)
    trace->entries[trace->nr_entries++] = ULONG_MAX;
}
static void diagnose_save_stack_trace_user(unsigned long *backtrace) {
  struct stack_trace trace;
  memset(&trace, 0, sizeof(trace));
  memset(backtrace, 0, BACKTRACE_DEPTH * sizeof(unsigned long));
  trace.max_entries = BACKTRACE_DEPTH;
  trace.entries = backtrace;
  perfect_save_stack_trace_user(&trace);
}
static void diagnose_save_stack_trace_user_remote(struct task_struct *tsk,
                                                  unsigned long *backtrace) {
  struct stack_trace trace;
  int i;
  memset(&trace, 0, sizeof(trace));
  memset(backtrace, 0, BACKTRACE_DEPTH * sizeof(unsigned long));
  trace.max_entries = BACKTRACE_DEPTH;
  trace.entries = backtrace;
  /*
   * Trace user stack if we are not a kernel thread
   */
  if (tsk->mm) {
    printk("save_stack_trace_user_remote %d mm\n", tsk->pid);
    save_stack_trace_user_remote(tsk, &trace);
  }
  if (trace.nr_entries < trace.max_entries)
    trace.entries[trace.nr_entries++] = ULONG_MAX;
  printk("save_stack_trace_user_remote %d, stack: [", tsk->pid);
  for (i = 0; i < BACKTRACE_DEPTH; i++) {
    printk("%lx, ", backtrace[i]);
  }
  printk("]\n");
}
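/*
 * Read n bytes of another task's user stack via access_remote_vm(). The
 * copy may sleep, so bail out when called from atomic or IRQ-disabled
 * context. Returns a negative errno on failure and 0 otherwise, including
 * on an early bail-out, in which case the destination keeps its previous
 * (caller-zeroed) contents.
 */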
static int diagnose_task_raw_stack_remote(struct task_struct *tsk, void *to,
                                          void __user *from, unsigned long n) {
  int ret;
  struct mm_struct *mm;
  if (in_atomic()) {
    printk("task_raw_stack_remote %d in_atomic\n", tsk->pid);
    return 0;
  }
  if (irqs_disabled()) {
    printk("task_raw_stack_remote %d irqs_disabled\n", tsk->pid);
    return 0;
  }

  mm = get_task_mm(tsk);
  if (!mm)
    return 0;
  ret = orig_access_remote_vm(mm, (unsigned long)from, to, n, 0);
  mmput(mm);
  printk("task_raw_stack_remote %d access_remote_vm ret: %d\n", tsk->pid, ret);

  return ret < 0 ? ret : 0;
}
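/**
 * @brief Fill a task_detail snapshot for one task: pid/tgid, scheduler
 *        state, user/kernel mode, current syscall number, container pids
 *        and cgroup names. Safe to call on tasks other than current; the
 *        mode and syscall fields are then reported as unknown (-1).
 */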
void diag_task_brief(struct task_struct *tsk, task_detail *detail) {
  struct pid_namespace *ns;
  struct pt_regs *task_regs;
  struct task_struct *leader;
  struct pt_regs *irq_regs;

  if (!detail)
    return;

  memset(detail, 0, sizeof(task_detail));

  if (!tsk || tsk->exit_state == EXIT_ZOMBIE) // zombie
    return;
  leader = tsk->group_leader;
  if (!leader || leader->exit_state == EXIT_ZOMBIE) {
    return;
  }

  if (tsk != current) { // not the current task
    detail->user_mode = -1;
    detail->syscallno = -1;
  } else if (!tsk->mm) { // current task, but a kernel thread
    detail->user_mode = 0;
    detail->syscallno = -1;
  } else { // current task and a user thread
    irq_regs = get_irq_regs(); // regs of the interrupted context, if any
    task_regs = task_pt_regs(tsk);
    if ((irq_regs && user_mode(irq_regs)) ||
        (task_regs && user_mode(task_regs))) {
      detail->user_mode = 1; // user mode
    } else {
      detail->user_mode = 0; // kernel mode
    }
    if (task_regs) {
      detail->syscallno = syscall_get_nr(tsk, task_regs); // current syscall
    }
  }

  if (tsk->sched_class == orig_idle_sched_class) // idle task
    detail->sys_task = 2;
  else if (!tsk->mm) // kernel thread
    detail->sys_task = 1;
  else
    detail->sys_task = 0;

  detail->pid = tsk->pid;
  detail->tgid = tsk->tgid;
  detail->state = tsk->__state;
  detail->task_type = diag_get_task_type(tsk);
  ns = task_active_pid_ns(tsk); // pids as seen inside the container
  if (ns && ns != &init_pid_ns) {
    detail->container_pid = task_pid_nr_ns(tsk, ns);
    detail->container_tgid = task_tgid_nr_ns(tsk, ns);
  } else {
    detail->container_pid = tsk->pid;
    detail->container_tgid = tsk->tgid;
  }
  strncpy(detail->comm, tsk->comm, TASK_COMM_LEN);
  detail->comm[TASK_COMM_LEN - 1] = 0; // ensure comm is NUL-terminated
  diag_cgroup_name(tsk, detail->cgroup_buf, CGROUP_NAME_LEN, 0);
  diag_cgroup_name(tsk, detail->cgroup_cpuset, CGROUP_NAME_LEN, 1);
  detail->cgroup_buf[CGROUP_NAME_LEN - 1] = 0;    // cgroup name
  detail->cgroup_cpuset[CGROUP_NAME_LEN - 1] = 0; // cgroup cpuset name
}
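/**
 * @brief Capture a task's user-mode backtrace plus its sp/ip/bp registers.
 *        Uses the fast in-task walker for current and the remote
 *        (access_remote_vm based) walker for everyone else.
 */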
void diag_task_user_stack(struct task_struct *tsk, user_stack_detail *detail) {
  struct pt_regs *regs;
  unsigned long sp, ip, bp;
  struct task_struct *leader;

  if (!detail) {
    return;
  }
  detail->stack[0] = 0;

  if (!tsk || !tsk->mm) {
    return;
  }
  leader = tsk->group_leader;
  if (!leader || !leader->mm || leader->exit_state == EXIT_ZOMBIE) {
    return;
  }
  regs = task_pt_regs(tsk);
  if (!regs) // never dereference a missing register frame
    return;
  sp = regs->sp;
  ip = regs->ip;
  bp = regs->bp;
  detail->regs = *regs;
  detail->sp = sp;
  detail->ip = ip;
  detail->bp = bp;
  if (tsk == current) {
    printk("diag_task_user_stack %d current\n", tsk->pid);
    diagnose_save_stack_trace_user(detail->stack);
  } else {
    printk("diag_task_user_stack %d not current\n", tsk->pid);
    diagnose_save_stack_trace_user_remote(tsk, detail->stack);
  }
}
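/**
 * @brief Capture a task's kernel-mode backtrace into detail->stack and
 *        return the number of entries saved.
 */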
unsigned int diag_task_kern_stack(struct task_struct *tsk,
                                  kern_stack_detail *detail) {
  return orig_stack_trace_save_tsk(tsk, detail->stack, BACKTRACE_DEPTH, 0);
}
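/**
 * @brief Walk from tsk up through its real parents (group leaders only),
 *        recording each ancestor's full argv (from the mm cache) or bare
 *        comm, plus its pid, into detail->chains. Stops at pid 0 or after
 *        PROCESS_CHAINS_COUNT entries; a no-op when style == 0.
 */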
void dump_proc_chains_argv(int style, struct task_struct *tsk, mm_tree *mm_tree,
                           proc_chains_detail *detail) {
  struct task_struct *walker;
  mm_info *mm_info;
  int cnt = 0;
  int i = 0;
  struct task_struct *leader;
  for (i = 0; i < PROCESS_CHAINS_COUNT; i++) {
    detail->chains[i][0] = 0;
    detail->tgid[i] = 0;
  }

  if (style == 0)
    return;

  if (!tsk || !tsk->mm)
    return;

  leader = tsk->group_leader;
  if (!leader || !leader->mm ||
      leader->exit_state == EXIT_ZOMBIE) { // leader is a zombie or has no mm
    return;
  }
  rcu_read_lock();
  walker = tsk;
  while (walker->pid > 0) {
    if (!thread_group_leader(walker))
      walker = rcu_dereference(walker->group_leader);
    mm_info = find_mm_info(mm_tree, walker->mm);
    if (mm_info) {
      if (mm_info->cgroup_buf[0] == 0)
        diag_cgroup_name(walker, mm_info->cgroup_buf, 255, 0);
      strncpy(detail->chains[cnt], mm_info->argv, PROCESS_ARGV_LEN);
      detail->full_argv[cnt] = 1;
    } else {
      strncpy(detail->chains[cnt], walker->comm, TASK_COMM_LEN);
      detail->full_argv[cnt] = 0;
    }
    detail->tgid[cnt] = walker->pid;
    // if ((detail->tgid[cnt] != 0) | (detail->full_argv[cnt] != 0)) {
    //   printk("pid: %d, full_argv: %d, chains: %s, cnt:%d\n",
    //          detail->tgid[cnt], detail->full_argv[cnt],
    //          detail->chains[cnt], cnt);
    // }
    walker = rcu_dereference(walker->real_parent);
    cnt++;

    if (cnt >= PROCESS_CHAINS_COUNT)
      break;
  }
  rcu_read_unlock();
}
/**
 * @brief Copy a task's raw user stack, 1 KiB at a time, into detail->stack.
 *
 * @param tsk    task whose stack is copied
 * @param detail destination buffer and register snapshot
 */
void diag_task_raw_stack(struct task_struct *tsk, raw_stack_detail *detail) {
  struct pt_regs *regs;
  int i;
  int ret;
  unsigned long sp, ip, bp;
  char *stack;
  memset(detail->stack, 0, DIAG_USER_STACK_SIZE);
  detail->stack_size = 0;
  if (!tsk || !tsk->mm)
    return;
  regs = task_pt_regs(tsk);
  if (!regs)
    return;
  sp = regs->sp;
  ip = regs->ip;
  bp = regs->bp;
  detail->regs = *regs;
  detail->sp = sp;
  detail->ip = ip;
  detail->bp = bp;
  stack = (char *)&detail->stack[0];
  for (i = 0; i < (DIAG_USER_STACK_SIZE / 1024); i++) {
    if (tsk == current) {
      pagefault_disable();
      ret = __copy_from_user_inatomic(
          stack, (void __user *)sp + detail->stack_size, 1024);
      pagefault_enable();
    } else {
      ret = diagnose_task_raw_stack_remote(
          tsk, stack, (void __user *)sp + detail->stack_size, 1024);
    }
    printk("diag_task_raw_stack %d i:%d ret:%d\n", tsk->pid, i, ret);

    if (ret)
      break;
    else
      detail->stack_size += 1024;
    stack += 1024;
  }
}
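/*
 * A minimal usage sketch (disabled, not part of the module) showing how the
 * helpers above combine to snapshot one task. "sample_task" is a
 * hypothetical caller; in real use the detail structs should live in
 * preallocated storage rather than on the kernel stack, since they can be
 * large.
 */
#if 0
static void sample_task(struct task_struct *tsk) {
  task_detail brief;
  kern_stack_detail kern;
  user_stack_detail user;

  get_task_struct(tsk); // pin the task while we sample it

  diag_task_brief(tsk, &brief);
  diag_task_kern_stack(tsk, &kern);
  diag_task_user_stack(tsk, &user); // may sleep for remote tasks

  put_task_struct(tsk);
}
#endif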
/// @brief print all task stacks
/// @param
// static void print_task_stack(void) {
//   struct task_struct *g, *p;                // g: task group; p: task
//   unsigned long backtrace[BACKTRACE_DEPTH]; // saved stack
//   unsigned int nr_bt;                       // stack depth
//   unsigned long long current_time;          // sample time
//   current_time = ktime_get_real();
//   printk("Timestamp (ns): %lld\n", current_time);
//   printk("Recent Load: %lu.%02lu, %lu.%02lu, %lu.%02lu\n", // recent load
//          LOAD_INT(avenrun[0]), LOAD_FRAC(avenrun[0]), LOAD_INT(avenrun[1]),
//          LOAD_FRAC(avenrun[1]), LOAD_INT(avenrun[2]), LOAD_FRAC(avenrun[2]));
//   rcu_read_lock(); // protect the task-list walk
//   // printk("Running task\n");
//   do_each_thread(g, p) {
//     if (p->__state == TASK_RUNNING || __task_contributes_to_load(p) ||
//         p->__state == TASK_IDLE) {
//       printk("task: %s, pid %d, state %d\n", p->comm, p->pid,
//              p->__state); //! todo
//       nr_bt = orig_stack_trace_save_tsk(p, backtrace, BACKTRACE_DEPTH, 0);
//       stack_trace_print(backtrace, nr_bt, 0); // print
//     }
//   }
//   while_each_thread(g, p);
//   rcu_read_unlock();
// }
// void diag_printf_kern_stack(kern_stack_detail *kern_stack, int reverse) {
//   int i;
//   symbol sym;
//   printf(" kernel stack:\n");
//   if (reverse) {
//     for (i = BACKTRACE_DEPTH - 1; i >= 0; i--) {
//       if (kern_stack->stack[i] == (size_t)-1 || kern_stack->stack[i] == 0) {
//         continue;
//       }
//       sym.reset(kern_stack->stack[i]);
//       if (g_symbol_parser.find_kernel_symbol(sym)) {
//         printf("#@ 0x%lx %s ([kernel.kallsyms])\n", kern_stack->stack[i],
//                sym.name.c_str());
//       } else {
//         printf("#@ 0x%lx %s\n", kern_stack->stack[i], "UNKNOWN");
//       }
//     }
//   } else {
//     for (i = 0; i < BACKTRACE_DEPTH; i++) {
//       if (kern_stack->stack[i] == (size_t)-1 || kern_stack->stack[i] == 0) {
//         break;
//       }
//       sym.reset(kern_stack->stack[i]);
//       if (g_symbol_parser.find_kernel_symbol(sym)) {
//         printf("#@ 0x%lx %s ([kernel.kallsyms])\n", kern_stack->stack[i],
//                sym.name.c_str());
//       } else {
//         printf("#@ 0x%lx %s\n", kern_stack->stack[i], "UNKNOWN");
//       }
//     }
//   }
// }
// void diag_printf_kern_stack(struct diag_kern_stack_detail *kern_stack) {
//   diag_printf_kern_stack(kern_stack, 0);
// }