#include "monitor_trace.h"
|
||
|
||
#include <asm/processor.h>
|
||
#include <asm/ptrace.h>
|
||
#include <asm/syscall.h> // for syscall_get_nr
|
||
#include <linux/irq.h>
|
||
#include <linux/rcupdate.h>
|
||
#include <linux/sched/mm.h> // for get_task_mm
|
||
#include <linux/syscalls.h>
|
||
#include <linux/tracehook.h>
|
||
|
||
mm_tree mm_tree_struct;
struct diag_variant_buffer load_monitor_variant_buffer;
struct diag_variant_buffer stand_alone_buffer;

typedef struct {
  struct rcu_head rcu_head;
  pid_t pid;
  struct mm_struct *mm;
  char cgroup_buf[256];
  char argv[256];
} mm_info;

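/*
 * mm_info caches, per mm_struct, the owning process's argv and cgroup
 * strings; entries are looked up by mm pointer via find_mm_info() below,
 * and the embedded rcu_head suggests they are reclaimed via RCU elsewhere.
 */
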
struct stack_trace {
  unsigned int nr_entries, max_entries;
  unsigned long *entries;
  int skip; /* input argument: How many entries to skip */
};

struct stack_frame_user {
  const void __user *next_fp;
  unsigned long ret_addr;
};

static inline int diag_get_task_type(struct task_struct *tsk) {
  if (orig_get_task_type)
    return orig_get_task_type(&tsk->se);
  return 0;
}

static inline int orig_diag_cgroup_name(struct cgroup *cgrp, char *buf,
                                        size_t buflen) {
  if (orig_kernfs_name && cgrp && cgrp->kn) {
    return orig_kernfs_name(cgrp->kn, buf, buflen);
  } else {
    return 0;
  }
}

/**
 * @brief find the mm_info cached for a given mm_struct
 *
 * @param mm_tree radix tree keyed by mm_struct pointer
 * @param mm the mm_struct to look up
 * @return mm_info* cached entry, or NULL if mm is NULL or untracked
 */
static inline mm_info *find_mm_info(mm_tree *mm_tree, struct mm_struct *mm) {
  mm_info *info;
  if (mm == NULL)
    return NULL;
  info = radix_tree_lookup(&mm_tree->mm_tree, (unsigned long)mm);
  return info;
}

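/*
 * For reference, a minimal sketch of how an entry could be published
 * into the tree. The real insertion path (and its locking) lives
 * elsewhere in this module, so the lock below is only illustrative:
 *
 *   mm_info *info = kzalloc(sizeof(*info), GFP_ATOMIC);
 *   if (info) {
 *     info->mm = mm;
 *     info->pid = tsk->pid;
 *     spin_lock_irqsave(&tree_lock, flags); // hypothetical lock
 *     radix_tree_insert(&mm_tree->mm_tree, (unsigned long)mm, info);
 *     spin_unlock_irqrestore(&tree_lock, flags);
 *   }
 */
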
static void __diag_cgroup_name(struct task_struct *tsk, char *buf,
                               unsigned int count, int cgroup) {
  int cgroup_id = cpuacct_cgrp_id;

  memset(buf, 0, count);

  if (cgroup == 1) {
    cgroup_id = cpuset_cgrp_id;
  }

  if (tsk && tsk->cgroups && tsk->cgroups->subsys &&
      tsk->cgroups->subsys[cgroup_id] &&
      tsk->cgroups->subsys[cgroup_id]->cgroup) {
    orig_diag_cgroup_name(tsk->cgroups->subsys[cgroup_id]->cgroup, buf, count);
  }
}

static void diag_cgroup_name(struct task_struct *tsk, char *buf,
                             unsigned int count, int cgroup) {
  __diag_cgroup_name(tsk, buf, count, cgroup);
}

/**
 * @brief copy one user-space stack frame of the current task via fp
 *
 * @param fp user-space frame pointer to read from
 * @param frame destination buffer for the copied frame
 * @return int 1 on success, 0 if the copy faulted
 */
static int copy_stack_frame(const void __user *fp,
                            struct stack_frame_user *frame) {
  int ret;

  ret = 1;
  pagefault_disable();
  if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
    ret = 0;
  pagefault_enable();

  return ret;
}

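/*
 * The pagefault_disable()/__copy_from_user_inatomic() pair above is the
 * standard pattern for touching user memory from a context that must not
 * sleep: if the target page is not resident, the copy fails immediately
 * instead of faulting it in, and the stack walk simply stops early.
 */
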
/* Like copy_stack_frame(), but for another task: returns the number of
 * bytes copied via access_remote_vm(), so 0 indicates failure. */
static int copy_stack_frame_remote(struct task_struct *tsk,
                                   const void __user *fp,
                                   struct stack_frame_user *frame) {
  int ret;
  struct mm_struct *mm;

  mm = get_task_mm(tsk);
  if (!mm) {
    printk(KERN_INFO "copy_stack_frame_remote %d get_task_mm fail\n", tsk->pid);
    return 0;
  }

  ret = orig_access_remote_vm(mm, (unsigned long)fp, frame, sizeof(*frame), 0);
  // printk(KERN_INFO "copy_stack_frame_remote %d ret:%d\n", tsk->pid, ret);
  mmput(mm);

  return ret;
}

/**
 * @brief save stack trace | not current task
 *
 * Walks the frame-pointer chain of @tsk starting from its saved pt_regs,
 * reading each frame with access_remote_vm().
 *
 * @param tsk task whose user stack should be walked
 * @param trace output trace buffer
 */
static inline void save_stack_trace_user_remote(struct task_struct *tsk,
                                                struct stack_trace *trace) {
  const struct pt_regs *regs = task_pt_regs(tsk);
  const void __user *fp = (const void __user *)regs->bp;
  int count = 0;

  // if (in_atomic()) {
  //   printk(KERN_INFO "save_stack_trace_user_remote %d: task in_atomic\n",
  //          tsk->pid);
  //   return;
  // }

  // if (irqs_disabled()) {
  //   printk(KERN_INFO "save_stack_trace_user_remote %d: task in irqs_disabled\n",
  //          tsk->pid);
  //   return;
  // }

  if (trace->nr_entries < trace->max_entries)
    trace->entries[trace->nr_entries++] = regs->ip;

  while (trace->nr_entries < trace->max_entries) {
    struct stack_frame_user frame;

    frame.next_fp = NULL;
    frame.ret_addr = 0;

    if (!copy_stack_frame_remote(tsk, fp, &frame)) {
      // printk(KERN_INFO "save_stack_trace_user_remote %d copy_stack_frame_remote fail\n",
      //        tsk->pid);
      break;
    }

    if ((unsigned long)fp < regs->sp) {
      // printk(KERN_INFO "save_stack_trace_user_remote %d fp < sp count:%d\n",
      //        tsk->pid, count);
      break; // fp below sp means we have reached the bottom of the stack
    }
    // A non-zero return address marks a valid frame: record it.
    if (frame.ret_addr) {
      trace->entries[trace->nr_entries++] = frame.ret_addr;
      // printk(KERN_INFO "save_stack_trace_user_remote %d ret_addr:%lx\n",
      //        tsk->pid, frame.ret_addr);
    } else {
      // printk(KERN_INFO "save_stack_trace_user_remote %d no ret_addr",
      //        tsk->pid);
      break;
    }

    // A frame pointer that points to itself also marks the bottom of the stack.
    if (fp == frame.next_fp) {
      // printk(KERN_INFO "save_stack_trace_user_remote %d fp == next_fp",
      //        tsk->pid);
      break;
    }
    fp = frame.next_fp; // otherwise keep walking down the chain

    count++;
    /*
     * Hard lockups were observed here in production, so bail out
     * after a bounded number of frames.
     */
    if (count >= trace->max_entries || count >= 100)
      break;
  }
}

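/*
 * The walk above assumes the classic x86-64 frame layout produced when
 * frame pointers are not omitted: at each frame, the saved %rbp points
 * to a two-slot record matching struct stack_frame_user,
 *
 *   [fp + 0] saved caller frame pointer -> next_fp
 *   [fp + 8] return address             -> ret_addr
 *
 * Binaries built with -fomit-frame-pointer break this chain early, which
 * is why every exit condition above fails silently rather than loudly.
 */
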
static inline void __save_stack_trace_user(struct stack_trace *trace) {
  const struct pt_regs *regs = task_pt_regs(current);
  const void __user *fp = (const void __user *)regs->bp;
  int count = 0;

  if (trace->nr_entries < trace->max_entries)
    trace->entries[trace->nr_entries++] = regs->ip;

  while (trace->nr_entries < trace->max_entries) {
    struct stack_frame_user frame;

    frame.next_fp = NULL;
    frame.ret_addr = 0;
    if (!copy_stack_frame(fp, &frame))
      break;
    if ((unsigned long)fp < regs->sp)
      break;
    if (frame.ret_addr) {
      trace->entries[trace->nr_entries++] = frame.ret_addr;
    }
    if (fp == frame.next_fp)
      break;
    fp = frame.next_fp;
    count++;
    /*
     * Hard lockups were observed here in production, so bail out
     * after a bounded number of frames.
     */
    if (count >= trace->max_entries || count >= 100)
      break;
  }
}

// static void perfect_save_stack_trace_user(struct stack_trace *trace) {
//   /*
//    * Trace user stack if we are not a kernel thread
//    */
//   if (current->mm) {
//     __save_stack_trace_user(trace);
//   }
//   if (trace->nr_entries < trace->max_entries)
//     trace->entries[trace->nr_entries++] = ULONG_MAX;
// }

/**
 * @brief save stack trace | current task
 *
 * @param backtrace
 */
// static void diagnose_save_stack_trace_user(unsigned long *backtrace) {
//   struct stack_trace trace;

//   memset(&trace, 0, sizeof(trace));
//   memset(backtrace, 0, BACKTRACE_DEPTH * sizeof(unsigned long));
//   trace.max_entries = BACKTRACE_DEPTH;
//   trace.entries = backtrace;
//   perfect_save_stack_trace_user(&trace);
// }

/**
 * @brief save stack trace | not current task
 *
 * @param tsk
 * @param backtrace
 */
// static void diagnose_save_stack_trace_user_remote(struct task_struct *tsk,
//                                                   unsigned long *backtrace) {
//   struct stack_trace trace;

//   memset(&trace, 0, sizeof(trace));
//   memset(backtrace, 0, BACKTRACE_DEPTH * sizeof(unsigned long));
//   trace.max_entries = BACKTRACE_DEPTH;
//   trace.entries = backtrace;

//   /*
//    * Trace user stack if we are not a kernel thread
//    */
//   if (tsk->mm) {
//     // printk(KERN_INFO "save_stack_trace_user_remote %d mm\n", tsk->pid);
//     save_stack_trace_user_remote(tsk, &trace);
//   }
//   if (trace.nr_entries < trace.max_entries)
//     trace.entries[trace.nr_entries++] = ULONG_MAX;
// }

/* Copy @n bytes of another task's user stack into @to. Returns a
 * negative error if access_remote_vm() failed, otherwise 0. */
static int diagnose_task_raw_stack_remote(struct task_struct *tsk, void *to,
                                          void __user *from, unsigned long n) {
  int ret;
  struct mm_struct *mm;

  // if (in_atomic()) {
  //   printk(KERN_INFO "task_raw_stack_remote %d in_atomic\n", tsk->pid);
  //   return 0;
  // }

  // if (irqs_disabled()) {
  //   printk(KERN_INFO "task_raw_stack_remote %d irqs_disabled\n", tsk->pid);
  //   return 0;
  // }

  if (in_atomic() || irqs_disabled()) {
    return 0;
  }

  mm = get_task_mm(tsk);
  if (!mm)
    return 0;

  ret = orig_access_remote_vm(mm, (unsigned long)from, to, n, 0);
  mmput(mm);

  // printk(KERN_INFO "task_raw_stack_remote %d access_remote_vm ret: %d\n",
  //        tsk->pid, ret);

  return ret < 0 ? ret : 0;
}

/**
 * @brief fill in a brief task_detail snapshot for a task
 *
 * @param tsk task to describe
 * @param detail output buffer
 */
void diag_task_brief(struct task_struct *tsk, task_detail *detail) {
  struct pid_namespace *ns;
  struct pt_regs *task_regs;
  struct task_struct *leader;
  struct pt_regs *irq_regs;

  if (!detail)
    return;

  memset(detail, 0, sizeof(task_detail));

  if (!tsk || tsk->exit_state == EXIT_ZOMBIE) // zombie
    return;
  leader = tsk->group_leader;
  if (!leader || leader->exit_state == EXIT_ZOMBIE) {
    return;
  }

  if (tsk != current) { // not current task
    detail->user_mode = -1;
    detail->syscallno = -1;
  } else if (!tsk->mm) { // current task, but a kernel thread
    detail->user_mode = 0;
    detail->syscallno = -1;
  } else { // current task and a user thread
    irq_regs = get_irq_regs(); // regs of the interrupted context, if any
    task_regs = task_pt_regs(tsk);

    if ((irq_regs && user_mode(irq_regs)) ||
        (task_regs && user_mode(task_regs))) {
      detail->user_mode = 1; // user mode
    } else {
      detail->user_mode = 0; // kernel mode
    }

    if (task_regs) {
      detail->syscallno = syscall_get_nr(tsk, task_regs); // current syscall
    }
  }

  if (tsk->sched_class == orig_idle_sched_class) // idle task
    detail->sys_task = 2;
  else if (!tsk->mm) // kernel thread
    detail->sys_task = 1;
  else
    detail->sys_task = 0;

  detail->pid = tsk->pid;
  detail->tgid = tsk->tgid;
  detail->state = tsk->__state;
  detail->task_type = diag_get_task_type(tsk);
  ns = task_active_pid_ns(tsk); // pid/tgid as seen inside the container
  if (ns && ns != &init_pid_ns) {
    detail->container_pid = task_pid_nr_ns(tsk, ns);
    detail->container_tgid = task_tgid_nr_ns(tsk, ns);
  } else {
    detail->container_pid = tsk->pid;
    detail->container_tgid = tsk->tgid;
  }
  strncpy(detail->comm, tsk->comm, TASK_COMM_LEN);
  detail->comm[TASK_COMM_LEN - 1] = 0; // NUL-terminate comm
  diag_cgroup_name(tsk, detail->cgroup_buf, CGROUP_NAME_LEN, 0);
  diag_cgroup_name(tsk, detail->cgroup_cpuset, CGROUP_NAME_LEN, 1);

  detail->cgroup_buf[CGROUP_NAME_LEN - 1] = 0;    // cpuacct cgroup name
  detail->cgroup_cpuset[CGROUP_NAME_LEN - 1] = 0; // cpuset cgroup name
}

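/*
 * Minimal usage sketch (hypothetical caller; the task_detail layout is
 * defined in monitor_trace.h):
 *
 *   task_detail detail;
 *
 *   diag_task_brief(current, &detail);
 *   printk(KERN_INFO "pid:%d comm:%s cgroup:%s\n", detail.pid,
 *          detail.comm, detail.cgroup_buf);
 */
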
// void diag_task_user_stack(struct task_struct *tsk, user_stack_detail *detail)
// {
//   struct pt_regs *regs;
//   unsigned long sp, ip, bp;
//   struct task_struct *leader;

//   if (!detail) {
//     return;
//   }

//   detail->stack[0] = 0;
//   if (!tsk || !tsk->mm) {
//     return;
//   }

//   leader = tsk->group_leader;
//   if (!leader || !leader->mm || leader->exit_state == EXIT_ZOMBIE) {
//     return;
//   }

//   sp = 0;
//   ip = 0;
//   bp = 0;
//   regs = task_pt_regs(tsk);
//   if (regs) {
//     sp = regs->sp;
//     ip = regs->ip;
//     bp = regs->bp;
//   }
//   detail->regs = *regs;
//   detail->sp = sp;
//   detail->ip = ip;
//   detail->bp = bp;

//   if (tsk == current) {
//     // printk(KERN_INFO "diag_task_user_stack %d current\n", tsk->pid);
//     diagnose_save_stack_trace_user(detail->stack);
//   } else {
//     // printk(KERN_INFO "diag_task_user_stack %d no current\n", tsk->pid);
//     diagnose_save_stack_trace_user_remote(tsk, detail->stack);
//   }
// }

/**
 * @brief save a task's kernel stack | delegates to orig_stack_trace_save_tsk
 *
 * @param tsk task whose kernel stack should be captured
 * @param detail output buffer
 * @return unsigned int number of entries captured
 */
unsigned int diag_task_kern_stack(struct task_struct *tsk,
                                  kern_stack_detail *detail) {
  return orig_stack_trace_save_tsk(tsk, detail->stack, BACKTRACE_DEPTH, 0);
}

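/*
 * Usage sketch (hypothetical caller): capture and print the top of a
 * task's kernel stack, assuming kern_stack_detail holds the unsigned long
 * entries handed to stack_trace_save_tsk():
 *
 *   kern_stack_detail ks;
 *   unsigned int i, n = diag_task_kern_stack(tsk, &ks);
 *
 *   for (i = 0; i < n; i++)
 *     printk(KERN_INFO "  %pS\n", (void *)ks.stack[i]);
 */
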
/**
 * @brief dump a task's process parent chain with argv
 *
 * Walks from @tsk up through its ancestors, recording each process's
 * full argv (if cached in @mm_tree) or its comm.
 *
 * @param style 0 disables the dump
 * @param tsk starting task
 * @param mm_tree cache of mm_struct -> mm_info (argv, cgroup)
 * @param detail output buffer
 */
void dump_proc_chains_argv(int style, struct task_struct *tsk, mm_tree *mm_tree,
                           proc_chains_detail *detail) {
  struct task_struct *walker;
  mm_info *mm_info;
  int cnt = 0;
  int i = 0;
  struct task_struct *leader;

  for (i = 0; i < PROCESS_CHAINS_COUNT; i++) {
    detail->chains[i][0] = 0;
    detail->tgid[i] = 0;
  }
  if (style == 0)
    return;

  if (!tsk || !tsk->mm)
    return;

  leader = tsk->group_leader;
  if (!leader || !leader->mm ||
      leader->exit_state == EXIT_ZOMBIE) { // leader is a zombie or has no mm
    return;
  }

  rcu_read_lock();
  walker = tsk;

  while (walker->pid > 0) {
    if (!thread_group_leader(walker))
      walker = rcu_dereference(walker->group_leader);
    mm_info = find_mm_info(mm_tree, walker->mm);
    if (mm_info) {
      if (mm_info->cgroup_buf[0] == 0)
        diag_cgroup_name(walker, mm_info->cgroup_buf, 255, 0);
      strncpy(detail->chains[cnt], mm_info->argv, PROCESS_ARGV_LEN);
      detail->full_argv[cnt] = 1;
    } else {
      strncpy(detail->chains[cnt], walker->comm, TASK_COMM_LEN);
      detail->full_argv[cnt] = 0;
    }
    detail->tgid[cnt] = walker->pid; // leader's pid, i.e. the tgid
    walker = rcu_dereference(walker->real_parent);
    cnt++;
    if (cnt >= PROCESS_CHAINS_COUNT)
      break;
  }
  rcu_read_unlock();
}

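/*
 * Illustrative result for a shell pipeline, assuming each ancestor's
 * argv is cached in the mm_tree (all values below are made up):
 *
 *   chains[0] = "grep foo /var/log/messages"  tgid[0] = 4321
 *   chains[1] = "-bash"                       tgid[1] = 4300
 *   chains[2] = "/usr/sbin/sshd -D"           tgid[2] = 4100
 */
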
/**
 * @brief copy a task's raw user stack, 1 KiB at a time
 *
 * @param tsk task whose stack should be copied
 * @param detail output buffer (regs, sp/ip/bp, raw stack bytes)
 */
void diag_task_raw_stack(struct task_struct *tsk, raw_stack_detail *detail) {
  struct pt_regs *regs;
  int i;
  int ret;
  unsigned long sp, ip, bp;
  char *stack;

  memset(detail->stack, 0, DIAG_USER_STACK_SIZE);
  detail->stack_size = 0;

  if (!tsk || !tsk->mm)
    return;

  regs = task_pt_regs(tsk);
  if (!regs)
    return;

  sp = regs->sp;
  ip = regs->ip;
  bp = regs->bp;

  detail->regs = *regs;
  detail->sp = sp;
  detail->ip = ip;
  detail->bp = bp;
  stack = (char *)&detail->stack[0];
  for (i = 0; i < (DIAG_USER_STACK_SIZE / 1024); i++) {
    if (tsk == current) {
      pagefault_disable();
      ret = __copy_from_user_inatomic(
          stack, (void __user *)sp + detail->stack_size, 1024);
      pagefault_enable();
    } else {
      ret = diagnose_task_raw_stack_remote(
          tsk, stack, (void __user *)sp + detail->stack_size, 1024);
    }
    // printk(KERN_INFO "diag_task_raw_stack %d i:%d ret:%d\n", tsk->pid, i,
    //        ret);
    if (ret)
      break;
    else
      detail->stack_size += 1024;

    stack += 1024;
  }
}