From a7e4c1e4fa85714a9e062afe5529218871bb959f Mon Sep 17 00:00:00 2001 From: zy Date: Thu, 14 Dec 2023 02:22:17 -0500 Subject: [PATCH] ucli perf software irq --- source/Makefile | 1 + source/module/monitor_kallsyms.c | 5 + source/module/monitor_kallsyms.h | 5 +- source/module/monitor_kernel.c | 15 +++ source/module/monitor_kernel.h | 6 +- source/module/monitor_kernel_lib.c | 149 +++++++++++++++++++++-------- source/module/monitor_perf.c | 50 ++++++---- source/module/monitor_proc.c | 17 ++++ source/module/monitor_proc.h | 4 +- source/module/monitor_trace.c | 36 +++---- source/module/monitor_trace.h | 1 + source/ucli/Makefile | 2 +- 12 files changed, 207 insertions(+), 84 deletions(-) diff --git a/source/Makefile b/source/Makefile index c0fb1d6..5f0f0c4 100644 --- a/source/Makefile +++ b/source/Makefile @@ -7,6 +7,7 @@ variable_monitor-objs := module/monitor_kernel.o \ module/monitor_timer.o \ module/monitor_trace.o \ module/monitor_proc.o \ + module/monitor_perf.o \ buffer/trace_buffer.o \ buffer/variant_buffer.o \ diff --git a/source/module/monitor_kallsyms.c b/source/module/monitor_kallsyms.c index 182412e..7111967 100644 --- a/source/module/monitor_kallsyms.c +++ b/source/module/monitor_kallsyms.c @@ -19,6 +19,9 @@ int (*orig_access_remote_vm)(struct mm_struct *mm, unsigned long addr, void *buf, int len, unsigned int gup_flags); struct task_struct *(*orig_find_task_by_vpid)(pid_t nr); +void (*orig_open_softirq)(int nr, void (*action)(struct softirq_action *)); +void (*orig_raise_softirq)(unsigned int nr); + /** * @brief diag_kallsyms_lookup_name init * @@ -59,5 +62,7 @@ int init_orig_fun(void) { get_task_type); // get_task_type | this fun is not available on 5.17.15 LOOKUP_SYMS_NORET(kernfs_name); // kernfs_name LOOKUP_SYMS_NORET(find_task_by_vpid); + LOOKUP_SYMS_NORET(open_softirq); + LOOKUP_SYMS_NORET(raise_softirq); return 0; } diff --git a/source/module/monitor_kallsyms.h b/source/module/monitor_kallsyms.h index c77e7bd..cab6893 100644 --- a/source/module/monitor_kallsyms.h +++ b/source/module/monitor_kallsyms.h @@ -43,4 +43,7 @@ extern int (*orig_access_remote_vm)( struct mm_struct *mm, unsigned long addr, void *buf, int len, unsigned int gup_flags); // read remote memory extern struct task_struct *(*orig_find_task_by_vpid)( - pid_t nr); // find task by pid \ No newline at end of file + pid_t nr); // find task by pid + +extern void (*orig_open_softirq)(int nr, void (*action)(struct softirq_action *)); +extern void (*orig_raise_softirq)(unsigned int nr); \ No newline at end of file diff --git a/source/module/monitor_kernel.c b/source/module/monitor_kernel.c index 2ee7838..2695266 100644 --- a/source/module/monitor_kernel.c +++ b/source/module/monitor_kernel.c @@ -10,6 +10,19 @@ #define DEVICE_NAME "variable_monitor" +#include + +enum { + MY_SOFTIRQ = 11, // 这个值是示例,确保它没有被其他软中断使用 +}; + +static void my_softirq_handler(struct softirq_action *action) +{ + // 这是软中断处理函数,它不能睡眠,必须快速执行 + // 在这里调用 get_raw_stack + diag_task_all(); +} + // for character device static dev_t dev_num; static struct cdev *watch_cdev; @@ -177,6 +190,8 @@ int init_module(void) { // orig_X | buffer monitor_init(); + orig_open_softirq(MY_SOFTIRQ, my_softirq_handler); + return 0; } diff --git a/source/module/monitor_kernel.h b/source/module/monitor_kernel.h index e879c7f..ae927df 100644 --- a/source/module/monitor_kernel.h +++ b/source/module/monitor_kernel.h @@ -34,5 +34,9 @@ void clear_watch(pid_t pid); // for release enum hrtimer_restart check_variable_cb(struct hrtimer *timer); // hrtimer callback +// for test int diag_pid(int id); // for test -int diag_tgid(int id); // for test \ No newline at end of file +int diag_tgid(int id); // for test +void diag_task_info(kernel_watch_timer *k_watch_timer); +void diag_task_all(void); +void diag_task_by_tgid(pid_t tgid); \ No newline at end of file diff --git a/source/module/monitor_kernel_lib.c b/source/module/monitor_kernel_lib.c index 508e35b..5285807 100644 --- a/source/module/monitor_kernel_lib.c +++ b/source/module/monitor_kernel_lib.c @@ -2,6 +2,7 @@ #include #include +#include // #include // #include @@ -119,11 +120,14 @@ static void diag_tsk(struct task_struct *p, variable_monitor_task *tsk_info) { unsigned int nr_bt; // printk(KERN_INFO "diag_tsk\n"); diag_task_brief(p, &tsk_info->task); // task brief - // diag_task_user_stack(p, &tsk_info->user_stack); // user stack - nr_bt = diag_task_kern_stack(p, &tsk_info->kern_stack); // kernel stack - dump_proc_chains_argv(1, p, &mm_tree_struct, - &tsk_info->proc_chains); // proc chains - diag_task_raw_stack(p, &tsk_info->raw_stack); // raw stack + if (tsk_info->task.sys_task == 1) { // system task + nr_bt = diag_task_kern_stack(p, &tsk_info->kern_stack); // kernel stack + } else { // other task + nr_bt = diag_task_kern_stack(p, &tsk_info->kern_stack); // kernel stack + dump_proc_chains_argv(1, p, &mm_tree_struct, + &tsk_info->proc_chains); // proc chains + diag_task_raw_stack(p, &tsk_info->raw_stack); // raw stack + } } static void push_tskinfo_2_buffer_orig(variable_monitor_task *tsk_info, @@ -203,31 +207,25 @@ static void clear_all_watch(void) { memset(kernel_wtimer_list, 0, sizeof(kernel_wtimer_list)); } -void diag_task_info(kernel_watch_timer *k_watch_timer) { - if (k_watch_timer->threshold_over_count <= 0) // if no threshold reached - return; +static void diag_vm_record(kernel_watch_timer *k_watch_timer, + unsigned char is_print) { + static variable_monitor_record vm_record; - // printk(KERN_INFO "diag_task_info_work\n"); - - struct task_struct *g, *p; // g: task group; p: task - unsigned long flags; - unsigned long event_id = get_cycles(); - - static variable_monitor_task tsk_info = {0}; - static variable_monitor_record vm_record = {0}; kernel_watch_arg *kwarg; + int i; + unsigned long flags; + + unsigned long event_id = get_cycles(); vm_record.id = event_id; vm_record.et_type = VARIABLE_MONITOR_RECORD_TYPE; vm_record.tv = k_watch_timer->tv; vm_record.threshold_over_count = k_watch_timer->threshold_over_count; - int i; for (i = 0; i < vm_record.threshold_over_count; i++) { kwarg = &k_watch_timer->k_watch_args[k_watch_timer->threshold_buffer[i]]; k_w_arg2threshold(kwarg, &vm_record.threshold_record[i]); } - rcu_read_lock(); diag_variant_buffer_spin_lock(&load_monitor_variant_buffer, flags); @@ -236,21 +234,92 @@ void diag_task_info(kernel_watch_timer *k_watch_timer) { diag_variant_buffer_write_nolock(&load_monitor_variant_buffer, &vm_record, sizeof(variable_monitor_record)); diag_variant_buffer_seal(&load_monitor_variant_buffer); - // diag_variant_buffer_spin_unlock(&load_monitor_variant_buffer, flags); - // for task info + diag_variant_buffer_spin_unlock(&load_monitor_variant_buffer, flags); + rcu_read_unlock(); + + if (is_print) { + printk(KERN_INFO "-----------variable monitor----------\n"); + printk(KERN_INFO "threshold exceeded, Timestamp %lld, Delay %lld:\n", + vm_record.tv, ktime_get_real() - vm_record.tv); + + for (i = 0; i < vm_record.threshold_over_count; i++) { + printk( + KERN_INFO + "\t: pid: %d, name: %s, ptr: %p, threshold:%lld, true_value:%lld\n", + vm_record.threshold_record[i].task_id, + vm_record.threshold_record[i] + .name, // Assuming name is a null-terminated string + vm_record.threshold_record[i].ptr, + vm_record.threshold_record[i].threshold, + vm_record.threshold_record[i].true_value); + } + + printk(KERN_INFO "-------------------------------------\n"); + } +} + +void diag_task_by_tgid(pid_t tgid){ + + struct task_struct *tsk; + int ret; + unsigned long flags; + + static variable_monitor_task tsk_info; + + tsk = NULL; + rcu_read_lock(); + tsk = NULL; + if (orig_find_task_by_vpid) + tsk = orig_find_task_by_vpid(tgid); + if (!tsk) { + ret = -EINVAL; + rcu_read_unlock(); + return ; + } + + diag_variant_buffer_spin_lock(&load_monitor_variant_buffer, flags); + + struct task_struct *thread = tsk; + unsigned long event_id = get_cycles(); + + while_each_thread(tsk, thread) { + + tsk_info.et_type = VARIABLE_MONITOR_TASK_TYPE; + tsk_info.id = event_id; + tsk_info.tv = ktime_get_real(); + diag_tsk(tsk, &tsk_info); + + push_tskinfo_22_buffer(&tsk_info, &flags); // push to buffer + } + diag_variant_buffer_spin_unlock(&load_monitor_variant_buffer, flags); + rcu_read_unlock(); +} + +void diag_task_all(void){ + pr_info("diag_task_all, tv %lld\n", ktime_get_real()); + + static variable_monitor_task tsk_info; + + unsigned long event_id = get_cycles(); + struct task_struct *g, *p; // g: task group; p: task + unsigned long flags; + + rcu_read_lock(); + diag_variant_buffer_spin_lock(&load_monitor_variant_buffer, flags); + do_each_thread(g, p) { if (p->__state == TASK_RUNNING || __task_contributes_to_load(p) || ((READ_ONCE(p->__state) & TASK_IDLE) != 0)) { - get_task_struct(p); // count +1 + // get_task_struct(p); // count +1 tsk_info.et_type = VARIABLE_MONITOR_TASK_TYPE; tsk_info.id = event_id; - tsk_info.tv = vm_record.tv; + tsk_info.tv = ktime_get_real(); diag_tsk(p, &tsk_info); - put_task_struct(p); // count -1 + // put_task_struct(p); // count -1 push_tskinfo_22_buffer(&tsk_info, &flags); // push to buffer } @@ -259,24 +328,14 @@ void diag_task_info(kernel_watch_timer *k_watch_timer) { diag_variant_buffer_spin_unlock(&load_monitor_variant_buffer, flags); rcu_read_unlock(); + pr_info("diag_task_all, finish tv %lld\n", ktime_get_real()); +} - printk(KERN_INFO "-----------variable monitor----------\n"); - printk(KERN_INFO "threshold exceeded, Timestamp %lld, Delay %lld:\n", - vm_record.tv, ktime_get_real() - vm_record.tv); - - for (i = 0; i < vm_record.threshold_over_count; i++) { - printk(KERN_INFO - "\t: pid: %d, name: %s, ptr: %p, threshold:%lld, true_value:%lld\n", - vm_record.threshold_record[i].task_id, - vm_record.threshold_record[i] - .name, // Assuming name is a null-terminated string - vm_record.threshold_record[i].ptr, - vm_record.threshold_record[i].threshold, - vm_record.threshold_record[i].true_value); - } - - printk(KERN_INFO "-------------------------------------\n"); - return; +void diag_task_info(kernel_watch_timer *k_watch_timer) { + if (k_watch_timer->threshold_over_count <= 0) // if no threshold reached + return; + diag_task_all(); + diag_vm_record(k_watch_timer, 1); } /** @@ -400,6 +459,10 @@ void clear_watch(pid_t pid) { start_all_hrTimer(); // restart timer } +// enum { +// MY_SOFTIRQ = 11, // 这个值是示例,确保它没有被其他软中断使用 +// }; + /** * @brief main callback function * @@ -429,9 +492,13 @@ enum hrtimer_restart check_variable_cb(struct hrtimer *timer) { { k_watch_timer->threshold_over_count = j; k_watch_timer->tv = ktime_get_real(); + pr_info("threshold reached, tv %lld\n", k_watch_timer->tv); // highpri_wq - queue_work(system_highpri_wq, &k_watch_timer->wk); + // queue_work(system_highpri_wq, &k_watch_timer->wk); diag_task_info(k_watch_timer); + + // orig_raise_softirq(MY_SOFTIRQ); // for test + // restart timer after dump_reset_sec sec hrtimer_forward(timer, timer->base->get_time(), ktime_set(dump_reset_sec, 0)); diff --git a/source/module/monitor_perf.c b/source/module/monitor_perf.c index 549d7ac..76df6f3 100644 --- a/source/module/monitor_perf.c +++ b/source/module/monitor_perf.c @@ -3,13 +3,15 @@ static struct perf_event *pe; void vm_perf_overflow_callback(struct perf_event *event, - struct perf_sample_data *data, - struct pt_regs *regs) { + struct perf_sample_data *data, + struct pt_regs *regs) { // handle perf event data // struct perf_callchain_entry *callchain; // int nr, i; pr_info("perf event callback\n"); + + // perf_event_disable(event); // 如果 perf_sample_data 有调用堆栈信息 // if (data->callchain) { @@ -42,10 +44,10 @@ void vm_perf_overflow_callback(struct perf_event *event, struct perf_event_attr pea = { .type = PERF_TYPE_SOFTWARE, .size = sizeof(struct perf_event_attr), - .config = PERF_COUNT_SW_CPU_CLOCK, + .config = PERF_COUNT_SW_DUMMY, .sample_period = 1, .sample_type = PERF_SAMPLE_CALLCHAIN, - // .disabled = 1, + .disabled = 1, }; #include @@ -61,32 +63,38 @@ void setup_perf_event_for_task(struct task_struct *tsk) { pr_info("Perf event already created\n"); return; } - int cpu; - struct perf_event **events; - for_each_possible_cpu(cpu) { - struct perf_event **event = per_cpu_ptr(events, cpu); - if (cpu_is_offline(cpu)) { - *event = NULL; - continue; - } - *event = perf_event_create_kernel_counter(&pea, cpu, tsk, - vm_perf_overflow_callback, NULL); - if (IS_ERR(*event)) { - printk(KERN_INFO "create perf event failure\n"); - // return -1; - } - } + // int cpu; + // struct perf_event **events; + // for_each_possible_cpu(cpu) { + // struct perf_event **event = per_cpu_ptr(events, cpu); + // if (cpu_is_offline(cpu)) { + // pr_info("cpu %d is offline\n", cpu); + // *event = NULL; + // continue; + // } + // *event = perf_event_create_kernel_counter(&pea, cpu, tsk, + // vm_perf_overflow_callback, NULL); + + // // perf_event_create_kernel_counter(&pea, cpu, tsk, + // // vm_perf_overflow_callback, + // // NULL); + // if (IS_ERR(*event)) { + // printk(KERN_INFO "create perf event failure\n"); + // // return -1; + // } + // // perf_event_enable(*event); + // } // pe = perf_event_create_kernel_counter(&pea, tsk->on_cpu, tsk, // vm_perf_callback, NULL); - pe = perf_event_create_kernel_counter(&pea, tsk->on_cpu, tsk, + pe = perf_event_create_kernel_counter(&pea, -1, tsk, vm_perf_overflow_callback, NULL); if (IS_ERR(pe)) { pr_info("Error in perf_event_create_kernel_counter\n"); return; } - // perf_event_enable(pe); // enable perf event + perf_event_enable(pe); // enable perf event } /** diff --git a/source/module/monitor_proc.c b/source/module/monitor_proc.c index 9603d5b..f7960f8 100644 --- a/source/module/monitor_proc.c +++ b/source/module/monitor_proc.c @@ -8,6 +8,7 @@ const char* proc_dir = "variable_monitor"; int def_interval_ns = DEFAULT_INTERVAL_NS; int dump_reset_sec = DEFAULT_DUMP_RESET_SEC; +int sample_all = DEFAULT_SAMPLE_ALL; static ssize_t read_proc(struct file *file, char __user *buf, size_t count, loff_t *offset, int *var) { @@ -54,6 +55,16 @@ static ssize_t write_proc_dump_reset_sec(struct file *file, return write_proc(file, buf, count, offset, &dump_reset_sec); } +static ssize_t read_proc_sample_all(struct file *file, char __user *buf, + size_t count, loff_t *offset) { + return read_proc(file, buf, count, offset, &sample_all); +} +static ssize_t write_proc_sample_all(struct file *file, + const char __user *buf, size_t count, + loff_t *offset) { + return write_proc(file, buf, count, offset, &sample_all); +} + static const struct proc_ops proc_def_interval_ns_ops = { .proc_read = read_proc_def_interval_ns, .proc_write = write_proc_def_interval_ns, @@ -64,6 +75,11 @@ static const struct proc_ops proc_dump_reset_sec_ops = { .proc_write = write_proc_dump_reset_sec, }; +static const struct proc_ops proc_sample_all_ops = { + .proc_read = read_proc_sample_all, + .proc_write = write_proc_sample_all, +}; + int monitor_proc_init(void) { struct proc_dir_entry *dir; @@ -75,6 +91,7 @@ int monitor_proc_init(void) { proc_create("def_interval_ns", 0666, dir, &proc_def_interval_ns_ops); proc_create("dump_reset_sec", 0666, dir, &proc_dump_reset_sec_ops); + proc_create("sample_all", 0666, dir, &proc_sample_all_ops); return 0; } diff --git a/source/module/monitor_proc.h b/source/module/monitor_proc.h index 33a3ead..8ed4efc 100644 --- a/source/module/monitor_proc.h +++ b/source/module/monitor_proc.h @@ -2,10 +2,12 @@ #define MODULE_MONITOR_PROC_H #define DEFAULT_INTERVAL_NS 10000 // 10us -#define DEFAULT_DUMP_RESET_SEC 60 // 60s +#define DEFAULT_DUMP_RESET_SEC 10 // 60s +#define DEFAULT_SAMPLE_ALL 0 extern int def_interval_ns; extern int dump_reset_sec; +extern int sample_all; int monitor_proc_init(void); int monitor_proc_exit(void); diff --git a/source/module/monitor_trace.c b/source/module/monitor_trace.c index 8a0d59a..d9344ed 100644 --- a/source/module/monitor_trace.c +++ b/source/module/monitor_trace.c @@ -135,17 +135,17 @@ static inline void save_stack_trace_user_remote(struct task_struct *tsk, const void __user *fp = (const void __user *)regs->bp; int count = 0; - if (in_atomic()) { - printk(KERN_INFO "save_stack_trace_user_remote %d: task in_atomic\n", - tsk->pid); - return; - } + // if (in_atomic()) { + // printk(KERN_INFO "save_stack_trace_user_remote %d: task in_atomic\n", + // tsk->pid); + // return; + // } - if (irqs_disabled()) { - printk(KERN_INFO "save_stack_trace_user_remote %d: task in irqs_disabled\n", - tsk->pid); - return; - } + // if (irqs_disabled()) { + // printk(KERN_INFO "save_stack_trace_user_remote %d: task in irqs_disabled\n", + // tsk->pid); + // return; + // } if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = regs->ip; @@ -288,15 +288,15 @@ static int diagnose_task_raw_stack_remote(struct task_struct *tsk, void *to, int ret; struct mm_struct *mm; - if (in_atomic()) { - // printk(KERN_INFO "task_raw_stack_remote %d in_atomic\n", tsk->pid); - return 0; - } + // if (in_atomic()) { + // printk(KERN_INFO "task_raw_stack_remote %d in_atomic\n", tsk->pid); + // return 0; + // } - if (irqs_disabled()) { - // printk(KERN_INFO "task_raw_stack_remote %d irqs_disabled\n", tsk->pid); - return 0; - } + // if (irqs_disabled()) { + // printk(KERN_INFO "task_raw_stack_remote %d irqs_disabled\n", tsk->pid); + // return 0; + // } if (in_atomic() || irqs_disabled()) { return 0; diff --git a/source/module/monitor_trace.h b/source/module/monitor_trace.h index 4098f1a..a0bc1c5 100644 --- a/source/module/monitor_trace.h +++ b/source/module/monitor_trace.h @@ -59,6 +59,7 @@ typedef struct { unsigned long sys_task; /** * 1->user mode 0->sys mode -1->unknown + * may not be accurate */ unsigned long user_mode; char comm[TASK_COMM_LEN]; diff --git a/source/ucli/Makefile b/source/ucli/Makefile index dd0bf10..ae7625d 100644 --- a/source/ucli/Makefile +++ b/source/ucli/Makefile @@ -1,6 +1,6 @@ OUTPUT_DIR ?= . TARGET_EXE=ucli -SOURCES=ucli.cc ucli-lib.cc unwind.cc symbol.cc accessors.cc elf.cc +SOURCES=ucli.cc ucli-lib.cc unwind.cc symbol.cc accessors.cc elf.cc helpfun.cc OBJECTS=$(SOURCES:.cc=.o) CFLAGS=-g -O0