ucli perf software irq

This commit is contained in:
zy
2023-12-14 02:22:17 -05:00
parent 5d9fd19ddb
commit a7e4c1e4fa
12 changed files with 207 additions and 84 deletions

View File

@@ -7,6 +7,7 @@ variable_monitor-objs := module/monitor_kernel.o \
module/monitor_timer.o \ module/monitor_timer.o \
module/monitor_trace.o \ module/monitor_trace.o \
module/monitor_proc.o \ module/monitor_proc.o \
module/monitor_perf.o \
buffer/trace_buffer.o \ buffer/trace_buffer.o \
buffer/variant_buffer.o \ buffer/variant_buffer.o \

View File

@@ -19,6 +19,9 @@ int (*orig_access_remote_vm)(struct mm_struct *mm, unsigned long addr,
void *buf, int len, unsigned int gup_flags); void *buf, int len, unsigned int gup_flags);
struct task_struct *(*orig_find_task_by_vpid)(pid_t nr); struct task_struct *(*orig_find_task_by_vpid)(pid_t nr);
// Pointers used in place of the kernel's open_softirq() / raise_softirq();
// both are looked up by name in init_orig_fun() via LOOKUP_SYMS_NORET.
// NOTE(review): presumably left NULL when the lookup fails -- callers should
// check before calling; confirm against the LOOKUP_SYMS_NORET definition.
void (*orig_open_softirq)(int nr, void (*action)(struct softirq_action *));
void (*orig_raise_softirq)(unsigned int nr);
/** /**
* @brief diag_kallsyms_lookup_name init * @brief diag_kallsyms_lookup_name init
* *
@@ -59,5 +62,7 @@ int init_orig_fun(void) {
get_task_type); // get_task_type | this fun is not available on 5.17.15 get_task_type); // get_task_type | this fun is not available on 5.17.15
LOOKUP_SYMS_NORET(kernfs_name); // kernfs_name LOOKUP_SYMS_NORET(kernfs_name); // kernfs_name
LOOKUP_SYMS_NORET(find_task_by_vpid); LOOKUP_SYMS_NORET(find_task_by_vpid);
LOOKUP_SYMS_NORET(open_softirq);
LOOKUP_SYMS_NORET(raise_softirq);
return 0; return 0;
} }

View File

@@ -44,3 +44,6 @@ extern int (*orig_access_remote_vm)(
unsigned int gup_flags); // read remote memory unsigned int gup_flags); // read remote memory
extern struct task_struct *(*orig_find_task_by_vpid)( extern struct task_struct *(*orig_find_task_by_vpid)(
pid_t nr); // find task by pid pid_t nr); // find task by pid
// register `action` as the handler of softirq number `nr`
extern void (*orig_open_softirq)(int nr, void (*action)(struct softirq_action *));
// mark softirq number `nr` as pending
extern void (*orig_raise_softirq)(unsigned int nr);

View File

@@ -10,6 +10,19 @@
#define DEVICE_NAME "variable_monitor" #define DEVICE_NAME "variable_monitor"
#include <linux/interrupt.h>
// Softirq number this module registers its own handler under (see the
// orig_open_softirq() call in init_module).
// NOTE(review): mainline kernels size the softirq vector with NR_SOFTIRQS
// (10 entries, indices 0..9), so 11 may be out of range -- confirm against
// the target kernel before relying on this value.
enum {
MY_SOFTIRQ = 11, // example value; make sure no other softirq uses it
};
// Softirq handler registered for MY_SOFTIRQ.
// `action` is not used by the body.
static void my_softirq_handler(struct softirq_action *action)
{
// softirq context: this handler must not sleep and must finish quickly
// dump task information for all tasks from here
diag_task_all();
}
// for character device // for character device
static dev_t dev_num; static dev_t dev_num;
static struct cdev *watch_cdev; static struct cdev *watch_cdev;
@@ -177,6 +190,8 @@ int init_module(void) {
// orig_X | buffer // orig_X | buffer
monitor_init(); monitor_init();
orig_open_softirq(MY_SOFTIRQ, my_softirq_handler);
return 0; return 0;
} }

View File

@@ -34,5 +34,9 @@ void clear_watch(pid_t pid); // for release
enum hrtimer_restart enum hrtimer_restart
check_variable_cb(struct hrtimer *timer); // hrtimer callback check_variable_cb(struct hrtimer *timer); // hrtimer callback
// for test
int diag_pid(int id); // for test int diag_pid(int id); // for test
int diag_tgid(int id); // for test int diag_tgid(int id); // for test
// returns at once when threshold_over_count <= 0; otherwise dumps all tasks
// and then the threshold record of k_watch_timer
void diag_task_info(kernel_watch_timer *k_watch_timer);
// walks every thread in the system and pushes the selected ones to the buffer
void diag_task_all(void);
// pushes the threads of the task found for `tgid` to the buffer
void diag_task_by_tgid(pid_t tgid);

View File

@@ -2,6 +2,7 @@
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/stacktrace.h> #include <linux/stacktrace.h>
#include <linux/interrupt.h>
// #include <linux/sched/task.h> // #include <linux/sched/task.h>
// #include <linux/sched/mm.h> // #include <linux/sched/mm.h>
@@ -119,11 +120,14 @@ static void diag_tsk(struct task_struct *p, variable_monitor_task *tsk_info) {
unsigned int nr_bt; unsigned int nr_bt;
// printk(KERN_INFO "diag_tsk\n"); // printk(KERN_INFO "diag_tsk\n");
diag_task_brief(p, &tsk_info->task); // task brief diag_task_brief(p, &tsk_info->task); // task brief
// diag_task_user_stack(p, &tsk_info->user_stack); // user stack if (tsk_info->task.sys_task == 1) { // system task
nr_bt = diag_task_kern_stack(p, &tsk_info->kern_stack); // kernel stack nr_bt = diag_task_kern_stack(p, &tsk_info->kern_stack); // kernel stack
dump_proc_chains_argv(1, p, &mm_tree_struct, } else { // other task
&tsk_info->proc_chains); // proc chains nr_bt = diag_task_kern_stack(p, &tsk_info->kern_stack); // kernel stack
diag_task_raw_stack(p, &tsk_info->raw_stack); // raw stack dump_proc_chains_argv(1, p, &mm_tree_struct,
&tsk_info->proc_chains); // proc chains
diag_task_raw_stack(p, &tsk_info->raw_stack); // raw stack
}
} }
static void push_tskinfo_2_buffer_orig(variable_monitor_task *tsk_info, static void push_tskinfo_2_buffer_orig(variable_monitor_task *tsk_info,
@@ -203,31 +207,25 @@ static void clear_all_watch(void) {
memset(kernel_wtimer_list, 0, sizeof(kernel_wtimer_list)); memset(kernel_wtimer_list, 0, sizeof(kernel_wtimer_list));
} }
void diag_task_info(kernel_watch_timer *k_watch_timer) { static void diag_vm_record(kernel_watch_timer *k_watch_timer,
if (k_watch_timer->threshold_over_count <= 0) // if no threshold reached unsigned char is_print) {
return; static variable_monitor_record vm_record;
// printk(KERN_INFO "diag_task_info_work\n");
struct task_struct *g, *p; // g: task group; p: task
unsigned long flags;
unsigned long event_id = get_cycles();
static variable_monitor_task tsk_info = {0};
static variable_monitor_record vm_record = {0};
kernel_watch_arg *kwarg; kernel_watch_arg *kwarg;
int i;
unsigned long flags;
unsigned long event_id = get_cycles();
vm_record.id = event_id; vm_record.id = event_id;
vm_record.et_type = VARIABLE_MONITOR_RECORD_TYPE; vm_record.et_type = VARIABLE_MONITOR_RECORD_TYPE;
vm_record.tv = k_watch_timer->tv; vm_record.tv = k_watch_timer->tv;
vm_record.threshold_over_count = k_watch_timer->threshold_over_count; vm_record.threshold_over_count = k_watch_timer->threshold_over_count;
int i;
for (i = 0; i < vm_record.threshold_over_count; i++) { for (i = 0; i < vm_record.threshold_over_count; i++) {
kwarg = &k_watch_timer->k_watch_args[k_watch_timer->threshold_buffer[i]]; kwarg = &k_watch_timer->k_watch_args[k_watch_timer->threshold_buffer[i]];
k_w_arg2threshold(kwarg, &vm_record.threshold_record[i]); k_w_arg2threshold(kwarg, &vm_record.threshold_record[i]);
} }
rcu_read_lock(); rcu_read_lock();
diag_variant_buffer_spin_lock(&load_monitor_variant_buffer, flags); diag_variant_buffer_spin_lock(&load_monitor_variant_buffer, flags);
@@ -236,21 +234,92 @@ void diag_task_info(kernel_watch_timer *k_watch_timer) {
diag_variant_buffer_write_nolock(&load_monitor_variant_buffer, &vm_record, diag_variant_buffer_write_nolock(&load_monitor_variant_buffer, &vm_record,
sizeof(variable_monitor_record)); sizeof(variable_monitor_record));
diag_variant_buffer_seal(&load_monitor_variant_buffer); diag_variant_buffer_seal(&load_monitor_variant_buffer);
// diag_variant_buffer_spin_unlock(&load_monitor_variant_buffer, flags);
// for task info diag_variant_buffer_spin_unlock(&load_monitor_variant_buffer, flags);
rcu_read_unlock();
if (is_print) {
printk(KERN_INFO "-----------variable monitor----------\n");
printk(KERN_INFO "threshold exceeded, Timestamp %lld, Delay %lld:\n",
vm_record.tv, ktime_get_real() - vm_record.tv);
for (i = 0; i < vm_record.threshold_over_count; i++) {
printk(
KERN_INFO
"\t: pid: %d, name: %s, ptr: %p, threshold:%lld, true_value:%lld\n",
vm_record.threshold_record[i].task_id,
vm_record.threshold_record[i]
.name, // Assuming name is a null-terminated string
vm_record.threshold_record[i].ptr,
vm_record.threshold_record[i].threshold,
vm_record.threshold_record[i].true_value);
}
printk(KERN_INFO "-------------------------------------\n");
}
}
/**
 * @brief dump every thread of one thread group into the variant buffer
 *
 * The task is looked up with orig_find_task_by_vpid(tgid) under
 * rcu_read_lock(). While holding the load_monitor_variant_buffer spin lock,
 * each thread of that task's group is collected with diag_tsk() and pushed
 * with push_tskinfo_22_buffer(). All records written by one call share the
 * same event id (get_cycles()).
 *
 * Returns without writing anything when orig_find_task_by_vpid is NULL or
 * no task matches.
 *
 * @param tgid id handed to orig_find_task_by_vpid()
 */
void diag_task_by_tgid(pid_t tgid) {
  // static storage is shared by all calls, so this function is not
  // reentrant. NOTE(review): presumably static because the struct is too
  // large for the kernel stack -- confirm callers are serialized.
  static variable_monitor_task tsk_info;
  struct task_struct *tsk = NULL;
  struct task_struct *thread;
  unsigned long flags;
  unsigned long event_id;

  rcu_read_lock();

  if (orig_find_task_by_vpid)
    tsk = orig_find_task_by_vpid(tgid);
  if (!tsk) {
    rcu_read_unlock();
    return;
  }

  diag_variant_buffer_spin_lock(&load_monitor_variant_buffer, flags);

  event_id = get_cycles();
  // for_each_thread() visits the looked-up task as well as its siblings.
  // The previous `while_each_thread(tsk, thread) { ... }` form skipped the
  // starting task and passed `tsk` (not `thread`) to diag_tsk(), so sibling
  // threads were never dumped and a single-threaded process produced nothing.
  for_each_thread(tsk, thread) {
    tsk_info.et_type = VARIABLE_MONITOR_TASK_TYPE;
    tsk_info.id = event_id;
    tsk_info.tv = ktime_get_real();
    diag_tsk(thread, &tsk_info);
    push_tskinfo_22_buffer(&tsk_info, &flags); // push to buffer
  }

  diag_variant_buffer_spin_unlock(&load_monitor_variant_buffer, flags);
  rcu_read_unlock();
}
void diag_task_all(void){
pr_info("diag_task_all, tv %lld\n", ktime_get_real());
static variable_monitor_task tsk_info;
unsigned long event_id = get_cycles();
struct task_struct *g, *p; // g: task group; p: task
unsigned long flags;
rcu_read_lock();
diag_variant_buffer_spin_lock(&load_monitor_variant_buffer, flags);
do_each_thread(g, p) { do_each_thread(g, p) {
if (p->__state == TASK_RUNNING || __task_contributes_to_load(p) || if (p->__state == TASK_RUNNING || __task_contributes_to_load(p) ||
((READ_ONCE(p->__state) & TASK_IDLE) != 0)) { ((READ_ONCE(p->__state) & TASK_IDLE) != 0)) {
get_task_struct(p); // count +1 // get_task_struct(p); // count +1
tsk_info.et_type = VARIABLE_MONITOR_TASK_TYPE; tsk_info.et_type = VARIABLE_MONITOR_TASK_TYPE;
tsk_info.id = event_id; tsk_info.id = event_id;
tsk_info.tv = vm_record.tv; tsk_info.tv = ktime_get_real();
diag_tsk(p, &tsk_info); diag_tsk(p, &tsk_info);
put_task_struct(p); // count -1 // put_task_struct(p); // count -1
push_tskinfo_22_buffer(&tsk_info, &flags); // push to buffer push_tskinfo_22_buffer(&tsk_info, &flags); // push to buffer
} }
@@ -259,24 +328,14 @@ void diag_task_info(kernel_watch_timer *k_watch_timer) {
diag_variant_buffer_spin_unlock(&load_monitor_variant_buffer, flags); diag_variant_buffer_spin_unlock(&load_monitor_variant_buffer, flags);
rcu_read_unlock(); rcu_read_unlock();
pr_info("diag_task_all, finish tv %lld\n", ktime_get_real());
}
printk(KERN_INFO "-----------variable monitor----------\n"); void diag_task_info(kernel_watch_timer *k_watch_timer) {
printk(KERN_INFO "threshold exceeded, Timestamp %lld, Delay %lld:\n", if (k_watch_timer->threshold_over_count <= 0) // if no threshold reached
vm_record.tv, ktime_get_real() - vm_record.tv); return;
diag_task_all();
for (i = 0; i < vm_record.threshold_over_count; i++) { diag_vm_record(k_watch_timer, 1);
printk(KERN_INFO
"\t: pid: %d, name: %s, ptr: %p, threshold:%lld, true_value:%lld\n",
vm_record.threshold_record[i].task_id,
vm_record.threshold_record[i]
.name, // Assuming name is a null-terminated string
vm_record.threshold_record[i].ptr,
vm_record.threshold_record[i].threshold,
vm_record.threshold_record[i].true_value);
}
printk(KERN_INFO "-------------------------------------\n");
return;
} }
/** /**
@@ -400,6 +459,10 @@ void clear_watch(pid_t pid) {
start_all_hrTimer(); // restart timer start_all_hrTimer(); // restart timer
} }
// enum {
// MY_SOFTIRQ = 11, // example value; make sure no other softirq uses it
// };
/** /**
* @brief main callback function * @brief main callback function
* *
@@ -429,9 +492,13 @@ enum hrtimer_restart check_variable_cb(struct hrtimer *timer) {
{ {
k_watch_timer->threshold_over_count = j; k_watch_timer->threshold_over_count = j;
k_watch_timer->tv = ktime_get_real(); k_watch_timer->tv = ktime_get_real();
pr_info("threshold reached, tv %lld\n", k_watch_timer->tv);
// highpri_wq // highpri_wq
queue_work(system_highpri_wq, &k_watch_timer->wk); // queue_work(system_highpri_wq, &k_watch_timer->wk);
diag_task_info(k_watch_timer); diag_task_info(k_watch_timer);
// orig_raise_softirq(MY_SOFTIRQ); // for test
// restart timer after dump_reset_sec sec // restart timer after dump_reset_sec sec
hrtimer_forward(timer, timer->base->get_time(), hrtimer_forward(timer, timer->base->get_time(),
ktime_set(dump_reset_sec, 0)); ktime_set(dump_reset_sec, 0));

View File

@@ -3,14 +3,16 @@
static struct perf_event *pe; static struct perf_event *pe;
void vm_perf_overflow_callback(struct perf_event *event, void vm_perf_overflow_callback(struct perf_event *event,
struct perf_sample_data *data, struct perf_sample_data *data,
struct pt_regs *regs) { struct pt_regs *regs) {
// handle perf event data // handle perf event data
// struct perf_callchain_entry *callchain; // struct perf_callchain_entry *callchain;
// int nr, i; // int nr, i;
pr_info("perf event callback\n"); pr_info("perf event callback\n");
// perf_event_disable(event);
// 如果 perf_sample_data 有调用堆栈信息 // 如果 perf_sample_data 有调用堆栈信息
// if (data->callchain) { // if (data->callchain) {
// callchain = data->callchain; // callchain = data->callchain;
@@ -42,10 +44,10 @@ void vm_perf_overflow_callback(struct perf_event *event,
struct perf_event_attr pea = { struct perf_event_attr pea = {
.type = PERF_TYPE_SOFTWARE, .type = PERF_TYPE_SOFTWARE,
.size = sizeof(struct perf_event_attr), .size = sizeof(struct perf_event_attr),
.config = PERF_COUNT_SW_CPU_CLOCK, .config = PERF_COUNT_SW_DUMMY,
.sample_period = 1, .sample_period = 1,
.sample_type = PERF_SAMPLE_CALLCHAIN, .sample_type = PERF_SAMPLE_CALLCHAIN,
// .disabled = 1, .disabled = 1,
}; };
#include <linux/cpumask.h> #include <linux/cpumask.h>
@@ -61,32 +63,38 @@ void setup_perf_event_for_task(struct task_struct *tsk) {
pr_info("Perf event already created\n"); pr_info("Perf event already created\n");
return; return;
} }
int cpu; // int cpu;
struct perf_event **events; // struct perf_event **events;
for_each_possible_cpu(cpu) { // for_each_possible_cpu(cpu) {
struct perf_event **event = per_cpu_ptr(events, cpu); // struct perf_event **event = per_cpu_ptr(events, cpu);
if (cpu_is_offline(cpu)) { // if (cpu_is_offline(cpu)) {
*event = NULL; // pr_info("cpu %d is offline\n", cpu);
continue; // *event = NULL;
} // continue;
*event = perf_event_create_kernel_counter(&pea, cpu, tsk, // }
vm_perf_overflow_callback, NULL); // *event = perf_event_create_kernel_counter(&pea, cpu, tsk,
if (IS_ERR(*event)) { // vm_perf_overflow_callback, NULL);
printk(KERN_INFO "create perf event failure\n");
// return -1; // // perf_event_create_kernel_counter(&pea, cpu, tsk,
} // // vm_perf_overflow_callback,
} // // NULL);
// if (IS_ERR(*event)) {
// printk(KERN_INFO "create perf event failure\n");
// // return -1;
// }
// // perf_event_enable(*event);
// }
// pe = perf_event_create_kernel_counter(&pea, tsk->on_cpu, tsk, // pe = perf_event_create_kernel_counter(&pea, tsk->on_cpu, tsk,
// vm_perf_callback, NULL); // vm_perf_callback, NULL);
pe = perf_event_create_kernel_counter(&pea, tsk->on_cpu, tsk, pe = perf_event_create_kernel_counter(&pea, -1, tsk,
vm_perf_overflow_callback, NULL); vm_perf_overflow_callback, NULL);
if (IS_ERR(pe)) { if (IS_ERR(pe)) {
pr_info("Error in perf_event_create_kernel_counter\n"); pr_info("Error in perf_event_create_kernel_counter\n");
return; return;
} }
// perf_event_enable(pe); // enable perf event perf_event_enable(pe); // enable perf event
} }
/** /**

View File

@@ -8,6 +8,7 @@
const char* proc_dir = "variable_monitor"; const char* proc_dir = "variable_monitor";
int def_interval_ns = DEFAULT_INTERVAL_NS; int def_interval_ns = DEFAULT_INTERVAL_NS;
int dump_reset_sec = DEFAULT_DUMP_RESET_SEC; int dump_reset_sec = DEFAULT_DUMP_RESET_SEC;
int sample_all = DEFAULT_SAMPLE_ALL;
static ssize_t read_proc(struct file *file, char __user *buf, size_t count, static ssize_t read_proc(struct file *file, char __user *buf, size_t count,
loff_t *offset, int *var) { loff_t *offset, int *var) {
@@ -54,6 +55,16 @@ static ssize_t write_proc_dump_reset_sec(struct file *file,
return write_proc(file, buf, count, offset, &dump_reset_sec); return write_proc(file, buf, count, offset, &dump_reset_sec);
} }
// proc read handler for the "sample_all" entry: forwards to the shared
// read_proc() helper with the sample_all variable and returns its result.
static ssize_t read_proc_sample_all(struct file *file, char __user *buf,
size_t count, loff_t *offset) {
return read_proc(file, buf, count, offset, &sample_all);
}
// proc write handler for the "sample_all" entry: forwards to the shared
// write_proc() helper with the sample_all variable and returns its result.
static ssize_t write_proc_sample_all(struct file *file,
const char __user *buf, size_t count,
loff_t *offset) {
return write_proc(file, buf, count, offset, &sample_all);
}
static const struct proc_ops proc_def_interval_ns_ops = { static const struct proc_ops proc_def_interval_ns_ops = {
.proc_read = read_proc_def_interval_ns, .proc_read = read_proc_def_interval_ns,
.proc_write = write_proc_def_interval_ns, .proc_write = write_proc_def_interval_ns,
@@ -64,6 +75,11 @@ static const struct proc_ops proc_dump_reset_sec_ops = {
.proc_write = write_proc_dump_reset_sec, .proc_write = write_proc_dump_reset_sec,
}; };
// proc file operations for the "sample_all" entry (passed to proc_create in
// monitor_proc_init); only read and write are provided.
static const struct proc_ops proc_sample_all_ops = {
.proc_read = read_proc_sample_all,
.proc_write = write_proc_sample_all,
};
int monitor_proc_init(void) { int monitor_proc_init(void) {
struct proc_dir_entry *dir; struct proc_dir_entry *dir;
@@ -75,6 +91,7 @@ int monitor_proc_init(void) {
proc_create("def_interval_ns", 0666, dir, &proc_def_interval_ns_ops); proc_create("def_interval_ns", 0666, dir, &proc_def_interval_ns_ops);
proc_create("dump_reset_sec", 0666, dir, &proc_dump_reset_sec_ops); proc_create("dump_reset_sec", 0666, dir, &proc_dump_reset_sec_ops);
proc_create("sample_all", 0666, dir, &proc_sample_all_ops);
return 0; return 0;
} }

View File

@@ -2,10 +2,12 @@
#define MODULE_MONITOR_PROC_H #define MODULE_MONITOR_PROC_H
#define DEFAULT_INTERVAL_NS 10000 // 10us #define DEFAULT_INTERVAL_NS 10000 // 10us
#define DEFAULT_DUMP_RESET_SEC 60 // 60s #define DEFAULT_DUMP_RESET_SEC 10 // 10s
#define DEFAULT_SAMPLE_ALL 0
extern int def_interval_ns; extern int def_interval_ns;
extern int dump_reset_sec; extern int dump_reset_sec;
extern int sample_all;
int monitor_proc_init(void); int monitor_proc_init(void);
int monitor_proc_exit(void); int monitor_proc_exit(void);

View File

@@ -135,17 +135,17 @@ static inline void save_stack_trace_user_remote(struct task_struct *tsk,
const void __user *fp = (const void __user *)regs->bp; const void __user *fp = (const void __user *)regs->bp;
int count = 0; int count = 0;
if (in_atomic()) { // if (in_atomic()) {
printk(KERN_INFO "save_stack_trace_user_remote %d: task in_atomic\n", // printk(KERN_INFO "save_stack_trace_user_remote %d: task in_atomic\n",
tsk->pid); // tsk->pid);
return; // return;
} // }
if (irqs_disabled()) { // if (irqs_disabled()) {
printk(KERN_INFO "save_stack_trace_user_remote %d: task in irqs_disabled\n", // printk(KERN_INFO "save_stack_trace_user_remote %d: task in irqs_disabled\n",
tsk->pid); // tsk->pid);
return; // return;
} // }
if (trace->nr_entries < trace->max_entries) if (trace->nr_entries < trace->max_entries)
trace->entries[trace->nr_entries++] = regs->ip; trace->entries[trace->nr_entries++] = regs->ip;
@@ -288,15 +288,15 @@ static int diagnose_task_raw_stack_remote(struct task_struct *tsk, void *to,
int ret; int ret;
struct mm_struct *mm; struct mm_struct *mm;
if (in_atomic()) { // if (in_atomic()) {
// printk(KERN_INFO "task_raw_stack_remote %d in_atomic\n", tsk->pid); // printk(KERN_INFO "task_raw_stack_remote %d in_atomic\n", tsk->pid);
return 0; // return 0;
} // }
if (irqs_disabled()) { // if (irqs_disabled()) {
// printk(KERN_INFO "task_raw_stack_remote %d irqs_disabled\n", tsk->pid); // printk(KERN_INFO "task_raw_stack_remote %d irqs_disabled\n", tsk->pid);
return 0; // return 0;
} // }
if (in_atomic() || irqs_disabled()) { if (in_atomic() || irqs_disabled()) {
return 0; return 0;

View File

@@ -59,6 +59,7 @@ typedef struct {
unsigned long sys_task; unsigned long sys_task;
/** /**
* 1->user mode 0->sys mode -1->unknown * 1->user mode 0->sys mode -1->unknown
* may not be accurate
*/ */
unsigned long user_mode; unsigned long user_mode;
char comm[TASK_COMM_LEN]; char comm[TASK_COMM_LEN];

View File

@@ -1,6 +1,6 @@
OUTPUT_DIR ?= . OUTPUT_DIR ?= .
TARGET_EXE=ucli TARGET_EXE=ucli
SOURCES=ucli.cc ucli-lib.cc unwind.cc symbol.cc accessors.cc elf.cc SOURCES=ucli.cc ucli-lib.cc unwind.cc symbol.cc accessors.cc elf.cc helpfun.cc
OBJECTS=$(SOURCES:.cc=.o) OBJECTS=$(SOURCES:.cc=.o)
CFLAGS=-g -O0 CFLAGS=-g -O0