ucli perf software irq

This commit is contained in:
zy
2023-12-14 02:22:17 -05:00
parent 5d9fd19ddb
commit a7e4c1e4fa
12 changed files with 207 additions and 84 deletions

View File

@@ -7,6 +7,7 @@ variable_monitor-objs := module/monitor_kernel.o \
module/monitor_timer.o \
module/monitor_trace.o \
module/monitor_proc.o \
module/monitor_perf.o \
buffer/trace_buffer.o \
buffer/variant_buffer.o \

View File

@@ -19,6 +19,9 @@ int (*orig_access_remote_vm)(struct mm_struct *mm, unsigned long addr,
void *buf, int len, unsigned int gup_flags);
struct task_struct *(*orig_find_task_by_vpid)(pid_t nr);
void (*orig_open_softirq)(int nr, void (*action)(struct softirq_action *));
void (*orig_raise_softirq)(unsigned int nr);
/**
* @brief diag_kallsyms_lookup_name init
*
@@ -59,5 +62,7 @@ int init_orig_fun(void) {
get_task_type); // get_task_type | this fun is not available on 5.17.15
LOOKUP_SYMS_NORET(kernfs_name); // kernfs_name
LOOKUP_SYMS_NORET(find_task_by_vpid);
LOOKUP_SYMS_NORET(open_softirq);
LOOKUP_SYMS_NORET(raise_softirq);
return 0;
}

View File

@@ -44,3 +44,6 @@ extern int (*orig_access_remote_vm)(
unsigned int gup_flags); // read remote memory
extern struct task_struct *(*orig_find_task_by_vpid)(
pid_t nr); // find task by pid
extern void (*orig_open_softirq)(int nr, void (*action)(struct softirq_action *));
extern void (*orig_raise_softirq)(unsigned int nr);

View File

@@ -10,6 +10,19 @@
#define DEVICE_NAME "variable_monitor"
#include <linux/interrupt.h>
enum {
/* NOTE(review): custom softirq slot. 11 is >= NR_SOFTIRQS (10 on mainline
 * kernels), so open_softirq(11, ...) would index past softirq_vec[] —
 * confirm this kernel actually has a free slot at this index. */
MY_SOFTIRQ = 11, // example value; make sure no other softirq uses it
};
/**
 * @brief Handler installed for MY_SOFTIRQ via orig_open_softirq() and
 *        triggered with orig_raise_softirq(MY_SOFTIRQ).
 *
 * Runs in softirq context: it must not sleep and should return quickly.
 */
static void my_softirq_handler(struct softirq_action *action)
{
// This is a softirq handler: it cannot sleep and must execute quickly.
// get_raw_stack is intended to be invoked from here.
diag_task_all();
}
// for character device
static dev_t dev_num;
static struct cdev *watch_cdev;
@@ -177,6 +190,8 @@ int init_module(void) {
// orig_X | buffer
monitor_init();
orig_open_softirq(MY_SOFTIRQ, my_softirq_handler);
return 0;
}

View File

@@ -34,5 +34,9 @@ void clear_watch(pid_t pid); // for release
enum hrtimer_restart
check_variable_cb(struct hrtimer *timer); // hrtimer callback
// for test
int diag_pid(int id); // for test
int diag_tgid(int id); // for test
void diag_task_info(kernel_watch_timer *k_watch_timer);
void diag_task_all(void);
void diag_task_by_tgid(pid_t tgid);

View File

@@ -2,6 +2,7 @@
#include <linux/sched.h>
#include <linux/stacktrace.h>
#include <linux/interrupt.h>
// #include <linux/sched/task.h>
// #include <linux/sched/mm.h>
@@ -119,12 +120,15 @@ static void diag_tsk(struct task_struct *p, variable_monitor_task *tsk_info) {
unsigned int nr_bt;
// printk(KERN_INFO "diag_tsk\n");
diag_task_brief(p, &tsk_info->task); // task brief
// diag_task_user_stack(p, &tsk_info->user_stack); // user stack
if (tsk_info->task.sys_task == 1) { // system task
nr_bt = diag_task_kern_stack(p, &tsk_info->kern_stack); // kernel stack
} else { // other task
nr_bt = diag_task_kern_stack(p, &tsk_info->kern_stack); // kernel stack
dump_proc_chains_argv(1, p, &mm_tree_struct,
&tsk_info->proc_chains); // proc chains
diag_task_raw_stack(p, &tsk_info->raw_stack); // raw stack
}
}
static void push_tskinfo_2_buffer_orig(variable_monitor_task *tsk_info,
unsigned long *flags,
@@ -203,31 +207,25 @@ static void clear_all_watch(void) {
memset(kernel_wtimer_list, 0, sizeof(kernel_wtimer_list));
}
void diag_task_info(kernel_watch_timer *k_watch_timer) {
if (k_watch_timer->threshold_over_count <= 0) // if no threshold reached
return;
static void diag_vm_record(kernel_watch_timer *k_watch_timer,
unsigned char is_print) {
static variable_monitor_record vm_record;
// printk(KERN_INFO "diag_task_info_work\n");
struct task_struct *g, *p; // g: task group; p: task
unsigned long flags;
unsigned long event_id = get_cycles();
static variable_monitor_task tsk_info = {0};
static variable_monitor_record vm_record = {0};
kernel_watch_arg *kwarg;
int i;
unsigned long flags;
unsigned long event_id = get_cycles();
vm_record.id = event_id;
vm_record.et_type = VARIABLE_MONITOR_RECORD_TYPE;
vm_record.tv = k_watch_timer->tv;
vm_record.threshold_over_count = k_watch_timer->threshold_over_count;
int i;
for (i = 0; i < vm_record.threshold_over_count; i++) {
kwarg = &k_watch_timer->k_watch_args[k_watch_timer->threshold_buffer[i]];
k_w_arg2threshold(kwarg, &vm_record.threshold_record[i]);
}
rcu_read_lock();
diag_variant_buffer_spin_lock(&load_monitor_variant_buffer, flags);
@@ -236,36 +234,18 @@ void diag_task_info(kernel_watch_timer *k_watch_timer) {
diag_variant_buffer_write_nolock(&load_monitor_variant_buffer, &vm_record,
sizeof(variable_monitor_record));
diag_variant_buffer_seal(&load_monitor_variant_buffer);
// diag_variant_buffer_spin_unlock(&load_monitor_variant_buffer, flags);
// for task info
do_each_thread(g, p) {
if (p->__state == TASK_RUNNING || __task_contributes_to_load(p) ||
((READ_ONCE(p->__state) & TASK_IDLE) != 0)) {
get_task_struct(p); // count +1
tsk_info.et_type = VARIABLE_MONITOR_TASK_TYPE;
tsk_info.id = event_id;
tsk_info.tv = vm_record.tv;
diag_tsk(p, &tsk_info);
put_task_struct(p); // count -1
push_tskinfo_22_buffer(&tsk_info, &flags); // push to buffer
}
}
while_each_thread(g, p);
diag_variant_buffer_spin_unlock(&load_monitor_variant_buffer, flags);
rcu_read_unlock();
if (is_print) {
printk(KERN_INFO "-----------variable monitor----------\n");
printk(KERN_INFO "threshold exceeded, Timestamp %lld, Delay %lld:\n",
vm_record.tv, ktime_get_real() - vm_record.tv);
for (i = 0; i < vm_record.threshold_over_count; i++) {
printk(KERN_INFO
printk(
KERN_INFO
"\t: pid: %d, name: %s, ptr: %p, threshold:%lld, true_value:%lld\n",
vm_record.threshold_record[i].task_id,
vm_record.threshold_record[i]
@@ -276,9 +256,88 @@ void diag_task_info(kernel_watch_timer *k_watch_timer) {
}
printk(KERN_INFO "-------------------------------------\n");
}
}
/**
 * @brief Push a variable_monitor_task record for every thread of the
 *        thread group identified by @tgid into the variant buffer.
 *
 * Fixes vs. the previous version:
 *  - diag_tsk() was called with the group leader (`tsk`) on every
 *    iteration instead of the thread being walked (`thread`), so the
 *    leader was recorded N-1 times and the other threads never;
 *  - while_each_thread() used as a plain while-loop skips the starting
 *    task, so the leader itself was never visited — use the do/while
 *    form so the leader is included;
 *  - dropped the duplicate `tsk = NULL;` and the write-only `ret` local.
 *
 * Context: holds rcu_read_lock() and the variant-buffer spinlock for the
 * whole walk; must not sleep.
 */
void diag_task_by_tgid(pid_t tgid){
	struct task_struct *tsk = NULL;
	struct task_struct *thread;
	unsigned long flags;
	unsigned long event_id;
	/* NOTE(review): single static scratch record — assumes callers are
	 * serialized (never re-entered from another CPU/context); confirm. */
	static variable_monitor_task tsk_info;

	rcu_read_lock();
	if (orig_find_task_by_vpid)
		tsk = orig_find_task_by_vpid(tgid);
	if (!tsk) {
		rcu_read_unlock();
		return;
	}
	event_id = get_cycles(); /* one id groups all records of this dump */
	diag_variant_buffer_spin_lock(&load_monitor_variant_buffer, flags);
	thread = tsk;
	do {
		tsk_info.et_type = VARIABLE_MONITOR_TASK_TYPE;
		tsk_info.id = event_id;
		tsk_info.tv = ktime_get_real();
		diag_tsk(thread, &tsk_info);
		push_tskinfo_22_buffer(&tsk_info, &flags); // push to buffer
	} while_each_thread(tsk, thread);
	diag_variant_buffer_spin_unlock(&load_monitor_variant_buffer, flags);
	rcu_read_unlock();
}
/**
 * @brief Walk every thread in the system and push a record into the
 *        variant buffer for each task that is running, contributes to
 *        load, or is in TASK_IDLE.
 *
 * Fixes vs. the previous version: the leading pr_info() preceded the
 * declarations (declaration-after-statement, which the kernel build
 * warns on); declarations now come first. Dead commented-out
 * get/put_task_struct lines removed — the walk is fully covered by
 * rcu_read_lock(), so no extra reference is taken.
 *
 * Context: holds rcu_read_lock() and the variant-buffer spinlock; must
 * not sleep. Also called from softirq context (my_softirq_handler).
 */
void diag_task_all(void)
{
	/* NOTE(review): single static scratch record — assumes callers are
	 * serialized; concurrent invocations would interleave fields. */
	static variable_monitor_task tsk_info;
	struct task_struct *g, *p; // g: task group; p: task
	unsigned long event_id = get_cycles(); /* one id per dump */
	unsigned long flags;

	pr_info("diag_task_all, tv %lld\n", ktime_get_real());
	rcu_read_lock();
	diag_variant_buffer_spin_lock(&load_monitor_variant_buffer, flags);
	do_each_thread(g, p) {
		if (p->__state == TASK_RUNNING || __task_contributes_to_load(p) ||
		    ((READ_ONCE(p->__state) & TASK_IDLE) != 0)) {
			tsk_info.et_type = VARIABLE_MONITOR_TASK_TYPE;
			tsk_info.id = event_id;
			tsk_info.tv = ktime_get_real();
			diag_tsk(p, &tsk_info);
			push_tskinfo_22_buffer(&tsk_info, &flags); // push to buffer
		}
	}
	while_each_thread(g, p);
	diag_variant_buffer_spin_unlock(&load_monitor_variant_buffer, flags);
	rcu_read_unlock();
	pr_info("diag_task_all, finish tv %lld\n", ktime_get_real());
}
/**
 * @brief Entry point when a watch timer detects threshold crossings:
 *        dump all task info, then write the threshold record.
 *
 * Returns immediately if no watched variable exceeded its threshold.
 */
void diag_task_info(kernel_watch_timer *k_watch_timer) {
if (k_watch_timer->threshold_over_count <= 0) // if no threshold reached
return;
diag_task_all();
diag_vm_record(k_watch_timer, 1); // 1 => is_print: also printk a summary
}
/**
* @brief diag task info, for work queue
*
@@ -400,6 +459,10 @@ void clear_watch(pid_t pid) {
start_all_hrTimer(); // restart timer
}
// enum {
// MY_SOFTIRQ = 11, // 这个值是示例,确保它没有被其他软中断使用
// };
/**
* @brief main callback function
*
@@ -429,9 +492,13 @@ enum hrtimer_restart check_variable_cb(struct hrtimer *timer) {
{
k_watch_timer->threshold_over_count = j;
k_watch_timer->tv = ktime_get_real();
pr_info("threshold reached, tv %lld\n", k_watch_timer->tv);
// highpri_wq
queue_work(system_highpri_wq, &k_watch_timer->wk);
// queue_work(system_highpri_wq, &k_watch_timer->wk);
diag_task_info(k_watch_timer);
// orig_raise_softirq(MY_SOFTIRQ); // for test
// restart timer after dump_reset_sec sec
hrtimer_forward(timer, timer->base->get_time(),
ktime_set(dump_reset_sec, 0));

View File

@@ -11,6 +11,8 @@ void vm_perf_overflow_callback(struct perf_event *event,
pr_info("perf event callback\n");
// perf_event_disable(event);
// 如果 perf_sample_data 有调用堆栈信息
// if (data->callchain) {
// callchain = data->callchain;
@@ -42,10 +44,10 @@ void vm_perf_overflow_callback(struct perf_event *event,
struct perf_event_attr pea = {
.type = PERF_TYPE_SOFTWARE,
.size = sizeof(struct perf_event_attr),
.config = PERF_COUNT_SW_CPU_CLOCK,
.config = PERF_COUNT_SW_DUMMY,
.sample_period = 1,
.sample_type = PERF_SAMPLE_CALLCHAIN,
// .disabled = 1,
.disabled = 1,
};
#include <linux/cpumask.h>
@@ -61,32 +63,38 @@ void setup_perf_event_for_task(struct task_struct *tsk) {
pr_info("Perf event already created\n");
return;
}
int cpu;
struct perf_event **events;
for_each_possible_cpu(cpu) {
struct perf_event **event = per_cpu_ptr(events, cpu);
if (cpu_is_offline(cpu)) {
*event = NULL;
continue;
}
*event = perf_event_create_kernel_counter(&pea, cpu, tsk,
vm_perf_overflow_callback, NULL);
if (IS_ERR(*event)) {
printk(KERN_INFO "create perf event failure\n");
// return -1;
}
}
// int cpu;
// struct perf_event **events;
// for_each_possible_cpu(cpu) {
// struct perf_event **event = per_cpu_ptr(events, cpu);
// if (cpu_is_offline(cpu)) {
// pr_info("cpu %d is offline\n", cpu);
// *event = NULL;
// continue;
// }
// *event = perf_event_create_kernel_counter(&pea, cpu, tsk,
// vm_perf_overflow_callback, NULL);
// // perf_event_create_kernel_counter(&pea, cpu, tsk,
// // vm_perf_overflow_callback,
// // NULL);
// if (IS_ERR(*event)) {
// printk(KERN_INFO "create perf event failure\n");
// // return -1;
// }
// // perf_event_enable(*event);
// }
// pe = perf_event_create_kernel_counter(&pea, tsk->on_cpu, tsk,
// vm_perf_callback, NULL);
pe = perf_event_create_kernel_counter(&pea, tsk->on_cpu, tsk,
pe = perf_event_create_kernel_counter(&pea, -1, tsk,
vm_perf_overflow_callback, NULL);
if (IS_ERR(pe)) {
pr_info("Error in perf_event_create_kernel_counter\n");
return;
}
// perf_event_enable(pe); // enable perf event
perf_event_enable(pe); // enable perf event
}
/**

View File

@@ -8,6 +8,7 @@
const char* proc_dir = "variable_monitor";
int def_interval_ns = DEFAULT_INTERVAL_NS;
int dump_reset_sec = DEFAULT_DUMP_RESET_SEC;
int sample_all = DEFAULT_SAMPLE_ALL;
static ssize_t read_proc(struct file *file, char __user *buf, size_t count,
loff_t *offset, int *var) {
@@ -54,6 +55,16 @@ static ssize_t write_proc_dump_reset_sec(struct file *file,
return write_proc(file, buf, count, offset, &dump_reset_sec);
}
static ssize_t read_proc_sample_all(struct file *file, char __user *buf,
size_t count, loff_t *offset) {
return read_proc(file, buf, count, offset, &sample_all);
}
static ssize_t write_proc_sample_all(struct file *file,
const char __user *buf, size_t count,
loff_t *offset) {
return write_proc(file, buf, count, offset, &sample_all);
}
static const struct proc_ops proc_def_interval_ns_ops = {
.proc_read = read_proc_def_interval_ns,
.proc_write = write_proc_def_interval_ns,
@@ -64,6 +75,11 @@ static const struct proc_ops proc_dump_reset_sec_ops = {
.proc_write = write_proc_dump_reset_sec,
};
static const struct proc_ops proc_sample_all_ops = {
.proc_read = read_proc_sample_all,
.proc_write = write_proc_sample_all,
};
int monitor_proc_init(void) {
struct proc_dir_entry *dir;
@@ -75,6 +91,7 @@ int monitor_proc_init(void) {
proc_create("def_interval_ns", 0666, dir, &proc_def_interval_ns_ops);
proc_create("dump_reset_sec", 0666, dir, &proc_dump_reset_sec_ops);
proc_create("sample_all", 0666, dir, &proc_sample_all_ops);
return 0;
}

View File

@@ -2,10 +2,12 @@
#define MODULE_MONITOR_PROC_H
#define DEFAULT_INTERVAL_NS 10000 // 10us
#define DEFAULT_DUMP_RESET_SEC 60 // 60s
#define DEFAULT_DUMP_RESET_SEC 10 // 60s
#define DEFAULT_SAMPLE_ALL 0
extern int def_interval_ns;
extern int dump_reset_sec;
extern int sample_all;
int monitor_proc_init(void);
int monitor_proc_exit(void);

View File

@@ -135,17 +135,17 @@ static inline void save_stack_trace_user_remote(struct task_struct *tsk,
const void __user *fp = (const void __user *)regs->bp;
int count = 0;
if (in_atomic()) {
printk(KERN_INFO "save_stack_trace_user_remote %d: task in_atomic\n",
tsk->pid);
return;
}
// if (in_atomic()) {
// printk(KERN_INFO "save_stack_trace_user_remote %d: task in_atomic\n",
// tsk->pid);
// return;
// }
if (irqs_disabled()) {
printk(KERN_INFO "save_stack_trace_user_remote %d: task in irqs_disabled\n",
tsk->pid);
return;
}
// if (irqs_disabled()) {
// printk(KERN_INFO "save_stack_trace_user_remote %d: task in irqs_disabled\n",
// tsk->pid);
// return;
// }
if (trace->nr_entries < trace->max_entries)
trace->entries[trace->nr_entries++] = regs->ip;
@@ -288,15 +288,15 @@ static int diagnose_task_raw_stack_remote(struct task_struct *tsk, void *to,
int ret;
struct mm_struct *mm;
if (in_atomic()) {
// if (in_atomic()) {
// printk(KERN_INFO "task_raw_stack_remote %d in_atomic\n", tsk->pid);
return 0;
}
// return 0;
// }
if (irqs_disabled()) {
// if (irqs_disabled()) {
// printk(KERN_INFO "task_raw_stack_remote %d irqs_disabled\n", tsk->pid);
return 0;
}
// return 0;
// }
if (in_atomic() || irqs_disabled()) {
return 0;

View File

@@ -59,6 +59,7 @@ typedef struct {
unsigned long sys_task;
/**
* 1->user mode 0->sys mode -1->unknown
* may not be accurate
*/
unsigned long user_mode;
char comm[TASK_COMM_LEN];

View File

@@ -1,6 +1,6 @@
OUTPUT_DIR ?= .
TARGET_EXE=ucli
SOURCES=ucli.cc ucli-lib.cc unwind.cc symbol.cc accessors.cc elf.cc
SOURCES=ucli.cc ucli-lib.cc unwind.cc symbol.cc accessors.cc elf.cc helpfun.cc
OBJECTS=$(SOURCES:.cc=.o)
CFLAGS=-g -O0