Linux SysRq 简介
文章目录
- 1. 前言
- 2. 背景
- 3. Linux SysRq
- 3.1 SysRq 简介
- 3.1.1 SysRq 初始化
- 3.2 通过 procfs 发起 SysRq 请求
- 3.2.1 修改内核日志等级
- 3.2.1.1 触发
- 3.2.1.2 实现简析
- 3.2.2 手动触发内核 panic
- 3.2.2.1 触发
- 3.2.2.2 实现简析
- 3.2.2.3 应用场景
- 3.2.3 其它 SysRq 请求
- 3.3 通过 特殊按键 发起 SysRq 请求
- 4. 参考资料
1. 前言
限于作者能力水平,本文可能存在谬误,因此而给读者带来的损失,作者不做任何承诺。
2. 背景
本文基于 ARM32 + Linux 4.14 内核源码进行分析(注:文中的示例运行日志采集自 Linux 4.19.94 内核,与所分析的源码版本略有差异)。
3. Linux SysRq
3.1 SysRq 简介
Linux SysRq 是内核提供的一种机制:它允许通过 特殊按键 或 procfs 文件节点 向系统发起一些特殊请求,用于查看系统状态和调试。
3.1.1 SysRq 初始化
使用 SysRq
功能,需要开启内核配置项 CONFIG_MAGIC_SYSRQ
。来看一下 SysRq
的初始化:
/* drivers/tty/sysrq.c */...static struct input_handler sysrq_handler = {.filter = sysrq_filter,.connect = sysrq_connect,.disconnect = sysrq_disconnect,.name = "sysrq",.id_table = sysrq_ids,
};static bool sysrq_handler_registered;static inline void sysrq_register_handler(void)
{int error;/* 注册 以 【特殊按键】 方式发起 SysRq 的按键处理接口 */error = input_register_handler(&sysrq_handler);if (error)pr_err("Failed to register input handler, error %d", error);elsesysrq_handler_registered = true;
}...static const struct file_operations proc_sysrq_trigger_operations = {.write = write_sysrq_trigger,.llseek = noop_llseek,
};static void sysrq_init_procfs(void)
{/* 建立 /proc/sysrq-trigger 节点 */if (!proc_create("sysrq-trigger", S_IWUSR, NULL,&proc_sysrq_trigger_operations))pr_err("Failed to register proc interface\n");
}static int __init sysrq_init(void)
{sysrq_init_procfs();if (sysrq_on())sysrq_register_handler();return 0;
}
device_initcall(sysrq_init);
3.2 通过 procfs 发起 SysRq 请求
本小节讨论通过 procfs 文件节点发起 SysRq 请求的方式。这类请求都是通过向 /proc/sysrq-trigger 写入预定义的字符来发起的:
# echo X > /proc/sysrq-trigger
该写入操作需要特权用户(root)权限。下面讲述几个常见的 SysRq
请求操作,并对它们的实现做简单分析。所有支持的 SysRq
请求列举在数据表格 sysrq_key_table[]
中:
static struct sysrq_key_op *sysrq_key_table[36] = {/* 修改内核日志等级 */&sysrq_loglevel_op, /* 0 */&sysrq_loglevel_op, /* 1 */&sysrq_loglevel_op, /* 2 */&sysrq_loglevel_op, /* 3 */&sysrq_loglevel_op, /* 4 */&sysrq_loglevel_op, /* 5 */&sysrq_loglevel_op, /* 6 */&sysrq_loglevel_op, /* 7 */&sysrq_loglevel_op, /* 8 */&sysrq_loglevel_op, /* 9 */.../* 触发系统重启,不会同步或者卸载磁盘 */&sysrq_reboot_op, /* b *//* 触发内核 crash */&sysrq_crash_op, /* c *//* 显示所有持有的锁(需开启 CONFIG_LOCKDEP) */&sysrq_showlocks_op, /* d *//* 向除 init 外的所有进程发送 SIGTERM 信号 */&sysrq_term_op, /* e *//* 触发 OOM 回收 */&sysrq_moom_op, /* f */.../* 向除 init 外的所有进程发送 SIGKILL 信号 */&sysrq_kill_op, /* i *//* 文件系统解冻操作 */
#ifdef CONFIG_BLOCK&sysrq_thaw_op, /* j */
#elseNULL, /* j */
#endif&sysrq_SAK_op, /* k *//* 显示所有活动 cpu 的栈回溯 */
#ifdef CONFIG_SMP&sysrq_showallcpus_op, /* l */
#elseNULL, /* l */
#endif/* 显示系统内存信息, 如同 cat /proc/meminfo */&sysrq_showmem_op, /* m *//* 用于将所有实时任务变成普通任务 */&sysrq_unrt_op, /* n *//* o: This will often be registered as 'Off' at init time */NULL, /* o *//* 显示 CPU 当前寄存器和标志位 */&sysrq_showregs_op, /* p *//* 显示每个 CPU 上的高精度定时器 */&sysrq_show_timers_op, /* q *//* 关闭键盘 RAW 模式 */&sysrq_unraw_op, /* r *//* 尝试同步所有的已挂载文件系统 */&sysrq_sync_op, /* s *//* 导出当前所有任务列表和它们的信息 */&sysrq_showstate_op, /* t *//* 尝试重新挂载已挂载文件系统为只读 */&sysrq_mountro_op, /* u *//* v: May be registered for frame buffer console restore */NULL, /* v *//* 导出处于不可中断状态(阻塞)的任务 */&sysrq_showstate_blocked_op, /* w *//* x: May be registered on mips for TLB dump *//* x: May be registered on ppc/powerpc for xmon *//* x: May be registered on sparc64 for global PMU dump */NULL, /* x *//* y: May be registered on sparc64 for global register dump */NULL, /* y *//* 导出 ftrace 缓存信息 */&sysrq_ftrace_dump_op, /* z */
};
向 /proc/sysrq-trigger 写入字符时,会触发如下调用序列:
write()...write_sysrq_trigger()char c;get_user(c, buf);__handle_sysrq(c, false)void __handle_sysrq(int key, bool check_mask)
{struct sysrq_key_op *op_p;int orig_log_level;int i;rcu_sysrq_start();rcu_read_lock();/** Raise the apparent loglevel to maximum so that the sysrq header* is shown to provide the user with positive feedback. We do not* simply emit this at KERN_EMERG as that would change message* routing in the consumers of /proc/kmsg.*/orig_log_level = console_loglevel;console_loglevel = CONSOLE_LOGLEVEL_DEFAULT;pr_info("SysRq : ");op_p = __sysrq_get_key_op(key); /* 从 sysrq_key_table[] 查找 c 对应的 SysRq 操作接口 */if (op_p) {/** Should we check for enabled operations (/proc/sysrq-trigger* should not) and is the invoked operation enabled?*/if (!check_mask || sysrq_on_mask(op_p->enable_mask)) {pr_cont("%s\n", op_p->action_msg);console_loglevel = orig_log_level;op_p->handler(key); /* 调用 SysRq 操作接口:sysrq_handle_crash(), ... */} else {...}} else {...}rcu_read_unlock();rcu_sysrq_end();
}
3.2.1 修改内核日志等级
3.2.1.1 触发
# echo 3 > /proc/sysrq-trigger
[ 6956.852664] sysrq: SysRq : Changing Loglevel
[ 6956.856987] sysrq: Loglevel set to 3
3.2.1.2 实现简析
write()...write_sysrq_trigger()sysrq_handle_loglevel()
/* drivers/tty/sysrq.c */static void sysrq_handle_loglevel(int key)
{int i;i = key - '0';console_loglevel = CONSOLE_LOGLEVEL_DEFAULT;pr_info("Loglevel set to %d\n", i);console_loglevel = i;
}
3.2.2 手动触发内核 panic
3.2.2.1 触发
# echo c > /proc/sysrq-trigger
[ 856.968802] sysrq: SysRq : Trigger a crash
[ 856.973059] Unable to handle kernel NULL pointer dereference at virtual address 00000000
[ 856.981194] pgd = 6e7ca3d4
[ 856.984471] [00000000] *pgd=8a84c831, *pte=00000000, *ppte=00000000
[ 856.990789] Internal error: Oops: 817 [#1] PREEMPT ARM
[ 856.995946] Modules linked in:
[ 856.999020] CPU: 0 PID: 123 Comm: sh Not tainted 4.19.94-g1194fe2-dirty #102
[ 857.006095] Hardware name: Generic AM33XX (Flattened Device Tree)
[ 857.012236] PC is at sysrq_handle_crash+0x2c/0x34
[ 857.016958] LR is at sysrq_handle_crash+0x28/0x34
[ 857.021678] pc : [<c04fd9bc>] lr : [<c04fd9b8>] psr: 60080013
[ 857.027969] sp : ca8bbe38 ip : ca8bbe38 fp : ca8bbe4c
[ 857.033212] r10: 00000004 r9 : ca8bbf60 r8 : c0e2c544
[ 857.038455] r7 : 00000000 r6 : 00000063 r5 : 00000007 r4 : 00000001
[ 857.045007] r3 : 00000000 r2 : 00000000 r1 : 00000000 r0 : c0e14618
[ 857.051561] Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none
[ 857.058723] Control: 10c5387d Table: 8a868019 DAC: 00000051
[ 857.064493] Process sh (pid: 123, stack limit = 0xce84bd3c)
[ 857.070087] Stack: (0xca8bbe38 to 0xca8bc000)
[ 857.074462] be20: c0e0ff48 00000007
[ 857.082676] be40: ca8bbe7c ca8bbe50 c04fdf6c c04fd99c 00000055 00000002 c04fe474 00000000
[ 857.090890] be60: ca8bbf60 00000002 ca8bbf60 00000004 ca8bbe94 ca8bbe80 c04fe4d0 c04fdec8
[ 857.099104] be80: cf26a980 c04fe474 ca8bbeb4 ca8bbe98 c029e968 c04fe480 c0e03048 ca87e240
[ 857.107317] bea0: c029e908 ca8bbf60 ca8bbf2c ca8bbeb8 c02371f8 c029e914 00000000 00000000
[ 857.115532] bec0: fffffff6 c0e03048 ca8bbf34 ca8bbed8 c012e2ac c01467e4 c0112fdc 00000004
[ 857.123745] bee0: 00000007 00000000 00000000 00000000 00000000 00000000 ca873000 c012bc68
[ 857.131960] bf00: 00000100 42749327 00000002 ca87e240 000c4b38 ca8bbf60 00000002 ca8ba000
[ 857.140174] bf20: ca8bbf5c ca8bbf30 c02374b4 c02371c4 c012e42c c012e228 ca8bbf5c ca87e240
[ 857.148388] bf40: c0e03048 ca87e240 000c4b38 00000002 ca8bbf94 ca8bbf60 c023773c c0237414
[ 857.156602] bf60: 00000000 00000000 ca8bbf94 42749327 c0257618 000c2d5c 00000001 000c4b38
[ 857.164816] bf80: 00000004 c0101204 ca8bbfa4 ca8bbf98 c02377c0 c02376dc 00000000 ca8bbfa8
[ 857.173030] bfa0: c0101000 c02377bc 000c2d5c 00000001 00000001 000c4b38 00000002 00000000
[ 857.181243] bfc0: 000c2d5c 00000001 000c4b38 00000004 00000001 00000020 00000000 00091144
[ 857.189458] bfe0: 00000000 bec9a4bc 0001a908 b6f53556 60080030 00000001 00000000 00000000
[ 857.197664] Backtrace:
[ 857.200127] [<c04fd990>] (sysrq_handle_crash) from [<c04fdf6c>] (__handle_sysrq+0xb0/0x180)
[ 857.208513] r5:00000007 r4:c0e0ff48
[ 857.212106] [<c04fdebc>] (__handle_sysrq) from [<c04fe4d0>] (write_sysrq_trigger+0x5c/0x6c)
[ 857.220494] r10:00000004 r9:ca8bbf60 r8:00000002 r7:ca8bbf60 r6:00000000 r5:c04fe474
[ 857.228354] r4:00000002 r3:00000055
[ 857.231948] [<c04fe474>] (write_sysrq_trigger) from [<c029e968>] (proc_reg_write+0x60/0x90)
[ 857.240332] r5:c04fe474 r4:cf26a980
[ 857.243927] [<c029e908>] (proc_reg_write) from [<c02371f8>] (__vfs_write+0x40/0x164)
[ 857.251703] r7:ca8bbf60 r6:c029e908 r5:ca87e240 r4:c0e03048
[ 857.257387] [<c02371b8>] (__vfs_write) from [<c02374b4>] (vfs_write+0xac/0x188)
[ 857.264729] r9:ca8ba000 r8:00000002 r7:ca8bbf60 r6:000c4b38 r5:ca87e240 r4:00000002
[ 857.272506] [<c0237408>] (vfs_write) from [<c023773c>] (ksys_write+0x6c/0xe0)
[ 857.279672] r8:00000002 r7:000c4b38 r6:ca87e240 r5:c0e03048 r4:ca87e240
[ 857.286402] [<c02376d0>] (ksys_write) from [<c02377c0>] (sys_write+0x10/0x14)
[ 857.293568] r8:c0101204 r7:00000004 r6:000c4b38 r5:00000001 r4:000c2d5c
[ 857.300300] [<c02377b0>] (sys_write) from [<c0101000>] (ret_fast_syscall+0x0/0x54)
[ 857.307899] Exception stack(0xca8bbfa8 to 0xca8bbff0)
[ 857.312971] bfa0: 000c2d5c 00000001 00000001 000c4b38 00000002 00000000
[ 857.321185] bfc0: 000c2d5c 00000001 000c4b38 00000004 00000001 00000020 00000000 00091144
[ 857.329397] bfe0: 00000000 bec9a4bc 0001a908 b6f53556
[ 857.334472] Code: e5834000 f57ff04e ebf05e88 e3a03000 (e5c34000)
[ 857.343688] ---[ end trace 0caa0a25d6458889 ]---
[ 857.348334] Kernel panic - not syncing: Fatal exception
[ 857.353587] ---[ end Kernel panic - not syncing: Fatal exception ]---
3.2.2.2 实现简析
write()...write_sysrq_trigger()sysrq_handle_crash()
/* drivers/tty/sysrq.c */static void sysrq_handle_crash(int key)
{char *killer = NULL;/* we need to release the RCU read lock here,* otherwise we get an annoying* 'BUG: sleeping function called from invalid context'* complaint from the kernel before the panic.*/rcu_read_unlock();/* 强制 oops 导致内核 panic */panic_on_oops = 1; /* force panic */wmb();*killer = 1; /* 写空指针导致 页表访问异常 */
}
static struct sysrq_key_op sysrq_crash_op = {.handler = sysrq_handle_crash,.help_msg = "crash(c)",.action_msg = "Trigger a crash",.enable_mask = SYSRQ_ENABLE_DUMP,
};static struct sysrq_key_op *sysrq_key_table[36] = {...&sysrq_crash_op, /* c */...
};
假定使用 ARM32 3 级分页:
/* arch/arm/mm/fsr-3level.c */static struct fsr_info fsr_info[] = {...{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 1 translation fault" },{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" },{ do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" },...
};do_translation_fault() / do_page_fault()...__do_kernel_fault(mm, addr, fsr, regs)
/* arch/arm/mm/fault.c */static void
__do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,struct pt_regs *regs)
{...bust_spinlocks(1);pr_alert("Unable to handle kernel %s at virtual address %08lx\n",(addr < PAGE_SIZE) ? "NULL pointer dereference" :"paging request", addr);show_pte(mm, addr); /* 打印地址 @addr 的 pgd, pmd, ... pte */die("Oops", regs, fsr); /* arch/arm/kernel/trap.c */...oops_end(flags, regs, sig)...if (panic_on_oops) /* 促使 oops 导致内核 panic */panic("Fatal exception");...bust_spinlocks(0);do_exit(SIGKILL);
}
3.2.2.3 应用场景
有时候系统或程序卡住了,想知道当前的调用链,此时可以向 /proc/sysrq-trigger 写入 c 来触发内核 panic,从而导出堆栈记录。
3.2.3 其它 SysRq 请求
感兴趣的读者可自行分析。
3.3 通过 特殊按键 发起 SysRq 请求
发起 SysRq
请求的具体按键,各个硬件平台各有不同,读者可参考文章末尾资料,或查阅相关资料了解。本小节对通过 特殊按键
的方式发起 SysRq
请求的过程做简要分析,如下:
/* drivers/input/input.c */
input_report_key()input_handle_event(dev, type, code, value)input_pass_values(dev, dev->vals, dev->num_vals)/* 将事件数据传递给挂接在输入设备 input_dev 上 input_handler 处理 */list_for_each_entry_rcu(handle, &dev->h_list, d_node)if (handle->open) {count = input_to_handler(handle, vals, count);if (handler->filter) {for (v = vals; v != vals + count; v++) {if (handler->filter(handle, v->type, v->code, v->value)) /* sysrq_filter(), ... */continue;if (end != v)*end = *v;end++;}count = end - vals;}/* 所有按键事件已被过滤处理(如 SysRq 按键事件),没有按键事件需做进一步处理 */if (!count)return 0;/* 按键事件处理 */if (handler->events)handler->events(handle, vals, count);else if (handler->event)for (v = vals; v != vals + count; v++)handler->event(handle, v->type, v->code, v->value);return count;}/* drivers/tty/sysrq.c */
sysrq_filter()sysrq_handle_keypress(sysrq, code, value)static bool sysrq_handle_keypress(struct sysrq_state *sysrq,unsigned int code, int value)
{...switch (code) {case KEY_LEFTALT:case KEY_RIGHTALT:...break;case KEY_SYSRQ:...break;default:if (sysrq->active && value && value != 2) {sysrq->need_reinject = false;__handle_sysrq(sysrq_xlate[code], true); /* 处理 SYSRQ 按键事件 */}break;}...
}
更多按键处理的细节可参考博文:Linux输入子系统简析 。
4. 参考资料
https://www.kernel.org/doc/html/latest/translations/zh_CN/admin-guide/sysrq.html