Linux:用户空间非法指针coredump简析
1. 前言
限于作者能力水平,本文可能存在谬误,因此而给读者带来的损失,作者不做任何承诺。
2. 背景
本文分析基于 ARM32 架构、Linux-4.14 内核代码。
3. 问题分析
3.1 测试范例
/*
 * Minimal reproducer: write through a null pointer so that the access
 * faults and the process is terminated by SIGSEGV.
 */
int main(void)
{
	/*
	 * The volatile qualifier forces the compiler to emit the store;
	 * a plain null-pointer store is undefined behaviour and may be
	 * removed or replaced by a trap instruction.
	 */
	*(volatile int *)0 = 8;
	return 0;
}
运行该程序,进程会因非法内存访问收到 SIGSEGV 信号并产生 coredump(前提是系统允许生成 core 文件,例如已执行 ulimit -c unlimited)。
3.2 分析
3.2.1 ARM32 3级页表(PAE使能)
上图看起来有点复杂,我们简化一下,Linux 下内存访问的过程大概是这样的:
          MMU页表
虚拟地址 --------> 物理地址
3.2.2 生成 coredump
每当发生内存访问时,在从虚拟地址到物理地址的转换路径中,不管是哪一级页表没有就绪,ARM32 系统都会产生缺页中断(对数据访问而言即 Data Abort 异常),因此我们分析的起点就是缺页中断的入口。先看看中断向量表:
@ arch/arm/kernel/entry-armv.Svector_stub dabt, ABT_MODE, 8.long __dabt_usr @ 0 (USR_26 / USR_32).long __dabt_invalid @ 1 (FIQ_26 / FIQ_32).long __dabt_invalid @ 2 (IRQ_26 / IRQ_32).long __dabt_svc @ 3 (SVC_26 / SVC_32).long __dabt_invalid @ 4.long __dabt_invalid @ 5.long __dabt_invalid @ 6.long __dabt_invalid @ 7.long __dabt_invalid @ 8.long __dabt_invalid @ 9.long __dabt_invalid @ a.long __dabt_invalid @ b.long __dabt_invalid @ c.long __dabt_invalid @ d.long __dabt_invalid @ e.long __dabt_invalid @ f.globl vector_fiq/* 各个CPU模式下的中断向量表指针 */.section .vectors, "ax", %progbits
.L__vectors_start:W(b) vector_rstW(b) vector_undW(ldr) pc, .L__vectors_start + 0x1000W(b) vector_pabtW(b) vector_dabt /* DataAbort模式的中断向量表指针 */W(b) vector_addrexcptnW(b) vector_irqW(b) vector_fiq/* 缺页(DataAbort)中断可产生于[SVC、用户]两种模式下 */
__dabt_usr: /* 用户模式缺页中断 */...dabt_helper // bl CPU_DABORT_HANDLER -> bl v7_early_abort....align 5
__dabt_svc: /* SVC模式缺页中断 */...dabt_helper // bl CPU_DABORT_HANDLER -> bl v7_early_abort...
@ arch/arm/mm/abort-ev7.S.align 5
ENTRY(v7_early_abort)mrc p15, 0, r1, c5, c0, 0 @ get FSRmrc p15, 0, r0, c6, c0, 0 @ get FARuaccess_disable ip @ disable userspace access...b do_DataAbort
ENDPROC(v7_early_abort)
/** 以3级页表举例。* arch/arm/mm/fsr-3level.c */
static struct fsr_info fsr_info[] = {.../* 缺页中断处理接口 */{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 1 translation fault" }, /* 1级页目录转换接口 */{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" }, /* 2级页目录转换接口 */{ do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" }, /* 3级页表项转换接口 */...
};/** arch/arm/mm/fault.c */
asmlinkage void __exception
do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{const struct fsr_info *inf = fsr_info + fsr_fs(fsr);struct siginfo info;/* 调用具体类型缺页中断的入口: do_translation_fault() 或 do_page_fault() */if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs))return;...
}
如果是第 1,2 级页表导致的缺页中断,会进入 do_translation_fault():
static int __kprobes
do_translation_fault(unsigned long addr, unsigned int fsr,struct pt_regs *regs)
{if (addr < TASK_SIZE) /* 用户空间地址 */return do_page_fault(addr, fsr, regs);...
}
第 3 级页表导致的缺页中断,直接进入 do_page_fault();第 1,2 级页表导致的缺页中断,在出错地址属于用户空间(addr < TASK_SIZE)时同样会转入 do_page_fault(),二者殊途同归。来看 do_page_fault():
static int __kprobes
do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{struct task_struct *tsk;...unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;...tsk = current;...fault = __do_page_fault(mm, addr, fsr, flags, tsk);...
}
static int __kprobes
__do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,unsigned int flags, struct task_struct *tsk)
{/** 查看addr是否存在对应的 vma ? * 如果没有的话,意味着非法地址访问.*/vma = find_vma(mm, addr);fault = VM_FAULT_BADMAP;if (unlikely(!vma))goto out;...out:return fault;
}
地址 0 不在进程任何 vma 的范围内,__do_page_fault() 带着 fault = VM_FAULT_BADMAP 返回到 do_page_fault():
static int __kprobes
do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{struct task_struct *tsk;...unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;...tsk = current;...fault = __do_page_fault(mm, addr, fsr, flags, tsk);...if (!(fault & VM_FAULT_ERROR) && flags & FAULT_FLAG_ALLOW_RETRY) {if (fault & VM_FAULT_MAJOR) {tsk->maj_flt++;...} else {tsk->min_flt++;...}...}...if (fault & VM_FAULT_SIGBUS) {...} else { /* 测试代码场景走这里 *//** Something tried to access memory that* isn't in our memory map..*/sig = SIGSEGV;code = fault == VM_FAULT_BADACCESS ?SEGV_ACCERR : SEGV_MAPERR;}__do_user_fault(tsk, addr, fsr, sig, code, regs);return 0;...
}
static void
__do_user_fault(struct task_struct *tsk, unsigned long addr,unsigned int fsr, unsigned int sig, int code,struct pt_regs *regs)
{
	struct siginfo si;
	...
#ifdef CONFIG_DEBUG_USER
	if (((user_debug & UDBG_SEGV) && (sig == SIGSEGV)) ||
	    ((user_debug & UDBG_BUS) && (sig == SIGBUS))) {
		printk(KERN_DEBUG "%s: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n",
		       tsk->comm, sig, addr, fsr);
		show_pte(tsk->mm, addr);
		show_regs(regs);
	}
#endif
	tsk->thread.address = addr;
	tsk->thread.error_code = fsr;
	tsk->thread.trap_no = 14;
	si.si_signo = sig;
	si.si_errno = 0;
	si.si_code = code;
	si.si_addr = (void __user *)addr;
	force_sig_info(sig, &si, tsk); /* 给出错进程发送 SIGSEGV 信号 */
}
出错进程在返回用户空间之前,处理挂起的 SIGSEGV 信号:
asmlinkage int
do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
{...do {if (likely(thread_flags & _TIF_NEED_RESCHED)) {...} else {if (unlikely(!user_mode(regs)))return 0;if (thread_flags & _TIF_SIGPENDING) {int restart = do_signal(regs, syscall); /* 信号处理 */...} else if (thread_flags & _TIF_UPROBE) {...} else {...}}...} while (thread_flags & _TIF_WORK_MASK);return 0;
}
static int do_signal(struct pt_regs *regs, int syscall)
{...if (get_signal(&ksig)) {...} else {...}return 0;
}
int get_signal(struct ksignal *ksig)
{...for (;;) {...signr = dequeue_synchronous_signal(&ksig->info);if (!signr)signr = dequeue_signal(current, &current->blocked, &ksig->info);if (!signr)break; /* will return 0 */...if (ka->sa.sa_handler != SIG_DFL) {/* Run the handler. */ksig->ka = *ka;if (ka->sa.sa_flags & SA_ONESHOT)ka->sa.sa_handler = SIG_DFL;break; /* will return non-zero "signr" value */}...}fatal:if (sig_kernel_coredump(signr)) {if (print_fatal_signals)print_fatal_signal(ksig->info.si_signo);...do_coredump(&ksig->info); /* 产生 coredump 文件 */}...
}
4. 调试 coredump 问题
产生的 coredump 文件,可用 gdb 进行调试,例如执行 gdb <可执行文件> <core 文件>,再用 bt 命令查看出错时的调用栈。
5. 后记
本文涉及到信号处理,更多信号处理细节可参考 Linux信号处理简析 。本文没有对程序的 coredump 细节进行展开,它有点复杂,留待后续有机会再进行述说。