调试断点原理

调试断点依赖于父进程和子进程之间的通信。打断点实际上是在被调试的程序中改写断点处的程序代码，使被调试的程序运行到断点时暂时停止，并通知父进程（调试器进程），然后父进程就能够得到子进程的变量和状态，达到调试的目的。

把断点地址处指令的第一个字节修改为0xcc，0xcc就是int3指令的机器码，含义是使当前用户态程序发生中断，告诉内核当前程序有断点，于是内核会向当前进程发送SIGTRAP信号，使当前进程暂停。父进程调用wait函数，等待子进程的运行状态发生改变，这时子进程由于int3中断而暂停，父进程就可以开始调试子进程的程序了。

我们希望在程序执行第二个printf时打断点，先对break.o执行 objdump -S break.o：

#include <stdio.h>

int main()

{

4004f4: 55 push %rbp

4004f5: 48 89 e5 mov %rsp,%rbp

printf( "~~~~~~~~~~~~>Before breakpoint\n" );

4004f8: bf 10 06 40 00 mov $0x400610,%edi

4004fd: e8 ee fe ff ff callq 4003f0

printf( "~~~~~~~~~~~~>After breakpoint\n" );

400502: bf 30 06 40 00 mov $0x400630,%edi

400507: e8 e4 fe ff ff callq 4003f0

printf( "~~~~~~~~~~~~>final\n" );

40050c: bf 4f 06 40 00 mov $0x40064f,%edi

400511: e8 da fe ff ff callq 4003f0

return 0;

400516: b8 00 00 00 00 mov $0x0,%eax

}

通过上面的dump，想在输出 After breakpoint 这行代码处打断点，可行的做法就是把地址0x400502处的代码保存起来，然后把它的第一个字节修改成0xcc；如果想继续执行，再把原来的代码写回原来的地方，然后通知子进程继续执行即可。

测试程序代码：

11 int main(void)

12 {

13 int wait_val;

14 int pid;

15 long addr;

16 long data;

17 long orginData;

18 struct user_regs_struct regs;

19 setvbuf(stdout,NULL,_IONBF,0); //printf stdout 默认行缓冲，setvbuf输出无buf，直接输出

20 switch (pid = fork()) {

21 case -1:

22 perror("fork");

23 break;

24 case 0:

25 ptrace(PTRACE_TRACEME,0, 0, 0); //子进程设置traceme，使得父进程trace子进程

26 execl("/home/djj/tmp/break.o", NULL, NULL);

27 default:

28 wait(&wait_val); //子进程设置了traceme，在执行exec函数的时候，内核会首先产生SIGTRAP信号，先给父进程trace子进程的一个机会。

29 ptrace(PTRACE_GETREGS,pid, 0, &regs);

30 addr = 0x400502; //需要打断点的程序地址

31 data=ptrace(PTRACE_PEEKTEXT, pid, (void *)addr,NULL); //获得程序代码

32 orginData = (data &~0xff) | 0xcc; //设置代码为int3指令，中断指令

33 ptrace(PTRACE_POKETEXT, pid, (void *)addr, orginData ); //把代码写到内存中

34 ptrace(PTRACE_CONT,pid, NULL, NULL); //通知子进程继续执行

35 wait(&wait_val); //等待子进程程序执行到断点，产生SIGTRAP信号

36 if(WIFSTOPPED(wait_val)){

37 ptrace(PTRACE_GETREGS,pid, 0, &regs); //取出rip的值

38 regs.rip-=1; //要重新执行被替换的指令，这里rip必须减一。

39 printf("break\n");

40 ptrace(PTRACE_SETREGS,pid,0,&regs);

41 ptrace(PTRACE_POKETEXT,pid,(void *)addr,data);

42 ptrace(PTRACE_CONT,pid,NULL,NULL); //子进程继续执行

43 }

44 wait(NULL);

45 }

46 return 0;

47 }

运行结果：

这里值得注意的一点就是子进程在调用了traceme后，如果执行exec函数，会产生SIGTRAP信号，首先看traceme做的一些事情：

222 int ptrace_traceme(void)

223 {

224 int ret = -EPERM;

225

226 write_lock_irq(&tasklist_lock);

227 /* Are we already being traced? */

228 if (!current->ptrace) {

229 ret =security_ptrace_traceme(current->parent);

230 /*

231 * Check PF_EXITING to ensure ->real_parent has not passed

232 * exit_ptrace(). Otherwise we don't report the error but

233 * pretend ->real_parent untraces us right after return.

234 */

235 if (!ret &&!(current->real_parent->flags & PF_EXITING)) {

236 current->ptrace = PT_PTRACED; //设置进程的ptrace为 PT_PTRACED，标志子进程被父进程trace

237 __ptrace_link(current, current->real_parent);

238 }

239 }

240 write_unlock_irq(&tasklist_lock);

241

242 return ret;

243 }

子进程ptrace标志了 PT_PTRACED,在执行exec函数的时候，会去触发SIGTRAP信号

1315 bprm->recursion_depth = depth;

1316 if (retval>= 0) {

1317 if(depth == 0)

1318 tracehook_report_exec(fmt, bprm, regs); //产生SIGTRAP信号

tracehook_report_exec函数的实现：

200 static inline void tracehook_report_exec(struct linux_binfmt *fmt,

201 struct linux_binprm *bprm,

202 struct pt_regs *regs)

203 {

204 if(!ptrace_event(PT_TRACE_EXEC, PTRACE_EVENT_EXEC, 0) &&

205 unlikely(task_ptrace(current) & PT_PTRACED)) //如果标志了 PT_PTRACED

206 send_sig(SIGTRAP,current, 0); //那么就向当前进程发送SIGTRAP信号，使得当前进程暂停

207 }

exec函数没研究过，猜测逻辑应该是，按照object文件中的代码段，数据段设置内存结构，在最后将指向下一条指令的地址指向刚刚代码段的起始地址，那么在程序返回用户态之后，就会按照新加载代码段开始的地方开始执行程序。内核中在加载完内存结构之后，如果当前进程标志了ptrace字段，那么暂停当前进程，通知trace的父进程。

系统在初始化的过程中已经定义了int3的中断门

822 void __init early_trap_init(void)

823 {

824 set_intr_gate_ist(1,&debug, DEBUG_STACK);

825 /* int3 can be called from all */

826 set_system_intr_gate_ist(3,&int3, DEBUG_STACK); //定义了int3的中断门

827 set_intr_gate(14,&page_fault);

828 load_idt(&idt_descr);

829 }

中断门的int3，联系到了arch/x86/kernel/entry_32.S

1476 ENTRY(int3)

1477 RING0_INT_FRAME

1478 pushl $-1 # mark this as an int

1479 CFI_ADJUST_CFA_OFFSET 4

1480 SAVE_ALL

1481 TRACE_IRQS_OFF

1482 xorl %edx,%edx #zero error code

1483 movl %esp,%eax #pt_regs pointer

1484 call do_int3 //执行do_int3函数

1485 jmp ret_from_exception

1486 CFI_ENDPROC

1487 END(int3)

可以看到最后用户态进程产生的int3中断，会触发执行do_int3函数，其中的一部分代码：

470 preempt_conditional_sti(regs);

471 do_trap(3, SIGTRAP, "int3",regs, error_code, NULL);

472 preempt_conditional_cli(regs);

最终看到调用了do_trap函数，这个函数的作用就是给当前进程发送SIGTRAP信号，使得当前进程暂停，同时这个进程的暂停，就会唤醒wait函数。使得父进程调用ptrace函数来获得子进程的相关信息。

如果一个进程想要去调试一个正在运行的进程，那么会调用ptrace，用PTRACE_ATTACH请求去attach一个pid，这个原理很简单：先在当前namespace中根据pid得到目标进程的task_struct（这个原理请参考pid Namespace浅分析），再把它的ptrace字段设置成PT_PTRACED，同时向被attach的进程发送SIGSTOP信号使其暂停。个人觉得这个暂停的意义就是给调试进程一个机会，去设置断点等信息。

700 child =ptrace_get_task_struct(pid); //根据pid和namespace得到task_struct

701 if (IS_ERR(child)) {

702 ret =PTR_ERR(child);

703 goto out;

704 }

705

706 if (request ==PTRACE_ATTACH) { //如果是attach请求

707 ret =ptrace_attach(child); //设置ptrace字段为PT_PTRACED

708 /*

709 * Some architectures need to do book-keeping after

710 * a ptrace attach.

711 */

712 if (!ret)

713 arch_ptrace_attach(child);

714 goto out_put_task_struct;

715 }

ptrace_attach函数的具体逻辑：

200 task->ptrace =PT_PTRACED; //设置ptrace字段为PT_PTRACED

201 if(capable(CAP_SYS_PTRACE))

202 task->ptrace |=PT_PTRACE_CAP;

203

204 __ptrace_link(task,current);

205 send_sig_info(SIGSTOP,SEND_SIG_FORCED, task); //向pid那个进程发送暂停信号SIGSTOP

总结：

调试的大体原理：通过设置被调试的进程ptrace字段，标志这个进程被trace，断点附近的程序代码被替换成了int3，中断程序，引发了do_int3函数，导致了被trace进程的暂停，这样父进程就能通过ptrace系统调用获得子进程的运行情况了。以上分析代表个人观点，个人水平有限，不正确的地方希望大家指出，积极讨论。

参考文章：

1.http://blog.csdn.net/dog250/article/details/5303228