Introduction series: gdb learning - view the output of disassembly and learn assembly

   This article is a personal summary, kept as a backup and for easy future reference. Because it is a personal summary, it may contain errors; corrections are welcome. In addition, most of the content comes from the Internet, books, and various manuals. If anything here infringes your rights, please let me know and I will delete the post immediately with my apologies.
   QQ group No.: 513683159 [mutual learning]
Content source:
Debug Hack Chinese version #13
  Assembly language (instruction brief)
  Assembly instruction comparison table
  x86 assembly instruction set (Annotated)


  ① Create the source file: vim assembly.c

int global;											//global variable
int func_op(void)									//Callback function
        return 0;
void func(void)									 	//Subfunction function
        unsigned long long val64 = 0;				//local variable
        val64 = 0xffffeeeeddddcccc;					//Direct assignment hexadecimal, easy to read assembly code
        global = 0x5555;							//Direct assignment hexadecimal, easy to read assembly code	
#define MAX_WORD 16 									// Macro definition
int main(void)										//Main function
        unsigned int i = 0;							//local variable
        char words[MAX_WORD]="Hello World";			//Character array
        char word;									//Character variable
        int (*func_pointer)(void) = &func_op;		//Function pointer assignment
        i = 0xabcd;                                 //Direct assignment hexadecimal, easy to read assembly code

        if(i !=0x1234)								//if judgment statement

        while(i == 0)								//whlie loop statement

        func();										//function call
        i = func_pointer();							//Function pointer call

        for(i = 0;i < MAX_WORD-1;i++)				//for loop
                word = words[i];
        return 0;

   ② Compile: gcc -Wall -O0 assembly.c -o assembly -m32. For details, see: gcc understanding
    -Wall: enable all common warning messages
    -O0: optimization level for compiling and linking; 0 means no optimization
     -m32: the existing environment is 64-bit. To compile a 32-bit binary you need to install the corresponding libraries: sudo apt-get install libc6-dev-i386 gcc-multilib g++-multilib
  ③ Disassemble with objdump: objdump -d --no-show-raw-insn assembly
    --no-show-raw-insn: do not print the instruction bytes when disassembling instructions.
    -d / --disassemble: display the assembler mnemonics for the machine instructions from objfile.
   You can view the manual with man objdump; type / followed by an option name to search for its description, then press n to jump to the next match and N to jump to the previous one.

Assembler interpretation

assembly:     file format elf32-i386
0804843b <func_op>:
 804843b:	push   %ebp
 804843c:	mov    %esp,%ebp
 804843e:	mov    $0x0,%eax
 8048443:	pop    %ebp
 8048444:	ret    

08048445 <func>:
 8048445:	push   %ebp
 8048446:	mov    %esp,%ebp
 8048448:	sub    $0x10,%esp
 804844b:	movl   $0x0,-0x8(%ebp)					//4️⃣-2
 8048452:	movl   $0x0,-0x4(%ebp)
 8048459:	movl   $0xddddcccc,-0x8(%ebp)
 8048460:	movl   $0xffffeeee,-0x4(%ebp)
 8048467:	movl   $0x5555,0x804a020				//4️⃣-3
 8048471:	nop
 8048472:	leave  
 8048473:	ret    

08048474 <main>:
 8048474:	lea    0x4(%esp),%ecx
 8048478:	and    $0xfffffff0,%esp
 804847b:	pushl  -0x4(%ecx)
 804847e:	push   %ebp								//Allocate stack frame work
 804847f:	mov    %esp,%ebp						//Allocate stack frame work
 8048481:	push   %ecx
 8048482:	sub    $0x24,%esp
 8048485:	mov    %gs:0x14,%eax
 804848b:	mov    %eax,-0xc(%ebp)
 804848e:	xor    %eax,%eax
 8048490:	movl   $0x0,-0x24(%ebp)
 8048497:	movl   $0x6c6c6548,-0x1c(%ebp)
 804849e:	movl   $0x6f57206f,-0x18(%ebp)
 80484a5:	movl   $0x646c72,-0x14(%ebp)
 80484ac:	movl   $0x0,-0x10(%ebp)
 80484b3:	movl   $0x804843b,-0x20(%ebp)
 80484ba:	movl   $0xabcd,-0x24(%ebp)			    //1️⃣ Assign 0xabcd to -0x24(%ebp)
 80484c1:	cmpl   $0x1234,-0x24(%ebp)			 	//2️⃣-1: Compare i with 0x1234
 80484c8:	je     80484d7 <main+0x63>				//2️⃣-2: If equal, jump to 80484d7
 80484ca:	movl   $0x0,-0x24(%ebp)					//2️⃣-3: If not equal, i is assigned a value of 0
 80484d1:	jmp    80484d7 <main+0x63>				//2️⃣-4: Jump unconditionally to 80484d7
 80484d3:	addl   $0x1,-0x24(%ebp)					//3️⃣-1: i = i + 1
 80484d7:	cmpl   $0x0,-0x24(%ebp)					//3️⃣-2: Compare i with 0
 80484db:	je     80484d3 <main+0x5f>				//3️⃣-3: If equal, jump back to 3️⃣-1
 80484dd:	call   8048445 <func>					//4️⃣-1: Call func function
 80484e2:	mov    -0x20(%ebp),%eax
 80484e5:	call   *%eax							//5️⃣: Call through the function pointer
 80484e7:	mov    %eax,-0x24(%ebp)
 80484ea:	movl   $0x0,-0x24(%ebp)					//6️⃣-1: i is reassigned to 0
 80484f1:	jmp    8048505 <main+0x91>				//6️⃣-2: Jump unconditionally to 6️⃣-9
 80484f3:	lea    -0x1c(%ebp),%edx					//6️⃣-3: Store the address -0x1c(%ebp) in edx
 80484f6:	mov    -0x24(%ebp),%eax					//6️⃣-4: Store the value at -0x24(%ebp) in eax
 80484f9:	add    %edx,%eax						//6️⃣-5: Add the address in edx to the value in eax and store the sum in eax
 80484fb:	movzbl (%eax),%eax						//6️⃣-6: Load the byte at the address in eax into eax, filling the remaining bits with 0
 80484fe:	mov    %al,-0x25(%ebp)					//6️⃣-7: al is the low eight bits of eax; only 8 bits are stored because a char needs only 8 bits
 8048501:	addl   $0x1,-0x24(%ebp)					//6️⃣-8: i = i + 1
 8048505:	cmpl   $0xe,-0x24(%ebp)					//6️⃣-9: Compare i with 0xe (MAX_WORD-2)
 8048509:	jbe    80484f3 <main+0x7f>				//6️⃣-10: If i is below or equal (i.e. i < MAX_WORD-1), jump back to 6️⃣-3 and continue the loop
 804850b:	mov    $0x0,%eax						//7️⃣
 8048510:	mov    -0xc(%ebp),%ecx
 8048513:	xor    %gs:0x14,%ecx
 804851a:	je     8048521 <main+0xad>
 804851c:	call   8048310 <__stack_chk_fail@plt>
 8048521:	add    $0x24,%esp
 8048524:	pop    %ecx
 8048525:	pop    %ebp
 8048526:	lea    -0x4(%ecx),%esp
 8048529:	ret    
 804852a:	xchg   %ax,%ax
 804852c:	xchg   %ax,%ax
 804852e:	xchg   %ax,%ax

  1️⃣ movl S,D instruction:
      mov is the data-transfer instruction; it copies S to D (S -> D).
     The l suffix indicates a 4-byte operand, such as (unsigned) int / long / char * on a 32-bit target.
    -0x24(%ebp) denotes the address obtained by subtracting 0x24 from the address held in the ebp register, that is, the address of the variable i.
   Therefore: movl $0xabcd,-0x24(%ebp) is equivalent to i = 0xabcd;
  2️⃣ if statement:
  2️⃣-1: cmpl S1,S2 instruction:
     cmp is the comparison instruction [compare]. In the AT&T syntax used here it computes the difference S2-S1 and sets the condition flags accordingly, without storing the result.
   Therefore, cmpl $0x1234,-0x24(%ebp) evaluates i - 0x1234.
  2️⃣-2: je Label instruction:
   je is a conditional jump instruction: if the comparison result is 0 (equal), jump to Label. If not, continue with the next instruction.
  je 80484d7 <main+0x63>
  2️⃣-4: jmp Label instruction:
   jmp is an unconditional jump instruction: jump directly to Label.
  3️⃣ while statement:
  3️⃣-1: addl S,D instruction:
     add is the addition instruction, which stores D+S into D (S + D -> D).
   So addl $0x1,-0x24(%ebp) is equivalent to i = i + 1.
  4️⃣ Function call:
  4️⃣-1: call instruction: transfers control to a function, which can later return to the instruction after the call. Here it jumps to the func() function.
  4️⃣-2: Each assignment takes two movl instructions, because a 64-bit variable on a 32-bit target has to be written as two 32-bit halves. These are assignments to a local variable, which is addressed as an offset from the ebp register; the values are written to that address on the stack.
  4️⃣-3: This is the assignment to the global variable. The address is expressed differently from the local variable above: the global is addressed directly by its absolute address (0x804a020).
  5️⃣ Call through a function pointer:
   This calls func_op() through the function pointer. The pointer value is first loaded into eax, and an indirect call is written by prefixing the register with an asterisk: call *%eax.
  6️⃣ for loop:
  6️⃣-3: lea S,D instruction (D <- &S):
  6️⃣-6: movzbl S,D instruction:
The statement in the book is movzbl -0x20(%ebp,%eax,1),%eax. The operand -0x20(%ebp,%eax,1) uses scaled index addressing and means *(-0x20 + ebp + eax*1).
     In this program the same work is done by the four steps 6️⃣-3 + 6️⃣-4 + 6️⃣-5 + 6️⃣-6: the single byte at index i (%eax) of words (at -0x1c(%ebp) in this listing, -0x20(%ebp) in the book) is loaded into eax, that is, word = words[i].
     The movzbl instruction performs zero extension: it copies one byte and fills the remaining bits of the destination operand with 0.
  7️⃣ Return value setting:
     This is the return statement; 0 is assigned as the return value.
     If the value returned by the return statement is no larger than 4 bytes, such as an int, it is placed in eax, the general-purpose register conventionally used for return values.

Tags: C GDB

Posted on Thu, 07 Oct 2021 14:41:16 -0400 by kimberlc