I'm currently playing with gcc, gdb and assembly and trying to understand it. I've gone over some tutorials and got some of the key points.
So I decided to compile a small .c file and take a look at the result, and some things are not really clear.
This is the file:
#include <stdio.h>
/* Initializes local x to 8, then initializes local y to x + 5.
 * Takes no arguments and returns nothing; neither local is used afterwards. */
void func1(){
int x = 8;
int y = x + 5; /* y is computed from x, so x is read back here */
}
/* Initializes a single local x to 12.
 * Takes no arguments and returns nothing; x is never read. */
void func2(){
int x = 12;
}
/* Initializes a single local x from the constant expression 10+20.
 * Takes no arguments and returns nothing; x is never read. */
void func3(){
int x = 10+20; /* both operands are literals, so the value is 30 */
}
/* Declares a local x without an initializer, then assigns 1 to it
 * in a separate statement. Takes no arguments and returns nothing. */
void func4(){
int x;
x = 1; /* first write to x; it is never read */
}
/* Declares two locals x and y without initializers, then assigns
 * 2 to x and 1 to y. Takes no arguments and returns nothing. */
void func5(){
int x;
int y;
x = 2;
y = 1; /* neither x nor y is read afterwards */
}
/* Declares locals x and y, assigns 15 to x and 6 to y, then adds x
 * into y (y becomes 21). Takes no arguments and returns nothing. */
void func6(){
int x;
int y;
x=15;
y=6;
y += x; /* reads both x and y, writes the sum back to y */
}
/* Entry point: calls func1 through func6 once each, in order, and
 * returns 20. The argc and argv parameters are not used in the body. */
int main(int argc, char *argv[]) {
func1();
func2();
func3();
func4();
func5();
func6();
return 20; /* fixed non-zero return value */
}
These are the disassembly results:
Dump of assembler code for function main:
0x0000000100000f60 <+0> : push %rbp
0x0000000100000f61 <+1> : mov %rsp,%rbp
0x0000000100000f64 <+4> : sub $0x10,%rsp
0x0000000100000f68 <+8> : movl $0x0,-0x4(%rbp)
0x0000000100000f6f <+15>: mov %edi,-0x8(%rbp)
0x0000000100000f72 <+18>: mov %rsi,-0x10(%rbp)
0x0000000100000f76 <+22>: callq 0x100000ed0 <func1>
0x0000000100000f7b <+27>: callq 0x100000ef0 <func2>
0x0000000100000f80 <+32>: callq 0x100000f00 <func3>
0x0000000100000f85 <+37>: callq 0x100000f10 <func4>
0x0000000100000f8a <+42>: callq 0x100000f20 <func5>
0x0000000100000f8f <+47>: callq 0x100000f40 <func6>
0x0000000100000f94 <+52>: mov $0x14,%eax
0x0000000100000f99 <+57>: add $0x10,%rsp
0x0000000100000f9d <+61>: pop %rbp
0x0000000100000f9e <+62>: retq
Dump of assembler code for function func1:
0x0000000100000ed0 <+0> : push %rbp
0x0000000100000ed1 <+1> : mov %rsp,%rbp
0x0000000100000ed4 <+4> : movl $0x8,-0x4(%rbp)
0x0000000100000edb <+11>: mov -0x4(%rbp),%eax
0x0000000100000ede <+14>: add $0x5,%eax
0x0000000100000ee3 <+19>: mov %eax,-0x8(%rbp)
0x0000000100000ee6 <+22>: pop %rbp
0x0000000100000ee7 <+23>: retq
0x0000000100000ee8 <+24>: nopl 0x0(%rax,%rax,1)
Dump of assembler code for function func2:
0x0000000100000ef0 <+0> : push %rbp
0x0000000100000ef1 <+1> : mov %rsp,%rbp
0x0000000100000ef4 <+4> : movl $0xc,-0x4(%rbp)
0x0000000100000efb <+11>: pop %rbp
0x0000000100000efc <+12>: retq
0x0000000100000efd <+13>: nopl (%rax)
Dump of assembler code for function func3:
0x0000000100000f00 <+0> : push %rbp
0x0000000100000f01 <+1> : mov %rsp,%rbp
0x0000000100000f04 <+4> : movl $0x1e,-0x4(%rbp)
0x0000000100000f0b <+11>: pop %rbp
0x0000000100000f0c <+12>: retq
0x0000000100000f0d <+13>: nopl (%rax)
Dump of assembler code for function func4:
0x0000000100000f10 <+0> : push %rbp
0x0000000100000f11 <+1> : mov %rsp,%rbp
0x0000000100000f14 <+4> : movl $0x1,-0x4(%rbp)
0x0000000100000f1b <+11>: pop %rbp
0x0000000100000f1c <+12>: retq
0x0000000100000f1d <+13>: nopl (%rax)
Dump of assembler code for function func5:
0x0000000100000f20 <+0> : push %rbp
0x0000000100000f21 <+1> : mov %rsp,%rbp
0x0000000100000f24 <+4> : movl $0x2,-0x4(%rbp)
0x0000000100000f2b <+11>: movl $0x1,-0x8(%rbp)
0x0000000100000f32 <+18>: pop %rbp
0x0000000100000f33 <+19>: retq
0x0000000100000f34 <+20>: data16 data16 nopw %cs:0x0(%rax,%rax,1)
Dump of assembler code for function func6:
0x0000000100000f40 <+0> : push %rbp
0x0000000100000f41 <+1> : mov %rsp,%rbp
0x0000000100000f44 <+4> : movl $0xf,-0x4(%rbp)
0x0000000100000f4b <+11>: movl $0x6,-0x8(%rbp)
0x0000000100000f52 <+18>: mov -0x4(%rbp),%eax
0x0000000100000f55 <+21>: mov -0x8(%rbp),%ecx
0x0000000100000f58 <+24>: add %eax,%ecx
0x0000000100000f5a <+26>: mov %ecx,-0x8(%rbp)
0x0000000100000f5d <+29>: pop %rbp
0x0000000100000f5e <+30>: retq
0x0000000100000f5f <+31>: nop
I compile this with:
gcc -o example example.c
I'm not clear about a few things:
- If all the functions end the same way (in the code, e.g. they all return void), why do they end with different instructions:
- func1 ends with nopl 0x0(%rax,%rax,1)
- func2 & func3 & func4 end with nopl (%rax)
- func6 ends with nop
- func5 ends with data16 data16 nopw %cs:0x0(%rax,%rax,1).
- What exactly does data16 data16 nopw %cs:0x0(%rax,%rax,1) mean?
- In main there are
- sub $0x10,%rsp
- add $0x10,%rsp
- Are these to allocate memory for the local variables in the function? If so, why are they always rounded up to 0x10, 0x20, 0x30...? Isn't that a bit of a waste?