3
votes

I'm currently playing with gcc, gdb and assembly and trying to understand them. I've gone over some tutorials and got some of the key points.

So I decided to use a small .c file and took a look at the result, but some things are not really clear.

This is the file:

#include <stdio.h>

/*
 * Stores 8 in local x, then stores x + 5 in local y.
 * Neither local is used afterwards and nothing is returned.
 */
void func1(){
    int x = 8;
    /* x is read back here; the func1 listing shows this as load, add $0x5, store. */
    int y = x + 5;
}

/*
 * Declares one local and initializes it to the constant 12 in the same
 * statement. The value is never used and nothing is returned.
 */
void func2(){
    int x = 12;
}

/*
 * Initializes one local from the constant expression 10+20.
 * The func3 listing shows a single store of 0x1e (30), i.e. the sum
 * does not appear as an add instruction in the generated code.
 */
void func3(){
    int x = 10+20;
}

/*
 * Declares one local without an initializer, then assigns 1 to it in a
 * separate statement. The func4 listing has the same shape as func2's:
 * a single immediate store to -0x4(%rbp).
 */
void func4(){
    int x;
    x = 1;
}

/*
 * Declares two locals and assigns a constant to each (x = 2, y = 1).
 * In the func5 listing, x is stored at -0x4(%rbp) and y at -0x8(%rbp).
 */
void func5(){
    int x;
    int y;

    x = 2;
    y = 1;
}

/*
 * Assigns 15 to x and 6 to y, then adds x into y (y becomes 21).
 * The result is never used and nothing is returned.
 */
void func6(){
    int x;
    int y;

    x=15;
    y=6;
    /* Both locals are read back; the func6 listing loads them into %eax/%ecx, adds, and stores to y. */
    y += x;
}

/*
 * Entry point: calls func1 through func6 in order and returns 20.
 * argc and argv are accepted but never read by the C code.
 */
int main(int argc, char *argv[]) {
    func1();
    func2();
    func3();
    func4();
    func5();
    func6();
    /* 20 appears as $0x14 moved into %eax in the main listing. */
    return 20;
}

These are the disassemble results:

Dump of assembler code for function main:
0x0000000100000f60 <+0> :   push   %rbp
0x0000000100000f61 <+1> :   mov    %rsp,%rbp
0x0000000100000f64 <+4> :   sub    $0x10,%rsp
0x0000000100000f68 <+8> :   movl   $0x0,-0x4(%rbp)
0x0000000100000f6f <+15>:   mov    %edi,-0x8(%rbp)
0x0000000100000f72 <+18>:   mov    %rsi,-0x10(%rbp)
0x0000000100000f76 <+22>:   callq  0x100000ed0 <func1>
0x0000000100000f7b <+27>:   callq  0x100000ef0 <func2>
0x0000000100000f80 <+32>:   callq  0x100000f00 <func3>
0x0000000100000f85 <+37>:   callq  0x100000f10 <func4>
0x0000000100000f8a <+42>:   callq  0x100000f20 <func5>
0x0000000100000f8f <+47>:   callq  0x100000f40 <func6>
0x0000000100000f94 <+52>:   mov    $0x14,%eax
0x0000000100000f99 <+57>:   add    $0x10,%rsp
0x0000000100000f9d <+61>:   pop    %rbp
0x0000000100000f9e <+62>:   retq

Dump of assembler code for function func1:
0x0000000100000ed0 <+0> :   push   %rbp
0x0000000100000ed1 <+1> :   mov    %rsp,%rbp
0x0000000100000ed4 <+4> :   movl   $0x8,-0x4(%rbp)
0x0000000100000edb <+11>:   mov    -0x4(%rbp),%eax
0x0000000100000ede <+14>:   add    $0x5,%eax
0x0000000100000ee3 <+19>:   mov    %eax,-0x8(%rbp)
0x0000000100000ee6 <+22>:   pop    %rbp
0x0000000100000ee7 <+23>:   retq
0x0000000100000ee8 <+24>:   nopl   0x0(%rax,%rax,1)

Dump of assembler code for function func2:
0x0000000100000ef0 <+0> :   push   %rbp
0x0000000100000ef1 <+1> :   mov    %rsp,%rbp
0x0000000100000ef4 <+4> :   movl   $0xc,-0x4(%rbp)
0x0000000100000efb <+11>:   pop    %rbp
0x0000000100000efc <+12>:   retq
0x0000000100000efd <+13>:   nopl   (%rax)

Dump of assembler code for function func3:
0x0000000100000f00 <+0> :   push   %rbp
0x0000000100000f01 <+1> :   mov    %rsp,%rbp
0x0000000100000f04 <+4> :   movl   $0x1e,-0x4(%rbp)
0x0000000100000f0b <+11>:   pop    %rbp
0x0000000100000f0c <+12>:   retq
0x0000000100000f0d <+13>:   nopl   (%rax)

Dump of assembler code for function func4:
0x0000000100000f10 <+0> :   push   %rbp
0x0000000100000f11 <+1> :   mov    %rsp,%rbp
0x0000000100000f14 <+4> :   movl   $0x1,-0x4(%rbp)
0x0000000100000f1b <+11>:   pop    %rbp
0x0000000100000f1c <+12>:   retq
0x0000000100000f1d <+13>:   nopl   (%rax)

Dump of assembler code for function func5:
0x0000000100000f20 <+0> :   push   %rbp
0x0000000100000f21 <+1> :   mov    %rsp,%rbp
0x0000000100000f24 <+4> :   movl   $0x2,-0x4(%rbp)
0x0000000100000f2b <+11>:   movl   $0x1,-0x8(%rbp)
0x0000000100000f32 <+18>:   pop    %rbp
0x0000000100000f33 <+19>:   retq
0x0000000100000f34 <+20>:   data16 data16 nopw %cs:0x0(%rax,%rax,1)

Dump of assembler code for function func6:
0x0000000100000f40 <+0> :   push   %rbp
0x0000000100000f41 <+1> :   mov    %rsp,%rbp
0x0000000100000f44 <+4> :   movl   $0xf,-0x4(%rbp)
0x0000000100000f4b <+11>:   movl   $0x6,-0x8(%rbp)
0x0000000100000f52 <+18>:   mov    -0x4(%rbp),%eax
0x0000000100000f55 <+21>:   mov    -0x8(%rbp),%ecx
0x0000000100000f58 <+24>:   add    %eax,%ecx
0x0000000100000f5a <+26>:   mov    %ecx,-0x8(%rbp)
0x0000000100000f5d <+29>:   pop    %rbp
0x0000000100000f5e <+30>:   retq
0x0000000100000f5f <+31>:   nop

I compile this with:

gcc  -o example example.c

I'm not clear about a few things:

  1. If all the functions end the same way (in the code, e.g., they all return void), why is it that
    • func1 ends with nopl 0x0(%rax,%rax,1)
    • func2, func3 and func4 end with nopl (%rax)
    • func6 ends with nop
    • func5 ends with data16 data16 nopw %cs:0x0(%rax,%rax,1)?
  2. What exactly does data16 data16 nopw %cs:0x0(%rax,%rax,1) mean?
  3. In main there are
    • sub $0x10,%rsp
    • add $0x10,%rsp
    • Are these there to allocate memory for the local variables in the function? If so, why are they always rounded up to 0x10, 0x20, 0x30...? Isn't that a bit of a waste?
2

2 Answers

3
votes

All of these instructions (nopl 0x0(%rax,%rax,1), etc.) are variations of the nop instruction. They are padding that makes each function's length a multiple of 16 bytes, so that the next function starts on a 16-byte boundary. You might ask why the toolchain doesn't just use multiple 0x90 (single-byte nop) instructions. The answer is that if the nops do get executed, it's slightly faster to execute one long multi-byte nop such as data16 data16 nopw %cs:0x0(%rax,%rax,1) or nopl (%rax) than to execute several short nops. Nops can be executed when they appear inside a function; such padding is generated when the compiler wants to align a jump target for performance. The nops are emitted by the assembler, which doesn't know which of them might be executed and which won't, because that is not decidable in general.

For the part about the stack: You are compiling without optimizations and you shouldn't ask about weird code generated without optimizations. The compiler is instructed to not be smart when you compile without optimizations, so why do you expect it to conserve space?

1
votes
  1. The functions end with the retq instruction. The opcodes shown after it in the disassembly are just garbage as far as actual execution is concerned, but they may be pre-executed (and discarded) by modern predicting CPUs. You can safely ignore them. Some other CPUs have instructions with a "branch delay", but the x86 doesn't have this feature. The gap between the retq and the next 16-byte boundary is left free so that the next function begins at an aligned address. This allows faster execution.

  2. The data16 probably means that there is 16-bit data that doesn't match any opcode known to the disassembler. Just ignore it; it won't affect the execution.

  3. The x86 architecture allows access to any address regardless of alignment. But access to an unaligned variable may require more than one bus cycle for the memory access. The alignment of the stack pointer rsp guarantees that an access to a uint64_t takes only one bus cycle.