Given the following code
#include <stdio.h>
int main(int argc, char **argv)
{
int k = 0;
for( k = 0; k < 20; ++k )
{
printf( "%d\n", k ) ;
}
}
Using GCC 5.1 or later with
-x c -std=c99 -O3 -funroll-all-loops --param max-completely-peeled-insns=1000 --param max-completely-peel-times=10000
does partially loop unrolling, it unrolls the loop ten times and then does a conditional jump.
.LC0:
.string "%d\n"
main:
pushq %rbx
xorl %ebx, %ebx
.L2:
movl %ebx, %esi
movl $.LC0, %edi
xorl %eax, %eax
call printf
leal 1(%rbx), %esi
movl $.LC0, %edi
xorl %eax, %eax
call printf
leal 2(%rbx), %esi
movl $.LC0, %edi
xorl %eax, %eax
call printf
leal 3(%rbx), %esi
movl $.LC0, %edi
xorl %eax, %eax
call printf
leal 4(%rbx), %esi
movl $.LC0, %edi
xorl %eax, %eax
call printf
leal 5(%rbx), %esi
movl $.LC0, %edi
xorl %eax, %eax
call printf
leal 6(%rbx), %esi
movl $.LC0, %edi
xorl %eax, %eax
call printf
leal 7(%rbx), %esi
movl $.LC0, %edi
xorl %eax, %eax
call printf
leal 8(%rbx), %esi
movl $.LC0, %edi
xorl %eax, %eax
call printf
leal 9(%rbx), %esi
xorl %eax, %eax
movl $.LC0, %edi
addl $10, %ebx
call printf
cmpl $20, %ebx
jne .L2
xorl %eax, %eax
popq %rbx
ret
But using older versions of GCC such as 4.9.2 creates the desired assemlby
.LC0:
.string "%d\n"
main:
subq $8, %rsp
xorl %edx, %edx
movl $.LC0, %esi
movl $1, %edi
xorl %eax, %eax
call __printf_chk
movl $1, %edx
movl $.LC0, %esi
movl $1, %edi
xorl %eax, %eax
call __printf_chk
movl $2, %edx
movl $.LC0, %esi
movl $1, %edi
xorl %eax, %eax
call __printf_chk
movl $3, %edx
movl $.LC0, %esi
movl $1, %edi
xorl %eax, %eax
call __printf_chk
movl $4, %edx
movl $.LC0, %esi
movl $1, %edi
xorl %eax, %eax
call __printf_chk
movl $5, %edx
movl $.LC0, %esi
movl $1, %edi
xorl %eax, %eax
call __printf_chk
movl $6, %edx
movl $.LC0, %esi
movl $1, %edi
xorl %eax, %eax
call __printf_chk
movl $7, %edx
movl $.LC0, %esi
movl $1, %edi
xorl %eax, %eax
call __printf_chk
movl $8, %edx
movl $.LC0, %esi
movl $1, %edi
xorl %eax, %eax
call __printf_chk
movl $9, %edx
movl $.LC0, %esi
movl $1, %edi
xorl %eax, %eax
call __printf_chk
movl $10, %edx
movl $.LC0, %esi
movl $1, %edi
xorl %eax, %eax
call __printf_chk
movl $11, %edx
movl $.LC0, %esi
movl $1, %edi
xorl %eax, %eax
call __printf_chk
movl $12, %edx
movl $.LC0, %esi
movl $1, %edi
xorl %eax, %eax
call __printf_chk
movl $13, %edx
movl $.LC0, %esi
movl $1, %edi
xorl %eax, %eax
call __printf_chk
movl $14, %edx
movl $.LC0, %esi
movl $1, %edi
xorl %eax, %eax
call __printf_chk
movl $15, %edx
movl $.LC0, %esi
movl $1, %edi
xorl %eax, %eax
call __printf_chk
movl $16, %edx
movl $.LC0, %esi
movl $1, %edi
xorl %eax, %eax
call __printf_chk
movl $17, %edx
movl $.LC0, %esi
movl $1, %edi
xorl %eax, %eax
call __printf_chk
movl $18, %edx
movl $.LC0, %esi
movl $1, %edi
xorl %eax, %eax
call __printf_chk
movl $19, %edx
movl $.LC0, %esi
movl $1, %edi
xorl %eax, %eax
call __printf_chk
xorl %eax, %eax
addq $8, %rsp
ret
It there a way to force the later versions of GCC to produce the same output?
Using https://godbolt.org/g/D1AR6i to produce the assembly
EDIT: No duplicated question, since the problem to completly unroll loops with later versions of GCC has not yet been solved. Passing --param max-completely-peeled-insns=1000 --param max-completely-peel-times=10000
has not effects on the generated assembly using GCC >= 5.1
k < 9
the unroll is no performed at all... – LPs