I have written a short C "wrapper" function around a block of inline assembly, shown below. The assembly code consists of a while loop that computes several vector dot products using SSE2. I am using GCC 4.8.4 on Ubuntu 14.04 on x86. The following code compiles to assembly "without problem" with
gcc -fpic -O2 -msse2 -S foo.c
But when I do
gcc -c foo.s
an error is triggered:
foo.c: Assembler messages:
foo.c:2: Error: unknown pseudo-op: `.while5'
I checked the assembler output "foo.s" and found something strange.
C file "foo.c":
#include <emmintrin.h>
void foo (int kk, double *A, double *B, double *ALPHA, double *C, int ldc) {
asm("movl %0, %%ecx\n\t" /* kk -> %ecx */
"movl %3, %%eax\n\t" /* A -> %eax */
"movl %4, %%edx\n\t" /* B -> %edx */
/* a while-loop */
".while%=\n\t"
"movsd (%%edx), %%xmm5\n\t"
"unpcklpd %%xmm5, %%xmm5\n\t"
"movapd %%xmm5, %%xmm6\n\t"
"movapd (%%eax), %%xmm4\n\t"
"mulpd %%xmm4, %%xmm6\n\t"
"movapd 16(%%eax), %%xmm7\n\t"
"addl $32, %%eax\n\t"
"addpd %%xmm6, %%xmm0\n\t"
"mulpd %%xmm7, %%xmm5\n\t"
"addpd %%xmm5, %%xmm1\n\t"
"movsd 8(%%edx), %%xmm6\n\t"
"addl $16, %%edx\n\t"
"unpcklpd %%xmm6, %%xmm6\n\t"
"mulpd %%xmm6, %%xmm4\n\t"
"addpd %%xmm4, %%xmm2\n\t"
"mulpd %%xmm6, %%xmm7\n\t"
"addpd %%xmm7, %%xmm3\n\t"
"subl $1, %%ecx\n\t" /* kk-- */
"testl %%ecx, %%ecx\n\t" /* kk = 0 ? */
"jne .while%=\n\t"
/* other input operands passing */
"movl %5, %%ecx\n\t" /* C -> %ecx */
"movl %1, %%eax\n\t" /* ALPHA -> %eax, then C0 -> %eax */
"movl %2, %%edx\n\t" /* ldc -> %edx */
/* write-back */
"movsd (%%eax), %%xmm7\n\t"
"unpcklpd %%xmm7, %%xmm7\n\t"
"leal (%%ecx,%%edx,8), %%eax\n\t" /* C0=C+ldc */
"mulpd %%xmm7, %%xmm0\n\t"
"addpd (%%ecx), %%xmm0\n\t"
"movapd %%xmm0, (%%ecx)\n\t"
"mulpd %%xmm7, %%xmm2\n\t"
"addpd (%%eax), %%xmm2\n\t"
"movapd %%xmm2, (%%eax)\n\t"
"mulpd %%xmm7, %%xmm1\n\t"
"addpd 16(%%ecx), %%xmm1\n\t"
"movapd %%xmm1, 16(%%ecx)\n\t"
"mulpd %%xmm7, %%xmm3\n\t"
"addpd 16(%%eax), %%xmm3\n\t"
"movapd %%xmm3, 16(%%eax)\n\t"
: /* no output operands */
: "m"(kk), "m"(ALPHA), "m"(ldc), "m"(A), "m"(B), "m"(C) /* input operands */
: "eax", "edx", "ecx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" /* clobbers */ );
}
assembler output (the while-loop looks odd!)
.LFB503:
.cfi_startproc
#APP
# 4 "foo.c" 1
# Compiler output for the inline asm in foo(); the "m" operands were
# substituted as %esp-relative stack slots.
movl 4(%esp), %ecx        # operand %0 (kk) -> %ecx
movl 8(%esp), %eax        # operand %3 (A) -> %eax
movl 12(%esp), %edx       # operand %4 (B) -> %edx
.while5                   # ".while%=" with %= expanded to 5; there is no trailing ':', so this is not a label and gas reports it as an unknown pseudo-op
movsd (%edx), %xmm5
unpcklpd %xmm5, %xmm5     # duplicate the low double into both lanes
movapd %xmm5, %xmm6
movapd (%eax), %xmm4
mulpd %xmm4, %xmm6
movapd 16(%eax), %xmm7
addl $32, %eax            # advance %eax by 32 bytes
addpd %xmm6, %xmm0        # xmm0 is added to here without having been written earlier in this block
mulpd %xmm7, %xmm5
addpd %xmm5, %xmm1
movsd 8(%edx), %xmm6
addl $16, %edx            # advance %edx by 16 bytes
unpcklpd %xmm6, %xmm6
mulpd %xmm6, %xmm4
addpd %xmm4, %xmm2
mulpd %xmm6, %xmm7
addpd %xmm7, %xmm3
subl $1, %ecx             # decrement the loop counter
testl %ecx, %ecx
jne .while5               # branch target that the line above was meant to define
movl 20(%esp), %ecx       # operand %5 (C) -> %ecx
movl 16(%esp), %eax       # operand %1 (ALPHA) -> %eax
movl 24(%esp), %edx       # operand %2 (ldc) -> %edx
movsd (%eax), %xmm7
unpcklpd %xmm7, %xmm7     # both lanes of xmm7 = value loaded through ALPHA
leal (%ecx,%edx,8), %eax  # %eax = %ecx + 8 * %edx
mulpd %xmm7, %xmm0
addpd (%ecx), %xmm0
movapd %xmm0, (%ecx)
mulpd %xmm7, %xmm2
addpd (%eax), %xmm2
movapd %xmm2, (%eax)
mulpd %xmm7, %xmm1
addpd 16(%ecx), %xmm1
movapd %xmm1, 16(%ecx)
mulpd %xmm7, %xmm3
addpd 16(%eax), %xmm3
movapd %xmm3, 16(%eax)
# 0 "" 2
#NO_APP
ret
.cfi_endproc
Can anyone kindly explain what has happened? I don't think it is my compiler's problem; there must be something wrong with my code. Thanks!
The label is missing its trailing colon; it should be `.while%=:`. Note that `-S` does not assemble, it just produces assembly output so it can contain errors too. – Jester
Assembly has no `while` loop, but simple labels and jumps. – too honest for this site
Compiler-local labels conventionally start with `.L`, so `.Lwhile:`. I thought that was an attempt to use a nonexisting pseudo-opcode named `.while`. – fuz
If you find yourself writing `mov` instructions in inline asm, you should look for a way to let the compiler do it, so it can avoid unnecessary instructions in some cases. e.g. for x86-64, the args will already all be in integer registers, so it's silly to do a bunch of `mov reg,reg` instead of just letting the compiler choose regs for you. (see the x86 tag wiki for a link to my inline asm examples of how to let the compiler do as much as possible.) – Peter Cordes