1
votes
    The following code works fine on linux-x86 and darwin-x86, but not on ios-armv7.

    the right output should be: 
        m[0]: 0.500000, v: 0.500000
        m[1]: 0.500000, v: 0.500000
        m[2]: 0.500000, v: 0.500000
        m[3]: 0.500000, v: 0.500000
        m[4]: 0.500000, v: 0.500000

    but on ios-armv7 I get this wrong output instead:
        m[0]: 0.500000, v: 0.500000
        m[1]: 0.500000, v: 0.000000
        m[2]: 0.500000, v: 0.000000
        m[3]: 0.500000, v: 0.000000
        m[4]: 0.500000, v: 0.000000

    I also noticed the following strange behaviour when it is built for ios-armv7:

    [a] if I remove the function 'func' and move its body into 'main', it works fine
    [b] if I declare the array as 'double m[5]' instead of 'float m[5]', it works fine
    [c] if I set the variable 'v' with 'v = 0.5' or 'v = sqrt(2.0f/8)', it works fine
    [d] if the gcc optimization option is '-O0', it works fine, but with '-O1' or '-O2' the wrong output occurs


    My iPad 1 is jailbroken, so I can cross-compile an executable on my MacBook Air, 'scp' the executable to the iPad and run it there. The details follow:

    1. cross-compile an executable on the Mac:
    /Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/usr/bin/llvm-gcc -O1 -Wall -arch armv7 -mcpu=cortex-a8 -isysroot /Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS6.1.sdk -I/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS6.1.sdk/usr/include -D__IPHONE_OS__ -miphoneos-version-min=4.0 foo.c

    2. 'scp' the executable to iPad
    scp a.out root@<ipad-ip>:~

    3. 'ssh' to iPad
    ssh root@<ipad-ip> #the default password is 'alpine'

    4. run a.out on iPad
    ./a.out
#include <stdio.h>
#include <math.h>

int
func(int n) /* [a] */
{
    int i;
    float m[5]; /* [b] */
    double v;

    v = sqrt(2.0f/n); /* [c] */

    for(i=0;i<5;++i) {
        m[i]=v;
        printf("m[%d]: %f, v: %f\n", i, m[i], v);
    }

    return 0;
}


int
main(int argc, char **argv)
{
    return func(8);
}

You can also find the whole code on https://gist.github.com/ashun/5992120

The following is the assembly. You can see the differences between the two listings with the command 'vim -d'.

  1. assembly of the previous code, declare the array 'm[5]' as 'double m[5]'
        .section    __TEXT,__text,regular,pure_instructions
        .section    __TEXT,__textcoal_nt,coalesced,pure_instructions
        .section    __TEXT,__const_coal,coalesced
        .section    __TEXT,__picsymbolstub4,symbol_stubs,none,16
        .section    __TEXT,__StaticInit,regular,pure_instructions
        .syntax unified
        .section    __TEXT,__text,regular,pure_instructions
        .globl  _func
        .align  2
        .code   16
        .thumb_func _func
    _func:
        push    {r4, r5, r6, r7, lr}
        add r7, sp, #12
        str r8, [sp, #-4]!
        sub sp, #8
        vmov.f32    s0, #2.000000e+00
        movw    r8, :lower16:(L_.str-(LPC0_0+4))
        vmov    s2, r0
        movt    r8, :upper16:(L_.str-(LPC0_0+4))
        vcvt.f32.s32    d1, d1
    LPC0_0:
        add r8, pc
        movs    r4, #0
        vdiv.f32    s0, s0, s2
        vsqrt.f32   s0, s0
        vcvt.f64.f32    d16, s0
        vmov    r5, r6, d16
    LBB0_1:
        mov r1, r4
        mov r0, r8
        mov r2, r5
        mov r3, r6
        vstr.64 d16, [sp]
        adds    r4, #1
        blx _printf
        cmp r4, #5
        bne LBB0_1
        movs    r0, #0
        add sp, #8
        ldr r8, [sp], #4
        pop {r4, r5, r6, r7, pc}

        .globl  _main
        .align  2
        .code   16
        .thumb_func _main
    _main:
        push    {r7, lr}
        mov r7, sp
        movs    r0, #8
        bl  _func
        movs    r0, #0
        pop {r7, pc}

        .section    __TEXT,__cstring,cstring_literals
    L_.str:
        .asciz   "m[%d]: %f, v: %f\n"


    .subsections_via_symbols
  2. assembly of the previous code, declare the array 'm[5]' as 'double m[5]'
        .section    __TEXT,__text,regular,pure_instructions
        .section    __TEXT,__textcoal_nt,coalesced,pure_instructions
        .section    __TEXT,__const_coal,coalesced
        .section    __TEXT,__picsymbolstub4,symbol_stubs,none,16
        .section    __TEXT,__StaticInit,regular,pure_instructions
        .syntax unified
        .section    __TEXT,__text,regular,pure_instructions
        .globl  _func
        .align  2
        .code   16
        .thumb_func _func
    _func:
        push    {r4, r5, r6, r7, lr}
        add r7, sp, #12
        str r8, [sp, #-4]!
        **vpush {d8}**
        sub sp, #8
        vmov.f32    s0, #2.000000e+00
        movw    r8, :lower16:(L_.str-(LPC0_0+4))
        vmov    s2, r0
        movt    r8, :upper16:(L_.str-(LPC0_0+4))
        vcvt.f32.s32    d1, d1
    LPC0_0:
        add r8, pc
        movs    r4, #0
        vdiv.f32    s0, s0, s2
        vcvt.f64.f32    d16, s0
        vsqrt.f64   d8, d16
        vmov    r5, r6, d8
    LBB0_1:
        mov r1, r4
        mov r0, r8
        mov r2, r5
        mov r3, r6
        vstr.64 d8, [sp]
        adds    r4, #1
        blx _printf
        cmp r4, #5
        bne LBB0_1
        movs    r0, #0
        add sp, #8
        vpop    {d8}
        ldr r8, [sp], #4
        pop {r4, r5, r6, r7, pc}

        .globl  _main
        .align  2
        .code   16
        .thumb_func _main
    _main:
        push    {r7, lr}
        mov r7, sp
        movs    r0, #8
        bl  _func
        movs    r0, #0
        pop {r7, pc}

        .section    __TEXT,__cstring,cstring_literals
    L_.str:
        .asciz   "m[%d]: %f, v: %f\n"


    .subsections_via_symbols
1
That does look like a bug in the compiler indeed. Maybe you can add the generated assembly of func for the -O1 case. The loop may be unrolled, in which case replacing 5 by 2 may be more appropriate, if that still reproduces the bug. – Bryan Olivier
Thanks, I added the assembly of the sample code. – aaashun

1 Answer

1
votes

It is not clear which assembly goes wrong as both are marked as 'declare the array m[5] as double m[5]' and unfortunately I don't have the hardware nor the cross-compiler to reproduce your problem.

Remarkably the loops in both assembly codes are very similar. The only difference being that v is located in d16 in the first and in d8 in the second. The v that is passed to the printf is located in (r5,r6) in both loops and correctly copied to (r2,r3) before calling printf. For variadic functions the floating point registers shall not be used to pass parameters, contrary to non-variadic functions. Thus both loops look correct.

The only explanation I can think of is a mismatch in the ABI used for the compiled code and the ABI of the library containing printf. Especially considering that the compiled code comes from a cross compiler and I'm assuming the printf comes from a dynamic library on the system. As the printf is called conforming to the EABI for ARM, I think the bug is in the printf of the library.

If your cross compiler allows static linking, you may try that as you will be using a library that corresponds with the compiler. Of course the application becomes bigger, but it could at least confirm suspicion on the implementation of the printf. You may want to check if the library is compiled with an EABI complying compiler. If you can step through the printf on a debugger on the iPad, then you should be able to determine where the printf is taking its floating point parameter from. It should take it from (r2,r3).

Unfortunately I can not give a conclusive answer, but I hope my pointers for further investigation are helpful.