0
votes

I am trying to compile an example that uses NEON assembly code for the Cortex-A8, so that I can run the binary on a BeagleBone Black (BBB) board. I use Eclipse with the GCC compiler and assembler listed below:

GCC : arm-linux-gnueabi-gcc

ASSEMBLER : arm-linux-gnueabi-as

The following error occurs with this example, and I get similar errors with every other example I try:

Description Path    Resource    Location    Type
SP not allowed in register list -- `ldmia r12,{r4-r11,r13,lr}'      EXAMPLE_NEON    line 61, external location: /tmp/ccTXrczs.s C/C++ Problem

The code I used:

/************************
* neon.c * ************************/

#include <stdio.h>


/*
 * Input and output vectors handed to neontest().
 *
 * Each array carries its own 16-byte alignment attribute: the attribute only
 * applies to the declaration it is attached to, and the NEON loads/stores in
 * neontest() use the :128 alignment qualifier on all three addresses.
 */
__attribute__((aligned (16))) unsigned short int data1[8];
__attribute__((aligned (16))) unsigned short int data2[8];
__attribute__((aligned (16))) unsigned short int out[8];

/* Scratch area into which neontest() stores r4-r11, r13 and lr. */
void *neontest_save_buffer[16];


/*
 * Add two vectors of 16-bit values with NEON and store the sum.
 *
 * The asm below names r0, r1 and r2 directly; no asm operands tie them to
 * a, b and q.  Presumably it relies on the calling convention leaving the
 * three arguments in those registers -- TODO confirm, the compiler is not
 * told about it.  No clobber list is given either.
 *
 * This is the code that triggers the assembler error quoted in the question
 * ("SP not allowed in register list" on the ldmia line).
 */
void
neontest(unsigned short int *a, unsigned short int *b,
                unsigned short int* q)
{
  __asm__(
/* r12 = address of neontest_save_buffer, built from its two 16-bit halves */
"   movw        r12, #:lower16:neontest_save_buffer\n\t"
"   movt        r12, #:upper16:neontest_save_buffer\n\t"
/* store r4-r11, r13 (sp) and lr into the buffer */
"   stmia       r12, {r4-r11, r13, lr}        @ save registers\n\t"
/* load q1 from [r0] and q2 from [r1]; :128 is the alignment qualifier */
"   vld1.16     {q1}, [r0:128]\n\t"
"   vld1.16     {q2}, [r1:128]\n\t"
/* lane-wise 16-bit integer add: q0 = q1 + q2 */
"   vadd.i16    q0, q1, q2\n\t"
/* store q0 to [r2] */
"   vst1.32     {q0}, [r2:128]\n\t"
/* rebuild the buffer address and reload the registers stored above */
"   movw        r12, #:lower16:neontest_save_buffer\n\t"
"   movt        r12, #:upper16:neontest_save_buffer\n\t"
/* the assembler rejects this line because r13 (sp) is in the register list */
"   ldmia       r12, {r4-r11, r13, lr}        @ reload all registers and return\n\t"
/* label at the end of the asm; nothing in this block branches to it */
"finish:\n\t"
    );

}

/*
 * Demo driver: fills the two input vectors, runs neontest() on them and
 * prints the eight results on one comma-separated line.
 */
int
main(void)
{
    int idx = 0;

    /* data1 = 0, 10, 20, ...; data2 = 5 everywhere; out cleared. */
    while (idx < 8)
    {
        out[idx] = 0;
        data2[idx] = 5;
        data1[idx] = idx * 10;
        idx++;
    }

    neontest(data1, data2, out);

    printf("output is: ");
    /* The first seven values are followed by ", "; the last ends the line. */
    idx = 0;
    while (idx != 7)
    {
        printf("%d, ", out[idx]);
        ++idx;
    }
    printf("%d\n", out[idx]);

    return 0;
}
2
Can you provide a full sample so that people can try to compile it? – Aif

2 Answers

0
votes

Seems like you're using Thumb32 mode, in which sp cannot be in the list of registers (from [1]).

Why do you need to save the stack pointer if you're not setting up a new stack anyway? Try removing r13 from the register lists of both the stmia and the ldmia instructions.

0
votes

By default the compiler uses Thumb mode; add "-marm" to the command line to compile the code in ARM mode:

arm-linux-gnueabihf-gcc -mcpu=cortex-a8 -mfpu=neon -marm neon.c

You can also adjust the code to let the compiler do the register save/restore; this way the code can be compiled for both the ARM and the Thumb-2 instruction sets:

#include <stdio.h>


/*
 * Input and output vectors handed to neontest().
 *
 * Each array carries its own 16-byte alignment attribute: the attribute only
 * applies to the declaration it is attached to, and the NEON loads/stores in
 * neontest() use the :128 alignment qualifier on all three addresses.
 */
__attribute__((aligned (16))) unsigned short int data1[8];
__attribute__((aligned (16))) unsigned short int data2[8];
__attribute__((aligned (16))) unsigned short int out[8];

void
neontest(unsigned short int *a, unsigned short int *b,
                unsigned short int* q)
{
    __asm volatile (
"   vld1.16     {q1}, [%[a]:128]\n\t"
"   vld1.16     {q2}, [%[b]:128]\n\t"
"   vadd.i16    q0, q1, q2\n\t"
"   vst1.32     {q0}, [%[q]:128]\n\t"

     : [q] "+r" (q)
     : [a] "r" (a), [b] "r" (b)
     : "q0", "q1", "q2"
    );
}

/*
 * Demo driver: prepares the input vectors, calls neontest() and prints the
 * eight sums as a single comma-separated line.
 */
int
main(void)
{
    int k;

    /* Element order does not matter here, so fill from the top down. */
    for (k = 7; k >= 0; k--)
    {
        data1[k] = k * 10;
        data2[k] = 5;
        out[k] = 0;
    }

    neontest(data1, data2, out);

    printf("output is: ");
    /* Every value except the last is followed by ", ". */
    for (k = 0; k + 1 < 8; k++)
    {
        printf("%d, ", out[k]);
    }
    /* k is 7 here: print the final value and end the line. */
    printf("%d\n", out[k]);

    return 0;
}

arm-linux-gnueabihf-gcc -mcpu=cortex-a8 -mfpu=neon -marm neon2.c

arm-linux-gnueabihf-gcc -mcpu=cortex-a8 -mfpu=neon -mthumb neon2.c