A complete example; no other code needed.
flash.s
/*
 * Vector table and reset entry.
 *
 * A Cortex-M core does not start executing at the first address of flash.
 * It loads the initial stack pointer from the first word and the reset
 * handler address from the second word, so the image must begin with data
 * words, not instructions. The next two entries both point at hang so that
 * the first two exceptions after reset park the core in a known place.
 *
 * Layout produced (linked at 0x08000000):
 *   0x08000000  0x20001000   initial stack pointer (top of ram)
 *   0x08000004  reset        (bit 0 set by the assembler via .thumb_func)
 *   0x08000008  hang
 *   0x0800000C  hang
 *   0x08000010  reset:       bl notmain ; b hang
 */
.cpu cortex-m0
.thumb

.thumb_func
.global _start
_start:
stacktop: .word 0x20001000
.word reset
.word hang
.word hang

/* Reset handler: call the C entry point; if it ever returns, spin. */
.thumb_func
reset:
    bl notmain
    b hang

/* Infinite loop used as the fall-back exception handler. */
.thumb_func
hang:   b .
/*
 * void PUT32(unsigned int address, unsigned int value)
 * Stores the word in r1 to the address held in r0, then returns.
 * Kept in assembly so the access is a single str that the C compiler
 * cannot reorder or remove.
 */
.thumb_func
.globl PUT32
PUT32:
str r1,[r0]
bx lr
/*
 * unsigned int GET32(unsigned int address)
 * Loads the word at the address held in r0 into r0 and returns it.
 */
.thumb_func
.globl GET32
GET32:
ldr r0,[r0]
bx lr
/*
 * dummy: returns to the caller immediately without touching any register.
 * Not called by the C code shown alongside this file.
 */
.thumb_func
.globl dummy
dummy:
bx lr
flash.ld
/*
 * Memory map: 0x1000 bytes of rom at 0x08000000 and 0x1000 bytes of ram at
 * 0x20000000. The end of ram (0x20001000) is the stack top used in flash.s.
 */
MEMORY
{
rom : ORIGIN = 0x08000000, LENGTH = 0x1000
ram : ORIGIN = 0x20000000, LENGTH = 0x1000
}
/*
 * Code and read-only data go to rom, .bss goes to ram.
 * There is no .data output section and nothing here zeroes .bss, so the
 * C code must not rely on initialised or zeroed globals.
 */
SECTIONS
{
.text : { *(.text*) } > rom
.rodata : { *(.rodata*) } > rom
.bss : { *(.bss*) } > ram
}
notmain.c
/* Word-wide store/load helpers implemented in flash.s:
   PUT32(address, value) writes value to address, GET32(address) reads it. */
void PUT32 ( unsigned int, unsigned int );
unsigned int GET32 ( unsigned int );
/* Register addresses, each written as peripheral base + register offset. */
#define RCCBASE 0x40023800
#define RCC_CR (RCCBASE+0x00)
#define RCC_CFGR (RCCBASE+0x08)
#define RCC_APB1RSTR (RCCBASE+0x20)
#define RCC_AHB1ENR (RCCBASE+0x30)
#define RCC_APB1ENR (RCCBASE+0x40)
#define GPIOABASE 0x40020000
#define GPIOA_MODER (GPIOABASE+0x00)
#define GPIOA_AFRL (GPIOABASE+0x20)
#define USART2BASE 0x40004400
#define USART2_SR (USART2BASE+0x00)
#define USART2_DR (USART2BASE+0x04)
#define USART2_BRR (USART2BASE+0x08)
#define USART2_CR1 (USART2BASE+0x0C)
//PA2 is USART2_TX alternate function 7 (the value uart2_init writes to AFRL)
//PA3 is USART2_RX alternate function 7
/*
 * Switch the system clock to the external clock source.
 *
 * Sets bit 16 of RCC_CR and polls until bit 17 reads back set, then writes
 * the value 1 into bits [1:0] of RCC_CFGR and polls until bits [3:2] read
 * back as 1. Both polls are unbounded. Always returns 0.
 */
static int clock_init ( void )
{
    unsigned int cr;
    unsigned int cfgr;

    /* turn the external clock on and wait for it to report ready */
    cr = GET32(RCC_CR) | (1<<16);
    PUT32(RCC_CR, cr);
    while ((GET32(RCC_CR) & (1<<17)) == 0)
    {
        /* spin */
    }

    /* select it as the system clock and wait for the switch to take effect */
    cfgr = GET32(RCC_CFGR);
    cfgr = (cfgr & ~3) | 1;
    PUT32(RCC_CFGR, cfgr);
    while (((GET32(RCC_CFGR) >> 2) & 3) != 1)
    {
        /* spin */
    }

    return 0;
}
/*
 * Configure USART2 on PA2 (TX) and PA3 (RX).
 *
 * Steps, in register-access order:
 *   1. enable the port A clock (RCC_AHB1ENR bit 0) and the USART2 clock
 *      (RCC_APB1ENR bit 17);
 *   2. set the PA2 and PA3 mode fields in GPIOA_MODER to 2;
 *   3. set the PA2 and PA3 fields in GPIOA_AFRL to 7;
 *   4. pulse the USART2 reset bit (RCC_APB1RSTR bit 17);
 *   5. program the baud rate divisor and write bits 13, 3 and 2 of CR1.
 *
 * Each field is cleared and set in a local copy and written back once, so
 * the hardware sees a single change per register. Always returns 0.
 */
int uart2_init ( void )
{
    unsigned int reg;

    reg = GET32(RCC_AHB1ENR);
    PUT32(RCC_AHB1ENR, reg | (1<<0));       /* enable port A */

    reg = GET32(RCC_APB1ENR);
    PUT32(RCC_APB1ENR, reg | (1<<17));      /* enable USART2 */

    reg = GET32(GPIOA_MODER);
    reg &= ~((3<<4) | (3<<6));              /* clear PA2, PA3 mode fields */
    reg |= (2<<4) | (2<<6);                 /* PA2, PA3 mode = 2 */
    PUT32(GPIOA_MODER, reg);

    reg = GET32(GPIOA_AFRL);
    reg &= ~((0xF<<8) | (0xF<<12));         /* clear PA2, PA3 function fields */
    reg |= (0x7<<8) | (0x7<<12);            /* PA2, PA3 function = 7 */
    PUT32(GPIOA_AFRL, reg);

    reg = GET32(RCC_APB1RSTR);
    PUT32(RCC_APB1RSTR, reg | (1<<17));     /* assert USART2 reset */
    PUT32(RCC_APB1RSTR, reg & ~(1<<17));    /* release USART2 reset */

    /* 8000000/(16*115200) = 4.34, i.e. 4 + 5/16 -> 0x45 */
    PUT32(USART2_BRR, 0x45);
    PUT32(USART2_CR1, (1<<13) | (1<<3) | (1<<2));

    return 0;
}
/*
 * Send one character on USART2.
 * Polls USART2_SR until bit 7 is set (unbounded wait), then writes x to
 * USART2_DR.
 */
void uart2_send ( unsigned int x )
{
    while ((GET32(USART2_SR) & (1<<7)) == 0)
    {
        /* wait until the status register reports bit 7 set */
    }
    PUT32(USART2_DR, x);
}
/*
 * C entry point called from the reset code.
 * Initialises the clock and USART2, then transmits the characters
 * '0'..'7' (0x30 plus the low three bits of a counter) forever.
 * Never returns in practice; the trailing return only satisfies the
 * int return type.
 */
int notmain ( void )
{
    unsigned int count = 0;

    clock_init();
    uart2_init();

    while (1)
    {
        uart2_send(0x30 + (count & 7));
        count++;
    }

    return 0;
}
build
# Assemble the startup code; warnings are treated as errors.
arm-linux-gnueabi-as --warn --fatal-warnings -mcpu=cortex-m4 flash.s -o flash.o
# Compile the C code as freestanding Thumb code.
arm-linux-gnueabi-gcc -Wall -O2 -ffreestanding -mcpu=cortex-m4 -mthumb -c notmain.c -o notmain.o
# Link with flash.ld and no standard libraries or startup files.
# flash.o comes first so its contents land at the start of rom.
arm-linux-gnueabi-ld -nostdlib -nostartfiles -T flash.ld flash.o notmain.o -o notmain.elf
# Produce a disassembly listing for inspection.
arm-linux-gnueabi-objdump -D notmain.elf > notmain.list
# Produce the raw binary image to load onto the board.
arm-linux-gnueabi-objcopy -O binary notmain.elf notmain.bin
arm-whatever-whatever will work...
check file
Disassembly of section .text:
08000000 <_start>:
8000000: 20001000 andcs r1, r0, r0
8000004: 08000011 stmdaeq r0, {r0, r4}
8000008: 08000017 stmdaeq r0, {r0, r1, r2, r4}
800000c: 08000017 stmdaeq r0, {r0, r1, r2, r4}
08000010 <reset>:
8000010: f000 f86e bl 80000f0 <notmain>
8000014: e7ff b.n 8000016 <hang>
08000016 <hang>:
8000016: e7fe b.n 8000016 <hang>
Vector table looks good, there is half a chance this will boot.
Copy notmain.bin to the virtual drive created when you plug the card in.
It will blast 0123456701234567 forever on the virtual com port created by the debugger end of the board (115200 8N1).
Not that I am using the rx as shown, but you only seemed to set one of the two.
I don't see that you zeroed the moder register bits before setting them.
The math looks wrong on the baud rate register unless you are magically setting the clock elsewhere then running this code after the fact (rather than normal power on/reset).
Same goes for afrl; I didn't look at the register today but any time you are changing bits (not just setting one bit to a one) you need to zero the other bits in the field. In your case 7 might be all the bits so an or equals might work, but check that.
I would recommend you do it in one register write rather than the magic volatile pointer &= and then, in a separate step, |=. Instead: x = register; x&=....; x|=....; then register=x. That way the feature doesn't change modes twice, it just changes once. Whether or not it is okay to change twice depends on the feature and the peripheral and how it reacts to writes (probably fine in this case, but this is a good general rule).
If you are doing some other magic for the clock they may also be messing with the uart, good idea to just reset it, in general for a peripheral like this (might have even been in the docs, have not looked in a while). Otherwise you are not at a known state (true for anything in your program if you are pre-running something else) and you can't simply touch a few fields in a few registers you have to touch the whole peripheral.
I don't think the clock init above is required, I simply switched it to use a crystal based clock rather than the on chip RC clock.
Edit
Very sorry, re-reading your question. I left the above as is even though it is not what you asked; the modification below makes the uart send on PA9 using USART1_TX.
notmain.c
/* Word-wide store/load helpers implemented in flash.s:
   PUT32(address, value) writes value to address, GET32(address) reads it. */
void PUT32 ( unsigned int, unsigned int );
unsigned int GET32 ( unsigned int );
/* Register addresses, each written as peripheral base + register offset.
   The commented-out names are the ones the USART2 version used; this
   version uses the APB2 and AFRH registers in their place. */
#define RCCBASE 0x40023800
#define RCC_CR (RCCBASE+0x00)
#define RCC_CFGR (RCCBASE+0x08)
//#define RCC_APB1RSTR (RCCBASE+0x20)
#define RCC_APB2RSTR (RCCBASE+0x24)
#define RCC_AHB1ENR (RCCBASE+0x30)
//#define RCC_APB1ENR (RCCBASE+0x40)
#define RCC_APB2ENR (RCCBASE+0x44)
#define GPIOABASE 0x40020000
#define GPIOA_MODER (GPIOABASE+0x00)
//#define GPIOA_AFRL (GPIOABASE+0x20)
#define GPIOA_AFRH (GPIOABASE+0x24)
#define USART1BASE 0x40011000
#define USART1_SR (USART1BASE+0x00)
#define USART1_DR (USART1BASE+0x04)
#define USART1_BRR (USART1BASE+0x08)
#define USART1_CR1 (USART1BASE+0x0C)
//PA9 is USART1_TX alternate function 7
/*
 * Switch the system clock to the external clock source.
 *
 * Sets bit 16 of RCC_CR and waits for bit 17 to read back set, then writes
 * the value 1 into bits [1:0] of RCC_CFGR and waits for bits [3:2] to read
 * back as 1. Neither wait has a timeout. Always returns 0.
 */
static int clock_init ( void )
{
    unsigned int value;

    /* external clock on, then wait for its ready flag */
    value = GET32(RCC_CR);
    PUT32(RCC_CR, value | (1<<16));
    for (;;)
    {
        if (GET32(RCC_CR) & (1<<17))
        {
            break;
        }
    }

    /* select it as system clock, then wait for the status field to agree */
    value = GET32(RCC_CFGR);
    value &= ~3;
    PUT32(RCC_CFGR, value | 1);
    for (;;)
    {
        unsigned int status = (GET32(RCC_CFGR) >> 2) & 3;
        if (status == 1)
        {
            break;
        }
    }

    return 0;
}
/*
 * Configure USART1 with PA9 as its transmit pin.
 *
 * Enables the port A clock (RCC_AHB1ENR bit 0) and the USART1 clock
 * (RCC_APB2ENR bit 4), sets the PA9 mode field in GPIOA_MODER to 2 and its
 * function field in GPIOA_AFRH to 7, pulses the USART1 reset bit
 * (RCC_APB2RSTR bit 4), then programs the baud rate divisor and writes
 * bits 13, 3 and 2 of CR1. Always returns 0.
 */
int uart_init ( void )
{
    const unsigned int mode_shift = 9<<1;   /* PA9: two mode bits per pin */
    const unsigned int func_shift = 4;      /* PA9: second 4-bit field of AFRH */
    unsigned int reg;

    reg = GET32(RCC_AHB1ENR);
    PUT32(RCC_AHB1ENR, reg | (1<<0));       /* enable port A */

    reg = GET32(RCC_APB2ENR);
    PUT32(RCC_APB2ENR, reg | (1<<4));       /* enable USART1 */

    reg = GET32(GPIOA_MODER);
    reg = (reg & ~(3<<mode_shift)) | (2<<mode_shift);
    PUT32(GPIOA_MODER, reg);

    reg = GET32(GPIOA_AFRH);
    reg = (reg & ~(0xF<<func_shift)) | (0x7<<func_shift);
    PUT32(GPIOA_AFRH, reg);

    reg = GET32(RCC_APB2RSTR);
    PUT32(RCC_APB2RSTR, reg | (1<<4));      /* assert USART1 reset */
    PUT32(RCC_APB2RSTR, reg & ~(1<<4));     /* release USART1 reset */

    /* 8000000/(16*115200) = 4.34, i.e. 4 + 5/16 -> 0x45 */
    PUT32(USART1_BRR, 0x45);
    PUT32(USART1_CR1, (1<<13) | (1<<3) | (1<<2));

    return 0;
}
/*
 * Send one character on USART1.
 * Re-reads USART1_SR until bit 7 is set (no timeout), then writes x to
 * USART1_DR.
 */
void uart_send ( unsigned int x )
{
    unsigned int status;

    do
    {
        status = GET32(USART1_SR);
    } while ((status & (1<<7)) == 0);

    PUT32(USART1_DR, x);
}
/*
 * C entry point called from the reset code.
 * Initialises the clock and USART1, then transmits the characters
 * '0'..'7' (0x30 plus the low three bits of a counter) forever.
 * The trailing return is unreachable and only satisfies the return type.
 */
int notmain ( void )
{
    unsigned int count = 0;

    clock_init();
    uart_init();

    while (1)
    {
        uart_send(0x30 + (count & 7));
        count++;
    }

    return 0;
}
PA9 is tied to an external header pin, Arduino style data pin, very unlikely they would also use that for usb.
MODER resets to zeros for these pins so an or equal will work.
AFRL and AFRH reset to zero so an or equal will work.
To see the output you need to connect a uart device to PA9, the data does not go through the virtual com port if you want to see UART1 work.
I changed the clock from 16MHz to 8MHz, so for this chip's uart (ST has different peripherals in their library that they pick and choose from when they make a chip):
//8000000/(16*115200) = 4.34 4+5/16
PUT32(USART1_BRR,0x45);
If you think about it 8000000/115200 = 69.444 = 0x45. You don't need to do the fraction math separately.
So looking at your code you are doing PB6 which is fine for USART1_TX alternate function 7. It all looks fine except for the BRR and that your delay function may be dead code and optimized out but since you look for the tx empty status bit before adding a character that should allow your code to work.
PB6 is one of the header pins so you can hook a (3.3v) uart up to it and see if your data is coming out. I would recommend, if nothing else, that you simply try 16000000/115200 = 138.9 = 0x8A or 0x8B in the BRR; why not, it only takes a second.
Otherwise if you have a scope put a probe on there. I recommend instead of the letter K use U which is 0x55 which with 8N1 that comes out as a square wave when you transmit as fast as you can (no gaps between characters) and really easy to measure on a scope. then mess with your BRR register and see how that changes the frequency of the output on a scope.
This uses USART1_TX on PB6, and I removed the crystal clock init so it is using the 16MHz HSI clock.
Fundamentally the difference here is you have a different BRR setting.
/* Word-wide store/load helpers implemented in flash.s:
   PUT32(address, value) writes value to address, GET32(address) reads it. */
void PUT32 ( unsigned int, unsigned int );
unsigned int GET32 ( unsigned int );
/* Register addresses, each written as peripheral base + register offset.
   RCC_CR and RCC_CFGR are defined but not used by this version, which has
   no clock_init. */
#define RCCBASE 0x40023800
#define RCC_CR (RCCBASE+0x00)
#define RCC_CFGR (RCCBASE+0x08)
//#define RCC_APB1RSTR (RCCBASE+0x20)
#define RCC_APB2RSTR (RCCBASE+0x24)
#define RCC_AHB1ENR (RCCBASE+0x30)
//#define RCC_APB1ENR (RCCBASE+0x40)
#define RCC_APB2ENR (RCCBASE+0x44)
#define GPIOBBASE 0x40020400
#define GPIOB_MODER (GPIOBBASE+0x00)
#define GPIOB_AFRL (GPIOBBASE+0x20)
#define USART1BASE 0x40011000
#define USART1_SR (USART1BASE+0x00)
#define USART1_DR (USART1BASE+0x04)
#define USART1_BRR (USART1BASE+0x08)
#define USART1_CR1 (USART1BASE+0x0C)
/*
 * Configure USART1 with PB6 as its transmit pin, transmit only.
 *
 * Enables the port B clock (RCC_AHB1ENR bit 1) and the USART1 clock
 * (RCC_APB2ENR bit 4), sets the PB6 mode field in GPIOB_MODER to 2 and its
 * function field in GPIOB_AFRL to 7, pulses the USART1 reset bit
 * (RCC_APB2RSTR bit 4), then programs the baud rate divisor and writes
 * bits 13 and 3 of CR1. Always returns 0.
 */
int uart_init ( void )
{
    const unsigned int mode_shift = 6<<1;   /* PB6: two mode bits per pin */
    const unsigned int func_shift = 24;     /* PB6: seventh 4-bit field of AFRL */
    unsigned int reg;

    reg = GET32(RCC_AHB1ENR);
    PUT32(RCC_AHB1ENR, reg | (1<<1));       /* enable port B */

    reg = GET32(RCC_APB2ENR);
    PUT32(RCC_APB2ENR, reg | (1<<4));       /* enable USART1 */

    reg = GET32(GPIOB_MODER);
    reg = (reg & ~(3<<mode_shift)) | (2<<mode_shift);
    PUT32(GPIOB_MODER, reg);

    reg = GET32(GPIOB_AFRL);
    reg = (reg & ~(0xF<<func_shift)) | (0x7<<func_shift);
    PUT32(GPIOB_AFRL, reg);

    reg = GET32(RCC_APB2RSTR);
    PUT32(RCC_APB2RSTR, reg | (1<<4));      /* assert USART1 reset */
    PUT32(RCC_APB2RSTR, reg & ~(1<<4));     /* release USART1 reset */

    /* 16000000/115200 */
    PUT32(USART1_BRR, 0x8B);
    PUT32(USART1_CR1, (1<<13) | (1<<3));

    return 0;
}
/*
 * Send one character on USART1.
 * Spins (no timeout) until bit 7 of USART1_SR reads as set, then writes x
 * to USART1_DR.
 */
void uart_send ( unsigned int x )
{
    for (;;)
    {
        unsigned int status = GET32(USART1_SR);
        if ((status & (1<<7)) != 0)
        {
            break;
        }
    }
    PUT32(USART1_DR, x);
}
/*
 * C entry point called from the reset code.
 * Initialises USART1 (the clock is left at its reset setting) and then
 * transmits the characters '0'..'7' (0x30 plus the low three bits of a
 * counter) forever. The trailing return is unreachable and only satisfies
 * the return type.
 */
int notmain ( void )
{
    unsigned int count = 0;

    uart_init();

    while (1)
    {
        uart_send(0x30 + (count & 7));
        count++;
    }

    return 0;
}
Also note that when blasting at this rate, depending on data patterns it is possible for the receiver to get out of sync such that the characters received are not the ones sent, so you may need to press and hold the reset button on the board, then release and see if the receiver then sees the pattern as desired, perhaps that is why you are blasting K instead of U or something else.
The PB6 pin is two pins above the PA9 pin on the right side of the board D10 instead of D8, note that the male pins to the right are a half step below the female arduino header pins, look at the documentation for the board to find out where to hook your uart up.