vectors.s
/* Minimal bootstrap: set up a stack pointer, call the C code, then park the CPU. */
.globl _start
_start:
/* sp = 0x8000, the same address the ted region (and therefore _start) begins at */
mov sp,#0x8000
/* call the C entry point notmain() */
bl notmain
/* branch-to-self: spin here forever if notmain() returns */
b .
notmain.c
/* x has no initializer; the disassembly shows it placed in .bss */
unsigned int x;
/* y has a non-zero initializer; the disassembly shows it placed in .data */
unsigned int y=0x12345678;
/* C entry point called from _start: reads y (.data) and writes x (.bss) */
void notmain ( void )
{
x=y+7;
}
memmap
/* Linker script: two named memory regions; every output section is placed in ted. */
MEMORY
{
/* bob is declared but no section below is assigned to it */
bob : ORIGIN = 0x80000000, LENGTH = 0x1000
/* ted starts at 0x8000, which is where _start lands in the disassembly */
ted : ORIGIN = 0x8000, LENGTH = 0x1000
}
SECTIONS
{
.text : { *(.text*) } > ted
.rodata : { *(.rodata*) } > ted
/* .bss is listed before .data, so .data is the last section placed in ted */
.bss : { *(.bss*) } > ted
.data : { *(.data*) } > ted
}
build
# assemble the bootstrap; warnings are treated as errors
arm-none-eabi-as --warn --fatal-warnings vectors.s -o vectors.o
# compile only (-c) the C file as freestanding code; warnings are treated as errors
arm-none-eabi-gcc -Wall -Werror -O2 -nostdlib -nostartfiles -ffreestanding -c notmain.c -o notmain.o
# link both objects using the memmap linker script
arm-none-eabi-ld vectors.o notmain.o -T memmap -o notmain.elf
# disassemble all sections (-D) into a listing for inspection
arm-none-eabi-objdump -D notmain.elf > notmain.list
# convert the ELF file into a raw binary memory image
arm-none-eabi-objcopy notmain.elf -O binary kernel.img
You can add/remove options, and name the output the right kernelX.img (and if you are venturing into 64-bit then use aarch64-whatever-gcc instead of arm-whatever-gcc).
Looking at the disassembly
Disassembly of section .text:
00008000 <_start>:
8000: e3a0d902 mov sp, #32768 ; 0x8000
8004: eb000000 bl 800c <notmain>
8008: eafffffe b 8008 <_start+0x8>
0000800c <notmain>:
800c: e59f3010 ldr r3, [pc, #16] ; 8024 <notmain+0x18>
8010: e5933000 ldr r3, [r3]
8014: e59f200c ldr r2, [pc, #12] ; 8028 <notmain+0x1c>
8018: e2833007 add r3, r3, #7
801c: e5823000 str r3, [r2]
8020: e12fff1e bx lr
8024: 00008030 andeq r8, r0, r0, lsr r0
8028: 0000802c andeq r8, r0, r12, lsr #32
Disassembly of section .bss:
0000802c <x>:
802c: 00000000 andeq r0, r0, r0
Disassembly of section .data:
00008030 <y>:
8030: 12345678 eorsne r5, r4, #120, 12 ; 0x7800000
and comparing that to the kernelX.img file
hexdump -C kernel.img
00000000 02 d9 a0 e3 00 00 00 eb fe ff ff ea 10 30 9f e5 |.............0..|
00000010 00 30 93 e5 0c 20 9f e5 07 30 83 e2 00 30 82 e5 |.0... ...0...0..|
00000020 1e ff 2f e1 30 80 00 00 2c 80 00 00 00 00 00 00 |../.0...,.......|
00000030 78 56 34 12 |xV4.|
00000034
Note that because I put .data after .bss in the linker script, the linker put them in that order in the image. There are four bytes of zeros (the .bss word for x) between the last word in .text and the 0x12345678 of .data.
If you swap the positions of .bss and .data in the linker script
0000802c <y>:
802c: 12345678 eorsne r5, r4, #120, 12 ; 0x7800000
Disassembly of section .bss:
00008030 <x>:
8030: 00000000 andeq r0, r0, r0
00000000 02 d9 a0 e3 00 00 00 eb fe ff ff ea 10 30 9f e5 |.............0..|
00000010 00 30 93 e5 0c 20 9f e5 07 30 83 e2 00 30 82 e5 |.0... ...0...0..|
00000020 1e ff 2f e1 2c 80 00 00 30 80 00 00 78 56 34 12 |../.,...0...xV4.|
00000030
Oops, no freebie. With .bss last, the image ends right after .data, so .bss is no longer zeroed for you and you would need to zero it in your bootstrap (if you have a .bss area and, as a programming style, you assume those items are zero when you first use them).
Okay, so how do you find where .bss is? Well, that is what the tutorial and countless others are showing you.
/* Bootstrap as before, plus two data words that expose linker-script symbols. */
.globl _start
_start:
/* sp = 0x8000, the address where the ted region begins */
mov sp,#0x8000
/* call the C entry point notmain() */
bl notmain
/* branch-to-self: spin here forever if notmain() returns */
b .
/* these words sit after the infinite loop, so they are data and are never executed */
linker_stuff:
/* resolved at link time to the value of hello_world (assigned just before .bss) */
.word hello_world
/* resolved at link time to the value of world_hello (assigned just after .bss) */
.word world_hello
/* Linker script with .data before .bss and two symbols bracketing .bss. */
MEMORY
{
/* bob is declared but no section below is assigned to it */
bob : ORIGIN = 0x80000000, LENGTH = 0x1000
ted : ORIGIN = 0x8000, LENGTH = 0x1000
}
SECTIONS
{
.text : { *(.text*) } > ted
.rodata : { *(.rodata*) } > ted
.data : { *(.data*) } > ted
/* location counter captured just before .bss is placed */
hello_world = .;
.bss : { *(.bss*) } > ted
/* location counter captured just after .bss is placed */
world_hello = .;
}
build and disassemble
Disassembly of section .text:
00008000 <_start>:
8000: e3a0d902 mov sp, #32768 ; 0x8000
8004: eb000002 bl 8014 <notmain>
8008: eafffffe b 8008 <_start+0x8>
0000800c <linker_stuff>:
800c: 00008038 andeq r8, r0, r8, lsr r0
8010: 0000803c andeq r8, r0, r12, lsr r0
00008014 <notmain>:
8014: e59f3010 ldr r3, [pc, #16] ; 802c <notmain+0x18>
8018: e5933000 ldr r3, [r3]
801c: e59f200c ldr r2, [pc, #12] ; 8030 <notmain+0x1c>
8020: e2833007 add r3, r3, #7
8024: e5823000 str r3, [r2]
8028: e12fff1e bx lr
802c: 00008034 andeq r8, r0, r4, lsr r0
8030: 00008038 andeq r8, r0, r8, lsr r0
Disassembly of section .data:
00008034 <y>:
8034: 12345678 eorsne r5, r4, #120, 12 ; 0x7800000
Disassembly of section .bss:
00008038 <x>:
8038: 00000000 andeq r0, r0, r0
So, digging more into toolchain-specific stuff, we can now either know the start and end of .bss or use math in the linker script to get its size/length. From that you can write a small loop that zeros that memory (in assembly language of course, chicken and egg, in the bootstrap before you branch to the C entry point of your program).
Now say for some reason you wanted .data at some other address 0x10000000
/* Minimal bootstrap: set up a stack pointer, call the C code, then park the CPU. */
.globl _start
_start:
/* sp = 0x8000, the address where the ted region begins */
mov sp,#0x8000
/* call the C entry point notmain() */
bl notmain
/* branch-to-self: spin here forever if notmain() returns */
b .
/* Linker script that places .data in a separate region (bob) far from the code. */
MEMORY
{
/* bob now starts at 0x10000000 and receives .data */
bob : ORIGIN = 0x10000000, LENGTH = 0x1000
ted : ORIGIN = 0x8000, LENGTH = 0x1000
}
SECTIONS
{
.text : { *(.text*) } > ted
.rodata : { *(.rodata*) } > ted
.bss : { *(.bss*) } > ted
/* .data is both linked and loaded at bob's address, far above everything in ted */
.data : { *(.data*) } > bob
}
00008000 <_start>:
8000: e3a0d902 mov sp, #32768 ; 0x8000
8004: eb000000 bl 800c <notmain>
8008: eafffffe b 8008 <_start+0x8>
0000800c <notmain>:
800c: e59f3010 ldr r3, [pc, #16] ; 8024 <notmain+0x18>
8010: e5933000 ldr r3, [r3]
8014: e59f200c ldr r2, [pc, #12] ; 8028 <notmain+0x1c>
8018: e2833007 add r3, r3, #7
801c: e5823000 str r3, [r2]
8020: e12fff1e bx lr
8024: 10000000 andne r0, r0, r0
8028: 0000802c andeq r8, r0, r12, lsr #32
Disassembly of section .bss:
0000802c <x>:
802c: 00000000 andeq r0, r0, r0
Disassembly of section .data:
10000000 <y>:
10000000: 12345678 eorsne r5, r4, #120, 12 ; 0x7800000
So what is the kernel.img or -O binary format? It is just a memory image starting at the lowest address (0x8000 in this case) and filled OR PADDED to the highest address, in this case 0x10000003, so it is a 0x10000004-0x8000 = 0x0FFF8004 byte file.
00000000 02 d9 a0 e3 00 00 00 eb fe ff ff ea 10 30 9f e5 |.............0..|
00000010 00 30 93 e5 0c 20 9f e5 07 30 83 e2 00 30 82 e5 |.0... ...0...0..|
00000020 1e ff 2f e1 00 00 00 10 2c 80 00 00 00 00 00 00 |../.....,.......|
00000030 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
0fff8000 78 56 34 12 |xV4.|
0fff8004
That is a massive waste of disk space for this program, they padded the hell out of that. Now if for some reason you wanted to do something like this, various reasons (that generally do not apply to bare metal on the pi), you could do this instead:
/* Linker script where .data runs from bob but is stored in the image inside ted. */
MEMORY
{
bob : ORIGIN = 0x10000000, LENGTH = 0x1000
ted : ORIGIN = 0x8000, LENGTH = 0x1000
}
SECTIONS
{
.text : { *(.text*) } > ted
.rodata : { *(.rodata*) } > ted
.bss : { *(.bss*) } > ted
/* "> bob" sets the address the code is linked against; "AT > ted" sets where the bytes are stored in the image */
.data : { *(.data*) } > bob AT > ted
}
00000000 02 d9 a0 e3 00 00 00 eb fe ff ff ea 10 30 9f e5 |.............0..|
00000010 00 30 93 e5 0c 20 9f e5 07 30 83 e2 00 30 82 e5 |.0... ...0...0..|
00000020 1e ff 2f e1 00 00 00 10 2c 80 00 00 00 00 00 00 |../.....,.......|
00000030 78 56 34 12 |xV4.|
00000034
Disassembly of section .bss:
0000802c <x>:
802c: 00000000 andeq r0, r0, r0
Disassembly of section .data:
10000000 <y>:
10000000: 12345678 eorsne r5, r4, #120, 12 ; 0x7800000
What it has done is this: the code is compiled and linked for .data at 0x10000000, but the binary that you carry around and load has the .data data bundled up tight. It is the job of the bootstrap to copy that data to its correct landing spot of 0x10000000, and again you have to use toolchain-specific linker scripty stuff.
/* Bootstrap as before, plus two data words that expose the data_start/data_end linker symbols. */
.globl _start
_start:
/* sp = 0x8000, the address where the ted region begins */
mov sp,#0x8000
/* call the C entry point notmain() */
bl notmain
/* branch-to-self: spin here forever if notmain() returns */
b .
/* these words sit after the infinite loop, so they are data and are never executed */
linker_stuff:
/* resolved at link time to the value of data_start from the linker script */
.word data_start
/* resolved at link time to the value of data_end from the linker script */
.word data_end
/* Linker script attempting to bracket .data (linked in bob, stored in ted) with two symbols. */
MEMORY
{
bob : ORIGIN = 0x10000000, LENGTH = 0x1000
ted : ORIGIN = 0x8000, LENGTH = 0x1000
}
SECTIONS
{
.text : { *(.text*) } > ted
.rodata : { *(.rodata*) } > ted
.bss : { *(.bss*) } > ted
/* assigned before .data is placed; the resulting listing shows it as 0x00008038 */
data_start = .;
.data : { *(.data*) } > bob AT > ted
/* assigned after .data is placed; the resulting listing shows it as 0x10000004 */
data_end = .;
}
0000800c <linker_stuff>:
800c: 00008038 andeq r8, r0, r8, lsr r0
8010: 10000004 andne r0, r0, r4
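And clearly that didn't quite work: data_start came out as 0x8038 (an address in ted) while data_end came out as 0x10000004 (an address in bob), so the two do not bracket the same region. You have to do more linker scripty stuff to figure it out.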
There is no good reason to need any of this for the Raspberry Pi. At best, if you have .bss and don't have any .data, and/or you put .bss last because you have a lot of it, then you can either take advantage of the toolchain accidentally zero padding and solving the .bss problem for you or, if that makes too big of a binary, you can see above how to find the .bss offset and size and then add the few lines of code to zero it (ultimately costing load time either way, but not costing SD card space).
Where you definitely need to learn such things is when you are on a microcontroller where the non-volatile memory is treated as read-only flash. If you choose to program with a style that requires .data and/or .bss and you assume those items are implemented, then you have to do the toolchain-specific work to link, then zero and/or copy from non-volatile flash to read/write RAM, before branching into the first or only C entry point of your application.
I am sure someone could come up with reasons to not pack a Pi bare metal binary up nice and neat, there is always an exception... but for now you don't need to worry about those exceptions: put .bss first, then .data, and always make sure you have a .data item even if it is something you never use.