Skip to content
C++
Domain Deep-Dive
Expert

Bare-Metal C++ Startup & Linker Scripts

"ARM Cortex-M startup sequence in C++: vector tables, Reset_Handler, .data/.bss init, global constructors, and linker script anatomy."

TL;DR

On bare-metal, startup code (the C/C++ runtime init, often called crt0 or cstartup) runs before main(): it copies .data from flash to RAM, zeroes .bss, and calls global constructors. There is no OS to do this for you, so you must provide a vector table and a Reset_Handler yourself. Understanding this sequence lets you debug startup failures and control initialization order.

Boot sequence (ARM Cortex-M)

cpp
Power on / Reset

CPU loads Stack Pointer from vector_table[0]
CPU loads PC (program counter) from vector_table[1] = Reset_Handler

Reset_Handler:
  1. Set up stack (if needed)
  2. Copy .data from flash to RAM
  3. Zero .bss
  4. Call __libc_init_array() → calls global constructors
  5. Call main()
  6. If main() returns → infinite loop or reset

Vector table

cpp
// startup.cpp
#include <cstdint>

// Declare handlers as weak — user can override.
// Every handler below is a weak alias of Default_Handler: defining a function
// with the same name anywhere else in the program replaces the alias at link
// time, and any handler left undefined falls through to Default_Handler.
extern "C" {
    void Reset_Handler();
    void NMI_Handler()        __attribute__((weak, alias("Default_Handler")));
    void HardFault_Handler()  __attribute__((weak, alias("Default_Handler")));
    void MemManage_Handler()  __attribute__((weak, alias("Default_Handler")));
    void BusFault_Handler()   __attribute__((weak, alias("Default_Handler")));
    void UsageFault_Handler() __attribute__((weak, alias("Default_Handler")));
    void SVC_Handler()        __attribute__((weak, alias("Default_Handler")));
    void PendSV_Handler()     __attribute__((weak, alias("Default_Handler")));
    void SysTick_Handler()    __attribute__((weak, alias("Default_Handler")));
    // Device-specific IRQs
    void USART1_IRQHandler()  __attribute__((weak, alias("Default_Handler")));
    // ... more IRQs

    // Catch-all for unhandled exceptions/interrupts: parks the CPU so a
    // debugger can be attached to find which fault fired.
    void Default_Handler() {
        // A bare `while (true) {}` has no side effects, and C++ allows the
        // optimizer to assume such a loop terminates — it may be deleted,
        // letting execution run off the end of the function. The empty
        // volatile asm emits no instruction but gives every iteration a side
        // effect the compiler must preserve.
        while (true) {
            __asm__ volatile("");
        }
    }
}

// Linker provides these symbols. They are defined by the linker script, not by
// any C++ object: only their ADDRESSES are meaningful (the startup code always
// uses them as &_symbol), never the values stored at them.
extern uint32_t _estack;   // upper end of the stack region; its address is the initial SP (vector_table[0])
extern uint32_t _sidata;   // start of .data's initial values in flash (load address)
extern uint32_t _sdata;    // start of .data in RAM
extern uint32_t _edata;    // end of .data in RAM (one past the last word)
extern uint32_t _sbss;     // start of .bss
extern uint32_t _ebss;     // end of .bss (one past the last word)

// Vector table — must be at 0x00000000 or remapped via VTOR.
//
// Layout: word [0] is the initial stack pointer, [1]-[15] are the core
// exception handlers (0 marks a reserved slot), device IRQs follow from [16].
//
// `used` is required: in C++ a namespace-scope `const` array has internal
// linkage, and nothing in the program refers to this table by name, so without
// the attribute the compiler is free to drop it from the object file — the
// linker script's KEEP(*(.isr_vector)) can only keep a section that was
// actually emitted.
__attribute__((section(".isr_vector"), used))
const uint32_t vector_table[] = {
    reinterpret_cast<uint32_t>(&_estack),            // [0] Initial stack pointer
    reinterpret_cast<uint32_t>(&Reset_Handler),      // [1] Reset
    reinterpret_cast<uint32_t>(&NMI_Handler),        // [2] NMI
    reinterpret_cast<uint32_t>(&HardFault_Handler),  // [3] Hard Fault
    reinterpret_cast<uint32_t>(&MemManage_Handler),  // [4] MemManage
    reinterpret_cast<uint32_t>(&BusFault_Handler),   // [5] BusFault
    reinterpret_cast<uint32_t>(&UsageFault_Handler), // [6] UsageFault
    0, 0, 0, 0,                                      // [7-10] Reserved
    reinterpret_cast<uint32_t>(&SVC_Handler),        // [11] SVCall
    0, 0,                                            // [12-13] Reserved
    reinterpret_cast<uint32_t>(&PendSV_Handler),     // [14] PendSV
    reinterpret_cast<uint32_t>(&SysTick_Handler),    // [15] SysTick
    // External interrupts start at [16]
    reinterpret_cast<uint32_t>(&USART1_IRQHandler),
    // ...
};

Reset_Handler — the C++ runtime init

cpp
// Reset entry point (vector_table[1]). Brings up the C/C++ runtime — .data
// copy, .bss clear, global constructors — then hands control to main().
// Never returns.
extern "C" void Reset_Handler() {
    // Runtime entry points defined outside this file.
    extern void __libc_init_array();
    extern int main();

    // 1. Copy .data from flash to RAM.
    //    _sidata is the load address of the initial values in flash;
    //    [_sdata, _edata) is where the variables live at run time.
    const uint32_t* src = &_sidata;  // flash source
    uint32_t* dst = &_sdata;         // RAM destination
    while (dst < &_edata)
        *dst++ = *src++;

    // 2. Zero .bss — zero-initialized globals occupy [_sbss, _ebss).
    dst = &_sbss;
    while (dst < &_ebss)
        *dst++ = 0;

    // 3. Call global constructors (C++ init).
    // __libc_init_array() calls functions in .preinit_array and .init_array.
    // These include constructors for global C++ objects, so this must come
    // after steps 1 and 2: constructors may read or write globals.
    __libc_init_array();

    // 4. Call application
    main();

    // 5. Halt if main returns — there is nothing to return to.
    // A bare `while (true) {}` has no side effects, and C++ allows the
    // optimizer to assume such a loop terminates; the empty volatile asm
    // emits no instruction but forces the loop to be kept.
    while (true) {
        __asm__ volatile("");
    }
}

Linker script

/* STM32F4xx example — 1MB flash, 192KB RAM (128KB main SRAM + 64KB CCM) */

/* Physical memory regions that output sections are assigned to with "> NAME".
   Attribute letters: r = readable, w = writable, x = executable. */
MEMORY {
    FLASH (rx)  : ORIGIN = 0x08000000, LENGTH = 1024K
    RAM   (rwx) : ORIGIN = 0x20000000, LENGTH = 128K
    CCMRAM(rwx) : ORIGIN = 0x10000000, LENGTH = 64K   /* Core-Coupled Memory. NOTE(review): on STM32F4 the CCM is reportedly data-only (D-bus) — confirm whether 'x' belongs here */
}

/* Records Reset_Handler as the image entry point. The CPU itself does not use
   this: after reset it fetches the handler address from the vector table. */
ENTRY(Reset_Handler)

SECTIONS {
    /* Vector table — must be first in flash.
       KEEP protects it from section garbage collection: no code refers to
       the table by name, only the hardware reads it. */
    .isr_vector : {
        . = ALIGN(4);
        KEEP(*(.isr_vector))
        . = ALIGN(4);
    } > FLASH

    /* Code + read-only data */
    .text : {
        . = ALIGN(4);
        *(.text)
        *(.text*)
        *(.rodata)
        *(.rodata*)

        /* C++ init/fini arrays: tables of function pointers that
           __libc_init_array() walks between the *_start/*_end symbols.
           KEEP is needed because nothing references the entries directly;
           SORT orders prioritised entries (.init_array.NNNNN) first. */
        . = ALIGN(4);
        PROVIDE_HIDDEN(__preinit_array_start = .);
        KEEP(*(.preinit_array*))
        PROVIDE_HIDDEN(__preinit_array_end = .);

        PROVIDE_HIDDEN(__init_array_start = .);
        KEEP(*(SORT(.init_array.*)))
        KEEP(*(.init_array*))
        PROVIDE_HIDDEN(__init_array_end = .);

        PROVIDE_HIDDEN(__fini_array_start = .);
        KEEP(*(SORT(.fini_array.*)))
        KEEP(*(.fini_array*))
        PROVIDE_HIDDEN(__fini_array_end = .);
    } > FLASH

    /* .data init values in flash, loaded to RAM at startup.
       "> RAM AT > FLASH" gives the section a run address (VMA) in RAM and a
       load address (LMA) in flash; _sidata is the LMA the startup code
       copies from, _sdata/_edata bound the destination in RAM. */
    _sidata = LOADADDR(.data);
    .data : {
        . = ALIGN(4);
        _sdata = .;
        *(.data)
        *(.data*)
        . = ALIGN(4);
        _edata = .;
    } > RAM AT > FLASH

    /* Zero-initialized data: occupies RAM only; the startup code clears
       [_sbss, _ebss). Both ends are 4-byte aligned for the word-wise loop. */
    .bss : {
        . = ALIGN(4);
        _sbss = .;
        *(.bss)
        *(.bss*)
        *(COMMON)
        . = ALIGN(4);
        _ebss = .;
    } > RAM

    /* Stack at top of RAM: the last 8KB of the RAM region, growing down from
       _estack. Defined as plain symbols instead of an output section placed
       after .bss, so that _estack really is the end of RAM rather than
       "wherever .bss ends + 8KB". ORIGIN(RAM) + LENGTH(RAM) is 8-byte
       aligned, as the ARM procedure call standard requires for SP. */
    _estack = ORIGIN(RAM) + LENGTH(RAM);
    _sstack = _estack - 0x2000;  /* 8KB stack */
    /* Fail the link instead of letting .data/.bss overlap the stack. */
    ASSERT(_ebss <= _sstack, "RAM overflow: .data/.bss run into the stack region")

    /* Discard C++ exception tables (if -fno-exceptions) */
    /DISCARD/ : {
        *(.eh_frame)
        *(.ARM.extab)
        *(.ARM.exidx)
    }
}

Global constructors

Global C++ objects are constructed before main() via .init_array:

cpp
// Namespace-scope objects that need a run-time constructor are constructed
// before main(), in an order fixed at link time (the order of the entries the
// linker collects into .init_array).
static MyClass obj1;          // constructor called pre-main
static std::array<int,10> a; // no constructor call: zero-initialized statically

// Problem: order of construction across translation units is unspecified
// If an object in another file (say obj2) uses obj1 from its own constructor,
// obj1 may not have been constructed yet and the access may fail.

// Solution 1: Construct on first use (lazy init)
// Returns a reference to a single MyClass instance that is created the first
// time this function is called, so any caller — including another global's
// constructor — always sees a fully constructed object.
MyClass& getObj() {
    static MyClass obj;  // constructed on first call, thread-safe in C++11 (at the cost of a guard check on every call)
    return obj;
}

// Solution 2: Constexpr (no runtime init)
// The value is fixed at compile time, so there is nothing to run before main()
// and no initialization-order dependency.
constexpr std::array<int, 4> lookup = {1, 2, 4, 8};  // in .rodata, no constructor

// Solution 3: Constant-initializable types
// (Strictly, the default member initializers below make Config's default
// constructor non-trivial — what matters is that it is implicitly constexpr.)
struct Config {
    int baud_rate = 115200;   // default value baked in at compile time
    int timeout_ms = 100;     // default value baked in at compile time
};
// A global Config can therefore be constant-initialized: its initial values are
// emitted as data at build time and no pre-main constructor call is needed.

Debugging startup failures

cpp
// HardFault_Handler with register dump.
// Captures the fault status registers, breaks into the debugger if one is
// attached, then parks the CPU. Never returns.
extern "C" void HardFault_Handler() {
    // Read fault registers into volatile locals so the compiler keeps them
    // and they can be inspected from the debugger.
    volatile uint32_t cfsr = SCB->CFSR;    // Configurable Fault Status (MemManage/BusFault/UsageFault bits)
    volatile uint32_t hfsr = SCB->HFSR;    // HardFault Status
    volatile uint32_t mmfar = SCB->MMFAR;  // MemManage fault address (only valid if CFSR.MMARVALID is set)
    volatile uint32_t bfar = SCB->BFAR;    // BusFault address (only valid if CFSR.BFARVALID is set)

    // If debugger is attached, break here.
    // The check matters: executing BKPT with halting debug disabled raises a
    // fault, and a fault inside the HardFault handler puts the core into
    // lockup. DHCSR (0xE000EDF0) bit 0, C_DEBUGEN, is set while a debugger
    // has halting debug enabled.
    const bool debugger_attached =
        (*reinterpret_cast<volatile uint32_t*>(0xE000EDF0u) & 1u) != 0u;
    if (debugger_attached) {
        __BKPT(0);
    }

    // Park the CPU. The empty volatile asm gives the loop a side effect so the
    // optimizer cannot treat it as a terminating (removable) loop.
    while (true) {
        __asm__ volatile("");
    }
}

// Check stack usage
extern uint32_t _estack, _sstack;

// Prints the stack high-water mark (the deepest the stack has ever grown).
//
// Precondition: the whole stack region [_sstack, _estack) must have been
// filled with 0xDEADBEEF before the stack was used (e.g. early in the startup
// code). Without that fill pattern the scan stops immediately and the whole
// region is reported as used.
void checkStackUsage() {
    // Stack grows down — scan up from the lowest address; the first word that
    // no longer holds the fill pattern marks the deepest point ever reached.
    const uint32_t* p = &_sstack;
    while (p < &_estack && *p == 0xDEADBEEF)
        ++p;

    // uint32_t is not `unsigned int` on every toolchain (it is `unsigned long`
    // on arm-none-eabi), so convert explicitly to the type %lu expects.
    const unsigned long used =
        static_cast<unsigned long>((&_estack - p) * sizeof(uint32_t));
    printf("Stack used: %lu bytes\n", used);
}

Placing code in CCMRAM

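Core-Coupled Memory (CCMRAM) is zero-wait-state RAM accessible only by the CPU core (not by DMA). On parts whose CCM is also connected to the instruction bus (e.g. STM32F3/G4) it is ideal for ISRs and hot paths. Note that on STM32F4 the CCM sits on the D-bus only, so there it can hold hot data and stacks but cannot execute code. On a part that supports it, an ISR is placed in CCMRAM like this: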

cpp
// Place ISR in CCMRAM for lowest latency.
// The section attribute must come after `extern "C"`: a linkage specification
// has to start the declaration, so putting __attribute__ in front of it does
// not parse.
// NOTE: this only works on parts that can fetch instructions from CCM, and the
// .ccmram section must be copied from flash to CCMRAM by the startup code
// before this interrupt is enabled.
extern "C" __attribute__((section(".ccmram")))
void USART1_IRQHandler() {
    // Single-cycle access from CCMRAM
    rx_queue.push(USART1->DR & 0xFF);
}

Add to linker script:

/* Run address in CCMRAM, load address in flash.
   The section must not be NOLOAD: NOLOAD would leave any code or initialized
   data placed here out of the image, so CCMRAM would hold garbage after reset.
   The startup code has to copy (_eccmram - _sccmram) bytes from _siccmram to
   _sccmram before anything in this section is used, exactly as it does for
   .data. */
_siccmram = LOADADDR(.ccmram);
.ccmram : {
    . = ALIGN(4);
    _sccmram = .;
    *(.ccmram)
    *(.ccmram*)
    . = ALIGN(4);
    _eccmram = .;
} > CCMRAM AT > FLASH
Edit on GitHubUpdated 2026-05-01T00:00:00.000Z