Skip to content
C++
Domain Track
Difficulty 4/5

C++ in Embedded & IoT

"C++ for embedded systems: freestanding C++, MISRA-C++, RTOS integration (FreeRTOS, Zephyr), bare-metal patterns, deterministic timing, and zero-overhead abstractions."

C++ on microcontrollers

Modern C++ (C++14/17/20) is fully usable on bare-metal microcontrollers — ARM Cortex-M, RISC-V, AVR, ESP32. The key insight: most of C++ has zero overhead. Templates, constexpr, RAII, and inline functions generate identical machine code to their C equivalents when optimized. The overhead comes from specific features: exceptions, RTTI, dynamic allocation.


Freestanding C++

A freestanding implementation provides only a minimal subset of the standard library — no dynamic memory, no filesystem, no threads, no I/O streams. Most embedded compilers target freestanding mode.

cpp
// Available in freestanding C++
#include <cstdint>      // uint8_t, uint32_t, ...
#include <cstddef>      // size_t, ptrdiff_t, nullptr_t
#include <type_traits>  // all type trait templates
#include <limits>       // std::numeric_limits
#include <bit>          // std::bit_cast, popcount (C++20)
// NOTE(review): <array> and <span> appear to become (partially) freestanding
// only with C++26, not C++23 — verify against the freestanding header list.
// Embedded toolchains commonly ship both anyway; check yours.
#include <array>        // std::array (C++23 made more of this freestanding)
#include <span>         // std::span (C++23 freestanding)

// NOT available without hosted stdlib:
// #include <vector>        // dynamic memory
// #include <string>        // dynamic memory
// #include <iostream>      // file I/O, streams
// #include <thread>        // OS threads

Disable exceptions and RTTI

cmake
# CMakeLists.txt for embedded
# The three -fno-* flags below are C++-only. Firmware targets usually also
# compile C sources (vendor HAL, RTOS kernel), and GCC warns when these flags
# reach the C compiler, so they are restricted to C++ translation units with
# the COMPILE_LANGUAGE generator expression (CMake 3.3+).
target_compile_options(firmware PRIVATE
    $<$<COMPILE_LANGUAGE:CXX>:-fno-exceptions>          # disables exception handling code
    $<$<COMPILE_LANGUAGE:CXX>:-fno-rtti>                # disables typeid and dynamic_cast
    $<$<COMPILE_LANGUAGE:CXX>:-fno-threadsafe-statics>  # no mutex for static local init
    -Os                                                 # optimize for size (C and C++)
)

Zero-overhead abstractions for hardware

Register abstraction with constexpr

cpp
// Compile-time model of a single memory-mapped peripheral register.
//   Address — absolute address of the register, fixed as a template argument.
//   T       — type used for every access (defaults to uint32_t).
// All members are static, so the type itself holds no data.
template<std::uintptr_t Address, typename T = uint32_t>
struct Register {
    // Reference to the register; volatile-qualified so accesses through it
    // are not optimized away or merged.
    static volatile T& ref() {
        volatile T* const location = reinterpret_cast<volatile T*>(Address);
        return *location;
    }

    // Returns the current register contents.
    static T read() {
        return ref();
    }

    // Replaces the whole register with val.
    static void write(T val) {
        ref() = val;
    }

    // Read-modify-write: turns on every bit that is set in mask.
    static void set(T mask) {
        volatile T& reg = ref();
        reg |= mask;
    }

    // Read-modify-write: turns off every bit that is set in mask.
    static void clear(T mask) {
        volatile T& reg = ref();
        reg &= ~mask;
    }
};

// STM32F4 GPIOA ODR (Output Data Register)
using GPIOA_ODR = Register<0x40020014>;

// set()/clear() are read-modify-write sequences (|= and &= in Register), so
// an interrupt that modifies the same register between the load and the
// store can have its change overwritten.
GPIOA_ODR::set(1 << 5);    // set pin PA5
GPIOA_ODR::clear(1 << 5);  // clear pin PA5
// Generates: LDR/ORR/STR — same as hand-written C
// (that sequence is for set(); clear() would typically be LDR/BIC/STR)

GPIO abstraction without virtual dispatch

cpp
// Static, zero-storage handle for one pin of an STM32F4-style GPIO port.
//   Port — base address of the port's register block (e.g. 0x40020000 = GPIOA).
//   Pin  — pin number within the port, 0-15.
//
// Output changes go through BSRR (offset 0x18), which is write-only and
// affects only the pins whose bits are written as 1:
//   bits  0-15 (BSy) set the pin, bits 16-31 (BRy) reset it.
// This is a single store with no read-modify-write. Writing the mask to ODR
// (offset 0x14) instead would drive every other pin of the port low.
template<uint32_t Port, uint8_t Pin>
struct GPIO {
    static_assert(Pin < 16, "GPIO port has 16 pins (0-15)");

    static constexpr uint32_t mask = 1u << Pin;

    // Drive the pin high: write its bit in the BSRR "set" half.
    static void set() noexcept {
        reg(bsrr_offset) = mask;
    }
    // Drive the pin low: write its bit in the BSRR "reset" half.
    static void clear() noexcept {
        reg(bsrr_offset) = mask << 16;
    }
    // Sample the pin's input level from IDR; true when the pin reads high.
    static bool read() noexcept {
        return (reg(idr_offset) & mask) != 0;
    }

private:
    static constexpr uint32_t idr_offset  = 0x10;  // Input Data Register
    static constexpr uint32_t bsrr_offset = 0x18;  // Bit Set/Reset Register

    // Volatile reference to the 32-bit register at Port + offset.
    static volatile uint32_t& reg(uint32_t offset) noexcept {
        return *reinterpret_cast<volatile uint32_t*>(Port + offset);
    }
};

// Usage — all calls inlined, zero overhead
// LED is only a type alias: port base and pin are template arguments, and
// set()/clear() are static members, so no GPIO object is ever created.
using LED = GPIO<0x40020000, 5>;  // GPIOA Pin 5
LED::set();
LED::clear();

RAII on bare metal

RAII works perfectly without the heap:

cpp
// Interrupt guard — disables interrupts for the lifetime of the object.
//
// The constructor records PRIMASK (via the CMSIS __get_PRIMASK intrinsic)
// before calling __disable_irq(). The destructor calls __enable_irq() only
// if the recorded value was zero, i.e. interrupts were enabled when the guard
// was created. Guards can therefore nest: an inner guard leaves interrupts
// disabled for the outer one.
//
// The guard is neither copy-constructible nor copy-assignable. Assigning one
// guard to another would overwrite the saved PRIMASK value and could
// re-enable interrupts at the wrong moment.
class CriticalSection {
public:
    CriticalSection() noexcept : saved_primask_(__get_PRIMASK()) {
        __disable_irq();
    }
    ~CriticalSection() noexcept {
        // Restore only what this guard changed.
        if (!saved_primask_) __enable_irq();
    }
    CriticalSection(const CriticalSection&) = delete;
    CriticalSection& operator=(const CriticalSection&) = delete;
private:
    uint32_t saved_primask_;  // PRIMASK value observed at construction
};

// Stores a freshly computed value into the shared state while a
// CriticalSection guard is alive.
void update_shared_state() {
    // Constructing the guard disables interrupts for the rest of this scope.
    CriticalSection irq_guard;
    shared_data_.value = compute();
    // Leaving the scope — normally, by early return, or by an exception where
    // those are enabled — runs irq_guard's destructor, which re-enables
    // interrupts automatically.
}

Static memory — no heap required

cpp
// Fixed-capacity vector with inline storage — a heap-free stand-in for
// std::vector.
//   T — element type (must be copy-constructible).
//   N — maximum number of elements; the storage lives inside the object.
//
// Elements are created in place with placement new and destroyed by clear()
// or the destructor. Copying copy-constructs each element rather than
// duplicating raw bytes, so non-trivial element types are handled correctly.
template<typename T, std::size_t N>
class StaticVector {
    alignas(T) std::byte storage_[sizeof(T) * N];
    std::size_t size_ = 0;

    // Pointer to the element slot at index i (no bounds check).
    T* slot(std::size_t i) noexcept {
        return std::launder(reinterpret_cast<T*>(storage_ + i * sizeof(T)));
    }
    const T* slot(std::size_t i) const noexcept {
        return std::launder(reinterpret_cast<const T*>(storage_ + i * sizeof(T)));
    }

public:
    StaticVector() = default;

    // Element-wise copy; capacities are identical so every push_back succeeds.
    StaticVector(const StaticVector& other) {
        for (std::size_t i = 0; i < other.size_; ++i) {
            static_cast<void>(push_back(other[i]));
        }
    }

    StaticVector& operator=(const StaticVector& other) {
        if (this != &other) {
            clear();
            for (std::size_t i = 0; i < other.size_; ++i) {
                static_cast<void>(push_back(other[i]));
            }
        }
        return *this;
    }

    ~StaticVector() { clear(); }

    // Appends a copy of val. Returns false (and stores nothing) when full.
    bool push_back(const T& val) noexcept {
        if (size_ >= N) return false;
        new(storage_ + size_ * sizeof(T)) T{val};
        ++size_;
        return true;
    }

    // Unchecked element access; i must be < size().
    T& operator[](std::size_t i) noexcept {
        return *slot(i);
    }
    const T& operator[](std::size_t i) const noexcept {
        return *slot(i);
    }

    // Destroys all elements, last to first, leaving the vector empty.
    void clear() noexcept {
        while (size_ > 0) {
            --size_;
            slot(size_)->~T();
        }
    }

    std::size_t size() const noexcept { return size_; }
    std::size_t capacity() const noexcept { return N; }
};

// Room for 64 readings, stored inside the object itself.
StaticVector<SensorReading, 64> readings;
// push_back returns false once the buffer is full; the result is ignored here.
readings.push_back({timestamp, value});

RTOS integration

FreeRTOS with C++ tasks

cpp
#include "FreeRTOS.h"
#include "task.h"

// Wrap a FreeRTOS task in a C++ class.
//
// The task control block and stack are members, so creating the task needs no
// heap (xTaskCreateStatic; requires configSUPPORT_STATIC_ALLOCATION == 1).
// The running task keeps a pointer to this object and to its buffers, so the
// object is non-copyable and must outlive the task.
class SensorTask {
    // Stack depth in StackType_t words (not bytes). Single source of truth
    // for both the buffer size and the value passed to the kernel.
    static constexpr uint32_t kStackDepth = 256;

    TaskHandle_t handle_{nullptr};
    StaticTask_t task_buffer_;
    StackType_t  stack_[kStackDepth];

public:
    SensorTask() = default;
    SensorTask(const SensorTask&) = delete;
    SensorTask& operator=(const SensorTask&) = delete;

    // Creates the task at the given priority. Calling start() again after the
    // task has been created is a no-op: re-creating it would hand the kernel
    // a TCB and stack that are already in use.
    void start(UBaseType_t priority) {
        if (handle_ != nullptr) {
            return;
        }
        handle_ = xTaskCreateStatic(
            &SensorTask::task_func,
            "Sensor",
            kStackDepth,
            this,              // delivered to task_func as its parameter
            priority,
            stack_,
            &task_buffer_
        );
    }

    // Task body: calls read_sensor() every 10 ms. Never returns.
    void run() {
        TickType_t last_wake = xTaskGetTickCount();
        for (;;) {
            // Delay relative to the previous wake time, not to "now".
            vTaskDelayUntil(&last_wake, pdMS_TO_TICKS(10));
            read_sensor();
        }
    }

private:
    // C-compatible entry point: recovers the object from the task parameter.
    static void task_func(void* param) {
        static_cast<SensorTask*>(param)->run();
    }

    void read_sensor() {
        // ADC read, filter, enqueue
    }
};

// Static allocation — no heap needed for the task itself
SensorTask sensor_task;

Thread-safe queue between tasks

cpp
#include <type_traits>

#include "queue.h"

// Typed, statically allocated wrapper around a FreeRTOS queue.
//   T — item type. FreeRTOS queues copy items bytewise, so T must be
//       trivially copyable.
//   N — queue length in items (must be non-zero).
//
// The kernel holds pointers to queue_def_ and storage_, so the object is
// non-copyable and must stay at a fixed address for as long as it is used.
// Requires configSUPPORT_STATIC_ALLOCATION == 1.
template<typename T, std::size_t N>
class RTOSQueue {
    static_assert(std::is_trivially_copyable_v<T>,
                  "RTOSQueue items are copied bytewise by the kernel");
    static_assert(N > 0, "RTOSQueue length must be non-zero");

    QueueHandle_t handle_;
    StaticQueue_t queue_def_;
    uint8_t       storage_[N * sizeof(T)];

public:
    RTOSQueue() {
        handle_ = xQueueCreateStatic(N, sizeof(T), storage_, &queue_def_);
    }
    RTOSQueue(const RTOSQueue&) = delete;
    RTOSQueue& operator=(const RTOSQueue&) = delete;

    // Enqueues a copy of item. Waits up to timeout ticks for space (default:
    // do not wait). Returns true if the item was queued.
    bool send(const T& item, TickType_t timeout = 0) noexcept {
        return xQueueSend(handle_, &item, timeout) == pdTRUE;
    }

    // Dequeues into item. Waits up to timeout ticks for data (default:
    // portMAX_DELAY). Returns true if an item was received.
    bool receive(T& item, TickType_t timeout = portMAX_DELAY) noexcept {
        return xQueueReceive(handle_, &item, timeout) == pdTRUE;
    }
};

RTOSQueue<SensorReading, 16> sensor_queue;

MISRA-C++ guidelines

MISRA-C++ (2023 edition) is a subset of C++ for safety-critical systems (automotive, aerospace, medical). Key rules:

| Rule | Rationale |
|---|---|
| No dynamic memory allocation after init | Heap fragmentation, non-deterministic timing |
| No recursion | Stack depth unpredictable |
| No exceptions | Overhead, non-determinism |
| No RTTI | Code size, overhead |
| Limited use of templates | Complexity, code bloat |
| All paths must return a value | Undefined behavior if not |
| No implicit integral conversions | Width-dependent bugs |

cpp
// MISRA compliant pattern
// Explicit types, no implicit conversion
uint32_t read_adc() noexcept;  // noexcept — no exceptions

// Explicit cast where widening occurs
// The cast widens raw before the addition.
// (prev_sum is declared elsewhere — presumably uint64_t; confirm.)
uint32_t raw = read_adc();
uint64_t accumulated = static_cast<uint64_t>(raw) + prev_sum;

// No VLAs, no alloca — use fixed-size arrays
// Note: an array parameter such as data[32] is adjusted to uint8_t*, so the
// 32 documents intent but is not checked by the compiler.
void process(uint8_t data[32], uint8_t len) noexcept;
// Better with span (C++20 / MISRA 2023):
// the static extent (32) is part of the parameter type, so no separate
// length argument is needed.
// NOTE(review): MISRA C++:2023 is written against C++17 — confirm std::span
// is acceptable for your compliance baseline.
void process(std::span<const uint8_t, 32> data) noexcept;

Linker script essentials

/* Typical ARM Cortex-M linker script (GNU ld).
 * Exports the symbols startup code needs:
 *   _data_load, _data_start, _data_end  - copy initialised data FLASH -> SRAM
 *   _bss_start, _bss_end                - zero-fill range
 *   __preinit_array_*, __init_array_*   - constructor tables for __libc_init_array()
 *   _stack_top                          - initial stack pointer (top of SRAM)
 * A real script also places the vector table first in FLASH inside KEEP();
 * the input-section name is vendor-specific and is omitted here. */
MEMORY {
    FLASH  (rx)  : ORIGIN = 0x08000000, LENGTH = 256K
    SRAM   (rwx) : ORIGIN = 0x20000000, LENGTH = 64K
}

SECTIONS {
    .text : { *(.text*) *(.rodata*) } > FLASH

    /* Global-constructor tables. KEEP() stops --gc-sections from discarding
       entries that nothing references by name. */
    .preinit_array : {
        PROVIDE_HIDDEN(__preinit_array_start = .);
        KEEP(*(.preinit_array*))
        PROVIDE_HIDDEN(__preinit_array_end = .);
    } > FLASH
    .init_array : {
        PROVIDE_HIDDEN(__init_array_start = .);
        KEEP(*(SORT(.init_array.*)))
        KEEP(*(.init_array))
        PROVIDE_HIDDEN(__init_array_end = .);
    } > FLASH

    .data : {
        . = ALIGN(4);               /* allow word-wise copy in startup */
        _data_start = .;
        *(.data*)
        . = ALIGN(4);
        _data_end = .;
    } > SRAM AT > FLASH         /* stored in FLASH, runs from SRAM */
    _data_load = LOADADDR(.data);   /* FLASH address of the .data image */

    .bss  : {
        . = ALIGN(4);               /* allow word-wise zeroing in startup */
        _bss_start = .;
        *(.bss*)
        *(COMMON)
        . = ALIGN(4);
        _bss_end = .;
    } > SRAM
    _stack_top = ORIGIN(SRAM) + LENGTH(SRAM);
}

C++ startup must call global constructors (.init_array) before main():

cpp
// startup.cpp — called before main
// Reset entry point sketch. It shows the required ORDER of start-up steps;
// steps 1 and 2 are placeholders — no code in this snippet performs them.
extern "C" void __libc_init_array();  // calls global constructors

// NOTE(review): main() is called below but not declared in this snippet; a
// declaration must be visible for this to compile.
extern "C" void Reset_Handler() {
    // 1. Copy .data from FLASH to SRAM
    //    (not implemented here — needs the section bounds and load address
    //    exported by the linker script)
    // 2. Zero .bss
    //    (not implemented here)
    // Steps 1 and 2 must finish before step 3: constructors may read or write
    // globals that live in .data/.bss.
    // 3. Call global constructors
    __libc_init_array();
    // 4. Call main
    main();
    // 5. Loop forever (never return from main on embedded)
    for (;;) {}
}

Size optimization

bash
# Optimize for size
# (entries beginning with "-" are compiler/linker flags to add to the build,
#  not standalone shell commands)
-Os                     # size-optimized (similar to -O2 but avoids size-increasing opts)
-Oz                     # (Clang) even more aggressive size reduction

# Strip debug symbols for release
-s                      # strip all symbols
# Writes a stripped copy and leaves firmware.elf (with symbols) untouched
objcopy --strip-debug firmware.elf firmware_stripped.elf

# Check binary size breakdown
size firmware.elf       # text/data/bss breakdown
nm --size-sort firmware.elf | tail -20  # largest symbols

# Link-time optimization — removes dead code across TUs
# NOTE(review): presumably must be given at both compile and link steps — confirm
-flto

Toolchains

| Target | Toolchain |
|---|---|
| ARM Cortex-M (STM32, NXP) | arm-none-eabi-gcc / arm-none-eabi-g++ |
| RISC-V | riscv32-unknown-elf-g++ |
| AVR (Arduino) | avr-g++ |
| ESP32 (Xtensa) | ESP-IDF with xtensa-esp32-elf-g++ |
| ESP32-C3 (RISC-V) | ESP-IDF with riscv32-esp-elf-g++ |
| Nordic nRF5x | arm-none-eabi-g++ + nRF SDK / Zephyr |