diff --git a/.gitignore b/.gitignore index 8fc76fb5..0c1bc2fb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ binaries +binaries-stack-sizes target **/.gdb_history ctap-2-1-pre.pdf diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 43f2ff8c..20b0e518 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -166,6 +166,18 @@ build-firmware: - artifacts - binaries +stack-sizes: + image: registry.git.nitrokey.com/nitrokey/nitrokey-3-firmware/nitrokey3:latest + rules: + - if: '$CI_PIPELINE_SOURCE == "push"' + tags: + - docker + stage: build + script: + - make binaries-stack-sizes + - ./utils/stack-sizes/print-stack-sizes.rs < binaries-stack-sizes/firmware-nk3am.elf + - ./utils/stack-sizes/print-stack-sizes.rs < binaries-stack-sizes/firmware-nk3xn.elf + # metrics stage metrics: diff --git a/Cargo.toml b/Cargo.toml index 53ebcc43..cbb33baf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -57,6 +57,10 @@ debug = true lto = "thin" inherits = "release" +[profile.release-no-strip] +strip = false +inherits = "release" + [profile.release.package.salty] opt-level = 2 diff --git a/Dockerfile b/Dockerfile index 4f0cb531..6a897ad8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,4 +5,5 @@ RUN cargo install flip-link cargo-binutils RUN rustup target add thumbv7em-none-eabihf thumbv8m.main-none-eabi RUN rustup component add llvm-tools-preview clippy rustfmt RUN cargo install --git https://github.com/Nitrokey/repometrics --rev 73d8bf0e8834b04182dc0dbb6019d998b4decf81 +RUN rustup +nightly target add thumbv7em-none-eabihf thumbv8m.main-none-eabi WORKDIR /app diff --git a/Makefile b/Makefile index 4ca47c6c..08bcb80f 100644 --- a/Makefile +++ b/Makefile @@ -32,6 +32,16 @@ binaries: $(MAKE) -C runners/nkpk build FEATURES=provisioner cp runners/nkpk/artifacts/runner-nkpk.bin.ihex binaries/provisioner-nkpk.ihex +.PHONY: binaries-stack-sizes +binaries-stack-sizes: export RUSTFLAGS=-Z emit-stack-sizes +binaries-stack-sizes: export FEATURES=emit-stack-sizes +binaries-stack-sizes: export RUSTUP_TOOLCHAIN=nightly +binaries-stack-sizes: + mkdir -p binaries-stack-sizes + $(MAKE) -C runners/embedded build-all + cp runners/embedded/artifacts/runner-lpc55-nk3xn.elf $@/firmware-nk3xn.elf + cp runners/embedded/artifacts/runner-nrf52-bootloader-nk3am.elf $@/firmware-nk3am.elf + .PHONY: metrics metrics: binaries repometrics generate > metrics.toml diff --git a/runners/embedded/Cargo.toml b/runners/embedded/Cargo.toml index 717bb7bf..9c572cb8 100644 --- a/runners/embedded/Cargo.toml +++ b/runners/embedded/Cargo.toml @@ -99,6 +99,8 @@ log-warnP = [ "log-warn", "log-error" ] log-rtt = ["boards/log-rtt"] log-semihosting = ["boards/log-semihosting"] +emit-stack-sizes = [] + [[bin]] name = "nrf52_runner" path = "src/bin/app-nrf.rs" diff --git a/runners/embedded/Makefile b/runners/embedded/Makefile index e1c092df..c011b106 100644 --- a/runners/embedded/Makefile +++ b/runners/embedded/Makefile @@ -28,7 +28,7 @@ SYMBOLS ?= symbols-$(BUILD_ID).txt OUT_BIN = $(ARTIFACTS)/runner-$(BUILD_ID).bin OUT_ELF = $(ARTIFACTS)/runner-$(BUILD_ID).elf OUT_IHEX = $(OUT_BIN).ihex -CUSTOM_PROFILE=$(shell python3 -c "p = 'release-thin-lto' if '$(BOARD)' == 'nk3am' and 'test' in '$(FEATURES)'.split(',') else 'release'; print(p); " ) +CUSTOM_PROFILE=$(shell python3 -c "features = '$(FEATURES)'.split(','); p = 'release-thin-lto' if '$(BOARD)' == 'nk3am' and 'test' in features else 'release-no-strip' if 'emit-stack-sizes' in features else 'release'; print(p); " ) NO_DELOG_FEATURE=$(shell python3 -c "print('no-delog') if 'no-delog' not in '$(FEATURES)'.split(',') and 'log-semihosting' not in '$(FEATURES)'.split(',') and 'log-rtt' not in '$(FEATURES)'.split(',') else None; ") RAW_OUT = $(CARGO_TARGET_DIR)/$(TARGET)/$(CUSTOM_PROFILE)/$(SOC)_runner @@ -101,7 +101,6 @@ clean-all: ############################################################################### build: build-banner check-var-BOARD check-var-SOC - cargo --version cargo build --target $(TARGET) \ diff --git a/runners/embedded/build.rs b/runners/embedded/build.rs index 5bcf896a..a657c576 100644 --- a/runners/embedded/build.rs +++ b/runners/embedded/build.rs @@ -107,7 +107,15 @@ fn main() -> Result<(), Box> { .find(|p| p.name.as_str() == "cortex-m-rt"); if let Some(p) = pkg_cortex_m_rt { - println!("cargo:rustc-link-arg=-Tcortex-m-rt_{}_link.x", p.version); + let variant = if cfg!(feature = "emit-stack-sizes") { + "_stack_sizes" + } else { + "" + }; + println!( + "cargo:rustc-link-arg=-Tcortex-m-rt_{}_link{}.x", + p.version, variant + ); } Ok(()) diff --git a/runners/ld/cortex-m-rt_0.6.15_link_stack_sizes.x b/runners/ld/cortex-m-rt_0.6.15_link_stack_sizes.x new file mode 100644 index 00000000..cab226e2 --- /dev/null +++ b/runners/ld/cortex-m-rt_0.6.15_link_stack_sizes.x @@ -0,0 +1,239 @@ +/* # Developer notes + +- Symbols that start with a double underscore (__) are considered "private" + +- Symbols that start with a single underscore (_) are considered "semi-public"; they can be + overridden in a user linker script, but should not be referred from user code (e.g. `extern "C" { + static mut __sbss }`). + +- `EXTERN` forces the linker to keep a symbol in the final binary. We use this to make sure a + symbol if not dropped if it appears in or near the front of the linker arguments and "it's not + needed" by any of the preceding objects (linker arguments) + +- `PROVIDE` is used to provide default values that can be overridden by a user linker script + +- On alignment: it's important for correctness that the VMA boundaries of both .bss and .data *and* + the LMA of .data are all 4-byte aligned. These alignments are assumed by the RAM initialization + routine. There's also a second benefit: 4-byte aligned boundaries means that you won't see + "Address (..) is out of bounds" in the disassembly produced by `objdump`. +*/ + +/* Provides information about the memory layout of the device */ +/* This will be provided by the user (see `memory.x`) or by a Board Support Crate */ +/* + Board Support Crates install `memory.x` in their $OUT_DIR and add a path to it + to rustc-link-search, making it impossible for the user to override it with a + custom copy of the file (due to search order of link search paths). + Therefore rename this file to give back control to the user. +*/ +INCLUDE custom_memory.x + +/* # Entry point = reset vector */ +ENTRY(Reset); +EXTERN(__RESET_VECTOR); /* depends on the `Reset` symbol */ + +/* # Exception vectors */ +/* This is effectively weak aliasing at the linker level */ +/* The user can override any of these aliases by defining the corresponding symbol themselves (cf. + the `exception!` macro) */ +EXTERN(__EXCEPTIONS); /* depends on all the these PROVIDED symbols */ + +EXTERN(DefaultHandler); + +PROVIDE(NonMaskableInt = DefaultHandler); +EXTERN(HardFaultTrampoline); +PROVIDE(MemoryManagement = DefaultHandler); +PROVIDE(BusFault = DefaultHandler); +PROVIDE(UsageFault = DefaultHandler); +PROVIDE(SecureFault = DefaultHandler); +PROVIDE(SVCall = DefaultHandler); +PROVIDE(DebugMonitor = DefaultHandler); +PROVIDE(PendSV = DefaultHandler); +PROVIDE(SysTick = DefaultHandler); + +PROVIDE(DefaultHandler = DefaultHandler_); +PROVIDE(HardFault = HardFault_); + +/* # Interrupt vectors */ +EXTERN(__INTERRUPTS); /* `static` variable similar to `__EXCEPTIONS` */ + +/* # Pre-initialization function */ +/* If the user overrides this using the `pre_init!` macro or by creating a `__pre_init` function, + then the function this points to will be called before the RAM is initialized. */ +PROVIDE(__pre_init = DefaultPreInit); + +/* # Sections */ +SECTIONS +{ + PROVIDE(_stack_start = ORIGIN(RAM) + LENGTH(RAM)); + + /* ## Sections in FLASH */ + /* ### Vector table */ + .vector_table ORIGIN(FLASH) : + { + /* Initial Stack Pointer (SP) value */ + LONG(_stack_start); + + /* Reset vector */ + KEEP(*(.vector_table.reset_vector)); /* this is the `__RESET_VECTOR` symbol */ + __reset_vector = .; + + /* Exceptions */ + KEEP(*(.vector_table.exceptions)); /* this is the `__EXCEPTIONS` symbol */ + __eexceptions = .; + + /* Device specific interrupts */ + KEEP(*(.vector_table.interrupts)); /* this is the `__INTERRUPTS` symbol */ + } > FLASH + + PROVIDE(_stext = ADDR(.vector_table) + SIZEOF(.vector_table)); + + /* ### .text */ + .text _stext : + { + /* place these 2 close to each other or the `b` instruction will fail to link */ + *(.PreResetTrampoline); + *(.Reset); + + *(.text .text.*); + *(.HardFaultTrampoline); + *(.HardFault.*); + . = ALIGN(4); + __etext = .; + } > FLASH + + /* ### .rodata */ + .rodata __etext : ALIGN(4) + { + *(.rodata .rodata.*); + + /* 4-byte align the end (VMA) of this section. + This is required by LLD to ensure the LMA of the following .data + section will have the correct alignment. */ + . = ALIGN(4); + __erodata = .; + } > FLASH + + /* ## Sections in RAM */ + /* ### .data */ + .data : ALIGN(4) + { + . = ALIGN(4); + __sdata = .; + *(.data .data.*); + . = ALIGN(4); /* 4-byte align the end (VMA) of this section */ + __edata = .; + } > RAM AT>FLASH + + /* LMA of .data */ + __sidata = LOADADDR(.data); + + /* ### .bss */ + .bss (NOLOAD) : ALIGN(4) + { + . = ALIGN(4); + __sbss = .; + *(.bss .bss.*); + . = ALIGN(4); /* 4-byte align the end (VMA) of this section */ + __ebss = .; + } > RAM + + /* ### .uninit */ + .uninit (NOLOAD) : ALIGN(4) + { + . = ALIGN(4); + *(.uninit .uninit.*); + . = ALIGN(4); + } > RAM + + /* Place the heap right after `.uninit` */ + . = ALIGN(4); + __sheap = .; + + /* ## .got */ + /* Dynamic relocations are unsupported. This section is only used to detect relocatable code in + the input files and raise an error if relocatable code is found */ + .got (NOLOAD) : + { + KEEP(*(.got .got.*)); + } + + /* ## Discarded sections */ + /DISCARD/ : + { + /* Unused exception related info that only wastes space */ + *(.ARM.exidx); + *(.ARM.exidx.*); + *(.ARM.extab.*); + } + + /* `INFO` makes the section not allocatable so it won't be loaded into memory */ + .stack_sizes (INFO) : + { + KEEP(*(.stack_sizes)); + } +} + +/* Do not exceed this mark in the error messages below | */ +/* # Alignment checks */ +ASSERT(ORIGIN(FLASH) % 4 == 0, " +ERROR(cortex-m-rt): the start of the FLASH region must be 4-byte aligned"); + +ASSERT(ORIGIN(RAM) % 4 == 0, " +ERROR(cortex-m-rt): the start of the RAM region must be 4-byte aligned"); + +ASSERT(__sdata % 4 == 0 && __edata % 4 == 0, " +BUG(cortex-m-rt): .data is not 4-byte aligned"); + +ASSERT(__sidata % 4 == 0, " +BUG(cortex-m-rt): the LMA of .data is not 4-byte aligned"); + +ASSERT(__sbss % 4 == 0 && __ebss % 4 == 0, " +BUG(cortex-m-rt): .bss is not 4-byte aligned"); + +ASSERT(__sheap % 4 == 0, " +BUG(cortex-m-rt): start of .heap is not 4-byte aligned"); + +/* # Position checks */ + +/* ## .vector_table */ +ASSERT(__reset_vector == ADDR(.vector_table) + 0x8, " +BUG(cortex-m-rt): the reset vector is missing"); + +ASSERT(__eexceptions == ADDR(.vector_table) + 0x40, " +BUG(cortex-m-rt): the exception vectors are missing"); + +ASSERT(SIZEOF(.vector_table) > 0x40, " +ERROR(cortex-m-rt): The interrupt vectors are missing. +Possible solutions, from most likely to less likely: +- Link to a svd2rust generated device crate +- Disable the 'device' feature of cortex-m-rt to build a generic application (a dependency +may be enabling it) +- Supply the interrupt handlers yourself. Check the documentation for details."); + +/* ## .text */ +ASSERT(ADDR(.vector_table) + SIZEOF(.vector_table) <= _stext, " +ERROR(cortex-m-rt): The .text section can't be placed inside the .vector_table section +Set _stext to an address greater than the end of .vector_table (See output of `nm`)"); + +ASSERT(_stext + SIZEOF(.text) < ORIGIN(FLASH) + LENGTH(FLASH), " +ERROR(cortex-m-rt): The .text section must be placed inside the FLASH memory. +Set _stext to an address smaller than 'ORIGIN(FLASH) + LENGTH(FLASH)'"); + +/* # Other checks */ +ASSERT(SIZEOF(.got) == 0, " +ERROR(cortex-m-rt): .got section detected in the input object files +Dynamic relocations are not supported. If you are linking to C code compiled using +the 'cc' crate then modify your build script to compile the C code _without_ +the -fPIC flag. See the documentation of the `cc::Build.pic` method for details."); +/* Do not exceed this mark in the error messages above | */ + +/* Provides weak aliases (cf. PROVIDED) for device specific interrupt handlers */ +/* This will usually be provided by a device crate generated using svd2rust (see `device.x`) */ +INCLUDE device.x + +ASSERT(SIZEOF(.vector_table) <= 0x400, " +There can't be more than 240 interrupt handlers. This may be a bug in +your device crate, or you may have registered more than 240 interrupt +handlers."); + diff --git a/utils/stack-sizes/print-stack-sizes.rs b/utils/stack-sizes/print-stack-sizes.rs new file mode 100755 index 00000000..095ee563 --- /dev/null +++ b/utils/stack-sizes/print-stack-sizes.rs @@ -0,0 +1,38 @@ +#!/usr/bin/env -S cargo +nightly -Zscript +```cargo +[package] +edition = "2021" +[dependencies] +stack-sizes = "0.5.0" +symbolic = { version = "12.4.1", features = ["demangle"] } +``` + +use std::io::Read; +use std::io; + +use stack_sizes::analyze_executable; +use symbolic::demangle; + +fn main() { + let mut stdin = io::stdin().lock(); + let mut buffer = Vec::new(); + stdin.read_to_end(&mut buffer).unwrap(); + let functions = analyze_executable(&buffer).unwrap(); + let mut sorted: Vec<_> = functions.defined.into_values().collect(); + sorted.sort_by_key(|f| f.stack().unwrap_or(0)); + for f in sorted.into_iter().rev() { + if let Some(stack) = f.stack() { + print!("{:6}", stack); + } else { + print!("None"); + } + print!(" {:6} ", f.size()); + for (i, name) in f.names().into_iter().enumerate() { + if i > 0 { + print!(", "); + } + print!("{}", demangle::demangle(name)); + } + println!(); + } +}