From d48c1d692546287411ceaee6273d68483142820a Mon Sep 17 00:00:00 2001 From: Lawrence Esswood Date: Thu, 11 Jul 2024 18:46:41 +0000 Subject: [PATCH] Squashed changes All the changes for 64-bit and CHERI. Change-Id: I2bff6c51a8675739feaca971b5d4c20430b4a13c --- AppMakefile.mk | 15 ++ Configuration.mk | 176 +++++++++++++++--- examples/revoke_test/Makefile | 13 ++ examples/revoke_test/main.c | 111 ++++++++++++ examples/vun/Makefile | 11 ++ examples/vun/main.c | 30 +++ libtock/console.c | 16 +- libtock/crt0.c | 333 +++++++++++++++++++++++++++------- libtock/revoke.c | 155 ++++++++++++++++ libtock/revoke.h | 35 ++++ libtock/sys.c | 84 ++++++++- libtock/tock.c | 235 ++++++++++++++---------- libtock/tock.h | 47 ++++- userland_generic.ld | 215 +++++++++++++--------- 14 files changed, 1188 insertions(+), 288 deletions(-) create mode 100644 examples/revoke_test/Makefile create mode 100644 examples/revoke_test/main.c create mode 100644 examples/vun/Makefile create mode 100644 examples/vun/main.c create mode 100644 libtock/revoke.c create mode 100644 libtock/revoke.h diff --git a/AppMakefile.mk b/AppMakefile.mk index dd3361c5..4eaef800 100644 --- a/AppMakefile.mk +++ b/AppMakefile.mk @@ -274,7 +274,22 @@ $(foreach platform, $(TOCK_TARGETS), $(eval $(call BUILD_RULES,$(call ARCH_FN,$( $(BUILDDIR)/$(PACKAGE_NAME).tab: $(foreach platform, $(TOCK_TARGETS), $(BUILDDIR)/$(call ARCH_FN,$(platform))/$(call OUTPUT_NAME_FN,$(platform)).elf) $(Q)$(ELF2TAB) $(ELF2TAB_ARGS) -o $@ $^ +# +# These don't really belong here, but I wanted to provide an easy way to try +# running the examples on cheri QEMU. 
+ +THIS_FILE := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) +TOCK_DIR ?= $(THIS_FILE)../tock + +# Run a hybrid app +.PHONY: run_hybrid +run_hybrid : $(BUILDDIR)/$(PACKAGE_NAME).tab + APP_BIN=$(abspath $(BUILDDIR)/rv64imacxcheri/rv64imacxcheri.tbf) make -C $(TOCK_DIR)/boards/qemu_cheri_virt run_app +# Run a purecap app +.PHONY: run_pure +run_pure : $(BUILDDIR)/$(PACKAGE_NAME).tab + APP_BIN=$(abspath $(BUILDDIR)/rv64imacxcheripure/rv64imacxcheripure.tbf) make -C $(TOCK_DIR)/boards/qemu_cheri_virt run_app # Rules for building apps .PHONY: all diff --git a/Configuration.mk b/Configuration.mk index d9777af6..1cc5da78 100644 --- a/Configuration.mk +++ b/Configuration.mk @@ -11,6 +11,7 @@ MAKEFLAGS += -r MAKEFLAGS += -R # Toolchain programs +ifeq ($(CHERI),) AR := -ar AS := -as CXX := -g++ @@ -18,6 +19,20 @@ OBJDUMP := -objdump RANLIB := -ranlib READELF := -readelf SIZE := -size +else +# AppMakefile.mk tries to append these to the "TOOLCHAIN_x" parameter. +# Sadly, the compiler for the CHERI toolchain is not llvm-clang, it is just clang, +# so we need the names without a hyphen unlike above. +AR := ar +AS := as +CXX := clang++ +OBJDUMP := objdump +RANLIB := ranlib +READELF := readelf +SIZE := size +STRIP := strip +OBJCOPY := objcopy +endif # Set default region sizes STACK_SIZE ?= 2048 @@ -65,6 +80,12 @@ OPENTITAN_TOCK_TARGETS := rv32imc|rv32imc.0x20030080.0x10005000|0x20030080|0x100 ARTY_E21_TOCK_TARGETS := rv32imac|rv32imac.0x40430060.0x80004000|0x40430060|0x80004000\ rv32imac|rv32imac.0x40440060.0x80007000|0x40440060|0x80007000 +CHERI_TARGETS := rv64imacxcheri\ + rv64imacxcheripure + +ifneq ($(CHERI),) +TOCK_TARGETS ?= $(CHERI_TARGETS) +else # Include the RISC-V targets. 
# rv32imac|rv32imac.0x20040060.0x80002800 # RISC-V for HiFive1b # rv32imac|rv32imac.0x403B0060.0x3FCC0000 # RISC-V for ESP32-C3 @@ -81,26 +102,28 @@ TOCK_TARGETS ?= cortex-m0\ $(OPENTITAN_TOCK_TARGETS) \ $(ARTY_E21_TOCK_TARGETS) endif +endif # Generate TOCK_ARCHS, the set of architectures listed in TOCK_TARGETS TOCK_ARCHS := $(sort $(foreach target, $(TOCK_TARGETS), $(firstword $(subst |, ,$(target))))) # Check if elf2tab exists, if not, install it using cargo. ELF2TAB ?= elf2tab -ELF2TAB_REQUIRED_VERSION := 0.7.0 +ELF2TAB_REQUIRED_VERSION := 0.10.2 ELF2TAB_EXISTS := $(shell $(SHELL) -c "command -v $(ELF2TAB)") ELF2TAB_VERSION := $(shell $(SHELL) -c "$(ELF2TAB) --version | cut -d ' ' -f 2") # Check elf2tab version UPGRADE_ELF2TAB := $(shell $(SHELL) -c "printf '%s\n%s\n' '$(ELF2TAB_REQUIRED_VERSION)' '$(ELF2TAB_VERSION)' | sort --check=quiet --version-sort || echo yes") -ifeq ($(UPGRADE_ELF2TAB),yes) - $(info Trying to update elf2tab to >= $(ELF2TAB_REQUIRED_VERSION)) +# Ensure that we install exactly the required version +ifneq ($(ELF2TAB_REQUIRED_VERSION),$(ELF2TAB_VERSION)) + $(info Trying to update elf2tab to $(ELF2TAB_REQUIRED_VERSION)) ELF2TAB_EXISTS = endif ifndef ELF2TAB_EXISTS - $(shell cargo install elf2tab) + $(shell cargo install -f --version $(ELF2TAB_REQUIRED_VERSION) elf2tab) # Check elf2tab version after install ELF2TAB_VERSION := $(shell $(SHELL) -c "$(ELF2TAB) --version | cut -d ' ' -f 2") UPGRADE_ELF2TAB := $(shell $(SHELL) -c "printf '%s\n%s\n' '$(ELF2TAB_REQUIRED_VERSION)' '$(ELF2TAB_VERSION)' | sort --check=quiet --version-sort || echo yes") @@ -111,6 +134,8 @@ endif ELF2TAB_ARGS += -n $(PACKAGE_NAME) ELF2TAB_ARGS += --stack $(STACK_SIZE) --app-heap $(APP_HEAP_SIZE) --kernel-heap $(KERNEL_HEAP_SIZE) --kernel-major $(KERNEL_MAJOR_VERSION) --kernel-minor $(KERNEL_MINOR_VERSION) +# This helps keep the program nice and aligned. Otherwise the header ends up 76 bytes. 
+ELF2TAB_ARGS += --protected-region-size 96 # Setup the correct toolchain for each architecture. TOOLCHAIN_cortex-m0 := arm-none-eabi @@ -118,27 +143,41 @@ TOOLCHAIN_cortex-m3 := arm-none-eabi TOOLCHAIN_cortex-m4 := arm-none-eabi TOOLCHAIN_cortex-m7 := arm-none-eabi +THIS_FILE := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) +CHERI_SDK ?= $(abspath ${THIS_FILE}/../cheri/output/sdk) + # RISC-V toolchains, irrespective of their name-tuple, can compile for # essentially any target. Thus, try a few known names and choose the one for # which a compiler is found. -ifneq (,$(shell which riscv64-none-elf-gcc 2>/dev/null)) - TOOLCHAIN_rv32i := riscv64-none-elf + +ifneq (,$(shell which $(CHERI_SDK)/bin/clang)) + TOOLCHAIN_x := $(CHERI_SDK)/bin/ +else ifneq (,$(shell which ~/cheri/output/sdk/bin/clang)) + TOOLCHAIN_x := ~/cheri/output/sdk/bin/ +else ifneq (,$(shell which riscv64-none-elf-gcc 2>/dev/null)) + TOOLCHAIN_x := riscv64-none-elf else ifneq (,$(shell which riscv32-none-elf-gcc 2>/dev/null)) - TOOLCHAIN_rv32i := riscv32-none-elf + TOOLCHAIN_x := riscv32-none-elf else ifneq (,$(shell which riscv64-elf-gcc 2>/dev/null)) - TOOLCHAIN_rv32i := riscv64-elf + TOOLCHAIN_x := riscv64-elf else ifneq (,$(shell which riscv64-unknown-elf-clang 2>/dev/null)) - TOOLCHAIN_rv32i := riscv64-unknown-elf + TOOLCHAIN_x := riscv64-unknown-elf else ifneq (,$(shell which riscv32-unknown-elf-clang 2>/dev/null)) - TOOLCHAIN_rv32i := riscv32-unknown-elf + TOOLCHAIN_x := riscv32-unknown-elf else # Fallback option. We don't particularly want to throw an error (even if # RISCV=1 is set) as this configuration makefile can be useful without a # proper toolchain. 
- TOOLCHAIN_rv32i := riscv64-unknown-elf + TOOLCHAIN_x := riscv64-unknown-elf endif -TOOLCHAIN_rv32imac := $(TOOLCHAIN_rv32i) -TOOLCHAIN_rv32imc := $(TOOLCHAIN_rv32i) + +TOOLCHAIN_rv32i := $(TOOLCHAIN_x) +TOOLCHAIN_rv32imac := $(TOOLCHAIN_x) +TOOLCHAIN_rv32imc := $(TOOLCHAIN_x) +TOOLCHAIN_rv32imacxcheri := $(TOOLCHAIN_x) +TOOLCHAIN_rv64imac := $(TOOLCHAIN_x) +TOOLCHAIN_rv64imacxcheri := $(TOOLCHAIN_x) +TOOLCHAIN_rv64imacxcheripure := $(TOOLCHAIN_x) # Setup the correct compiler. For cortex-m we only support GCC as it is the only # toolchain with the PIC support we need for Tock userspace apps. @@ -153,14 +192,24 @@ CC_cortex-m7 := $(CC_cortex-m) # default to that. ifeq ($(CLANG),) # Default to GCC - CC_rv32 := -gcc + CC_X := -gcc else # If `CLANG=1` on command line, use -clang - CC_rv32 := -clang + # With the toolchain built for the CHERI, the names are different + ifeq ($(CHERI),) + CC_X := -clang + else + CC_X := clang + endif endif -CC_rv32i := $(CC_rv32) -CC_rv32imc := $(CC_rv32) -CC_rv32imac := $(CC_rv32) +CC_rv32 := $(CC_X) +CC_rv32i := $(CC_X) +CC_rv32imc := $(CC_X) +CC_rv32imac := $(CC_X) +CC_rv32imacxcheri :=$(CC_X) +CC_rv64imac := $(CC_X) +CC_rv64imacxcheri := $(CC_X) +CC_rv64imacxcheripure := $(CC_X) # Flags for building app Assembly, C, C++ files # n.b. make convention is that CPPFLAGS are shared for C and C++ sources @@ -173,13 +222,17 @@ override CPPFLAGS += \ -Os\ -fdata-sections -ffunction-sections\ -fstack-usage\ + -fno-rtti\ + -fno-exceptions\ -Wall\ -Wextra + override WLFLAGS += \ -Wl,--warn-common\ -Wl,--gc-sections\ -Wl,--build-id=none + # Various flags for a specific toolchain. Different compilers may have different # supported features. For GCC we warn if the compiler estimates the stack usage # will be greater than the allocated stack size. @@ -208,6 +261,7 @@ endif # fully relocatable. Therefore, just including these flags is not sufficient to # build a full PIC app for Tock. 
So, we split these out, and only include them # for architectures where we have full PIC support. + override CPPFLAGS_PIC += \ -Wl,--emit-relocs\ -fPIC @@ -220,7 +274,9 @@ override CFLAGS_rv32imc += $(CFLAGS_rv32) override CFLAGS_rv32imac += $(CFLAGS_rv32) override CPPFLAGS_rv32 += \ - $(CPPFLAGS_toolchain_rv32) + $(CPPFLAGS_toolchain_rv32)\ + -fPIE\ + --target=riscv32-none-elf\ # Add different flags for different architectures override CPPFLAGS_rv32i += $(CPPFLAGS_rv32) \ @@ -228,32 +284,85 @@ override CPPFLAGS_rv32i += $(CPPFLAGS_rv32) \ -mabi=ilp32\ -mcmodel=medlow -override WLFLAGS_rv32i += \ - -Wl,--no-relax # Prevent use of global_pointer for riscv +WLFLAGS_x = -Wl,--no-relax # Prevent use of global_pointer for riscv + +ifneq ($(CHERI),) + WLFLAGS_x += -Wl,-z,norelro,--no-rosegment,-pie,-zrel,-znow,-znotext,--no-dynamic-linker +endif + +override WLFLAGS_rv32i += $(WLFLAGS_x) override CPPFLAGS_rv32imc += $(CPPFLAGS_rv32) \ -march=rv32imc\ -mabi=ilp32\ -mcmodel=medlow -override WLFLAGS_rv32imc += \ - -Wl,--no-relax # Prevent use of global_pointer for riscv +override WLFLAGS_rv32imc += $(WLFLAGS_x) override CPPFLAGS_rv32imac += $(CPPFLAGS_rv32) \ -march=rv32imac\ -mabi=ilp32\ -mcmodel=medlow -override WLFLAGS_rv32imac += \ - -Wl,--no-relax # Prevent use of global_pointer for riscv + +override WLFLAGS_rv32imac += $(WLFLAGS_x) + +override CPPFLAGS_rv32imacxcheri += $(CPPFLAGS_rv32) \ + -march=rv32imacxcheri\ + -mabi=ilp32\ + -mcmodel=medlow + +override WLFLAGS_rv32imacxcheri += $(WLFLAGS_x) + +override CPPFLAGS_rv64imac += $(CPPFLAGS_rv32) \ + --target=riscv64-none-elf\ + -march=rv64imac\ + -mabi=lp64\ + -mcmodel=medany + +override WLFLAGS_rv64imac += $(WLFLAGS_x) + +override CPPFLAGS_rv64imacxcheri += $(CPPFLAGS_rv32) \ + --target=riscv64-none-elf\ + -march=rv64imacxcheri\ + -mabi=lp64\ + -mcmodel=medany + +override WLFLAGS_rv64imacxcheri += $(WLFLAGS_x) + +override CPPFLAGS_rv64imacxcheripure += $(CPPFLAGS_rv32) \ + --target=riscv64-none-elf\ + 
-march=rv64imacxcheri\ + -mabi=l64pc128\ + -mcmodel=medany + +override WLFLAGS_rv64imacxcheripure += $(WLFLAGS_x) + +ifneq ($(CHERI),) +# On CHERI, I want to provide my own libc +# By default, we should look within the CHERI_SDK +LIBC_ROOT ?= ${CHERI_SDK}/baremetal + +override LINK_LIBS_cheri += -lc -lm -lgcc + +override LINK_LIBS_rv32 += $(LINK_LIBS_cheri) + +override CPPFLAGS_rv32imac += --sysroot=${LIBC_ROOT}/baremetal-newlib-riscv32/riscv32-unknown-elf + +override CPPFLAGS_rv32imacxcheri += --sysroot=${LIBC_ROOT}/baremetal-newlib-riscv32-hybrid/riscv32-unknown-elf + +override CPPFLAGS_rv64imac += --sysroot=${LIBC_ROOT}/baremetal-newlib-riscv64/riscv64-unknown-elf + +override CPPFLAGS_rv64imacxcheri += --sysroot=${LIBC_ROOT}/baremetal-newlib-riscv64-hybrid/riscv64-unknown-elf + +override CPPFLAGS_rv64imacxcheripure += --sysroot=${LIBC_ROOT}/baremetal-newlib-riscv64-purecap/riscv64-unknown-elf + +else +# Otherwise use the provided one override LINK_LIBS_rv32 += \ -lgcc -lstdc++ -lsupc++ -override LINK_LIBS_rv32i += $(LINK_LIBS_rv32) -override LINK_LIBS_rv32imc += $(LINK_LIBS_rv32) -override LINK_LIBS_rv32imac += $(LINK_LIBS_rv32) - override LEGACY_LIBS_rv32i += \ $(TOCK_USERLAND_BASE_DIR)/newlib/rv32/rv32i/libc.a\ $(TOCK_USERLAND_BASE_DIR)/newlib/rv32/rv32i/libm.a @@ -267,6 +376,15 @@ override LEGACY_LIBS_rv32imc += $(LEGACY_LIBS_rv32im) override LEGACY_LIBS_rv32imac += \ $(TOCK_USERLAND_BASE_DIR)/newlib/rv32/rv32imac/libc.a\ $(TOCK_USERLAND_BASE_DIR)/newlib/rv32/rv32imac/libm.a +endif + +override LINK_LIBS_rv32i += $(LINK_LIBS_rv32) +override LINK_LIBS_rv32imc += $(LINK_LIBS_rv32) +override LINK_LIBS_rv32imac += $(LINK_LIBS_rv32) +override LINK_LIBS_rv64imac += $(LINK_LIBS_rv32) +override LINK_LIBS_rv32imacxcheri += $(LINK_LIBS_rv32) +override LINK_LIBS_rv64imacxcheri += $(LINK_LIBS_rv32) +override LINK_LIBS_rv64imacxcheripure += $(LINK_LIBS_rv32) override CFLAGS_cortex-m += \ $(CFLAGS_toolchain_cortex-m) diff --git a/examples/revoke_test/Makefile 
b/examples/revoke_test/Makefile new file mode 100644 index 00000000..a61bab58 --- /dev/null +++ b/examples/revoke_test/Makefile @@ -0,0 +1,13 @@ +# Makefile for user application + +# Specify this directory relative to the current application. +TOCK_USERLAND_BASE_DIR = ../.. + +# Which files to compile. +C_SRCS := $(wildcard *.c) + +STACK_SIZE = 8192 + +# Include userland makefile. Contains rules and flags for actually +# building the application. +include $(TOCK_USERLAND_BASE_DIR)/AppMakefile.mk diff --git a/examples/revoke_test/main.c b/examples/revoke_test/main.c new file mode 100644 index 00000000..f14aec11 --- /dev/null +++ b/examples/revoke_test/main.c @@ -0,0 +1,111 @@ +#include "console.h" +#include "revoke.h" +#include "tock.h" +#include +#include +#include + +#ifndef __CHERI_PURE_CAPABILITY__ + +int main(void) { + printf("Error: running revoke test on non-cheri"); +} + +#else + +char some_memory_area[777]; + +// Each element of the map is BITMAP_T_BITS. Each bit covers GRANULE_SIZE. +// So, each element of the map covers (BITMAP_T_BITS * GRANULE_SIZE). +// We need to some_memory_area, so we divide and add one for the edge effect. 
+#define MAP_ELEMENTS \ + (sizeof(some_memory_area) / (BITMAP_T_BITS * GRANULE_SIZE)) + 1 + +bitmap_t map_memory[MAP_ELEMENTS]; + +volatile epoch_t current_epoch; + +int main(void) { + printf("Hello from revoke!\n"); + + printf("Setting map:\n"); + + // Create a map the shadows some_memory_area + int ret = revoke_register(map_memory, MAP_ELEMENTS, (size_t)some_memory_area, ¤t_epoch); + assert(ret == 0); + + // Two caps + char* volatile cap1 = cheri_bounds_set(&some_memory_area[10], 1); + char* volatile cap2 = cheri_bounds_set(&some_memory_area[300], 1); + + // Both should be tagged + assert(cheri_tag_get(cap1) == 1); + assert(cheri_tag_get(cap2) == 1); + + // Set some range to revoke + ret = set_revoke_range((size_t)&some_memory_area[300], (size_t)&some_memory_area[301], 1); + assert(ret == 0); + + for (size_t j = 0; j != MAP_ELEMENTS; j++) { + printf("map from userspace: %zx\n", map_memory[j]); + } + + // Make a sweep happen + revoke_wait_for_next_epoch(); + + printf("Checking\n"); + + // Now one revoked, but not the other + assert(cheri_tag_get(cap1) == 1); + assert(cheri_tag_get(cap2) == 0); + + // Test with allowing + // Console chosen arbitrarily as it has two allow slots we can try + // stash caps in. 
+ char* to_allow1 = cheri_bounds_set(&some_memory_area[400], 100); + char* to_allow2 = cheri_bounds_set(&some_memory_area[500], 100); + +#define STR1 "hello revoke1" +#define STR2 "hello revoke2" + memcpy(to_allow1, STR1, sizeof(STR1)); + memcpy(to_allow2, STR2, sizeof(STR2)); + + allow_ro_return_t result = allow_readonly(DRIVER_NUM_CONSOLE, + 0, + to_allow1, + 14); + + ret = tock_allow_ro_return_to_returncode(result); + assert(ret == 0); + + result = allow_readonly(DRIVER_NUM_CONSOLE, + 1, + to_allow2, + 14); + + ret = tock_allow_ro_return_to_returncode(result); + assert(ret == 0); + + ret = set_revoke_range((size_t)&some_memory_area[400], (size_t)&some_memory_area[414], 1); + assert(ret == 0); + + revoke_wait_for_next_epoch(); + + // This should read back the allows + result = allow_readonly(DRIVER_NUM_CONSOLE, + 0, + NULL, + 0); + assert(result.ptr == NULL); + result = allow_readonly(DRIVER_NUM_CONSOLE, + 1, + NULL, + 0); + assert(result.ptr != NULL); + + printf("Revocation test success\n"); + + return 0; +} + +#endif // __CHERI_PURE_CAPABILITY__ \ No newline at end of file diff --git a/examples/vun/Makefile b/examples/vun/Makefile new file mode 100644 index 00000000..7506d936 --- /dev/null +++ b/examples/vun/Makefile @@ -0,0 +1,11 @@ +# Makefile for user application + +# Specify this directory relative to the current application. +TOCK_USERLAND_BASE_DIR = ../.. + +# Which files to compile. +C_SRCS := $(wildcard *.c) + +# Include userland makefile. Contains rules and flags for actually +# building the application. 
+include $(TOCK_USERLAND_BASE_DIR)/AppMakefile.mk diff --git a/examples/vun/main.c b/examples/vun/main.c new file mode 100644 index 00000000..cef5b216 --- /dev/null +++ b/examples/vun/main.c @@ -0,0 +1,30 @@ +/* vim: set sw=2 expandtab tw=80: */ + +#include +#include +#include +#include + +#include + +static void nop( + int a __attribute__((unused)), + int b __attribute__((unused)), + int c __attribute__((unused)), + void* d __attribute__((unused))) {} + +int main(void) { + + // Some data it is very important not to leak + char very_secret_key[] = {"Very secret indeed"}; + // Otherwise the compiler thinks its oh so smart and optimises this away + __asm("" ::"r" ((size_t)very_secret_key) : "memory"); + + // Declare some message + char msg[] = "Hello world!"; + size_t msg_len = sizeof(msg) * 8; // Lengths are in bits, right? + + // Print the message + putnstr_async(msg, msg_len, nop, NULL); + tock_exit(0); +} diff --git a/libtock/console.c b/libtock/console.c index 84c11d9c..bbcb8581 100644 --- a/libtock/console.c +++ b/libtock/console.c @@ -5,7 +5,7 @@ #include "console.h" typedef struct putstr_data { - char* buf; + const char* buf; int len; bool called; struct putstr_data* next; @@ -37,17 +37,21 @@ static void putstr_upcall(int _x __attribute__ ((unused)), int putnstr(const char *str, size_t len) { int ret = RETURNCODE_SUCCESS; - putstr_data_t* data = (putstr_data_t*)malloc(sizeof(putstr_data_t)); - if (data == NULL) return RETURNCODE_ENOMEM; + putstr_data_t local; + + putstr_data_t* data = (putstr_data_t*)&local; data->len = len; data->called = false; - data->buf = (char*)malloc(len * sizeof(char)); + // Note: if the user modifies their string in an upcall, they deserve garbage + // on their console. 
+ data->buf = str; + if (data->buf == NULL) { ret = RETURNCODE_ENOMEM; goto putnstr_fail_buf_alloc; } - strncpy(data->buf, str, len); + data->next = NULL; if (putstr_tail == NULL) { @@ -64,9 +68,7 @@ int putnstr(const char *str, size_t len) { yield_for(&data->called); putnstr_fail_async: - free(data->buf); putnstr_fail_buf_alloc: - free(data); return ret; } diff --git a/libtock/crt0.c b/libtock/crt0.c index b06a38c8..ab823c14 100644 --- a/libtock/crt0.c +++ b/libtock/crt0.c @@ -12,6 +12,15 @@ #error Fixed STACK_SIZE. #endif +#ifdef __CHERI_PURE_CAPABILITY__ +// Setting this will ensure caprelocs take into account the fact that the process has been relocated +#define CHERI_INIT_GLOBALS_USE_OFFSET +#include "cheri_init_globals.h" +#endif + +// The program has been loaded contiguously and does not need data relocating +#define CONTIGUOUS 1 + extern int main(void); // Allow _start to go undeclared @@ -22,34 +31,21 @@ extern int main(void); // text segment. It represents sizes and offsets from the text segment of // sections that need some sort of loading and/or relocation. struct hdr { - // 0: Offset of GOT symbols in flash from the start of the application - // binary. - uint32_t got_sym_start; - // 4: Offset of where the GOT section needs to be placed in memory from the - // start of the application's memory region. - uint32_t got_start; - // 8: Size of GOT section. - uint32_t got_size; - // 12: Offset of data symbols in flash from the start of the application - // binary. - uint32_t data_sym_start; - // 16: Offset of where the data section needs to be placed in memory from the - // start of the application's memory region. - uint32_t data_start; - // 20: Size of data section. - uint32_t data_size; - // 24: Offset of where the BSS section needs to be placed in memory from the - // start of the application's memory region. + uint32_t stack_location; + uint32_t stack_size; uint32_t bss_start; - // 28: Size of BSS section. 
uint32_t bss_size; - // 32: First address offset after program flash, where elf2tab places - // .rel.data section - uint32_t reldata_start; - // 36: The size of the stack requested by this application. - uint32_t stack_size; + uint32_t rel_start; + uint32_t rel_size; }; +#define OFF_STACK_LOC "0x00" +#define OFF_STACK_SZ "0x04" +#define OFF_BSS_START "0x08" +#define OFF_BSS_SIZE "0x0C" +#define OFF_REL_START "0x10" +#define OFF_REL_SIZE "0x14" + // The structure of the relative data section. This structure comes from the // compiler. struct reldata { @@ -79,15 +75,19 @@ void _start(void* app_start __attribute__((unused)), // http://infocenter.arm.com/help/topic/com.arm.doc.ihi0042f/IHI0042F_aapcs.pdf __asm__ volatile ( - // Compute the stack top + // entry: + // r0 = hdr + // r1 = mem_start + // r2 = + // r3 = initial_brk + // Compute the stack top. // - // struct hdr* myhdr = (struct hdr*)app_start; - // uint32_t stacktop = (((uint32_t)mem_start + myhdr->stack_size + 7) & 0xfffffff8); - "ldr r4, [r0, #36]\n" // r4 = myhdr->stack_size - "add r4, #7\n" // r4 = myhdr->stack_size + 7 - "add r4, r4, r1\n" // r4 = mem_start + myhdr->stack_size + 7 - "movs r5, #7\n" - "bic r4, r4, r5\n" // r4 = (mem_start + myhdr->stack_size + 7) & ~0x7 + // struct hdr* myhdr = (struct hdr*) app_start; + // uint32_t stacktop = mem_start + myhdr->stack_size + myhdr->stack_location + "ldr r4, [r0, " OFF_STACK_SZ "]\n" // r4 = myhdr->stack_size + "ldr r5, [r0, " OFF_STACK_LOC "]\n" // r5 = myhdr->stack_location + "add r5, r5, r1\n" + "add r4, r4, r5\n" // r4 = stacktop // // Compute the app data size and where initial app brk should go. // This includes the GOT, data, and BSS sections. However, we can't be sure @@ -97,15 +97,31 @@ void _start(void* app_start __attribute__((unused)), // to the end of the BSS section. 
// // uint32_t app_brk = mem_start + myhdr->bss_start + myhdr->bss_size; - "ldr r5, [r0, #24]\n" // r6 = myhdr->bss_start - "ldr r6, [r0, #28]\n" // r6 = myhdr->bss_size - "add r5, r5, r6\n" // r5 = bss_start + bss_size - "add r5, r5, r1\n" // r5 = mem_start + bss_start + bss_size = app_brk + "ldr r5, [r0, " OFF_BSS_START "]\n" // r5 = myhdr->bss_start + "ldr r6, [r0, " OFF_BSS_SIZE "]\n" // v3 = myhdr->bss_size + "add r5, r5, r1\n" // r5 = bss_start + mem_start + "add r5, r5, r6\n" // r5 = mem_start + bss_start + bss_size = app_brk // // Move registers we need to keep over to callee-saved locations "movs r6, r0\n" // r6 = app_start "movs r7, r1\n" // r7 = mem_start - // + + "mov r1, r5\n" // r1 = app_brk +#if CONTIGUOUS + // For the contiguous load, we overlap BSS and relocations so they can + // be reclaimed. If our relocations are large, we need to move the app + // break to be past them + + "ldr r0, [r6, " OFF_REL_START "]\n" + "ldr r2, [r6, " OFF_REL_SIZE "]\n" + "add r0, r0, r7\n" // r0 = reloc_start + "add r2, r2, r0\n" // r2 = reloc_end + + "cmp r2, r1 \n" + "it ge \n" + "movge r1, r2\n" // r1 = reloc_end >= app_brk ? reloc_end : app_brk + +#else // Now we may want to move the stack pointer. If the kernel set the // `app_heap_break` larger than we need (and we are going to call `brk()` // to reduce it) then our stack pointer will fit and we can move it now. @@ -119,12 +135,14 @@ void _start(void* app_start __attribute__((unused)), "mov sp, r4\n" // Update the stack pointer. // "skip_set_sp:\n" // Back to regularly scheduled programming. +#endif + // // Call `brk` to set to requested memory // - // memop(0, app_brk); + // memop(0, max(app_brk, reloc_end)); "movs r0, #0\n" - "movs r1, r5\n" + // r1 setup earlier "svc 5\n" // memop // // Setup initial stack pointer for normal execution. 
If we did this before @@ -145,6 +163,48 @@ void _start(void* app_start __attribute__((unused)), "movs r0, #11\n" "movs r1, r5\n" "svc 5\n" // memop + +#if CONTIGUOUS + // Process relocations. These have all been put in one segment for us and should + // be either Elf64_Rel or Elf32_Rel. Don't process these in C, they overlap the stack + + "ldr r0, [r6, " OFF_REL_START "]\n" + "ldr r1, [r6, " OFF_REL_SIZE "]\n" + "add r0, r0, r7\n" // r0 = reloc_start + "add r1, r1, r0\n" // r1 = reloc_end + + "mov r2, #0x17\n" // r2 = R_ARM_RELATIVE. + "b loop_footer\n" + + "reloc_loop:\n" + "ldr r3, [r0, %[ARCH_BYTES]]\n" // r3 = info + "ldr r4, [r0, 0]\n" // r4 = offset + + "cmp r3, r2\n" // check relocation of right type + "bne panic\n" + + "add r4, r4, r7\n" // r4 = relocation location + "ldr r3, [r4]\n" // r3 = addend + "add r3, r3, r7\n" // r3 = addend + mem_start + "str r3, [r4]\n" // store new value + + "add r0, r0, %[RELOC_SZ]\n" // increment reloc_start + + "loop_footer:\n" + "cmp r0, r1 \n" + "bne reloc_loop\n" + + // And if the break was set too high (e.g. reloc_end > app_brk), + // move it back + "cmp r1, r5\n" + "ble skip_second_brk\n" + // memop(0, app_brk); + "mov r0, #0\n" + "mov r1, r5\n" + "svc 5\n" // memop + "skip_second_brk:\n" + +#else // !CONTIGUOUS // // Set the special PIC register r9. This has to be set to the address of the // beginning of the GOT section. The PIC code uses this as a reference point @@ -152,29 +212,46 @@ void _start(void* app_start __attribute__((unused)), "ldr r0, [r6, #4]\n" // r0 = myhdr->got_start "add r0, r0, r7\n" // r0 = myhdr->got_start + mem_start "mov r9, r0\n" // r9 = r0 +#endif // // Call into the rest of startup. 
// This should never return, if it does, trigger a breakpoint (which will // promote to a HardFault in the absence of a debugger) "movs r0, r6\n" // first arg is app_start "movs r1, r7\n" // second arg is mem_start +#if !CONTIGUOUS "bl _c_start_pic\n" +#else + "bl _c_start_noflash\n" +#endif + "panic: \n" "bkpt #255\n" + : + : [ARCH_BYTES] "n" (sizeof(size_t)), + [RELOC_SZ] "n" (sizeof(size_t) * 2) ); #elif defined(__riscv) +#ifdef __CHERI_PURE_CAPABILITY__ +#define PRFX "c" +#define ZREG "cnull" +#else +#define PRFX "" +#define ZREG "zero" +#endif + __asm__ volatile ( // Compute the stack top. // // struct hdr* myhdr = (struct hdr*) app_start; - // uint32_t stacktop = (((uint32_t) mem_start + myhdr->stack_size + 7) & 0xfffffff8); - "lw t0, 36(a0)\n" // t0 = myhdr->stack_size - "addi t0, t0, 7\n" // t0 = myhdr->stack_size + 7 - "add t0, t0, a1\n" // t0 = mem_start + myhdr->stack_size + 7 - "li t1, 7\n" // t1 = 7 - "not t1, t1\n" // t1 = ~0x7 - "and t0, t0, t1\n" // t0 = (mem_start + myhdr->stack_size + 7) & ~0x7 + // uint32_t stacktop = mem_start + myhdr->stack_size + myhdr->stack_location + + PRFX "lw t0, " OFF_STACK_SZ "("PRFX "a0)\n" // t0 = myhdr->stack_size + PRFX "lw t1, " OFF_STACK_LOC "("PRFX "a0)\n" // t1 = myhdr->stack_location + "add t0, t0, a1\n" + "add t0, t0, t1\n" + // // Compute the app data size and where initial app brk should go. // This includes the GOT, data, and BSS sections. However, we can't be sure @@ -184,8 +261,8 @@ void _start(void* app_start __attribute__((unused)), // to the end of the BSS section. 
// // uint32_t app_brk = mem_start + myhdr->bss_start + myhdr->bss_size; - "lw t1, 24(a0)\n" // t1 = myhdr->bss_start - "lw t2, 28(a0)\n" // t2 = myhdr->bss_size + PRFX "lw t1, " OFF_BSS_START "("PRFX "a0)\n" // t1 = myhdr->bss_start + PRFX "lw t2, " OFF_BSS_SIZE "("PRFX "a0)\n" // t2 = myhdr->bss_size "add t1, t1, t2\n" // t1 = bss_start + bss_size "add t1, t1, a1\n" // t1 = mem_start + bss_start + bss_size = app_brk // @@ -193,6 +270,27 @@ void _start(void* app_start __attribute__((unused)), "mv s0, a0\n" // s0 = void* app_start "mv s1, t0\n" // s1 = stack_top "mv s2, a1\n" // s2 = mem_start + + // + // Setup initial stack pointer for normal execution + "mv sp, s1\n" // sp = stacktop + + // We have overlapped the our BSS/HEAP with our relocations. If our + // relocations are larger, then we need to move the break to include + // relocations. Once we have processed relocations, we will move them + // back. + + PRFX "lw a0, " OFF_REL_START "("PRFX "s0)\n" + PRFX "lw a1, " OFF_REL_SIZE "(" PRFX "s0)\n" + "add a0, a0, s2 // a0 = reloc_start\n" + "add s3, a0, a1 // a1 = reloc_end\n" + + "bgt s3, t1, relocs_larger_than_bss\n" + "mv s3, t1\n" + "relocs_larger_than_bss:\n" + + // s3 is now the larger of the two + // // Now we may want to move the stack pointer. If the kernel set the // `app_heap_break` larger than we need (and we are going to call `brk()` @@ -200,24 +298,20 @@ void _start(void* app_start __attribute__((unused)), // Otherwise after the first syscall (the memop to set the brk), the return // will use a stack that is outside of the process accessible memory. // - "bgt t1, a3, skip_set_sp\n" // Compare `app_heap_break` with new brk. - // If our current `app_heap_break` is larger - // then we need to move the stack pointer - // before we call the `brk` syscall. - "mv sp, t0\n" // Update the stack pointer - - "skip_set_sp:\n" // Back to regularly scheduled programming. + "ble s3, a3, skip_brk\n" // Compare `app_heap_break` with new brk. 
+ // Skip setting if we don't need // Call `brk` to set to requested memory - // memop(0, stacktop + appdata_size); + // memop(0, max(end_of_bss,end_of_relocs)); "li a4, 5\n" // a4 = 5 // memop syscall "li a0, 0\n" // a0 = 0 - "mv a1, t1\n" // a1 = app_brk + "mv a1, s3\n" // a1 = app_brk "ecall\n" // memop - - // - // Setup initial stack pointer for normal execution - "mv sp, s1\n" // sp = stacktop +#if __has_feature(capabilities) + // On CHERI, brk returns a capability to authorise the new break + "cspecialw ddc, ca1\n" +#endif + "skip_brk:\n" // // Debug support, tell the kernel the stack location @@ -236,11 +330,106 @@ void _start(void* app_start __attribute__((unused)), "mv a1, t1\n" // a1 = app_brk "ecall\n" // memop + // Process relocations. These have all been put in one segment for us and should + // be either Elf64_Rel or Elf32_Rel. Don't process these in C, they overlap the stack + + ".set ARCH_BYTES, %[ARCH_BYTES]\n" + + /* Store word on 32-bit, or double word on 64-bit */ + ".macro sx val, offset, base\n" + ".if ARCH_BYTES == 4\n" + PRFX "sw \\val, \\offset("PRFX "\\base)\n" + ".else\n" + PRFX "sd \\val, \\offset("PRFX "\\base)\n" + ".endif\n" + ".endmacro\n" + + /* Load word on 32-bit, or double word on 64-bit */ + ".macro lx val, offset, base\n" + ".if ARCH_BYTES == 4\n" + PRFX "lw \\val, \\offset("PRFX "\\base)\n" + ".else\n" + PRFX "ld \\val, \\offset("PRFX "\\base)\n" + ".endif\n" + ".endmacro\n" + + ".set r_offset, 0\n" + ".set r_info, ARCH_BYTES\n" + ".set ent_size, (ARCH_BYTES*2)\n" + + PRFX "lw a0, " OFF_REL_START "("PRFX "s0)\n" + PRFX "lw a1, " OFF_REL_SIZE "(" PRFX "s0)\n" + "add a0, a0, s2 // a0 = reloc_start\n" + "add a1, a0, a1 // a1 = reloc_end\n" + + "li t0, 3 // t0 = R_RISCV_RELATIVE. The only relocation\n" + "// we should see.\n" + "beq a0, a1, skip_loop\n" + "reloc_loop:\n" + // Relocations are relative to a symbol, the table for which we have stripped. 
+ // However, all the remaining relocations should use the special "0" symbol, + // and encode the values required in the addend. + "lx a2, r_info, a0 // a2 = info\n" + "lx a3, r_offset, a0 // a3 = offset\n" + "bne a2, t0, panic // Only processing this relocation.\n" + "add a3, a3, s2 // a3 = offset + reloc_offset\n" + "lx a4, 0, a3 // a4 = addend\n" + "add a4, a4, s2 // a4 = addend + reloc_offset\n" + "// Store new value\n" + "sx a4, 0, a3\n" + "skip_relocate:\n" + "add a0, a0, ent_size\n" + "loop_footer:\n" + "bne a0, a1, reloc_loop\n" + "skip_loop:\n" + + // Now relocations have been processed. If we moved our break too much, move it back. + // t1 still has the end of bss. a1 has the end of the relocs. + "bgt t1, a1, skip_second_brk\n" + "li a4, 5\n" // a4 = 5 // memop syscall + "li a0, 0\n" // a0 = 0 + "mv a1, t1\n" // a1 = app_brk + "ecall\n" // memop + "skip_second_brk:\n" + // Call into the rest of startup. This should never return. "mv a0, s0\n" // first arg is app_start "mv s0, sp\n" // Set the frame pointer to sp. "mv a1, s2\n" // second arg is mem_start - "jal _c_start_nopic\n" + +#ifdef __CHERI_PURE_CAPABILITY__ + // By convention we are starting in non-cap mode and this startup code was run with integers. 
Change into cap mode: + // auipcc is actually auipc because we are not in cap mode + "1: auipcc ct0, %%pcrel_hi(cap_mode_tramp) \n" + "cincoffset ct0, ct0, %%pcrel_lo(1b) \n" + "cspecialr ct1, pcc \n" + "csetaddr ct1, ct1, t0 \n" + "li t0, 1 \n" + "csetflags ct1, ct1, t0 \n" + "jr.cap ct1 \n" + "cap_mode_tramp: \n" + // Now we are in cap-mode instructions will have the encoding we expect + // Rederive app_start/mem_start/sp from ddc + "cspecialr ct0, ddc \n" + "csetaddr ca0, ct0, a0 \n" // app_start + "csetaddr ca1, ct0, a1 \n" // mem_start + "csetaddr csp, ct0, sp \n" // sp + // Also bounds SP: + "clw t0, " OFF_STACK_SZ "(ca0) \n" + "neg t1, t0\n" + "cincoffset csp, csp, t1 \n" + "csetbounds csp, csp, t0 \n" + "cincoffset csp, csp, t0 \n" +#endif + + // Call into the rest of startup. This should never return. + PRFX "jal _c_start_noflash \n" + + "panic:\n" + PRFX "lw t0, 0(" ZREG ")\n" + : + : [align] "n" (sizeof(void*) - 1), + [ARCH_BYTES] "n" (sizeof(size_t)) ); #else @@ -248,6 +437,9 @@ void _start(void* app_start __attribute__((unused)), #endif } + +#if !CONTIGUOUS + // C startup routine that configures memory for the process. This also handles // PIC fixups that are required for the application. // @@ -333,17 +525,21 @@ void _c_start_pic(uint32_t app_start, uint32_t mem_start) { } } +#endif + // C startup routine for apps compiled with fixed addresses (i.e. no PIC). // // Arguments: // - `app_start`: The address of where the app binary starts in flash. This does // not include the TBF header or any padding before the app. +// on CHERI hybrid, app_start may not be covered by DDC so is an explicit cap. // - `mem_start`: The starting address of the memory region assigned to this // app. __attribute__((noreturn)) -void _c_start_nopic(uint32_t app_start, uint32_t mem_start) { +void _c_start_noflash(uintptr_t app_start, uintptr_t mem_start) { struct hdr* myhdr = (struct hdr*)app_start; +#if !CONTIGUOUS // Copy over the Global Offset Table (GOT). 
The GOT seems to still get created // and used in some cases, even though nothing is being relocated and the // addresses are static. So, all we need to do is copy the GOT entries from @@ -358,12 +554,19 @@ void _c_start_nopic(uint32_t app_start, uint32_t mem_start) { void* data_start = (void*)(myhdr->data_start + mem_start); void* data_sym_start = (void*)(myhdr->data_sym_start + app_start); memcpy(data_start, data_sym_start, myhdr->data_size); +#endif - // Zero BSS segment. Again, we know where this should be in the process RAM - // based on the crt0 header. + // We always do the clear because we may have used BSS for init char* bss_start = (char*)(myhdr->bss_start + mem_start); memset(bss_start, 0, myhdr->bss_size); +#ifdef __CHERI_PURE_CAPABILITY__ + cheri_init_globals(); + // We no longer need the default capability: + __asm(" cmove ct0, cnull \n" + " cspecialw ddc, ct0 \n" ::: "ct0"); +#endif + main(); while (1) { yield(); diff --git a/libtock/revoke.c b/libtock/revoke.c new file mode 100644 index 00000000..c18382e6 --- /dev/null +++ b/libtock/revoke.c @@ -0,0 +1,155 @@ +#include "revoke.h" + +// Globally registered map +bitmap_t* global_map; +size_t global_map_elements; +// Shadows a space starting at +size_t global_at_base; + +volatile epoch_t* global_epoch_ctr; + +/* Register for revocation. Map should be a bitmap covering GRANULE_SIZE granules + * starting at base. Epoch_ctr will be incremented on each sweep. 
*/ +int revoke_register(bitmap_t* map, size_t map_elements, size_t base, volatile epoch_t* epoch_ctr) { + // Pre-align base so the kernel will accept it + base = base & ~((1 << (GRANULE_POW_2 + 3)) - 1); + + // Allow the map + allow_ro_return_t result = allow_readonly(CHERI_DRIVER_NUM, + 0, + (void*)map, + map_elements * sizeof(bitmap_t)); + int ret = tock_allow_ro_return_to_returncode(result); + if (ret < 0) + return ret; + + // Allow the ctr + allow_rw_return_t result_rw = allow_readwrite(CHERI_DRIVER_NUM, + 0, + (void*)epoch_ctr, + sizeof(epoch_t)); + ret = tock_allow_rw_return_to_returncode(result_rw); + if (ret < 0) + return ret; + + // Then register it + syscall_return_t res = command(CHERI_DRIVER_NUM, COMMAND_NUM_SET_BASE, base, 0); + ret = tock_command_return_novalue_to_returncode(res); + + if (ret < 0) + return ret; + + // Set global values (we get the epoch ctr in individual wait calls) + global_map = map; + global_at_base = base; + global_map_elements = map_elements; + global_epoch_ctr = epoch_ctr; + return ret; +} + +static inline void on_epoch(__unused size_t r1, __unused size_t r2, __unused size_t r3, + __unused void* data) { + // r1 is new epoch, but we can also just read that from shared memory, + // which may be more up to date if several epochs pass. + // revoke_wait_for_next_epoch is waiting for the epoch to be incremented. + // This is done for us by the kernel so this function is empty. + // We still need the callback as otherwise we may never wake up from yield(). 
+} + +/* Wait for (any non-zero number) of epochs to elapse */ +int revoke_wait_for_next_epoch(void) { + int ret; + + volatile epoch_t* epoch_ctr = global_epoch_ctr; + + if (!epoch_ctr) { + return -1; + } + + uint32_t epoch_now = *epoch_ctr; + + // Register for upcalls for epoch changing + subscribe_return_t result = subscribe(CHERI_DRIVER_NUM, + 0, + (subscribe_upcall*)&on_epoch, + NULL); + ret = tock_subscribe_return_to_returncode(result); + + if (ret < 0) + return ret; + + // Issue request that another sweep happens + syscall_return_t res = command(CHERI_DRIVER_NUM, COMMAND_NUM_DO_SWEEP, 0, 0); + ret = tock_command_return_novalue_to_returncode(res); + + if (ret < 0) + return ret; + + // Yield waiting for the epoch to change + while (*epoch_ctr == epoch_now) { + yield(); + } + + // Unregister + result = subscribe(CHERI_DRIVER_NUM, + 0, + NULL, + NULL); + + return tock_subscribe_return_to_returncode(result); +} + +/* Paint the revocation bitmap from address [base, top) */ +int set_revoke_range(size_t base, size_t top, int should_revoke) { + + // First offset, align, and shift range. + // This will give two numbers that are indices into the _bits_ of the map, + // not the bytes. + + size_t align_base = (size_t)(base - global_at_base) >> GRANULE_POW_2; + // Also make top inclusive by subtracting one extra + size_t align_top = (top - global_at_base + GRANULE_MASK - 1) >> GRANULE_POW_2; + + // Bounds check + if (align_base >= align_top || align_top >= (global_map_elements * BITMAP_T_BITS)) { + return -1; + } + + // Mask for first byte. 1's in higher bits. + bitmap_t mask_first = BITMAP_T_ONES << (align_base & (BITMAP_T_BITS - 1)); + // Mask for last byte. 1's in lower bits. 
+ bitmap_t mask_last = BITMAP_T_ONES >> ((BITMAP_T_BITS - 1) - (align_top & (BITMAP_T_BITS - 1)));
+
+ bitmap_t* word_ptr = global_map + (align_base >> BITMAP_T_BITS_LOG_2);
+ bitmap_t* last_word_ptr = global_map + (align_top >> BITMAP_T_BITS_LOG_2);
+
+ bitmap_t set_mask;
+ if (should_revoke) {
+ set_mask = BITMAP_T_ONES;
+ } else {
+ set_mask = 0;
+ }
+
+ // Mask for the first word (first iteration)
+ bitmap_t select_mask = mask_first;
+
+ // loop through bits setting bits to set_mask, masking which to set by
+ // select_mask.
+ while (word_ptr <= last_word_ptr) {
+ if (word_ptr == last_word_ptr) {
+ // Mask for the last word (last iteration)
+ select_mask &= mask_last;
+ }
+
+ bitmap_t inv_mask = ~select_mask;
+
+ bitmap_t word = *word_ptr;
+ word = (word & inv_mask) | (set_mask & select_mask);
+ *word_ptr = word;
+
+ word_ptr++;
+ select_mask = BITMAP_T_ONES; // mask for most iterations all ones
+ }
+
+ return 0;
+}
diff --git a/libtock/revoke.h b/libtock/revoke.h
new file mode 100644
index 00000000..7369b616
--- /dev/null
+++ b/libtock/revoke.h
@@ -0,0 +1,35 @@
+#ifndef LIBTOCK_REVOKE_H_
+#define LIBTOCK_REVOKE_H_
+
+#include "tock.h"
+
+typedef uint32_t epoch_t;
+
+// The type used for the bitmap (optimised for fast set/clear). The kernel will
+// accept byte aligned, but we insist on greater alignment
+
+typedef volatile size_t bitmap_t; // We could use uintptr_t if we had a good way to mask them
+#define BITMAP_T_BITS (sizeof(bitmap_t) * 8)
+#define BITMAP_T_BITS_LOG_2 __builtin_ctz(BITMAP_T_BITS)
+#define BITMAP_T_ONES ((bitmap_t)(~0))
+
+// Revocation granule
+#define GRANULE_POW_2 4
+#define GRANULE_SIZE (1 << GRANULE_POW_2)
+#define GRANULE_MASK (GRANULE_SIZE - 1)
+
+#define CHERI_DRIVER_NUM 0x10003
+#define COMMAND_NUM_SET_BASE 1
+#define COMMAND_NUM_DO_SWEEP 2
+
+/* Register for revocation. Map should be a bitmap covering GRANULE_SIZE granules
+ * starting at base. Epoch_ctr will be incremented on each sweep.
 */
+int revoke_register(bitmap_t* map, size_t map_elements, size_t base, volatile epoch_t* epoch_ctr);
+
+/* Wait for (any non-zero number) of epochs to elapse */
+int revoke_wait_for_next_epoch(void);
+
+/* Paint the revocation bitmap from address [base, top) */
+int set_revoke_range(size_t base, size_t top, int should_revoke);
+
+#endif //LIBTOCK_REVOKE_H_
\ No newline at end of file
diff --git a/libtock/sys.c b/libtock/sys.c
index a6641dd7..3a27f31f 100644
--- a/libtock/sys.c
+++ b/libtock/sys.c
@@ -62,9 +62,11 @@ int _read(int fd, void *buf, uint32_t count) {
   return 0; // k_read(fd, (uint8_t*) buf, count);
 }
+
+__attribute__ ((noreturn))
 void _exit(int __status) {
-  while (666) {}
+  tock_exit(__status);
 }
 int _getpid(void) {
@@ -75,13 +77,91 @@ int _kill(pid_t pid, int sig)
   return -1;
 }
+__attribute__((alias("_read")))
+int read(int fd, void *buf, uint32_t count);
+__attribute__((alias("_close")))
+int close(int fd);
+__attribute__((alias("_fstat")))
+int fstat(int fd, struct stat *st);
+__attribute__((alias("_isatty")))
+int isatty(int fd);
+__attribute__((alias("_lseek")))
+int lseek(int fd, uint32_t offset, int whence);
+__attribute__((alias("_write")))
+int write(int fd, const void *buf, uint32_t count);
+__attribute__((alias("_lseek")))
+int lseek64(int fd, const void *buf, uint32_t count);
+
+/*
+  mallocr.c from newlib is not careful enough with using the pointer with the
+  right provenance to create new allocations. It makes two calls to sbrk one
+  to allocate an amount it desires and then a second to align to 0x1000.
+  However, it will use the capability from the first to allocate space in the
+  region authorised by the second. This hack preempts this behavior by always
+  requesting 0x1000 aligned chunks and returning capabilities that authorise
+  access to the next boundary.
+  This is done in preference to fixing newlib as we will likely use a different
+  libc soon. We can remove this once this change has happened.
+ */ +#ifdef __CHERI_PURE_CAPABILITY__ +#define NEWLIB_MALLOC_HACK 1 +#define NEWLIB_HACK_MASK ((size_t)0x1000 - (size_t)1) +#endif + caddr_t _sbrk(int incr) { +#ifdef NEWLIB_MALLOC_HACK + // Last break is where the effective break is, and will always authorise + // up to the end of the next 0x1000 boundary. If it falls on such a boundary, + // it authorises no more bytes. + static caddr_t last_break = NULL; + if (last_break == NULL) { + // First call: find the current break... + size_t current_break_addr = (size_t)memop(1, 0).data; + // .. and align it up to a page + last_break = (caddr_t)__builtin_cheri_address_set( + memop(1, (-current_break_addr) & NEWLIB_HACK_MASK).data, + current_break_addr); + } + // If the incr fits within the same page as the last break, we can just + // return it: + if (incr <= (ssize_t)((-(size_t)last_break) & NEWLIB_HACK_MASK)) { + caddr_t result = last_break; + last_break += incr; + return result; + } + // Otherwise, round up incr to ensure 0x1000 alignent + int original_incr = incr; + incr = (incr + NEWLIB_HACK_MASK) & ~NEWLIB_HACK_MASK; +#endif + memop_return_t ret; ret = memop(1, incr); if (ret.status != TOCK_STATUSCODE_SUCCESS) { errno = ENOMEM; return (caddr_t) -1; } - return (caddr_t) ret.data; + +#if __has_feature(capabilities) +#ifndef __CHERI_PURE_CAPABILITY__ + // In CHERI hybrid, we need to set DDC to authorise any accesses to the new region + __asm("cspecialw ddc, %[new_ddc] " :: [new_ddc] "C" (ret.data) : "memory"); + // For CHERI purecap, caddr_t will be a capability and authorise the new + // region. As long as it gets provenance correct, everything should just + // work. 
+#endif +#endif + +#ifdef NEWLIB_MALLOC_HACK + // Result is new authorising capability but with the address of the last break + caddr_t result = (caddr_t)__builtin_cheri_address_set(ret.data, (size_t)last_break); + // And store last_break + last_break = result + original_incr; + return result; +#endif + + return ccast(caddr_t, ret.data); } + +__attribute__((alias("_sbrk"))) +caddr_t sbrk(int incr); diff --git a/libtock/tock.c b/libtock/tock.c index 04787bf7..b24dfd8b 100644 --- a/libtock/tock.c +++ b/libtock/tock.c @@ -8,9 +8,9 @@ typedef struct { subscribe_upcall *cb; - int arg0; - int arg1; - int arg2; + size_t arg0; + size_t arg1; + size_t arg2; void* ud; } tock_task_t; @@ -19,7 +19,7 @@ static tock_task_t task_queue[TASK_QUEUE_SIZE]; static int task_cur = 0; static int task_last = 0; -int tock_enqueue(subscribe_upcall cb, int arg0, int arg1, int arg2, void* ud) { +int tock_enqueue(subscribe_upcall cb, size_t arg0, size_t arg1, size_t arg2, void* ud) { int next_task_last = (task_last + 1) % TASK_QUEUE_SIZE; if (next_task_last == task_cur) { return -1; @@ -55,10 +55,10 @@ int tock_command_return_novalue_to_returncode(syscall_return_t command_return) { int tock_command_return_u32_to_returncode(syscall_return_t command_return, uint32_t* val) { if (command_return.type == TOCK_SYSCALL_SUCCESS_U32) { - *val = command_return.data[0]; + *val = (uint32_t)command_return.data[0]; return RETURNCODE_SUCCESS; } else if (command_return.type == TOCK_SYSCALL_FAILURE) { - return tock_status_to_returncode(command_return.data[0]); + return (uint32_t)tock_status_to_returncode(command_return.data[0]); } else { // The remaining SyscallReturn variants must never happen if using this // function. We return `EBADRVAL` to signal an unexpected return variant. 
@@ -232,10 +232,10 @@ subscribe_return_t subscribe(uint32_t driver, uint32_t subscribe, : "r" (r0), "r" (r1), "r" (r2), "r" (r3) : "memory"); - if (rtype == TOCK_SYSCALL_SUCCESS_U32_U32) { + if (rtype == TOCK_SYSCALL_SUCCESS) { subscribe_return_t rval = {true, (subscribe_upcall*)rv1, (void*)rv2, 0}; return rval; - } else if (rtype == TOCK_SYSCALL_FAILURE_U32_U32) { + } else if (rtype == TOCK_SYSCALL_FAILURE) { subscribe_return_t rval = {false, (subscribe_upcall*)rv2, (void*)rv3, (statuscode_t)rv1}; return rval; } else { @@ -244,7 +244,7 @@ subscribe_return_t subscribe(uint32_t driver, uint32_t subscribe, } syscall_return_t command(uint32_t driver, uint32_t command, - int arg1, int arg2) { + size_t arg1, size_t arg2) { register uint32_t r0 __asm__ ("r0") = driver; register uint32_t r1 __asm__ ("r1") = command; register uint32_t r2 __asm__ ("r2") = arg1; @@ -278,10 +278,10 @@ allow_ro_return_t allow_readonly(uint32_t driver, uint32_t allow, const void* pt : "r" (r0), "r" (r1), "r" (r2), "r" (r3) : "memory" ); - if (rtype == TOCK_SYSCALL_SUCCESS_U32_U32) { + if (rtype == TOCK_SYSCALL_SUCCESS) { allow_ro_return_t rv = {true, (const void*)rv1, (size_t)rv2, 0}; return rv; - } else if (rtype == TOCK_SYSCALL_FAILURE_U32_U32) { + } else if (rtype == TOCK_SYSCALL_FAILURE) { allow_ro_return_t rv = {false, (const void*)rv2, (size_t)rv3, (statuscode_t)rv1}; return rv; } else { @@ -305,10 +305,10 @@ allow_rw_return_t allow_readwrite(uint32_t driver, uint32_t allow, void* ptr, si : "r" (r0), "r" (r1), "r" (r2), "r" (r3) : "memory" ); - if (rtype == TOCK_SYSCALL_SUCCESS_U32_U32) { + if (rtype == TOCK_SYSCALL_SUCCESS) { allow_rw_return_t rv = {true, (void*)rv1, (size_t)rv2, 0}; return rv; - } else if (rtype == TOCK_SYSCALL_FAILURE_U32_U32) { + } else if (rtype == TOCK_SYSCALL_FAILURE) { allow_rw_return_t rv = {false, (void*)rv2, (size_t)rv3, (statuscode_t)rv1}; return rv; } else { @@ -335,10 +335,10 @@ allow_userspace_r_return_t allow_userspace_read(uint32_t driver, : "r" (r0), "r" 
(r1), "r" (r2), "r" (r3) : "memory" ); - if (rtype == TOCK_SYSCALL_SUCCESS_U32_U32) { + if (rtype == TOCK_SYSCALL_SUCCESS) { allow_userspace_r_return_t rv = {true, (void*)rv1, (size_t)rv2, 0}; return rv; - } else if (rtype == TOCK_SYSCALL_FAILURE_U32_U32) { + } else if (rtype == TOCK_SYSCALL_FAILURE) { allow_userspace_r_return_t rv = {false, (void*)rv2, (size_t)rv3, (statuscode_t)rv1}; return rv; } else { @@ -347,10 +347,10 @@ allow_userspace_r_return_t allow_userspace_read(uint32_t driver, } } -memop_return_t memop(uint32_t op_type, int arg1) { +memop_return_t memop(uint32_t op_type, size_t arg1) { register uint32_t r0 __asm__ ("r0") = op_type; register int r1 __asm__ ("r1") = arg1; - register uint32_t val __asm__ ("r1"); + register void* val __asm__ ("r1"); register uint32_t code __asm__ ("r0"); __asm__ volatile ( "svc 5" @@ -358,10 +358,7 @@ memop_return_t memop(uint32_t op_type, int arg1) { : "r" (r0), "r" (r1) : "memory" ); - if (code == TOCK_SYSCALL_SUCCESS) { - memop_return_t rv = {TOCK_STATUSCODE_SUCCESS, 0}; - return rv; - } else if (code == TOCK_SYSCALL_SUCCESS_U32) { + if (code == TOCK_SYSCALL_SUCCESS || code == TOCK_SYSCALL_SUCCESS_U32 || code == TOCK_SYSCALL_SUCCESS_U64) { memop_return_t rv = {TOCK_STATUSCODE_SUCCESS, val}; return rv; } else if (code == TOCK_SYSCALL_FAILURE) { @@ -381,6 +378,30 @@ memop_return_t memop(uint32_t op_type, int arg1) { // the syscall number is put in a4, and the required arguments are specified in // a0-a3. Nothing specifically syscall related is pushed to the process stack. +// On CHERI, every pointer-typed argument _to the kernel_ is promoted to a full capability, even for non-purecap applications. +// These wrappers maintain the same interface, however. 
+ +#if __has_feature(capabilities) + #define PTR_REG_TYPE() "C" + #define PTR_REG(X) "c" X + #ifdef __CHERI_PURE_CAPABILITY__ + #define AS_KERN_PTR(X) X + #define AS_KERN_CODE_PTR(X) X + #else + #define AS_KERN_PTR(X) __builtin_cheri_address_set(cheri_ddc_get(), (size_t)X) + #define AS_KERN_CODE_PTR(X) __builtin_cheri_address_set(__builtin_cheri_program_counter_get(), (size_t)X) + #endif +#else + #define PTR_REG_TYPE() "r" + #define PTR_REG(X) X + #define AS_KERN_PTR(X) X + #define AS_KERN_CODE_PTR(X) X + #ifndef __capability +// Define this so as not to break builds without a cheri.h + #define __capability + #endif +#endif + void yield(void) { if (yield_check_tasks()) { return; @@ -405,12 +426,12 @@ int yield_no_wait(void) { } else { uint8_t result = 0; register uint32_t a0 __asm__ ("a0") = 0; // yield-no-wait - register uint8_t* a1 __asm__ ("a1") = &result; + register uint8_t* __capability a1 __asm__ (PTR_REG("a1")) = AS_KERN_PTR(&result); __asm__ volatile ( "li a4, 0\n" "ecall\n" : - : "r" (a0), "r" (a1) + : "r" (a0), PTR_REG_TYPE() (a1) : "memory", "a2", "a3", "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", "t4", "t5", "t6", "ra" ); @@ -447,23 +468,24 @@ subscribe_return_t subscribe(uint32_t driver, uint32_t subscribe, subscribe_upcall uc, void* userdata) { register uint32_t a0 __asm__ ("a0") = driver; register uint32_t a1 __asm__ ("a1") = subscribe; - register void* a2 __asm__ ("a2") = uc; - register void* a3 __asm__ ("a3") = userdata; + register void* __capability a2 __asm__ (PTR_REG("a2")) = AS_KERN_CODE_PTR(uc); + register void* __capability a3 __asm__ (PTR_REG("a3")) = AS_KERN_PTR(userdata); register uint32_t a4 __asm__ ("a4") = 1; + // TODO: Also cheri-fy return args here register int rtype __asm__ ("a0"); - register int rv1 __asm__ ("a1"); - register int rv2 __asm__ ("a2"); - register int rv3 __asm__ ("a3"); + register void* __capability rv1 __asm__ (PTR_REG("a1")); + register void* __capability rv2 __asm__ (PTR_REG("a2")); + register void* __capability 
rv3 __asm__ (PTR_REG("a3")); __asm__ volatile ( "ecall\n" : "=r" (rtype), "=r" (rv1), "=r" (rv2), "=r" (rv3) - : "r" (a0), "r" (a1), "r" (a2), "r" (a3), "r" (a4) + : "r" (a0), "r" (a1), PTR_REG_TYPE() (a2), PTR_REG_TYPE() (a3), "r" (a4) : "memory"); - if (rtype == TOCK_SYSCALL_SUCCESS_U32_U32) { - subscribe_return_t rval = {true, (subscribe_upcall*)rv1, (void*)rv2, 0}; + if (rtype == TOCK_SYSCALL_SUCCESS) { + subscribe_return_t rval = {true, ccast(subscribe_upcall*, rv1), ccast(void*, rv2), 0}; return rval; - } else if (rtype == TOCK_SYSCALL_FAILURE_U32_U32) { - subscribe_return_t rval = {false, (subscribe_upcall*)rv2, (void*)rv3, (statuscode_t)rv1}; + } else if (rtype == TOCK_SYSCALL_FAILURE) { + subscribe_return_t rval = {false, ccast(subscribe_upcall*, rv2), ccast(void*, rv3), (statuscode_t)acast(rv1)}; return rval; } else { exit(1); @@ -471,16 +493,17 @@ subscribe_return_t subscribe(uint32_t driver, uint32_t subscribe, } syscall_return_t command(uint32_t driver, uint32_t command, - int arg1, int arg2) { - register uint32_t a0 __asm__ ("a0") = driver; - register uint32_t a1 __asm__ ("a1") = command; - register uint32_t a2 __asm__ ("a2") = arg1; - register uint32_t a3 __asm__ ("a3") = arg2; - register uint32_t a4 __asm__ ("a4") = 2; - register int rtype __asm__ ("a0"); - register int rv1 __asm__ ("a1"); - register int rv2 __asm__ ("a2"); - register int rv3 __asm__ ("a3"); + size_t arg1, size_t arg2) { + register size_t a0 __asm__ ("a0") = driver; + register size_t a1 __asm__ ("a1") = command; + register size_t a2 __asm__ ("a2") = arg1; + register size_t a3 __asm__ ("a3") = arg2; + register size_t a4 __asm__ ("a4") = 2; + + register size_t rtype __asm__ ("a0"); + register size_t rv1 __asm__ ("a1"); + register size_t rv2 __asm__ ("a2"); + register size_t rv3 __asm__ ("a3"); __asm__ volatile ( "ecall\n" : "=r" (rtype), "=r" (rv1), "=r" (rv2), "=r" (rv3) @@ -490,27 +513,49 @@ syscall_return_t command(uint32_t driver, uint32_t command, return rval; } +syscall_return_t 
command3(uint32_t driver, uint32_t command, + size_t arg1, size_t arg2, size_t arg3) { + register size_t a0 __asm__ ("a0") = driver; + register size_t a1 __asm__ ("a1") = command; + register size_t a2 __asm__ ("a2") = arg1; + register size_t a3 __asm__ ("a3") = arg2; + register size_t a4 __asm__ ("a4") = 2; + register size_t a5 __asm__ ("a5") = arg3; + register size_t rtype __asm__ ("a0"); + register size_t rv1 __asm__ ("a1"); + register size_t rv2 __asm__ ("a2"); + register size_t rv3 __asm__ ("a3"); + __asm__ volatile ( + "ecall\n" + : "=r" (rtype), "=r" (rv1), "=r" (rv2), "=r" (rv3) + : "r" (a0), "r" (a1), "r" (a2), "r" (a3), "r" (a4), "r" (a5) + : "memory"); + syscall_return_t rval = {rtype, {rv1, rv2, rv3}}; + return rval; +} + allow_rw_return_t allow_readwrite(uint32_t driver, uint32_t allow, void* ptr, size_t size) { register uint32_t a0 __asm__ ("a0") = driver; register uint32_t a1 __asm__ ("a1") = allow; - register void* a2 __asm__ ("a2") = ptr; + register void* __capability a2 __asm__ (PTR_REG("a2")) = AS_KERN_PTR(ptr); register size_t a3 __asm__ ("a3") = size; register uint32_t a4 __asm__ ("a4") = 3; register int rtype __asm__ ("a0"); - register int rv1 __asm__ ("a1"); - register int rv2 __asm__ ("a2"); - register int rv3 __asm__ ("a3"); + // TODO: Might be worth swapping the return arguments around so the pointer would always be in the same register. 
+ register void* __capability rv1 __asm__ (PTR_REG("a1"));
+ register void* __capability rv2 __asm__ (PTR_REG("a2"));
+ register size_t rv3 __asm__ ("a3");
   __asm__ volatile (
     "ecall\n"
-    : "=r" (rtype), "=r" (rv1), "=r" (rv2), "=r" (rv3)
-    : "r" (a0), "r" (a1), "r" (a2), "r" (a3), "r" (a4)
+    : "=r" (rtype), "="PTR_REG_TYPE ()(rv1), "="PTR_REG_TYPE ()(rv2), "=r" (rv3)
+    : "r" (a0), "r" (a1), PTR_REG_TYPE() (a2), "r" (a3), "r" (a4)
     : "memory");
-  if (rtype == TOCK_SYSCALL_SUCCESS_U32_U32) {
-    allow_rw_return_t rv = {true, (void*)rv1, (size_t)rv2, 0};
+  if (rtype == TOCK_SYSCALL_SUCCESS) {
+    allow_rw_return_t rv = {true, ccast(void*, rv1), acast(rv2), 0};
     return rv;
-  } else if (rtype == TOCK_SYSCALL_FAILURE_U32_U32) {
-    allow_rw_return_t rv = {false, (void*)rv2, (size_t)rv3, (statuscode_t)rv1};
+  } else if (rtype == TOCK_SYSCALL_FAILURE) {
+    allow_rw_return_t rv = {false, ccast(void*, rv2), rv3, (statuscode_t)acast(rv1)};
     return rv;
   } else {
     // Invalid return type
@@ -521,25 +566,26 @@ allow_rw_return_t allow_readwrite(uint32_t driver, uint32_t allow,
 allow_userspace_r_return_t allow_userspace_read(uint32_t driver,
                                                 uint32_t allow,
                                                 void* ptr, size_t size) {
-  register uint32_t a0 __asm__ ("a0") = driver;
+  register uint32_t a0 __asm__ ("a0") = driver;
   register uint32_t a1 __asm__ ("a1") = allow;
-  register void* a2 __asm__ ("a2") = ptr;
-  register size_t a3 __asm__ ("a3") = size;
+  register void* __capability a2 __asm__ (PTR_REG("a2")) = AS_KERN_PTR(ptr);
+  register size_t a3 __asm__ ("a3") = size;
   register int rtype __asm__ ("a0");
-  register int rv1 __asm__ ("a1");
-  register int rv2 __asm__ ("a2");
-  register int rv3 __asm__ ("a3");
+  // TODO: Might be worth swapping the return arguments around so the pointer would always be in the same register.
+ register void* __capability rv1 __asm__ (PTR_REG("a1")); + register void* __capability rv2 __asm__ (PTR_REG("a2")); + register size_t rv3 __asm__ ("a3"); __asm__ volatile ( "li a4, 7\n" "ecall\n" - : "=r" (rtype), "=r" (rv1), "=r" (rv2), "=r" (rv3) - : "r" (a0), "r" (a1), "r" (a2), "r" (a3) + : "=r" (rtype), "="PTR_REG_TYPE ()(rv1), "="PTR_REG_TYPE ()(rv2), "=r" (rv3) + : "r" (a0), "r" (a1), PTR_REG_TYPE() (a2), "r" (a3) : "memory"); - if (rtype == TOCK_SYSCALL_SUCCESS_U32_U32) { - allow_userspace_r_return_t rv = {true, (void*)rv1, (size_t)rv2, 0}; + if (rtype == TOCK_SYSCALL_SUCCESS) { + allow_userspace_r_return_t rv = {true, ccast(void*, rv1), acast(rv2), 0}; return rv; - } else if (rtype == TOCK_SYSCALL_FAILURE_U32_U32) { - allow_userspace_r_return_t rv = {false, (void*)rv2, (size_t)rv3, (statuscode_t)rv1}; + } else if (rtype == TOCK_SYSCALL_FAILURE) { + allow_userspace_r_return_t rv = {false, ccast(void*, rv2), rv3, (statuscode_t)acast(rv1)}; return rv; } else { // Invalid return type @@ -549,25 +595,25 @@ allow_userspace_r_return_t allow_userspace_read(uint32_t driver, allow_ro_return_t allow_readonly(uint32_t driver, uint32_t allow, const void* ptr, size_t size) { - register uint32_t a0 __asm__ ("a0") = driver; - register uint32_t a1 __asm__ ("a1") = allow; - register const void* a2 __asm__ ("a2") = ptr; - register size_t a3 __asm__ ("a3") = size; - register uint32_t a4 __asm__ ("a4") = 4; + register uint32_t a0 __asm__ ("a0") = driver; + register uint32_t a1 __asm__ ("a1") = allow; + register const void* __capability a2 __asm__ (PTR_REG("a2")) = AS_KERN_PTR(ptr); + register size_t a3 __asm__ ("a3") = size; + register uint32_t a4 __asm__ ("a4") = 4; register int rtype __asm__ ("a0"); - register int rv1 __asm__ ("a1"); - register int rv2 __asm__ ("a2"); - register int rv3 __asm__ ("a3"); + register void* __capability rv1 __asm__ (PTR_REG("a1")); + register void* __capability rv2 __asm__ (PTR_REG("a2")); + register size_t rv3 __asm__ ("a3"); __asm__ volatile 
( "ecall\n" - : "=r" (rtype), "=r" (rv1), "=r" (rv2), "=r" (rv3) - : "r" (a0), "r" (a1), "r" (a2), "r" (a3), "r" (a4) + : "=r" (rtype), "="PTR_REG_TYPE ()(rv1), "="PTR_REG_TYPE ()(rv2), "=r" (rv3) + : "r" (a0), "r" (a1), PTR_REG_TYPE() (a2), "r" (a3), "r" (a4) : "memory"); - if (rtype == TOCK_SYSCALL_SUCCESS_U32_U32) { - allow_ro_return_t rv = {true, (const void*)rv1, (size_t)rv2, 0}; + if (rtype == TOCK_SYSCALL_SUCCESS) { + allow_ro_return_t rv = {true, ccast(const void*, rv1), acast(rv2), 0}; return rv; - } else if (rtype == TOCK_SYSCALL_FAILURE_U32_U32) { - allow_ro_return_t rv = {false, (const void*)rv2, (size_t)rv3, (statuscode_t)rv1}; + } else if (rtype == TOCK_SYSCALL_FAILURE) { + allow_ro_return_t rv = {false, ccast(const void*,rv2), rv3, (statuscode_t)acast(rv1)}; return rv; } else { // Invalid return type @@ -575,26 +621,23 @@ allow_ro_return_t allow_readonly(uint32_t driver, uint32_t allow, } } -memop_return_t memop(uint32_t op_type, int arg1) { +memop_return_t memop(uint32_t op_type, size_t arg1) { register uint32_t a0 __asm__ ("a0") = op_type; - register int a1 __asm__ ("a1") = arg1; + register size_t a1 __asm__ ("a1") = arg1; register uint32_t a4 __asm__ ("a4") = 5; - register uint32_t val __asm__ ("a1"); + register void* __capability val __asm__ (PTR_REG("a1")); register uint32_t code __asm__ ("a0"); __asm__ volatile ( "ecall\n" - : "=r" (code), "=r" (val) + : "=r" (code), "="PTR_REG_TYPE ()(val) : "r" (a0), "r" (a1), "r" (a4) : "memory" ); - if (code == TOCK_SYSCALL_SUCCESS) { - memop_return_t rv = {TOCK_STATUSCODE_SUCCESS, 0}; - return rv; - } else if (code == TOCK_SYSCALL_SUCCESS_U32) { + if (code == TOCK_SYSCALL_SUCCESS || code == TOCK_SYSCALL_SUCCESS_U32 || code == TOCK_SYSCALL_SUCCESS_U64) { memop_return_t rv = {TOCK_STATUSCODE_SUCCESS, val}; return rv; } else if (code == TOCK_SYSCALL_FAILURE) { - memop_return_t rv = {(statuscode_t) val, 0}; + memop_return_t rv = {(statuscode_t) acast(val), 0}; return rv; } else { // Invalid return type @@ 
-607,28 +650,28 @@ memop_return_t memop(uint32_t op_type, int arg1) { // Returns the address where the process's RAM region starts. void* tock_app_memory_begins_at(void) { memop_return_t ret = memop(2, 0); - if (ret.status == TOCK_STATUSCODE_SUCCESS) return (void*) ret.data; + if (ret.status == TOCK_STATUSCODE_SUCCESS) return ccast(void*, ret.data); else return NULL; } // Returns the address immediately after the end of the process's RAM region. void* tock_app_memory_ends_at(void) { memop_return_t ret = memop(3, 0); - if (ret.status == TOCK_STATUSCODE_SUCCESS) return (void*) ret.data; + if (ret.status == TOCK_STATUSCODE_SUCCESS) return ccast(void*, ret.data); else return NULL; } // Returns the address where the process's flash region starts. void* tock_app_flash_begins_at(void) { memop_return_t ret = memop(4, 0); - if (ret.status == TOCK_STATUSCODE_SUCCESS) return (void*) ret.data; + if (ret.status == TOCK_STATUSCODE_SUCCESS) return ccast(void*, ret.data); else return NULL; } // Returns the address immediately after the end of the process's flash region. void* tock_app_flash_ends_at(void) { memop_return_t ret = memop(5, 0); - if (ret.status == TOCK_STATUSCODE_SUCCESS) return (void*) ret.data; + if (ret.status == TOCK_STATUSCODE_SUCCESS) return ccast(void*, ret.data); else return NULL; } @@ -636,7 +679,7 @@ void* tock_app_flash_ends_at(void) { // by the kernel) begins. void* tock_app_grant_begins_at(void) { memop_return_t ret = memop(6, 0); - if (ret.status == TOCK_STATUSCODE_SUCCESS) return (void*) ret.data; + if (ret.status == TOCK_STATUSCODE_SUCCESS) return ccast(void*, ret.data); else return NULL; } @@ -644,7 +687,7 @@ void* tock_app_grant_begins_at(void) { // header. 
int tock_app_number_writeable_flash_regions(void) { memop_return_t ret = memop(7, 0); - if (ret.status == TOCK_STATUSCODE_SUCCESS) return (int) ret.data; + if (ret.status == TOCK_STATUSCODE_SUCCESS) return acast(ret.data); else return 0; } @@ -653,7 +696,7 @@ int tock_app_number_writeable_flash_regions(void) { // does not exist. void* tock_app_writeable_flash_region_begins_at(int region_index) { memop_return_t ret = memop(8, region_index); - if (ret.status == TOCK_STATUSCODE_SUCCESS) return (void*) ret.data; + if (ret.status == TOCK_STATUSCODE_SUCCESS) return ccast(void*, ret.data); else return NULL; } @@ -662,7 +705,7 @@ void* tock_app_writeable_flash_region_begins_at(int region_index) { // does not exist. void* tock_app_writeable_flash_region_ends_at(int region_index) { memop_return_t ret = memop(9, region_index); - if (ret.status == TOCK_STATUSCODE_SUCCESS) return (void*) ret.data; + if (ret.status == TOCK_STATUSCODE_SUCCESS) return ccast(void*, ret.data); else return NULL; } diff --git a/libtock/tock.h b/libtock/tock.h index 1e41f6d4..acf85476 100644 --- a/libtock/tock.h +++ b/libtock/tock.h @@ -4,12 +4,44 @@ #include #include +// CHERI casting seems a little broken on hybrid, so this is a workaround. + +// ccast casts from an expression that has type "x* __capability" to a pointer type T +// acast casts from an expeession that has type "x* __capability" to size_t +#if __has_feature(capabilities) + #include + #ifdef __CHERI_PURE_CAPABILITY__ + #define ccast(T, expr) ((T)expr) + #define acast(expr) ((size_t)(__builtin_cheri_address_get(expr))) + #else + #define ccast(T, expr) ((T)(__builtin_cheri_address_get(expr))) + #define acast(expr) ((size_t)(__builtin_cheri_address_get(expr))) + #endif +#else + #define __capability + #define ccast(T, expr) ((T)expr) + #define acast(expr) ((size_t)expr) +#endif + #ifdef __cplusplus extern "C" { #endif typedef void (subscribe_upcall)(int, int, int, void*); +// TODO: refactor for this prototype. 
b/288076215 +// The Tock kernel uses usize for r0/r1/r2, so size_t is appropriate +// On CHERI, the kernel is using cptr for appdata, but might need updating +// to use cptr for all arguments as some (for example ipc) pass pointers +// in other arguments. +typedef void (subscribe_upcall_full)(size_t, size_t, size_t, void*); + +// A common callback that simply sets a flag that the event has occured. +static inline void subscribe_upcall_set_flag(__unused int r0, __unused int r1, + __unused int r3, void* flag) { + *(bool*)flag = true; +} + //////////////////////////////////////////////////////////////////////////////// /// /// RETURN AND ERROR TYPES @@ -76,9 +108,10 @@ typedef enum { } statuscode_t; // Generic return structure from a system call. +// Commands are not allowed to return capabilities, so the type here is size_t typedef struct { syscall_rtype_t type; - uint32_t data[3]; + size_t data[3]; } syscall_return_t; // Return structure from a subscribe syscall. The `subscribe()` implementation @@ -124,7 +157,7 @@ typedef struct { statuscode_t status; // Optional return data depending on the memop variant called. Only set if // status is `TOCK_STATUSCODE_SUCCESS`. - uint32_t data; + void* __capability data; } memop_return_t; //////////////////////////////////////////////////////////////////////////////// @@ -161,7 +194,7 @@ int tock_allow_rw_return_to_returncode(allow_rw_return_t); // Convert a `allow_ro_return_t` to a `returncode_t`. 
int tock_allow_ro_return_to_returncode(allow_ro_return_t); -int tock_enqueue(subscribe_upcall cb, int arg0, int arg1, int arg2, void* ud); +int tock_enqueue(subscribe_upcall cb, size_t arg0, size_t arg1, size_t arg2, void* ud); int yield_check_tasks(void); void yield(void); @@ -172,7 +205,11 @@ void tock_exit(uint32_t completion_code) __attribute__ ((noreturn)); void tock_restart(uint32_t completion_code) __attribute__ ((noreturn)); __attribute__ ((warn_unused_result)) -syscall_return_t command(uint32_t driver, uint32_t command, int arg1, int arg2); +syscall_return_t command(uint32_t driver, uint32_t command, size_t arg1, size_t arg2); + +__attribute__ ((warn_unused_result)) +syscall_return_t command3(uint32_t driver, uint32_t command, + size_t arg1, size_t arg2, size_t arg3); // Pass this to the subscribe syscall as a function pointer to // be the Null Upcall. @@ -193,7 +230,7 @@ __attribute__ ((warn_unused_result)) allow_ro_return_t allow_readonly(uint32_t driver, uint32_t allow, const void* ptr, size_t size); // Call the memop syscall. -memop_return_t memop(uint32_t op_type, int arg1); +memop_return_t memop(uint32_t op_type, size_t arg1); // Wrappers around memop to support app introspection void* tock_app_memory_begins_at(void); diff --git a/userland_generic.ld b/userland_generic.ld index 3056ef65..d19cfa61 100644 --- a/userland_generic.ld +++ b/userland_generic.ld @@ -4,9 +4,10 @@ * is not known. Therefore, this script over provisions space on some platforms. */ -/* Memory Spaces Definitions, 448K flash, 64K ram */ -PROG_LENGTH = 0x00070000; -RAM_LENGTH = 0x00010000; +/* NOTE: this is only really a confidence check for the linker script, these are + * are mostly placeholder values. Set to 2MB for now. */ +PROG_LENGTH = 0x00200000; +RAM_LENGTH = 0x00200000; ENTRY(_start) @@ -15,61 +16,84 @@ ENTRY(_start) * The application loader will select the actual location in flash where the app * is placed. 
On platforms where apps are compiled for fixed addresses, these * addresses will be changed automatically before the linking step. + * */ + MEMORY { FLASH (rx) : ORIGIN = 0x80000000, LENGTH = PROG_LENGTH SRAM (RWX) : ORIGIN = 0x00000000, LENGTH = RAM_LENGTH } +/* We could specify this per target. But it does not really matter. 16 + * is the largest size we need */ +WORD_ALIGN = 16; + +/* TODO: This needs doing properly. */ +/* I just found APP_HEAP_SIZE in the makefiles. Why don't I see it here? */ +/* I will check nothing else is broken, and then just use that. */ +HEAP_SIZE = 0x4000; + SECTIONS { + + /* Need to reserve room for the stack in the linker file. This makes the + * _got addresses used by the compiler match what they will be when the + * app is loaded into memory. This is not necessary for full PIC supported + * platforms (like Cortex-M), but is needed when an app is compiled for a + * fixed address. + * + * NOLOAD will ensure that filsize is 0. Not doing this causes problems + * as some linkers will pad the file and then the elf2tab tool gets + * very confused because of the way tock abuses physical and virtual + * address in the elf format in order to achieve relocation. + * + * Putting this first ensures that stack overflows are caught. + */ + .stack (NOLOAD): + { + /* Be conservative about our alignment for the stack. Different + * architectures require different values (8 for ARM, 16 for RISC-V), + * so we choose the largest value. In practice, this likely will not + * matter since the start of SRAM is unlikely to be at a very peculiar + * address. + */ + . = ALIGN(16); + _stack = .; + . = _stack + STACK_SIZE; + . = ALIGN(16); + } > SRAM + /* Section for just the app crt0 header. - * This must be first so that the app can find it. + * This must be at sram_orgin to be found by crt0. */ .crt0_header : { + /* elf2tab requires that the `_SRAM_ORIGIN` symbol be present to + * mark the first address in the SRAM memory. 
Since ELF files do + * not really need to specify this address as they only care about + * loading into flash, we need to manually mark this address for + * elf2tab. elf2tab will use it to add a fixed address header in the + * TBF header if needed. + */ + _sram_origin = .; /** * Populate the header expected by `crt0`: * * struct hdr { - * uint32_t got_sym_start; - * uint32_t got_start; - * uint32_t got_size; - * uint32_t data_sym_start; - * uint32_t data_start; - * uint32_t data_size; + * uint32_t stack_location; + * uint32_t stack_size; * uint32_t bss_start; * uint32_t bss_size; - * uint32_t reldata_start; - * uint32_t stack_size; + * uint32_t rel_start; + * uint32_t rel_size; * }; */ - /* Offset of GOT symbols in flash from the start of the application - * binary. */ - LONG(LOADADDR(.got) - ORIGIN(FLASH)); - /* Offset of where the GOT section will be placed in memory from the - * beginning of the app's assigned memory. */ - LONG(_got - ORIGIN(SRAM)); - /* Size of GOT section. */ - LONG(SIZEOF(.got)); - /* Offset of data symbols in flash from the start of the application - * binary. */ - LONG(LOADADDR(.data) - ORIGIN(FLASH)); - /* Offset of where the data section will be placed in memory from the - * beginning of the app's assigned memory. */ - LONG(_data - ORIGIN(SRAM)); - /* Size of data section. */ - LONG(SIZEOF(.data)); - /* Offset of where the BSS section will be placed in memory from the - * beginning of the app's assigned memory. */ + LONG(_stack - ORIGIN(SRAM)); + LONG(STACK_SIZE); LONG(_bss - ORIGIN(SRAM)); - /* Size of BSS section */ LONG(SIZEOF(.bss)); - /* First address offset after program flash, where elf2tab places - * .rel.data section */ - LONG(LOADADDR(.endflash) - ORIGIN(FLASH)); - /* The size of the stack requested by this application */ - LONG(STACK_SIZE); - } > FLASH =0xFF + LONG(_data_rel_start - ORIGIN(SRAM)); + LONG(_data_rel_fake_end - _data_rel_fake_start); + } >SRAM AT > FLASH =0xFF /* App state section. Used for persistent app data. 
* We put this first so that if the app code changes but the persistent @@ -78,13 +102,13 @@ SECTIONS { .wfr.app_state : { KEEP (*(.app_state)) - . = ALIGN(4); /* Make sure we're word-aligned here */ - } > FLASH =0xFF + . = ALIGN(WORD_ALIGN); /* Make sure we're word-aligned here */ + } > SRAM AT > FLASH =0xFF /* Text section, Code! */ .text : { - . = ALIGN(4); + . = ALIGN(WORD_ALIGN); _text = .; KEEP (*(.start)) *(.text*) @@ -93,47 +117,22 @@ SECTIONS { KEEP (*(.syscalls)) _etext = .; *(.ARM.extab*) - . = ALIGN(4); /* Make sure we're word-aligned here */ - } > FLASH =0xFF - - /* Need to reserve room for the stack in the linker file. This makes the - * _got addresses used by the compiler match what they will be when the - * app is loaded into memory. This is not necessary for full PIC supported - * platforms (like Cortex-M), but is needed when an app is compiled for a - * fixed address. - */ - .stack : - { - /* elf2tab requires that the `_SRAM_ORIGIN` symbol be present to - * mark the first address in the SRAM memory. Since ELF files do - * not really need to specify this address as they only care about - * loading into flash, we need to manually mark this address for - * elf2tab. elf2tab will use it to add a fixed address header in the - * TBF header if needed. - */ - _sram_origin = .; - - /* Be conservative about our alignment for the stack. Different - * architectures require different values (8 for ARM, 16 for RISC-V), - * so we choose the largest value. In practice, this likely will not - * matter since the start of SRAM is unlikely to be at a very peculiar - * address. - */ - . = ALIGN(16); - _stack = .; - . = _stack + STACK_SIZE; - . = ALIGN(16); - } > SRAM - + . = ALIGN(WORD_ALIGN); + // TODO: reclaim these like we did the other ELF relocations + __start___cap_relocs = .; + *(__cap_relocs*) /* For CHERI */ + __stop___cap_relocs = .; + . 
= ALIGN(WORD_ALIGN); /* Make sure we're word-aligned here */ + } > SRAM AT > FLASH =0xFF /* Global Offset Table */ .got : { - . = ALIGN(4); /* Make sure we're word-aligned here */ + . = ALIGN(WORD_ALIGN); /* Make sure we're word-aligned here */ _got = .; *(.got*) *(.got.plt*) - . = ALIGN(4); + . = ALIGN(WORD_ALIGN); } > SRAM AT > FLASH /* Data section, static initialized variables @@ -142,31 +141,70 @@ SECTIONS { */ .data : { - . = ALIGN(4); /* Make sure we're word-aligned here */ + . = ALIGN(WORD_ALIGN); /* Make sure we're word-aligned here */ _data = .; KEEP(*(.data*)) /* Include the "small data" in the data section. Otherwise it will be * dropped when the TBF is created. */ KEEP(*(.sdata*)) - . = ALIGN(4); /* Make sure we're word-aligned at the end of flash */ + *(.captable*) /* For CHERI. Weirdly, not BSS */ + /* Mis-align for the purposes of rel */ + . += (4 - (. % WORD_ALIGN)) % WORD_ALIGN; } > SRAM AT > FLASH - /* BSS section, static uninitialized variables */ - .bss : + /* End of flash. */ + .endflash : + { + } > FLASH + + /* Working around is ELF2TAB becoming tiresome at this point. How it + * currently works: + * ELF2 incorrectly uses section headers, not segment headers, to load + * data in the resulting binary. Putting the rel in a PT_LOAD segment + * therefore has no effect. + * ON THE OTHER HAND, ELF2TAB will find any section named exactly "rel.X" + * (where X is the name of any other section that has both W and R flags) + * and (ignoring where they request being placed) will chuck that section + * out at the end of the binary, preceded by a 4-byte length field. + * This will result in the .rel.X section always being placed after + * "endofflash". We have to produce the section, but not increment + * the cursor because we actually want it to overlap where the stack + * and BSS would be. Overlapping the STACK / BSS allows us to reclaim + * ram once relocations have been processed. 
This works because no + * relocations target the stack/BSS (yet another reason to use rel, not + * rela). + */ + + _data_rel_start = . + 4; + /* Must be called .rel.data */ + .rel.data : { + /* Dynamic relocations. We should not get any rel.plt. Hopefully. */ + _data_rel_fake_start = .; + KEEP(*(.rel.dyn*)); + _data_rel_fake_end = .; + } > SRAM = 0xaa + . = _data_rel_start; + + + /* BSS section, static uninitialized variables + * Note: Also "placing" this section in Flash (with 0 size file size) will help merge + * the program headers. + */ + .bss (NOLOAD): { - . = ALIGN(4); /* Make sure we're word-aligned here */ + . = ALIGN(WORD_ALIGN); /* Make sure we're word-aligned here */ _bss = .; KEEP(*(.bss*)) KEEP(*(.sbss*)) /* for RISC-V */ *(COMMON) - . = ALIGN(4); + . = ALIGN(WORD_ALIGN); } > SRAM - /* End of flash. */ - .endflash : + .heap (NOLOAD): { - } > FLASH + . += HEAP_SIZE; + } > SRAM /* ARM Exception support * @@ -183,13 +221,22 @@ SECTIONS { * it to the binary in elf2tab. If it was before the RAM sections, it would * through off our calculations of the header. */ + /* This seems to generate an out of range relocation with contiguous loading + * Almost certainly due to the fake locations for flash and RAM being too + * far apart. PROVIDE_HIDDEN (__exidx_start = .); .ARM.exidx : { - /* (C++) Index entries for section unwinding */ + /* (C++) Index entries for section unwinding * *(.ARM.exidx* .gnu.linkonce.armexidx.*) } > FLASH PROVIDE_HIDDEN (__exidx_end = .); + */ + /* Sections we do not need. */ + /DISCARD/ : + { + *(.eh_frame .gnu.hash .dynsym .dynstr .hash .dynamic) + } } ASSERT(_got <= _bss, "