From 898720fdb7cd6db936cf4812171caea8b7143817 Mon Sep 17 00:00:00 2001 From: Frances Wingerter Date: Mon, 22 Jan 2024 11:41:31 -0500 Subject: [PATCH] runtime: trace madvise syscall madvise(MADV_DONTNEED) is equivalent to zeroing memory, so we cannot allow it to be performed cross-compartment. Because some other advice values may have similar effects, for now we simply forbid all cross-compartment madvise(advice) calls regardless of the value of advice. --- runtime/mmap_event.c | 2 ++ runtime/mmap_event.h | 8 ++++++++ runtime/seccomp_filter.c | 1 + runtime/track_memory_map.c | 22 ++++++++++++++++++++++ 4 files changed, 33 insertions(+) diff --git a/runtime/mmap_event.c b/runtime/mmap_event.c index 070d11ac2..6b57162d3 100644 --- a/runtime/mmap_event.c +++ b/runtime/mmap_event.c @@ -10,6 +10,8 @@ enum mmap_event event_from_syscall(uint64_t rax) { return EVENT_MUNMAP; case __NR_mremap: return EVENT_MREMAP; + case __NR_madvise: + return EVENT_MADVISE; case __NR_mprotect: return EVENT_MPROTECT; case __NR_pkey_mprotect: diff --git a/runtime/mmap_event.h b/runtime/mmap_event.h index 0530c63f2..892c73bac 100644 --- a/runtime/mmap_event.h +++ b/runtime/mmap_event.h @@ -22,6 +22,11 @@ struct mremap_info { unsigned char pkey; }; +struct madvise_info { + struct range range; + unsigned char pkey; +}; + struct mprotect_info { struct range range; int prot; @@ -39,6 +44,7 @@ union event_info { struct mmap_info mmap; struct munmap_info munmap; struct mremap_info mremap; + struct madvise_info madvise; struct mprotect_info mprotect; struct pkey_mprotect_info pkey_mprotect; }; @@ -47,6 +53,7 @@ enum mmap_event { EVENT_MMAP, EVENT_MUNMAP, EVENT_MREMAP, + EVENT_MADVISE, EVENT_MPROTECT, EVENT_PKEY_MPROTECT, EVENT_CLONE, @@ -58,6 +65,7 @@ static const char *event_names[] = { "MMAP", "MUNMAP", "MREMAP", + "MADVISE", "MPROTECT", "PKEY_MPROTECT", "CLONE", diff --git a/runtime/seccomp_filter.c b/runtime/seccomp_filter.c index 951cb547e..605e1faef 100644 --- a/runtime/seccomp_filter.c +++ 
b/runtime/seccomp_filter.c @@ -34,6 +34,7 @@ struct sock_filter ia2_filter[] = { BPF_SYSCALL_POLICY(mprotect, TRACE), BPF_SYSCALL_POLICY(mremap, TRACE), BPF_SYSCALL_POLICY(munmap, TRACE), + BPF_SYSCALL_POLICY(madvise, TRACE), /* pkey syscalls */ BPF_SYSCALL_POLICY(pkey_alloc, ALLOW), BPF_SYSCALL_POLICY(pkey_mprotect, TRACE), diff --git a/runtime/track_memory_map.c b/runtime/track_memory_map.c index 9fd7a575e..c627f60ea 100644 --- a/runtime/track_memory_map.c +++ b/runtime/track_memory_map.c @@ -56,6 +56,11 @@ static bool is_op_permitted(struct memory_map *map, int event, map, info->mremap.old_range, info->mremap.pkey)) return true; break; + case EVENT_MADVISE: + if (memory_map_all_overlapping_regions_have_pkey( + map, info->madvise.range, info->madvise.pkey)) + return true; + break; case EVENT_MPROTECT: { /* allow mprotecting memory that has not been mprotected */ bool impacts_only_unprotected_memory = @@ -153,6 +158,11 @@ static bool update_memory_map(struct memory_map *map, int event, } break; } + case EVENT_MADVISE: + /* madvise does not modify the memory map state we care about here, but can + clear memory contents with MADV_DONTNEED */ + return true; + break; case EVENT_MPROTECT: return memory_map_mprotect_region(map, info->mprotect.range, info->mprotect.prot); @@ -282,6 +292,18 @@ static bool interpret_syscall(struct user_regs_struct *regs, unsigned char pkey, info->new_range.len); break; } + case EVENT_MADVISE: { + struct madvise_info *info = &event_info->madvise; + info->range.start = regs->rdi; + info->range.len = regs->rsi; + info->pkey = pkey; + + int advice = regs->rdx; + + debug_op("compartment %d madvise (%08zx, %zd) with advice=%d\n", info->pkey, + info->range.start, info->range.len, advice); + break; + } case EVENT_MPROTECT: { struct mprotect_info *info = &event_info->mprotect; info->range.start = regs->rdi;