From 05cfc406786419951862dbd71a57950db4a5e667 Mon Sep 17 00:00:00 2001
From: ChinYikMing <yikming2222@gmail.com>
Date: Mon, 13 May 2024 00:52:03 +0800
Subject: [PATCH] Support emulating memory management unit(MMU)

The purpose of this commit is to boot 32-bit RISC-V Linux in the future.
The virtual memory scheme to support is Sv32. There is one change to the
original code base to accommodate the MMU:
   The prototype of the riscv_io_t interface needs to be changed.
   In particular, a RISC-V instance (riscv_t) is added as the first
   parameter. MMU-related callbacks need to access the satp CSR to perform
   a page table walk during virtual memory translation, but the satp CSR
   is stored in the RISC-V instance (riscv_t), so the callbacks need a way
   to reach it. The simplest solution is to add the RISC-V instance
   (riscv_t) to the prototype of the riscv_io_t interface.
After this change, we can reuse riscv_io_t for system emulation
afterward.

The rest of the changes implement the Sv32 virtual memory scheme. Every
memory access has to walk the page table to get the corresponding PTE.
Depending on the retrieved PTE, there are several page faults to be
handled if necessary, so three exception handlers have been introduced:
insn_pgfault, load_pgfault, and store_pgfault. They are used in
MMU_FAULT_CHECK. In this commit, access faults are not handled yet since
they are related to PMA and PMP and might not be required to boot 32-bit
RISC-V Linux (tested on semu). Some S-mode CSRs are added to
riscv_internal to support S-mode. PTE, S-mode and M-mode CSR helper
macros are introduced as well.

Related: #310
---
 src/common.h        |   2 +
 src/emulate.c       | 278 ++++++++++++++++++++++++++++++++++++++++++--
 src/riscv.c         |  10 +-
 src/riscv.h         |  75 ++++++++++--
 src/riscv_private.h |  14 +++
 src/rv32_template.c |  76 ++++++------
 6 files changed, 397 insertions(+), 58 deletions(-)

diff --git a/src/common.h b/src/common.h
index f6337f8db..ab16daa55 100644
--- a/src/common.h
+++ b/src/common.h
@@ -25,6 +25,8 @@
 
 #define ARRAYS_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
 
+#define MASK(n) (~((~0U << (n)))) /* low n bits set, for 0 <= n < 32 */
+
 /* Alignment macro */
 #if defined(__GNUC__) || defined(__clang__)
 #define __ALIGNED(x) __attribute__((aligned(x)))
diff --git a/src/emulate.c b/src/emulate.c
index 3742909ca..89df65d1a 100644
--- a/src/emulate.c
+++ b/src/emulate.c
@@ -24,6 +24,7 @@ extern struct target_ops gdbstub_ops;
 #endif
 
 #include "decode.h"
+#include "io.h"
 #include "mpool.h"
 #include "riscv.h"
 #include "riscv_private.h"
@@ -51,7 +52,10 @@ extern struct target_ops gdbstub_ops;
     _(breakpoint, 3)       /* Breakpoint */                        \
     _(load_misaligned, 4)  /* Load address misaligned */           \
     _(store_misaligned, 6) /* Store/AMO address misaligned */      \
-    _(ecall_M, 11)         /* Environment call from M-mode */
+    _(ecall_M, 11)         /* Environment call from M-mode */      \
+    _(insn_pgfault, 12)    /* Instruction page fault */            \
+    _(load_pgfault, 13)    /* Load page fault */                   \
+    _(store_pgfault, 15)   /* Store page fault */
 /* clang-format on */
 
 enum {
@@ -196,6 +200,8 @@ static uint32_t *csr_get_ptr(riscv_t *rv, uint32_t csr)
     case CSR_FCSR:
         return (uint32_t *) (&rv->csr_fcsr);
 #endif
+    case CSR_SATP:
+        return (uint32_t *) (&rv->csr_satp);
     default:
         return NULL;
     }
@@ -220,7 +226,16 @@ static uint32_t csr_csrrw(riscv_t *rv, uint32_t csr, uint32_t val)
         out &= FFLAG_MASK;
 #endif
 
-    *c = val;
+    if (c == &rv->csr_satp) {
+        const uint8_t mode_sv32 = val >> 31;
+        if (mode_sv32)
+            *c = val & MASK(22); /* store ppn */
+        else                     /* bare mode */
+            *c = 0; /* virtual mem addr maps to same physical mem addr directly
+                     */
+    } else {
+        *c = val;
+    }
 
     return out;
 }
@@ -456,7 +471,7 @@ static bool do_fuse3(riscv_t *rv, rv_insn_t *ir, uint64_t cycle, uint32_t PC)
     for (int i = 0; i < ir->imm2; i++) {
         uint32_t addr = rv->X[fuse[i].rs1] + fuse[i].imm;
         RV_EXC_MISALIGN_HANDLER(3, store, false, 1);
-        rv->io.mem_write_w(addr, rv->X[fuse[i].rs2]);
+        rv->io.mem_write_w(rv, addr, rv->X[fuse[i].rs2]);
     }
     PC += ir->imm2 * 4;
     if (unlikely(RVOP_NO_NEXT(ir))) {
@@ -480,7 +495,7 @@ static bool do_fuse4(riscv_t *rv, rv_insn_t *ir, uint64_t cycle, uint32_t PC)
     for (int i = 0; i < ir->imm2; i++) {
         uint32_t addr = rv->X[fuse[i].rs1] + fuse[i].imm;
         RV_EXC_MISALIGN_HANDLER(3, load, false, 1);
-        rv->X[fuse[i].rd] = rv->io.mem_read_w(addr);
+        rv->X[fuse[i].rd] = rv->io.mem_read_w(rv, addr);
     }
     PC += ir->imm2 * 4;
     if (unlikely(RVOP_NO_NEXT(ir))) {
@@ -604,16 +619,18 @@ static void block_translate(riscv_t *rv, block_t *block)
     block->pc_start = block->pc_end = rv->PC;
 
     rv_insn_t *prev_ir = NULL;
-    rv_insn_t *ir = mpool_calloc(rv->block_ir_mp);
+    rv_insn_t *ir = mpool_alloc(rv->block_ir_mp);
     block->ir_head = ir;
 
     /* translate the basic block */
     while (true) {
+        memset(ir, 0, sizeof(rv_insn_t));
+
         if (prev_ir)
             prev_ir->next = ir;
 
         /* fetch the next instruction */
-        const uint32_t insn = rv->io.mem_ifetch(block->pc_end);
+        const uint32_t insn = rv->io.mem_ifetch(rv, block->pc_end);
 
         /* decode the instruction */
         if (!rv_decode(ir, insn)) {
@@ -644,7 +661,7 @@ static void block_translate(riscv_t *rv, block_t *block)
             break;
         }
 
-        ir = mpool_calloc(rv->block_ir_mp);
+        ir = mpool_alloc(rv->block_ir_mp);
     }
 
     assert(prev_ir);
@@ -691,7 +708,7 @@ static bool detect_memset(riscv_t *rv, size_t type)
 
     uint32_t tmp_pc = rv->PC;
     for (uint32_t i = 0; i < memset_len; i++) {
-        const uint32_t insn = rv->io.mem_ifetch(tmp_pc);
+        const uint32_t insn = rv->io.mem_ifetch(rv, tmp_pc);
         if (unlikely(insn != memset_insn[i]))
             return false;
         tmp_pc += 4;
@@ -712,7 +729,7 @@ static bool detect_memcpy(riscv_t *rv, size_t type)
 
     uint32_t tmp_pc = rv->PC;
     for (uint32_t i = 0; i < memcpy_len; i++) {
-        const uint32_t insn = rv->io.mem_ifetch(tmp_pc);
+        const uint32_t insn = rv->io.mem_ifetch(rv, tmp_pc);
         if (unlikely(insn != memcpy_insn[i]))
             return false;
         tmp_pc += 4;
@@ -1178,6 +1195,230 @@ void rv_step(void *arg)
 #endif
 }
 
+static bool ppn_is_valid(riscv_t *rv, uint32_t ppn)
+{
+    /* valid when ppn is below the count of RV_PG_SIZE pages in mem_size */
+    const vm_attr_t *vm = PRIV(rv);
+    return ppn < vm->mem_size / RV_PG_SIZE;
+}
+
+#define PAGE_TABLE(ppn) /* NOTE(review): assumes 4-byte attr->mem elements */ \
+    (ppn_is_valid(rv, (ppn))                                                  \
+         ? (uint32_t *) &attr->mem[(ppn) << (RV_PG_SHIFT - 2)] : NULL)
+
+/* Walk the Sv32 page tables and return the leaf PTE mapping a virtual address
+ * @rv: RISC-V emulator
+ * @addr: virtual address
+ * @return: the leaf PTE, or NULL in Bare mode, if no valid leaf exists, or if
+ *          a superpage is misaligned
+ */
+static uint32_t *mmu_walk(riscv_t *rv, const uint32_t addr)
+{
+    vm_attr_t *attr = PRIV(rv);
+    uint32_t ppn = rv->csr_satp;
+    if (ppn == 0) /* Bare mode */
+        return NULL;
+
+    /* start from root page table */
+    uint32_t *page_table = PAGE_TABLE(ppn);
+    if (!page_table)
+        return NULL;
+
+    for (int level = 1; level >= 0; level--) {
+        /* VPN[level] is the 10-bit index into this level's table */
+        uint32_t vpn = (addr >> (RV_PG_SHIFT + level * 10)) & 0x3ff;
+        uint32_t *pte = page_table + vpn;
+        ppn = *pte >> 10; /* the PPN field starts at PTE bit 10 */
+
+        /* X, W, R and V bits, in that order from bit 3 down to bit 0 */
+        switch (*pte & 0xf) {
+        case 0b0001: /* pointer to the next level of the page table */
+            page_table = PAGE_TABLE(ppn);
+            if (!page_table)
+                return NULL;
+            break;
+        case 0b0011:
+        case 0b0111:
+        case 0b1001:
+        case 0b1011:
+        case 0b1111:
+            /* a level-1 leaf is a 4 MiB superpage: PPN[0] must be zero */
+            if (unlikely(level > 0 && (ppn & 0x3ff)))
+                return NULL;
+            return pte; /* leaf PTE */
+        default: /* invalid (V=0) or reserved (W without R) encoding */
+            return NULL;
+        }
+    }
+
+    return NULL;
+}
+
+/* Verify a PTE against the requested access and raise a page fault if needed
+ * @op: the operation (ifetch, read or write)
+ * @rv: RISC-V emulator
+ * @pte: PTE returned by mmu_walk(), may be NULL
+ * @addr: the faulting virtual address; @access_bits: PTE_R, PTE_W or PTE_X
+ * @return: false if a page fault was raised, true otherwise
+ */
+/* FIXME: handle access fault */
+#define MMU_FAULT_CHECK(op, rv, pte, addr, access_bits) \
+    mmu_##op##_fault_check(rv, pte, addr, access_bits)
+#define MMU_FAULT_CHECK_IMPL(op, pgfault)                                      \
+    static bool mmu_##op##_fault_check(riscv_t *rv, uint32_t *pte,             \
+                                       uint32_t addr, uint32_t access_bits)    \
+    {                                                                          \
+        if (!pte) {                                                            \
+            if (!rv->csr_satp) /* Bare mode: nothing to verify */              \
+                return true;                                                   \
+            goto fault; /* translation is on but no leaf PTE was found */      \
+        }                                                                      \
+        const uint32_t flags = *pte;                                           \
+        /* invalid entry, or the reserved "writable but not readable" form */  \
+        if (!(flags & PTE_V) || (!(flags & PTE_R) && (flags & PTE_W)))         \
+            goto fault;                                                        \
+        if ((access_bits & PTE_X) && !(flags & PTE_X))                         \
+            goto fault;                                                        \
+        if ((access_bits & PTE_W) && !(flags & PTE_W))                         \
+            goto fault;                                                        \
+        /* loads need R; with mstatus.MXR set, X alone also suffices */        \
+        if ((access_bits & PTE_R) && !(flags & PTE_R) &&                       \
+            !((MSTATUS_MXR & rv->csr_mstatus) && (flags & PTE_X)))             \
+            goto fault;                                                        \
+        /* MPRV with MPP=01 (S-mode): user pages need mstatus.SUM */           \
+        if ((MSTATUS_MPRV & rv->csr_mstatus) &&                                \
+            !(MSTATUS_MPPH & rv->csr_mstatus) &&                               \
+            (MSTATUS_MPPL & rv->csr_mstatus) &&                                \
+            !(MSTATUS_SUM & rv->csr_mstatus) && (flags & PTE_U))               \
+            goto fault;                                                        \
+        return true;                                                           \
+    fault:                                                                     \
+        rv_except_##pgfault(rv, addr);                                         \
+        return false;                                                          \
+    }
+
+MMU_FAULT_CHECK_IMPL(ifetch, insn_pgfault)
+MMU_FAULT_CHECK_IMPL(read, load_pgfault)
+MMU_FAULT_CHECK_IMPL(write, store_pgfault)
+
+#define get_ppn_and_offset(ppn, offset) /* uses caller's `pte` and `addr` */  \
+    do {                                                                      \
+        ppn = (*pte >> 10) << RV_PG_SHIFT; /* PTE[31:10] -> page base */      \
+        offset = addr & (RV_PG_SIZE - 1);  /* low 12 bits of the address */   \
+    } while (0)
+
+/* Fetch an instruction word, translating addr through the PTE if satp != 0 */
+uint32_t mmu_ifetch(riscv_t *rv, const uint32_t addr)
+{
+    uint32_t *pte = mmu_walk(rv, addr);
+    if (unlikely(!MMU_FAULT_CHECK(ifetch, rv, pte, addr, PTE_X)))
+        return 0; /* instruction page fault already raised */
+
+    if (!rv->csr_satp)
+        return memory_ifetch(addr); /* csr_satp == 0: use addr unchanged */
+
+    uint32_t ppn;
+    uint32_t offset;
+    get_ppn_and_offset(ppn, offset);
+    return memory_ifetch(ppn | offset);
+}
+
+/* Load a 32-bit word, translating addr through the PTE if satp != 0 */
+uint32_t mmu_read_w(riscv_t *rv, const uint32_t addr)
+{
+    uint32_t *pte = mmu_walk(rv, addr);
+    if (unlikely(!MMU_FAULT_CHECK(read, rv, pte, addr, PTE_R)))
+        return 0; /* load page fault already raised */
+
+    if (!rv->csr_satp)
+        return memory_read_w(addr); /* csr_satp == 0: use addr unchanged */
+
+    uint32_t ppn;
+    uint32_t offset;
+    get_ppn_and_offset(ppn, offset);
+    return memory_read_w(ppn | offset);
+}
+
+/* Load a 16-bit halfword, translating addr through the PTE if satp != 0 */
+uint16_t mmu_read_s(riscv_t *rv, const uint32_t addr)
+{
+    uint32_t *pte = mmu_walk(rv, addr);
+    if (unlikely(!MMU_FAULT_CHECK(read, rv, pte, addr, PTE_R)))
+        return 0; /* load page fault already raised */
+
+    if (!rv->csr_satp)
+        return memory_read_s(addr); /* csr_satp == 0: use addr unchanged */
+
+    uint32_t ppn;
+    uint32_t offset;
+    get_ppn_and_offset(ppn, offset);
+    return memory_read_s(ppn | offset);
+}
+
+/* Load a byte, translating addr through the PTE if satp != 0 */
+uint8_t mmu_read_b(riscv_t *rv, const uint32_t addr)
+{
+    uint32_t *pte = mmu_walk(rv, addr);
+    if (unlikely(!MMU_FAULT_CHECK(read, rv, pte, addr, PTE_R)))
+        return 0; /* load page fault already raised */
+
+    if (!rv->csr_satp)
+        return memory_read_b(addr); /* csr_satp == 0: use addr unchanged */
+
+    uint32_t ppn;
+    uint32_t offset;
+    get_ppn_and_offset(ppn, offset);
+    return memory_read_b(ppn | offset);
+}
+
+/* Store a 32-bit word, translating addr through the PTE if satp != 0 */
+void mmu_write_w(riscv_t *rv, const uint32_t addr, const uint32_t val)
+{
+    uint32_t *pte = mmu_walk(rv, addr);
+    if (unlikely(!MMU_FAULT_CHECK(write, rv, pte, addr, PTE_W)))
+        return; /* store page fault already raised */
+    uint32_t paddr = addr; /* csr_satp == 0: use addr unchanged */
+    if (rv->csr_satp) {
+        uint32_t ppn;
+        uint32_t offset;
+        get_ppn_and_offset(ppn, offset);
+        paddr = ppn | offset;
+    }
+    memory_write_w(paddr, (uint8_t *) &val); /* no `return expr` in void fn */
+}
+
+/* Store a 16-bit halfword, translating addr through the PTE if satp != 0 */
+void mmu_write_s(riscv_t *rv, const uint32_t addr, const uint16_t val)
+{
+    uint32_t *pte = mmu_walk(rv, addr);
+    if (unlikely(!MMU_FAULT_CHECK(write, rv, pte, addr, PTE_W)))
+        return; /* store page fault already raised */
+    uint32_t paddr = addr; /* csr_satp == 0: use addr unchanged */
+    if (rv->csr_satp) {
+        uint32_t ppn;
+        uint32_t offset;
+        get_ppn_and_offset(ppn, offset);
+        paddr = ppn | offset;
+    }
+    memory_write_s(paddr, (uint8_t *) &val); /* no `return expr` in void fn */
+}
+
+/* Store a byte, translating addr through the PTE if satp != 0 */
+void mmu_write_b(riscv_t *rv, const uint32_t addr, const uint8_t val)
+{
+    uint32_t *pte = mmu_walk(rv, addr);
+    if (unlikely(!MMU_FAULT_CHECK(write, rv, pte, addr, PTE_W)))
+        return; /* store page fault already raised */
+    uint32_t paddr = addr; /* csr_satp == 0: use addr unchanged */
+    if (rv->csr_satp) {
+        uint32_t ppn;
+        uint32_t offset;
+        get_ppn_and_offset(ppn, offset);
+        paddr = ppn | offset;
+    }
+    memory_write_b(paddr, (uint8_t *) &val); /* no `return expr` in void fn */
+}
+
 void ebreak_handler(riscv_t *rv)
 {
     assert(rv);
@@ -1225,3 +1466,22 @@ void dump_registers(riscv_t *rv, char *out_file_path)
     if (out_file_path[0] != '-')
         fclose(f);
 }
+
+riscv_io_t mmu_io = {
+    /* MMU-backed callbacks: loads and instruction fetch */
+    .mem_read_b = mmu_read_b,
+    .mem_read_s = mmu_read_s,
+    .mem_read_w = mmu_read_w,
+    .mem_ifetch = mmu_ifetch,
+
+    /* MMU-backed callbacks: stores */
+    .mem_write_b = mmu_write_b,
+    .mem_write_s = mmu_write_s,
+    .mem_write_w = mmu_write_w,
+
+    /* ecall/ebreak handlers and memcpy/memset hooks */
+    .on_ebreak = ebreak_handler,
+    .on_ecall = ecall_handler,
+    .on_memset = memset_handler,
+    .on_memcpy = memcpy_handler,
+};
diff --git a/src/riscv.c b/src/riscv.c
index 3fe80ec63..5029291a4 100644
--- a/src/riscv.c
+++ b/src/riscv.c
@@ -162,8 +162,10 @@ void rv_remap_stdstream(riscv_t *rv, fd_stream_pair_t *fsp, uint32_t fsp_size)
 #define MEMIO(op) on_mem_##op
 #define IO_HANDLER_IMPL(type, op, RW)                                     \
     static IIF(RW)(                                                       \
-        /* W */ void MEMIO(op)(riscv_word_t addr, riscv_##type##_t data), \
-        /* R */ riscv_##type##_t MEMIO(op)(riscv_word_t addr))            \
+        /* W */ void MEMIO(op)(UNUSED riscv_t * rv, riscv_word_t addr,    \
+                               riscv_##type##_t data),                    \
+        /* R */ riscv_##type##_t MEMIO(op)(UNUSED riscv_t * rv,           \
+                                           riscv_word_t addr))            \
     {                                                                     \
         IIF(RW)                                                           \
         (memory_##op(addr, (uint8_t *) &data), return memory_##op(addr)); \
@@ -239,6 +241,10 @@ riscv_t *rv_create(riscv_user_t rv_attr)
         memcpy(&rv->io, &io, sizeof(riscv_io_t));
     } else {
         /* TODO: system emulator */
+
+        /* this variable has external linkage to mmu_io defined in emulate.c */
+        extern riscv_io_t mmu_io;
+        memcpy(&rv->io, &mmu_io, sizeof(riscv_io_t));
     }
 
     /* default standard stream.
diff --git a/src/riscv.h b/src/riscv.h
index 57977bdee..feb062aa0 100644
--- a/src/riscv.h
+++ b/src/riscv.h
@@ -91,10 +91,61 @@ enum {
 #define MISA_A (1 << ('A' - 'A'))
 #define MISA_F (1 << ('F' - 'A'))
 #define MISA_C (1 << ('C' - 'A'))
+
+#define MSTATUS_SIE_SHIFT 1
+#define MSTATUS_MIE_SHIFT 3
+#define MSTATUS_SPIE_SHIFT 5
+#define MSTATUS_UBE_SHIFT 6
 #define MSTATUS_MPIE_SHIFT 7
-#define MSTATUS_MPP_SHIFT 11
+#define MSTATUS_SPP_SHIFT 8
+#define MSTATUS_MPPL_SHIFT 11
+#define MSTATUS_MPPH_SHIFT 12
+#define MSTATUS_MPRV_SHIFT 17
+#define MSTATUS_SUM_SHIFT 18
+#define MSTATUS_MXR_SHIFT 19 /* mstatus bit 19; bit 18 is SUM */
+#define MSTATUS_TVM_SHIFT 20
+#define MSTATUS_TW_SHIFT 21
+#define MSTATUS_TSR_SHIFT 22
+#define MSTATUS_SIE (1 << MSTATUS_SIE_SHIFT)
+#define MSTATUS_MIE (1 << MSTATUS_MIE_SHIFT)
+#define MSTATUS_SPIE (1 << MSTATUS_SPIE_SHIFT)
+#define MSTATUS_UBE (1 << MSTATUS_UBE_SHIFT)
 #define MSTATUS_MPIE (1 << MSTATUS_MPIE_SHIFT)
-#define MSTATUS_MPP (3 << MSTATUS_MPP_SHIFT)
+#define MSTATUS_SPP (1 << MSTATUS_SPP_SHIFT)
+#define MSTATUS_MPPL (1 << MSTATUS_MPPL_SHIFT)
+#define MSTATUS_MPPH (1 << MSTATUS_MPPH_SHIFT)
+#define MSTATUS_MPP (3 << MSTATUS_MPPL_SHIFT)
+#define MSTATUS_MPRV (1 << MSTATUS_MPRV_SHIFT)
+#define MSTATUS_SUM (1 << MSTATUS_SUM_SHIFT)
+#define MSTATUS_MXR (1 << MSTATUS_MXR_SHIFT)
+#define MSTATUS_TVM (1 << MSTATUS_TVM_SHIFT)
+#define MSTATUS_TW (1 << MSTATUS_TW_SHIFT)
+#define MSTATUS_TSR (1 << MSTATUS_TSR_SHIFT)
+
+#define PTE_V (1 << 0) /* valid */
+#define PTE_R (1 << 1) /* readable */
+#define PTE_W (1 << 2) /* writable */
+#define PTE_X (1 << 3) /* executable */
+#define PTE_U (1 << 4) /* accessible to U-mode */
+#define PTE_G (1 << 5) /* global mapping */
+#define PTE_A (1 << 6) /* accessed */
+#define PTE_D (1 << 7) /* dirty */
+
+#define SSTATUS_SIE_SHIFT 1 /* sstatus fields sit at their mstatus bits */
+#define SSTATUS_SPIE_SHIFT 5
+#define SSTATUS_UBE_SHIFT 6
+#define SSTATUS_SPP_SHIFT 8
+#define SSTATUS_SUM_SHIFT 18
+#define SSTATUS_MXR_SHIFT 19
+#define SSTATUS_SIE (1 << SSTATUS_SIE_SHIFT)
+#define SSTATUS_SPIE (1 << SSTATUS_SPIE_SHIFT)
+#define SSTATUS_UBE (1 << SSTATUS_UBE_SHIFT)
+#define SSTATUS_SPP (1 << SSTATUS_SPP_SHIFT)
+#define SSTATUS_SUM (1 << SSTATUS_SUM_SHIFT)
+#define SSTATUS_MXR (1 << SSTATUS_MXR_SHIFT)
+
+#define RV_PG_SHIFT 12 /* log2 of the page size in bytes */
+#define RV_PG_SIZE (1 << RV_PG_SHIFT) /* 4096 bytes */
 
 #define BLOCK_MAP_CAPACITY_BITS 10
 
@@ -111,15 +162,21 @@ typedef softfloat_float32_t riscv_float_t;
 #endif
 
 /* memory read handlers */
-typedef riscv_word_t (*riscv_mem_ifetch)(riscv_word_t addr);
-typedef riscv_word_t (*riscv_mem_read_w)(riscv_word_t addr);
-typedef riscv_half_t (*riscv_mem_read_s)(riscv_word_t addr);
-typedef riscv_byte_t (*riscv_mem_read_b)(riscv_word_t addr);
+typedef riscv_word_t (*riscv_mem_ifetch)(riscv_t *rv, riscv_word_t addr);
+typedef riscv_word_t (*riscv_mem_read_w)(riscv_t *rv, riscv_word_t addr);
+typedef riscv_half_t (*riscv_mem_read_s)(riscv_t *rv, riscv_word_t addr);
+typedef riscv_byte_t (*riscv_mem_read_b)(riscv_t *rv, riscv_word_t addr);
 
 /* memory write handlers */
-typedef void (*riscv_mem_write_w)(riscv_word_t addr, riscv_word_t data);
-typedef void (*riscv_mem_write_s)(riscv_word_t addr, riscv_half_t data);
-typedef void (*riscv_mem_write_b)(riscv_word_t addr, riscv_byte_t data);
+typedef void (*riscv_mem_write_w)(riscv_t *rv,
+                                  riscv_word_t addr,
+                                  riscv_word_t data);
+typedef void (*riscv_mem_write_s)(riscv_t *rv,
+                                  riscv_word_t addr,
+                                  riscv_half_t data);
+typedef void (*riscv_mem_write_b)(riscv_t *rv,
+                                  riscv_word_t addr,
+                                  riscv_byte_t data);
 
 /* system instruction handlers */
 typedef void (*riscv_on_ecall)(riscv_t *rv);
diff --git a/src/riscv_private.h b/src/riscv_private.h
index 0d40b4570..2e7d5cd45 100644
--- a/src/riscv_private.h
+++ b/src/riscv_private.h
@@ -42,6 +42,9 @@ enum {
     CSR_MTVAL = 0x343,    /* Machine bad address or instruction */
     CSR_MIP = 0x344,      /* Machine interrupt pending */
 
+    CSR_SATP =
+        0x180, /* supervisor address translation and protection register */
+
     /* low words */
     CSR_CYCLE = 0xC00, /* Cycle counter for RDCYCLE instruction */
     CSR_TIME = 0xC01,  /* Timer for RDTIME instruction */
@@ -122,6 +125,17 @@ struct riscv_internal {
     uint32_t csr_mip;      /* Machine interrupt pending */
     uint32_t csr_mbadaddr;
 
+    uint32_t csr_sstatus;    /* supervisor status register */
+    uint32_t csr_stvec;      /* supervisor trap vector base address register */
+    uint32_t csr_sip;        /* supervisor interrupt pending register */
+    uint32_t csr_sie;        /* supervisor interrupt enable register */
+    uint32_t csr_scounteren; /* supervisor counter-enable register */
+    uint32_t csr_sscratch;   /* supervisor scratch register */
+    uint32_t csr_sepc;       /* supervisor exception program counter */
+    uint32_t csr_scause;     /* supervisor cause register */
+    uint32_t csr_stval;      /* supervisor trap value register */
+    uint32_t csr_satp;       /* supervisor address translation and protection */
+
     bool compressed; /**< current instruction is compressed or not */
 #if !RV32_HAS(JIT)
     block_map_t block_map; /**< basic block map */
diff --git a/src/rv32_template.c b/src/rv32_template.c
index 07811ce15..e1c65e078 100644
--- a/src/rv32_template.c
+++ b/src/rv32_template.c
@@ -506,7 +506,7 @@ RVOP(
     lb,
     {
         rv->X[ir->rd] =
-            sign_extend_b(rv->io.mem_read_b(rv->X[ir->rs1] + ir->imm));
+            sign_extend_b(rv->io.mem_read_b(rv, rv->X[ir->rs1] + ir->imm));
     },
     GEN({
         mem;
@@ -523,7 +523,7 @@ RVOP(
     {
         const uint32_t addr = rv->X[ir->rs1] + ir->imm;
         RV_EXC_MISALIGN_HANDLER(1, load, false, 1);
-        rv->X[ir->rd] = sign_extend_h(rv->io.mem_read_s(addr));
+        rv->X[ir->rd] = sign_extend_h(rv->io.mem_read_s(rv, addr));
     },
     GEN({
         mem;
@@ -540,7 +540,7 @@ RVOP(
     {
         const uint32_t addr = rv->X[ir->rs1] + ir->imm;
         RV_EXC_MISALIGN_HANDLER(3, load, false, 1);
-        rv->X[ir->rd] = rv->io.mem_read_w(addr);
+        rv->X[ir->rd] = rv->io.mem_read_w(rv, addr);
     },
     GEN({
         mem;
@@ -554,7 +554,7 @@ RVOP(
 /* LBU: Load Byte Unsigned */
 RVOP(
     lbu,
-    { rv->X[ir->rd] = rv->io.mem_read_b(rv->X[ir->rs1] + ir->imm); },
+    { rv->X[ir->rd] = rv->io.mem_read_b(rv, rv->X[ir->rs1] + ir->imm); },
     GEN({
         mem;
         rald, VR0, rs1;
@@ -570,7 +570,7 @@ RVOP(
     {
         const uint32_t addr = rv->X[ir->rs1] + ir->imm;
         RV_EXC_MISALIGN_HANDLER(1, load, false, 1);
-        rv->X[ir->rd] = rv->io.mem_read_s(addr);
+        rv->X[ir->rd] = rv->io.mem_read_s(rv, addr);
     },
     GEN({
         mem;
@@ -590,7 +590,7 @@ RVOP(
 /* SB: Store Byte */
 RVOP(
     sb,
-    { rv->io.mem_write_b(rv->X[ir->rs1] + ir->imm, rv->X[ir->rs2]); },
+    { rv->io.mem_write_b(rv, rv->X[ir->rs1] + ir->imm, rv->X[ir->rs2]); },
     GEN({
         mem;
         rald, VR0, rs1;
@@ -606,7 +606,7 @@ RVOP(
     {
         const uint32_t addr = rv->X[ir->rs1] + ir->imm;
         RV_EXC_MISALIGN_HANDLER(1, store, false, 1);
-        rv->io.mem_write_s(addr, rv->X[ir->rs2]);
+        rv->io.mem_write_s(rv, addr, rv->X[ir->rs2]);
     },
     GEN({
         mem;
@@ -623,7 +623,7 @@ RVOP(
     {
         const uint32_t addr = rv->X[ir->rs1] + ir->imm;
         RV_EXC_MISALIGN_HANDLER(3, store, false, 1);
-        rv->io.mem_write_w(addr, rv->X[ir->rs2]);
+        rv->io.mem_write_w(rv, addr, rv->X[ir->rs2]);
     },
     GEN({
         mem;
@@ -1318,7 +1318,7 @@ RVOP(
         const uint32_t addr = rv->X[ir->rs1];
         RV_EXC_MISALIGN_HANDLER(3, load, false, 1);
         if (ir->rd)
-            rv->X[ir->rd] = rv->io.mem_read_w(addr);
+            rv->X[ir->rd] = rv->io.mem_read_w(rv, addr);
         /* skip registration of the 'reservation set'
          * FIXME: unimplemented
          */
@@ -1336,7 +1336,7 @@ RVOP(
          */
         const uint32_t addr = rv->X[ir->rs1];
         RV_EXC_MISALIGN_HANDLER(3, store, false, 1);
-        rv->io.mem_write_w(addr, rv->X[ir->rs2]);
+        rv->io.mem_write_w(rv, addr, rv->X[ir->rs2]);
         rv->X[ir->rd] = 0;
     },
     GEN({
@@ -1349,11 +1349,11 @@ RVOP(
     {
         const uint32_t addr = rv->X[ir->rs1];
         RV_EXC_MISALIGN_HANDLER(3, load, false, 1);
-        const uint32_t value1 = rv->io.mem_read_w(addr);
+        const uint32_t value1 = rv->io.mem_read_w(rv, addr);
         const uint32_t value2 = rv->X[ir->rs2];
         if (ir->rd)
             rv->X[ir->rd] = value1;
-        rv->io.mem_write_w(addr, value2);
+        rv->io.mem_write_w(rv, addr, value2);
     },
     GEN({
         assert; /* FIXME: Implement */
@@ -1365,12 +1365,12 @@ RVOP(
     {
         const uint32_t addr = rv->X[ir->rs1];
         RV_EXC_MISALIGN_HANDLER(3, load, false, 1);
-        const uint32_t value1 = rv->io.mem_read_w(addr);
+        const uint32_t value1 = rv->io.mem_read_w(rv, addr);
         const uint32_t value2 = rv->X[ir->rs2];
         if (ir->rd)
             rv->X[ir->rd] = value1;
         const uint32_t res = value1 + value2;
-        rv->io.mem_write_w(addr, res);
+        rv->io.mem_write_w(rv, addr, res);
     },
     GEN({
         assert; /* FIXME: Implement */
@@ -1382,12 +1382,12 @@ RVOP(
     {
         const uint32_t addr = rv->X[ir->rs1];
         RV_EXC_MISALIGN_HANDLER(3, load, false, 1);
-        const uint32_t value1 = rv->io.mem_read_w(addr);
+        const uint32_t value1 = rv->io.mem_read_w(rv, addr);
         const uint32_t value2 = rv->X[ir->rs2];
         if (ir->rd)
             rv->X[ir->rd] = value1;
         const uint32_t res = value1 ^ value2;
-        rv->io.mem_write_w(addr, res);
+        rv->io.mem_write_w(rv, addr, res);
     },
     GEN({
         assert; /* FIXME: Implement */
@@ -1399,12 +1399,12 @@ RVOP(
     {
         const uint32_t addr = rv->X[ir->rs1];
         RV_EXC_MISALIGN_HANDLER(3, load, false, 1);
-        const uint32_t value1 = rv->io.mem_read_w(addr);
+        const uint32_t value1 = rv->io.mem_read_w(rv, addr);
         const uint32_t value2 = rv->X[ir->rs2];
         if (ir->rd)
             rv->X[ir->rd] = value1;
         const uint32_t res = value1 & value2;
-        rv->io.mem_write_w(addr, res);
+        rv->io.mem_write_w(rv, addr, res);
     },
     GEN({
         assert; /* FIXME: Implement */
@@ -1416,12 +1416,12 @@ RVOP(
     {
         const uint32_t addr = rv->X[ir->rs1];
         RV_EXC_MISALIGN_HANDLER(3, load, false, 1);
-        const uint32_t value1 = rv->io.mem_read_w(addr);
+        const uint32_t value1 = rv->io.mem_read_w(rv, addr);
         const uint32_t value2 = rv->X[ir->rs2];
         if (ir->rd)
             rv->X[ir->rd] = value1;
         const uint32_t res = value1 | value2;
-        rv->io.mem_write_w(addr, res);
+        rv->io.mem_write_w(rv, addr, res);
     },
     GEN({
         assert; /* FIXME: Implement */
@@ -1433,14 +1433,14 @@ RVOP(
     {
         const uint32_t addr = rv->X[ir->rs1];
         RV_EXC_MISALIGN_HANDLER(3, load, false, 1);
-        const uint32_t value1 = rv->io.mem_read_w(addr);
+        const uint32_t value1 = rv->io.mem_read_w(rv, addr);
         const uint32_t value2 = rv->X[ir->rs2];
         if (ir->rd)
             rv->X[ir->rd] = value1;
         const int32_t a = value1;
         const int32_t b = value2;
         const uint32_t res = a < b ? value1 : value2;
-        rv->io.mem_write_w(addr, res);
+        rv->io.mem_write_w(rv, addr, res);
     },
     GEN({
         assert; /* FIXME: Implement */
@@ -1452,14 +1452,14 @@ RVOP(
     {
         const uint32_t addr = rv->X[ir->rs1];
         RV_EXC_MISALIGN_HANDLER(3, load, false, 1);
-        const uint32_t value1 = rv->io.mem_read_w(addr);
+        const uint32_t value1 = rv->io.mem_read_w(rv, addr);
         const uint32_t value2 = rv->X[ir->rs2];
         if (ir->rd)
             rv->X[ir->rd] = value1;
         const int32_t a = value1;
         const int32_t b = value2;
         const uint32_t res = a > b ? value1 : value2;
-        rv->io.mem_write_w(addr, res);
+        rv->io.mem_write_w(rv, addr, res);
     },
     GEN({
         assert; /* FIXME: Implement */
@@ -1471,12 +1471,12 @@ RVOP(
     {
         const uint32_t addr = rv->X[ir->rs1];
         RV_EXC_MISALIGN_HANDLER(3, load, false, 1);
-        const uint32_t value1 = rv->io.mem_read_w(addr);
+        const uint32_t value1 = rv->io.mem_read_w(rv, addr);
         const uint32_t value2 = rv->X[ir->rs2];
         if (ir->rd)
             rv->X[ir->rd] = value1;
         const uint32_t ures = value1 < value2 ? value1 : value2;
-        rv->io.mem_write_w(addr, ures);
+        rv->io.mem_write_w(rv, addr, ures);
     },
     GEN({
         assert; /* FIXME: Implement */
@@ -1488,12 +1488,12 @@ RVOP(
     {
         const uint32_t addr = rv->X[ir->rs1];
         RV_EXC_MISALIGN_HANDLER(3, load, false, 1);
-        const uint32_t value1 = rv->io.mem_read_w(addr);
+        const uint32_t value1 = rv->io.mem_read_w(rv, addr);
         const uint32_t value2 = rv->X[ir->rs2];
         if (ir->rd)
             rv->X[ir->rd] = value1;
         const uint32_t ures = value1 > value2 ? value1 : value2;
-        rv->io.mem_write_w(addr, ures);
+        rv->io.mem_write_w(rv, addr, ures);
     },
     GEN({
         assert; /* FIXME: Implement */
@@ -1510,7 +1510,7 @@ RVOP(
         /* copy into the float register */
         const uint32_t addr = rv->X[ir->rs1] + ir->imm;
         RV_EXC_MISALIGN_HANDLER(3, load, false, 1);
-        rv->F[ir->rd].v = rv->io.mem_read_w(addr);
+        rv->F[ir->rd].v = rv->io.mem_read_w(rv, addr);
     },
     GEN({
         assert; /* FIXME: Implement */
@@ -1523,7 +1523,7 @@ RVOP(
         /* copy from float registers */
         const uint32_t addr = rv->X[ir->rs1] + ir->imm;
         RV_EXC_MISALIGN_HANDLER(3, store, false, 1);
-        rv->io.mem_write_w(addr, rv->F[ir->rs2].v);
+        rv->io.mem_write_w(rv, addr, rv->F[ir->rs2].v);
     },
     GEN({
         assert; /* FIXME: Implement */
@@ -1884,7 +1884,7 @@ RVOP(
     {
         const uint32_t addr = rv->X[ir->rs1] + (uint32_t) ir->imm;
         RV_EXC_MISALIGN_HANDLER(3, load, true, 1);
-        rv->X[ir->rd] = rv->io.mem_read_w(addr);
+        rv->X[ir->rd] = rv->io.mem_read_w(rv, addr);
     },
     GEN({
         mem;
@@ -1905,7 +1905,7 @@ RVOP(
     {
         const uint32_t addr = rv->X[ir->rs1] + (uint32_t) ir->imm;
         RV_EXC_MISALIGN_HANDLER(3, store, true, 1);
-        rv->io.mem_write_w(addr, rv->X[ir->rs2]);
+        rv->io.mem_write_w(rv, addr, rv->X[ir->rs2]);
     },
     GEN({
         mem;
@@ -2258,7 +2258,7 @@ RVOP(
     {
         const uint32_t addr = rv->X[rv_reg_sp] + ir->imm;
         RV_EXC_MISALIGN_HANDLER(3, load, true, 1);
-        rv->X[ir->rd] = rv->io.mem_read_w(addr);
+        rv->X[ir->rd] = rv->io.mem_read_w(rv, addr);
     },
     GEN({
         mem;
@@ -2364,7 +2364,7 @@ RVOP(
     {
         const uint32_t addr = rv->X[rv_reg_sp] + ir->imm;
         RV_EXC_MISALIGN_HANDLER(3, store, true, 1);
-        rv->io.mem_write_w(addr, rv->X[ir->rs2]);
+        rv->io.mem_write_w(rv, addr, rv->X[ir->rs2]);
     },
     GEN({
         mem;
@@ -2383,7 +2383,7 @@ RVOP(
     {
         const uint32_t addr = rv->X[rv_reg_sp] + ir->imm;
         RV_EXC_MISALIGN_HANDLER(3, load, false, 1);
-        rv->F[ir->rd].v = rv->io.mem_read_w(addr);
+        rv->F[ir->rd].v = rv->io.mem_read_w(rv, addr);
     },
     GEN({
         assert; /* FIXME: Implement */
@@ -2395,7 +2395,7 @@ RVOP(
     {
         const uint32_t addr = rv->X[rv_reg_sp] + ir->imm;
         RV_EXC_MISALIGN_HANDLER(3, store, false, 1);
-        rv->io.mem_write_w(addr, rv->F[ir->rs2].v);
+        rv->io.mem_write_w(rv, addr, rv->F[ir->rs2].v);
     },
     GEN({
         assert; /* FIXME: Implement */
@@ -2407,7 +2407,7 @@ RVOP(
     {
         const uint32_t addr = rv->X[ir->rs1] + (uint32_t) ir->imm;
         RV_EXC_MISALIGN_HANDLER(3, load, false, 1);
-        rv->F[ir->rd].v = rv->io.mem_read_w(addr);
+        rv->F[ir->rd].v = rv->io.mem_read_w(rv, addr);
     },
     GEN({
         assert; /* FIXME: Implement */
@@ -2419,7 +2419,7 @@ RVOP(
     {
         const uint32_t addr = rv->X[ir->rs1] + (uint32_t) ir->imm;
         RV_EXC_MISALIGN_HANDLER(3, store, false, 1);
-        rv->io.mem_write_w(addr, rv->F[ir->rs2].v);
+        rv->io.mem_write_w(rv, addr, rv->F[ir->rs2].v);
     },
     GEN({
         assert; /* FIXME: Implement */