diff --git a/.gitmodules b/.gitmodules index e9d451d..1af7a1a 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "external/atom"] path = external/atom url = https://github.com/fleroviux/atom +[submodule "external/lunatic"] + path = external/lunatic + url = https://github.com/fleroviux/lunatic diff --git a/README.md b/README.md index 5627494..54b4e3c 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,6 @@ A Nintendo DS emulator developed for fun, with performance and multicore CPUs in mind. A nearly from scratch rewrite of my previous emulator aiming to try new techniques and achieve higher code quality. -However, at the moment not all code has been rewritten yet. But most games and the firmware boot. Notably the JIT is not integrated yet and 3D graphics are promising but work-in-progress. **This is highly experimental software.** I am developing this emulator for fun and learning only. diff --git a/external/CMakeLists.txt b/external/CMakeLists.txt index 78a336a..7aea1cf 100644 --- a/external/CMakeLists.txt +++ b/external/CMakeLists.txt @@ -1 +1,4 @@ -add_subdirectory(atom) \ No newline at end of file +set(LUNATIC_USE_EXTERNAL_FMT ON CACHE BOOL "" FORCE) + +add_subdirectory(atom) +add_subdirectory(lunatic) \ No newline at end of file diff --git a/external/lunatic b/external/lunatic new file mode 160000 index 0000000..88d3f9d --- /dev/null +++ b/external/lunatic @@ -0,0 +1 @@ +Subproject commit 88d3f9d3ec050a90d80e2977f0a3e5cb7de90cec diff --git a/resources/hgss.png b/resources/hgss.png index 2a96b41..b8480ea 100644 Binary files a/resources/hgss.png and b/resources/hgss.png differ diff --git a/src/dual/CMakeLists.txt b/src/dual/CMakeLists.txt index d4e9d8a..98af255 100644 --- a/src/dual/CMakeLists.txt +++ b/src/dual/CMakeLists.txt @@ -5,8 +5,8 @@ set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(SOURCES - src/arm/tablegen/tablegen.cpp - src/arm/arm.cpp + src/arm/interpreter/tablegen/tablegen.cpp + src/arm/interpreter/interpreter_cpu.cpp src/common/scheduler.cpp src/nds/arm7/apu.cpp src/nds/arm7/dma.cpp @@ -50,14 +50,14 @@ set(SOURCES ) set(HEADERS - src/arm/handlers/arithmetic.inl - src/arm/handlers/handler16.inl - src/arm/handlers/handler32.inl - src/arm/handlers/memory.inl - src/arm/tablegen/decoder.hpp - src/arm/tablegen/gen_arm.hpp - src/arm/tablegen/gen_thumb.hpp - src/arm/arm.hpp + src/arm/interpreter/handlers/arithmetic.inl + src/arm/interpreter/handlers/handler16.inl + src/arm/interpreter/handlers/handler32.inl + src/arm/interpreter/handlers/memory.inl + src/arm/interpreter/tablegen/decoder.hpp + src/arm/interpreter/tablegen/gen_arm.hpp + src/arm/interpreter/tablegen/gen_thumb.hpp + src/arm/interpreter/interpreter_cpu.hpp src/nds/video_unit/gpu/renderer/software/edge.hpp src/nds/video_unit/gpu/renderer/software/interpolator.hpp ) @@ -104,9 +104,20 @@ set(HEADERS_PUBLIC include/dual/nds/timer.hpp ) +option(DUAL_ENABLE_JIT "Enable Just-In-Time compiler support" ON) + +if(DUAL_ENABLE_JIT) + list(APPEND SOURCES src/arm/jit/lunatic_cpu.cpp) + list(APPEND HEADERS src/arm/jit/lunatic_cpu.hpp) +endif() + add_library(dual ${SOURCES} ${HEADERS} ${HEADERS_PUBLIC}) target_link_libraries(dual PUBLIC atom-common atom-logger atom-math) +if(DUAL_ENABLE_JIT) + target_link_libraries(dual PRIVATE lunatic) + target_compile_definitions(dual PUBLIC DUAL_ENABLE_JIT) +endif() target_include_directories(dual PUBLIC include) target_include_directories(dual PRIVATE src) diff --git a/src/dual/include/dual/arm/coprocessor.hpp b/src/dual/include/dual/arm/coprocessor.hpp index 67c2efb..e05b52a 100644 --- a/src/dual/include/dual/arm/coprocessor.hpp +++ b/src/dual/include/dual/arm/coprocessor.hpp @@ -5,11 +5,14 @@ namespace dual::arm { + class CPU; + class Coprocessor { public: virtual ~Coprocessor() = default; virtual void Reset() {} + virtual void SetCPU(CPU* cpu) {} virtual u32 MRC(int opc1, int cn, int cm, int opc2) = 0; virtual void MCR(int opc1, int cn, int cm, int opc2, u32 value) = 0; diff --git a/src/dual/include/dual/arm/cpu.hpp b/src/dual/include/dual/arm/cpu.hpp index 7e44e4c..72ced79 100644 --- a/src/dual/include/dual/arm/cpu.hpp +++ b/src/dual/include/dual/arm/cpu.hpp @@ -7,6 +7,11 @@ namespace dual::arm { + struct AttachCPn { + int id; + dual::arm::Coprocessor* coprocessor; + }; + class CPU { public: enum class Model { @@ -72,7 +77,8 @@ namespace dual::arm { virtual u32 GetExceptionBase() const = 0; virtual void SetExceptionBase(u32 address) = 0; - virtual void SetCoprocessor(int id, Coprocessor* coprocessor) = 0; + virtual void InvalidateICache() {} + virtual void InvalidateICacheRange(u32 address_lo, u32 address_hi) {} virtual void SetUnalignedDataAccessEnable(bool enable) = 0; diff --git a/src/dual/include/dual/nds/arm9/cp15.hpp b/src/dual/include/dual/nds/arm9/cp15.hpp index 8977fab..46ccbaa 100644 --- a/src/dual/include/dual/nds/arm9/cp15.hpp +++ b/src/dual/include/dual/nds/arm9/cp15.hpp @@ -9,9 +9,10 @@ namespace dual::nds::arm9 { class CP15 final : public arm::Coprocessor { public: - CP15(arm::CPU* cpu, MemoryBus* bus); + explicit CP15(MemoryBus* bus); void Reset() override; + void SetCPU(arm::CPU* cpu) override; void DirectBoot(); u32 MRC(int opc1, int cn, int cm, int opc2) override; void MCR(int opc1, int cn, int cm, int opc2, u32 value) override; diff --git a/src/dual/include/dual/nds/enums.hpp b/src/dual/include/dual/nds/enums.hpp index 754ff79..ead6f96 100644 --- a/src/dual/include/dual/nds/enums.hpp +++ b/src/dual/include/dual/nds/enums.hpp @@ -29,6 +29,11 @@ namespace dual::nds { return (CPU)((int)cpu ^ 1); } + enum class CPUExecutionEngine { + Interpreter, + JIT + }; + } // namespace dual::nds template<> struct fmt::formatter : formatter { diff --git a/src/dual/include/dual/nds/nds.hpp b/src/dual/include/dual/nds/nds.hpp index c9cf770..b9355bf 100644 --- a/src/dual/include/dual/nds/nds.hpp +++ b/src/dual/include/dual/nds/nds.hpp @@ -32,6 +32,7 @@ namespace dual::nds { NDS(); void Reset(); + void SetCPUExecutionEngine(CPUExecutionEngine cpu_execution_engine); void Step(int cycles_to_run); void LoadBootROM9(std::span data); void LoadBootROM7(std::span data); @@ -50,6 +51,8 @@ namespace dual::nds { void SetTouchState(bool pen_down, u8 x, u8 y); private: + void CreateCPUCores(); + Scheduler m_scheduler{}; SystemMemory m_memory{}; @@ -123,6 +126,8 @@ namespace dual::nds { std::shared_ptr m_rom; u64 m_step_target{}; + + CPUExecutionEngine m_cpu_execution_engine{CPUExecutionEngine::Interpreter}; }; } // namespace dual::nds diff --git a/src/dual/include/dual/nds/rom.hpp b/src/dual/include/dual/nds/rom.hpp index 4a73bdb..1b3a733 100644 --- a/src/dual/include/dual/nds/rom.hpp +++ b/src/dual/include/dual/nds/rom.hpp @@ -31,7 +31,8 @@ namespace dual::nds { const u32 address_hi = address + size; if(address_hi > m_size || address_hi < address) { - ATOM_PANIC("out-of-bounds ROM read request: address=0x{:08X}, size={}", address, size); + //ATOM_PANIC("out-of-bounds ROM read request: address=0x{:08X}, size={}", address, size); + return; } std::memcpy(destination, &m_data[address], size); } diff --git a/src/dual/src/arm/handlers/arithmetic.inl b/src/dual/src/arm/interpreter/handlers/arithmetic.inl similarity index 100% rename from src/dual/src/arm/handlers/arithmetic.inl rename to src/dual/src/arm/interpreter/handlers/arithmetic.inl diff --git a/src/dual/src/arm/handlers/handler16.inl b/src/dual/src/arm/interpreter/handlers/handler16.inl similarity index 100% rename from src/dual/src/arm/handlers/handler16.inl rename to src/dual/src/arm/interpreter/handlers/handler16.inl diff --git a/src/dual/src/arm/handlers/handler32.inl b/src/dual/src/arm/interpreter/handlers/handler32.inl similarity index 100% rename from src/dual/src/arm/handlers/handler32.inl rename to src/dual/src/arm/interpreter/handlers/handler32.inl diff --git a/src/dual/src/arm/handlers/memory.inl b/src/dual/src/arm/interpreter/handlers/memory.inl similarity index 100% rename from src/dual/src/arm/handlers/memory.inl rename to src/dual/src/arm/interpreter/handlers/memory.inl diff --git a/src/dual/src/arm/arm.cpp b/src/dual/src/arm/interpreter/interpreter_cpu.cpp similarity index 88% rename from src/dual/src/arm/arm.cpp rename to src/dual/src/arm/interpreter/interpreter_cpu.cpp index 30f123d..bbf6cf4 100644 --- a/src/dual/src/arm/arm.cpp +++ b/src/dual/src/arm/interpreter/interpreter_cpu.cpp @@ -1,13 +1,14 @@ -#include "arm.hpp" +#include "interpreter_cpu.hpp" namespace dual::arm { - ARM::ARM( + InterpreterCPU::InterpreterCPU( Memory& memory, Scheduler& scheduler, CycleCounter& cycle_counter, - Model model + Model model, + std::span coprocessor_table ) : m_memory{memory} , m_scheduler{scheduler} , m_cycle_counter{cycle_counter} @@ -17,10 +18,13 @@ namespace dual::arm { BuildConditionTable(); Reset(); - m_coprocessors.fill(nullptr); + for(auto& attach_cp_n : coprocessor_table) { + m_coprocessors.at(attach_cp_n.id) = attach_cp_n.coprocessor; + attach_cp_n.coprocessor->SetCPU(this); + } } - void ARM::Reset() { + void InterpreterCPU::Reset() { constexpr u32 nop = 0xE320F000; m_state = {}; @@ -32,7 +36,7 @@ namespace dual::arm { SetIRQFlag(false); } - void ARM::Run(int cycles) { + void InterpreterCPU::Run(int cycles) { if(GetWaitingForIRQ()) { m_cycle_counter.AddDeviceCycles((uint)cycles); return; @@ -82,7 +86,7 @@ namespace dual::arm { } } - void ARM::SignalIRQ() { + void InterpreterCPU::SignalIRQ() { if(m_state.cpsr.mask_irq) { return; } @@ -107,19 +111,19 @@ namespace dual::arm { ReloadPipeline32(); } - void ARM::ReloadPipeline32() { + void InterpreterCPU::ReloadPipeline32() { m_opcode[0] = ReadWordCode(m_state.r15); m_opcode[1] = ReadWordCode(m_state.r15 + 4); m_state.r15 += 8; } - void ARM::ReloadPipeline16() { + void InterpreterCPU::ReloadPipeline16() { m_opcode[0] = ReadHalfCode(m_state.r15); m_opcode[1] = ReadHalfCode(m_state.r15 + 2); m_state.r15 += 4; } - void ARM::BuildConditionTable() { + void InterpreterCPU::BuildConditionTable() { for(int flags = 0; flags < 16; flags++) { bool n = flags & 8; bool z = flags & 4; @@ -145,7 +149,7 @@ namespace dual::arm { } } - auto ARM::GetRegisterBankByMode(Mode mode) -> Bank { + auto InterpreterCPU::GetRegisterBankByMode(Mode mode) -> Bank { switch(mode) { case Mode::User: return Bank::None; case Mode::System: return Bank::None; @@ -159,7 +163,7 @@ namespace dual::arm { ATOM_PANIC("invalid ARM CPU mode: 0x{:02X}", (uint)mode); } - void ARM::SwitchMode(Mode new_mode) { + void InterpreterCPU::SwitchMode(Mode new_mode) { auto old_bank = GetRegisterBankByMode((Mode)m_state.cpsr.mode); auto new_bank = GetRegisterBankByMode(new_mode); diff --git a/src/dual/src/arm/arm.hpp b/src/dual/src/arm/interpreter/interpreter_cpu.hpp similarity index 94% rename from src/dual/src/arm/arm.hpp rename to src/dual/src/arm/interpreter/interpreter_cpu.hpp index 5bb6522..d0aad30 100644 --- a/src/dual/src/arm/arm.hpp +++ b/src/dual/src/arm/interpreter/interpreter_cpu.hpp @@ -8,16 +8,18 @@ #include #include #include +#include namespace dual::arm { - class ARM final : public CPU { + class InterpreterCPU final : public CPU { public: - ARM( + InterpreterCPU( Memory& memory, Scheduler& scheduler, CycleCounter& cycle_counter, - Model model + Model model, + std::span coprocessor_table = {} ); void Reset() override; @@ -30,10 +32,6 @@ namespace dual::arm { m_exception_base = address; } - void SetCoprocessor(int id, Coprocessor* coprocessor) override { - m_coprocessors.at(id) = coprocessor; - } - void SetUnalignedDataAccessEnable(bool enable) override { m_unaligned_data_access_enable = enable; } @@ -111,8 +109,8 @@ namespace dual::arm { void Run(int cycles) override; - typedef void (ARM::*Handler16)(u16); - typedef void (ARM::*Handler32)(u32); + typedef void (InterpreterCPU::*Handler16)(u16); + typedef void (InterpreterCPU::*Handler32)(u32); private: enum class Condition { @@ -169,7 +167,7 @@ namespace dual::arm { Scheduler& m_scheduler; CycleCounter& m_cycle_counter; Model m_model; - std::array m_coprocessors; + std::array m_coprocessors{}; bool m_irq_line; bool m_wait_for_irq = false; diff --git a/src/dual/src/arm/tablegen/decoder.hpp b/src/dual/src/arm/interpreter/tablegen/decoder.hpp similarity index 100% rename from src/dual/src/arm/tablegen/decoder.hpp rename to src/dual/src/arm/interpreter/tablegen/decoder.hpp diff --git a/src/dual/src/arm/interpreter/tablegen/gen_arm.hpp b/src/dual/src/arm/interpreter/tablegen/gen_arm.hpp new file mode 100644 index 0000000..f64be2d --- /dev/null +++ b/src/dual/src/arm/interpreter/tablegen/gen_arm.hpp @@ -0,0 +1,106 @@ + +enum class MultiplyOpcode { + MUL = 0b000, + MLA = 0b001, + UMULL = 0b100, + UMLAL = 0b101, + SMULL = 0b110, + SMLAL = 0b111 +}; + +enum class SignedMultiplyOpcode { + SMLAxy = 0b1000, + SM__Wy = 0b1001, + SMLALxy = 0b1010, + SMULxy = 0b1011 +}; + +template +static constexpr auto GenerateHandlerARM() -> Handler32 { + const bool pre = instruction & (1 << 24); + const bool add = instruction & (1 << 23); + const bool wb = instruction & (1 << 21); + const bool load = instruction & (1 << 20); + + switch(GetARMInstructionType(instruction)) { + case ARMInstrType::HalfwordSignedTransfer: { + const bool immediate = instruction & (1 << 22); + const auto opcode = (instruction >> 5) & 3; + + return &InterpreterCPU::ARM_HalfDoubleAndSignedTransfer; + } + case ARMInstrType::Multiply: { + const bool set_flags = instruction & (1 << 20); + + switch(static_cast((instruction >> 21) & 0xF)) { + case MultiplyOpcode::MUL: return &InterpreterCPU::ARM_Multiply; + case MultiplyOpcode::MLA: return &InterpreterCPU::ARM_Multiply; + case MultiplyOpcode::UMULL: return &InterpreterCPU::ARM_MultiplyLong; + case MultiplyOpcode::UMLAL: return &InterpreterCPU::ARM_MultiplyLong; + case MultiplyOpcode::SMULL: return &InterpreterCPU::ARM_MultiplyLong; + case MultiplyOpcode::SMLAL: return &InterpreterCPU::ARM_MultiplyLong; + } + + break; + } + case ARMInstrType::SingleDataSwap: { + const bool byte = instruction & (1 << 22); + + return &InterpreterCPU::ARM_SingleDataSwap; + } + case ARMInstrType::StatusTransfer: { + const bool immediate = instruction & (1 << 25); + const bool use_spsr = instruction & (1 << 22); + const bool to_status = instruction & (1 << 21); + + return &InterpreterCPU::ARM_StatusTransfer; + } + case ARMInstrType::BranchAndExchange: return &InterpreterCPU::ARM_BranchAndExchangeMaybeLink; + case ARMInstrType::CountLeadingZeros: return &InterpreterCPU::ARM_CountLeadingZeros; + case ARMInstrType::BranchLinkExchange: return &InterpreterCPU::ARM_BranchAndExchangeMaybeLink; + case ARMInstrType::SaturatingAddSubtract: { + const int opcode = (instruction >> 20) & 0xF; + + return &InterpreterCPU::ARM_SaturatingAddSubtract; + } + case ARMInstrType::SignedHalfwordMultiply: { + const bool x = instruction & (1 << 5); + const bool y = instruction & (1 << 6); + + switch(static_cast((instruction >> 21) & 0xF)) { + case SignedMultiplyOpcode::SMLAxy: return &InterpreterCPU::ARM_SignedHalfwordMultiply; + case SignedMultiplyOpcode::SM__Wy: return &InterpreterCPU::ARM_SignedWordHalfwordMultiply; + case SignedMultiplyOpcode::SMLALxy: return &InterpreterCPU::ARM_SignedHalfwordMultiplyLongAccumulate; + case SignedMultiplyOpcode::SMULxy: return &InterpreterCPU::ARM_SignedHalfwordMultiply; + } + + break; + } + case ARMInstrType::DataProcessing: { + const bool immediate = instruction & (1 << 25); + const bool set_flags = instruction & (1 << 20); + const auto opcode = static_cast((instruction >> 21) & 0xF); + const auto field4 = (instruction >> 4) & 0xF; + + return &InterpreterCPU::ARM_DataProcessing; + } + case ARMInstrType::SingleDataTransfer: { + const bool immediate = ~instruction & (1 << 25); + const bool byte = instruction & (1 << 22); + + return &InterpreterCPU::ARM_SingleDataTransfer; + } + case ARMInstrType::BlockDataTransfer: { + const bool user_mode = instruction & (1 << 22); + + return &InterpreterCPU::ARM_BlockDataTransfer; + } + case ARMInstrType::BranchAndLink: return &InterpreterCPU::ARM_BranchAndLink<(instruction >> 24) & 1>; + case ARMInstrType::CoprocessorRegisterXfer: return &InterpreterCPU::ARM_CoprocessorRegisterTransfer; + case ARMInstrType::SoftwareInterrupt: return &InterpreterCPU::ARM_SWI; + case ARMInstrType::BranchLinkExchangeImm: return &InterpreterCPU::ARM_BranchLinkExchangeImm; + default: break; + } + + return &InterpreterCPU::ARM_Undefined; +} diff --git a/src/dual/src/arm/tablegen/gen_thumb.hpp b/src/dual/src/arm/interpreter/tablegen/gen_thumb.hpp similarity index 62% rename from src/dual/src/arm/tablegen/gen_thumb.hpp rename to src/dual/src/arm/interpreter/tablegen/gen_thumb.hpp index 82b15d8..d07888e 100644 --- a/src/dual/src/arm/tablegen/gen_thumb.hpp +++ b/src/dual/src/arm/interpreter/tablegen/gen_thumb.hpp @@ -6,113 +6,113 @@ static constexpr auto GenerateHandlerThumb() -> Handler16 { const auto opcode = (instruction >> 11) & 3; const auto offset5 = (instruction >> 6) & 0x1F; - return &ARM::Thumb_MoveShiftedRegister; + return &InterpreterCPU::Thumb_MoveShiftedRegister; } case ThumbInstrType::AddSub: { const bool immediate = (instruction >> 10) & 1; const bool subtract = (instruction >> 9) & 1; const auto field3 = (instruction >> 6) & 7; - return &ARM::Thumb_AddSub; + return &InterpreterCPU::Thumb_AddSub; } case ThumbInstrType::MoveCompareAddSubImm: { const auto opcode = (instruction >> 11) & 3; const auto rD = (instruction >> 8) & 7; - return &ARM::Thumb_MoveCompareAddSubImm; + return &InterpreterCPU::Thumb_MoveCompareAddSubImm; } case ThumbInstrType::ALU: { - const auto opcode = static_cast((instruction >> 6) & 0xF); + const auto opcode = static_cast((instruction >> 6) & 0xF); - return &ARM::Thumb_ALU; + return &InterpreterCPU::Thumb_ALU; } case ThumbInstrType::HighRegisterOps: { - const auto opcode = static_cast((instruction >> 8) & 3); + const auto opcode = static_cast((instruction >> 8) & 3); const bool high1 = (instruction >> 7) & 1; const bool high2 = (instruction >> 6) & 1; - return &ARM::Thumb_HighRegisterOps_BX; + return &InterpreterCPU::Thumb_HighRegisterOps_BX; } case ThumbInstrType::LoadStoreRelativePC: { const auto rD = (instruction >> 8) & 7; - return &ARM::Thumb_LoadStoreRelativePC; + return &InterpreterCPU::Thumb_LoadStoreRelativePC; } case ThumbInstrType::LoadStoreOffsetReg: { const auto opcode = (instruction >> 10) & 3; const auto rO = (instruction >> 6) & 7; - return &ARM::Thumb_LoadStoreOffsetReg; + return &InterpreterCPU::Thumb_LoadStoreOffsetReg; } case ThumbInstrType::LoadStoreSigned: { const auto opcode = (instruction >> 10) & 3; const auto rO = (instruction >> 6) & 7; - return &ARM::Thumb_LoadStoreSigned; + return &InterpreterCPU::Thumb_LoadStoreSigned; } case ThumbInstrType::LoadStoreOffsetImm: { const auto opcode = (instruction >> 11) & 3; const auto offset5 = (instruction >> 6) & 0x1F; - return &ARM::Thumb_LoadStoreOffsetImm; + return &InterpreterCPU::Thumb_LoadStoreOffsetImm; } case ThumbInstrType::LoadStoreHword: { const bool load = (instruction >> 11) & 1; const auto offset5 = (instruction >> 6) & 0x1F; - return &ARM::Thumb_LoadStoreHword; + return &InterpreterCPU::Thumb_LoadStoreHword; } case ThumbInstrType::LoadStoreRelativeSP: { const bool load = (instruction >> 11) & 1; const auto rD = (instruction >> 8) & 7; - return &ARM::Thumb_LoadStoreRelativeToSP; + return &InterpreterCPU::Thumb_LoadStoreRelativeToSP; } case ThumbInstrType::LoadAddress: { const bool use_r13 = (instruction >> 11) & 1; const auto rD = (instruction >> 8) & 7; - return &ARM::Thumb_LoadAddress; + return &InterpreterCPU::Thumb_LoadAddress; } case ThumbInstrType::AddOffsetToSP: { const bool subtract = (instruction >> 7) & 1; - return &ARM::Thumb_AddOffsetToSP; + return &InterpreterCPU::Thumb_AddOffsetToSP; } case ThumbInstrType::PushPop: { const bool load = (instruction >> 11) & 1; const bool pc_lr = (instruction >> 8) & 1; - return &ARM::Thumb_PushPop; + return &InterpreterCPU::Thumb_PushPop; } case ThumbInstrType::LoadStoreMultiple: { const bool load = (instruction >> 11) & 1; const auto rB = (instruction >> 8) & 7; - return &ARM::Thumb_LoadStoreMultiple; + return &InterpreterCPU::Thumb_LoadStoreMultiple; } case ThumbInstrType::ConditionalBranch: { const auto condition = (instruction >> 8) & 0xF; - return &ARM::Thumb_ConditionalBranch; + return &InterpreterCPU::Thumb_ConditionalBranch; } case ThumbInstrType::SoftwareInterrupt: { - return &ARM::Thumb_SWI; + return &InterpreterCPU::Thumb_SWI; } case ThumbInstrType::UnconditionalBranch: { - return &ARM::Thumb_UnconditionalBranch; + return &InterpreterCPU::Thumb_UnconditionalBranch; } case ThumbInstrType::LongBranchLinkPrefix: { - return &ARM::Thumb_LongBranchLinkPrefix; + return &InterpreterCPU::Thumb_LongBranchLinkPrefix; } case ThumbInstrType::LongBranchLinkSuffix: { - return &ARM::Thumb_LongBranchLinkSuffix; + return &InterpreterCPU::Thumb_LongBranchLinkSuffix; } case ThumbInstrType::LongBranchLinkExchangeSuffix: { - return &ARM::Thumb_LongBranchLinkSuffix; + return &InterpreterCPU::Thumb_LongBranchLinkSuffix; } default: break; } - return &ARM::Thumb_Unimplemented; + return &InterpreterCPU::Thumb_Unimplemented; } diff --git a/src/dual/src/arm/tablegen/tablegen.cpp b/src/dual/src/arm/interpreter/tablegen/tablegen.cpp similarity index 81% rename from src/dual/src/arm/tablegen/tablegen.cpp rename to src/dual/src/arm/interpreter/tablegen/tablegen.cpp index b854439..f012059 100644 --- a/src/dual/src/arm/tablegen/tablegen.cpp +++ b/src/dual/src/arm/interpreter/tablegen/tablegen.cpp @@ -1,13 +1,13 @@ #include -#include "arm/arm.hpp" +#include "arm/interpreter/interpreter_cpu.hpp" #include "decoder.hpp" namespace dual::arm { - using Handler16 = ARM::Handler16; - using Handler32 = ARM::Handler32; + using Handler16 = InterpreterCPU::Handler16; + using Handler32 = InterpreterCPU::Handler32; /** A helper class used to generate lookup tables for * the interpreter at compiletime. @@ -57,7 +57,7 @@ namespace dual::arm { } }; - std::array ARM::k_opcode_lut_16 = TableGen::GenerateTableThumb(); - std::array ARM::k_opcode_lut_32 = TableGen::GenerateTableARM(); + std::array InterpreterCPU::k_opcode_lut_16 = TableGen::GenerateTableThumb(); + std::array InterpreterCPU::k_opcode_lut_32 = TableGen::GenerateTableARM(); } // namespace dual::arm diff --git a/src/dual/src/arm/jit/lunatic_cpu.cpp b/src/dual/src/arm/jit/lunatic_cpu.cpp new file mode 100644 index 0000000..3143a9b --- /dev/null +++ b/src/dual/src/arm/jit/lunatic_cpu.cpp @@ -0,0 +1,2 @@ + +#include "lunatic_cpu.hpp" \ No newline at end of file diff --git a/src/dual/src/arm/jit/lunatic_cpu.hpp b/src/dual/src/arm/jit/lunatic_cpu.hpp new file mode 100644 index 0000000..ef7f8d3 --- /dev/null +++ b/src/dual/src/arm/jit/lunatic_cpu.hpp @@ -0,0 +1,181 @@ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace dual::arm { + + class LunaticCPU final : public CPU { + public: + LunaticCPU( + dual::arm::Memory& memory, + CycleCounter& cycle_counter, + Model model, + std::span coprocessor_table = {} + ) : m_lunatic_memory{memory} + , m_cycle_counter{cycle_counter} { + lunatic::CPU::Descriptor::Model lunatic_cpu_model; + std::array lunatic_cop_array{}; + + switch(model) { + case Model::ARM7: lunatic_cpu_model = lunatic::CPU::Descriptor::Model::ARM7; break; + case Model::ARM9: lunatic_cpu_model = lunatic::CPU::Descriptor::Model::ARM9; break; + default: ATOM_PANIC("unimplemented CPU model"); + } + + for(auto& attach_cp_n : coprocessor_table) { + dual::arm::Coprocessor& coprocessor = *attach_cp_n.coprocessor; + m_lunatic_coprocessors.emplace_back(coprocessor); + lunatic_cop_array.at(attach_cp_n.id) = &m_lunatic_coprocessors.back(); + coprocessor.SetCPU(this); + } + + m_lunatic_cpu = lunatic::CreateCPU({ + .memory = m_lunatic_memory, + .coprocessors = lunatic_cop_array, + .model = lunatic_cpu_model + }); + } + + void Reset() override { + m_lunatic_cpu->Reset(); + } + + u32 GetExceptionBase() const override { + return m_lunatic_cpu->GetExceptionBase(); + }; + + void SetExceptionBase(u32 address) override { + m_lunatic_cpu->SetExceptionBase(address); + } + + void InvalidateICache() override { + m_lunatic_cpu->ClearICache(); + } + + void InvalidateICacheRange(u32 address_lo, u32 address_hi) override { + m_lunatic_cpu->ClearICacheRange(address_lo, address_hi); + } + + void SetUnalignedDataAccessEnable(bool enable) override { + ATOM_PANIC("unimplemented"); + } + + bool GetWaitingForIRQ() const override { + return m_lunatic_cpu->WaitForIRQ(); + } + + void SetWaitingForIRQ(bool value) override { + m_lunatic_cpu->WaitForIRQ() = value; + } + + bool GetIRQFlag() const override { + return m_lunatic_cpu->IRQLine(); + } + + void SetIRQFlag(bool value) override { + m_lunatic_cpu->IRQLine() = value; + } + + u32 GetGPR(GPR reg) const override { + return m_lunatic_cpu->GetGPR(static_cast(reg)); + } + + u32 GetGPR(GPR reg, Mode mode) const override { + return m_lunatic_cpu->GetGPR(static_cast(reg), static_cast(mode)); + } + + PSR GetCPSR() const override { + return m_lunatic_cpu->GetCPSR().v; + } + + PSR GetSPSR(Mode mode) const override { + return m_lunatic_cpu->GetSPSR(static_cast(mode)).v; + } + + void SetGPR(GPR reg, u32 value) override { + m_lunatic_cpu->SetGPR(static_cast(reg), value); + } + + void SetGPR(GPR reg, Mode mode, u32 value) override { + m_lunatic_cpu->SetGPR(static_cast(reg), static_cast(mode), value); + } + + void SetCPSR(PSR value) override { + m_lunatic_cpu->SetCPSR({.v = value.word}); + } + + void SetSPSR(Mode mode, PSR value) override { + m_lunatic_cpu->SetSPSR(static_cast(mode), {.v = value.word}); + } + + void Run(int cycles) override { + // @todo: better integration of the cycle counter with the JIT? + m_lunatic_cpu->Run(cycles); + m_cycle_counter.AddDeviceCycles(cycles); + } + + private: + struct Memory final : lunatic::Memory { + explicit Memory(dual::arm::Memory& memory_impl) : m_memory_impl{memory_impl} {} + + u8 ReadByte(u32 address, Bus bus) override { + return m_memory_impl.ReadByte(address, static_cast(bus)); + } + + u16 ReadHalf(u32 address, Bus bus) override { + return m_memory_impl.ReadHalf(address, static_cast(bus)); + } + + u32 ReadWord(u32 address, Bus bus) override { + return m_memory_impl.ReadWord(address, static_cast(bus)); + } + + void WriteByte(u32 address, u8 value, Bus bus) override { + m_memory_impl.WriteByte(address, value, static_cast(bus)); + } + + void WriteHalf(u32 address, u16 value, Bus bus) override { + m_memory_impl.WriteHalf(address, value, static_cast(bus)); + } + + void WriteWord(u32 address, u32 value, Bus bus) override { + m_memory_impl.WriteWord(address, value, static_cast(bus)); + } + + dual::arm::Memory& m_memory_impl; + }; + + struct Coprocessor final : lunatic::Coprocessor { + explicit Coprocessor(dual::arm::Coprocessor& coprocessor_impl) : m_coprocessor_impl{coprocessor_impl} {} + + bool ShouldWriteBreakBasicBlock(int opc1, int cn, int cm, int opc2) override { + // @todo: evaluate if narrowing this down would have any real world benefits. + return true; + } + + u32 Read(int opc1, int cn, int cm, int opc2) override { + return m_coprocessor_impl.MRC(opc1, cn, cm, opc2); + } + + void Write(int opc1, int cn, int cm, int opc2, u32 value) override { + m_coprocessor_impl.MCR(opc1, cn, cm, opc2, value); + } + + dual::arm::Coprocessor& m_coprocessor_impl; + }; + + std::unique_ptr m_lunatic_cpu{}; + Memory m_lunatic_memory; + std::vector m_lunatic_coprocessors{}; + CycleCounter& m_cycle_counter; + }; + +} // namespace dual::arm diff --git a/src/dual/src/arm/tablegen/gen_arm.hpp b/src/dual/src/arm/tablegen/gen_arm.hpp deleted file mode 100644 index be3b356..0000000 --- a/src/dual/src/arm/tablegen/gen_arm.hpp +++ /dev/null @@ -1,106 +0,0 @@ - -enum class MultiplyOpcode { - MUL = 0b000, - MLA = 0b001, - UMULL = 0b100, - UMLAL = 0b101, - SMULL = 0b110, - SMLAL = 0b111 -}; - -enum class SignedMultiplyOpcode { - SMLAxy = 0b1000, - SM__Wy = 0b1001, - SMLALxy = 0b1010, - SMULxy = 0b1011 -}; - -template -static constexpr auto GenerateHandlerARM() -> Handler32 { - const bool pre = instruction & (1 << 24); - const bool add = instruction & (1 << 23); - const bool wb = instruction & (1 << 21); - const bool load = instruction & (1 << 20); - - switch(GetARMInstructionType(instruction)) { - case ARMInstrType::HalfwordSignedTransfer: { - const bool immediate = instruction & (1 << 22); - const auto opcode = (instruction >> 5) & 3; - - return &ARM::ARM_HalfDoubleAndSignedTransfer; - } - case ARMInstrType::Multiply: { - const bool set_flags = instruction & (1 << 20); - - switch(static_cast((instruction >> 21) & 0xF)) { - case MultiplyOpcode::MUL: return &ARM::ARM_Multiply; - case MultiplyOpcode::MLA: return &ARM::ARM_Multiply; - case MultiplyOpcode::UMULL: return &ARM::ARM_MultiplyLong; - case MultiplyOpcode::UMLAL: return &ARM::ARM_MultiplyLong; - case MultiplyOpcode::SMULL: return &ARM::ARM_MultiplyLong; - case MultiplyOpcode::SMLAL: return &ARM::ARM_MultiplyLong; - } - - break; - } - case ARMInstrType::SingleDataSwap: { - const bool byte = instruction & (1 << 22); - - return &ARM::ARM_SingleDataSwap; - } - case ARMInstrType::StatusTransfer: { - const bool immediate = instruction & (1 << 25); - const bool use_spsr = instruction & (1 << 22); - const bool to_status = instruction & (1 << 21); - - return &ARM::ARM_StatusTransfer; - } - case ARMInstrType::BranchAndExchange: return &ARM::ARM_BranchAndExchangeMaybeLink; - case ARMInstrType::CountLeadingZeros: return &ARM::ARM_CountLeadingZeros; - case ARMInstrType::BranchLinkExchange: return &ARM::ARM_BranchAndExchangeMaybeLink; - case ARMInstrType::SaturatingAddSubtract: { - const int opcode = (instruction >> 20) & 0xF; - - return &ARM::ARM_SaturatingAddSubtract; - } - case ARMInstrType::SignedHalfwordMultiply: { - const bool x = instruction & (1 << 5); - const bool y = instruction & (1 << 6); - - switch(static_cast((instruction >> 21) & 0xF)) { - case SignedMultiplyOpcode::SMLAxy: return &ARM::ARM_SignedHalfwordMultiply; - case SignedMultiplyOpcode::SM__Wy: return &ARM::ARM_SignedWordHalfwordMultiply; - case SignedMultiplyOpcode::SMLALxy: return &ARM::ARM_SignedHalfwordMultiplyLongAccumulate; - case SignedMultiplyOpcode::SMULxy: return &ARM::ARM_SignedHalfwordMultiply; - } - - break; - } - case ARMInstrType::DataProcessing: { - const bool immediate = instruction & (1 << 25); - const bool set_flags = instruction & (1 << 20); - const auto opcode = static_cast((instruction >> 21) & 0xF); - const auto field4 = (instruction >> 4) & 0xF; - - return &ARM::ARM_DataProcessing; - } - case ARMInstrType::SingleDataTransfer: { - const bool immediate = ~instruction & (1 << 25); - const bool byte = instruction & (1 << 22); - - return &ARM::ARM_SingleDataTransfer; - } - case ARMInstrType::BlockDataTransfer: { - const bool user_mode = instruction & (1 << 22); - - return &ARM::ARM_BlockDataTransfer; - } - case ARMInstrType::BranchAndLink: return &ARM::ARM_BranchAndLink<(instruction >> 24) & 1>; - case ARMInstrType::CoprocessorRegisterXfer: return &ARM::ARM_CoprocessorRegisterTransfer; - case ARMInstrType::SoftwareInterrupt: return &ARM::ARM_SWI; - case ARMInstrType::BranchLinkExchangeImm: return &ARM::ARM_BranchLinkExchangeImm; - default: break; - } - - return &ARM::ARM_Undefined; -} diff --git a/src/dual/src/nds/arm9/cp15.cpp b/src/dual/src/nds/arm9/cp15.cpp index a05e367..8c5f398 100644 --- a/src/dual/src/nds/arm9/cp15.cpp +++ b/src/dual/src/nds/arm9/cp15.cpp @@ -7,7 +7,7 @@ namespace dual::nds::arm9 { - CP15::CP15(arm::CPU* cpu, MemoryBus* bus) : m_cpu{cpu}, m_bus{bus} { + CP15::CP15(MemoryBus* bus) : m_bus{bus} { } void CP15::Reset() { @@ -15,6 +15,10 @@ namespace dual::nds::arm9 { DirectBoot(); } + void CP15::SetCPU(arm::CPU* cpu) { + m_cpu = cpu; + } + void CP15::DirectBoot() { // Reset control register (enable DTCM and ITCM, exception base = 0xFFFF0000) MCR(0, 1, 0, 0, 0x0005707D); @@ -77,6 +81,16 @@ namespace dual::nds::arm9 { m_cpu->SetWaitingForIRQ(true); break; } + case ID(0, 7, 5, 0): { // Invalidate ICache + m_cpu->InvalidateICache(); + break; + } + case ID(0, 7, 5, 1): { // Invalidate ICache Line + const u32 address_lo = value & ~0x1Fu; + const u32 address_hi = address_lo + 0x1Fu; + m_cpu->InvalidateICacheRange(address_lo, address_hi); + break; + } case ID(0, 9, 1, 0): { // DTCM region register const int size = static_cast((value >> 1) & 0x1Fu); diff --git a/src/dual/src/nds/nds.cpp b/src/dual/src/nds/nds.cpp index 3c55568..c8274bf 100644 --- a/src/dual/src/nds/nds.cpp +++ b/src/dual/src/nds/nds.cpp @@ -6,32 +6,24 @@ #include #include -#include "arm/arm.hpp" +#include "arm/interpreter/interpreter_cpu.hpp" +#ifdef DUAL_ENABLE_JIT + #include "arm/jit/lunatic_cpu.hpp" +#endif namespace dual::nds { NDS::NDS() { - m_arm9.cpu = std::make_unique( - m_arm9.bus, - m_scheduler, - m_arm9.cycle_counter, - arm::CPU::Model::ARM9 - ); - m_arm9.cp15 = std::make_unique(m_arm9.cpu.get(), &m_arm9.bus); - m_arm9.cpu->SetCoprocessor(15, m_arm9.cp15.get()); - - m_arm7.cpu = std::make_unique( - m_arm7.bus, - m_scheduler, - m_arm7.cycle_counter, - arm::CPU::Model::ARM7 - ); + m_arm9.cp15 = std::make_unique(&m_arm9.bus); + } - m_arm9.irq.SetCPU(m_arm9.cpu.get()); - m_arm7.irq.SetCPU(m_arm7.cpu.get()); + void NDS::SetCPUExecutionEngine(CPUExecutionEngine cpu_execution_engine) { + m_cpu_execution_engine = cpu_execution_engine; } void NDS::Reset() { + CreateCPUCores(); + m_scheduler.Reset(); m_video_unit.Reset(); @@ -77,6 +69,29 @@ namespace dual::nds { m_step_target = 0u; } + void NDS::CreateCPUCores() { + const arm::AttachCPn attach_cp15{.id = 15, .coprocessor = m_arm9.cp15.get()}; + + switch(m_cpu_execution_engine) { + case CPUExecutionEngine::Interpreter: { + m_arm9.cpu = std::make_unique(m_arm9.bus, m_scheduler, m_arm9.cycle_counter, arm::CPU::Model::ARM9, std::span{{attach_cp15}}); + m_arm7.cpu = std::make_unique(m_arm7.bus, m_scheduler, m_arm7.cycle_counter, arm::CPU::Model::ARM7); + break; + } +#ifdef DUAL_ENABLE_JIT + case CPUExecutionEngine::JIT: { + m_arm9.cpu = std::make_unique(m_arm9.bus, m_arm9.cycle_counter, arm::CPU::Model::ARM9, std::span{{attach_cp15}}); + m_arm7.cpu = std::make_unique(m_arm7.bus, m_arm7.cycle_counter, arm::CPU::Model::ARM7); + break; + } +#endif + default: ATOM_PANIC("unknown CPU emulator"); + } + + m_arm9.irq.SetCPU(m_arm9.cpu.get()); + m_arm7.irq.SetCPU(m_arm7.cpu.get()); + } + void NDS::Step(int cycles_to_run) { const u64 step_target = m_step_target + cycles_to_run; diff --git a/src/platform/sdl/src/application.cpp b/src/platform/sdl/src/application.cpp index 9f096ec..410a37b 100644 --- a/src/platform/sdl/src/application.cpp +++ b/src/platform/sdl/src/application.cpp @@ -30,12 +30,16 @@ int Application::Run(int argc, char** argv) { std::string boot9_path = "boot9.bin"; int scale = 0; bool fullscreen = false; + bool enable_jit = false; atom::Arguments args{"irisdual", "A Nintendo DS emulator developed for fun, with performance and multicore CPUs in mind.", {0, 1, 0}}; args.RegisterArgument(boot7_path, true, "boot7", "Path to the ARM7 Boot ROM", "path"); args.RegisterArgument(boot9_path, true, "boot9", "Path to the ARM9 Boot ROM", "path"); args.RegisterArgument(scale, true, "scale", "Screen scale factor"); args.RegisterArgument(fullscreen, true, "fullscreen", "Whether to run in fullscreen or windowed mode"); +#ifdef DUAL_ENABLE_JIT + args.RegisterArgument(enable_jit, true, "jit", "Use dynamic recompilation"); +#endif args.RegisterFile("nds_file", false); if(!args.Parse(argc, argv, &files)) { @@ -43,6 +47,12 @@ int Application::Run(int argc, char** argv) { } CreateWindow(scale, fullscreen); +#ifdef DUAL_ENABLE_JIT + // CPU engine must be configured before resetting the emulator + if(enable_jit) { + m_nds->SetCPUExecutionEngine(dual::nds::CPUExecutionEngine::JIT); + } +#endif // ARM7 boot ROM must be loaded before the ROM when firmware booting. LoadBootROM(boot7_path.c_str(), false); LoadBootROM(boot9_path.c_str(), true);