Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

R5900: Improve the EE cache performance #12108

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 88 additions & 56 deletions pcsx2/COP0.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -230,116 +230,142 @@ void MapTLB(const tlbs& t, int i)
u32 saddr, eaddr;

COP0_LOG("MAP TLB %d: 0x%08X-> [0x%08X 0x%08X] S=%d G=%d ASID=%d Mask=0x%03X EntryLo0 PFN=%x EntryLo0 Cache=%x EntryLo1 PFN=%x EntryLo1 Cache=%x VPN2=%x",
i, t.VPN2, t.PFN0, t.PFN1, t.S >> 31, t.G, t.ASID,
t.Mask, t.EntryLo0 >> 6, (t.EntryLo0 & 0x38) >> 3, t.EntryLo1 >> 6, (t.EntryLo1 & 0x38) >> 3, t.VPN2);
i, t.VPN2(), t.PFN0(), t.PFN1(), t.isSPR() >> 31, t.isGlobal(), t.EntryHi.ASID,
t.Mask(), t.EntryLo0.PFN, t.EntryLo0.C, t.EntryLo1.PFN, t.EntryLo1.C, t.VPN2());

// According to the manual
// 'It [SPR] must be mapped into a contiguous 16 KB of virtual address space that is
// aligned on a 16KB boundary.Results are not guaranteed if this restriction is not followed.'
// Assume that the game isn't doing anything less-than-ideal with the scratchpad mapping and map it directly to eeMem->Scratch.
if (t.S)
if (t.isSPR())
{
if (t.VPN2 != 0x70000000)
Console.Warning("COP0: Mapping Scratchpad to non-default address 0x%08X", t.VPN2);
if (t.VPN2() != 0x70000000)
Console.Warning("COP0: Mapping Scratchpad to non-default address 0x%08X", t.VPN2());

vtlb_VMapBuffer(t.VPN2, eeMem->Scratch, Ps2MemSize::Scratch);
vtlb_VMapBuffer(t.VPN2(), eeMem->Scratch, Ps2MemSize::Scratch);
}
else
{
if (t.EntryLo0 & 0x2)
if (t.EntryLo0.V)
{
mask = ((~t.Mask) << 1) & 0xfffff;
saddr = t.VPN2 >> 12;
eaddr = saddr + t.Mask + 1;
mask = ((~t.Mask()) << 1) & 0xfffff;
saddr = t.VPN2() >> 12;
eaddr = saddr + t.Mask() + 1;

for (addr = saddr; addr < eaddr; addr++)
{
if ((addr & mask) == ((t.VPN2 >> 12) & mask))
if ((addr & mask) == ((t.VPN2() >> 12) & mask))
{ //match
memSetPageAddr(addr << 12, t.PFN0 + ((addr - saddr) << 12));
memSetPageAddr(addr << 12, t.PFN0() + ((addr - saddr) << 12));
Cpu->Clear(addr << 12, 0x400);
}
}
}

if (t.EntryLo1 & 0x2)
if (t.EntryLo1.V)
{
mask = ((~t.Mask) << 1) & 0xfffff;
saddr = (t.VPN2 >> 12) + t.Mask + 1;
eaddr = saddr + t.Mask + 1;
mask = ((~t.Mask()) << 1) & 0xfffff;
saddr = (t.VPN2() >> 12) + t.Mask() + 1;
eaddr = saddr + t.Mask() + 1;

for (addr = saddr; addr < eaddr; addr++)
{
if ((addr & mask) == ((t.VPN2 >> 12) & mask))
if ((addr & mask) == ((t.VPN2() >> 12) & mask))
{ //match
memSetPageAddr(addr << 12, t.PFN1 + ((addr - saddr) << 12));
memSetPageAddr(addr << 12, t.PFN1() + ((addr - saddr) << 12));
Cpu->Clear(addr << 12, 0x400);
}
}
}
}
}

__inline int ConvertPageMask(const u32 PageMask)
{
const u32 mask = std::popcount(PageMask >> 13);

pxAssertMsg(!((mask & 1) || mask > 12), "Invalid page mask for this TLB entry. EE cache doesn't know what to do here.");

return (1 << (12 + mask)) - 1;
}

void UnmapTLB(const tlbs& t, int i)
{
//Console.WriteLn("Clear TLB %d: %08x-> [%08x %08x] S=%d G=%d ASID=%d Mask= %03X", i,t.VPN2,t.PFN0,t.PFN1,t.S,t.G,t.ASID,t.Mask);
u32 mask, addr;
u32 saddr, eaddr;

if (t.S)
if (t.isSPR())
{
vtlb_VMapUnmap(t.VPN2, 0x4000);
vtlb_VMapUnmap(t.VPN2(), 0x4000);
return;
}

if (t.EntryLo0 & 0x2)
if (t.EntryLo0.V)
{
mask = ((~t.Mask) << 1) & 0xfffff;
saddr = t.VPN2 >> 12;
eaddr = saddr + t.Mask + 1;
mask = ((~t.Mask()) << 1) & 0xfffff;
saddr = t.VPN2() >> 12;
eaddr = saddr + t.Mask() + 1;
// Console.WriteLn("Clear TLB: %08x ~ %08x",saddr,eaddr-1);
for (addr = saddr; addr < eaddr; addr++)
{
if ((addr & mask) == ((t.VPN2 >> 12) & mask))
if ((addr & mask) == ((t.VPN2() >> 12) & mask))
{ //match
memClearPageAddr(addr << 12);
Cpu->Clear(addr << 12, 0x400);
}
}
}

if (t.EntryLo1 & 0x2)
if (t.EntryLo1.V)
{
mask = ((~t.Mask) << 1) & 0xfffff;
saddr = (t.VPN2 >> 12) + t.Mask + 1;
eaddr = saddr + t.Mask + 1;
mask = ((~t.Mask()) << 1) & 0xfffff;
saddr = (t.VPN2() >> 12) + t.Mask() + 1;
eaddr = saddr + t.Mask() + 1;
// Console.WriteLn("Clear TLB: %08x ~ %08x",saddr,eaddr-1);
for (addr = saddr; addr < eaddr; addr++)
{
if ((addr & mask) == ((t.VPN2 >> 12) & mask))
if ((addr & mask) == ((t.VPN2() >> 12) & mask))
{ //match
memClearPageAddr(addr << 12);
Cpu->Clear(addr << 12, 0x400);
}
}
}

for (size_t i = 0; i < cachedTlbs.count; i++)
{
if (cachedTlbs.PFN0s[i] == t.PFN0() && cachedTlbs.PFN1s[i] == t.PFN1() && cachedTlbs.PageMasks[i] == ConvertPageMask(t.PageMask.UL))
{
cachedTlbs.PFN0s.erase(cachedTlbs.PFN0s.begin() + i);
cachedTlbs.PFN1s.erase(cachedTlbs.PFN1s.begin() + i);
cachedTlbs.PageMasks.erase(cachedTlbs.PageMasks.begin() + i);
cachedTlbs.CacheEnabled0.erase(cachedTlbs.CacheEnabled0.begin() + i);
cachedTlbs.CacheEnabled1.erase(cachedTlbs.CacheEnabled1.begin() + i);
cachedTlbs.count--;
break;
}
}
}

void WriteTLB(int i)
{
tlb[i].PageMask = cpuRegs.CP0.n.PageMask;
tlb[i].EntryHi = cpuRegs.CP0.n.EntryHi;
tlb[i].EntryLo0 = cpuRegs.CP0.n.EntryLo0;
tlb[i].EntryLo1 = cpuRegs.CP0.n.EntryLo1;

tlb[i].Mask = (cpuRegs.CP0.n.PageMask >> 13) & 0xfff;
tlb[i].nMask = (~tlb[i].Mask) & 0xfff;
tlb[i].VPN2 = ((cpuRegs.CP0.n.EntryHi >> 13) & (~tlb[i].Mask)) << 13;
tlb[i].ASID = cpuRegs.CP0.n.EntryHi & 0xfff;
tlb[i].G = cpuRegs.CP0.n.EntryLo0 & cpuRegs.CP0.n.EntryLo1 & 0x1;
tlb[i].PFN0 = (((cpuRegs.CP0.n.EntryLo0 >> 6) & 0xFFFFF) & (~tlb[i].Mask)) << 12;
tlb[i].PFN1 = (((cpuRegs.CP0.n.EntryLo1 >> 6) & 0xFFFFF) & (~tlb[i].Mask)) << 12;
tlb[i].S = cpuRegs.CP0.n.EntryLo0 & 0x80000000;
tlb[i].PageMask.UL = cpuRegs.CP0.n.PageMask;
tlb[i].EntryHi.UL = cpuRegs.CP0.n.EntryHi;
tlb[i].EntryLo0.UL = cpuRegs.CP0.n.EntryLo0;
tlb[i].EntryLo1.UL = cpuRegs.CP0.n.EntryLo1;

if (!tlb[i].isSPR() && ((tlb[i].EntryLo0.V && tlb[i].EntryLo0.isCached()) || (tlb[i].EntryLo1.V && tlb[i].EntryLo1.isCached())))
{
const size_t idx = cachedTlbs.count;
cachedTlbs.CacheEnabled0[idx] = tlb[i].EntryLo0.isCached() ? ~0 : 0;
cachedTlbs.CacheEnabled1[idx] = tlb[i].EntryLo1.isCached() ? ~0 : 0;
cachedTlbs.PFN1s[idx] = tlb[i].PFN1();
cachedTlbs.PFN0s[idx] = tlb[i].PFN0();
cachedTlbs.PageMasks[idx] = ConvertPageMask(tlb[i].PageMask.UL);

cachedTlbs.count++;
}

MapTLB(tlb[i], i);
}
Expand All @@ -357,10 +383,16 @@ namespace COP0 {

int i = cpuRegs.CP0.n.Index & 0x3f;

cpuRegs.CP0.n.PageMask = tlb[i].PageMask;
cpuRegs.CP0.n.EntryHi = tlb[i].EntryHi & ~(tlb[i].PageMask | 0x1f00);
cpuRegs.CP0.n.EntryLo0 = (tlb[i].EntryLo0 & ~1) | ((tlb[i].EntryHi >> 12) & 1);
cpuRegs.CP0.n.EntryLo1 = (tlb[i].EntryLo1 & ~1) | ((tlb[i].EntryHi >> 12) & 1);
cpuRegs.CP0.n.PageMask = tlb[i].PageMask.UL;
cpuRegs.CP0.n.EntryHi = tlb[i].EntryHi.UL & ~(tlb[i].PageMask.UL | 0x1f00);
/*
* TEST THIS??
cpuRegs.CP0.n.EntryLo0 = (tlb[i].EntryLo0 & ~1) | ((tlb[i].EntryHi.UL >> 12) & 1);
cpuRegs.CP0.n.EntryLo1 = (tlb[i].EntryLo1 & ~1) | ((tlb[i].EntryHi.UL >> 12) & 1);
*/
cpuRegs.CP0.n.EntryLo0 = tlb[i].EntryLo0.UL;
cpuRegs.CP0.n.EntryLo1 = tlb[i].EntryLo1.UL;

}

void TLBWI()
Expand All @@ -374,10 +406,10 @@ namespace COP0 {
cpuRegs.CP0.n.EntryLo0, cpuRegs.CP0.n.EntryLo1);

UnmapTLB(tlb[j], j);
tlb[j].PageMask = cpuRegs.CP0.n.PageMask;
tlb[j].EntryHi = cpuRegs.CP0.n.EntryHi;
tlb[j].EntryLo0 = cpuRegs.CP0.n.EntryLo0;
tlb[j].EntryLo1 = cpuRegs.CP0.n.EntryLo1;
tlb[j].PageMask.UL = cpuRegs.CP0.n.PageMask;
tlb[j].EntryHi.UL = cpuRegs.CP0.n.EntryHi;
tlb[j].EntryLo0.UL = cpuRegs.CP0.n.EntryLo0;
tlb[j].EntryLo1.UL = cpuRegs.CP0.n.EntryLo1;
WriteTLB(j);
}

Expand All @@ -394,10 +426,10 @@ namespace COP0 {
//if (j > 48) return;

UnmapTLB(tlb[j], j);
tlb[j].PageMask = cpuRegs.CP0.n.PageMask;
tlb[j].EntryHi = cpuRegs.CP0.n.EntryHi;
tlb[j].EntryLo0 = cpuRegs.CP0.n.EntryLo0;
tlb[j].EntryLo1 = cpuRegs.CP0.n.EntryLo1;
tlb[j].PageMask.UL = cpuRegs.CP0.n.PageMask;
tlb[j].EntryHi.UL = cpuRegs.CP0.n.EntryHi;
tlb[j].EntryLo0.UL = cpuRegs.CP0.n.EntryLo0;
tlb[j].EntryLo1.UL = cpuRegs.CP0.n.EntryLo1;
WriteTLB(j);
}

Expand All @@ -422,7 +454,7 @@ namespace COP0 {
cpuRegs.CP0.n.Index = 0xFFFFFFFF;
for (i = 0; i < 48; i++)
{
if (tlb[i].VPN2 == ((~tlb[i].Mask) & (EntryHi32.s.VPN2)) && ((tlb[i].G & 1) || ((tlb[i].ASID & 0xff) == EntryHi32.s.ASID)))
if (tlb[i].VPN2() == ((~tlb[i].Mask()) & (EntryHi32.s.VPN2)) && ((tlb[i].isGlobal()) || ((tlb[i].EntryHi.ASID & 0xff) == EntryHi32.s.ASID)))
{
cpuRegs.CP0.n.Index = i;
break;
Expand Down
3 changes: 3 additions & 0 deletions pcsx2/R5900.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ u32 EEoCycle;

alignas(16) cpuRegistersPack _cpuRegistersPack;
alignas(16) tlbs tlb[48];
cachedTlbs_t cachedTlbs;

R5900cpu *Cpu = NULL;

static constexpr uint eeWaitCycles = 3072;
Expand All @@ -59,6 +61,7 @@ void cpuReset()
std::memset(&cpuRegs, 0, sizeof(cpuRegs));
std::memset(&fpuRegs, 0, sizeof(fpuRegs));
std::memset(&tlb, 0, sizeof(tlb));
cachedTlbs.reset();

cpuRegs.pc = 0xbfc00000; //set pc reg to stack
cpuRegs.CP0.n.Config = 0x440;
Expand Down
97 changes: 88 additions & 9 deletions pcsx2/R5900.h
Original file line number Diff line number Diff line change
Expand Up @@ -160,17 +160,69 @@ struct fpuRegisters {
u32 ACCflag; // an internal accumulator overflow flag
};

union PageMask_t
{
struct
{
u32 : 13;
u32 Mask : 12;
u32 : 7;
};
u32 UL;

constexpr u32 nMask() const { return ~Mask & 0xfff; };
};

union EntryHi_t
{
struct
{
u32 ASID:8;
u32 : 5;
u32 VPN2:19;
};
u32 UL;
};

union EntryLo_t
{
struct
{
u32 G:1;
u32 V:1;
u32 D:1;
u32 C:3;
u32 PFN:20;
u32 : 5;
u32 S : 1; // Only used in EntryLo0
};
u32 UL;

constexpr bool isCached() const { return C == 0x3; }
};

struct tlbs
{
u32 PageMask,EntryHi;
u32 EntryLo0,EntryLo1;
u32 Mask, nMask;
u32 G;
u32 ASID;
u32 VPN2;
u32 PFN0;
u32 PFN1;
u32 S;
PageMask_t PageMask;
EntryHi_t EntryHi;
EntryLo_t EntryLo0;
EntryLo_t EntryLo1;

// (((cpuRegs.CP0.n.EntryLo0 >> 6) & 0xFFFFF) & (~tlb[i].Mask())) << 12;
constexpr u32 PFN0() const { return (EntryLo0.PFN & ~Mask()) << 12; }
constexpr u32 PFN1() const { return (EntryLo1.PFN & ~Mask()) << 12; }
constexpr u32 VPN2() const {return ((EntryHi.VPN2) & (~Mask())) << 13; }
constexpr u32 Mask() const { return PageMask.Mask; }
constexpr bool isGlobal() const { return EntryLo0.G && EntryLo1.G; }
constexpr bool isSPR() const { return EntryLo0.S; }

constexpr bool operator==(const tlbs& other) const
{
return PageMask.UL == other.PageMask.UL &&
EntryHi.UL == other.EntryHi.UL &&
EntryLo0.UL == other.EntryLo0.UL &&
EntryLo1.UL == other.EntryLo1.UL;
}
};

#ifndef _PC_
Expand Down Expand Up @@ -211,6 +263,33 @@ struct cpuRegistersPack
alignas(16) extern cpuRegistersPack _cpuRegistersPack;
alignas(16) extern tlbs tlb[48];

struct cachedTlbs_t
{
u32 count;
std::vector<u32> PageMasks;
std::vector<u32> PFN1s;
std::vector<u32> CacheEnabled1;
std::vector<u32> PFN0s;
std::vector<u32> CacheEnabled0;

inline void reset()
{
count = 0;
PageMasks.clear();
PageMasks.resize(48);
PFN1s.clear();
PFN1s.resize(48);
PFN0s.clear();
PFN0s.resize(48);
CacheEnabled1.clear();
CacheEnabled1.resize(48);
CacheEnabled0.clear();
CacheEnabled0.resize(48);
}
};

extern cachedTlbs_t cachedTlbs;

static cpuRegisters& cpuRegs = _cpuRegistersPack.cpuRegs;
static fpuRegisters& fpuRegs = _cpuRegistersPack.fpuRegs;

Expand Down
Loading
Loading