-
Notifications
You must be signed in to change notification settings - Fork 4.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
JIT: Add an emitter peephole for post-indexed addressing #105181
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5844,6 +5844,12 @@ void emitter::emitIns_R_R_I(instruction ins, | |
return; | ||
} | ||
|
||
if ((reg1 == reg2) && (EA_SIZE(attr) == EA_PTRSIZE) && emitComp->opts.OptimizationEnabled() && | ||
OptimizePostIndexed(ins, reg1, imm, attr)) | ||
{ | ||
return; | ||
} | ||
|
||
reg1 = encodingSPtoZR(reg1); | ||
reg2 = encodingSPtoZR(reg2); | ||
} | ||
|
@@ -11070,6 +11076,37 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) | |
code |= ((code_t)imm << 12); // iiiiiiiii | ||
code |= insEncodeReg_Rn(id->idReg2()); // nnnnn | ||
dst += emitOutput_Instr(dst, code); | ||
|
||
// With pre or post-indexing we may have a second GC register to | ||
// update. | ||
if (insOptsIndexed(id->idInsOpt()) && !id->idIsSmallDsc()) | ||
{ | ||
if (emitInsIsLoad(ins)) | ||
{ | ||
// Load will write the destination (reg1). | ||
if (id->idGCref() != GCT_NONE) | ||
{ | ||
emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst); | ||
} | ||
else | ||
{ | ||
emitGCregDeadUpd(id->idReg1(), dst); | ||
} | ||
} | ||
|
||
// We will always write reg2. | ||
if (id->idGCrefReg2() != GCT_NONE) | ||
{ | ||
emitGCregLiveUpd(id->idGCrefReg2(), id->idReg2(), dst); | ||
} | ||
else | ||
{ | ||
emitGCregDeadUpd(id->idReg2(), dst); | ||
} | ||
|
||
goto SKIP_GC_UPDATE; | ||
} | ||
|
||
break; | ||
|
||
case IF_LS_2D: // LS_2D .Q.............. ....ssnnnnnttttt Vt Rn | ||
|
@@ -17150,6 +17187,127 @@ bool emitter::IsOptimizableLdrToMov( | |
return true; | ||
} | ||
|
||
//----------------------------------------------------------------------------------- | ||
// OptimizePostIndexed: Optimize an addition/subtraction from a register by | ||
// replacing the previous instruction with a post-indexed addressing form if | ||
// possible. | ||
// | ||
// Arguments: | ||
// ins - Whether this is an add or subtraction | ||
// reg - The register that is being updated | ||
// imm - Immediate that is being added/subtracted | ||
// | ||
// Returns: | ||
// True if the previous instruction was optimized to perform the add/sub. | ||
// | ||
bool emitter::OptimizePostIndexed(instruction ins, regNumber reg, ssize_t imm, emitAttr regAttr) | ||
{ | ||
assert((ins == INS_add) || (ins == INS_sub)); | ||
|
||
if (!emitCanPeepholeLastIns() || !emitInsIsLoadOrStore(emitLastIns->idIns())) | ||
{ | ||
return false; | ||
} | ||
|
||
if ((emitLastIns->idInsFmt() != IF_LS_2A) || emitLastIns->idIsTlsGD()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are there any load/store instructions this doesn't cover for the initial work being done? There's a lot of different instructions that support post-indexing, but not sure if all of them are There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, this doesn't support
I don't think there are any other instructions that load or stores that support the post-indexed writeback form, but I could be wrong. |
||
{ | ||
return false; | ||
} | ||
|
||
// Cannot allow post indexing if the load itself is already modifying the | ||
// register. | ||
regNumber loadStoreDataReg = emitLastIns->idReg1(); | ||
if (loadStoreDataReg == reg) | ||
{ | ||
return false; | ||
} | ||
|
||
// We must be updating the same register that the addressing is happening | ||
// on. The SP register is stored as ZR, so make sure to normalize that too. | ||
regNumber loadStoreAddrReg = encodingZRtoSP(emitLastIns->idReg2()); | ||
if (loadStoreAddrReg != reg) | ||
{ | ||
return false; | ||
} | ||
|
||
// Only some stores/loads are eligible | ||
switch (emitLastIns->idIns()) | ||
{ | ||
case INS_ldrb: | ||
case INS_strb: | ||
case INS_ldurb: | ||
case INS_sturb: | ||
case INS_ldrh: | ||
case INS_strh: | ||
case INS_ldurh: | ||
case INS_sturh: | ||
case INS_ldrsb: | ||
case INS_ldursb: | ||
case INS_ldrsh: | ||
case INS_ldursh: | ||
case INS_ldrsw: | ||
case INS_ldursw: | ||
case INS_ldr: | ||
case INS_str: | ||
case INS_ldur: | ||
case INS_stur: | ||
break; | ||
|
||
default: | ||
return false; | ||
} | ||
|
||
if (ins == INS_sub) | ||
{ | ||
imm = -imm; | ||
} | ||
|
||
// Only some post-indexing offsets can be represented. | ||
if ((imm < -256) || (imm >= 256)) | ||
{ | ||
return false; | ||
} | ||
|
||
instruction newIns = emitLastIns->idIns(); | ||
emitAttr newAttr; | ||
|
||
switch (emitLastIns->idGCref()) | ||
{ | ||
case GCT_BYREF: | ||
newAttr = EA_BYREF; | ||
break; | ||
case GCT_GCREF: | ||
newAttr = EA_GCREF; | ||
break; | ||
default: | ||
newAttr = emitLastIns->idOpSize(); | ||
break; | ||
} | ||
|
||
emitRemoveLastInstruction(); | ||
|
||
instrDesc* id = emitNewInstrCns(newAttr, imm); | ||
id->idIns(newIns); | ||
id->idInsFmt(IF_LS_2C); | ||
id->idInsOpt(INS_OPTS_POST_INDEX); | ||
|
||
id->idReg1(loadStoreDataReg); | ||
id->idReg2(encodingSPtoZR(loadStoreAddrReg)); | ||
|
||
if (EA_IS_BYREF(regAttr)) | ||
{ | ||
id->idGCrefReg2(GCT_BYREF); | ||
} | ||
else if (EA_IS_GCREF(regAttr)) | ||
{ | ||
id->idGCrefReg2(GCT_GCREF); | ||
} | ||
|
||
dispIns(id); | ||
appendToCurIG(id); | ||
return true; | ||
} | ||
|
||
#if defined(FEATURE_SIMD) | ||
//----------------------------------------------------------------------------------- | ||
// emitStoreSimd12ToLclOffset: store SIMD12 value from dataReg to varNum+offset. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This doesn't necessarily have to be
EA_PTRSIZE
, right? Rather, we just need the offset to be a multiple of8
in range (looks to be in the range of-4096
to4032
, inclusive)?-- Not something I think that needs to be handled in this PR, but rather that might be a possible future improvement on top.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks like it depends on the instruction. Some are multiples of 4/8/16 and some might be raw offsets. The ranges vary based on instruction too
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It has to be
EA_PTRSIZE
-- the register that is written is the address that was used for the load, so it is always pointer sized.The offset is an unscaled 9-bit signed immediate for the (single register) loads that support the write-back. So -256 to 255 is supported for the combined form.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
👍
Yeah, I was looking at the wrong instruction here. There's a few different
post-indexing
forms. Forldp
, as an example, it's:There's then
ldapr
which is fixed4
or8
,ldiapp
which is fixed8
or16
, andldpsw
which has the... a multiple of 4 in the range -256 to 252 ...
textSeems we're not handling these ones in this PR, which is fine, just had a mental disconnect due to the differences between them 😄
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, some of those forms we could definitely handle in the future. One complication is that some of those forms allow redefining the GC ness of up to 3 registers which
instrDesc
does not support today, so we would need to expand it in some way.