From 4d3dfe96d417f34b84630b6d012a368b61bbc21f Mon Sep 17 00:00:00 2001 From: Dmitry Borisenkov Date: Thu, 26 Nov 2020 16:49:14 +0200 Subject: [PATCH] Inline stslice if possible If stslice stores a slice which is made of a cell that has one use and was entirely made of stores via a builder, the pass replaces the slice with the corresponding store instructions. The pass relies on DCE to clean up unused values. --- llvm/lib/Target/TVM/CMakeLists.txt | 1 + llvm/lib/Target/TVM/TVM.h | 2 + llvm/lib/Target/TVM/TVMInlineSliceStore.cpp | 167 ++++++++++++++++++ llvm/lib/Target/TVM/TVMMoveMaterializable.cpp | 1 - llvm/lib/Target/TVM/TVMStoreCombine.cpp | 44 ++++- llvm/lib/Target/TVM/TVMTargetMachine.cpp | 3 + llvm/lib/Target/TVM/TVMUtilities.cpp | 9 + llvm/lib/Target/TVM/TVMUtilities.h | 4 + .../TVM/{ => optimizations}/store-opt.ll | 2 +- .../TVM/optimizations/stslice_inline.ll | 77 ++++++++ 10 files changed, 306 insertions(+), 4 deletions(-) create mode 100644 llvm/lib/Target/TVM/TVMInlineSliceStore.cpp rename llvm/test/CodeGen/TVM/{ => optimizations}/store-opt.ll (97%) create mode 100644 llvm/test/CodeGen/TVM/optimizations/stslice_inline.ll diff --git a/llvm/lib/Target/TVM/CMakeLists.txt b/llvm/lib/Target/TVM/CMakeLists.txt index f8f8a9b79dd5..9cfa7a7ed1cd 100644 --- a/llvm/lib/Target/TVM/CMakeLists.txt +++ b/llvm/lib/Target/TVM/CMakeLists.txt @@ -44,6 +44,7 @@ add_llvm_target(TVMCodeGen TVMLoadStoreReplace.cpp TVMMoveMaterializable.cpp TVMIfConversionTerm.cpp + TVMInlineSliceStore.cpp TVMLowerIntrinsics.cpp ) diff --git a/llvm/lib/Target/TVM/TVM.h b/llvm/lib/Target/TVM/TVM.h index 1c563f2b4f34..abd0b47ea5ef 100644 --- a/llvm/lib/Target/TVM/TVM.h +++ b/llvm/lib/Target/TVM/TVM.h @@ -44,6 +44,7 @@ FunctionPass *createTVMIfConversionTerm(); BasicBlockPass *createTVMDefineUndef(); BasicBlockPass *createTVMLoadStoreReplace(); BasicBlockPass *createTVMStoreCombine(); +BasicBlockPass *createTVMInlineSliceStore(); ModulePass *createTVMLowerIntrinsicsPass(); void 
initializeTVMArgumentMovePass(PassRegistry &); @@ -63,6 +64,7 @@ void initializeTVMLoadStoreReplacePass(PassRegistry &); void initializeTVMMoveMaterializablePass(PassRegistry &); void initializeTVMIfConversionTermPass(PassRegistry &); void initializeTVMStoreCombinePass(PassRegistry &); +void initializeTVMInlineSliceStorePass(PassRegistry &); void initializeTVMLowerIntrinsicsPass(PassRegistry &); } // namespace llvm diff --git a/llvm/lib/Target/TVM/TVMInlineSliceStore.cpp b/llvm/lib/Target/TVM/TVMInlineSliceStore.cpp new file mode 100644 index 000000000000..c5ca0bd8fb8d --- /dev/null +++ b/llvm/lib/Target/TVM/TVMInlineSliceStore.cpp @@ -0,0 +1,167 @@ +//===------------- TVMInlineSliceStore.cpp - Inline slice store -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Replace store slice with its content. 
+/// +//===----------------------------------------------------------------------===// + +#include + +#include "TVM.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" + +#include "TVMUtilities.h" + +using namespace llvm; + +#define DEBUG_TYPE "tvm-inline-slice-store" + +namespace { +class TVMInlineSliceStore final : public BasicBlockPass { + StringRef getPassName() const override { + return "Replace store slice with its content"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + + bool runOnBasicBlock(BasicBlock &BB) override; + +public: + static char ID; + explicit TVMInlineSliceStore() : BasicBlockPass(ID) {} +}; +} // End anonymous namespace + +char TVMInlineSliceStore::ID = 0; +INITIALIZE_PASS(TVMInlineSliceStore, DEBUG_TYPE, "Inline store slice", + false, false) + +BasicBlockPass *llvm::createTVMInlineSliceStore() { return new TVMInlineSliceStore(); } + +static CallSite storeCallSite(Instruction *I) { + auto CS = CallSite(I); + if (!CS) + return CS; + if (CS.getIntrinsicID() == Intrinsic::tvm_stslice) + return CS; + return {}; +} + +/// For a given store \p CS return its builder argument. 
+static Value* argBuilder(CallSite CS) { + if (!CS) + return nullptr; + unsigned ID = CS.getIntrinsicID(); + if (TVM::isIntStore(ID)) + return CS.getArgument(1); + if (TVM::isVarIntStore(ID)) + return CS.getArgument(0); + return nullptr; +} + +/// Return stores to a slice in reverse order +static std::vector collectStores(Value *Slice) { + assert(Slice); + std::vector Values; + if (!Slice->hasOneUse()) + return {}; + auto CS = CallSite(Slice); + if (!CS || CS.getIntrinsicID() != Intrinsic::tvm_ctos) + return {}; + Value *Cell = CS.getArgument(0); + if (!Cell->hasOneUse()) + return {}; + CS = CallSite(Cell); + if (!CS || CS.getIntrinsicID() != Intrinsic::tvm_endc) + return {}; + Value *Builder = CS.getArgument(0); + CS = CallSite(Builder); + while (Builder) { + if (!Builder->hasOneUse()) + break; + auto *NextBuilder = argBuilder(CS); + if (!NextBuilder) + break; + Values.push_back(Builder); + Builder = NextBuilder; + CS = CallSite(Builder); + } + if (!CS || (CS.getIntrinsicID() != Intrinsic::tvm_newc)) + return {}; + return Values; +} + +/// Provide arguments for \p Store replacement. +/// The arguments are the same as for the original Store, but the builder is +/// substituted with \p Builder. +static std::vector fillArguments(Value *Store, Value *Builder) { + auto CS = CallSite(Store); + assert(CS); + unsigned ID = CS.getIntrinsicID(); + if (TVM::isIntStore(ID)) + return {CS.getArgument(0), Builder, CS.getArgument(2)}; + if (TVM::isVarIntStore(ID)) + return {Builder, CS.getArgument(1)}; + llvm_unreachable("Unexpected Store"); + return {}; +} + +/// Inline \p Stores and return the address of the last builder +/// to replace the original one. 
+static Value *inlineStores(Instruction *I, + const std::vector &Stores) { + assert(!Stores.empty()); + IRBuilder<> Builder(I); + auto CS = CallSite(I); + Value *Prev = CS.getArgument(1); + for (Value *Store : make_range(Stores.rbegin(), Stores.rend())) { + CS = CallSite(Store); + assert(CS); + std::vector Args = fillArguments(Store, Prev); + auto *Fn = Intrinsic::getDeclaration(I->getModule(), CS.getIntrinsicID()); + Prev = Builder.CreateCall(Fn, Args); + } + return Prev; +} + +bool TVMInlineSliceStore::runOnBasicBlock(BasicBlock &BB) { + std::vector RemoveInst; + auto It = BB.begin(), End = BB.end(); + while (It != End) { + It = std::find_if(It, End, [](Instruction &I) { + return static_cast(I.hasOneUse() && storeCallSite(&I)); + }); + if (It != End) { + auto CS = storeCallSite(&*It); + Value *Slice = CS.getArgument(0); + std::vector Stores = collectStores(Slice); + if (!Stores.empty()) { + Value *LastStore = inlineStores(&*It, Stores); + It->replaceAllUsesWith(LastStore); + RemoveInst.push_back(&*It); + } + } else { + break; + } + ++It; + } + return !RemoveInst.empty(); +} diff --git a/llvm/lib/Target/TVM/TVMMoveMaterializable.cpp b/llvm/lib/Target/TVM/TVMMoveMaterializable.cpp index 2f77eae1a10e..5ed23c855149 100644 --- a/llvm/lib/Target/TVM/TVMMoveMaterializable.cpp +++ b/llvm/lib/Target/TVM/TVMMoveMaterializable.cpp @@ -72,7 +72,6 @@ class TVMMoveMaterializable final : public MachineFunctionPass { bool processInstruction(MachineInstr &MI); bool runOnBasicBlocks(MachineFunction &MF); - TVMFunctionInfo *MFI; MachineRegisterInfo *MRI; const TargetInstrInfo *TII; LiveIntervals *LIS; diff --git a/llvm/lib/Target/TVM/TVMStoreCombine.cpp b/llvm/lib/Target/TVM/TVMStoreCombine.cpp index 774e3567739e..39ed6bde325c 100644 --- a/llvm/lib/Target/TVM/TVMStoreCombine.cpp +++ b/llvm/lib/Target/TVM/TVMStoreCombine.cpp @@ -16,6 +16,8 @@ #include #include "TVM.h" +#include "TVMExtras.h" +#include "TVMUtilities.h" #include "llvm/Analysis/LoopInfo.h" #include 
"llvm/Analysis/LoopPass.h" #include "llvm/IR/Constants.h" @@ -110,8 +112,8 @@ static void combine(BasicBlock::iterator Start, BasicBlock::iterator End) { unsigned Sz = cast(CS.getArgument(2))->getZExtValue(); if (Val.slt(0)) { APInt Pow2(257, 0, false); - Pow2.setBit(Sz); - Val = Pow2 - Val; + Pow2.setBit(Sz + 1); + Val = Pow2 - Val.abs(); } Data <<= Sz; Size += Sz; @@ -163,6 +165,44 @@ static BasicBlock::iterator tryCombine(BasicBlock::iterator Start, bool TVMStoreCombine::runOnBasicBlock(BasicBlock &BB) { bool Changed = false; + std::vector removeInst; + for (auto &I : BB) { + auto CS = CallSite(&I); + if (CS && TVM::isVarIntStore(CS.getIntrinsicID())) { + unsigned ID = CS.getIntrinsicID(); + bool isSigned = + ID == Intrinsic::tvm_stvarint16 || ID == Intrinsic::tvm_stvarint32; + bool is16bit = + ID == Intrinsic::tvm_stvarint16 || ID == Intrinsic::tvm_stvaruint16; + auto *Arg = CS.getArgument(1); + auto *CInt = dyn_cast(Arg); + if (!CInt) + continue; + APInt Val = CInt->getValue(); + APInt AbsValue = Val.abs(); + unsigned numBits = + AbsValue.isNullValue() ? 1 : (AbsValue.ceilLogBase2() + 1 + isSigned); + unsigned numBytes = (numBits + 7) / 8; + unsigned numByteEncBits = is16bit ? 4 : 8; + + IRBuilder<> Builder(&I); + auto *Fn = Intrinsic::getDeclaration(I.getModule(), Intrinsic::tvm_stu); + std::vector Args = {Builder.getIntN(257, numBytes), + CS.getArgument(0), + Builder.getIntN(257, numByteEncBits)}; + auto *NewStore = Builder.CreateCall(Fn, Args); + Fn = isSigned + ? 
Intrinsic::getDeclaration(I.getModule(), Intrinsic::tvm_sti) + : Fn; + Args = {CInt, NewStore, Builder.getIntN(257, numBytes * 8)}; + NewStore = Builder.CreateCall(Fn, Args); + I.replaceAllUsesWith(NewStore); + removeInst.push_back(&I); + } + } + Changed |= !removeInst.empty(); + for (auto *I : removeInst) + I->eraseFromParent(); auto It = BB.begin(), End = BB.end(); do { It = std::find_if(It, End, [](Instruction &I) { diff --git a/llvm/lib/Target/TVM/TVMTargetMachine.cpp b/llvm/lib/Target/TVM/TVMTargetMachine.cpp index 62ad000b9048..993e68628e96 100644 --- a/llvm/lib/Target/TVM/TVMTargetMachine.cpp +++ b/llvm/lib/Target/TVM/TVMTargetMachine.cpp @@ -45,6 +45,7 @@ extern "C" void LLVMInitializeTVMTarget() { initializeTVMLoadStoreReplacePass(PR); initializeTVMMoveMaterializablePass(PR); initializeTVMStoreCombinePass(PR); + initializeTVMInlineSliceStorePass(PR); initializeTVMLowerIntrinsicsPass(PR); } @@ -138,6 +139,7 @@ void TVMPassConfig::addIRPasses() { addPass(createTVMLoopPrepare()); addPass(createTVMControlFlowPrepare()); addPass(createTVMDefineUndef()); + addPass(createTVMInlineSliceStore()); addPass(createTVMStoreCombine()); } @@ -171,6 +173,7 @@ void TVMPassConfig::addPreEmitPass() { addPass(createTVMRegStackify()); addPass(createTVMLoopInstructions()); addPass(createTVMMoveMaterializable()); + addPass(createGlobalDCEPass()); addPass(createTVMStackModel()); // Perform the very last peephole optimizations on the code. 
diff --git a/llvm/lib/Target/TVM/TVMUtilities.cpp b/llvm/lib/Target/TVM/TVMUtilities.cpp index 18612a1dc7d9..fc1b619b377c 100644 --- a/llvm/lib/Target/TVM/TVMUtilities.cpp +++ b/llvm/lib/Target/TVM/TVMUtilities.cpp @@ -36,6 +36,15 @@ bool TVM::isConstInt(const MachineInstr &MI) { || MI.getOpcode() == TVM::CONST_U257; } +bool TVM::isIntStore(unsigned ID) { + return ID == Intrinsic::tvm_sti || ID == Intrinsic::tvm_stu; +} + +bool TVM::isVarIntStore(unsigned ID) { + return ID == Intrinsic::tvm_stvarint16 || ID == Intrinsic::tvm_stvaruint16 + || ID == Intrinsic::tvm_stvarint32 || ID == Intrinsic::tvm_stvaruint32; +} + // A shortcut overload for BuildMI() function MachineInstrBuilder llvm::BuildMI(MachineInstr *InsertPoint, const MCInstrDesc &InstrDesc) { diff --git a/llvm/lib/Target/TVM/TVMUtilities.h b/llvm/lib/Target/TVM/TVMUtilities.h index 685e5d83dddd..1196bf947b39 100644 --- a/llvm/lib/Target/TVM/TVMUtilities.h +++ b/llvm/lib/Target/TVM/TVMUtilities.h @@ -32,6 +32,10 @@ namespace TVM { bool isArgument(const MachineInstr &MI); bool isArgumentNum(const MachineInstr &MI); bool isConstInt(const MachineInstr &MI); + +bool isIntStore(unsigned ID); +bool isVarIntStore(unsigned ID); + } // end namespace TVM } // end namespace llvm diff --git a/llvm/test/CodeGen/TVM/store-opt.ll b/llvm/test/CodeGen/TVM/optimizations/store-opt.ll similarity index 97% rename from llvm/test/CodeGen/TVM/store-opt.ll rename to llvm/test/CodeGen/TVM/optimizations/store-opt.ll index 6b5a57aa315e..abf9dde38276 100644 --- a/llvm/test/CodeGen/TVM/store-opt.ll +++ b/llvm/test/CodeGen/TVM/optimizations/store-opt.ll @@ -14,7 +14,7 @@ define builder @test1() { %6 = call builder @llvm.tvm.sti(i257 -1, builder %5, i257 1) %7 = call builder @llvm.tvm.stu(i257 3, builder %6, i257 2) %8 = call builder @llvm.tvm.sti(i257 -7, builder %7, i257 3) - ; CHECK: %[[VR3:[0-9]+]] = call builder @llvm.tvm.stu(i257 127, builder %[[VR2]], i257 6) + ; CHECK: %[[VR3:[0-9]+]] = call builder @llvm.tvm.stu(i257 121, builder 
%[[VR2]], i257 6) %9 = call builder @llvm.tvm.sti(i257 -7, builder %8, i257 251) ; CHECK: %{{[0-9]+}} = call builder @llvm.tvm.sti(i257 -7, builder %[[VR3]], i257 251) ret builder %9 diff --git a/llvm/test/CodeGen/TVM/optimizations/stslice_inline.ll b/llvm/test/CodeGen/TVM/optimizations/stslice_inline.ll new file mode 100644 index 000000000000..ab5a1a179593 --- /dev/null +++ b/llvm/test/CodeGen/TVM/optimizations/stslice_inline.ll @@ -0,0 +1,77 @@ +; RUN: llc < %s -march=tvm -filetype=asm | FileCheck %s +target datalayout = "E-S257-i1:257:257-i8:257:257-i16:257:257-i32:257:257-i64:257:257-i257:257:257-p:257:257-a:257:257" +target triple = "tvm" + +; CHECK-LABEL: stslice +define builder @stslice() { +; CHECK: PUSHINT +; CHECK: STUR +; CHECK-NOT: STSLICE + %b1 = tail call builder @llvm.tvm.newc() + %b2 = tail call builder @llvm.tvm.stu(i257 0, builder %b1, i257 2) + %b3 = tail call builder @llvm.tvm.sti(i257 -1, builder %b2, i257 2) + %c1 = tail call cell @llvm.tvm.endc(builder %b3) + %s1 = tail call slice @llvm.tvm.ctos(cell %c1) + %b4 = tail call builder @llvm.tvm.newc() + %b5 = tail call builder @llvm.tvm.stslice(slice %s1, builder %b4) + ret builder %b5 +} + +; CHECK-LABEL: stslice_varint +define builder @stslice_varint() { +; CHECK-NOT: STSLICE +; CHECK: PUSHINT 275010290703 +; CHECK: STUR 42 + %b1 = tail call builder @llvm.tvm.newc() + %b2 = tail call builder @llvm.tvm.stvarint16(builder %b1, i257 0) + %b3 = tail call builder @llvm.tvm.stvarint16(builder %b2, i257 -7) + %b4 = tail call builder @llvm.tvm.stvaruint32(builder %b3, i257 2) + %b5 = tail call builder @llvm.tvm.sti(i257 -1, builder %b4, i257 2) + %c1 = tail call cell @llvm.tvm.endc(builder %b5) + %s1 = tail call slice @llvm.tvm.ctos(cell %c1) + %b6 = tail call builder @llvm.tvm.newc() + %b7 = tail call builder @llvm.tvm.stslice(slice %s1, builder %b6) + ret builder %b7 +} + +; CHECK-LABEL: stslice_neg +define builder @stslice_neg() { +; CHECK: STSLICE +; CHECK: STSLICE + %b1 = tail call builder 
@llvm.tvm.newc() + %b2 = tail call builder @llvm.tvm.stu(i257 0, builder %b1, i257 2) + %b3 = tail call builder @llvm.tvm.sti(i257 -1, builder %b2, i257 2) + %c1 = tail call cell @llvm.tvm.endc(builder %b3) + %s1 = tail call slice @llvm.tvm.ctos(cell %c1) + %b4 = tail call builder @llvm.tvm.newc() + %b5 = tail call builder @llvm.tvm.stslice(slice %s1, builder %b4) + %b6 = tail call builder @llvm.tvm.stslice(slice %s1, builder %b5) + ret builder %b6 +} + +; CHECK-LABEL: stslice_rec +define builder @stslice_rec() { +; CHECK: PUSHINT +; CHECK: STUR +; CHECK-NOT: STSLICE + %b1 = tail call builder @llvm.tvm.newc() + %b2 = tail call builder @llvm.tvm.stu(i257 0, builder %b1, i257 2) + %b3 = tail call builder @llvm.tvm.sti(i257 -1, builder %b2, i257 2) + %c1 = tail call cell @llvm.tvm.endc(builder %b3) + %s1 = tail call slice @llvm.tvm.ctos(cell %c1) + %b4 = tail call builder @llvm.tvm.newc() + %b5 = tail call builder @llvm.tvm.stslice(slice %s1, builder %b4) + %c2 = tail call cell @llvm.tvm.endc(builder %b5) + %s2 = tail call slice @llvm.tvm.ctos(cell %c2) + %b7 = tail call builder @llvm.tvm.newc() + %b8 = tail call builder @llvm.tvm.stslice(slice %s2, builder %b7) + ret builder %b8 +} +declare builder @llvm.tvm.stu(i257, builder, i257) +declare builder @llvm.tvm.sti(i257, builder, i257) +declare builder @llvm.tvm.stvarint16(builder, i257) +declare builder @llvm.tvm.stvaruint32(builder, i257) +declare builder @llvm.tvm.newc() +declare cell @llvm.tvm.endc(builder) +declare builder @llvm.tvm.stslice(slice, builder) +declare slice @llvm.tvm.ctos(cell)