Skip to content

Commit

Permalink
LAA: don't speculate stride when BTC >= 0
Browse files Browse the repository at this point in the history
Speculating the stride currently inserts a Stride == 1 predicate, which
is equivalent to asserting that the loop executes at least once.
However, when the backedge-taken-count is known-non-negative,
speculating the stride unnecessarily versions the loop. Avoid this.

Fixes llvm#96656.
  • Loading branch information
artagnon committed Jun 28, 2024
1 parent 7c18195 commit 4f76d53
Show file tree
Hide file tree
Showing 10 changed files with 347 additions and 583 deletions.
20 changes: 9 additions & 11 deletions llvm/lib/Analysis/LoopAccessAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2930,7 +2930,8 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
// computation of an interesting IV - but we chose not to as we
// don't have a cost model here, and broadening the scope exposes
// far too many unprofitable cases.
const SCEV *StrideExpr = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop);
ScalarEvolution *SE = PSE->getSE();
const SCEV *StrideExpr = getStrideFromPointer(Ptr, SE, TheLoop);
if (!StrideExpr)
return;

Expand All @@ -2944,8 +2945,8 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
}

// Avoid adding the "Stride == 1" predicate when we know that
// Stride >= Trip-Count. Such a predicate will effectively optimize a single
// or zero iteration loop, as Trip-Count <= Stride == 1.
// Backedge-Taken-Count is non-negative, or when Stride >
// Backedge-Taken-Count. Trip-Count = Backedge-Taken-Count + 1.
//
// TODO: We are currently not making a very informed decision on when it is
// beneficial to apply stride versioning. It might make more sense that the
Expand All @@ -2966,20 +2967,17 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
uint64_t BETypeSizeBits = DL.getTypeSizeInBits(MaxBTC->getType());
const SCEV *CastedStride = StrideExpr;
const SCEV *CastedBECount = MaxBTC;
ScalarEvolution *SE = PSE->getSE();
if (BETypeSizeBits >= StrideTypeSizeBits)
CastedStride = SE->getNoopOrSignExtend(StrideExpr, MaxBTC->getType());
else
CastedBECount = SE->getZeroExtendExpr(MaxBTC, StrideExpr->getType());
const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount);
// Since TripCount == BackEdgeTakenCount + 1, checking:
// "Stride >= TripCount" is equivalent to checking:
// Stride - MaxBTC> 0
if (SE->isKnownPositive(StrideMinusBETaken)) {
if (SE->isKnownPositive(StrideMinusBETaken) ||
SE->isKnownNonNegative(MaxBTC)) {
LLVM_DEBUG(
dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "
"Stride==1 predicate will imply that the loop executes "
"at most once.\n");
dbgs() << "LAA: Stride > Backedge-Taken-Count or Backedge-Taken-Count "
">= 0; No point in versioning as the Stride==1 predicate "
"will imply that the loop executes at most once.\n");
return;
}
LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.\n");
Expand Down
4 changes: 0 additions & 4 deletions llvm/test/Analysis/LoopAccessAnalysis/pr96656.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,8 @@ define void @false.equal.predicate(ptr %arg, ptr %arg1, i1 %arg2) {
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-NEXT: Equal predicate: %load == 1
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
; CHECK-NEXT: [PSE] %gep10 = getelementptr double, ptr %gep8, i64 %mul:
; CHECK-NEXT: {(8 + %arg1),+,(8 * (sext i32 %load to i64))<nsw>}<%loop.body>
; CHECK-NEXT: --> {(8 + %arg1),+,8}<%loop.body>
;
entry:
%load = load i32, ptr %arg, align 4
Expand Down
8 changes: 2 additions & 6 deletions llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
Original file line number Diff line number Diff line change
Expand Up @@ -108,20 +108,16 @@ define void @single_stride_castexpr(i32 %offset, ptr %src, ptr %dst, i1 %cond) {
; CHECK-NEXT: %gep.src = getelementptr inbounds i32, ptr %src, i32 %iv.3
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group [[GRP1]]:
; CHECK-NEXT: (Low: ((4 * %iv.1) + %dst) High: (804 + (4 * %iv.1) + %dst))
; CHECK-NEXT: Member: {((4 * %iv.1) + %dst),+,4}<%inner.loop>
; CHECK-NEXT: (Low: (((4 * %iv.1) + %dst) umin ((4 * %iv.1) + (800 * (sext i32 %offset to i64))<nsw> + %dst)) High: (4 + (((4 * %iv.1) + %dst) umax ((4 * %iv.1) + (800 * (sext i32 %offset to i64))<nsw> + %dst))))
; CHECK-NEXT: Member: {((4 * %iv.1) + %dst),+,(4 * (sext i32 %offset to i64))<nsw>}<%inner.loop>
; CHECK-NEXT: Group [[GRP2]]:
; CHECK-NEXT: (Low: %src High: (804 + %src))
; CHECK-NEXT: Member: {%src,+,4}<nuw><%inner.loop>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-NEXT: Equal predicate: %offset == 1
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
; CHECK-NEXT: [PSE] %gep.dst = getelementptr i32, ptr %dst, i64 %iv.2:
; CHECK-NEXT: {((4 * %iv.1) + %dst),+,(4 * (sext i32 %offset to i64))<nsw>}<%inner.loop>
; CHECK-NEXT: --> {((4 * %iv.1) + %dst),+,4}<%inner.loop>
; CHECK-NEXT: outer.header:
; CHECK-NEXT: Report: loop is not the innermost loop
; CHECK-NEXT: Dependences:
Expand Down
10 changes: 4 additions & 6 deletions llvm/test/Transforms/LoopDistribute/debug-print.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,13 @@ define void @f(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalias %d, i
; CHECK-LABEL: 'f'
; CHECK: LDist: Found a candidate loop: for.body
; CHECK: Backward dependences:
; CHECK-NEXT: Unknown:
; CHECK-NEXT: store i32 %mul.a, ptr %gep.a.plus4, align 4 ->
; CHECK-NEXT: %load.strided.a = load i32, ptr %gep.strided.a, align 4
; CHECK-NEXT: Backward:
; CHECK-NEXT: %load.a = load i32, ptr %gep.a, align 4 ->
; CHECK-NEXT: store i32 %mul.a, ptr %gep.a.plus4, align 4
; CHECK: Seeded partitions:
; CHECK: Partition 0
; CHECK: Partition 1
; CHECK: Partition 2
; CHECK: Partition 3
; CHECK: Distributing loop
; CHECK: Skipping; cannot isolate unsafe dependencies
entry:
br label %for.body

Expand Down
62 changes: 12 additions & 50 deletions llvm/test/Transforms/LoopDistribute/symbolic-stride.ll
Original file line number Diff line number Diff line change
Expand Up @@ -22,65 +22,27 @@ define void @f(ptr noalias %a,
;
; DEFAULT-LABEL: @f(
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: br label [[FOR_BODY_LVER_CHECK:%.*]]
; DEFAULT: for.body.lver.check:
; DEFAULT-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[STRIDE:%.*]], 1
; DEFAULT-NEXT: br i1 [[IDENT_CHECK]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
; DEFAULT: for.body.ph.lver.orig:
; DEFAULT-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]]
; DEFAULT: for.body.lver.orig:
; DEFAULT-NEXT: [[IND_LVER_ORIG:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[ADD_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
; DEFAULT-NEXT: [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IND_LVER_ORIG]]
; DEFAULT-NEXT: [[LOADA_LVER_ORIG:%.*]] = load i32, ptr [[ARRAYIDXA_LVER_ORIG]], align 4
; DEFAULT-NEXT: [[ARRAYIDXB_LVER_ORIG:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[IND_LVER_ORIG]]
; DEFAULT-NEXT: [[LOADB_LVER_ORIG:%.*]] = load i32, ptr [[ARRAYIDXB_LVER_ORIG]], align 4
; DEFAULT-NEXT: [[MULA_LVER_ORIG:%.*]] = mul i32 [[LOADB_LVER_ORIG]], [[LOADA_LVER_ORIG]]
; DEFAULT-NEXT: [[ADD_LVER_ORIG]] = add nuw nsw i64 [[IND_LVER_ORIG]], 1
; DEFAULT-NEXT: [[ARRAYIDXA_PLUS_4_LVER_ORIG:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[ADD_LVER_ORIG]]
; DEFAULT-NEXT: store i32 [[MULA_LVER_ORIG]], ptr [[ARRAYIDXA_PLUS_4_LVER_ORIG]], align 4
; DEFAULT-NEXT: [[ARRAYIDXD_LVER_ORIG:%.*]] = getelementptr inbounds i32, ptr [[D:%.*]], i64 [[IND_LVER_ORIG]]
; DEFAULT-NEXT: [[LOADD_LVER_ORIG:%.*]] = load i32, ptr [[ARRAYIDXD_LVER_ORIG]], align 4
; DEFAULT-NEXT: [[MUL_LVER_ORIG:%.*]] = mul i64 [[IND_LVER_ORIG]], [[STRIDE]]
; DEFAULT-NEXT: [[ARRAYIDXSTRIDEDA_LVER_ORIG:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[MUL_LVER_ORIG]]
; DEFAULT-NEXT: [[LOADSTRIDEDA_LVER_ORIG:%.*]] = load i32, ptr [[ARRAYIDXSTRIDEDA_LVER_ORIG]], align 4
; DEFAULT-NEXT: [[MULC_LVER_ORIG:%.*]] = mul i32 [[LOADD_LVER_ORIG]], [[LOADSTRIDEDA_LVER_ORIG]]
; DEFAULT-NEXT: [[ARRAYIDXC_LVER_ORIG:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[IND_LVER_ORIG]]
; DEFAULT-NEXT: store i32 [[MULC_LVER_ORIG]], ptr [[ARRAYIDXC_LVER_ORIG]], align 4
; DEFAULT-NEXT: [[EXITCOND_LVER_ORIG:%.*]] = icmp eq i64 [[ADD_LVER_ORIG]], 20
; DEFAULT-NEXT: br i1 [[EXITCOND_LVER_ORIG]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_LVER_ORIG]]
; DEFAULT: for.body.ph.ldist1:
; DEFAULT-NEXT: br label [[FOR_BODY_LDIST1:%.*]]
; DEFAULT: for.body.ldist1:
; DEFAULT-NEXT: [[IND_LDIST1:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LDIST1]] ], [ [[ADD_LDIST1:%.*]], [[FOR_BODY_LDIST1]] ]
; DEFAULT-NEXT: [[ARRAYIDXA_LDIST1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IND_LDIST1]]
; DEFAULT-NEXT: [[LOADA_LDIST1:%.*]] = load i32, ptr [[ARRAYIDXA_LDIST1]], align 4
; DEFAULT-NEXT: [[ARRAYIDXB_LDIST1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IND_LDIST1]]
; DEFAULT-NEXT: [[LOADB_LDIST1:%.*]] = load i32, ptr [[ARRAYIDXB_LDIST1]], align 4
; DEFAULT-NEXT: [[MULA_LDIST1:%.*]] = mul i32 [[LOADB_LDIST1]], [[LOADA_LDIST1]]
; DEFAULT-NEXT: [[ADD_LDIST1]] = add nuw nsw i64 [[IND_LDIST1]], 1
; DEFAULT-NEXT: [[ARRAYIDXA_PLUS_4_LDIST1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[ADD_LDIST1]]
; DEFAULT-NEXT: store i32 [[MULA_LDIST1]], ptr [[ARRAYIDXA_PLUS_4_LDIST1]], align 4
; DEFAULT-NEXT: [[EXITCOND_LDIST1:%.*]] = icmp eq i64 [[ADD_LDIST1]], 20
; DEFAULT-NEXT: br i1 [[EXITCOND_LDIST1]], label [[FOR_BODY_PH:%.*]], label [[FOR_BODY_LDIST1]]
; DEFAULT: for.body.ph:
; DEFAULT-NEXT: br label [[FOR_BODY:%.*]]
; DEFAULT: for.body:
; DEFAULT-NEXT: [[IND:%.*]] = phi i64 [ 0, [[FOR_BODY_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; DEFAULT-NEXT: [[IND:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; DEFAULT-NEXT: [[ARRAYIDXA:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IND]]
; DEFAULT-NEXT: [[LOADA:%.*]] = load i32, ptr [[ARRAYIDXA]], align 4
; DEFAULT-NEXT: [[ARRAYIDXB:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[IND]]
; DEFAULT-NEXT: [[LOADB:%.*]] = load i32, ptr [[ARRAYIDXB]], align 4
; DEFAULT-NEXT: [[MULA:%.*]] = mul i32 [[LOADB]], [[LOADA]]
; DEFAULT-NEXT: [[ADD]] = add nuw nsw i64 [[IND]], 1
; DEFAULT-NEXT: [[ARRAYIDXD:%.*]] = getelementptr inbounds i32, ptr [[D]], i64 [[IND]]
; DEFAULT-NEXT: [[ARRAYIDXA_PLUS_4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[ADD]]
; DEFAULT-NEXT: store i32 [[MULA]], ptr [[ARRAYIDXA_PLUS_4]], align 4
; DEFAULT-NEXT: [[ARRAYIDXD:%.*]] = getelementptr inbounds i32, ptr [[D:%.*]], i64 [[IND]]
; DEFAULT-NEXT: [[LOADD:%.*]] = load i32, ptr [[ARRAYIDXD]], align 4
; DEFAULT-NEXT: [[MUL:%.*]] = mul i64 [[IND]], [[STRIDE]]
; DEFAULT-NEXT: [[MUL:%.*]] = mul i64 [[IND]], [[STRIDE:%.*]]
; DEFAULT-NEXT: [[ARRAYIDXSTRIDEDA:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[MUL]]
; DEFAULT-NEXT: [[LOADSTRIDEDA:%.*]] = load i32, ptr [[ARRAYIDXSTRIDEDA]], align 4
; DEFAULT-NEXT: [[MULC:%.*]] = mul i32 [[LOADD]], [[LOADSTRIDEDA]]
; DEFAULT-NEXT: [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IND]]
; DEFAULT-NEXT: [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[IND]]
; DEFAULT-NEXT: store i32 [[MULC]], ptr [[ARRAYIDXC]], align 4
; DEFAULT-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[ADD]], 20
; DEFAULT-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT1:%.*]], label [[FOR_BODY]]
; DEFAULT: for.end.loopexit:
; DEFAULT-NEXT: br label [[FOR_END:%.*]]
; DEFAULT: for.end.loopexit1:
; DEFAULT-NEXT: br label [[FOR_END]]
; DEFAULT-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; DEFAULT: for.end:
; DEFAULT-NEXT: ret void
;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,36 +9,15 @@
define void @test(ptr %arg, i64 %arg1) {
; CHECK-LABEL: @test(
; CHECK-NEXT: bb:
; CHECK-NEXT: br label [[INNER_1_LVER_CHECK:%.*]]
; CHECK: inner.1.lver.check:
; CHECK-NEXT: br label [[OUTER_HEADER:%.*]]
; CHECK: outer.header:
; CHECK-NEXT: [[PTR_PHI:%.*]] = phi ptr [ [[ARG:%.*]], [[BB:%.*]] ], [ @glob.1, [[OUTER_LATCH:%.*]] ]
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr double, ptr [[PTR_PHI]], i64 3
; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[ARG1:%.*]], 1
; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[INNER_1_PH_LVER_ORIG:%.*]], label [[INNER_1_PH:%.*]]
; CHECK: inner.1.ph.lver.orig:
; CHECK-NEXT: br label [[INNER_1_LVER_ORIG:%.*]]
; CHECK: inner.1.lver.orig:
; CHECK-NEXT: [[IV_1_LVER_ORIG:%.*]] = phi i64 [ 0, [[INNER_1_PH_LVER_ORIG]] ], [ [[IV_NEXT_LVER_ORIG:%.*]], [[INNER_1_LVER_ORIG]] ]
; CHECK-NEXT: [[PTR_IV_1_LVER_ORIG:%.*]] = phi ptr [ @glob.2, [[INNER_1_PH_LVER_ORIG]] ], [ [[PTR_IV_1_NEXT_LVER_ORIG:%.*]], [[INNER_1_LVER_ORIG]] ]
; CHECK-NEXT: [[TMP25_LVER_ORIG:%.*]] = mul nuw nsw i64 [[IV_1_LVER_ORIG]], [[ARG1]]
; CHECK-NEXT: [[GEP_2_LVER_ORIG:%.*]] = getelementptr inbounds double, ptr [[GEP_1]], i64 [[TMP25_LVER_ORIG]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_2_LVER_ORIG]], align 8
; CHECK-NEXT: [[GEP_3_LVER_ORIG:%.*]] = getelementptr double, ptr [[PTR_PHI]], i64 [[TMP25_LVER_ORIG]]
; CHECK-NEXT: [[GEP_4_LVER_ORIG:%.*]] = getelementptr double, ptr [[GEP_3_LVER_ORIG]], i64 2
; CHECK-NEXT: [[TMP29_LVER_ORIG:%.*]] = load double, ptr [[GEP_4_LVER_ORIG]], align 8
; CHECK-NEXT: [[PTR_IV_1_NEXT_LVER_ORIG]] = getelementptr inbounds double, ptr [[PTR_IV_1_LVER_ORIG]], i64 1
; CHECK-NEXT: [[IV_NEXT_LVER_ORIG]] = add nuw nsw i64 [[IV_1_LVER_ORIG]], 1
; CHECK-NEXT: [[C_1_LVER_ORIG:%.*]] = icmp eq i64 [[IV_1_LVER_ORIG]], 1
; CHECK-NEXT: br i1 [[C_1_LVER_ORIG]], label [[INNER_1_EXIT_LOOPEXIT:%.*]], label [[INNER_1_LVER_ORIG]]
; CHECK: inner.1.ph:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[PTR_PHI]], i64 16
; CHECK-NEXT: [[LOAD_INITIAL:%.*]] = load double, ptr [[SCEVGEP]], align 8
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds double, ptr [[PTR_PHI]], i64 3
; CHECK-NEXT: br label [[INNER_1:%.*]]
; CHECK: inner.1:
; CHECK-NEXT: [[STORE_FORWARDED:%.*]] = phi double [ [[LOAD_INITIAL]], [[INNER_1_PH]] ], [ 0.000000e+00, [[INNER_1]] ]
; CHECK-NEXT: [[IV_1:%.*]] = phi i64 [ 0, [[INNER_1_PH]] ], [ [[IV_NEXT:%.*]], [[INNER_1]] ]
; CHECK-NEXT: [[PTR_IV_1:%.*]] = phi ptr [ @glob.2, [[INNER_1_PH]] ], [ [[PTR_IV_1_NEXT:%.*]], [[INNER_1]] ]
; CHECK-NEXT: [[TMP25:%.*]] = mul nuw nsw i64 [[IV_1]], [[ARG1]]
; CHECK-NEXT: [[IV_1:%.*]] = phi i64 [ 0, [[OUTER_HEADER]] ], [ [[IV_NEXT:%.*]], [[INNER_1]] ]
; CHECK-NEXT: [[PTR_IV_1:%.*]] = phi ptr [ @glob.2, [[OUTER_HEADER]] ], [ [[PTR_IV_1_NEXT:%.*]], [[INNER_1]] ]
; CHECK-NEXT: [[TMP25:%.*]] = mul nuw nsw i64 [[IV_1]], [[ARG1:%.*]]
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds double, ptr [[GEP_1]], i64 [[TMP25]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_2]], align 8
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr double, ptr [[PTR_PHI]], i64 [[TMP25]]
Expand All @@ -47,15 +26,10 @@ define void @test(ptr %arg, i64 %arg1) {
; CHECK-NEXT: [[PTR_IV_1_NEXT]] = getelementptr inbounds double, ptr [[PTR_IV_1]], i64 1
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV_1]], 1
; CHECK-NEXT: [[C_1:%.*]] = icmp eq i64 [[IV_1]], 1
; CHECK-NEXT: br i1 [[C_1]], label [[INNER_1_EXIT_LOOPEXIT1:%.*]], label [[INNER_1]]
; CHECK: inner.1.exit.loopexit:
; CHECK-NEXT: [[LCSSA_PTR_IV_1_PH:%.*]] = phi ptr [ [[PTR_IV_1_LVER_ORIG]], [[INNER_1_LVER_ORIG]] ]
; CHECK-NEXT: br label [[INNER_1_EXIT:%.*]]
; CHECK: inner.1.exit.loopexit1:
; CHECK-NEXT: [[LCSSA_PTR_IV_1_PH2:%.*]] = phi ptr [ [[PTR_IV_1]], [[INNER_1]] ]
; CHECK-NEXT: br label [[INNER_1_EXIT]]
; CHECK-NEXT: br i1 [[C_1]], label [[INNER_1_EXIT:%.*]], label [[INNER_1]]
; CHECK: inner.1.exit:
; CHECK-NEXT: [[LCSSA_PTR_IV_1:%.*]] = phi ptr [ [[LCSSA_PTR_IV_1_PH]], [[INNER_1_EXIT_LOOPEXIT]] ], [ [[LCSSA_PTR_IV_1_PH2]], [[INNER_1_EXIT_LOOPEXIT1]] ]
; CHECK-NEXT: [[IV_1_LCSSA:%.*]] = phi i64 [ [[IV_1]], [[INNER_1]] ]
; CHECK-NEXT: [[LCSSA_PTR_IV_1:%.*]] = phi ptr [ [[PTR_IV_1]], [[INNER_1]] ]
; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds double, ptr [[LCSSA_PTR_IV_1]], i64 1
; CHECK-NEXT: br label [[INNER_2:%.*]]
; CHECK: inner.2:
Expand All @@ -69,11 +43,13 @@ define void @test(ptr %arg, i64 %arg1) {
; CHECK-NEXT: [[LCSSA_PTR_IV_2:%.*]] = phi ptr [ [[PTR_IV_2]], [[INNER_2]] ]
; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds double, ptr [[PTR_PHI]], i64 1
; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds double, ptr [[LCSSA_PTR_IV_2]], i64 1
; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[INDVAR_LCSSA]], 3
; CHECK-NEXT: [[TMP0:%.*]] = shl nuw nsw i64 [[IV_1_LCSSA]], 3
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 24
; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[LCSSA_PTR_IV_1]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[INDVAR_LCSSA]], 3
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], [[TMP1]]
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr @glob.2, i64 [[TMP3]]
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[GEP_7]], [[GEP_1]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[PTR_PHI]], [[SCEVGEP3]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[PTR_PHI]], [[SCEVGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[INNER_3_PH_LVER_ORIG:%.*]], label [[INNER_3_PH:%.*]]
; CHECK: inner.3.ph.lver.orig:
Expand All @@ -89,10 +65,10 @@ define void @test(ptr %arg, i64 %arg1) {
; CHECK-NEXT: [[C_2_LVER_ORIG:%.*]] = icmp eq i64 [[IV_2_LVER_ORIG]], 1
; CHECK-NEXT: br i1 [[C_2_LVER_ORIG]], label [[OUTER_LATCH_LOOPEXIT:%.*]], label [[INNER_3_LVER_ORIG]]
; CHECK: inner.3.ph:
; CHECK-NEXT: [[LOAD_INITIAL5:%.*]] = load double, ptr [[PTR_PHI]], align 8
; CHECK-NEXT: [[LOAD_INITIAL:%.*]] = load double, ptr [[PTR_PHI]], align 8
; CHECK-NEXT: br label [[INNER_3:%.*]]
; CHECK: inner.3:
; CHECK-NEXT: [[STORE_FORWARDED6:%.*]] = phi double [ [[LOAD_INITIAL5]], [[INNER_3_PH]] ], [ 0.000000e+00, [[INNER_3]] ]
; CHECK-NEXT: [[STORE_FORWARDED:%.*]] = phi double [ [[LOAD_INITIAL]], [[INNER_3_PH]] ], [ 0.000000e+00, [[INNER_3]] ]
; CHECK-NEXT: [[IV_2:%.*]] = phi i64 [ 0, [[INNER_3_PH]] ], [ [[IV_2_NEXT:%.*]], [[INNER_3]] ]
; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds double, ptr [[GEP_6]], i64 [[IV_2]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_7]], align 8
Expand All @@ -101,13 +77,13 @@ define void @test(ptr %arg, i64 %arg1) {
; CHECK-NEXT: [[TMP18:%.*]] = load double, ptr [[GEP_9]], align 8
; CHECK-NEXT: [[IV_2_NEXT]] = add nuw nsw i64 [[IV_2]], 1
; CHECK-NEXT: [[C_2:%.*]] = icmp eq i64 [[IV_2]], 1
; CHECK-NEXT: br i1 [[C_2]], label [[OUTER_LATCH_LOOPEXIT4:%.*]], label [[INNER_3]]
; CHECK-NEXT: br i1 [[C_2]], label [[OUTER_LATCH_LOOPEXIT1:%.*]], label [[INNER_3]]
; CHECK: outer.latch.loopexit:
; CHECK-NEXT: br label [[OUTER_LATCH]]
; CHECK: outer.latch.loopexit4:
; CHECK: outer.latch.loopexit1:
; CHECK-NEXT: br label [[OUTER_LATCH]]
; CHECK: outer.latch:
; CHECK-NEXT: br label [[INNER_1_LVER_CHECK]]
; CHECK-NEXT: br label [[OUTER_HEADER]]
;
bb:
br label %outer.header
Expand Down
Loading

0 comments on commit 4f76d53

Please sign in to comment.