From 93c9e035c7b45d253448a6b9301c58f5cba7a57b Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Wed, 3 Jul 2024 14:57:10 +0100 Subject: [PATCH 1/2] LoopLoadElim: add pre-commit test for #96656 After the pr96656.ll tests were added to LAA and LoopVersioning, it was decided that the bug is in a caller of LoopVersioning, not in LAA or LoopVersioning itself. The caller has now been found to be LoopLoadElim. Hence, re-organize the added tests to avoid confusion, and add a new reduced test for #96656 to LoopLoadElim, in preparation for fixing the bug. --- .../Analysis/LoopAccessAnalysis/pr96656.ll | 49 ----------- .../LoopAccessAnalysis/symbolic-stride.ll | 48 ++++++++++ llvm/test/Transforms/LoopLoadElim/pr96656.ll | 87 +++++++++++++++++++ .../{pr96656.ll => single-iteration.ll} | 36 ++++---- 4 files changed, 155 insertions(+), 65 deletions(-) delete mode 100644 llvm/test/Analysis/LoopAccessAnalysis/pr96656.ll create mode 100644 llvm/test/Transforms/LoopLoadElim/pr96656.ll rename llvm/test/Transforms/LoopVersioning/{pr96656.ll => single-iteration.ll} (75%) diff --git a/llvm/test/Analysis/LoopAccessAnalysis/pr96656.ll b/llvm/test/Analysis/LoopAccessAnalysis/pr96656.ll deleted file mode 100644 index 5b9833553fa02c..00000000000000 --- a/llvm/test/Analysis/LoopAccessAnalysis/pr96656.ll +++ /dev/null @@ -1,49 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -passes='print<access-info>' -disable-output %s 2>&1 | FileCheck %s - -define void @false.equal.predicate(ptr %arg, ptr %arg1, i1 %arg2) { -; CHECK-LABEL: 'false.equal.predicate' -; CHECK-NEXT: loop.body: -; CHECK-NEXT: Memory dependences are safe -; CHECK-NEXT: Dependences: -; CHECK-NEXT: Run-time memory checks: -; CHECK-NEXT: Grouped accesses: -; CHECK-EMPTY: -; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
-; CHECK-NEXT: SCEV assumptions: -; CHECK-NEXT: Equal predicate: %load == 1 -; CHECK-EMPTY: -; CHECK-NEXT: Expressions re-written: -; CHECK-NEXT: [PSE] %gep10 = getelementptr double, ptr %gep8, i64 %mul: -; CHECK-NEXT: {(8 + %arg1),+,(8 * (sext i32 %load to i64))}<%loop.body> -; CHECK-NEXT: --> {(8 + %arg1),+,8}<%loop.body> -; -entry: - %load = load i32, ptr %arg, align 4 - br i1 %arg2, label %noloop.exit, label %loop.ph - -loop.ph: ; preds = %entry - %sext7 = sext i32 %load to i64 - %gep8 = getelementptr i8, ptr %arg1, i64 8 - br label %loop.body - -loop.body: ; preds = %loop.body, %loop.ph - %phi = phi i64 [ 0, %loop.ph ], [ %add, %loop.body ] - %mul = mul i64 %phi, %sext7 - %gep10 = getelementptr double, ptr %gep8, i64 %mul - %load11 = load double, ptr %gep10, align 8 - store double %load11, ptr %arg1, align 8 - %add = add i64 %phi, 1 - %icmp = icmp eq i64 %phi, 0 - br i1 %icmp, label %loop.exit, label %loop.body - -noloop.exit: ; preds = %entry - %sext = sext i32 %load to i64 - %gep = getelementptr double, ptr %arg1, i64 %sext - %load5 = load double, ptr %gep, align 8 - store double %load5, ptr %arg, align 8 - ret void - -loop.exit: ; preds = %loop.body - ret void -} diff --git a/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll index 7c1b11e22aef24..0871ae84c37111 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll @@ -223,6 +223,54 @@ exit: ret void } +define double @single_iteration_unknown_stride(i32 %arg, ptr %arg1, i1 %arg2) { +; CHECK-LABEL: 'single_iteration_unknown_stride' +; CHECK-NEXT: loop.body: +; CHECK-NEXT: Memory dependences are safe +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
+; CHECK-NEXT: SCEV assumptions: +; CHECK-NEXT: Equal predicate: %arg == 1 +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; CHECK-NEXT: [PSE] %gep10 = getelementptr double, ptr %gep8, i64 %mul: +; CHECK-NEXT: {(8 + %arg1),+,(8 * (sext i32 %arg to i64))}<%loop.body> +; CHECK-NEXT: --> {(8 + %arg1),+,8}<%loop.body> +; +entry: + br i1 %arg2, label %noloop.exit, label %loop.ph + +loop.ph: ; preds = %entry + %sext7 = sext i32 %arg to i64 + %gep8 = getelementptr i8, ptr %arg1, i64 8 + br label %loop.body + +loop.body: ; preds = %loop.body, %loop.ph + %phi = phi i64 [ 0, %loop.ph ], [ %add, %loop.body ] + %mul = mul i64 %phi, %sext7 + %gep10 = getelementptr double, ptr %gep8, i64 %mul + %load11 = load double, ptr %gep10, align 8 + store double %load11, ptr %arg1, align 8 + %add = add i64 %phi, 1 + %icmp = icmp eq i64 %phi, 0 + br i1 %icmp, label %loop.exit, label %loop.body + +noloop.exit: ; preds = %entry + %sext = sext i32 %arg to i64 + %gep = getelementptr double, ptr %arg1, i64 %sext + %load5 = load double, ptr %gep, align 8 + ret double %load5 + +loop.exit: ; preds = %loop.body + %sext2 = sext i32 %arg to i64 + %gep2 = getelementptr double, ptr %arg1, i64 %sext2 + %load6 = load double, ptr %gep2, align 8 + ret double %load6 +} + ; A loop with two symbolic strides. 
define void @two_strides(ptr noalias %A, ptr noalias %B, i64 %N, i64 %stride.1, i64 %stride.2) { ; CHECK-LABEL: 'two_strides' diff --git a/llvm/test/Transforms/LoopLoadElim/pr96656.ll b/llvm/test/Transforms/LoopLoadElim/pr96656.ll new file mode 100644 index 00000000000000..c6b336c0db8d39 --- /dev/null +++ b/llvm/test/Transforms/LoopLoadElim/pr96656.ll @@ -0,0 +1,87 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes=loop-load-elim -S %s | FileCheck %s + +define void @single_iteration_versioning(ptr %arg, ptr %arg1, i1 %arg2) { +; CHECK-LABEL: define void @single_iteration_versioning( +; CHECK-SAME: ptr [[ARG:%.*]], ptr [[ARG1:%.*]], i1 [[ARG2:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[ARG]], align 4 +; CHECK-NEXT: br i1 [[ARG2]], label %[[NOLOOP_EXIT:.*]], label %[[LOOP_LVER_CHECK:.*]] +; CHECK: [[LOOP_LVER_CHECK]]: +; CHECK-NEXT: [[SEXT7:%.*]] = sext i32 [[LOAD]] to i64 +; CHECK-NEXT: [[GEP8:%.*]] = getelementptr i8, ptr [[ARG1]], i64 8 +; CHECK-NEXT: [[GEP9:%.*]] = getelementptr i8, ptr [[ARG1]], i64 16 +; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[LOAD]], 1 +; CHECK-NEXT: br i1 [[IDENT_CHECK]], label %[[LOOP_PH_LVER_ORIG:.*]], label %[[LOOP_PH:.*]] +; CHECK: [[LOOP_PH_LVER_ORIG]]: +; CHECK-NEXT: br label %[[LOOP_LVER_ORIG:.*]] +; CHECK: [[LOOP_LVER_ORIG]]: +; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ 0, %[[LOOP_PH_LVER_ORIG]] ], [ [[ADD:%.*]], %[[LOOP_LVER_ORIG]] ] +; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[PHI]], [[SEXT7]] +; CHECK-NEXT: [[GEP10:%.*]] = getelementptr double, ptr [[GEP8]], i64 [[MUL]] +; CHECK-NEXT: [[LOAD11:%.*]] = load double, ptr [[GEP10]], align 8 +; CHECK-NEXT: [[GEP13_LVER_ORIG:%.*]] = getelementptr double, ptr [[GEP9]], i64 [[MUL]] +; CHECK-NEXT: store double [[LOAD11]], ptr [[GEP13_LVER_ORIG]], align 8 +; CHECK-NEXT: [[ADD]] = add i64 [[PHI]], 1 +; CHECK-NEXT: [[ICMP_LVER_ORIG:%.*]] = icmp eq i64 [[PHI]], 1 +; CHECK-NEXT: br i1 
[[ICMP_LVER_ORIG]], label %[[EXIT_LOOPEXIT_LOOPEXIT:.*]], label %[[LOOP_LVER_ORIG]] +; CHECK: [[LOOP_PH]]: +; CHECK-NEXT: [[LOAD_INITIAL:%.*]] = load double, ptr [[GEP8]], align 8 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[STORE_FORWARDED:%.*]] = phi double [ [[LOAD_INITIAL]], %[[LOOP_PH]] ], [ [[STORE_FORWARDED]], %[[LOOP]] ] +; CHECK-NEXT: [[PHI1:%.*]] = phi i64 [ 0, %[[LOOP_PH]] ], [ [[ADD1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MUL1:%.*]] = mul i64 [[PHI1]], [[SEXT7]] +; CHECK-NEXT: [[GEP11:%.*]] = getelementptr double, ptr [[GEP8]], i64 [[MUL1]] +; CHECK-NEXT: [[LOAD12:%.*]] = load double, ptr [[GEP11]], align 8 +; CHECK-NEXT: [[GEP13:%.*]] = getelementptr double, ptr [[GEP9]], i64 [[MUL1]] +; CHECK-NEXT: store double [[STORE_FORWARDED]], ptr [[GEP13]], align 8 +; CHECK-NEXT: [[ADD1]] = add i64 [[PHI1]], 1 +; CHECK-NEXT: [[ICMP:%.*]] = icmp eq i64 [[PHI1]], 1 +; CHECK-NEXT: br i1 [[ICMP]], label %[[EXIT_LOOPEXIT_LOOPEXIT1:.*]], label %[[LOOP]] +; CHECK: [[NOLOOP_EXIT]]: +; CHECK-NEXT: [[SEXT2:%.*]] = sext i32 [[LOAD]] to i64 +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr double, ptr [[ARG1]], i64 [[SEXT2]] +; CHECK-NEXT: [[LOAD6:%.*]] = load double, ptr [[GEP2]], align 8 +; CHECK-NEXT: store double [[LOAD6]], ptr [[ARG]], align 8 +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT_LOOPEXIT_LOOPEXIT]]: +; CHECK-NEXT: br label %[[EXIT_LOOPEXIT:.*]] +; CHECK: [[EXIT_LOOPEXIT_LOOPEXIT1]]: +; CHECK-NEXT: br label %[[EXIT_LOOPEXIT]] +; CHECK: [[EXIT_LOOPEXIT]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + %load = load i32, ptr %arg, align 4 + br i1 %arg2, label %noloop.exit, label %loop.ph + +loop.ph: ; preds = %entry + %sext7 = sext i32 %load to i64 + %gep8 = getelementptr i8, ptr %arg1, i64 8 + %gep9 = getelementptr i8, ptr %arg1, i64 16 + br label %loop + +loop: ; preds = %loop, %loop.ph + %phi = phi i64 [ 0, %loop.ph ], [ %add, %loop ] + %mul = mul i64 %phi, %sext7 + %gep11 = getelementptr 
double, ptr %gep8, i64 %mul + %load12 = load double, ptr %gep11, align 8 + %gep13 = getelementptr double, ptr %gep9, i64 %mul + store double %load12, ptr %gep13, align 8 + %add = add i64 %phi, 1 + %icmp = icmp eq i64 %phi, 1 + br i1 %icmp, label %exit, label %loop + +noloop.exit: ; preds = %entry + %sext = sext i32 %load to i64 + %gep = getelementptr double, ptr %arg1, i64 %sext + %load5 = load double, ptr %gep, align 8 + store double %load5, ptr %arg, align 8 + br label %exit + +exit: ; preds = %noloop.exit, %loop + ret void +} diff --git a/llvm/test/Transforms/LoopVersioning/pr96656.ll b/llvm/test/Transforms/LoopVersioning/single-iteration.ll similarity index 75% rename from llvm/test/Transforms/LoopVersioning/pr96656.ll rename to llvm/test/Transforms/LoopVersioning/single-iteration.ll index 0264fe40a94302..7be34f06785853 100644 --- a/llvm/test/Transforms/LoopVersioning/pr96656.ll +++ b/llvm/test/Transforms/LoopVersioning/single-iteration.ll @@ -1,16 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -passes=loop-versioning -S %s | FileCheck %s -define void @lver.check.unnecessary(ptr %arg, ptr %arg1, i1 %arg2) { -; CHECK-LABEL: define void @lver.check.unnecessary( -; CHECK-SAME: ptr [[ARG:%.*]], ptr [[ARG1:%.*]], i1 [[ARG2:%.*]]) { +; Callers should not call LoopVersioning on single-iteration loops, but LoopVersioning faithfully versions the loop when the stride is unknown and there is just a single iteration. 
+ +define double @single_iteration_unknown_stride(i32 %arg, ptr %arg1, i1 %arg2) { +; CHECK-LABEL: define double @single_iteration_unknown_stride( +; CHECK-SAME: i32 [[ARG:%.*]], ptr [[ARG1:%.*]], i1 [[ARG2:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[ARG]], align 4 ; CHECK-NEXT: br i1 [[ARG2]], label %[[NOLOOP_EXIT:.*]], label %[[LOOP_BODY_LVER_CHECK:.*]] ; CHECK: [[LOOP_BODY_LVER_CHECK]]: -; CHECK-NEXT: [[SEXT7:%.*]] = sext i32 [[LOAD]] to i64 +; CHECK-NEXT: [[SEXT7:%.*]] = sext i32 [[ARG]] to i64 ; CHECK-NEXT: [[GEP8:%.*]] = getelementptr i8, ptr [[ARG1]], i64 8 -; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[LOAD]], 1 +; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[ARG]], 1 ; CHECK-NEXT: br i1 [[IDENT_CHECK]], label %[[LOOP_BODY_PH_LVER_ORIG:.*]], label %[[LOOP_BODY_PH:.*]] ; CHECK: [[LOOP_BODY_PH_LVER_ORIG]]: ; CHECK-NEXT: br label %[[LOOP_BODY_LVER_ORIG:.*]] @@ -35,24 +36,25 @@ define void @lver.check.unnecessary(ptr %arg, ptr %arg1, i1 %arg2) { ; CHECK-NEXT: [[ICMP:%.*]] = icmp eq i64 [[PHI]], 0 ; CHECK-NEXT: br i1 [[ICMP]], label %[[LOOP_EXIT_LOOPEXIT1:.*]], label %[[LOOP_BODY]] ; CHECK: [[NOLOOP_EXIT]]: -; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[LOAD]] to i64 +; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[ARG]] to i64 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr double, ptr [[ARG1]], i64 [[SEXT]] ; CHECK-NEXT: [[LOAD5:%.*]] = load double, ptr [[GEP]], align 8 -; CHECK-NEXT: store double [[LOAD5]], ptr [[ARG]], align 8 -; CHECK-NEXT: ret void +; CHECK-NEXT: ret double [[LOAD5]] ; CHECK: [[LOOP_EXIT_LOOPEXIT]]: ; CHECK-NEXT: br label %[[LOOP_EXIT:.*]] ; CHECK: [[LOOP_EXIT_LOOPEXIT1]]: ; CHECK-NEXT: br label %[[LOOP_EXIT]] ; CHECK: [[LOOP_EXIT]]: -; CHECK-NEXT: ret void +; CHECK-NEXT: [[SEXT2:%.*]] = sext i32 [[ARG]] to i64 +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr double, ptr [[ARG1]], i64 [[SEXT2]] +; CHECK-NEXT: [[LOAD6:%.*]] = load double, ptr [[GEP2]], align 8 +; CHECK-NEXT: ret double [[LOAD6]] ; entry: - %load = load 
i32, ptr %arg, align 4 br i1 %arg2, label %noloop.exit, label %loop.ph loop.ph: ; preds = %entry - %sext7 = sext i32 %load to i64 + %sext7 = sext i32 %arg to i64 %gep8 = getelementptr i8, ptr %arg1, i64 8 br label %loop.body @@ -67,12 +69,14 @@ loop.body: ; preds = %loop.body, %loop.ph br i1 %icmp, label %loop.exit, label %loop.body noloop.exit: ; preds = %entry - %sext = sext i32 %load to i64 + %sext = sext i32 %arg to i64 %gep = getelementptr double, ptr %arg1, i64 %sext %load5 = load double, ptr %gep, align 8 - store double %load5, ptr %arg, align 8 - ret void + ret double %load5 loop.exit: ; preds = %loop.body - ret void + %sext2 = sext i32 %arg to i64 + %gep2 = getelementptr double, ptr %arg1, i64 %sext2 + %load6 = load double, ptr %gep2, align 8 + ret double %load6 } From 604c223cc1b46d39d5d342e7296b7ccd4681ed76 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Wed, 3 Jul 2024 15:45:19 +0100 Subject: [PATCH 2/2] LoopLoadElim: don't version single-iteration loops It is unnecessary for LoopLoadElim to version single-iteration loops. Don't call LoopVersioning when the backedge-taken count (BTC) is known to be 1. Fixes #96656. --- .../Transforms/Scalar/LoopLoadElimination.cpp | 9 ++++--- .../invalidate-laa-after-versioning.ll | 25 ++++++++++--------- llvm/test/Transforms/LoopLoadElim/pr96656.ll | 25 +++---------------- 3 files changed, 21 insertions(+), 38 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp index 489f12e689d319..058a749b9b703b 100644 --- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp +++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -598,10 +598,11 @@ class LoadEliminationForLoop { } // Point of no-return, start the transformation. First, version the loop - // if necessary. - - LoopVersioning LV(LAI, Checks, L, LI, DT, PSE.getSE()); - LV.versionLoop(); + // unless its backedge-taken count is known to be exactly one. 
+ if (!PSE.getBackedgeTakenCount()->isOne()) { + LoopVersioning LV(LAI, Checks, L, LI, DT, PSE.getSE()); + LV.versionLoop(); + } // After versioning, some of the candidates' pointers could stop being // SCEVAddRecs. We need to filter them out. diff --git a/llvm/test/Transforms/LoopLoadElim/invalidate-laa-after-versioning.ll b/llvm/test/Transforms/LoopLoadElim/invalidate-laa-after-versioning.ll index 10e10653a431da..4d8a58feae7501 100644 --- a/llvm/test/Transforms/LoopLoadElim/invalidate-laa-after-versioning.ll +++ b/llvm/test/Transforms/LoopLoadElim/invalidate-laa-after-versioning.ll @@ -12,7 +12,7 @@ define void @test(ptr %arg, i64 %arg1) { ; CHECK-NEXT: br label [[INNER_1_LVER_CHECK:%.*]] ; CHECK: inner.1.lver.check: ; CHECK-NEXT: [[PTR_PHI:%.*]] = phi ptr [ [[ARG:%.*]], [[BB:%.*]] ], [ @glob.1, [[OUTER_LATCH:%.*]] ] -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr double, ptr [[PTR_PHI]], i64 3 +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds double, ptr [[PTR_PHI]], i64 3 ; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[ARG1:%.*]], 1 ; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[INNER_1_PH_LVER_ORIG:%.*]], label [[INNER_1_PH:%.*]] ; CHECK: inner.1.ph.lver.orig: @@ -28,7 +28,7 @@ define void @test(ptr %arg, i64 %arg1) { ; CHECK-NEXT: [[TMP29_LVER_ORIG:%.*]] = load double, ptr [[GEP_4_LVER_ORIG]], align 8 ; CHECK-NEXT: [[PTR_IV_1_NEXT_LVER_ORIG]] = getelementptr inbounds double, ptr [[PTR_IV_1_LVER_ORIG]], i64 1 ; CHECK-NEXT: [[IV_NEXT_LVER_ORIG]] = add nuw nsw i64 [[IV_1_LVER_ORIG]], 1 -; CHECK-NEXT: [[C_1_LVER_ORIG:%.*]] = icmp eq i64 [[IV_1_LVER_ORIG]], 1 +; CHECK-NEXT: [[C_1_LVER_ORIG:%.*]] = icmp eq i64 [[IV_1_LVER_ORIG]], 2 ; CHECK-NEXT: br i1 [[C_1_LVER_ORIG]], label [[INNER_1_EXIT_LOOPEXIT:%.*]], label [[INNER_1_LVER_ORIG]] ; CHECK: inner.1.ph: ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[PTR_PHI]], i64 16 @@ -46,7 +46,7 @@ define void @test(ptr %arg, i64 %arg1) { ; CHECK-NEXT: [[TMP29:%.*]] = load double, ptr [[GEP_4]], align 8 ; CHECK-NEXT: 
[[PTR_IV_1_NEXT]] = getelementptr inbounds double, ptr [[PTR_IV_1]], i64 1 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV_1]], 1 -; CHECK-NEXT: [[C_1:%.*]] = icmp eq i64 [[IV_1]], 1 +; CHECK-NEXT: [[C_1:%.*]] = icmp eq i64 [[IV_1]], 2 ; CHECK-NEXT: br i1 [[C_1]], label [[INNER_1_EXIT_LOOPEXIT1:%.*]], label [[INNER_1]] ; CHECK: inner.1.exit.loopexit: ; CHECK-NEXT: [[LCSSA_PTR_IV_1_PH:%.*]] = phi ptr [ [[PTR_IV_1_LVER_ORIG]], [[INNER_1_LVER_ORIG]] ] @@ -72,7 +72,8 @@ define void @test(ptr %arg, i64 %arg1) { ; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[INDVAR_LCSSA]], 3 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 24 ; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[LCSSA_PTR_IV_1]], i64 [[TMP1]] -; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[GEP_7]], [[GEP_1]] +; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[PTR_PHI]], i64 32 +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[GEP_7]], [[SCEVGEP4]] ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[PTR_PHI]], [[SCEVGEP3]] ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[INNER_3_PH_LVER_ORIG:%.*]], label [[INNER_3_PH:%.*]] @@ -86,13 +87,13 @@ define void @test(ptr %arg, i64 %arg1) { ; CHECK-NEXT: [[GEP_9_LVER_ORIG:%.*]] = getelementptr double, ptr [[PTR_PHI]], i64 [[IV_2_LVER_ORIG]] ; CHECK-NEXT: [[TMP18_LVER_ORIG:%.*]] = load double, ptr [[GEP_9_LVER_ORIG]], align 8 ; CHECK-NEXT: [[IV_2_NEXT_LVER_ORIG]] = add nuw nsw i64 [[IV_2_LVER_ORIG]], 1 -; CHECK-NEXT: [[C_2_LVER_ORIG:%.*]] = icmp eq i64 [[IV_2_LVER_ORIG]], 1 +; CHECK-NEXT: [[C_2_LVER_ORIG:%.*]] = icmp eq i64 [[IV_2_LVER_ORIG]], 2 ; CHECK-NEXT: br i1 [[C_2_LVER_ORIG]], label [[OUTER_LATCH_LOOPEXIT:%.*]], label [[INNER_3_LVER_ORIG]] ; CHECK: inner.3.ph: -; CHECK-NEXT: [[LOAD_INITIAL5:%.*]] = load double, ptr [[PTR_PHI]], align 8 +; CHECK-NEXT: [[LOAD_INITIAL6:%.*]] = load double, ptr [[PTR_PHI]], align 8 ; CHECK-NEXT: br label [[INNER_3:%.*]] ; CHECK: inner.3: -; CHECK-NEXT: 
[[STORE_FORWARDED6:%.*]] = phi double [ [[LOAD_INITIAL5]], [[INNER_3_PH]] ], [ 0.000000e+00, [[INNER_3]] ] +; CHECK-NEXT: [[STORE_FORWARDED7:%.*]] = phi double [ [[LOAD_INITIAL6]], [[INNER_3_PH]] ], [ 0.000000e+00, [[INNER_3]] ] ; CHECK-NEXT: [[IV_2:%.*]] = phi i64 [ 0, [[INNER_3_PH]] ], [ [[IV_2_NEXT:%.*]], [[INNER_3]] ] ; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds double, ptr [[GEP_6]], i64 [[IV_2]] ; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_7]], align 8 @@ -100,11 +101,11 @@ define void @test(ptr %arg, i64 %arg1) { ; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr double, ptr [[PTR_PHI]], i64 [[IV_2]] ; CHECK-NEXT: [[TMP18:%.*]] = load double, ptr [[GEP_9]], align 8 ; CHECK-NEXT: [[IV_2_NEXT]] = add nuw nsw i64 [[IV_2]], 1 -; CHECK-NEXT: [[C_2:%.*]] = icmp eq i64 [[IV_2]], 1 -; CHECK-NEXT: br i1 [[C_2]], label [[OUTER_LATCH_LOOPEXIT4:%.*]], label [[INNER_3]] +; CHECK-NEXT: [[C_2:%.*]] = icmp eq i64 [[IV_2]], 2 +; CHECK-NEXT: br i1 [[C_2]], label [[OUTER_LATCH_LOOPEXIT5:%.*]], label [[INNER_3]] ; CHECK: outer.latch.loopexit: ; CHECK-NEXT: br label [[OUTER_LATCH]] -; CHECK: outer.latch.loopexit4: +; CHECK: outer.latch.loopexit5: ; CHECK-NEXT: br label [[OUTER_LATCH]] ; CHECK: outer.latch: ; CHECK-NEXT: br label [[INNER_1_LVER_CHECK]] @@ -128,7 +129,7 @@ inner.1: %tmp29 = load double, ptr %gep.4, align 8 %ptr.iv.1.next = getelementptr inbounds double, ptr %ptr.iv.1, i64 1 %iv.next = add nuw nsw i64 %iv.1, 1 - %c.1 = icmp eq i64 %iv.1, 1 + %c.1 = icmp eq i64 %iv.1, 2 br i1 %c.1, label %inner.1.exit, label %inner.1 inner.1.exit: ; preds = %bb22 @@ -155,7 +156,7 @@ inner.3: ; preds = %bb14, %bb10 %gep.9 = getelementptr double, ptr %ptr.phi, i64 %iv.2 %tmp18 = load double, ptr %gep.9, align 8 %iv.2.next = add nuw nsw i64 %iv.2, 1 - %c.2 = icmp eq i64 %iv.2, 1 + %c.2 = icmp eq i64 %iv.2, 2 br i1 %c.2, label %outer.latch, label %inner.3 outer.latch: diff --git a/llvm/test/Transforms/LoopLoadElim/pr96656.ll b/llvm/test/Transforms/LoopLoadElim/pr96656.ll index 
c6b336c0db8d39..8e3f2f1f48e9a7 100644 --- a/llvm/test/Transforms/LoopLoadElim/pr96656.ll +++ b/llvm/test/Transforms/LoopLoadElim/pr96656.ll @@ -6,26 +6,11 @@ define void @single_iteration_versioning(ptr %arg, ptr %arg1, i1 %arg2) { ; CHECK-SAME: ptr [[ARG:%.*]], ptr [[ARG1:%.*]], i1 [[ARG2:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[ARG]], align 4 -; CHECK-NEXT: br i1 [[ARG2]], label %[[NOLOOP_EXIT:.*]], label %[[LOOP_LVER_CHECK:.*]] -; CHECK: [[LOOP_LVER_CHECK]]: +; CHECK-NEXT: br i1 [[ARG2]], label %[[NOLOOP_EXIT:.*]], label %[[LOOP_PH:.*]] +; CHECK: [[LOOP_PH]]: ; CHECK-NEXT: [[SEXT7:%.*]] = sext i32 [[LOAD]] to i64 ; CHECK-NEXT: [[GEP8:%.*]] = getelementptr i8, ptr [[ARG1]], i64 8 ; CHECK-NEXT: [[GEP9:%.*]] = getelementptr i8, ptr [[ARG1]], i64 16 -; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[LOAD]], 1 -; CHECK-NEXT: br i1 [[IDENT_CHECK]], label %[[LOOP_PH_LVER_ORIG:.*]], label %[[LOOP_PH:.*]] -; CHECK: [[LOOP_PH_LVER_ORIG]]: -; CHECK-NEXT: br label %[[LOOP_LVER_ORIG:.*]] -; CHECK: [[LOOP_LVER_ORIG]]: -; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ 0, %[[LOOP_PH_LVER_ORIG]] ], [ [[ADD:%.*]], %[[LOOP_LVER_ORIG]] ] -; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[PHI]], [[SEXT7]] -; CHECK-NEXT: [[GEP10:%.*]] = getelementptr double, ptr [[GEP8]], i64 [[MUL]] -; CHECK-NEXT: [[LOAD11:%.*]] = load double, ptr [[GEP10]], align 8 -; CHECK-NEXT: [[GEP13_LVER_ORIG:%.*]] = getelementptr double, ptr [[GEP9]], i64 [[MUL]] -; CHECK-NEXT: store double [[LOAD11]], ptr [[GEP13_LVER_ORIG]], align 8 -; CHECK-NEXT: [[ADD]] = add i64 [[PHI]], 1 -; CHECK-NEXT: [[ICMP_LVER_ORIG:%.*]] = icmp eq i64 [[PHI]], 1 -; CHECK-NEXT: br i1 [[ICMP_LVER_ORIG]], label %[[EXIT_LOOPEXIT_LOOPEXIT:.*]], label %[[LOOP_LVER_ORIG]] -; CHECK: [[LOOP_PH]]: ; CHECK-NEXT: [[LOAD_INITIAL:%.*]] = load double, ptr [[GEP8]], align 8 ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: @@ -38,17 +23,13 @@ define void @single_iteration_versioning(ptr %arg, ptr %arg1, i1 %arg2) { ; 
CHECK-NEXT: store double [[STORE_FORWARDED]], ptr [[GEP13]], align 8 ; CHECK-NEXT: [[ADD1]] = add i64 [[PHI1]], 1 ; CHECK-NEXT: [[ICMP:%.*]] = icmp eq i64 [[PHI1]], 1 -; CHECK-NEXT: br i1 [[ICMP]], label %[[EXIT_LOOPEXIT_LOOPEXIT1:.*]], label %[[LOOP]] +; CHECK-NEXT: br i1 [[ICMP]], label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP]] ; CHECK: [[NOLOOP_EXIT]]: ; CHECK-NEXT: [[SEXT2:%.*]] = sext i32 [[LOAD]] to i64 ; CHECK-NEXT: [[GEP2:%.*]] = getelementptr double, ptr [[ARG1]], i64 [[SEXT2]] ; CHECK-NEXT: [[LOAD6:%.*]] = load double, ptr [[GEP2]], align 8 ; CHECK-NEXT: store double [[LOAD6]], ptr [[ARG]], align 8 ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[EXIT_LOOPEXIT_LOOPEXIT]]: -; CHECK-NEXT: br label %[[EXIT_LOOPEXIT:.*]] -; CHECK: [[EXIT_LOOPEXIT_LOOPEXIT1]]: -; CHECK-NEXT: br label %[[EXIT_LOOPEXIT]] ; CHECK: [[EXIT_LOOPEXIT]]: ; CHECK-NEXT: br label %[[EXIT]] ; CHECK: [[EXIT]]: