From b4809c03c5202275657a5b1df39d83e4c7f4081e Mon Sep 17 00:00:00 2001 From: k8s-infra-cherrypick-robot <90416843+k8s-infra-cherrypick-robot@users.noreply.github.com> Date: Tue, 13 Feb 2024 21:38:05 -0800 Subject: [PATCH 01/48] Add make rule to prepare branch for release (#1731) Change-Id: Iaad4e043f8c295edbe16fa08f8ad431d8d96d8e5 Co-authored-by: Aldo Culquicondor --- .github/ISSUE_TEMPLATE/NEW_RELEASE.md | 9 ++++++--- Makefile | 19 ++++++++++++++++--- charts/kueue/Chart.yaml | 2 +- charts/kueue/values.yaml | 8 +++----- hack/e2e-test.sh | 2 +- hack/multikueue-e2e-test.sh | 2 +- 6 files changed, 28 insertions(+), 14 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/NEW_RELEASE.md b/.github/ISSUE_TEMPLATE/NEW_RELEASE.md index 3f9c2c5214..6d47cba36f 100644 --- a/.github/ISSUE_TEMPLATE/NEW_RELEASE.md +++ b/.github/ISSUE_TEMPLATE/NEW_RELEASE.md @@ -14,13 +14,16 @@ Please do not remove items from the checklist At least two for minor or major releases. At least one for a patch release. - [ ] Verify that the changelog in this issue and the CHANGELOG folder is up-to-date - [ ] Use https://github.com/kubernetes/release/tree/master/cmd/release-notes to gather notes. - Example: `release-notes --org kubernetes-sigs --repo kueue --branch release-0.3 --start-sha 4a0ebe7a3c5f2775cdf5fc7d60c23225660f8702 --end-sha a51cf138afe65677f5f5c97f8f8b1bc4887f73d2` + Example: `release-notes --org kubernetes-sigs --repo kueue --branch release-0.3 --start-sha 4a0ebe7a3c5f2775cdf5fc7d60c23225660f8702 --end-sha a51cf138afe65677f5f5c97f8f8b1bc4887f73d2 --dependencies=false --required-author=""` - [ ] For major or minor releases (v$MAJ.$MIN.0), create a new release branch. - - [ ] an OWNER creates a vanilla release branch with + - [ ] An OWNER creates a vanilla release branch with `git branch release-$MAJ.$MIN main` - [ ] An OWNER pushes the new release branch with `git push release-$MAJ.$MIN` -- [ ] Update `README.md`, `CHANGELOG`, `charts/kueue/Chart.yaml` (`appVersion`) and `charts/kueue/values.yaml` (`controllerManager.manager.image.tag`) in the release branch: +- [ ] Update the release branch: + - [ ] Update `RELEASE_BRANCH` and `RELEASE_VERSION` in `Makefile` and run `make prepare-release-branch` + - [ ] Update the `CHANGELOG` + - [ ] Submit a pull request with the changes: - [ ] An OWNER [prepares a draft release](https://github.com/kubernetes-sigs/kueue/releases) - [ ] Write the change log into the draft release. - [ ] Run diff --git a/Makefile b/Makefile index 3aa2c9464f..425a1945ef 100644 --- a/Makefile +++ b/Makefile @@ -81,6 +81,11 @@ version_pkg = sigs.k8s.io/kueue/pkg/version LD_FLAGS += -X '$(version_pkg).GitVersion=$(GIT_TAG)' LD_FLAGS += -X '$(version_pkg).GitCommit=$(shell git rev-parse HEAD)' +# Update these variables when preparing a new release or a release branch. 
+# Then run `make prepare-release-branch` +RELEASE_VERSION=v0.5.3 +RELEASE_BRANCH=main + .PHONY: all all: generate fmt vet build @@ -180,20 +185,22 @@ run-test-e2e-%: K8S_VERSION = $(@:run-test-e2e-%=%) run-test-e2e-%: FORCE @echo Running e2e for k8s ${K8S_VERSION} E2E_KIND_VERSION="kindest/node:v$(K8S_VERSION)" KIND_CLUSTER_NAME=$(KIND_CLUSTER_NAME) CREATE_KIND_CLUSTER=$(CREATE_KIND_CLUSTER) ARTIFACTS="$(ARTIFACTS)/$@" IMAGE_TAG=$(IMAGE_TAG) GINKGO_ARGS="$(GINKGO_ARGS)" ./hack/e2e-test.sh + @$(call clean-manifests) JOBSET_VERSION = $(shell $(GO_CMD) list -m -f "{{.Version}}" sigs.k8s.io/jobset) run-test-multikueue-e2e-%: K8S_VERSION = $(@:run-test-multikueue-e2e-%=%) run-test-multikueue-e2e-%: FORCE @echo Running multikueue e2e for k8s ${K8S_VERSION} E2E_KIND_VERSION="kindest/node:v$(K8S_VERSION)" KIND_CLUSTER_NAME=$(KIND_CLUSTER_NAME) CREATE_KIND_CLUSTER=$(CREATE_KIND_CLUSTER) ARTIFACTS="$(ARTIFACTS)/$@" IMAGE_TAG=$(IMAGE_TAG) GINKGO_ARGS="$(GINKGO_ARGS)" JOBSET_VERSION=$(JOBSET_VERSION) ./hack/multikueue-e2e-test.sh + @$(call clean-manifests) .PHONY: ci-lint ci-lint: golangci-lint $(GOLANGCI_LINT) run --timeout 15m0s .PHONY: verify -verify: gomod-verify vet ci-lint fmt-verify toc-verify manifests generate update-helm generate-apiref - git --no-pager diff --exit-code config/components apis charts/kueue/templates client-go site/content/en/docs/reference +verify: gomod-verify vet ci-lint fmt-verify toc-verify manifests generate update-helm generate-apiref prepare-release-branch + git --no-pager diff --exit-code config/components apis charts/kueue/templates client-go site/ ##@ Build @@ -243,7 +250,7 @@ ifndef ignore-not-found ignore-not-found = false endif -clean-manifests = (cd config/components/manager && $(KUSTOMIZE) edit set image controller=gcr.io/k8s-staging-kueue/kueue:main) +clean-manifests = (cd config/components/manager && $(KUSTOMIZE) edit set image controller=gcr.io/k8s-staging-kueue/kueue:$(RELEASE_BRANCH)) .PHONY: install install: manifests kustomize ## Install CRDs into the K8s cluster specified in ~/.kube/config. @@ -286,6 +293,12 @@ artifacts: kustomize yq helm # Revert the image changes $(YQ) e '.controllerManager.manager.image.repository = "$(STAGING_IMAGE_REGISTRY)/$(IMAGE_NAME)" | .controllerManager.manager.image.tag = "main" | .controllerManager.manager.image.pullPolicy = "Always"' -i charts/kueue/values.yaml +.PHONY: prepare-release-branch +prepare-release-branch: yq kustomize + sed -r 's/v[0-9]+\.[0-9]+\.[0-9]+/$(RELEASE_VERSION)/g' -i README.md -i site/config.toml + $(YQ) e '.appVersion = "$(RELEASE_VERSION)"' -i charts/kueue/Chart.yaml + @$(call clean-manifests) + ##@ Tools # Build an image that can be used with kubectl debug diff --git a/charts/kueue/Chart.yaml b/charts/kueue/Chart.yaml index 3b19e9071b..69476c0564 100644 --- a/charts/kueue/Chart.yaml +++ b/charts/kueue/Chart.yaml @@ -18,4 +18,4 @@ version: 0.1.0 # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. 
-appVersion: "0.5.3" +appVersion: "v0.5.3" diff --git a/charts/kueue/values.yaml b/charts/kueue/values.yaml index 0c1a90d8af..94725329f9 100644 --- a/charts/kueue/values.yaml +++ b/charts/kueue/values.yaml @@ -9,9 +9,9 @@ enablePrometheus: false enableCertManager: false # Customize controlerManager controllerManager: - #featureGates: - # - name: PartialAdmission - # enabled: true + #featureGates: + # - name: PartialAdmission + # enabled: true kubeRbacProxy: image: repository: gcr.io/kubebuilder/kube-rbac-proxy @@ -22,8 +22,6 @@ controllerManager: manager: image: repository: gcr.io/k8s-staging-kueue/kueue - # tag, if defined will use the given image tag, else Chart.AppVersion will be used - tag: main # This should be set to 'IfNotPresent' for released version pullPolicy: Always resources: diff --git a/hack/e2e-test.sh b/hack/e2e-test.sh index 6b0c172df3..61f60202c4 100755 --- a/hack/e2e-test.sh +++ b/hack/e2e-test.sh @@ -35,7 +35,7 @@ function cleanup { fi cluster_cleanup $KIND_CLUSTER_NAME fi - (cd config/components/manager && $KUSTOMIZE edit set image controller=gcr.io/k8s-staging-kueue/kueue:main) + exit 0 } function startup { diff --git a/hack/multikueue-e2e-test.sh b/hack/multikueue-e2e-test.sh index 1c5aa53f83..5ab8eef3c8 100755 --- a/hack/multikueue-e2e-test.sh +++ b/hack/multikueue-e2e-test.sh @@ -46,7 +46,7 @@ function cleanup { cluster_cleanup $WORKER1_KIND_CLUSTER_NAME cluster_cleanup $WORKER2_KIND_CLUSTER_NAME fi - (cd config/components/manager && $KUSTOMIZE edit set image controller=gcr.io/k8s-staging-kueue/kueue:main) + exit 0 } From 650d2f2de88204dc8cd8ebb74c913762f4b280e3 Mon Sep 17 00:00:00 2001 From: Aldo Culquicondor <1299064+alculquicondor@users.noreply.github.com> Date: Wed, 14 Feb 2024 09:42:00 -0500 Subject: [PATCH 02/48] Prepare release notes and files for v0.6 (#1733) Change-Id: I325b633421e009cd3f7ae5b48ddcf0d3da46741d --- CHANGELOG/CHANGELOG-0.6.md | 82 ++++++++++++++++++++ Makefile | 4 +- README.md | 2 +- charts/kueue/Chart.yaml | 2 +- config/components/manager/kustomization.yaml | 2 +- site/config.toml | 2 +- 6 files changed, 88 insertions(+), 6 deletions(-) create mode 100644 CHANGELOG/CHANGELOG-0.6.md diff --git a/CHANGELOG/CHANGELOG-0.6.md b/CHANGELOG/CHANGELOG-0.6.md new file mode 100644 index 0000000000..b39a971b1a --- /dev/null +++ b/CHANGELOG/CHANGELOG-0.6.md @@ -0,0 +1,82 @@ +## v0.6.0 + +Changes since `v0.5.0`: + +### API Change + +- A `stopPolicy` field in the ClusterQueue allows to hold or drain a ClusterQueue (#1299, @trasc) +- Add a lendingLimit field in ClusterQueue's quotas, to allow restricting how much of the unused resources by the ClusterQueue can be borrowed by other ClusterQueues in the cohort. + In other words, this allows a quota equal to `nominal-lendingLimit` to be exclusively used by the ClusterQueue. (#1385, @B1F030) +- Add validation for clusterQueue: when cohort is empty, borrowingLimit must be nil. (#1525, @B1F030) +- Allow decrease reclaimable pods to 0 for suspended job (#1277, @yaroslava-serdiuk) +- MultiKueue: Add Path location type for cluster KubeConfigs. (#1640, @trasc) +- MultiKueue: Add garbage collection of deleted Workloads. (#1643, @trasc) +- MultiKueue: Multi cluster job dispatching for k8s Job. This doesn't include support for live status updates. (#1313, @trasc) +- Support for a mechanism to suspend a running Job without requeueing (#1252, @vicentefb) +- Support for preemption while borrowing (#1397, @mimowo) +- The leaderElection field in the Configuration API is now defaulted. 
+ Leader election is now enabled by default. (#1598, @astefanutti) +- Visibility API: Add an endpoint that allows a user to fetch information about pending workloads and their position in LocalQueue. (#1365, @PBundyra) +- Visibility API: Introduce an on-demand API endpoint for fetching pending workloads in a ClusterQueue. (#1251, @PBundyra) +- Visibility API: extend the information returned for the pending workloads in a ClusterQueue, including the workload position in the queue. (#1362, @PBundyra) +- WaitForPodsReady: Add a config field to allow admins to configure the timestamp used when sorting workloads that were evicted due to their Pods not becoming ready on time. (#1542, @nstogner) +- WaitForPodsReady: Support a backoff re-queueing mechanism with configurable limit. (#1709, @tenzen-y) + +### Feature + +- Add Prebuilt Workload support for JobSets. (#1575, @trasc) +- Add events for transitions of the provisioning AdmissionCheck (#1271, @stuton) +- Add prebuilt workload support for batch/job. (#1358, @trasc) +- Add support for groups of plain Pods. (#1319, @achernevskii) +- Allow configuring featureGates on helm charts. (#1314, @B1F030) +- At log level 6, the usage of ClusterQueues and cohorts is included in logs. + + The status of the internal cache and queues is also logged on demand when a SIGUSR2 is sent to kueue, regardless of the log level. (#1528, @alculquicondor) +- Changing tolerations in an inadmissible job triggers an admission retry with the updated tolerations. (#1304, @stuton) +- Increase the default number of reconcilers for Pod and Workload objects to 5, each. (#1589, @alculquicondor) +- Jobs preserve their position in the queue if the number of pods change before being admitted (#1223, @yaroslava-serdiuk) +- Make the image build setting CGO_ENABLED configurable (#1391, @anishasthana) +- MultiKueue: Add live status updates for multikueue JobSets (#1668, @trasc) +- MultiKueue: Support for JobSets. (#1606, @trasc) +- Support RayCluster as a queue-able workload in Kueue (#1520, @vicentefb) +- Support for retry of provisioning request. + + When `ProvisioningACC` is enabled, and there are existing ProvisioningRequests, they are going to be recreated. + This may cause job evictions for some long-running jobs which were using the ProvisioningRequests. (#1351, @mimowo) +- The image gcr.io/k8s-staging-kueue/debug:main, along with the script ./hack/dump_cache.sh can be used to trigger a dump of the internal cache into the logs. (#1541, @alculquicondor) +- The priority sorting within the cohort could be disabled by setting the feature gate PrioritySortingWithinCohort to false (#1406, @yaroslava-serdiuk) +- Visibility API: Add HA support. (#1554, @astefanutti) + +### Bug or Regression + +- Add Missing RBAC on finalizer sub-resources for job integrations. (#1486, @astefanutti) +- Add Mutating WebhookConfigurations for the AdmissionCheck, RayJob, and JobSet to helm charts (#1567, @B1F030) +- Add Validating/Mutating WebhookConfigurations for the KubeflowJobs like PyTorchJob (#1460, @tenzen-y) +- Added event for QuotaReserved and fixed event for Admitted to trigger when admission checks complete (#1436, @trasc) +- Avoid finished Workloads from blocking quota after a Kueue restart (#1689, @trasc) +- Avoid recreating a Workload for a finished Job and finalize a job when the workload is declared finished. 
(#1383, @achernevskii) +- Do not (re)create ProvReq if the state of admission check is Ready (#1617, @mimowo) +- Fix Kueue crashing at the log level 6 when re-admitting workloads (#1644, @mimowo) +- Fix a bug in the pod integration that unexpected errors will occur when the pod isn't found (#1512, @achernevskii) +- Fix a bug that plain pods managed by kueue will remain in a terminating state, due to a finalizer (#1342, @tenzen-y) +- Fix client-go libraries bug that can not operate clusterScoped resources like ClusterQueue and ResourceFlavor. (#1294, @tenzen-y) +- Fix fungibility policy `Preempt` where it was not able to utilize the next flavor if preemption was not possible. (#1366, @alculquicondor) +- Fix handling of preemption within a cohort when there is no borrowingLimit. In that case, + during preemption, the permitted resources to borrow were calculated as if borrowingLimit=0, instead of unlimited. + + As a consequence, when using `reclaimWithinCohort`, it was possible that a workload, scheduled to ClusterQueue with no borrowingLimit, would preempt more workloads than needed, even though it could fit by borrowing. (#1561, @mimowo) +- Fix the synchronization of the admission check state based on recreated ProvisioningRequest (#1585, @mimowo) +- Fixed fungibility policy `whenCanPreempt: Preempt`. The admission should happen in the flavor for which preemptions were issued. (#1332, @alculquicondor) +- Kueue replicas are advertised as Ready only once the webhooks are functional. + + This allows users to wait with the first requests until the Kueue deployment is available, so that the + early requests don't fail. (#1676, @mimowo) +- Pending workload from StrictFIFO ClusterQueue doesn't block borrowing from other ClusterQueues (#1399, @yaroslava-serdiuk) +- Remove deleted pending workloads from the cache (#1679, @astefanutti) +- Remove finalizer from Workloads that are orphaned (have no owners). (#1523, @achernevskii) +- Trigger an eviction for an admitted Job after an admission check changed state to Rejected. (#1562, @trasc) +- Webhooks are served in non-leading replicas (#1509, @astefanutti) + +### Other (Cleanup or Flake) + +- Expose utilization functions to setup jobframework reconcilers and webhooks (#1630, @tenzen-y) \ No newline at end of file diff --git a/Makefile b/Makefile index 425a1945ef..cb919826f9 100644 --- a/Makefile +++ b/Makefile @@ -83,8 +83,8 @@ LD_FLAGS += -X '$(version_pkg).GitCommit=$(shell git rev-parse HEAD)' # Update these variables when preparing a new release or a release branch. # Then run `make prepare-release-branch` -RELEASE_VERSION=v0.5.3 -RELEASE_BRANCH=main +RELEASE_VERSION=v0.6.0 +RELEASE_BRANCH=release-0.6 .PHONY: all all: generate fmt vet build diff --git a/README.md b/README.md index bd92508549..5ce449faf5 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ Read the [overview](https://kueue.sigs.k8s.io/docs/overview/) to learn more. To install the latest release of Kueue in your cluster, run the following command: ```shell -kubectl apply --server-side -f https://github.com/kubernetes-sigs/kueue/releases/download/v0.5.3/manifests.yaml +kubectl apply --server-side -f https://github.com/kubernetes-sigs/kueue/releases/download/v0.6.0/manifests.yaml ``` The controller runs in the `kueue-system` namespace. 
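For context, the release flow that the Makefile and README changes above support looks roughly like the following. This is an illustrative sketch assembled from the NEW_RELEASE.md checklist and the prepare-release-branch target added in the first patch, not an exact transcript of the v0.6.0 release commands; the remote name and the final commit message are assumptions.

    # Sketch: cutting and preparing the release-0.6 branch
    git branch release-0.6 main
    git push upstream release-0.6       # remote name "upstream" is an assumption
    git checkout release-0.6
    # edit Makefile so that RELEASE_VERSION=v0.6.0 and RELEASE_BRANCH=release-0.6, then:
    make prepare-release-branch         # sed rewrites the version in README.md and site/config.toml,
                                        # yq sets appVersion in charts/kueue/Chart.yaml, and
                                        # kustomize points the manager image tag at release-0.6
    git commit -am "Prepare release notes and files for v0.6"

The CHANGELOG-0.6.md entry is then written by hand (as in this patch) rather than generated by the make target.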
diff --git a/charts/kueue/Chart.yaml b/charts/kueue/Chart.yaml index 69476c0564..3c9ed1ba6d 100644 --- a/charts/kueue/Chart.yaml +++ b/charts/kueue/Chart.yaml @@ -18,4 +18,4 @@ version: 0.1.0 # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "v0.5.3" +appVersion: "v0.6.0" diff --git a/config/components/manager/kustomization.yaml b/config/components/manager/kustomization.yaml index b7b7487da3..8848b738b3 100644 --- a/config/components/manager/kustomization.yaml +++ b/config/components/manager/kustomization.yaml @@ -17,4 +17,4 @@ kind: Kustomization images: - name: controller newName: gcr.io/k8s-staging-kueue/kueue - newTag: main + newTag: release-0.6 diff --git a/site/config.toml b/site/config.toml index 882e741af2..a90adbddd7 100644 --- a/site/config.toml +++ b/site/config.toml @@ -92,7 +92,7 @@ ignoreFiles = [] # The major.minor version tag for the version of the docs represented in this # branch of the repository. Used in the "version-banner" partial to display a # version number for this doc set. - version = "v0.5.3" + version = "v0.6.0" # Flag used in the "version-banner" partial to decide whether to display a # banner on every page indicating that this is an archived version of the docs. From 14f20be73c550027d7e1565ea39c293598cf64d9 Mon Sep 17 00:00:00 2001 From: k8s-infra-cherrypick-robot <90416843+k8s-infra-cherrypick-robot@users.noreply.github.com> Date: Wed, 14 Feb 2024 09:03:09 -0800 Subject: [PATCH 03/48] Fix filename for openvex artifact (#1735) Change-Id: I2b2aa88eca55d3385cb34cf141b5da3c902c28d7 Co-authored-by: Aldo Culquicondor --- .github/workflows/openvex.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/openvex.yaml b/.github/workflows/openvex.yaml index 916faddce1..d4bd909b0c 100644 --- a/.github/workflows/openvex.yaml +++ b/.github/workflows/openvex.yaml @@ -24,9 +24,9 @@ jobs: name: Run vexctl with: product: pkg:golang/sigs.k8s.io/kueue@${{ env.TAG }} - file: /tmp/kueve.vex.json + file: /tmp/kueue.vex.json - name: Upload openvex data env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | - gh release upload $TAG /tmp/kueve.vex.json + gh release upload $TAG /tmp/kueue.vex.json From 58ac69d5b5de8ed8eb5af64de811f6e65b7b1ca8 Mon Sep 17 00:00:00 2001 From: Aldo Culquicondor <1299064+alculquicondor@users.noreply.github.com> Date: Tue, 20 Feb 2024 12:33:44 -0500 Subject: [PATCH 04/48] Exclude labels, image, preemptionPolicy from pod role hash calculation (#1760) Change-Id: I19ab4c259818b0ba0d25dd3346025c343a757ccc --- pkg/controller/jobs/pod/pod_controller.go | 4 - .../jobs/pod/pod_controller_test.go | 146 +++++++++--------- pkg/controller/jobs/pod/pod_webhook.go | 13 -- pkg/controller/jobs/pod/pod_webhook_test.go | 4 +- .../jobs/pod/pod_controller_test.go | 90 +++++++---- 5 files changed, 138 insertions(+), 119 deletions(-) diff --git a/pkg/controller/jobs/pod/pod_controller.go b/pkg/controller/jobs/pod/pod_controller.go index 9cc4a0be44..fafaaee379 100644 --- a/pkg/controller/jobs/pod/pod_controller.go +++ b/pkg/controller/jobs/pod/pod_controller.go @@ -554,9 +554,6 @@ func getRoleHash(p corev1.Pod) (string, error) { } shape := map[string]interface{}{ - "metadata": map[string]interface{}{ - "labels": omitKueueLabels(p.ObjectMeta.Labels), - }, "spec": map[string]interface{}{ "initContainers": containersShape(p.Spec.InitContainers), "containers": 
containersShape(p.Spec.Containers), @@ -565,7 +562,6 @@ func getRoleHash(p corev1.Pod) (string, error) { "tolerations": p.Spec.Tolerations, "runtimeClassName": p.Spec.RuntimeClassName, "priority": p.Spec.Priority, - "preemptionPolicy": p.Spec.PreemptionPolicy, "topologySpreadConstraints": p.Spec.TopologySpreadConstraints, "overhead": p.Spec.Overhead, "resourceClaims": p.Spec.ResourceClaims, diff --git a/pkg/controller/jobs/pod/pod_controller_test.go b/pkg/controller/jobs/pod/pod_controller_test.go index 14205360d4..558995beb6 100644 --- a/pkg/controller/jobs/pod/pod_controller_test.go +++ b/pkg/controller/jobs/pod/pod_controller_test.go @@ -532,7 +532,7 @@ func TestReconciler(t *testing.T) { wantWorkloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns").Finalizers(kueue.ResourceInUseFinalizerName). PodSets( - *utiltesting.MakePodSet("60bc72d3", 2). + *utiltesting.MakePodSet("dc85db45", 2). Request(corev1.ResourceCPU, "1"). SchedulingGates(corev1.PodSchedulingGate{Name: "kueue.x-k8s.io/admission"}). Obj(), @@ -596,7 +596,7 @@ func TestReconciler(t *testing.T) { workloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns").Finalizers(kueue.ResourceInUseFinalizerName). PodSets( - *utiltesting.MakePodSet("60bc72d3", 2). + *utiltesting.MakePodSet("dc85db45", 2). Request(corev1.ResourceCPU, "1"). SchedulingGates(corev1.PodSchedulingGate{Name: "kueue.x-k8s.io/admission"}). Obj(), @@ -610,7 +610,7 @@ func TestReconciler(t *testing.T) { wantWorkloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns").Finalizers(kueue.ResourceInUseFinalizerName). PodSets( - *utiltesting.MakePodSet("60bc72d3", 2). + *utiltesting.MakePodSet("dc85db45", 2). Request(corev1.ResourceCPU, "1"). SchedulingGates(corev1.PodSchedulingGate{Name: "kueue.x-k8s.io/admission"}). Obj(), @@ -635,7 +635,6 @@ func TestReconciler(t *testing.T) { KueueSchedulingGate(). Group("test-group"). GroupTotalCount("2"). - Annotation("kueue.x-k8s.io/role-hash", "60bc72d3"). Obj(), *basePodWrapper. Clone(). @@ -645,7 +644,6 @@ func TestReconciler(t *testing.T) { KueueSchedulingGate(). Group("test-group"). GroupTotalCount("2"). - Annotation("kueue.x-k8s.io/role-hash", "60bc72d3"). Obj(), }, wantPods: []corev1.Pod{ @@ -655,7 +653,6 @@ func TestReconciler(t *testing.T) { KueueFinalizer(). Group("test-group"). GroupTotalCount("2"). - Annotation("kueue.x-k8s.io/role-hash", "60bc72d3"). NodeSelector("kubernetes.io/arch", "arm64"). Obj(), *basePodWrapper. @@ -665,17 +662,16 @@ func TestReconciler(t *testing.T) { KueueFinalizer(). Group("test-group"). GroupTotalCount("2"). - Annotation("kueue.x-k8s.io/role-hash", "60bc72d3"). NodeSelector("kubernetes.io/arch", "arm64"). Obj(), }, workloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns").Finalizers(kueue.ResourceInUseFinalizerName). - PodSets(*utiltesting.MakePodSet("60bc72d3", 2).Request(corev1.ResourceCPU, "1").Obj()). + PodSets(*utiltesting.MakePodSet("dc85db45", 2).Request(corev1.ResourceCPU, "1").Obj()). OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "pod", "test-uid"). OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "pod2", "test-uid"). ReserveQuota( - utiltesting.MakeAdmission("cq", "60bc72d3"). + utiltesting.MakeAdmission("cq", "dc85db45"). Assignment(corev1.ResourceCPU, "unit-test-flavor", "2"). AssignmentPodCount(2). Obj(), @@ -685,11 +681,11 @@ func TestReconciler(t *testing.T) { }, wantWorkloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 
- PodSets(*utiltesting.MakePodSet("60bc72d3", 2).Request(corev1.ResourceCPU, "1").Obj()). + PodSets(*utiltesting.MakePodSet("dc85db45", 2).Request(corev1.ResourceCPU, "1").Obj()). OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "pod", "test-uid"). OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "pod2", "test-uid"). ReserveQuota( - utiltesting.MakeAdmission("cq", "60bc72d3"). + utiltesting.MakeAdmission("cq", "dc85db45"). Assignment(corev1.ResourceCPU, "unit-test-flavor", "2"). AssignmentPodCount(2). Obj(), @@ -754,7 +750,7 @@ func TestReconciler(t *testing.T) { workloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns").Finalizers(kueue.ResourceInUseFinalizerName). PodSets( - *utiltesting.MakePodSet("60bc72d3", 2). + *utiltesting.MakePodSet("dc85db45", 2). Request(corev1.ResourceCPU, "1"). Obj(), ). @@ -768,7 +764,7 @@ func TestReconciler(t *testing.T) { wantWorkloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns").Finalizers(kueue.ResourceInUseFinalizerName). PodSets( - *utiltesting.MakePodSet("60bc72d3", 2). + *utiltesting.MakePodSet("dc85db45", 2). Request(corev1.ResourceCPU, "1"). Obj(), ). @@ -777,7 +773,7 @@ func TestReconciler(t *testing.T) { OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "pod", "test-uid"). OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "pod2", "test-uid"). Admitted(true). - ReclaimablePods(kueue.ReclaimablePod{Name: "60bc72d3", Count: 1}). + ReclaimablePods(kueue.ReclaimablePod{Name: "dc85db45", Count: 1}). Obj(), }, workloadCmpOpts: defaultWorkloadCmpOpts, @@ -822,7 +818,7 @@ func TestReconciler(t *testing.T) { workloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns").Finalizers(kueue.ResourceInUseFinalizerName). PodSets( - *utiltesting.MakePodSet("60bc72d3", 2). + *utiltesting.MakePodSet("dc85db45", 2). Request(corev1.ResourceCPU, "1"). Obj(), ). @@ -836,7 +832,7 @@ func TestReconciler(t *testing.T) { wantWorkloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns").Finalizers(kueue.ResourceInUseFinalizerName). PodSets( - *utiltesting.MakePodSet("60bc72d3", 2). + *utiltesting.MakePodSet("dc85db45", 2). Request(corev1.ResourceCPU, "1"). Obj(), ). @@ -881,7 +877,7 @@ func TestReconciler(t *testing.T) { workloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns").Finalizers(kueue.ResourceInUseFinalizerName). PodSets( - *utiltesting.MakePodSet("60bc72d3", 2). + *utiltesting.MakePodSet("dc85db45", 2). Request(corev1.ResourceCPU, "1"). Obj(), ). @@ -894,7 +890,7 @@ func TestReconciler(t *testing.T) { wantWorkloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns").Finalizers(kueue.ResourceInUseFinalizerName). PodSets( - *utiltesting.MakePodSet("60bc72d3", 2). + *utiltesting.MakePodSet("dc85db45", 2). Request(corev1.ResourceCPU, "1"). Obj(), ). @@ -951,7 +947,7 @@ func TestReconciler(t *testing.T) { }, workloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns").Finalizers(kueue.ResourceInUseFinalizerName). - PodSets(*utiltesting.MakePodSet("60bc72d3", 2).Request(corev1.ResourceCPU, "1").Obj()). + PodSets(*utiltesting.MakePodSet("dc85db45", 2).Request(corev1.ResourceCPU, "1").Obj()). Queue("test-queue"). OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "pod", "test-uid"). OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "pod2", "test-uid"). 
@@ -966,7 +962,7 @@ func TestReconciler(t *testing.T) { }, wantWorkloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns").Finalizers(kueue.ResourceInUseFinalizerName). - PodSets(*utiltesting.MakePodSet("60bc72d3", 2).Request(corev1.ResourceCPU, "1").Obj()). + PodSets(*utiltesting.MakePodSet("dc85db45", 2).Request(corev1.ResourceCPU, "1").Obj()). Queue("test-queue"). OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "pod", "test-uid"). OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "pod2", "test-uid"). @@ -1004,7 +1000,7 @@ func TestReconciler(t *testing.T) { workloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns"). PodSets( - *utiltesting.MakePodSet("60bc72d3", 2). + *utiltesting.MakePodSet("dc85db45", 2). Request(corev1.ResourceCPU, "1"). Obj(), ). @@ -1023,7 +1019,7 @@ func TestReconciler(t *testing.T) { wantWorkloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns"). PodSets( - *utiltesting.MakePodSet("60bc72d3", 2). + *utiltesting.MakePodSet("dc85db45", 2). Request(corev1.ResourceCPU, "1"). Obj(), ). @@ -1121,7 +1117,7 @@ func TestReconciler(t *testing.T) { wantPods: []corev1.Pod{}, workloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns").Finalizers(kueue.ResourceInUseFinalizerName). - PodSets(*utiltesting.MakePodSet("60bc72d3", 2).Request(corev1.ResourceCPU, "1").Obj()). + PodSets(*utiltesting.MakePodSet("dc85db45", 2).Request(corev1.ResourceCPU, "1").Obj()). ControllerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "pod", "test-uid"). ControllerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "pod2", "test-uid"). Queue("test-queue"). @@ -1192,28 +1188,28 @@ func TestReconciler(t *testing.T) { workloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns").Finalizers(kueue.ResourceInUseFinalizerName). PodSets( - *utiltesting.MakePodSet("60bc72d3", 2). + *utiltesting.MakePodSet("dc85db45", 2). Request(corev1.ResourceCPU, "1"). Obj(), ). Queue("user-queue"). OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "pod", "test-uid"). OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "pod2", "test-uid"). - ReserveQuota(utiltesting.MakeAdmission("cq", "60bc72d3").AssignmentPodCount(1).Obj()). + ReserveQuota(utiltesting.MakeAdmission("cq", "dc85db45").AssignmentPodCount(1).Obj()). Admitted(true). Obj(), }, wantWorkloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns").Finalizers(kueue.ResourceInUseFinalizerName). PodSets( - *utiltesting.MakePodSet("60bc72d3", 2). + *utiltesting.MakePodSet("dc85db45", 2). Request(corev1.ResourceCPU, "1"). Obj(), ). Queue("user-queue"). OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "pod", "test-uid"). OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "pod2", "test-uid"). - ReserveQuota(utiltesting.MakeAdmission("cq", "60bc72d3").AssignmentPodCount(1).Obj()). + ReserveQuota(utiltesting.MakeAdmission("cq", "dc85db45").AssignmentPodCount(1).Obj()). Admitted(true). Obj(), }, @@ -1268,17 +1264,17 @@ func TestReconciler(t *testing.T) { workloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns").Finalizers(kueue.ResourceInUseFinalizerName). PodSets( - *utiltesting.MakePodSet("60bc72d3", 3). + *utiltesting.MakePodSet("dc85db45", 3). Request(corev1.ResourceCPU, "1"). Obj(), ). Queue("user-queue"). - ReserveQuota(utiltesting.MakeAdmission("cq", "60bc72d3").AssignmentPodCount(3).Obj()). + ReserveQuota(utiltesting.MakeAdmission("cq", "dc85db45").AssignmentPodCount(3).Obj()). Admitted(true). 
OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "pod", "test-uid"). OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "pod2", "test-uid"). OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "pod3", "test-uid"). - ReclaimablePods(kueue.ReclaimablePod{Name: "60bc72d3", Count: 1}). + ReclaimablePods(kueue.ReclaimablePod{Name: "dc85db45", Count: 1}). Obj(), }, wantPods: []corev1.Pod{ @@ -1320,14 +1316,14 @@ func TestReconciler(t *testing.T) { wantWorkloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns").Finalizers(kueue.ResourceInUseFinalizerName). PodSets( - *utiltesting.MakePodSet("60bc72d3", 3). + *utiltesting.MakePodSet("dc85db45", 3). Request(corev1.ResourceCPU, "1"). Obj(), ). Queue("user-queue"). - ReserveQuota(utiltesting.MakeAdmission("cq", "60bc72d3").AssignmentPodCount(3).Obj()). + ReserveQuota(utiltesting.MakeAdmission("cq", "dc85db45").AssignmentPodCount(3).Obj()). ReclaimablePods(kueue.ReclaimablePod{ - Name: "60bc72d3", + Name: "dc85db45", Count: 1, }). Admitted(true). @@ -1335,7 +1331,7 @@ func TestReconciler(t *testing.T) { OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "pod2", "test-uid"). OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "pod3", "test-uid"). OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "replacement", "test-uid"). - ReclaimablePods(kueue.ReclaimablePod{Name: "60bc72d3", Count: 1}). + ReclaimablePods(kueue.ReclaimablePod{Name: "dc85db45", Count: 1}). Obj(), }, workloadCmpOpts: defaultWorkloadCmpOpts, @@ -1411,7 +1407,7 @@ func TestReconciler(t *testing.T) { workloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns").Finalizers(kueue.ResourceInUseFinalizerName). PodSets( - *utiltesting.MakePodSet("60bc72d3", 2). + *utiltesting.MakePodSet("dc85db45", 2). Request(corev1.ResourceCPU, "1"). Obj(), ). @@ -1426,7 +1422,7 @@ func TestReconciler(t *testing.T) { wantWorkloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns").Finalizers(kueue.ResourceInUseFinalizerName). PodSets( - *utiltesting.MakePodSet("60bc72d3", 2). + *utiltesting.MakePodSet("dc85db45", 2). Request(corev1.ResourceCPU, "1"). Obj(), ). @@ -1480,7 +1476,7 @@ func TestReconciler(t *testing.T) { workloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns").Finalizers(kueue.ResourceInUseFinalizerName). PodSets( - *utiltesting.MakePodSet("60bc72d3", 1). + *utiltesting.MakePodSet("dc85db45", 1). Request(corev1.ResourceCPU, "1"). Obj(), ). @@ -1500,7 +1496,7 @@ func TestReconciler(t *testing.T) { wantWorkloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns").Finalizers(kueue.ResourceInUseFinalizerName). PodSets( - *utiltesting.MakePodSet("60bc72d3", 1). + *utiltesting.MakePodSet("dc85db45", 1). Request(corev1.ResourceCPU, "1"). Obj(), ). @@ -1545,7 +1541,7 @@ func TestReconciler(t *testing.T) { workloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns").Finalizers(kueue.ResourceInUseFinalizerName). PodSets( - *utiltesting.MakePodSet("60bc72d3", 1). + *utiltesting.MakePodSet("dc85db45", 1). Request(corev1.ResourceCPU, "1"). Obj(), ). @@ -1565,7 +1561,7 @@ func TestReconciler(t *testing.T) { wantWorkloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns").Finalizers(kueue.ResourceInUseFinalizerName). PodSets( - *utiltesting.MakePodSet("60bc72d3", 1). + *utiltesting.MakePodSet("dc85db45", 1). Request(corev1.ResourceCPU, "1"). Obj(), ). 
@@ -1655,7 +1651,7 @@ func TestReconciler(t *testing.T) { }, wantWorkloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns").Finalizers(kueue.ResourceInUseFinalizerName). - PodSets(*utiltesting.MakePodSet("60bc72d3", 2).Request(corev1.ResourceCPU, "1").Obj()). + PodSets(*utiltesting.MakePodSet("dc85db45", 2).Request(corev1.ResourceCPU, "1").Obj()). Queue("test-queue"). Priority(0). OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "pod", "test-uid"). @@ -1716,7 +1712,7 @@ func TestReconciler(t *testing.T) { workloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns"). PodSets( - *utiltesting.MakePodSet("60bc72d3", 2). + *utiltesting.MakePodSet("dc85db45", 2). Request(corev1.ResourceCPU, "1"). Obj(), ). @@ -1730,7 +1726,7 @@ func TestReconciler(t *testing.T) { wantWorkloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns"). PodSets( - *utiltesting.MakePodSet("60bc72d3", 2). + *utiltesting.MakePodSet("dc85db45", 2). Request(corev1.ResourceCPU, "1"). Obj(), ). @@ -1770,7 +1766,7 @@ func TestReconciler(t *testing.T) { *utiltesting.MakePodSet("absent-pod-role", 1). Request(corev1.ResourceCPU, "1"). Obj(), - *utiltesting.MakePodSet("60bc72d3", 1). + *utiltesting.MakePodSet("dc85db45", 1). Request(corev1.ResourceCPU, "1"). Obj(), ). @@ -1786,7 +1782,7 @@ func TestReconciler(t *testing.T) { *utiltesting.MakePodSet("absent-pod-role", 1). Request(corev1.ResourceCPU, "1"). Obj(), - *utiltesting.MakePodSet("60bc72d3", 1). + *utiltesting.MakePodSet("dc85db45", 1). Request(corev1.ResourceCPU, "1"). Obj(), ). @@ -1882,7 +1878,7 @@ func TestReconciler(t *testing.T) { wantWorkloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns").Finalizers(kueue.ResourceInUseFinalizerName). PodSets( - *utiltesting.MakePodSet("60bc72d3", 2). + *utiltesting.MakePodSet("dc85db45", 2). SchedulingGates(corev1.PodSchedulingGate{Name: "kueue.x-k8s.io/admission"}). Request(corev1.ResourceCPU, "1"). Obj(), @@ -1980,7 +1976,7 @@ func TestReconciler(t *testing.T) { Label("kueue.x-k8s.io/managed", "true"). KueueFinalizer(). Group("test-group"). - Image("test-image-role2", nil). + Request(corev1.ResourceMemory, "1Gi"). GroupTotalCount("3"). StatusPhase(corev1.PodFailed). Obj(), @@ -2007,7 +2003,7 @@ func TestReconciler(t *testing.T) { Name("pod3"). Label("kueue.x-k8s.io/managed", "true"). Group("test-group"). - Image("test-image-role2", nil). + Request(corev1.ResourceMemory, "1Gi"). GroupTotalCount("3"). StatusPhase(corev1.PodFailed). Obj(), @@ -2015,10 +2011,11 @@ func TestReconciler(t *testing.T) { workloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns"). PodSets( - *utiltesting.MakePodSet("4389b941", 1). + *utiltesting.MakePodSet("a119f908", 1). Request(corev1.ResourceCPU, "1"). + Request(corev1.ResourceMemory, "1Gi"). Obj(), - *utiltesting.MakePodSet("60bc72d3", 2). + *utiltesting.MakePodSet("dc85db45", 2). Request(corev1.ResourceCPU, "1"). Obj(), ). @@ -2033,10 +2030,11 @@ func TestReconciler(t *testing.T) { wantWorkloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns"). PodSets( - *utiltesting.MakePodSet("4389b941", 1). + *utiltesting.MakePodSet("a119f908", 1). Request(corev1.ResourceCPU, "1"). + Request(corev1.ResourceMemory, "1Gi"). Obj(), - *utiltesting.MakePodSet("60bc72d3", 2). + *utiltesting.MakePodSet("dc85db45", 2). Request(corev1.ResourceCPU, "1"). Obj(), ). @@ -2091,7 +2089,7 @@ func TestReconciler(t *testing.T) { Label("kueue.x-k8s.io/managed", "true"). KueueFinalizer(). Group("test-group"). 
- Image("test-image-role2", nil). + Request(corev1.ResourceMemory, "1Gi"). GroupTotalCount("3"). StatusPhase(corev1.PodSucceeded). Obj(), @@ -2120,7 +2118,7 @@ func TestReconciler(t *testing.T) { Label("kueue.x-k8s.io/managed", "true"). KueueFinalizer(). Group("test-group"). - Image("test-image-role2", nil). + Request(corev1.ResourceMemory, "1Gi"). GroupTotalCount("3"). StatusPhase(corev1.PodSucceeded). Obj(), @@ -2128,10 +2126,11 @@ func TestReconciler(t *testing.T) { workloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns"). PodSets( - *utiltesting.MakePodSet("007b32e9", 1). + *utiltesting.MakePodSet("4ebdd4a6", 1). Request(corev1.ResourceCPU, "1"). + Request(corev1.ResourceMemory, "1Gi"). Obj(), - *utiltesting.MakePodSet("60bc72d3", 2). + *utiltesting.MakePodSet("dc85db45", 2). Request(corev1.ResourceCPU, "1"). Obj(), ). @@ -2146,10 +2145,11 @@ func TestReconciler(t *testing.T) { wantWorkloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns"). PodSets( - *utiltesting.MakePodSet("007b32e9", 1). + *utiltesting.MakePodSet("4ebdd4a6", 1). Request(corev1.ResourceCPU, "1"). + Request(corev1.ResourceMemory, "1Gi"). Obj(), - *utiltesting.MakePodSet("60bc72d3", 2). + *utiltesting.MakePodSet("dc85db45", 2). Request(corev1.ResourceCPU, "1"). Obj(), ). @@ -2159,7 +2159,7 @@ func TestReconciler(t *testing.T) { OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "pod", "test-uid"). OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "pod2", "test-uid"). OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "pod3", "test-uid"). - ReclaimablePods(kueue.ReclaimablePod{Name: "007b32e9", Count: 1}). + ReclaimablePods(kueue.ReclaimablePod{Name: "4ebdd4a6", Count: 1}). Obj(), }, workloadCmpOpts: defaultWorkloadCmpOpts, @@ -2201,7 +2201,7 @@ func TestReconciler(t *testing.T) { wantWorkloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns").Finalizers(kueue.ResourceInUseFinalizerName). PodSets( - *utiltesting.MakePodSet("60bc72d3", 1). + *utiltesting.MakePodSet("dc85db45", 1). Request(corev1.ResourceCPU, "1"). SchedulingGates(corev1.PodSchedulingGate{Name: "kueue.x-k8s.io/admission"}). Obj(), @@ -2284,7 +2284,7 @@ func TestReconciler(t *testing.T) { workloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns"). PodSets( - *utiltesting.MakePodSet("60bc72d3", 2). + *utiltesting.MakePodSet("dc85db45", 2). Request(corev1.ResourceCPU, "1"). Obj(), ). @@ -2297,7 +2297,7 @@ func TestReconciler(t *testing.T) { wantWorkloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns"). PodSets( - *utiltesting.MakePodSet("60bc72d3", 2). + *utiltesting.MakePodSet("dc85db45", 2). Request(corev1.ResourceCPU, "1"). Obj(), ). @@ -2356,7 +2356,7 @@ func TestReconciler(t *testing.T) { *utiltesting.MakePodSet("aaf269e6", 1). Request(corev1.ResourceCPU, "1"). Obj(), - *utiltesting.MakePodSet("60bc72d3", 1). + *utiltesting.MakePodSet("dc85db45", 1). Request(corev1.ResourceCPU, "1"). Obj(), ). @@ -2372,7 +2372,7 @@ func TestReconciler(t *testing.T) { *utiltesting.MakePodSet("aaf269e6", 1). Request(corev1.ResourceCPU, "1"). Obj(), - *utiltesting.MakePodSet("60bc72d3", 1). + *utiltesting.MakePodSet("dc85db45", 1). Request(corev1.ResourceCPU, "1"). Obj(), ). @@ -2462,7 +2462,7 @@ func TestReconciler(t *testing.T) { workloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns"). PodSets( - *utiltesting.MakePodSet("60bc72d3", 1). + *utiltesting.MakePodSet("dc85db45", 1). Request(corev1.ResourceCPU, "1"). Obj(), ). 
@@ -2474,7 +2474,7 @@ func TestReconciler(t *testing.T) { wantWorkloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns"). PodSets( - *utiltesting.MakePodSet("60bc72d3", 1). + *utiltesting.MakePodSet("dc85db45", 1). Request(corev1.ResourceCPU, "1"). Obj(), ). @@ -2543,7 +2543,7 @@ func TestReconciler(t *testing.T) { workloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns"). PodSets( - *utiltesting.MakePodSet("60bc72d3", 2). + *utiltesting.MakePodSet("dc85db45", 2). Request(corev1.ResourceCPU, "1"). Obj(), ). @@ -2558,7 +2558,7 @@ func TestReconciler(t *testing.T) { wantWorkloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns"). PodSets( - *utiltesting.MakePodSet("60bc72d3", 2). + *utiltesting.MakePodSet("dc85db45", 2). Request(corev1.ResourceCPU, "1"). Obj(), ). @@ -2631,7 +2631,7 @@ func TestReconciler(t *testing.T) { wantWorkloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns").Finalizers(kueue.ResourceInUseFinalizerName). PodSets( - *utiltesting.MakePodSet("60bc72d3", 1). + *utiltesting.MakePodSet("dc85db45", 1). Request(corev1.ResourceCPU, "1"). SchedulingGates(corev1.PodSchedulingGate{Name: "kueue.x-k8s.io/admission"}). Obj(), @@ -2760,7 +2760,7 @@ func TestReconciler(t *testing.T) { workloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns"). PodSets( - *utiltesting.MakePodSet("60bc72d3", 3). + *utiltesting.MakePodSet("dc85db45", 3). Request(corev1.ResourceCPU, "1"). Obj(), ). @@ -2774,7 +2774,7 @@ func TestReconciler(t *testing.T) { wantWorkloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns"). PodSets( - *utiltesting.MakePodSet("60bc72d3", 3). + *utiltesting.MakePodSet("dc85db45", 3). Request(corev1.ResourceCPU, "1"). Obj(), ). @@ -2866,7 +2866,7 @@ func TestReconciler(t *testing.T) { workloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns"). PodSets( - *utiltesting.MakePodSet("60bc72d3", 3). + *utiltesting.MakePodSet("dc85db45", 3). Request(corev1.ResourceCPU, "1"). Obj(), ). @@ -2886,7 +2886,7 @@ func TestReconciler(t *testing.T) { wantWorkloads: []kueue.Workload{ *utiltesting.MakeWorkload("test-group", "ns"). PodSets( - *utiltesting.MakePodSet("60bc72d3", 3). + *utiltesting.MakePodSet("dc85db45", 3). Request(corev1.ResourceCPU, "1"). Obj(), ). 
diff --git a/pkg/controller/jobs/pod/pod_webhook.go b/pkg/controller/jobs/pod/pod_webhook.go index 3a090680f3..b81ae06563 100644 --- a/pkg/controller/jobs/pod/pod_webhook.go +++ b/pkg/controller/jobs/pod/pod_webhook.go @@ -20,7 +20,6 @@ import ( "context" "errors" "fmt" - "strings" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/validation" @@ -105,21 +104,9 @@ func getPodOptions(integrationOpts map[string]any) (configapi.PodIntegrationOpti var _ webhook.CustomDefaulter = &PodWebhook{} -func omitKueueLabels(l map[string]string) map[string]string { - result := map[string]string{} - - for key, value := range l { - if !strings.HasPrefix(key, "kueue.x-k8s.io/") { - result[key] = value - } - } - return result -} - func containersShape(containers []corev1.Container) (result []map[string]interface{}) { for _, c := range containers { result = append(result, map[string]interface{}{ - "image": c.Image, "resources": map[string]interface{}{ "requests": c.Resources.Requests, }, diff --git a/pkg/controller/jobs/pod/pod_webhook_test.go b/pkg/controller/jobs/pod/pod_webhook_test.go index 2bd4f6759d..f2f8d696ba 100644 --- a/pkg/controller/jobs/pod/pod_webhook_test.go +++ b/pkg/controller/jobs/pod/pod_webhook_test.go @@ -233,7 +233,7 @@ func TestDefault(t *testing.T) { want: testingpod.MakePod("test-pod", defaultNamespace.Name). Queue("test-queue"). Group("test-group"). - RoleHash("3b4907a8"). + RoleHash("a9f06f3a"). Label("kueue.x-k8s.io/managed", "true"). KueueSchedulingGate(). KueueFinalizer(). @@ -250,7 +250,7 @@ func TestDefault(t *testing.T) { want: testingpod.MakePod("test-pod", defaultNamespace.Name). Queue("test-queue"). Group("test-group"). - RoleHash("3b4907a8"). + RoleHash("a9f06f3a"). Label("kueue.x-k8s.io/managed", "true"). KueueSchedulingGate(). KueueFinalizer(). diff --git a/test/integration/controller/jobs/pod/pod_controller_test.go b/test/integration/controller/jobs/pod/pod_controller_test.go index 8a0f42a469..5e5333113f 100644 --- a/test/integration/controller/jobs/pod/pod_controller_test.go +++ b/test/integration/controller/jobs/pod/pod_controller_test.go @@ -29,6 +29,7 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" @@ -503,7 +504,7 @@ var _ = ginkgo.Describe("Pod controller", ginkgo.Ordered, ginkgo.ContinueOnFailu pod2 := testingpod.MakePod("test-pod2", ns.Name). Group("test-group"). GroupTotalCount("2"). - Image("test-image", nil). + Request(corev1.ResourceCPU, "1"). Queue("test-queue"). Obj() pod1LookupKey := client.ObjectKeyFromObject(pod1) @@ -529,15 +530,27 @@ var _ = ginkgo.Describe("Pod controller", ginkgo.Ordered, ginkgo.ContinueOnFailu gomega.Expect(createdWorkload.Spec.QueueName).To(gomega.Equal("test-queue"), "The Workload should have .spec.queueName set") ginkgo.By("checking that all pods in group are unsuspended when workload is admitted", func() { - admission := testing.MakeAdmission(clusterQueue.Name, "7c214403", "41b86f81"). - Assignment(corev1.ResourceCPU, "default", "1"). - AssignmentPodCount(2). 
- Obj() + admission := testing.MakeAdmission(clusterQueue.Name).PodSets( + kueue.PodSetAssignment{ + Name: "4b0469f7", + Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ + corev1.ResourceCPU: "default", + }, + Count: ptr.To[int32](1), + }, + kueue.PodSetAssignment{ + Name: "bf90803c", + Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ + corev1.ResourceCPU: "default", + }, + Count: ptr.To[int32](1), + }, + ).Obj() gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, createdWorkload, admission)).Should(gomega.Succeed()) util.SyncAdmittedConditionForWorkloads(ctx, k8sClient, createdWorkload) util.ExpectPodUnsuspendedWithNodeSelectors(ctx, k8sClient, pod1LookupKey, map[string]string{"kubernetes.io/arch": "arm64"}) - util.ExpectPodUnsuspendedWithNodeSelectors(ctx, k8sClient, pod2LookupKey, nil) + util.ExpectPodUnsuspendedWithNodeSelectors(ctx, k8sClient, pod2LookupKey, map[string]string{"kubernetes.io/arch": "arm64"}) gomega.Expect(k8sClient.Get(ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed()) gomega.Expect(createdWorkload.Status.Conditions).Should(gomega.BeComparableTo( @@ -602,7 +615,7 @@ var _ = ginkgo.Describe("Pod controller", ginkgo.Ordered, ginkgo.ContinueOnFailu gomega.Expect(createdWorkload.Spec.QueueName).To(gomega.Equal("test-queue"), "The Workload should have .spec.queueName set") originalWorkloadUID := createdWorkload.UID - admission := testing.MakeAdmission(clusterQueue.Name, "7c214403"). + admission := testing.MakeAdmission(clusterQueue.Name, "bf90803c"). Assignment(corev1.ResourceCPU, "default", "1"). AssignmentPodCount(createdWorkload.Spec.PodSets[0].Count). Obj() @@ -724,7 +737,7 @@ var _ = ginkgo.Describe("Pod controller", ginkgo.Ordered, ginkgo.ContinueOnFailu gomega.Expect(createdWorkload.Spec.QueueName).To(gomega.Equal("test-queue"), "The Workload should have .spec.queueName set") ginkgo.By("checking that pod is unsuspended when workload is admitted") - admission := testing.MakeAdmission(clusterQueue.Name, "7c214403"). + admission := testing.MakeAdmission(clusterQueue.Name, "bf90803c"). Assignment(corev1.ResourceCPU, "default", "1"). AssignmentPodCount(createdWorkload.Spec.PodSets[0].Count). Obj() @@ -787,7 +800,7 @@ var _ = ginkgo.Describe("Pod controller", ginkgo.Ordered, ginkgo.ContinueOnFailu pod2 := testingpod.MakePod("test-pod2", ns.Name). Group("test-group"). GroupTotalCount("2"). - Image("test-image", nil). + Request(corev1.ResourceCPU, "1"). Queue("test-queue"). Obj() pod1LookupKey := client.ObjectKeyFromObject(pod1) @@ -813,15 +826,27 @@ var _ = ginkgo.Describe("Pod controller", ginkgo.Ordered, ginkgo.ContinueOnFailu createdPod := &corev1.Pod{} ginkgo.By("checking that all pods in group are unsuspended when workload is admitted", func() { - admission := testing.MakeAdmission(clusterQueue.Name, "7c214403", "41b86f81"). - Assignment(corev1.ResourceCPU, "default", "1"). - AssignmentPodCount(2). 
- Obj() + admission := testing.MakeAdmission(clusterQueue.Name).PodSets( + kueue.PodSetAssignment{ + Name: "4b0469f7", + Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ + corev1.ResourceCPU: "default", + }, + Count: ptr.To[int32](1), + }, + kueue.PodSetAssignment{ + Name: "bf90803c", + Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ + corev1.ResourceCPU: "default", + }, + Count: ptr.To[int32](1), + }, + ).Obj() gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, createdWorkload, admission)).Should(gomega.Succeed()) util.SyncAdmittedConditionForWorkloads(ctx, k8sClient, createdWorkload) util.ExpectPodUnsuspendedWithNodeSelectors(ctx, k8sClient, pod1LookupKey, map[string]string{"kubernetes.io/arch": "arm64"}) - util.ExpectPodUnsuspendedWithNodeSelectors(ctx, k8sClient, pod2LookupKey, nil) + util.ExpectPodUnsuspendedWithNodeSelectors(ctx, k8sClient, pod2LookupKey, map[string]string{"kubernetes.io/arch": "arm64"}) gomega.Expect(k8sClient.Get(ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed()) gomega.Expect(createdWorkload.Status.Conditions).Should(gomega.BeComparableTo( @@ -861,7 +886,7 @@ var _ = ginkgo.Describe("Pod controller", ginkgo.Ordered, ginkgo.ContinueOnFailu replacementPod2LookupKey := client.ObjectKeyFromObject(replacementPod2) ginkgo.By("checking that unretriable replacement pod is allowed to run", func() { - util.ExpectPodUnsuspendedWithNodeSelectors(ctx, k8sClient, replacementPod2LookupKey, nil) + util.ExpectPodUnsuspendedWithNodeSelectors(ctx, k8sClient, replacementPod2LookupKey, map[string]string{"kubernetes.io/arch": "arm64"}) }) ginkgo.By("checking that pod group is finalized when unretriable pod has failed", func() { @@ -894,13 +919,13 @@ var _ = ginkgo.Describe("Pod controller", ginkgo.Ordered, ginkgo.ContinueOnFailu pod2 := testingpod.MakePod("test-pod2", ns.Name). Group("test-group"). GroupTotalCount("2"). - Image("test-image", nil). + Request(corev1.ResourceCPU, "1"). Queue("test-queue"). Obj() excessBasePod := testingpod.MakePod("excess-pod", ns.Name). Group("test-group"). GroupTotalCount("2"). - Image("test-image", nil). + Request(corev1.ResourceCPU, "1"). Queue("test-queue") pod1LookupKey := client.ObjectKeyFromObject(pod1) @@ -940,15 +965,27 @@ var _ = ginkgo.Describe("Pod controller", ginkgo.Ordered, ginkgo.ContinueOnFailu }) ginkgo.By("checking that all pods in group are unsuspended when workload is admitted", func() { - admission := testing.MakeAdmission(clusterQueue.Name, "7c214403", "41b86f81"). - Assignment(corev1.ResourceCPU, "default", "1"). - AssignmentPodCount(2). 
- Obj() + admission := testing.MakeAdmission(clusterQueue.Name).PodSets( + kueue.PodSetAssignment{ + Name: "4b0469f7", + Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ + corev1.ResourceCPU: "default", + }, + Count: ptr.To[int32](1), + }, + kueue.PodSetAssignment{ + Name: "bf90803c", + Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ + corev1.ResourceCPU: "default", + }, + Count: ptr.To[int32](1), + }, + ).Obj() gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, createdWorkload, admission)).Should(gomega.Succeed()) util.SyncAdmittedConditionForWorkloads(ctx, k8sClient, createdWorkload) util.ExpectPodUnsuspendedWithNodeSelectors(ctx, k8sClient, pod1LookupKey, map[string]string{"kubernetes.io/arch": "arm64"}) - util.ExpectPodUnsuspendedWithNodeSelectors(ctx, k8sClient, pod2LookupKey, nil) + util.ExpectPodUnsuspendedWithNodeSelectors(ctx, k8sClient, pod2LookupKey, map[string]string{"kubernetes.io/arch": "arm64"}) gomega.Expect(k8sClient.Get(ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed()) gomega.Expect(createdWorkload.Status.Conditions).Should(gomega.BeComparableTo( @@ -975,12 +1012,13 @@ var _ = ginkgo.Describe("Pod controller", ginkgo.Ordered, ginkgo.ContinueOnFailu pod1 := testingpod.MakePod("test-pod1", ns.Name). Group("test-group"). GroupTotalCount("2"). + Request(corev1.ResourceCPU, "1"). Queue("test-queue"). Obj() pod2 := testingpod.MakePod("test-pod2", ns.Name). Group("test-group"). GroupTotalCount("2"). - Image("test-image", nil). + Request(corev1.ResourceCPU, "2"). Queue("test-queue"). Obj() @@ -1128,22 +1166,20 @@ var _ = ginkgo.Describe("Pod controller interacting with scheduler", ginkgo.Orde role1Pod1 := basePod. Clone(). Name("role1-pod1"). - Image("role1-image", nil). Obj() role1Pod2 := basePod. Clone(). Name("role1-pod2"). - Image("role1-image", nil). Obj() role2Pod1 := basePod. Clone(). Name("role2-pod1"). - Image("role2-image", nil). + Request(corev1.ResourceCPU, "1.5"). Obj() role2Pod2 := basePod. Clone(). Name("role2-pod2"). - Image("role2-image", nil). + Request(corev1.ResourceCPU, "1.5"). 
Obj() ginkgo.By("creating the pods", func() { From 336fecc98a6c63e2959ad12eef38cce0896c9e79 Mon Sep 17 00:00:00 2001 From: k8s-infra-cherrypick-robot <90416843+k8s-infra-cherrypick-robot@users.noreply.github.com> Date: Thu, 22 Feb 2024 00:38:23 -0800 Subject: [PATCH 05/48] [release-0.6] Don't deploy visibility by default (#1764) * Don't deploy visibility by default * Review Remarks * Review Remarks * Review Remarks --------- Co-authored-by: Traian Schiau --- charts/kueue/templates/_helpers.tpl | 18 ++++++++++++++++++ .../kueue/templates/visibility/apiservice.yaml | 2 ++ .../templates/visibility/role_binding.yaml | 2 ++ charts/kueue/templates/visibility/service.yaml | 2 ++ config/alpha-enabled/kustomization.yaml | 1 + config/default/kustomization.yaml | 1 - config/visibility/kustomization.yaml | 7 +++++++ hack/update-helm.sh | 7 +++++++ test/e2e/config/kustomization.yaml | 1 + test/e2e/config_1_26/kustomization.yaml | 1 + 10 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 config/visibility/kustomization.yaml diff --git a/charts/kueue/templates/_helpers.tpl b/charts/kueue/templates/_helpers.tpl index d207db1faa..9b0a8a5606 100644 --- a/charts/kueue/templates/_helpers.tpl +++ b/charts/kueue/templates/_helpers.tpl @@ -74,3 +74,21 @@ FeatureGates - --feature-gates={{ $features | trimSuffix "," }} {{- end }} {{- end }} + +{{/* +IsFeatureGateEnabled - outputs true if the feature gate .Feature is enabled in the .List +Usage: + {{- if include "kueue.isFeatureGateEnabled" (dict "List" .Values.controllerManager.featureGates "Feature" "VisibilityOnDemand") }} +*/}} +{{- define "kueue.isFeatureGateEnabled" -}} +{{- $feature := .Feature }} +{{- $enabled := false }} +{{- range .List }} +{{- if (and (eq .name $feature) (eq .enabled true)) }} +{{- $enabled = true }} +{{- end }} +{{- end }} +{{- if $enabled }} +{{- $enabled -}} +{{- end }} +{{- end }} diff --git a/charts/kueue/templates/visibility/apiservice.yaml b/charts/kueue/templates/visibility/apiservice.yaml index 31cbdf453a..497cea57f5 100644 --- a/charts/kueue/templates/visibility/apiservice.yaml +++ b/charts/kueue/templates/visibility/apiservice.yaml @@ -1,3 +1,4 @@ +{{- if include "kueue.isFeatureGateEnabled" (dict "List" .Values.controllerManager.featureGates "Feature" "VisibilityOnDemand") }} apiVersion: apiregistration.k8s.io/v1 kind: APIService metadata: @@ -11,3 +12,4 @@ spec: namespace: '{{ .Release.Namespace }}' version: v1alpha1 versionPriority: 100 +{{- end }} diff --git a/charts/kueue/templates/visibility/role_binding.yaml b/charts/kueue/templates/visibility/role_binding.yaml index f67ff03225..7cdcc3e376 100644 --- a/charts/kueue/templates/visibility/role_binding.yaml +++ b/charts/kueue/templates/visibility/role_binding.yaml @@ -1,3 +1,4 @@ +{{- if include "kueue.isFeatureGateEnabled" (dict "List" .Values.controllerManager.featureGates "Feature" "VisibilityOnDemand") }} apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: @@ -13,3 +14,4 @@ subjects: - kind: ServiceAccount name: kueue-controller-manager namespace: '{{ .Release.Namespace }}' +{{- end }} diff --git a/charts/kueue/templates/visibility/service.yaml b/charts/kueue/templates/visibility/service.yaml index fd88e4f986..662de0024a 100644 --- a/charts/kueue/templates/visibility/service.yaml +++ b/charts/kueue/templates/visibility/service.yaml @@ -1,3 +1,4 @@ +{{- if include "kueue.isFeatureGateEnabled" (dict "List" .Values.controllerManager.featureGates "Feature" "VisibilityOnDemand") }} apiVersion: v1 kind: Service metadata: @@ -11,3 +12,4 @@ spec: 
targetPort: 8082 selector: control-plane: controller-manager +{{- end }} diff --git a/config/alpha-enabled/kustomization.yaml b/config/alpha-enabled/kustomization.yaml index 21e4b163d9..ad4c4899e3 100644 --- a/config/alpha-enabled/kustomization.yaml +++ b/config/alpha-enabled/kustomization.yaml @@ -3,6 +3,7 @@ # Use default settings as a base. resources: - ../default +- ../visibility patches: # Modify feature gates to enable AllAlpha=true diff --git a/config/default/kustomization.yaml b/config/default/kustomization.yaml index f4afb3eade..29eebb3b76 100644 --- a/config/default/kustomization.yaml +++ b/config/default/kustomization.yaml @@ -17,7 +17,6 @@ resources: - ../components/rbac - ../components/manager - ../components/internalcert -- ../components/visibility # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in # ../components/crd/kustomization.yaml - ../components/webhook diff --git a/config/visibility/kustomization.yaml b/config/visibility/kustomization.yaml new file mode 100644 index 0000000000..8dbcc994d9 --- /dev/null +++ b/config/visibility/kustomization.yaml @@ -0,0 +1,7 @@ +# This overlay builds the visibility component to be used in combination +# with other overlays. + +namespace: kueue-system +namePrefix: kueue- +resources: +- ../components/visibility diff --git a/hack/update-helm.sh b/hack/update-helm.sh index b726593c2f..d258fcbe21 100755 --- a/hack/update-helm.sh +++ b/hack/update-helm.sh @@ -269,4 +269,11 @@ for output_file in ${DEST_VISIBILITY_DIR}/*.yaml; do if [ "$(cat $output_file | $YQ '.subjects.[] | has("namespace")')" = "true" ]; then $YQ -N -i '.subjects.[].namespace = "{{ .Release.Namespace }}"' $output_file fi + + { + echo '{{- if include "kueue.isFeatureGateEnabled" (dict "List" .Values.controllerManager.featureGates "Feature" "VisibilityOnDemand") }}' + cat $output_file + echo "{{- end }}" + }> ${output_file}.tmp + mv ${output_file}.tmp ${output_file} done diff --git a/test/e2e/config/kustomization.yaml b/test/e2e/config/kustomization.yaml index 56657fe34a..064f203d8b 100644 --- a/test/e2e/config/kustomization.yaml +++ b/test/e2e/config/kustomization.yaml @@ -3,6 +3,7 @@ kind: Kustomization resources: - ../../../config/dev +- ../../../config/visibility replicas: - name: kueue-controller-manager diff --git a/test/e2e/config_1_26/kustomization.yaml b/test/e2e/config_1_26/kustomization.yaml index 11d3f3eecb..70be6af201 100644 --- a/test/e2e/config_1_26/kustomization.yaml +++ b/test/e2e/config_1_26/kustomization.yaml @@ -3,6 +3,7 @@ kind: Kustomization resources: - ../../../config/dev +- ../../../config/visibility patches: - path: manager_e2e_patch.yaml From fe048de9570ebd152135e394a501345c2ae21d38 Mon Sep 17 00:00:00 2001 From: k8s-infra-cherrypick-robot <90416843+k8s-infra-cherrypick-robot@users.noreply.github.com> Date: Mon, 26 Feb 2024 09:53:45 -0800 Subject: [PATCH 06/48] Remove check for CapacityAvailable ProvReq condition (#1771) Co-authored-by: Yaroslava Serdiuk --- .../provisioning/controller.go | 7 ++--- .../provisioning/controller_test.go | 31 ------------------- 2 files changed, 3 insertions(+), 35 deletions(-) diff --git a/pkg/controller/admissionchecks/provisioning/controller.go b/pkg/controller/admissionchecks/provisioning/controller.go index d55c72ebb9..f94ce6f8ee 100644 --- a/pkg/controller/admissionchecks/provisioning/controller.go +++ b/pkg/controller/admissionchecks/provisioning/controller.go @@ -478,9 +478,8 @@ func (c *Controller) syncCheckStates(ctx context.Context, wl *kueue.Workload, ch } 
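	// With the CapacityAvailable check removed, only two ProvisioningRequest
	// conditions are read below: Failed=True keeps the existing retry/rejection
	// handling, and Provisioned=True alone is what flips the admission check to
	// Ready; any other combination leaves the check pending.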
prFailed := apimeta.IsStatusConditionTrue(pr.Status.Conditions, autoscaling.Failed) - prAccepted := apimeta.IsStatusConditionTrue(pr.Status.Conditions, autoscaling.Provisioned) - prAvailable := apimeta.IsStatusConditionTrue(pr.Status.Conditions, autoscaling.CapacityAvailable) - log.V(3).Info("Synchronizing admission check state based on provisioning request", "wl", klog.KObj(wl), "check", check, "prName", pr.Name, "failed", prFailed, "accepted", prAccepted, "available", prAvailable) + prProvisioned := apimeta.IsStatusConditionTrue(pr.Status.Conditions, autoscaling.Provisioned) + log.V(3).Info("Synchronizing admission check state based on provisioning request", "wl", klog.KObj(wl), "check", check, "prName", pr.Name, "failed", prFailed, "accepted", prProvisioned) switch { case prFailed: @@ -497,7 +496,7 @@ func (c *Controller) syncCheckStates(ctx context.Context, wl *kueue.Workload, ch checkState.Message = apimeta.FindStatusCondition(pr.Status.Conditions, autoscaling.Failed).Message } } - case prAccepted || prAvailable: + case prProvisioned: if checkState.State != kueue.CheckStateReady { updated = true checkState.State = kueue.CheckStateReady diff --git a/pkg/controller/admissionchecks/provisioning/controller_test.go b/pkg/controller/admissionchecks/provisioning/controller_test.go index 9a8d66774b..673c42a0ba 100644 --- a/pkg/controller/admissionchecks/provisioning/controller_test.go +++ b/pkg/controller/admissionchecks/provisioning/controller_test.go @@ -479,37 +479,6 @@ func TestReconcile(t *testing.T) { Obj(), }, }, - "when capacity is available": { - workload: baseWorkload.DeepCopy(), - checks: []kueue.AdmissionCheck{*baseCheck.DeepCopy()}, - flavors: []kueue.ResourceFlavor{*baseFlavor1.DeepCopy(), *baseFlavor2.DeepCopy()}, - configs: []kueue.ProvisioningRequestConfig{*baseConfig.DeepCopy()}, - requests: []autoscaling.ProvisioningRequest{ - *requestWithCondition(baseRequest, autoscaling.CapacityAvailable, metav1.ConditionTrue), - }, - templates: []corev1.PodTemplate{*baseTemplate1.DeepCopy(), *baseTemplate2.DeepCopy()}, - wantWorkloads: map[string]*kueue.Workload{ - baseWorkload.Name: (&utiltesting.WorkloadWrapper{Workload: *baseWorkload.DeepCopy()}). - AdmissionChecks(kueue.AdmissionCheckState{ - Name: "check1", - State: kueue.CheckStateReady, - PodSetUpdates: []kueue.PodSetUpdate{ - { - Name: "ps1", - Annotations: map[string]string{"cluster-autoscaler.kubernetes.io/consume-provisioning-request": "wl-check1-1"}, - }, - { - Name: "ps2", - Annotations: map[string]string{"cluster-autoscaler.kubernetes.io/consume-provisioning-request": "wl-check1-1"}, - }, - }, - }, kueue.AdmissionCheckState{ - Name: "not-provisioning", - State: kueue.CheckStatePending, - }). 
- Obj(), - }, - }, "when request is provisioned": { workload: baseWorkload.DeepCopy(), checks: []kueue.AdmissionCheck{*baseCheck.DeepCopy()}, From 80adb72ba5553e21fc9b43269fd231a256cd4c7a Mon Sep 17 00:00:00 2001 From: k8s-infra-cherrypick-robot <90416843+k8s-infra-cherrypick-robot@users.noreply.github.com> Date: Tue, 27 Feb 2024 07:48:39 -0800 Subject: [PATCH 07/48] [release-0.6] Parallelize integration tests (#1775) * Parallelize integration tests * Create parameter INTEGRATION_NPROCS * test INTEGRATION_NPROCS=2 * test INTEGRATION_NPROCS=8 * revert INTEGRATION_NPROCS --------- Co-authored-by: Gabe <15304068+gabesaba@users.noreply.github.com> --- Makefile | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index cb919826f9..3acfe1016a 100644 --- a/Makefile +++ b/Makefile @@ -71,6 +71,11 @@ E2E_K8S_VERSIONS ?= 1.26.12 1.27.9 1.28.5 1.29.0 # Default will delete default kind cluster KIND_CLUSTER_NAME ?= kind +# Number of processes to use during integration tests to run specs within a +# suite in parallel. Suites still run sequentially. User may set this value to 1 +# to run without parallelism. +INTEGRATION_NPROCS ?= 4 + # Setting SHELL to bash allows bash commands to be executed by recipes. # This is a requirement for 'setup-envtest.sh' in the test target. # Options are set to exit when a recipe line exits non-zero or a piped command fails. @@ -167,7 +172,7 @@ test: gotestsum ## Run tests. .PHONY: test-integration test-integration: gomod-download envtest ginkgo mpi-operator-crd ray-operator-crd jobset-operator-crd kf-training-operator-crd cluster-autoscaler-crd ## Run tests. KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)" \ - $(GINKGO) $(GINKGO_ARGS) --junit-report=junit.xml --output-dir=$(ARTIFACTS) -v $(INTEGRATION_TARGET) + $(GINKGO) $(GINKGO_ARGS) -procs=$(INTEGRATION_NPROCS) --junit-report=junit.xml --output-dir=$(ARTIFACTS) -v $(INTEGRATION_TARGET) CREATE_KIND_CLUSTER ?= true .PHONY: test-e2e From 37774405bb10d5b0dd8d3768c20c052a078de9c6 Mon Sep 17 00:00:00 2001 From: k8s-infra-cherrypick-robot <90416843+k8s-infra-cherrypick-robot@users.noreply.github.com> Date: Mon, 4 Mar 2024 01:28:53 -0800 Subject: [PATCH 08/48] Use a custom service account and role (#1783) Use a custom service account and role when generating the multikueue kubeconfigs. Co-authored-by: Traian Schiau --- hack/create-multikueue-kubeconfig.sh | 153 +++++++++++++++++++++++++++ hack/multikueue-e2e-test.sh | 20 ++-- 2 files changed, 166 insertions(+), 7 deletions(-) create mode 100644 hack/create-multikueue-kubeconfig.sh diff --git a/hack/create-multikueue-kubeconfig.sh b/hack/create-multikueue-kubeconfig.sh new file mode 100644 index 0000000000..8658f0c2c8 --- /dev/null +++ b/hack/create-multikueue-kubeconfig.sh @@ -0,0 +1,153 @@ +#!/bin/bash + +# Copyright 2024 The Kubernetes Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
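# A usage sketch, mirroring how hack/multikueue-e2e-test.sh consumes this script
# further below (context names and the output path are placeholders): generate a
# kubeconfig while kubectl points at a worker cluster, then register it with the
# manager cluster as a MultiKueue secret.
#
#   kubectl config use-context <worker-context>
#   ./hack/create-multikueue-kubeconfig.sh worker1.kubeconfig
#   kubectl config use-context <manager-context>
#   kubectl create secret generic multikueue1 -n kueue-system \
#     --from-file=kubeconfig=worker1.kubeconfig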
+ +set -o errexit +set -o nounset +set -o pipefail + +KUBECONFIG_OUT=${1:-kubeconfig} +MULTIKUEUE_SA=multikueue-sa +NAMESPACE=kueue-system + +echo "Creating a custom MultiKueue Role and Service Account" +kubectl apply -f - < ${KUBECONFIG_OUT} < ${ARTIFACTS}/worker1.kubeconfig - $KIND get kubeconfig --name $WORKER2_KIND_CLUSTER_NAME > ${ARTIFACTS}/worker2.kubeconfig - kubectl config use-context kind-${MANAGER_KIND_CLUSTER_NAME} - kubectl create namespace kueue-system - kubectl create secret generic multikueue1 -n kueue-system --from-file=kubeconfig=${ARTIFACTS}/worker1.kubeconfig - kubectl create secret generic multikueue2 -n kueue-system --from-file=kubeconfig=${ARTIFACTS}/worker2.kubeconfig fi } @@ -121,9 +114,22 @@ function kueue_deploy { cluster_kueue_deploy $WORKER2_KIND_CLUSTER_NAME } +function prepare_secrets { + kubectl config use-context kind-${WORKER1_KIND_CLUSTER_NAME} + source ${SOURCE_DIR}/create-multikueue-kubeconfig.sh ${ARTIFACTS}/worker1.kubeconfig + + kubectl config use-context kind-${WORKER2_KIND_CLUSTER_NAME} + source ${SOURCE_DIR}/create-multikueue-kubeconfig.sh ${ARTIFACTS}/worker2.kubeconfig + + kubectl config use-context kind-${MANAGER_KIND_CLUSTER_NAME} + kubectl create secret generic multikueue1 -n kueue-system --from-file=kubeconfig=${ARTIFACTS}/worker1.kubeconfig + kubectl create secret generic multikueue2 -n kueue-system --from-file=kubeconfig=${ARTIFACTS}/worker2.kubeconfig +} + trap cleanup EXIT startup kind_load kueue_deploy +prepare_secrets $GINKGO $GINKGO_ARGS --junit-report=junit.xml --output-dir=$ARTIFACTS -v ./test/e2e/multikueue/... From 08ba598de8b424e30b5bb80be47d7d2c63cef117 Mon Sep 17 00:00:00 2001 From: Traian Schiau <55734665+trasc@users.noreply.github.com> Date: Tue, 5 Mar 2024 12:03:44 +0200 Subject: [PATCH 09/48] [pods]Finalize the replaced pods (#1801) Signed-off-by: Traian Schiau --- keps/976-plain-pods/README.md | 14 +- pkg/controller/jobframework/interface.go | 2 - pkg/controller/jobframework/reconciler.go | 9 - pkg/controller/jobs/pod/pod_controller.go | 231 +++++++---- .../jobs/pod/pod_controller_test.go | 392 +++++++++++++++++- pkg/util/slices/slices.go | 11 + pkg/util/slices/slices_test.go | 33 ++ pkg/util/testingjobs/pod/wrappers.go | 7 + test/e2e/singlecluster/pod_test.go | 14 +- .../jobs/pod/pod_controller_test.go | 35 +- 10 files changed, 642 insertions(+), 106 deletions(-) diff --git a/keps/976-plain-pods/README.md b/keps/976-plain-pods/README.md index 55f07bdb43..a12be4eae5 100644 --- a/keps/976-plain-pods/README.md +++ b/keps/976-plain-pods/README.md @@ -565,8 +565,9 @@ in-memory Workload, then it is considered unmatching and the Workload is evicted In the Pod-group reconciler: 1. If the Pod is not terminated and doesn't have a deletionTimestamp, create a Workload for the pod group if one does not exist. -2. Remove Pod finalizers if the Pod is terminated and the Workload is finished, has a deletion - timestamp or is finished. +2. Remove Pod finalizers if: + - The Pod is terminated and the Workload is finished or has a deletion timestamp. + - The Pod Failed and a valid replacement pod was created for it. 3. Build the in-memory Workload. If its podset counters are greater than the stored Workload, then evict the Workload. 4. For gated pods: @@ -574,16 +575,15 @@ In the Pod-group reconciler: 5. If the number of succeeded pods is equal to the admission count, mark the Workload as Finished and remove the finalizers from the Pods. -Note that we are only removing Pod finalizers once the Workload is finished. 
This is a simple way of -managing finalizers, but it might lead to too many Pods lingering in etcd for a long time after -terminated. - ### Retrying Failed Pods The Pod group will generally only be considered finished if all the Pods finish with a Succeeded phase. This allows the user to send replacement Pods when a Pod in the group fails or if the group is preempted. The replacement Pods can have any name, but they must point to the same pod group. +Once a replacement Pod is created, and Kueue has added it as an owner of the Workload, the +Failed pod will be finalized. If multiple Pods have Failed, a new Pod is assumed to replace +the Pod that failed first. To declare that a group is failed, a user can execute one of the following actions: 1. Issue a Delete for the Workload object. The controller would terminate all running Pods and @@ -708,4 +708,4 @@ While this would be a clean approach, this proposal is targetting users that don wrapping their Pods, and adding one would be a bigger effort than adding annotations. Such amount of effort could be similar to migrating from plain Pods to the Job API, which is already supported. -We could reconsider this based on user feedback. \ No newline at end of file +We could reconsider this based on user feedback. diff --git a/pkg/controller/jobframework/interface.go b/pkg/controller/jobframework/interface.go index fe7ca3d78d..816ae5d3de 100644 --- a/pkg/controller/jobframework/interface.go +++ b/pkg/controller/jobframework/interface.go @@ -111,8 +111,6 @@ type ComposableJob interface { FindMatchingWorkloads(ctx context.Context, c client.Client, r record.EventRecorder) (match *kueue.Workload, toDelete []*kueue.Workload, err error) // Stop implements the custom stop procedure for ComposableJob Stop(ctx context.Context, c client.Client, podSetsInfo []podset.PodSetInfo, stopReason StopReason, eventMsg string) ([]client.Object, error) - // Ensure all members of the ComposableJob are owning the workload - EnsureWorkloadOwnedByAllMembers(ctx context.Context, c client.Client, r record.EventRecorder, workload *kueue.Workload) error } func ParentWorkloadName(job GenericJob) string { diff --git a/pkg/controller/jobframework/reconciler.go b/pkg/controller/jobframework/reconciler.go index 221f22ba04..da20d1521a 100644 --- a/pkg/controller/jobframework/reconciler.go +++ b/pkg/controller/jobframework/reconciler.go @@ -269,15 +269,6 @@ func (r *JobReconciler) ReconcileGenericJob(ctx context.Context, req ctrl.Reques return ctrl.Result{}, err } - // Ensure all members of the composable job own the workload - if wl != nil { - if cj, implements := job.(ComposableJob); implements { - if err := cj.EnsureWorkloadOwnedByAllMembers(ctx, r.client, r.record, wl); err != nil { - return ctrl.Result{}, err - } - } - } - if wl != nil && apimeta.IsStatusConditionTrue(wl.Status.Conditions, kueue.WorkloadFinished) { // Finalize the job if it's finished if _, finished := job.Finished(); finished { diff --git a/pkg/controller/jobs/pod/pod_controller.go b/pkg/controller/jobs/pod/pod_controller.go index fafaaee379..071147eaed 100644 --- a/pkg/controller/jobs/pod/pod_controller.go +++ b/pkg/controller/jobs/pod/pod_controller.go @@ -623,9 +623,13 @@ func (p *Pod) Load(ctx context.Context, c client.Client, key *types.NamespacedNa } func (p *Pod) constructGroupPodSets() ([]kueue.PodSet, error) { + return constructGroupPodSets(p.list.Items) +} + +func constructGroupPodSets(pods []corev1.Pod) ([]kueue.PodSet, error) { var resultPodSets []kueue.PodSet - for _, podInGroup := range p.list.Items { 
+ for _, podInGroup := range pods { if !isPodRunnableOrSucceeded(&podInGroup) { continue } @@ -704,14 +708,12 @@ func (p *Pod) validatePodGroupMetadata(r record.EventRecorder, activePods []core // runnableOrSucceededPods returns a slice of active pods in the group func (p *Pod) runnableOrSucceededPods() []corev1.Pod { - activePods := make([]corev1.Pod, 0, len(p.list.Items)) - for _, pod := range p.list.Items { - if isPodRunnableOrSucceeded(&pod) { - activePods = append(activePods, pod) - } - } + return utilslices.Pick(p.list.Items, isPodRunnableOrSucceeded) +} - return activePods +// notRunnableNorSucceededPods returns a slice of inactive pods in the group +func (p *Pod) notRunnableNorSucceededPods() []corev1.Pod { + return utilslices.Pick(p.list.Items, func(p *corev1.Pod) bool { return !isPodRunnableOrSucceeded(p) }) } // isPodRunnableOrSucceeded returns whether the Pod can eventually run, is Running or Succeeded. @@ -723,21 +725,55 @@ func isPodRunnableOrSucceeded(p *corev1.Pod) bool { return p.Status.Phase != corev1.PodFailed } -// cleanupExcessPods will delete and finalize pods created last if the number of -// activePods is greater than the totalCount value. -func (p *Pod) cleanupExcessPods(ctx context.Context, c client.Client, r record.EventRecorder, totalCount int, activePods []corev1.Pod) error { - log := ctrl.LoggerFrom(ctx) - - extraPodsCount := len(activePods) - totalCount - - if extraPodsCount <= 0 { - return nil +// lastActiveTime returns the last timestamp on which the pod was observed active: +// - the time the pod was declared Failed +// - the deletion time +func lastActiveTime(p *corev1.Pod) time.Time { + lastTransition := metav1.Now() + for _, c := range p.Status.Conditions { + if c.Type == corev1.ContainersReady { + if c.Status == corev1.ConditionFalse && c.Reason == string(corev1.PodFailed) { + lastTransition = c.LastTransitionTime + } + break + } } - // Do not clean up more pods until observing previous operations - if !p.satisfiedExcessPods { - return errPendingOps + deletionTime := ptr.Deref(p.DeletionTimestamp, metav1.Now()) + if lastTransition.Before(&deletionTime) { + return lastTransition.Time } + return deletionTime.Time +} +// sortInactivePods sorts the provided pods slice based on: +// - finalizer state (pods with finalizers are first) +// - lastActiveTime (pods that were active last are first) +// - creation timestamp (newer pods are first) +func sortInactivePods(inactivePods []corev1.Pod) { + sort.Slice(inactivePods, func(i, j int) bool { + pi := &inactivePods[i] + pj := &inactivePods[j] + iFin := slices.Contains(pi.Finalizers, PodFinalizer) + jFin := slices.Contains(pj.Finalizers, PodFinalizer) + if iFin != jFin { + return iFin + } + + iLastActive := lastActiveTime(pi) + jLastActive := lastActiveTime(pj) + + if iLastActive.Equal(jLastActive) { + return pi.CreationTimestamp.Before(&pj.CreationTimestamp) + } + return jLastActive.Before(iLastActive) + }) +} + +// sortActivePods sorts the provided pods slice based on: +// - finalizer state (pods with no finalizers are last) +// - gated state (pods that are still gated are last) +// - creation timestamp (newer pods are last) +func sortActivePods(activePods []corev1.Pod) { // Sort active pods by creation timestamp sort.Slice(activePods, func(i, j int) bool { pi := &activePods[i] @@ -754,16 +790,22 @@ func (p *Pod) cleanupExcessPods(ctx context.Context, c client.Client, r record.E if iGated != jGated { return !iGated } - return pi.ObjectMeta.CreationTimestamp.Before(&pj.ObjectMeta.CreationTimestamp) + return 
pi.CreationTimestamp.Before(&pj.CreationTimestamp) }) +} + +func (p *Pod) removeExcessPods(ctx context.Context, c client.Client, r record.EventRecorder, extraPods []corev1.Pod) error { + if len(extraPods) == 0 { + return nil + } + + log := ctrl.LoggerFrom(ctx) // Extract all the latest created extra pods - extraPods := activePods[len(activePods)-extraPodsCount:] extraPodsUIDs := utilslices.Map(extraPods, func(p *corev1.Pod) types.UID { return p.UID }) p.excessPodExpectations.ExpectUIDs(log, p.key, extraPodsUIDs) // Finalize and delete the active pods created last - err := parallelize.Until(ctx, len(extraPods), func(i int) error { pod := extraPods[i] if controllerutil.RemoveFinalizer(&pod, PodFinalizer) { @@ -774,7 +816,7 @@ func (p *Pod) cleanupExcessPods(ctx context.Context, c client.Client, r record.E return err } } - if pod.ObjectMeta.DeletionTimestamp.IsZero() { + if pod.DeletionTimestamp.IsZero() { log.V(3).Info("Deleting excess pod in group", "excessPod", klog.KObj(&pod)) if err := c.Delete(ctx, &pod); err != nil { // We won't observe this cleanup in the event handler. @@ -788,31 +830,42 @@ func (p *Pod) cleanupExcessPods(ctx context.Context, c client.Client, r record.E if err != nil { return err } + return nil +} - // Remove excess pods from the group list - newPodsInGroup := make([]corev1.Pod, 0, len(p.list.Items)-len(extraPods)) - for i := range p.list.Items { - found := false - for j := range extraPods { - if p.list.Items[i].Name == extraPods[j].Name && p.list.Items[i].Namespace == extraPods[j].Namespace { - found = true - break - } - } +func (p *Pod) finalizePods(ctx context.Context, c client.Client, extraPods []corev1.Pod) error { + if len(extraPods) == 0 { + return nil + } + + log := ctrl.LoggerFrom(ctx) + + // Extract all the latest created extra pods + extraPodsUIDs := utilslices.Map(extraPods, func(p *corev1.Pod) types.UID { return p.UID }) + p.excessPodExpectations.ExpectUIDs(log, p.key, extraPodsUIDs) - if !found { - newPodsInGroup = append(newPodsInGroup, p.list.Items[i]) + err := parallelize.Until(ctx, len(extraPods), func(i int) error { + pod := extraPods[i] + if controllerutil.RemoveFinalizer(&pod, PodFinalizer) { + log.V(3).Info("Finalizing pod in group", "Pod", klog.KObj(&pod)) + if err := c.Update(ctx, &pod); err != nil { + // We won't observe this cleanup in the event handler. + p.excessPodExpectations.ObservedUID(log, p.key, pod.UID) + return err + } + } else { + // We don't expect an event in this case. 
+ p.excessPodExpectations.ObservedUID(log, p.key, pod.UID) } + return nil + }) + if err != nil { + return err } - p.list.Items = newPodsInGroup - return nil } -func (p *Pod) EnsureWorkloadOwnedByAllMembers(ctx context.Context, c client.Client, r record.EventRecorder, workload *kueue.Workload) error { - if !p.isGroup { - return nil - } +func (p *Pod) ensureWorkloadOwnedByAllMembers(ctx context.Context, c client.Client, r record.EventRecorder, workload *kueue.Workload) error { oldOwnersCnt := len(workload.GetOwnerReferences()) for _, pod := range p.list.Items { if err := controllerutil.SetOwnerReference(&pod, workload, c.Scheme()); err != nil { @@ -871,7 +924,7 @@ func (p *Pod) ConstructComposableWorkload(ctx context.Context, c client.Client, return wl, nil } - if err := p.finalizeNonRunnableNorSucceededPods(ctx, c); err != nil { + if err := p.finalizePods(ctx, c, p.notRunnableNorSucceededPods()); err != nil { return nil, err } @@ -893,9 +946,13 @@ func (p *Pod) ConstructComposableWorkload(ctx context.Context, c client.Client, } // Cleanup extra pods if there's any - err = p.cleanupExcessPods(ctx, c, r, groupTotalCount, activePods) - if err != nil { - return nil, err + if excessPodsCount := len(activePods) - groupTotalCount; excessPodsCount > 0 { + sortActivePods(activePods) + err = p.removeExcessPods(ctx, c, r, activePods[len(activePods)-excessPodsCount:]) + if err != nil { + return nil, err + } + p.list.Items = activePods[:len(activePods)-excessPodsCount] } // Construct workload for a pod group @@ -971,37 +1028,73 @@ func (p *Pod) FindMatchingWorkloads(ctx context.Context, c client.Client, r reco // Cleanup excess pods for each workload pod set (role) activePods := p.runnableOrSucceededPods() + inactivePods := p.notRunnableNorSucceededPods() + + var keptPods []corev1.Pod + var excessActivePods []corev1.Pod + var replacedInactivePods []corev1.Pod + for _, ps := range workload.Spec.PodSets { - // Find all the active pods of the role - var roleActivePods []corev1.Pod - for _, activePod := range activePods { - roleHash, err := getRoleHash(activePod) + // Find all the active and inactive pods of the role + var roleHashErrors []error + hasRoleFunc := func(p *corev1.Pod) bool { + hash, err := getRoleHash(*p) if err != nil { - return nil, nil, fmt.Errorf("failed to calculate pod role hash: %w", err) + roleHashErrors = append(roleHashErrors, err) + return false } + return hash == ps.Name + } + roleActivePods := utilslices.Pick(activePods, hasRoleFunc) + roleInactivePods := utilslices.Pick(inactivePods, hasRoleFunc) + if len(roleHashErrors) > 0 { + return nil, nil, fmt.Errorf("failed to calculate pod role hash: %w", errors.Join(roleHashErrors...)) + } - if ps.Name == roleHash { - roleActivePods = append(roleActivePods, activePod) - } + if excessCount := len(roleActivePods) - int(ps.Count); excessCount > 0 { + sortActivePods(roleActivePods) + excessActivePods = append(excessActivePods, roleActivePods[len(roleActivePods)-excessCount:]...) + keptPods = append(keptPods, roleActivePods[:len(roleActivePods)-excessCount]...) + } else { + keptPods = append(keptPods, roleActivePods...) 
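		// Illustration of the ordering promised in the KEP change above (a new Pod
		// replaces the Pod that failed first): inactive pods are sorted with the
		// still-finalized, most recently active ones first, and only the tail of
		// that ordering is finalized. In a group of size 2 where both pods Failed,
		// one at 10:00 and one at 10:05, and a single replacement exists so far,
		// the 10:00 pod is finalized while the 10:05 pod keeps its finalizer until
		// a second replacement shows up.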
} - // Cleanup excess pods of the role - err := p.cleanupExcessPods(ctx, c, r, int(ps.Count), roleActivePods) - if err != nil { - return nil, nil, err + if finalizeablePodsCount := min(len(roleInactivePods), len(roleInactivePods)+len(roleActivePods)-int(ps.Count)); finalizeablePodsCount > 0 { + sortInactivePods(roleInactivePods) + replacedInactivePods = append(replacedInactivePods, roleInactivePods[len(roleInactivePods)-finalizeablePodsCount:]...) + keptPods = append(keptPods, roleInactivePods[:len(roleInactivePods)-finalizeablePodsCount]...) + } else { + keptPods = append(keptPods, roleInactivePods...) } } - jobPodSets, err := p.constructGroupPodSets() + jobPodSets, err := constructGroupPodSets(keptPods) if err != nil { return nil, nil, err } - if p.equivalentToWorkload(workload, jobPodSets) { - return workload, []*kueue.Workload{}, nil - } else { + if len(keptPods) == 0 || !p.equivalentToWorkload(workload, jobPodSets) { return nil, []*kueue.Workload{workload}, nil } + + // Do not clean up more pods until observing previous operations + if !p.satisfiedExcessPods { + return nil, nil, errPendingOps + } + + p.list.Items = keptPods + if err := p.ensureWorkloadOwnedByAllMembers(ctx, c, r, workload); err != nil { + return nil, nil, err + } + + if err := p.removeExcessPods(ctx, c, r, excessActivePods); err != nil { + return nil, nil, err + } + + if err := p.finalizePods(ctx, c, replacedInactivePods); err != nil { + return nil, nil, err + } + return workload, []*kueue.Workload{}, nil } func (p *Pod) equivalentToWorkload(wl *kueue.Workload, jobPodSets []kueue.PodSet) bool { @@ -1068,20 +1161,6 @@ func (p *Pod) ReclaimablePods() ([]kueue.ReclaimablePod, error) { return result, nil } -func (p *Pod) finalizeNonRunnableNorSucceededPods(ctx context.Context, c client.Client) error { - for _, p := range p.list.Items { - if isPodRunnableOrSucceeded(&p) { - continue - } - if controllerutil.RemoveFinalizer(&p, PodFinalizer) { - if err := c.Update(ctx, &p); err != nil { - return err - } - } - } - return nil -} - func IsPodOwnerManagedByKueue(p *Pod) bool { if owner := metav1.GetControllerOf(&p.pod); owner != nil { return jobframework.IsOwnerManagedByKueue(owner) || (owner.Kind == "RayCluster" && strings.HasPrefix(owner.APIVersion, "ray.io/v1alpha1")) diff --git a/pkg/controller/jobs/pod/pod_controller_test.go b/pkg/controller/jobs/pod/pod_controller_test.go index 558995beb6..196e6e4ba0 100644 --- a/pkg/controller/jobs/pod/pod_controller_test.go +++ b/pkg/controller/jobs/pod/pod_controller_test.go @@ -1290,7 +1290,6 @@ func TestReconciler(t *testing.T) { Clone(). Name("pod2"). Label("kueue.x-k8s.io/managed", "true"). - KueueFinalizer(). Group("test-group"). GroupTotalCount("3"). StatusPhase(corev1.PodFailed). @@ -2919,6 +2918,397 @@ func TestReconciler(t *testing.T) { }, }, }, + "the failed pods are finalized in order": { + pods: []corev1.Pod{ + *basePodWrapper. + Clone(). + Name("active-pod"). + Label("kueue.x-k8s.io/managed", "true"). + KueueFinalizer(). + Group("test-group"). + GroupTotalCount("4"). + Obj(), + *basePodWrapper. + Clone(). + Name("finished-with-error"). + Label("kueue.x-k8s.io/managed", "true"). + KueueFinalizer(). + Group("test-group"). + GroupTotalCount("4"). + StatusPhase(corev1.PodFailed). + StatusConditions(corev1.PodCondition{ + Type: corev1.ContainersReady, + Status: corev1.ConditionFalse, + Reason: string(corev1.PodFailed), + LastTransitionTime: metav1.NewTime(now.Add(-5 * time.Minute)).Rfc3339Copy(), + }). + Obj(), + *basePodWrapper. + Clone(). + Name("deleted"). 
+ Label("kueue.x-k8s.io/managed", "true"). + KueueFinalizer(). + Group("test-group"). + GroupTotalCount("4"). + StatusPhase(corev1.PodFailed). + DeletionTimestamp(now.Add(-4 * time.Minute)). + StatusConditions(corev1.PodCondition{ + Type: corev1.ContainersReady, + Status: corev1.ConditionFalse, + Reason: string(corev1.PodFailed), + LastTransitionTime: metav1.NewTime(now.Add(-3 * time.Minute)).Rfc3339Copy(), + }). + Obj(), + *basePodWrapper. + Clone(). + Name("finished-with-error2"). + Label("kueue.x-k8s.io/managed", "true"). + KueueFinalizer(). + Group("test-group"). + GroupTotalCount("4"). + StatusPhase(corev1.PodFailed). + StatusConditions(corev1.PodCondition{ + Type: corev1.ContainersReady, + Status: corev1.ConditionFalse, + Reason: string(corev1.PodFailed), + LastTransitionTime: metav1.NewTime(now.Add(-3 * time.Minute)).Rfc3339Copy(), + }). + Obj(), + *basePodWrapper. + Clone(). + Name("replacement1"). + Label("kueue.x-k8s.io/managed", "true"). + KueueFinalizer(). + Group("test-group"). + GroupTotalCount("4"). + Obj(), + *basePodWrapper. + Clone(). + Name("replacement2"). + Label("kueue.x-k8s.io/managed", "true"). + KueueFinalizer(). + Group("test-group"). + GroupTotalCount("4"). + Obj(), + }, + wantPods: []corev1.Pod{ + *basePodWrapper. + Clone(). + Name("active-pod"). + Label("kueue.x-k8s.io/managed", "true"). + KueueFinalizer(). + Group("test-group"). + GroupTotalCount("4"). + Obj(), + *basePodWrapper. + Clone(). + Name("finished-with-error"). + Label("kueue.x-k8s.io/managed", "true"). + Group("test-group"). + GroupTotalCount("4"). + StatusPhase(corev1.PodFailed). + StatusConditions(corev1.PodCondition{ + Type: corev1.ContainersReady, + Status: corev1.ConditionFalse, + Reason: string(corev1.PodFailed), + LastTransitionTime: metav1.NewTime(now.Add(-5 * time.Minute)).Rfc3339Copy(), + }). + Obj(), + *basePodWrapper. + Clone(). + Name("finished-with-error2"). + Label("kueue.x-k8s.io/managed", "true"). + KueueFinalizer(). + Group("test-group"). + GroupTotalCount("4"). + StatusPhase(corev1.PodFailed). + StatusConditions(corev1.PodCondition{ + Type: corev1.ContainersReady, + Status: corev1.ConditionFalse, + Reason: string(corev1.PodFailed), + LastTransitionTime: metav1.NewTime(now.Add(-3 * time.Minute)).Rfc3339Copy(), + }). + Obj(), + *basePodWrapper. + Clone(). + Name("replacement1"). + Label("kueue.x-k8s.io/managed", "true"). + KueueFinalizer(). + Group("test-group"). + GroupTotalCount("4"). + Obj(), + *basePodWrapper. + Clone(). + Name("replacement2"). + Label("kueue.x-k8s.io/managed", "true"). + KueueFinalizer(). + Group("test-group"). + GroupTotalCount("4"). + Obj(), + }, + workloads: []kueue.Workload{ + *utiltesting.MakeWorkload("test-group", "ns"). + PodSets( + *utiltesting.MakePodSet("dc85db45", 4). + Request(corev1.ResourceCPU, "1"). + Obj(), + ). + Queue("user-queue"). + OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "active-pod", "test-uid"). + OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "finished-with-error", "test-uid"). + OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "deleted", "test-uid"). + OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "finished-with-error2", "test-uid"). + ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(1).Obj()). + Admitted(true). + Obj(), + }, + wantWorkloads: []kueue.Workload{ + *utiltesting.MakeWorkload("test-group", "ns"). + PodSets( + *utiltesting.MakePodSet("dc85db45", 4). + Request(corev1.ResourceCPU, "1"). + Obj(), + ). + Queue("user-queue"). 
+ OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "active-pod", "test-uid"). + OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "finished-with-error", "test-uid"). + OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "deleted", "test-uid"). + OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "finished-with-error2", "test-uid"). + OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "replacement1", "test-uid"). + OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "replacement2", "test-uid"). + ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(1).Obj()). + Admitted(true). + Obj(), + }, + workloadCmpOpts: defaultWorkloadCmpOpts, + wantEvents: []utiltesting.EventRecord{ + { + Key: types.NamespacedName{Name: "test-group", Namespace: "ns"}, + EventType: "Normal", + Reason: "OwnerReferencesAdded", + Message: "Added 2 owner reference(s)", + }, + }, + }, + "no failed pods are finalized while waiting for expectations": { + pods: []corev1.Pod{ + *basePodWrapper. + Clone(). + Name("active-pod"). + Label("kueue.x-k8s.io/managed", "true"). + KueueFinalizer(). + Group("test-group"). + GroupTotalCount("2"). + Obj(), + *basePodWrapper. + Clone(). + Name("finished-with-error"). + Label("kueue.x-k8s.io/managed", "true"). + KueueFinalizer(). + Group("test-group"). + GroupTotalCount("2"). + StatusPhase(corev1.PodFailed). + StatusConditions(corev1.PodCondition{ + Type: corev1.ContainersReady, + Status: corev1.ConditionFalse, + Reason: string(corev1.PodFailed), + LastTransitionTime: metav1.NewTime(now.Add(-5 * time.Minute)).Rfc3339Copy(), + }). + Obj(), + *basePodWrapper. + Clone(). + Name("replacement"). + Label("kueue.x-k8s.io/managed", "true"). + KueueFinalizer(). + Group("test-group"). + GroupTotalCount("2"). + Obj(), + }, + wantPods: []corev1.Pod{ + *basePodWrapper. + Clone(). + Name("active-pod"). + Label("kueue.x-k8s.io/managed", "true"). + KueueFinalizer(). + Group("test-group"). + GroupTotalCount("2"). + Obj(), + *basePodWrapper. + Clone(). + Name("finished-with-error"). + Label("kueue.x-k8s.io/managed", "true"). + KueueFinalizer(). + Group("test-group"). + GroupTotalCount("2"). + StatusPhase(corev1.PodFailed). + StatusConditions(corev1.PodCondition{ + Type: corev1.ContainersReady, + Status: corev1.ConditionFalse, + Reason: string(corev1.PodFailed), + LastTransitionTime: metav1.NewTime(now.Add(-5 * time.Minute)).Rfc3339Copy(), + }). + Obj(), + *basePodWrapper. + Clone(). + Name("replacement"). + Label("kueue.x-k8s.io/managed", "true"). + KueueFinalizer(). + Group("test-group"). + GroupTotalCount("2"). + Obj(), + }, + workloads: []kueue.Workload{ + *utiltesting.MakeWorkload("test-group", "ns"). + PodSets( + *utiltesting.MakePodSet("dc85db45", 2). + Request(corev1.ResourceCPU, "1"). + Obj(), + ). + Queue("user-queue"). + OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "active-pod", "test-uid"). + OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "finished-with-error", "test-uid"). + ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(1).Obj()). + Admitted(true). + Obj(), + }, + wantWorkloads: []kueue.Workload{ + *utiltesting.MakeWorkload("test-group", "ns"). + PodSets( + *utiltesting.MakePodSet("dc85db45", 2). + Request(corev1.ResourceCPU, "1"). + Obj(), + ). + Queue("user-queue"). + OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "active-pod", "test-uid"). + OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "finished-with-error", "test-uid"). 
+ ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(1).Obj()). + Admitted(true). + Obj(), + }, + workloadCmpOpts: defaultWorkloadCmpOpts, + excessPodsExpectations: []keyUIDs{{ + key: types.NamespacedName{Name: "test-group", Namespace: "ns"}, + uids: []types.UID{"some-other-pod"}, + }}, + }, + "no unnecessary additional failed pods are finalized": { + pods: []corev1.Pod{ + *basePodWrapper. + Clone(). + Name("finished-with-error"). + Label("kueue.x-k8s.io/managed", "true"). + KueueFinalizer(). + Group("test-group"). + GroupTotalCount("2"). + StatusPhase(corev1.PodFailed). + StatusConditions(corev1.PodCondition{ + Type: corev1.ContainersReady, + Status: corev1.ConditionFalse, + Reason: string(corev1.PodFailed), + LastTransitionTime: metav1.NewTime(now.Add(-5 * time.Minute)).Rfc3339Copy(), + }). + Obj(), + *basePodWrapper. + Clone(). + Name("finished-with-error-no-finalizer"). + Label("kueue.x-k8s.io/managed", "true"). + Group("test-group"). + GroupTotalCount("2"). + StatusPhase(corev1.PodFailed). + StatusConditions(corev1.PodCondition{ + Type: corev1.ContainersReady, + Status: corev1.ConditionFalse, + Reason: string(corev1.PodFailed), + LastTransitionTime: metav1.NewTime(now.Add(-3 * time.Minute)).Rfc3339Copy(), + }). + Obj(), + *basePodWrapper. + Clone(). + Name("replacement"). + Label("kueue.x-k8s.io/managed", "true"). + KueueFinalizer(). + Group("test-group"). + GroupTotalCount("2"). + Obj(), + }, + wantPods: []corev1.Pod{ + *basePodWrapper. + Clone(). + Name("finished-with-error"). + Label("kueue.x-k8s.io/managed", "true"). + KueueFinalizer(). + Group("test-group"). + GroupTotalCount("2"). + StatusPhase(corev1.PodFailed). + StatusConditions(corev1.PodCondition{ + Type: corev1.ContainersReady, + Status: corev1.ConditionFalse, + Reason: string(corev1.PodFailed), + LastTransitionTime: metav1.NewTime(now.Add(-5 * time.Minute)).Rfc3339Copy(), + }). + Obj(), + *basePodWrapper. + Clone(). + Name("finished-with-error-no-finalizer"). + Label("kueue.x-k8s.io/managed", "true"). + Group("test-group"). + GroupTotalCount("2"). + StatusPhase(corev1.PodFailed). + StatusConditions(corev1.PodCondition{ + Type: corev1.ContainersReady, + Status: corev1.ConditionFalse, + Reason: string(corev1.PodFailed), + LastTransitionTime: metav1.NewTime(now.Add(-3 * time.Minute)).Rfc3339Copy(), + }). + Obj(), + *basePodWrapper. + Clone(). + Name("replacement"). + Label("kueue.x-k8s.io/managed", "true"). + KueueFinalizer(). + Group("test-group"). + GroupTotalCount("2"). + Obj(), + }, + workloads: []kueue.Workload{ + *utiltesting.MakeWorkload("test-group", "ns"). + PodSets( + *utiltesting.MakePodSet("dc85db45", 4). + Request(corev1.ResourceCPU, "1"). + Obj(), + ). + Queue("user-queue"). + OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "finished-with-error", "test-uid"). + OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "finished-with-error-no-finalizer", "test-uid"). + ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(1).Obj()). + Admitted(true). + Obj(), + }, + wantWorkloads: []kueue.Workload{ + *utiltesting.MakeWorkload("test-group", "ns"). + PodSets( + *utiltesting.MakePodSet("dc85db45", 4). + Request(corev1.ResourceCPU, "1"). + Obj(), + ). + Queue("user-queue"). + OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "finished-with-error", "test-uid"). + OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "finished-with-error-no-finalizer", "test-uid"). + OwnerReference(corev1.SchemeGroupVersion.WithKind("Pod"), "replacement", "test-uid"). 
+ ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(1).Obj()). + Admitted(true). + Obj(), + }, + workloadCmpOpts: defaultWorkloadCmpOpts, + wantEvents: []utiltesting.EventRecord{ + { + Key: types.NamespacedName{Name: "test-group", Namespace: "ns"}, + EventType: "Normal", + Reason: "OwnerReferencesAdded", + Message: "Added 1 owner reference(s)", + }, + }, + }, } for name, tc := range testCases { diff --git a/pkg/util/slices/slices.go b/pkg/util/slices/slices.go index 2d3e3079c4..ae9ebbace5 100644 --- a/pkg/util/slices/slices.go +++ b/pkg/util/slices/slices.go @@ -82,3 +82,14 @@ func CmpNoOrder[E comparable, S ~[]E](a, b S) bool { } return true } + +// Pick creates a new slice containing only the elements for which keep return true. +func Pick[E any, S ~[]E](s S, keep func(*E) bool) S { + var ret S + for i := range s { + if keep(&s[i]) { + ret = append(ret, s[i]) + } + } + return ret +} diff --git a/pkg/util/slices/slices_test.go b/pkg/util/slices/slices_test.go index d3a81912f9..f45e35115d 100644 --- a/pkg/util/slices/slices_test.go +++ b/pkg/util/slices/slices_test.go @@ -102,3 +102,36 @@ func TestToCmpNoOrder(t *testing.T) { }) } } + +func TestPick(t *testing.T) { + cases := map[string]struct { + testSlice []int + want []int + }{ + "nil input": { + testSlice: nil, + want: nil, + }, + "empty input": { + testSlice: []int{}, + want: nil, + }, + "no match": { + testSlice: []int{1, 3, 5, 7, 9}, + want: nil, + }, + "match": { + testSlice: []int{1, 2, 3, 4, 5, 6, 7, 8, 9}, + want: []int{2, 4, 6, 8}, + }, + } + + for name, tc := range cases { + t.Run(name, func(t *testing.T) { + result := Pick(tc.testSlice, func(i *int) bool { return (*i)%2 == 0 }) + if diff := cmp.Diff(tc.want, result); diff != "" { + t.Errorf("Unexpected result (-want,+got):\n%s", diff) + } + }) + } +} diff --git a/pkg/util/testingjobs/pod/wrappers.go b/pkg/util/testingjobs/pod/wrappers.go index aca94e818f..b6564eb0a9 100644 --- a/pkg/util/testingjobs/pod/wrappers.go +++ b/pkg/util/testingjobs/pod/wrappers.go @@ -217,6 +217,13 @@ func (p *PodWrapper) CreationTimestamp(t time.Time) *PodWrapper { return p } +// DeletionTimestamp sets a creation timestamp for the pod object +func (p *PodWrapper) DeletionTimestamp(t time.Time) *PodWrapper { + timestamp := metav1.NewTime(t).Rfc3339Copy() + p.Pod.DeletionTimestamp = ×tamp + return p +} + // Delete sets a deletion timestamp for the pod object func (p *PodWrapper) Delete() *PodWrapper { t := metav1.NewTime(time.Now()) diff --git a/test/e2e/singlecluster/pod_test.go b/test/e2e/singlecluster/pod_test.go index e940e4e806..c86777fa66 100644 --- a/test/e2e/singlecluster/pod_test.go +++ b/test/e2e/singlecluster/pod_test.go @@ -217,7 +217,7 @@ var _ = ginkgo.Describe("Pod groups", func() { }, util.Timeout, util.Interval).Should(gomega.Equal(corev1.PodFailed)) }) - ginkgo.By("Replacement pod starts", func() { + ginkgo.By("Replacement pod starts, and the failed one is deleted", func() { // Use a pod template that can succeed fast. 
rep := group[2].DeepCopy() rep.Name = "replacement" @@ -226,6 +226,7 @@ var _ = ginkgo.Describe("Pod groups", func() { var p corev1.Pod g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(rep), &p)).To(gomega.Succeed()) g.Expect(p.Spec.SchedulingGates).To(gomega.BeEmpty()) + g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(group[0]), &p)).To(testing.BeNotFoundError()) }, util.Timeout, util.Interval).Should(gomega.Succeed()) }) @@ -404,7 +405,7 @@ var _ = ginkgo.Describe("Pod groups", func() { // For replacement pods use args that let it complete fast. rep.Name = "replacement-for-" + rep.Name rep.Spec.Containers[0].Args = []string{"1ms"} - gomega.Expect(k8sClient.Create(ctx, rep)).To(gomega.Succeed()) + g.Expect(k8sClient.Create(ctx, rep)).To(gomega.Succeed()) replacementPods[origKey] = client.ObjectKeyFromObject(rep) } } @@ -412,6 +413,15 @@ var _ = ginkgo.Describe("Pod groups", func() { return len(replacementPods) }, util.Timeout, util.Interval).Should(gomega.Equal(len(defaultPriorityGroup))) }) + ginkgo.By("Check that the preempted pods are deleted", func() { + gomega.Eventually(func(g gomega.Gomega) { + var p corev1.Pod + for _, origPod := range defaultPriorityGroup { + origKey := client.ObjectKeyFromObject(origPod) + g.Expect(k8sClient.Get(ctx, origKey, &p)).To(testing.BeNotFoundError()) + } + }, util.Timeout, util.Interval).Should(gomega.Succeed()) + }) ginkgo.By("Verify the high-priority pods are scheduled", func() { gomega.Eventually(func(g gomega.Gomega) { diff --git a/test/integration/controller/jobs/pod/pod_controller_test.go b/test/integration/controller/jobs/pod/pod_controller_test.go index 5e5333113f..5c8c69557a 100644 --- a/test/integration/controller/jobs/pod/pod_controller_test.go +++ b/test/integration/controller/jobs/pod/pod_controller_test.go @@ -676,7 +676,7 @@ var _ = ginkgo.Describe("Pod controller", ginkgo.Ordered, ginkgo.ContinueOnFailu GroupTotalCount("2"). Queue("test-queue"). 
Obj() - replacementPodLookupKey := client.ObjectKeyFromObject(pod2) + replacementPodLookupKey := client.ObjectKeyFromObject(replacementPod) ginkgo.By("creating the replacement pod and readmitting the workload will unsuspended the replacement", func() { gomega.Expect(k8sClient.Create(ctx, replacementPod)).Should(gomega.Succeed()) @@ -747,6 +747,9 @@ var _ = ginkgo.Describe("Pod controller", ginkgo.Ordered, ginkgo.ContinueOnFailu podLookupKey := types.NamespacedName{Name: pod.Name, Namespace: pod.Namespace} util.ExpectPodUnsuspendedWithNodeSelectors(ctx, k8sClient, podLookupKey, map[string]string{"kubernetes.io/arch": "arm64"}) + // cache the uid of the workload it should be the same until the execution ends otherwise the workload was recreated + wlUid := createdWorkload.UID + ginkgo.By("Failing the running pod") util.SetPodsPhase(ctx, k8sClient, corev1.PodFailed, pod) @@ -769,18 +772,28 @@ var _ = ginkgo.Describe("Pod controller", ginkgo.Ordered, ginkgo.ContinueOnFailu gomega.Expect(k8sClient.Create(ctx, replacementPod)).Should(gomega.Succeed()) replacementPodLookupKey := client.ObjectKeyFromObject(replacementPod) - // Workload shouldn't be deleted after replacement pod has been created - gomega.Consistently(func() error { - return k8sClient.Get(ctx, wlLookupKey, createdWorkload) - }, util.ConsistentDuration, util.Interval).Should(gomega.Succeed()) + ginkgo.By("Failing the replacement", func() { + util.ExpectPodsFinalized(ctx, k8sClient, podLookupKey) + util.ExpectPodUnsuspendedWithNodeSelectors(ctx, k8sClient, replacementPodLookupKey, map[string]string{"kubernetes.io/arch": "arm64"}) + util.SetPodsPhase(ctx, k8sClient, corev1.PodFailed, replacementPod) + }) - util.ExpectPodUnsuspendedWithNodeSelectors(ctx, k8sClient, replacementPodLookupKey, map[string]string{"kubernetes.io/arch": "arm64"}) + ginkgo.By("Creating a second replacement pod in the group") + replacementPod2 := testingpod.MakePod("replacement-test-pod2", ns.Name). + Group("test-group"). + GroupTotalCount("1"). + Queue("test-queue"). 
+ Obj() + gomega.Expect(k8sClient.Create(ctx, replacementPod2)).Should(gomega.Succeed()) + replacementPod2LookupKey := client.ObjectKeyFromObject(replacementPod) - util.SetPodsPhase(ctx, k8sClient, corev1.PodSucceeded, replacementPod) - util.ExpectPodsFinalized(ctx, k8sClient, replacementPodLookupKey) + util.ExpectPodUnsuspendedWithNodeSelectors(ctx, k8sClient, replacementPod2LookupKey, map[string]string{"kubernetes.io/arch": "arm64"}) + util.SetPodsPhase(ctx, k8sClient, corev1.PodSucceeded, replacementPod2) + util.ExpectPodsFinalized(ctx, k8sClient, replacementPodLookupKey, replacementPod2LookupKey) gomega.Eventually(func(g gomega.Gomega) []metav1.Condition { g.Expect(k8sClient.Get(ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed()) + g.Expect(createdWorkload.UID).To(gomega.Equal(wlUid)) return createdWorkload.Status.Conditions }, util.Timeout, util.Interval).Should(gomega.ContainElement( gomega.BeComparableTo( @@ -889,6 +902,10 @@ var _ = ginkgo.Describe("Pod controller", ginkgo.Ordered, ginkgo.ContinueOnFailu util.ExpectPodUnsuspendedWithNodeSelectors(ctx, k8sClient, replacementPod2LookupKey, map[string]string{"kubernetes.io/arch": "arm64"}) }) + ginkgo.By("checking that the replaced pod is finalized", func() { + util.ExpectPodsFinalized(ctx, k8sClient, pod1LookupKey) + }) + ginkgo.By("checking that pod group is finalized when unretriable pod has failed", func() { util.SetPodsPhase(ctx, k8sClient, corev1.PodFailed, replacementPod2) @@ -905,7 +922,7 @@ var _ = ginkgo.Describe("Pod controller", ginkgo.Ordered, ginkgo.ContinueOnFailu ), ), "Expected 'Finished' workload condition") - util.ExpectPodsFinalized(ctx, k8sClient, pod1LookupKey, pod2LookupKey, replacementPod2LookupKey) + util.ExpectPodsFinalized(ctx, k8sClient, pod2LookupKey, replacementPod2LookupKey) }) }) From bea1d1c2d3f152099523c0d5ba3edb4bffce1d25 Mon Sep 17 00:00:00 2001 From: k8s-infra-cherrypick-robot <90416843+k8s-infra-cherrypick-robot@users.noreply.github.com> Date: Wed, 6 Mar 2024 08:59:58 -0800 Subject: [PATCH 10/48] [release-0.6] [multikueue] Cluster connection monitoring and reconnect. (#1809) * Cluster connection monitoring and reconnect. 
* Review Remarks --------- Co-authored-by: Traian Schiau --- .../multikueue/jobset_adapter_test.go | 2 +- .../multikueue/multikueuecluster.go | 141 +++++++++++---- .../multikueue/multikueuecluster_test.go | 162 ++++++++++++++++-- .../multikueue/workload_test.go | 4 +- test/e2e/multikueue/e2e_test.go | 48 ++++++ 5 files changed, 303 insertions(+), 54 deletions(-) diff --git a/pkg/controller/admissionchecks/multikueue/jobset_adapter_test.go b/pkg/controller/admissionchecks/multikueue/jobset_adapter_test.go index 61ac7de838..3796a03287 100644 --- a/pkg/controller/admissionchecks/multikueue/jobset_adapter_test.go +++ b/pkg/controller/admissionchecks/multikueue/jobset_adapter_test.go @@ -331,7 +331,7 @@ func TestWlReconcileJobset(t *testing.T) { worker1Builder = worker1Builder.WithLists(&kueue.WorkloadList{Items: tc.worker1Workloads}, &jobset.JobSetList{Items: tc.worker1JobSets}) worker1Client := worker1Builder.Build() - w1remoteClient := newRemoteClient(managerClient, nil, defaultOrigin) + w1remoteClient := newRemoteClient(managerClient, nil, nil, defaultOrigin, "") w1remoteClient.client = worker1Client cRec.remoteClients["worker1"] = w1remoteClient diff --git a/pkg/controller/admissionchecks/multikueue/multikueuecluster.go b/pkg/controller/admissionchecks/multikueue/multikueuecluster.go index beb13f025f..2b957b5395 100644 --- a/pkg/controller/admissionchecks/multikueue/multikueuecluster.go +++ b/pkg/controller/admissionchecks/multikueue/multikueuecluster.go @@ -23,6 +23,7 @@ import ( "os" "strings" "sync" + "sync/atomic" "time" corev1 "k8s.io/api/core/v1" @@ -36,26 +37,50 @@ import ( "k8s.io/client-go/tools/clientcmd" "k8s.io/client-go/util/workqueue" "k8s.io/klog/v2" + "k8s.io/utils/ptr" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/reconcile" + "sigs.k8s.io/controller-runtime/pkg/source" kueuealpha "sigs.k8s.io/kueue/apis/kueue/v1alpha1" kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" ) +const ( + eventChBufferSize = 10 + + // this set will provide waiting time between 0 to 5m20s + retryIncrement = 5 * time.Second + retryMaxSteps = 7 +) + +// retryAfter returns an exponentially increasing interval between +// 0 and 2^(retryMaxSteps-1) * retryIncrement +func retryAfter(failedAttempts uint) time.Duration { + if failedAttempts == 0 { + return 0 + } + return (1 << (min(failedAttempts, retryMaxSteps) - 1)) * retryIncrement +} + type clientWithWatchBuilder func(config []byte, options client.Options) (client.WithWatch, error) type remoteClient struct { - localClient client.Client - client client.WithWatch - wlUpdateCh chan<- event.GenericEvent - watchCancel map[string]func() - kubeconfig []byte - origin string + clusterName string + localClient client.Client + client client.WithWatch + wlUpdateCh chan<- event.GenericEvent + watchEndedCh chan<- event.GenericEvent + watchCancel func() + kubeconfig []byte + origin string + + forceReconnect atomic.Bool + failedConnAttempts uint // For unit testing only. There is now need of creating fully functional remote clients in the unit tests // and creating valid kubeconfig content is not trivial. 
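The reconnect path above is driven by an exponential backoff: when a remote watch ends unexpectedly, the cluster queues itself for reconciliation and setConfig retries with a growing delay. A self-contained sketch of that schedule (retryIncrement, retryMaxSteps and retryAfter copied from the hunk above so it runs on its own; the surrounding reconcile plumbing is omitted):

package main

import (
	"fmt"
	"time"
)

// Copied from the patch above so the sketch compiles stand-alone.
const (
	retryIncrement = 5 * time.Second
	retryMaxSteps  = 7
)

func retryAfter(failedAttempts uint) time.Duration {
	if failedAttempts == 0 {
		return 0
	}
	return (1 << (min(failedAttempts, retryMaxSteps) - 1)) * retryIncrement
}

func main() {
	// Prints the requeue interval after each consecutive failed connection attempt:
	// 5s, 10s, 20s, 40s, 1m20s, 2m40s, then capped at 5m20s from the 7th attempt on.
	for attempts := uint(1); attempts <= 8; attempts++ {
		fmt.Printf("attempt %d -> retry after %s\n", attempts, retryAfter(attempts))
	}
}

This matches the "waiting time between 0 to 5m20s" range described in the constant block's comment.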
@@ -63,12 +88,13 @@ type remoteClient struct { builderOverride clientWithWatchBuilder } -func newRemoteClient(localClient client.Client, wlUpdateCh chan<- event.GenericEvent, origin string) *remoteClient { +func newRemoteClient(localClient client.Client, wlUpdateCh, watchEndedCh chan<- event.GenericEvent, origin, clusterName string) *remoteClient { rc := &remoteClient{ - wlUpdateCh: wlUpdateCh, - localClient: localClient, - origin: origin, - watchCancel: map[string]func(){}, + clusterName: clusterName, + wlUpdateCh: wlUpdateCh, + watchEndedCh: watchEndedCh, + localClient: localClient, + origin: origin, } return rc } @@ -107,13 +133,19 @@ func (*workloadKueueWatcher) GetWorkloadKey(o runtime.Object) (types.NamespacedN } // setConfig - will try to recreate the k8s client and restart watching if the new config is different than -// the one currently used. -func (rc *remoteClient) setConfig(watchCtx context.Context, kubeconfig []byte) error { - if equality.Semantic.DeepEqual(kubeconfig, rc.kubeconfig) { - return nil +// the one currently used or a reconnect was requested. +// If the encountered error is not permanent the duration after which a retry should be done is returned. +func (rc *remoteClient) setConfig(watchCtx context.Context, kubeconfig []byte) (*time.Duration, error) { + configChanged := !equality.Semantic.DeepEqual(kubeconfig, rc.kubeconfig) + if !configChanged && !rc.forceReconnect.Load() { + return nil, nil } rc.StopWatchers() + if configChanged { + rc.kubeconfig = kubeconfig + rc.failedConnAttempts = 0 + } builder := newClientWithWatch if rc.builderOverride != nil { @@ -121,13 +153,16 @@ func (rc *remoteClient) setConfig(watchCtx context.Context, kubeconfig []byte) e } remoteClient, err := builder(kubeconfig, client.Options{Scheme: rc.localClient.Scheme()}) if err != nil { - return err + return nil, err } + rc.client = remoteClient + watchCtx, rc.watchCancel = context.WithCancel(watchCtx) err = rc.startWatcher(watchCtx, kueue.GroupVersion.WithKind("Workload").GroupKind().String(), &workloadKueueWatcher{}) if err != nil { - return err + rc.failedConnAttempts++ + return ptr.To(retryAfter(rc.failedConnAttempts)), err } // add a watch for all the adapters implementing multiKueueWatcher @@ -141,12 +176,14 @@ func (rc *remoteClient) setConfig(watchCtx context.Context, kubeconfig []byte) e // not being able to setup a watcher is not ideal but we can function with only the wl watcher. ctrl.LoggerFrom(watchCtx).V(2).Error(err, "Unable to start the watcher", "kind", kind) // however let's not accept this for now. 
- return err + rc.failedConnAttempts++ + return ptr.To(retryAfter(rc.failedConnAttempts)), err } } - rc.kubeconfig = kubeconfig - return nil + rc.forceReconnect.Store(false) + rc.failedConnAttempts = 0 + return nil, nil } func (rc *remoteClient) startWatcher(ctx context.Context, kind string, w multiKueueWatcher) error { @@ -158,7 +195,6 @@ func (rc *remoteClient) startWatcher(ctx context.Context, kind string, w multiKu go func() { log.V(2).Info("Starting watch") - defer log.V(2).Info("Watch ended") for r := range newWatcher.ResultChan() { wlKey, err := w.GetWorkloadKey(r.Object) if err != nil { @@ -167,16 +203,23 @@ func (rc *remoteClient) startWatcher(ctx context.Context, kind string, w multiKu rc.queueWorkloadEvent(ctx, wlKey) } } + log.V(2).Info("Watch ended", "ctxErr", ctx.Err()) + // If the context is not yet Done , queue a reconcile to attempt reconnection + if ctx.Err() == nil { + oldReconnect := rc.forceReconnect.Swap(true) + //reconnect if this is the first watch failing. + if !oldReconnect { + log.V(2).Info("Queue reconcile for reconnect", "cluster", rc.clusterName) + rc.queueWatchEndedEvent(ctx) + } + } }() - - rc.watchCancel[kind] = newWatcher.Stop return nil } func (rc *remoteClient) StopWatchers() { - for kind, stop := range rc.watchCancel { - stop() - delete(rc.watchCancel, kind) + if rc.watchCancel != nil { + rc.watchCancel() } } @@ -191,6 +234,15 @@ func (rc *remoteClient) queueWorkloadEvent(ctx context.Context, wlKey types.Name } } +func (rc *remoteClient) queueWatchEndedEvent(ctx context.Context) { + cluster := &kueuealpha.MultiKueueCluster{} + if err := rc.localClient.Get(ctx, types.NamespacedName{Name: rc.clusterName}, cluster); err == nil { + rc.watchEndedCh <- event.GenericEvent{Object: cluster} + } else { + ctrl.LoggerFrom(ctx).Error(err, "sending watch ended event") + } +} + // runGC - lists all the remote workloads having the same multikueue-origin and remove those who // no longer have a local correspondent (missing or awaiting deletion). If the remote workload // is owned by a job, also delete the job. @@ -264,6 +316,10 @@ type clustersReconciler struct { // and creating valid kubeconfig content is not trivial. // The full client creation and usage is validated in the integration and e2e tests. builderOverride clientWithWatchBuilder + + // watchEndedCh - an event chan used to request the reconciliation of the clusters for which the watch loop + // has ended (connection lost). 
+ watchEndedCh chan event.GenericEvent } var _ manager.Runnable = (*clustersReconciler)(nil) @@ -284,13 +340,13 @@ func (c *clustersReconciler) stopAndRemoveCluster(clusterName string) { } } -func (c *clustersReconciler) setRemoteClientConfig(ctx context.Context, clusterName string, kubeconfig []byte, origin string) error { +func (c *clustersReconciler) setRemoteClientConfig(ctx context.Context, clusterName string, kubeconfig []byte, origin string) (*time.Duration, error) { c.lock.Lock() defer c.lock.Unlock() client, found := c.remoteClients[clusterName] if !found { - client = newRemoteClient(c.localClient, c.wlUpdateCh, origin) + client = newRemoteClient(c.localClient, c.wlUpdateCh, c.watchEndedCh, origin, clusterName) if c.builderOverride != nil { client.builderOverride = c.builderOverride } @@ -300,12 +356,11 @@ func (c *clustersReconciler) setRemoteClientConfig(ctx context.Context, clusterN clientLog := ctrl.LoggerFrom(c.rootContext).WithValues("clusterName", clusterName) clientCtx := ctrl.LoggerInto(c.rootContext, clientLog) - if err := client.setConfig(clientCtx, kubeconfig); err != nil { + if retryAfter, err := client.setConfig(clientCtx, kubeconfig); err != nil { ctrl.LoggerFrom(ctx).Error(err, "failed to set kubeConfig in the remote client") - delete(c.remoteClients, clusterName) - return err + return retryAfter, err } - return nil + return nil, nil } func (a *clustersReconciler) controllerFor(acName string) (*remoteClient, bool) { @@ -343,9 +398,13 @@ func (c *clustersReconciler) Reconcile(ctx context.Context, req reconcile.Reques return reconcile.Result{}, c.updateStatus(ctx, cluster, false, "BadConfig", err.Error()) } - if err := c.setRemoteClientConfig(ctx, cluster.Name, kubeConfig, c.origin); err != nil { - log.Error(err, "setting kubeconfig") - return reconcile.Result{}, c.updateStatus(ctx, cluster, false, "ClientConnectionFailed", err.Error()) + if retryAfter, err := c.setRemoteClientConfig(ctx, cluster.Name, kubeConfig, c.origin); err != nil { + log.Error(err, "setting kubeconfig", "retryAfter", retryAfter) + if err := c.updateStatus(ctx, cluster, false, "ClientConnectionFailed", err.Error()); err != nil { + return reconcile.Result{}, err + } else { + return reconcile.Result{RequeueAfter: ptr.Deref(retryAfter, 0)}, nil + } } return reconcile.Result{}, c.updateStatus(ctx, cluster, true, "Active", "Connected") } @@ -433,9 +492,10 @@ func newClustersReconciler(c client.Client, namespace string, gcInterval time.Du localClient: c, configNamespace: namespace, remoteClients: make(map[string]*remoteClient), - wlUpdateCh: make(chan event.GenericEvent, 10), + wlUpdateCh: make(chan event.GenericEvent, eventChBufferSize), gcInterval: gcInterval, origin: origin, + watchEndedCh: make(chan event.GenericEvent, eventChBufferSize), } } @@ -445,9 +505,18 @@ func (c *clustersReconciler) setupWithManager(mgr ctrl.Manager) error { return err } + syncHndl := handler.Funcs{ + GenericFunc: func(_ context.Context, e event.GenericEvent, q workqueue.RateLimitingInterface) { + q.Add(reconcile.Request{NamespacedName: types.NamespacedName{ + Name: e.Object.GetName(), + }}) + }, + } + return ctrl.NewControllerManagedBy(mgr). For(&kueuealpha.MultiKueueCluster{}). Watches(&corev1.Secret{}, &secretHandler{client: c.localClient}). + WatchesRawSource(&source.Channel{Source: c.watchEndedCh}, syncHndl). 
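// The channel source above closes the loop for lost connections: when a remote
// watch terminates while its context is still alive, queueWatchEndedEvent sends
// the owning MultiKueueCluster on watchEndedCh, e.g.
//
//	rc.watchEndedCh <- event.GenericEvent{Object: cluster}
//
// and syncHndl converts that event into a reconcile.Request keyed by the cluster
// name, so Reconcile runs setConfig again and attempts to reconnect.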
Complete(c) } diff --git a/pkg/controller/admissionchecks/multikueue/multikueuecluster_test.go b/pkg/controller/admissionchecks/multikueue/multikueuecluster_test.go index 1d975baf6a..bf99f4dc55 100644 --- a/pkg/controller/admissionchecks/multikueue/multikueuecluster_test.go +++ b/pkg/controller/admissionchecks/multikueue/multikueuecluster_test.go @@ -17,8 +17,10 @@ limitations under the License. package multikueue import ( + "context" "errors" "testing" + "time" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" @@ -26,7 +28,9 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/watch" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/interceptor" "sigs.k8s.io/controller-runtime/pkg/reconcile" kueuealpha "sigs.k8s.io/kueue/apis/kueue/v1alpha1" @@ -38,6 +42,7 @@ import ( var ( errInvalidConfig = errors.New("invalid kubeconfig") + errCannotWatch = errors.New("client cannot watch") ) func fakeClientBuilder(kubeconfig []byte, options client.Options) (client.WithWatch, error) { @@ -45,26 +50,36 @@ func fakeClientBuilder(kubeconfig []byte, options client.Options) (client.WithWa return nil, errInvalidConfig } b, _ := getClientBuilder() + b = b.WithInterceptorFuncs(interceptor.Funcs{ + Watch: func(ctx context.Context, client client.WithWatch, obj client.ObjectList, opts ...client.ListOption) (watch.Interface, error) { + if string(kubeconfig) == "nowatch" { + return nil, errCannotWatch + } + return client.Watch(ctx, obj, opts...) + }, + }) return b.Build(), nil } -func newTestClient(config string) *remoteClient { +func newTestClient(config string, watchCancel func()) *remoteClient { b, _ := getClientBuilder() localClient := b.Build() ret := &remoteClient{ kubeconfig: []byte(config), localClient: localClient, + watchCancel: watchCancel, builderOverride: fakeClientBuilder, } - ret.watchCancel = map[string]func(){ - "test": func() { - ret.kubeconfig = []byte(string(ret.kubeconfig) + " canceled") - }, - } return ret } +func setReconnectState(rc *remoteClient, a uint) *remoteClient { + rc.failedConnAttempts = a + rc.forceReconnect.Store(true) + return rc +} + func makeTestSecret(name string, kubeconfig string) corev1.Secret { return corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ @@ -78,6 +93,9 @@ func makeTestSecret(name string, kubeconfig string) corev1.Secret { } func TestUpdateConfig(t *testing.T) { + cancelCalledCount := 0 + cancelCalled := func() { cancelCalledCount++ } + cases := map[string]struct { reconcileFor string remoteClients map[string]*remoteClient @@ -86,6 +104,8 @@ func TestUpdateConfig(t *testing.T) { wantRemoteClients map[string]*remoteClient wantClusters []kueuealpha.MultiKueueCluster + wantRequeueAfter time.Duration + wantCancelCalled int }{ "new valid client is added": { reconcileFor: "worker1", @@ -113,7 +133,7 @@ func TestUpdateConfig(t *testing.T) { makeTestSecret("worker1", "worker1 kubeconfig"), }, remoteClients: map[string]*remoteClient{ - "worker1": newTestClient("worker1 old kubeconfig"), + "worker1": newTestClient("worker1 old kubeconfig", cancelCalled), }, wantClusters: []kueuealpha.MultiKueueCluster{ *utiltesting.MakeMultiKueueCluster("worker1").KubeConfig(kueuealpha.SecretLocationType, "worker1").Active(metav1.ConditionTrue, "Active", "Connected").Obj(), @@ -123,6 +143,7 @@ func TestUpdateConfig(t *testing.T) { kubeconfig: []byte("worker1 kubeconfig"), }, }, + wantCancelCalled: 1, }, "update client with valid path config": 
{ reconcileFor: "worker1", @@ -130,7 +151,7 @@ func TestUpdateConfig(t *testing.T) { *utiltesting.MakeMultiKueueCluster("worker1").KubeConfig(kueuealpha.PathLocationType, "testdata/worker1KubeConfig").Obj(), }, remoteClients: map[string]*remoteClient{ - "worker1": newTestClient("worker1 old kubeconfig"), + "worker1": newTestClient("worker1 old kubeconfig", cancelCalled), }, wantClusters: []kueuealpha.MultiKueueCluster{ *utiltesting.MakeMultiKueueCluster("worker1").KubeConfig(kueuealpha.PathLocationType, "testdata/worker1KubeConfig").Active(metav1.ConditionTrue, "Active", "Connected").Obj(), @@ -140,6 +161,7 @@ func TestUpdateConfig(t *testing.T) { kubeconfig: []byte("worker1 kubeconfig"), }, }, + wantCancelCalled: 1, }, "update client with invalid secret config": { reconcileFor: "worker1", @@ -150,11 +172,15 @@ func TestUpdateConfig(t *testing.T) { makeTestSecret("worker1", "invalid"), }, remoteClients: map[string]*remoteClient{ - "worker1": newTestClient("worker1 old kubeconfig"), + "worker1": newTestClient("worker1 old kubeconfig", cancelCalled), + }, + wantRemoteClients: map[string]*remoteClient{ + "worker1": newTestClient("invalid", nil), }, wantClusters: []kueuealpha.MultiKueueCluster{ *utiltesting.MakeMultiKueueCluster("worker1").KubeConfig(kueuealpha.SecretLocationType, "worker1").Active(metav1.ConditionFalse, "ClientConnectionFailed", "invalid kubeconfig").Obj(), }, + wantCancelCalled: 1, }, "update client with invalid path config": { reconcileFor: "worker1", @@ -162,11 +188,12 @@ func TestUpdateConfig(t *testing.T) { *utiltesting.MakeMultiKueueCluster("worker1").KubeConfig(kueuealpha.PathLocationType, "").Obj(), }, remoteClients: map[string]*remoteClient{ - "worker1": newTestClient("worker1 old kubeconfig"), + "worker1": newTestClient("worker1 old kubeconfig", cancelCalled), }, wantClusters: []kueuealpha.MultiKueueCluster{ *utiltesting.MakeMultiKueueCluster("worker1").KubeConfig(kueuealpha.PathLocationType, "").Active(metav1.ConditionFalse, "BadConfig", "open : no such file or directory").Obj(), }, + wantCancelCalled: 1, }, "missing cluster is removed": { reconcileFor: "worker2", @@ -174,8 +201,8 @@ func TestUpdateConfig(t *testing.T) { *utiltesting.MakeMultiKueueCluster("worker1").KubeConfig(kueuealpha.SecretLocationType, "worker1").Obj(), }, remoteClients: map[string]*remoteClient{ - "worker1": newTestClient("worker1 kubeconfig"), - "worker2": newTestClient("worker2 kubeconfig"), + "worker1": newTestClient("worker1 kubeconfig", cancelCalled), + "worker2": newTestClient("worker2 kubeconfig", cancelCalled), }, wantClusters: []kueuealpha.MultiKueueCluster{ *utiltesting.MakeMultiKueueCluster("worker1").KubeConfig(kueuealpha.SecretLocationType, "worker1").Obj(), @@ -185,6 +212,97 @@ func TestUpdateConfig(t *testing.T) { kubeconfig: []byte("worker1 kubeconfig"), }, }, + wantCancelCalled: 1, + }, + "update client config, nowatch": { + reconcileFor: "worker1", + clusters: []kueuealpha.MultiKueueCluster{ + *utiltesting.MakeMultiKueueCluster("worker1").KubeConfig(kueuealpha.SecretLocationType, "worker1").Obj(), + }, + secrets: []corev1.Secret{ + makeTestSecret("worker1", "nowatch"), + }, + remoteClients: map[string]*remoteClient{ + "worker1": newTestClient("worker1 old kubeconfig", cancelCalled), + }, + wantRemoteClients: map[string]*remoteClient{ + "worker1": setReconnectState(newTestClient("nowatch", nil), 1), + }, + wantClusters: []kueuealpha.MultiKueueCluster{ + *utiltesting.MakeMultiKueueCluster("worker1").KubeConfig(kueuealpha.SecretLocationType, 
"worker1").Active(metav1.ConditionFalse, "ClientConnectionFailed", "client cannot watch").Obj(), + }, + wantRequeueAfter: 5 * time.Second, + wantCancelCalled: 1, + }, + "update client config, nowatch 3rd try": { + reconcileFor: "worker1", + clusters: []kueuealpha.MultiKueueCluster{ + *utiltesting.MakeMultiKueueCluster("worker1").KubeConfig(kueuealpha.SecretLocationType, "worker1").Active(metav1.ConditionFalse, "ClientConnectionFailed", "client cannot watch").Obj(), + }, + secrets: []corev1.Secret{ + makeTestSecret("worker1", "nowatch"), + }, + remoteClients: map[string]*remoteClient{ + "worker1": setReconnectState(newTestClient("nowatch", cancelCalled), 2), + }, + wantRemoteClients: map[string]*remoteClient{ + "worker1": setReconnectState(newTestClient("nowatch", nil), 3), + }, + wantClusters: []kueuealpha.MultiKueueCluster{ + *utiltesting.MakeMultiKueueCluster("worker1").KubeConfig(kueuealpha.SecretLocationType, "worker1").Active(metav1.ConditionFalse, "ClientConnectionFailed", "client cannot watch").Obj(), + }, + wantRequeueAfter: 20 * time.Second, + wantCancelCalled: 1, + }, + "failed attempts are set to 0 on successful connection": { + reconcileFor: "worker1", + clusters: []kueuealpha.MultiKueueCluster{ + *utiltesting.MakeMultiKueueCluster("worker1"). + KubeConfig(kueuealpha.SecretLocationType, "worker1"). + Active(metav1.ConditionFalse, "ClientConnectionFailed", "client cannot watch"). + Obj(), + }, + secrets: []corev1.Secret{ + makeTestSecret("worker1", "good config"), + }, + remoteClients: map[string]*remoteClient{ + "worker1": setReconnectState(newTestClient("nowatch", cancelCalled), 5), + }, + wantRemoteClients: map[string]*remoteClient{ + "worker1": newTestClient("good config", nil), + }, + wantClusters: []kueuealpha.MultiKueueCluster{ + *utiltesting.MakeMultiKueueCluster("worker1"). + KubeConfig(kueuealpha.SecretLocationType, "worker1"). + Active(metav1.ConditionTrue, "Active", "Connected"). + Obj(), + }, + wantCancelCalled: 1, + }, + "failed attempts are set to 0 on config change": { + reconcileFor: "worker1", + clusters: []kueuealpha.MultiKueueCluster{ + *utiltesting.MakeMultiKueueCluster("worker1"). + KubeConfig(kueuealpha.SecretLocationType, "worker1"). + Active(metav1.ConditionFalse, "ClientConnectionFailed", "client cannot watch"). + Obj(), + }, + secrets: []corev1.Secret{ + makeTestSecret("worker1", "invalid"), + }, + remoteClients: map[string]*remoteClient{ + "worker1": setReconnectState(newTestClient("nowatch", cancelCalled), 5), + }, + wantRemoteClients: map[string]*remoteClient{ + "worker1": newTestClient("invalid", nil), + }, + wantClusters: []kueuealpha.MultiKueueCluster{ + *utiltesting.MakeMultiKueueCluster("worker1"). + KubeConfig(kueuealpha.SecretLocationType, "worker1"). + Active(metav1.ConditionFalse, "ClientConnectionFailed", "invalid kubeconfig"). 
+ Obj(), + }, + wantCancelCalled: 1, }, } @@ -204,11 +322,20 @@ func TestUpdateConfig(t *testing.T) { } reconciler.builderOverride = fakeClientBuilder - _, gotErr := reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: types.NamespacedName{Name: tc.reconcileFor}}) + cancelCalledCount = 0 + res, gotErr := reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: types.NamespacedName{Name: tc.reconcileFor}}) if gotErr != nil { t.Errorf("unexpected reconcile error: %s", gotErr) } + if diff := cmp.Diff(tc.wantRequeueAfter, res.RequeueAfter); diff != "" { + t.Errorf("unexpected requeue after (-want/+got):\n%s", diff) + } + + if tc.wantCancelCalled != cancelCalledCount { + t.Errorf("unexpected watch cancel call count want: %d, got: %d", tc.wantCancelCalled, cancelCalledCount) + } + lst := &kueuealpha.MultiKueueClusterList{} gotErr = c.List(ctx, lst) if gotErr != nil { @@ -222,7 +349,12 @@ func TestUpdateConfig(t *testing.T) { } if diff := cmp.Diff(tc.wantRemoteClients, reconciler.remoteClients, cmpopts.EquateEmpty(), - cmp.Comparer(func(a, b remoteClient) bool { return string(a.kubeconfig) == string(b.kubeconfig) })); diff != "" { + cmp.Comparer(func(a, b *remoteClient) bool { + if a.failedConnAttempts != b.failedConnAttempts { + return false + } + return string(a.kubeconfig) == string(b.kubeconfig) + })); diff != "" { t.Errorf("unexpected controllers (-want/+got):\n%s", diff) } }) @@ -341,7 +473,7 @@ func TestRemoteClientGC(t *testing.T) { worker1Builder = worker1Builder.WithLists(&kueue.WorkloadList{Items: tc.workersWorkloads}, &batchv1.JobList{Items: tc.workersJobs}) worker1Client := worker1Builder.Build() - w1remoteClient := newRemoteClient(managerClient, nil, defaultOrigin) + w1remoteClient := newRemoteClient(managerClient, nil, nil, defaultOrigin, "") w1remoteClient.client = worker1Client w1remoteClient.runGC(ctx) diff --git a/pkg/controller/admissionchecks/multikueue/workload_test.go b/pkg/controller/admissionchecks/multikueue/workload_test.go index eb5e62f6e3..5edcbd994b 100644 --- a/pkg/controller/admissionchecks/multikueue/workload_test.go +++ b/pkg/controller/admissionchecks/multikueue/workload_test.go @@ -451,7 +451,7 @@ func TestWlReconcile(t *testing.T) { worker1Builder = worker1Builder.WithLists(&kueue.WorkloadList{Items: tc.worker1Workloads}, &batchv1.JobList{Items: tc.worker1Jobs}) worker1Client := worker1Builder.Build() - w1remoteClient := newRemoteClient(managerClient, nil, defaultOrigin) + w1remoteClient := newRemoteClient(managerClient, nil, nil, defaultOrigin, "") w1remoteClient.client = worker1Client cRec.remoteClients["worker1"] = w1remoteClient @@ -481,7 +481,7 @@ func TestWlReconcile(t *testing.T) { }) worker2Client = worker2Builder.Build() - w2remoteClient := newRemoteClient(managerClient, nil, defaultOrigin) + w2remoteClient := newRemoteClient(managerClient, nil, nil, defaultOrigin, "") w2remoteClient.client = worker2Client cRec.remoteClients["worker2"] = w2remoteClient diff --git a/test/e2e/multikueue/e2e_test.go b/test/e2e/multikueue/e2e_test.go index e287148bb3..6b49a084a1 100644 --- a/test/e2e/multikueue/e2e_test.go +++ b/test/e2e/multikueue/e2e_test.go @@ -17,6 +17,8 @@ limitations under the License. 
package mke2e import ( + "os/exec" + "github.com/google/go-cmp/cmp/cmpopts" "github.com/onsi/ginkgo/v2" "github.com/onsi/gomega" @@ -340,4 +342,50 @@ var _ = ginkgo.Describe("MultiKueue", func() { }) }) }) + ginkgo.When("The connection to a worker cluster is unreliable", func() { + ginkgo.It("Should update the cluster status to reflect the connection state", func() { + ginkgo.By("Disconnecting worker1 container from the kind network", func() { + cmd := exec.Command("docker", "network", "disconnect", "kind", "kind-worker1-control-plane") + output, err := cmd.CombinedOutput() + gomega.Expect(err).NotTo(gomega.HaveOccurred(), "%s: %s", err, output) + }) + + worker1ClusterKey := client.ObjectKeyFromObject(workerCluster1) + + ginkgo.By("Waiting for the cluster do become inactive", func() { + readClient := &kueuealpha.MultiKueueCluster{} + gomega.Eventually(func(g gomega.Gomega) { + g.Expect(k8sManagerClient.Get(ctx, worker1ClusterKey, readClient)).To(gomega.Succeed()) + g.Expect(readClient.Status.Conditions).To(gomega.ContainElement(gomega.BeComparableTo( + metav1.Condition{ + Type: kueuealpha.MultiKueueClusterActive, + Status: metav1.ConditionFalse, + Reason: "ClientConnectionFailed", + }, + cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime", "Message")))) + }, util.Timeout, util.Interval).Should(gomega.Succeed()) + }) + + ginkgo.By("Reconnecting worker1 container to the kind network", func() { + cmd := exec.Command("docker", "network", "connect", "kind", "kind-worker1-control-plane") + output, err := cmd.CombinedOutput() + gomega.Expect(err).NotTo(gomega.HaveOccurred(), "%s: %s", err, output) + }) + + ginkgo.By("Waiting for the cluster do become active", func() { + readClient := &kueuealpha.MultiKueueCluster{} + gomega.Eventually(func(g gomega.Gomega) { + g.Expect(k8sManagerClient.Get(ctx, worker1ClusterKey, readClient)).To(gomega.Succeed()) + g.Expect(readClient.Status.Conditions).To(gomega.ContainElement(gomega.BeComparableTo( + metav1.Condition{ + Type: kueuealpha.MultiKueueClusterActive, + Status: metav1.ConditionTrue, + Reason: "Active", + Message: "Connected", + }, + cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime")))) + }, util.Timeout, util.Interval).Should(gomega.Succeed()) + }) + }) + }) }) From 6c72c7bcd9b881966448c85b245db613ca28f1c7 Mon Sep 17 00:00:00 2001 From: k8s-infra-cherrypick-robot <90416843+k8s-infra-cherrypick-robot@users.noreply.github.com> Date: Thu, 7 Mar 2024 10:59:41 -0800 Subject: [PATCH 11/48] Generate an artifact for the visibility-api (#1813) Change-Id: I5e44372ee23b253c6eeb2a71c8a97dd0370e5072 Co-authored-by: Aldo Culquicondor --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index 3acfe1016a..5b72469852 100644 --- a/Makefile +++ b/Makefile @@ -289,6 +289,7 @@ artifacts: kustomize yq helm $(KUSTOMIZE) build config/dev -o artifacts/manifests-dev.yaml $(KUSTOMIZE) build config/alpha-enabled -o artifacts/manifests-alpha-enabled.yaml $(KUSTOMIZE) build config/prometheus -o artifacts/prometheus.yaml + $(KUSTOMIZE) build config/visibility -o artifacts/visibility-api.yaml @$(call clean-manifests) # Update the image tag and policy $(YQ) e '.controllerManager.manager.image.repository = "$(IMAGE_REPO)" | .controllerManager.manager.image.tag = "$(GIT_TAG)" | .controllerManager.manager.image.pullPolicy = "IfNotPresent"' -i charts/kueue/values.yaml From e65c872ae0aca1fc23215e872bb8286b69445b25 Mon Sep 17 00:00:00 2001 From: k8s-infra-cherrypick-robot <90416843+k8s-infra-cherrypick-robot@users.noreply.github.com> 
Date: Wed, 13 Mar 2024 05:07:34 -0700 Subject: [PATCH 12/48] fix incorrect quota management when lendingLimit enabled in preemption (#1826) Signed-off-by: B1F030 Co-authored-by: B1F030 Co-authored-by: kerthcet --- pkg/cache/clusterqueue.go | 23 + pkg/cache/snapshot.go | 14 +- pkg/cache/snapshot_test.go | 443 ++++++++++++++++++++ pkg/scheduler/preemption/preemption_test.go | 29 +- 4 files changed, 501 insertions(+), 8 deletions(-) diff --git a/pkg/cache/clusterqueue.go b/pkg/cache/clusterqueue.go index 7fe289a09e..949338e7ff 100644 --- a/pkg/cache/clusterqueue.go +++ b/pkg/cache/clusterqueue.go @@ -453,6 +453,29 @@ func updateUsage(wi *workload.Info, flvUsage FlavorResourceQuantities, m int64) } } +func updateCohortUsage(wi *workload.Info, cq *ClusterQueue, m int64) { + for _, ps := range wi.TotalRequests { + for wlRes, wlResFlv := range ps.Flavors { + v, wlResExist := ps.Requests[wlRes] + flv, flvExist := cq.Cohort.Usage[wlResFlv] + if flvExist && wlResExist { + if _, exists := flv[wlRes]; exists { + after := cq.Usage[wlResFlv][wlRes] - cq.guaranteedQuota(wlResFlv, wlRes) + // rollback update cq.Usage + before := after - v*m + if before > 0 { + flv[wlRes] -= before + } + // simulate updating cq.Usage + if after > 0 { + flv[wlRes] += after + } + } + } + } + } +} + func (c *ClusterQueue) addLocalQueue(q *kueue.LocalQueue) error { qKey := queueKey(q) if _, ok := c.localQueues[qKey]; ok { diff --git a/pkg/cache/snapshot.go b/pkg/cache/snapshot.go index e96cbd4867..3a0cc60dde 100644 --- a/pkg/cache/snapshot.go +++ b/pkg/cache/snapshot.go @@ -41,18 +41,26 @@ func (s *Snapshot) RemoveWorkload(wl *workload.Info) { delete(cq.Workloads, workload.Key(wl.Obj)) updateUsage(wl, cq.Usage, -1) if cq.Cohort != nil { - updateUsage(wl, cq.Cohort.Usage, -1) + if features.Enabled(features.LendingLimit) { + updateCohortUsage(wl, cq, -1) + } else { + updateUsage(wl, cq.Cohort.Usage, -1) + } } } -// AddWorkload removes a workload from its corresponding ClusterQueue and +// AddWorkload adds a workload from its corresponding ClusterQueue and // updates resource usage. func (s *Snapshot) AddWorkload(wl *workload.Info) { cq := s.ClusterQueues[wl.ClusterQueue] cq.Workloads[workload.Key(wl.Obj)] = wl updateUsage(wl, cq.Usage, 1) if cq.Cohort != nil { - updateUsage(wl, cq.Cohort.Usage, 1) + if features.Enabled(features.LendingLimit) { + updateCohortUsage(wl, cq, 1) + } else { + updateUsage(wl, cq.Cohort.Usage, 1) + } } } diff --git a/pkg/cache/snapshot_test.go b/pkg/cache/snapshot_test.go index d54ae71dbb..abc6a8ab71 100644 --- a/pkg/cache/snapshot_test.go +++ b/pkg/cache/snapshot_test.go @@ -864,3 +864,446 @@ func TestSnapshotAddRemoveWorkload(t *testing.T) { }) } } + +func TestSnapshotAddRemoveWorkloadWithLendingLimit(t *testing.T) { + _ = features.SetEnable(features.LendingLimit, true) + flavors := []*kueue.ResourceFlavor{ + utiltesting.MakeResourceFlavor("default").Obj(), + } + clusterQueues := []*kueue.ClusterQueue{ + utiltesting.MakeClusterQueue("lend-a"). + Cohort("lend"). + ResourceGroup( + *utiltesting.MakeFlavorQuotas("default").Resource(corev1.ResourceCPU, "10", "", "4").Obj(), + ). + Preemption(kueue.ClusterQueuePreemption{ + WithinClusterQueue: kueue.PreemptionPolicyLowerPriority, + ReclaimWithinCohort: kueue.PreemptionPolicyLowerPriority, + }). + Obj(), + utiltesting.MakeClusterQueue("lend-b"). + Cohort("lend"). + ResourceGroup( + *utiltesting.MakeFlavorQuotas("default").Resource(corev1.ResourceCPU, "10", "", "6").Obj(), + ). 
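// With the LendingLimit feature enabled, a queue's guaranteed quota is the part of
// its nominal quota it never lends out: lend-a guarantees 10-4=6 CPUs and lend-b
// guarantees 10-6=4 CPUs (the 6_000 and 4_000 milli-CPU GuaranteedQuota values in
// the expected snapshots below). updateCohortUsage above therefore charges the
// cohort only for the usage that exceeds a queue's guaranteed portion, which is
// what these add/remove snapshot cases verify.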
+ Preemption(kueue.ClusterQueuePreemption{ + WithinClusterQueue: kueue.PreemptionPolicyNever, + ReclaimWithinCohort: kueue.PreemptionPolicyAny, + }). + Obj(), + } + workloads := []kueue.Workload{ + *utiltesting.MakeWorkload("lend-a-1", ""). + Request(corev1.ResourceCPU, "1"). + ReserveQuota(utiltesting.MakeAdmission("lend-a").Assignment(corev1.ResourceCPU, "default", "1").Obj()). + Obj(), + *utiltesting.MakeWorkload("lend-a-2", ""). + Request(corev1.ResourceCPU, "9"). + ReserveQuota(utiltesting.MakeAdmission("lend-a").Assignment(corev1.ResourceCPU, "default", "9").Obj()). + Obj(), + *utiltesting.MakeWorkload("lend-a-3", ""). + Request(corev1.ResourceCPU, "6"). + ReserveQuota(utiltesting.MakeAdmission("lend-a").Assignment(corev1.ResourceCPU, "default", "6").Obj()). + Obj(), + *utiltesting.MakeWorkload("lend-b-1", ""). + Request(corev1.ResourceCPU, "4"). + ReserveQuota(utiltesting.MakeAdmission("lend-b").Assignment(corev1.ResourceCPU, "default", "4").Obj()). + Obj(), + } + + ctx := context.Background() + cl := utiltesting.NewClientBuilder().WithLists(&kueue.WorkloadList{Items: workloads}).Build() + + cqCache := New(cl) + for _, flv := range flavors { + cqCache.AddOrUpdateResourceFlavor(flv) + } + for _, cq := range clusterQueues { + if err := cqCache.AddClusterQueue(ctx, cq); err != nil { + t.Fatalf("Couldn't add ClusterQueue to cache: %v", err) + } + } + wlInfos := make(map[string]*workload.Info, len(workloads)) + for _, cq := range cqCache.clusterQueues { + for _, wl := range cq.Workloads { + wlInfos[workload.Key(wl.Obj)] = wl + } + } + initialSnapshot := cqCache.Snapshot() + initialCohortResources := initialSnapshot.ClusterQueues["lend-a"].Cohort.RequestableResources + cases := map[string]struct { + remove []string + add []string + want Snapshot + }{ + "remove all then add all": { + remove: []string{"/lend-a-1", "/lend-a-2", "/lend-a-3", "/lend-b-1"}, + add: []string{"/lend-a-1", "/lend-a-2", "/lend-a-3", "/lend-b-1"}, + want: initialSnapshot, + }, + "remove all": { + remove: []string{"/lend-a-1", "/lend-a-2", "/lend-a-3", "/lend-b-1"}, + want: func() Snapshot { + cohort := &Cohort{ + Name: "lend", + AllocatableResourceGeneration: 2, + RequestableResources: initialCohortResources, + Usage: FlavorResourceQuantities{ + "default": {corev1.ResourceCPU: 0}, + }, + } + return Snapshot{ + ClusterQueues: map[string]*ClusterQueue{ + "lend-a": { + Name: "lend-a", + Cohort: cohort, + Workloads: make(map[string]*workload.Info), + ResourceGroups: cqCache.clusterQueues["lend-a"].ResourceGroups, + FlavorFungibility: defaultFlavorFungibility, + AllocatableResourceGeneration: 1, + Usage: FlavorResourceQuantities{ + "default": {corev1.ResourceCPU: 0}, + }, + GuaranteedQuota: FlavorResourceQuantities{ + "default": { + corev1.ResourceCPU: 6_000, + }, + }, + }, + "lend-b": { + Name: "lend-b", + Cohort: cohort, + Workloads: make(map[string]*workload.Info), + ResourceGroups: cqCache.clusterQueues["lend-b"].ResourceGroups, + FlavorFungibility: defaultFlavorFungibility, + AllocatableResourceGeneration: 1, + Usage: FlavorResourceQuantities{ + "default": {corev1.ResourceCPU: 0}, + }, + GuaranteedQuota: FlavorResourceQuantities{ + "default": { + corev1.ResourceCPU: 4_000, + }, + }, + }, + }, + } + }(), + }, + "remove workload, but still using quota over GuaranteedQuota": { + remove: []string{"/lend-a-2"}, + want: func() Snapshot { + cohort := &Cohort{ + Name: "lend", + AllocatableResourceGeneration: 2, + RequestableResources: initialCohortResources, + Usage: FlavorResourceQuantities{ + "default": 
{corev1.ResourceCPU: 1_000}, + }, + } + return Snapshot{ + ClusterQueues: map[string]*ClusterQueue{ + "lend-a": { + Name: "lend-a", + Cohort: cohort, + Workloads: make(map[string]*workload.Info), + ResourceGroups: cqCache.clusterQueues["lend-a"].ResourceGroups, + FlavorFungibility: defaultFlavorFungibility, + AllocatableResourceGeneration: 1, + Usage: FlavorResourceQuantities{ + "default": {corev1.ResourceCPU: 7_000}, + }, + GuaranteedQuota: FlavorResourceQuantities{ + "default": { + corev1.ResourceCPU: 6_000, + }, + }, + }, + "lend-b": { + Name: "lend-b", + Cohort: cohort, + Workloads: make(map[string]*workload.Info), + ResourceGroups: cqCache.clusterQueues["lend-b"].ResourceGroups, + FlavorFungibility: defaultFlavorFungibility, + AllocatableResourceGeneration: 1, + Usage: FlavorResourceQuantities{ + "default": {corev1.ResourceCPU: 4_000}, + }, + GuaranteedQuota: FlavorResourceQuantities{ + "default": { + corev1.ResourceCPU: 4_000, + }, + }, + }, + }, + } + }(), + }, + "remove wokload, using same quota as GuaranteedQuota": { + remove: []string{"/lend-a-1", "/lend-a-2"}, + want: func() Snapshot { + cohort := &Cohort{ + Name: "lend", + AllocatableResourceGeneration: 2, + RequestableResources: initialCohortResources, + Usage: FlavorResourceQuantities{ + "default": {corev1.ResourceCPU: 0}, + }, + } + return Snapshot{ + ClusterQueues: map[string]*ClusterQueue{ + "lend-a": { + Name: "lend-a", + Cohort: cohort, + Workloads: make(map[string]*workload.Info), + ResourceGroups: cqCache.clusterQueues["lend-a"].ResourceGroups, + FlavorFungibility: defaultFlavorFungibility, + AllocatableResourceGeneration: 1, + Usage: FlavorResourceQuantities{ + "default": {corev1.ResourceCPU: 6_000}, + }, + GuaranteedQuota: FlavorResourceQuantities{ + "default": { + corev1.ResourceCPU: 6_000, + }, + }, + }, + "lend-b": { + Name: "lend-b", + Cohort: cohort, + Workloads: make(map[string]*workload.Info), + ResourceGroups: cqCache.clusterQueues["lend-b"].ResourceGroups, + FlavorFungibility: defaultFlavorFungibility, + AllocatableResourceGeneration: 1, + Usage: FlavorResourceQuantities{ + "default": {corev1.ResourceCPU: 4_000}, + }, + GuaranteedQuota: FlavorResourceQuantities{ + "default": { + corev1.ResourceCPU: 4_000, + }, + }, + }, + }, + } + }(), + }, + "remove workload, using less quota than GuaranteedQuota": { + remove: []string{"/lend-a-2", "/lend-a-3"}, + want: func() Snapshot { + cohort := &Cohort{ + Name: "lend", + AllocatableResourceGeneration: 2, + RequestableResources: initialCohortResources, + Usage: FlavorResourceQuantities{ + "default": {corev1.ResourceCPU: 0}, + }, + } + return Snapshot{ + ClusterQueues: map[string]*ClusterQueue{ + "lend-a": { + Name: "lend-a", + Cohort: cohort, + Workloads: make(map[string]*workload.Info), + ResourceGroups: cqCache.clusterQueues["lend-a"].ResourceGroups, + FlavorFungibility: defaultFlavorFungibility, + AllocatableResourceGeneration: 1, + Usage: FlavorResourceQuantities{ + "default": {corev1.ResourceCPU: 1_000}, + }, + GuaranteedQuota: FlavorResourceQuantities{ + "default": { + corev1.ResourceCPU: 6_000, + }, + }, + }, + "lend-b": { + Name: "lend-b", + Cohort: cohort, + Workloads: make(map[string]*workload.Info), + ResourceGroups: cqCache.clusterQueues["lend-b"].ResourceGroups, + FlavorFungibility: defaultFlavorFungibility, + AllocatableResourceGeneration: 1, + Usage: FlavorResourceQuantities{ + "default": {corev1.ResourceCPU: 4_000}, + }, + GuaranteedQuota: FlavorResourceQuantities{ + "default": { + corev1.ResourceCPU: 4_000, + }, + }, + }, + }, + } + }(), + }, + "remove 
all then add workload, using less quota than GuaranteedQuota": { + remove: []string{"/lend-a-1", "/lend-a-2", "/lend-a-3", "/lend-b-1"}, + add: []string{"/lend-a-1"}, + want: func() Snapshot { + cohort := &Cohort{ + Name: "lend", + AllocatableResourceGeneration: 2, + RequestableResources: initialCohortResources, + Usage: FlavorResourceQuantities{ + "default": {corev1.ResourceCPU: 0}, + }, + } + return Snapshot{ + ClusterQueues: map[string]*ClusterQueue{ + "lend-a": { + Name: "lend-a", + Cohort: cohort, + Workloads: make(map[string]*workload.Info), + ResourceGroups: cqCache.clusterQueues["lend-a"].ResourceGroups, + FlavorFungibility: defaultFlavorFungibility, + AllocatableResourceGeneration: 1, + Usage: FlavorResourceQuantities{ + "default": {corev1.ResourceCPU: 1_000}, + }, + GuaranteedQuota: FlavorResourceQuantities{ + "default": { + corev1.ResourceCPU: 6_000, + }, + }, + }, + "lend-b": { + Name: "lend-b", + Cohort: cohort, + Workloads: make(map[string]*workload.Info), + ResourceGroups: cqCache.clusterQueues["lend-b"].ResourceGroups, + FlavorFungibility: defaultFlavorFungibility, + AllocatableResourceGeneration: 1, + Usage: FlavorResourceQuantities{ + "default": {corev1.ResourceCPU: 0}, + }, + GuaranteedQuota: FlavorResourceQuantities{ + "default": { + corev1.ResourceCPU: 4_000, + }, + }, + }, + }, + } + }(), + }, + "remove all then add workload, using same quota as GuaranteedQuota": { + remove: []string{"/lend-a-1", "/lend-a-2", "/lend-a-3", "/lend-b-1"}, + add: []string{"/lend-a-3"}, + want: func() Snapshot { + cohort := &Cohort{ + Name: "lend", + AllocatableResourceGeneration: 2, + RequestableResources: initialCohortResources, + Usage: FlavorResourceQuantities{ + "default": {corev1.ResourceCPU: 0}, + }, + } + return Snapshot{ + ClusterQueues: map[string]*ClusterQueue{ + "lend-a": { + Name: "lend-a", + Cohort: cohort, + Workloads: make(map[string]*workload.Info), + ResourceGroups: cqCache.clusterQueues["lend-a"].ResourceGroups, + FlavorFungibility: defaultFlavorFungibility, + AllocatableResourceGeneration: 1, + Usage: FlavorResourceQuantities{ + "default": {corev1.ResourceCPU: 6_000}, + }, + GuaranteedQuota: FlavorResourceQuantities{ + "default": { + corev1.ResourceCPU: 6_000, + }, + }, + }, + "lend-b": { + Name: "lend-b", + Cohort: cohort, + Workloads: make(map[string]*workload.Info), + ResourceGroups: cqCache.clusterQueues["lend-b"].ResourceGroups, + FlavorFungibility: defaultFlavorFungibility, + AllocatableResourceGeneration: 1, + Usage: FlavorResourceQuantities{ + "default": {corev1.ResourceCPU: 0}, + }, + GuaranteedQuota: FlavorResourceQuantities{ + "default": { + corev1.ResourceCPU: 4_000, + }, + }, + }, + }, + } + }(), + }, + "remove all then add workload, using quota over GuaranteedQuota": { + remove: []string{"/lend-a-1", "/lend-a-2", "/lend-a-3", "/lend-b-1"}, + add: []string{"/lend-a-2"}, + want: func() Snapshot { + cohort := &Cohort{ + Name: "lend", + AllocatableResourceGeneration: 2, + RequestableResources: initialCohortResources, + Usage: FlavorResourceQuantities{ + "default": {corev1.ResourceCPU: 3_000}, + }, + } + return Snapshot{ + ClusterQueues: map[string]*ClusterQueue{ + "lend-a": { + Name: "lend-a", + Cohort: cohort, + Workloads: make(map[string]*workload.Info), + ResourceGroups: cqCache.clusterQueues["lend-a"].ResourceGroups, + FlavorFungibility: defaultFlavorFungibility, + AllocatableResourceGeneration: 1, + Usage: FlavorResourceQuantities{ + "default": {corev1.ResourceCPU: 9_000}, + }, + GuaranteedQuota: FlavorResourceQuantities{ + "default": { + 
corev1.ResourceCPU: 6_000, + }, + }, + }, + "lend-b": { + Name: "lend-b", + Cohort: cohort, + Workloads: make(map[string]*workload.Info), + ResourceGroups: cqCache.clusterQueues["lend-b"].ResourceGroups, + FlavorFungibility: defaultFlavorFungibility, + AllocatableResourceGeneration: 1, + Usage: FlavorResourceQuantities{ + "default": {corev1.ResourceCPU: 0}, + }, + GuaranteedQuota: FlavorResourceQuantities{ + "default": { + corev1.ResourceCPU: 4_000, + }, + }, + }, + }, + } + }(), + }, + } + cmpOpts := append(snapCmpOpts, + cmpopts.IgnoreFields(ClusterQueue{}, "NamespaceSelector", "Preemption", "Status"), + cmpopts.IgnoreFields(Snapshot{}, "ResourceFlavors"), + cmpopts.IgnoreTypes(&workload.Info{})) + for name, tc := range cases { + t.Run(name, func(t *testing.T) { + snap := cqCache.Snapshot() + for _, name := range tc.remove { + snap.RemoveWorkload(wlInfos[name]) + } + for _, name := range tc.add { + snap.AddWorkload(wlInfos[name]) + } + if diff := cmp.Diff(tc.want, snap, cmpOpts...); diff != "" { + t.Errorf("Unexpected snapshot state after operations (-want,+got):\n%s", diff) + } + }) + } +} diff --git a/pkg/scheduler/preemption/preemption_test.go b/pkg/scheduler/preemption/preemption_test.go index f4fc733231..36f948ec79 100644 --- a/pkg/scheduler/preemption/preemption_test.go +++ b/pkg/scheduler/preemption/preemption_test.go @@ -206,7 +206,6 @@ func TestPreemption(t *testing.T) { Cohort("cohort-lend"). ResourceGroup(*utiltesting.MakeFlavorQuotas("default"). Resource(corev1.ResourceCPU, "6", "", "4"). - Resource(corev1.ResourceMemory, "3Gi", "", "2Gi"). Obj(), ). Preemption(kueue.ClusterQueuePreemption{ @@ -217,13 +216,12 @@ func TestPreemption(t *testing.T) { utiltesting.MakeClusterQueue("lend2"). Cohort("cohort-lend"). ResourceGroup(*utiltesting.MakeFlavorQuotas("default"). - Resource(corev1.ResourceCPU, "6", "", "4"). - Resource(corev1.ResourceMemory, "3Gi", "", "2Gi"). + Resource(corev1.ResourceCPU, "6", "", "2"). Obj(), ). Preemption(kueue.ClusterQueuePreemption{ - WithinClusterQueue: kueue.PreemptionPolicyNever, - ReclaimWithinCohort: kueue.PreemptionPolicyAny, + WithinClusterQueue: kueue.PreemptionPolicyLowerPriority, + ReclaimWithinCohort: kueue.PreemptionPolicyLowerPriority, }). Obj(), } @@ -1043,6 +1041,27 @@ func TestPreemption(t *testing.T) { wantPreempted: sets.New("/lend1-low", "/lend2-low"), enableLendingLimit: true, }, + "cannot preempt from other ClusterQueues if exceeds requestable quota including lending limit": { + admitted: []kueue.Workload{ + *utiltesting.MakeWorkload("lend2-low", ""). + Priority(-1). + Request(corev1.ResourceCPU, "10"). + ReserveQuota(utiltesting.MakeAdmission("lend2").Assignment(corev1.ResourceCPU, "default", "10").Obj()). + Obj(), + }, + incoming: utiltesting.MakeWorkload("in", ""). + Request(corev1.ResourceCPU, "9"). 
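// With lendingLimit in effect, lend1 can use at most its own nominal 6 CPUs plus
// the 2 CPUs lend2 is willing to lend, i.e. 8 CPUs in total, so this 9-CPU
// incoming workload cannot fit even if everything else in the cohort were
// preempted; hence no preemption targets are expected (wantPreempted is nil).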
+ Obj(), + targetCQ: "lend1", + assignment: singlePodSetAssignment(flavorassigner.ResourceAssignment{ + corev1.ResourceCPU: &flavorassigner.FlavorAssignment{ + Name: "default", + Mode: flavorassigner.Preempt, + }, + }), + wantPreempted: nil, + enableLendingLimit: true, + }, } for name, tc := range cases { t.Run(name, func(t *testing.T) { From 27dac3dab76335792a43c6a63fe4232ebc322103 Mon Sep 17 00:00:00 2001 From: k8s-infra-cherrypick-robot <90416843+k8s-infra-cherrypick-robot@users.noreply.github.com> Date: Wed, 13 Mar 2024 06:57:46 -0700 Subject: [PATCH 13/48] [release-0.6] Avoid requeue on ClusterQueue status update (#1832) * Avoid requeue on ClusterQueue status update Change-Id: Id028414a730a238549b7fddf5d42a2eafdf959c3 * Reclaimable pods can put pod out of inadmissible Change-Id: Id52ef3c92abd84317c4a7a71c3a20f6f28d67d25 * Delay requeueing based on RequeueState Change-Id: I5431e741cb30541a846b65fb26aa0b7b631eb80d --------- Co-authored-by: Aldo Culquicondor --- .../core/clusterqueue_controller.go | 3 +- pkg/controller/core/workload_controller.go | 34 +++++++++++++++++-- pkg/metrics/metrics.go | 6 ++-- pkg/queue/cluster_queue_impl.go | 1 + pkg/queue/cluster_queue_impl_test.go | 12 +++++++ pkg/queue/manager.go | 4 +-- pkg/queue/manager_test.go | 4 +-- test/integration/scheduler/scheduler_test.go | 3 ++ test/util/util.go | 15 ++++++++ 9 files changed, 72 insertions(+), 10 deletions(-) diff --git a/pkg/controller/core/clusterqueue_controller.go b/pkg/controller/core/clusterqueue_controller.go index 8d15d75b0a..757eb2185b 100644 --- a/pkg/controller/core/clusterqueue_controller.go +++ b/pkg/controller/core/clusterqueue_controller.go @@ -302,11 +302,12 @@ func (r *ClusterQueueReconciler) Update(e event.UpdateEvent) bool { return true } defer r.notifyWatchers(oldCq, newCq) + specUpdated := !equality.Semantic.DeepEqual(oldCq.Spec, newCq.Spec) if err := r.cache.UpdateClusterQueue(newCq); err != nil { log.Error(err, "Failed to update clusterQueue in cache") } - if err := r.qManager.UpdateClusterQueue(context.Background(), newCq); err != nil { + if err := r.qManager.UpdateClusterQueue(context.Background(), newCq, specUpdated); err != nil { log.Error(err, "Failed to update clusterQueue in queue manager") } diff --git a/pkg/controller/core/workload_controller.go b/pkg/controller/core/workload_controller.go index d7c7bebcac..9c8aa9666f 100644 --- a/pkg/controller/core/workload_controller.go +++ b/pkg/controller/core/workload_controller.go @@ -24,6 +24,7 @@ import ( "github.com/go-logr/logr" corev1 "k8s.io/api/core/v1" nodev1 "k8s.io/api/node/v1" + "k8s.io/apimachinery/pkg/api/equality" apimeta "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" @@ -541,9 +542,38 @@ func (r *WorkloadReconciler) Update(e event.UpdateEvent) bool { log.Error(err, "Failed to delete workload from cache") } }) - if !r.queues.AddOrUpdateWorkload(wlCopy) { - log.V(2).Info("Queue for workload didn't exist; ignored for now") + var backoff time.Duration + if wlCopy.Status.RequeueState != nil && wlCopy.Status.RequeueState.RequeueAt != nil { + backoff = time.Until(wl.Status.RequeueState.RequeueAt.Time) + } + if backoff <= 0 { + if !r.queues.AddOrUpdateWorkload(wlCopy) { + log.V(2).Info("Queue for workload didn't exist; ignored for now") + } + } else { + log.V(3).Info("Workload to be requeued after backoff", "backoff", backoff, "requeueAt", wl.Status.RequeueState.RequeueAt.Time) + time.AfterFunc(backoff, func() { + updatedWl := kueue.Workload{} + err 
:= r.client.Get(ctx, client.ObjectKeyFromObject(wl), &updatedWl) + if err == nil && workloadStatus(&updatedWl) == pending { + if !r.queues.AddOrUpdateWorkload(wlCopy) { + log.V(2).Info("Queue for workload didn't exist; ignored for now") + } else { + log.V(3).Info("Workload requeued after backoff") + } + } + }) } + case prevStatus == admitted && status == admitted && !equality.Semantic.DeepEqual(oldWl.Status.ReclaimablePods, wl.Status.ReclaimablePods): + // trigger the move of associated inadmissibleWorkloads, if there are any. + r.queues.QueueAssociatedInadmissibleWorkloadsAfter(ctx, wl, func() { + // Update the workload from cache while holding the queues lock + // to guarantee that requeued workloads are taken into account before + // the next scheduling cycle. + if err := r.cache.UpdateWorkload(oldWl, wlCopy); err != nil { + log.Error(err, "Failed to delete workload from cache") + } + }) default: // Workload update in the cache is handled here; however, some fields are immutable diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index 665e215c5d..74ded34f31 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -55,7 +55,7 @@ const ( var ( CQStatuses = []ClusterQueueStatus{CQStatusPending, CQStatusActive, CQStatusTerminating} - admissionAttemptsTotal = prometheus.NewCounterVec( + AdmissionAttemptsTotal = prometheus.NewCounterVec( prometheus.CounterOpts{ Subsystem: constants.KueueName, Name: "admission_attempts_total", @@ -177,7 +177,7 @@ For a ClusterQueue, the metric only reports a value of 1 for one of the statuses ) func AdmissionAttempt(result AdmissionResult, duration time.Duration) { - admissionAttemptsTotal.WithLabelValues(string(result)).Inc() + AdmissionAttemptsTotal.WithLabelValues(string(result)).Inc() admissionAttemptDuration.WithLabelValues(string(result)).Observe(duration.Seconds()) } @@ -290,7 +290,7 @@ func ClearClusterQueueResourceReservations(cqName, flavor, resource string) { func Register() { metrics.Registry.MustRegister( - admissionAttemptsTotal, + AdmissionAttemptsTotal, admissionAttemptDuration, PendingWorkloads, ReservingActiveWorkloads, diff --git a/pkg/queue/cluster_queue_impl.go b/pkg/queue/cluster_queue_impl.go index 84d2f0bfbd..cdde522513 100644 --- a/pkg/queue/cluster_queue_impl.go +++ b/pkg/queue/cluster_queue_impl.go @@ -116,6 +116,7 @@ func (c *clusterQueueBase) PushOrUpdate(wInfo *workload.Info) { // to potentially become admissible, unless the Eviction status changed // which can affect the workloads order in the queue. 
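// The ReclaimablePods comparison added below treats a change in reclaimable pods
// like a spec change: reclaiming pods shrinks the workload's effective resource
// request, so instead of staying in inadmissibleWorkloads the workload is pushed
// back to the heap and gets another scheduling attempt (see the
// "update reclaimable pods in inadmissible" test case).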
if equality.Semantic.DeepEqual(oldInfo.Obj.Spec, wInfo.Obj.Spec) && + equality.Semantic.DeepEqual(oldInfo.Obj.Status.ReclaimablePods, wInfo.Obj.Status.ReclaimablePods) && equality.Semantic.DeepEqual(apimeta.FindStatusCondition(oldInfo.Obj.Status.Conditions, kueue.WorkloadEvicted), apimeta.FindStatusCondition(wInfo.Obj.Status.Conditions, kueue.WorkloadEvicted)) { c.inadmissibleWorkloads[key] = wInfo diff --git a/pkg/queue/cluster_queue_impl_test.go b/pkg/queue/cluster_queue_impl_test.go index 5297b0fde3..bdaa3bf352 100644 --- a/pkg/queue/cluster_queue_impl_test.go +++ b/pkg/queue/cluster_queue_impl_test.go @@ -328,6 +328,18 @@ func TestClusterQueueImpl(t *testing.T) { inadmissibleWorkloadsToRequeue: []*workload.Info{workload.NewInfo(workloads[1]), workload.NewInfo(workloads[1])}, wantPending: 1, }, + "update reclaimable pods in inadmissible": { + inadmissibleWorkloadsToRequeue: []*workload.Info{ + workload.NewInfo(utiltesting.MakeWorkload("w", "").PodSets(*utiltesting.MakePodSet("main", 1).Request(corev1.ResourceCPU, "1").Obj()).Obj()), + }, + workloadsToUpdate: []*kueue.Workload{ + utiltesting.MakeWorkload("w", "").PodSets(*utiltesting.MakePodSet("main", 2).Request(corev1.ResourceCPU, "1").Obj()). + ReclaimablePods(kueue.ReclaimablePod{Name: "main", Count: 1}). + Obj(), + }, + wantActiveWorkloads: []string{"/w"}, + wantPending: 1, + }, } for name, test := range tests { diff --git a/pkg/queue/manager.go b/pkg/queue/manager.go index f7f0b33db7..14f6bb2581 100644 --- a/pkg/queue/manager.go +++ b/pkg/queue/manager.go @@ -141,7 +141,7 @@ func (m *Manager) AddClusterQueue(ctx context.Context, cq *kueue.ClusterQueue) e return nil } -func (m *Manager) UpdateClusterQueue(ctx context.Context, cq *kueue.ClusterQueue) error { +func (m *Manager) UpdateClusterQueue(ctx context.Context, cq *kueue.ClusterQueue, specUpdated bool) error { m.Lock() defer m.Unlock() cqImpl, ok := m.clusterQueues[cq.Name] @@ -162,7 +162,7 @@ func (m *Manager) UpdateClusterQueue(ctx context.Context, cq *kueue.ClusterQueue // TODO(#8): Selectively move workloads based on the exact event. // If any workload becomes admissible or the queue becomes active. - if m.queueAllInadmissibleWorkloadsInCohort(ctx, cqImpl) || (!oldActive && cqImpl.Active()) { + if (specUpdated && m.queueAllInadmissibleWorkloadsInCohort(ctx, cqImpl)) || (!oldActive && cqImpl.Active()) { m.reportPendingWorkloads(cq.Name, cqImpl) m.Broadcast() } diff --git a/pkg/queue/manager_test.go b/pkg/queue/manager_test.go index c42509b836..f170f2072c 100644 --- a/pkg/queue/manager_test.go +++ b/pkg/queue/manager_test.go @@ -166,7 +166,7 @@ func TestUpdateClusterQueue(t *testing.T) { // Put cq2 in the same cohort as cq1. 
clusterQueues[1].Spec.Cohort = clusterQueues[0].Spec.Cohort - if err := manager.UpdateClusterQueue(ctx, clusterQueues[1]); err != nil { + if err := manager.UpdateClusterQueue(ctx, clusterQueues[1], true); err != nil { t.Fatalf("Failed to update ClusterQueue: %v", err) } @@ -225,7 +225,7 @@ func TestClusterQueueToActive(t *testing.T) { t.Fatalf("Failed adding clusterQueue %v", err) } - if err := manager.UpdateClusterQueue(ctx, runningCq); err != nil { + if err := manager.UpdateClusterQueue(ctx, runningCq, false); err != nil { t.Fatalf("Failed to update ClusterQueue: %v", err) } diff --git a/test/integration/scheduler/scheduler_test.go b/test/integration/scheduler/scheduler_test.go index f6d2b00a7d..f139809fae 100644 --- a/test/integration/scheduler/scheduler_test.go +++ b/test/integration/scheduler/scheduler_test.go @@ -616,12 +616,14 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, onDemandFlavor, true) }) ginkgo.It("Should re-enqueue by the update event of ClusterQueue", func() { + metrics.AdmissionAttemptsTotal.Reset() wl := testing.MakeWorkload("on-demand-wl", ns.Name).Queue(queue.Name).Request(corev1.ResourceCPU, "6").Obj() gomega.Expect(k8sClient.Create(ctx, wl)).Should(gomega.Succeed()) util.ExpectWorkloadsToBePending(ctx, k8sClient, wl) util.ExpectPendingWorkloadsMetric(cq, 0, 1) util.ExpectReservingActiveWorkloadsMetric(cq, 0) util.ExpectAdmittedWorkloadsTotalMetric(cq, 0) + util.ExpectAdmissionAttemptsMetric(1, 0) ginkgo.By("updating ClusterQueue") updatedCq := &kueue.ClusterQueue{} @@ -645,6 +647,7 @@ var _ = ginkgo.Describe("Scheduler", func() { util.ExpectPendingWorkloadsMetric(cq, 0, 0) util.ExpectReservingActiveWorkloadsMetric(cq, 1) util.ExpectAdmittedWorkloadsTotalMetric(cq, 1) + util.ExpectAdmissionAttemptsMetric(1, 1) }) }) diff --git a/test/util/util.go b/test/util/util.go index 882d013bf8..2c38cf86cb 100644 --- a/test/util/util.go +++ b/test/util/util.go @@ -372,6 +372,21 @@ func ExpectWorkloadToBeAdmittedAs(ctx context.Context, k8sClient client.Client, }, Timeout, Interval).Should(gomega.BeComparableTo(admission)) } +var attemptStatuses = []metrics.AdmissionResult{metrics.AdmissionResultInadmissible, metrics.AdmissionResultSuccess} + +func ExpectAdmissionAttemptsMetric(pending, admitted int) { + vals := []int{pending, admitted} + + for i, status := range attemptStatuses { + metric := metrics.AdmissionAttemptsTotal.WithLabelValues(string(status)) + gomega.EventuallyWithOffset(1, func() int { + v, err := testutil.GetCounterMetricValue(metric) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + return int(v) + }, Timeout, Interval).Should(gomega.Equal(vals[i]), "pending_workloads with status=%s", status) + } +} + var pendingStatuses = []string{metrics.PendingStatusActive, metrics.PendingStatusInadmissible} func ExpectPendingWorkloadsMetric(cq *kueue.ClusterQueue, active, inadmissible int) { From f363420338f3f8bae3e1040f166f880c89ab5160 Mon Sep 17 00:00:00 2001 From: k8s-infra-cherrypick-robot <90416843+k8s-infra-cherrypick-robot@users.noreply.github.com> Date: Wed, 13 Mar 2024 13:56:37 -0700 Subject: [PATCH 14/48] Fix number of reconcilers for Pod integration (#1837) Change-Id: If8f430731a6967a361d0f5172ed408f57a0687bc Co-authored-by: Aldo Culquicondor --- config/components/manager/controller_manager_config.yaml | 2 +- pkg/controller/jobs/pod/pod_controller.go | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/config/components/manager/controller_manager_config.yaml 
b/config/components/manager/controller_manager_config.yaml index 15adb60561..e698ecf762 100644 --- a/config/components/manager/controller_manager_config.yaml +++ b/config/components/manager/controller_manager_config.yaml @@ -13,7 +13,7 @@ leaderElection: controller: groupKindConcurrency: Job.batch: 5 - Pod.: 5 + Pod: 5 Workload.kueue.x-k8s.io: 5 LocalQueue.kueue.x-k8s.io: 1 ClusterQueue.kueue.x-k8s.io: 1 diff --git a/pkg/controller/jobs/pod/pod_controller.go b/pkg/controller/jobs/pod/pod_controller.go index 071147eaed..d2eb86e040 100644 --- a/pkg/controller/jobs/pod/pod_controller.go +++ b/pkg/controller/jobs/pod/pod_controller.go @@ -43,6 +43,7 @@ import ( "k8s.io/utils/ptr" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" @@ -108,9 +109,14 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu } func (r *Reconciler) SetupWithManager(mgr ctrl.Manager) error { + concurrency := mgr.GetControllerOptions().GroupKindConcurrency[gvk.GroupKind().String()] + ctrl.Log.V(3).Info("Setting up Pod reconciler", "concurrency", concurrency) return ctrl.NewControllerManagedBy(mgr). Watches(&corev1.Pod{}, &podEventHandler{cleanedUpPodsExpectations: r.expectationsStore}).Named("v1_pod"). Watches(&kueue.Workload{}, &workloadHandler{}). + WithOptions(controller.Options{ + MaxConcurrentReconciles: concurrency, + }). Complete(r) } From d7df9e5141d334ce02a99e507cea8bdc23156b4c Mon Sep 17 00:00:00 2001 From: Aldo Culquicondor <1299064+alculquicondor@users.noreply.github.com> Date: Wed, 13 Mar 2024 22:47:42 -0700 Subject: [PATCH 15/48] [e2e] Restore the manager's image from the bash scripts. (#1830) (#1836) * [e2e] Return ginkgo exit code * [e2e] Restore manager's image in sh * Cleanup and remarks Change-Id: I66ccb5173149458dfcdd64d117dd81cd3ded9c5c Co-authored-by: Traian Schiau <55734665+trasc@users.noreply.github.com> --- Makefile | 2 -- hack/e2e-common.sh | 9 +++++++++ hack/e2e-test.sh | 3 ++- hack/multikueue-e2e-test.sh | 3 ++- 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 5b72469852..f6732d900c 100644 --- a/Makefile +++ b/Makefile @@ -190,14 +190,12 @@ run-test-e2e-%: K8S_VERSION = $(@:run-test-e2e-%=%) run-test-e2e-%: FORCE @echo Running e2e for k8s ${K8S_VERSION} E2E_KIND_VERSION="kindest/node:v$(K8S_VERSION)" KIND_CLUSTER_NAME=$(KIND_CLUSTER_NAME) CREATE_KIND_CLUSTER=$(CREATE_KIND_CLUSTER) ARTIFACTS="$(ARTIFACTS)/$@" IMAGE_TAG=$(IMAGE_TAG) GINKGO_ARGS="$(GINKGO_ARGS)" ./hack/e2e-test.sh - @$(call clean-manifests) JOBSET_VERSION = $(shell $(GO_CMD) list -m -f "{{.Version}}" sigs.k8s.io/jobset) run-test-multikueue-e2e-%: K8S_VERSION = $(@:run-test-multikueue-e2e-%=%) run-test-multikueue-e2e-%: FORCE @echo Running multikueue e2e for k8s ${K8S_VERSION} E2E_KIND_VERSION="kindest/node:v$(K8S_VERSION)" KIND_CLUSTER_NAME=$(KIND_CLUSTER_NAME) CREATE_KIND_CLUSTER=$(CREATE_KIND_CLUSTER) ARTIFACTS="$(ARTIFACTS)/$@" IMAGE_TAG=$(IMAGE_TAG) GINKGO_ARGS="$(GINKGO_ARGS)" JOBSET_VERSION=$(JOBSET_VERSION) ./hack/multikueue-e2e-test.sh - @$(call clean-manifests) .PHONY: ci-lint ci-lint: golangci-lint diff --git a/hack/e2e-common.sh b/hack/e2e-common.sh index b664e4e8ee..749cad1f6c 100644 --- a/hack/e2e-common.sh +++ b/hack/e2e-common.sh @@ -14,6 +14,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
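# Note on the additions below: INITIAL_IMAGE records the controller image currently
# set in config/components/manager/kustomization.yaml (read with yq) before the e2e
# scripts modify it, and restore_managers_image reverts that edit, essentially:
#
#   (cd config/components/manager && $KUSTOMIZE edit set image controller=$INITIAL_IMAGE)
#
# The cleanup functions in e2e-test.sh and multikueue-e2e-test.sh call it so the
# working tree is restored even when the deploy step fails.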
+export KUSTOMIZE="$ROOT_DIR"/bin/kustomize +export GINKGO="$ROOT_DIR"/bin/ginkgo +export KIND="$ROOT_DIR"/bin/kind +export YQ="$ROOT_DIR"/bin/yq # $1 - cluster name @@ -57,3 +61,8 @@ function cluster_kueue_deploy { fi } +export INITIAL_IMAGE=$($YQ '.images[] | select(.name == "controller") | [.newName, .newTag] | join(":")' config/components/manager/kustomization.yaml) + +function restore_managers_image { + (cd config/components/manager && $KUSTOMIZE edit set image controller=$INITIAL_IMAGE) +} diff --git a/hack/e2e-test.sh b/hack/e2e-test.sh index 61f60202c4..7039875bb9 100755 --- a/hack/e2e-test.sh +++ b/hack/e2e-test.sh @@ -35,7 +35,8 @@ function cleanup { fi cluster_cleanup $KIND_CLUSTER_NAME fi - exit 0 + #do the image restore here for the case when an error happened during deploy + restore_managers_image } function startup { diff --git a/hack/multikueue-e2e-test.sh b/hack/multikueue-e2e-test.sh index 7fcd903a07..b60ac50698 100755 --- a/hack/multikueue-e2e-test.sh +++ b/hack/multikueue-e2e-test.sh @@ -46,7 +46,8 @@ function cleanup { cluster_cleanup $WORKER1_KIND_CLUSTER_NAME cluster_cleanup $WORKER2_KIND_CLUSTER_NAME fi - exit 0 + #do the image restore here for the case when an error happened during deploy + restore_managers_image } From 3adac9075ef7fa150f9c5d8b23308b5c38414e1c Mon Sep 17 00:00:00 2001 From: Yuki Iwai Date: Fri, 15 Mar 2024 02:29:53 +0900 Subject: [PATCH 16/48] WaitForPodsReady: Reset the requeueState while reconciling instead of webhook (#1843) Signed-off-by: Yuki Iwai --- charts/kueue/templates/webhook/webhook.yaml | 2 -- config/components/webhook/manifests.yaml | 2 -- pkg/controller/core/workload_controller.go | 6 ++++ pkg/webhooks/workload_webhook.go | 6 +--- pkg/webhooks/workload_webhook_test.go | 16 ---------- .../scheduler/podsready/scheduler_test.go | 25 ++++++++++++++- test/integration/webhook/workload_test.go | 31 ------------------- 7 files changed, 31 insertions(+), 57 deletions(-) diff --git a/charts/kueue/templates/webhook/webhook.yaml b/charts/kueue/templates/webhook/webhook.yaml index 1de8e46241..413ebb38e4 100644 --- a/charts/kueue/templates/webhook/webhook.yaml +++ b/charts/kueue/templates/webhook/webhook.yaml @@ -287,10 +287,8 @@ webhooks: - v1beta1 operations: - CREATE - - UPDATE resources: - workloads - - workloads/status sideEffects: None --- apiVersion: admissionregistration.k8s.io/v1 diff --git a/config/components/webhook/manifests.yaml b/config/components/webhook/manifests.yaml index 46378039a5..c7bed96edc 100644 --- a/config/components/webhook/manifests.yaml +++ b/config/components/webhook/manifests.yaml @@ -267,10 +267,8 @@ webhooks: - v1beta1 operations: - CREATE - - UPDATE resources: - workloads - - workloads/status sideEffects: None --- apiVersion: admissionregistration.k8s.io/v1 diff --git a/pkg/controller/core/workload_controller.go b/pkg/controller/core/workload_controller.go index 9c8aa9666f..6452ca8b5a 100644 --- a/pkg/controller/core/workload_controller.go +++ b/pkg/controller/core/workload_controller.go @@ -149,6 +149,12 @@ func (r *WorkloadReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c ctx = ctrl.LoggerInto(ctx, log) log.V(2).Info("Reconciling Workload") + // If a deactivated workload is re-activated, we need to reset the RequeueState. 
+ if wl.Status.RequeueState != nil && ptr.Deref(wl.Spec.Active, true) && workload.IsEvictedByDeactivation(&wl) { + wl.Status.RequeueState = nil + return ctrl.Result{}, workload.ApplyAdmissionStatus(ctx, r.client, &wl, true) + } + if len(wl.ObjectMeta.OwnerReferences) == 0 && !wl.DeletionTimestamp.IsZero() { return ctrl.Result{}, workload.RemoveFinalizer(ctx, r.client, &wl) } diff --git a/pkg/webhooks/workload_webhook.go b/pkg/webhooks/workload_webhook.go index d9f2edc3ec..86ee9a0e84 100644 --- a/pkg/webhooks/workload_webhook.go +++ b/pkg/webhooks/workload_webhook.go @@ -50,7 +50,7 @@ func setupWebhookForWorkload(mgr ctrl.Manager) error { Complete() } -// +kubebuilder:webhook:path=/mutate-kueue-x-k8s-io-v1beta1-workload,mutating=true,failurePolicy=fail,sideEffects=None,groups=kueue.x-k8s.io,resources=workloads;workloads/status,verbs=create;update,versions=v1beta1,name=mworkload.kb.io,admissionReviewVersions=v1 +// +kubebuilder:webhook:path=/mutate-kueue-x-k8s-io-v1beta1-workload,mutating=true,failurePolicy=fail,sideEffects=None,groups=kueue.x-k8s.io,resources=workloads,verbs=create,versions=v1beta1,name=mworkload.kb.io,admissionReviewVersions=v1 var _ webhook.CustomDefaulter = &WorkloadWebhook{} @@ -76,10 +76,6 @@ func (w *WorkloadWebhook) Default(ctx context.Context, obj runtime.Object) error } } - // If a deactivated workload is re-activated, we need to reset the RequeueState. - if ptr.Deref(wl.Spec.Active, true) && workload.IsEvictedByDeactivation(wl) && workload.HasRequeueState(wl) { - wl.Status.RequeueState = nil - } return nil } diff --git a/pkg/webhooks/workload_webhook_test.go b/pkg/webhooks/workload_webhook_test.go index 78051be403..8414a22286 100644 --- a/pkg/webhooks/workload_webhook_test.go +++ b/pkg/webhooks/workload_webhook_test.go @@ -78,22 +78,6 @@ func TestWorkloadWebhookDefault(t *testing.T) { }, }, }, - "re-activated workload with re-queue state is reset the re-queue state": { - wl: *testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace). - Condition(metav1.Condition{ - Type: kueue.WorkloadEvicted, - Status: metav1.ConditionTrue, - Reason: kueue.WorkloadEvictedByDeactivation, - }).RequeueState(ptr.To[int32](5), ptr.To(metav1.Now())). - Obj(), - wantWl: *testingutil.MakeWorkload(testWorkloadName, testWorkloadNamespace). - Condition(metav1.Condition{ - Type: kueue.WorkloadEvicted, - Status: metav1.ConditionTrue, - Reason: kueue.WorkloadEvictedByDeactivation, - }). - Obj(), - }, } for name, tc := range cases { t.Run(name, func(t *testing.T) { diff --git a/test/integration/scheduler/podsready/scheduler_test.go b/test/integration/scheduler/podsready/scheduler_test.go index e76ac9b6b9..523890354d 100644 --- a/test/integration/scheduler/podsready/scheduler_test.go +++ b/test/integration/scheduler/podsready/scheduler_test.go @@ -248,7 +248,7 @@ var _ = ginkgo.Describe("SchedulerWithWaitForPodsReady", func() { // To avoid flakiness, we don't verify if the workload has a QuotaReserved=false with pending reason here. }) - ginkgo.It("Should re-admit a timed out workload and deactivate a workload exceeded the re-queue count limit", func() { + ginkgo.It("Should re-admit a timed out workload and deactivate a workload exceeded the re-queue count limit. 
After that re-activating a workload", func() { ginkgo.By("create the 'prod' workload") prodWl := testing.MakeWorkload("prod", ns.Name).Queue(prodQueue.Name).Request(corev1.ResourceCPU, "2").Obj() gomega.Expect(k8sClient.Create(ctx, prodWl)).Should(gomega.Succeed()) @@ -276,6 +276,29 @@ var _ = ginkgo.Describe("SchedulerWithWaitForPodsReady", func() { g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(prodWl), prodWl)) g.Expect(ptr.Deref(prodWl.Spec.Active, true)).Should(gomega.BeFalse()) }, util.Timeout, util.Interval).Should(gomega.Succeed()) + + ginkgo.By("verify the re-activated inactive 'prod' workload re-queue state is reset") + // TODO: Once we move a logic to issue the Eviction with InactiveWorkload reason, we need to remove the below updates. + // REF: https://github.com/kubernetes-sigs/kueue/issues/1841 + gomega.Eventually(func(g gomega.Gomega) { + g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(prodWl), prodWl)).Should(gomega.Succeed()) + apimeta.SetStatusCondition(&prodWl.Status.Conditions, metav1.Condition{ + Type: kueue.WorkloadEvicted, + Status: metav1.ConditionTrue, + Reason: kueue.WorkloadEvictedByDeactivation, + Message: "evicted by Test", + }) + g.Expect(k8sClient.Status().Update(ctx, prodWl)).Should(gomega.Succeed()) + }, util.Timeout, util.Interval).Should(gomega.Succeed(), "Job reconciler should add an Evicted condition with InactiveWorkload to the Workload") + gomega.Eventually(func(g gomega.Gomega) { + g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(prodWl), prodWl)).Should(gomega.Succeed()) + prodWl.Spec.Active = ptr.To(true) + g.Expect(k8sClient.Update(ctx, prodWl)).Should(gomega.Succeed()) + }, util.Timeout, util.Interval).Should(gomega.Succeed(), "Reactivate inactive Workload") + gomega.Eventually(func(g gomega.Gomega) { + g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(prodWl), prodWl)).Should(gomega.Succeed()) + g.Expect(prodWl.Status.RequeueState).Should(gomega.BeNil()) + }, util.Timeout, util.Interval).Should(gomega.Succeed()) }) ginkgo.It("Should unblock admission of new workloads in other ClusterQueues once the admitted workload exceeds timeout", func() { diff --git a/test/integration/webhook/workload_test.go b/test/integration/webhook/workload_test.go index 8b97543b0f..bf49b6a365 100644 --- a/test/integration/webhook/workload_test.go +++ b/test/integration/webhook/workload_test.go @@ -82,37 +82,6 @@ var _ = ginkgo.Describe("Workload defaulting webhook", func() { gomega.Expect(created.Spec.PodSets[0].Name).Should(gomega.Equal(kueue.DefaultPodSetName)) }) - ginkgo.It("Should reset re-queue state", func() { - ginkgo.By("Creating a new inactive Workload") - workload := testing.MakeWorkload(workloadName, ns.Name). - Active(false). 
- Obj() - gomega.Expect(k8sClient.Create(ctx, workload)).Should(gomega.Succeed()) - gomega.Eventually(func(g gomega.Gomega) { - g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(workload), workload)) - workload.Status = kueue.WorkloadStatus{ - Conditions: []metav1.Condition{{ - Type: kueue.WorkloadEvicted, - Reason: kueue.WorkloadEvictedByDeactivation, - Status: metav1.ConditionTrue, - LastTransitionTime: metav1.Now(), - }}, - RequeueState: &kueue.RequeueState{ - Count: ptr.To[int32](10), - RequeueAt: ptr.To(metav1.Now()), - }, - } - g.Expect(k8sClient.Status().Update(ctx, workload)).Should(gomega.Succeed()) - }, util.Timeout, util.Interval).Should(gomega.Succeed()) - ginkgo.By("Activate a Workload") - gomega.Eventually(func(g gomega.Gomega) { - g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(workload), workload)) - workload.Spec.Active = ptr.To(true) - g.Expect(k8sClient.Update(ctx, workload)).Should(gomega.Succeed()) - g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(workload), workload)) - g.Expect(workload.Status.RequeueState).Should(gomega.BeNil(), "re-queue state should be reset") - }, util.Timeout, util.Interval).Should(gomega.Succeed()) - }) }) }) From 2e60870d5efd66de894618e9a9b1c45d5ac14a2f Mon Sep 17 00:00:00 2001 From: Aldo Culquicondor <1299064+alculquicondor@users.noreply.github.com> Date: Thu, 14 Mar 2024 11:16:59 -0700 Subject: [PATCH 17/48] Do not make API calls when the workload already has condition Admitted=false (#1845) Change-Id: Ib10c1874bfb3f5810f83d751e4d80ca6eb031bfa --- pkg/controller/core/workload_controller.go | 36 +++++++++++----------- pkg/controller/jobframework/reconciler.go | 3 +- pkg/scheduler/scheduler.go | 9 +++--- pkg/scheduler/scheduler_test.go | 28 ++++++++++++----- pkg/workload/admissionchecks.go | 3 +- pkg/workload/workload.go | 17 +++++++--- 6 files changed, 59 insertions(+), 37 deletions(-) diff --git a/pkg/controller/core/workload_controller.go b/pkg/controller/core/workload_controller.go index 6452ca8b5a..75fb07589f 100644 --- a/pkg/controller/core/workload_controller.go +++ b/pkg/controller/core/workload_controller.go @@ -217,25 +217,25 @@ func (r *WorkloadReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c return ctrl.Result{}, err } - if !r.queues.QueueForWorkloadExists(&wl) { + switch { + case !r.queues.QueueForWorkloadExists(&wl): log.V(3).Info("Workload is inadmissible because of missing LocalQueue", "localQueue", klog.KRef(wl.Namespace, wl.Spec.QueueName)) - workload.UnsetQuotaReservationWithCondition(&wl, "Inadmissible", fmt.Sprintf("LocalQueue %s doesn't exist", wl.Spec.QueueName)) - err := workload.ApplyAdmissionStatus(ctx, r.client, &wl, true) - return ctrl.Result{}, client.IgnoreNotFound(err) - } - - if !cqOk { + if workload.UnsetQuotaReservationWithCondition(&wl, "Inadmissible", fmt.Sprintf("LocalQueue %s doesn't exist", wl.Spec.QueueName)) { + err := workload.ApplyAdmissionStatus(ctx, r.client, &wl, true) + return ctrl.Result{}, client.IgnoreNotFound(err) + } + case !cqOk: log.V(3).Info("Workload is inadmissible because of missing ClusterQueue", "clusterQueue", klog.KRef("", cqName)) - workload.UnsetQuotaReservationWithCondition(&wl, "Inadmissible", fmt.Sprintf("ClusterQueue %s doesn't exist", cqName)) - err := workload.ApplyAdmissionStatus(ctx, r.client, &wl, true) - return ctrl.Result{}, client.IgnoreNotFound(err) - } - - if !r.cache.ClusterQueueActive(cqName) { + if workload.UnsetQuotaReservationWithCondition(&wl, "Inadmissible", fmt.Sprintf("ClusterQueue %s doesn't exist", cqName)) { + err := 
workload.ApplyAdmissionStatus(ctx, r.client, &wl, true) + return ctrl.Result{}, client.IgnoreNotFound(err) + } + case !r.cache.ClusterQueueActive(cqName): log.V(3).Info("Workload is inadmissible because ClusterQueue is inactive", "clusterQueue", klog.KRef("", cqName)) - workload.UnsetQuotaReservationWithCondition(&wl, "Inadmissible", fmt.Sprintf("ClusterQueue %s is inactive", cqName)) - err := workload.ApplyAdmissionStatus(ctx, r.client, &wl, true) - return ctrl.Result{}, client.IgnoreNotFound(err) + if workload.UnsetQuotaReservationWithCondition(&wl, "Inadmissible", fmt.Sprintf("ClusterQueue %s is inactive", cqName)) { + err := workload.ApplyAdmissionStatus(ctx, r.client, &wl, true) + return ctrl.Result{}, client.IgnoreNotFound(err) + } } return ctrl.Result{}, nil @@ -293,13 +293,13 @@ func (r *WorkloadReconciler) reconcileOnClusterQueueActiveState(ctx context.Cont if err != nil || !queue.DeletionTimestamp.IsZero() { log.V(3).Info("Workload is inadmissible because the ClusterQueue is terminating or missing", "clusterQueue", klog.KRef("", cqName)) - workload.UnsetQuotaReservationWithCondition(wl, "Inadmissible", fmt.Sprintf("ClusterQueue %s is terminating or missing", cqName)) + _ = workload.UnsetQuotaReservationWithCondition(wl, "Inadmissible", fmt.Sprintf("ClusterQueue %s is terminating or missing", cqName)) return true, workload.ApplyAdmissionStatus(ctx, r.client, wl, true) } if queueStopPolicy != kueue.None { log.V(3).Info("Workload is inadmissible because the ClusterQueue is stopped", "clusterQueue", klog.KRef("", cqName)) - workload.UnsetQuotaReservationWithCondition(wl, "Inadmissible", fmt.Sprintf("ClusterQueue %s is stopped", cqName)) + _ = workload.UnsetQuotaReservationWithCondition(wl, "Inadmissible", fmt.Sprintf("ClusterQueue %s is stopped", cqName)) return true, workload.ApplyAdmissionStatus(ctx, r.client, wl, true) } diff --git a/pkg/controller/jobframework/reconciler.go b/pkg/controller/jobframework/reconciler.go index da20d1521a..1a40c6555d 100644 --- a/pkg/controller/jobframework/reconciler.go +++ b/pkg/controller/jobframework/reconciler.go @@ -371,8 +371,7 @@ func (r *JobReconciler) ReconcileGenericJob(ctx context.Context, req ctrl.Reques if workload.HasQuotaReservation(wl) { if !job.IsActive() { log.V(6).Info("The job is no longer active, clear the workloads admission") - workload.UnsetQuotaReservationWithCondition(wl, "Pending", evCond.Message) - _ = workload.SyncAdmittedCondition(wl) + _ = workload.UnsetQuotaReservationWithCondition(wl, "Pending", evCond.Message) err := workload.ApplyAdmissionStatus(ctx, r.client, wl, true) if err != nil { return ctrl.Result{}, fmt.Errorf("clearing admission: %w", err) diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go index 0433946328..b2e454aa9a 100644 --- a/pkg/scheduler/scheduler.go +++ b/pkg/scheduler/scheduler.go @@ -585,10 +585,11 @@ func (s *Scheduler) requeueAndUpdate(log logr.Logger, ctx context.Context, e ent log.V(2).Info("Workload re-queued", "workload", klog.KObj(e.Obj), "clusterQueue", klog.KRef("", e.ClusterQueue), "queue", klog.KRef(e.Obj.Namespace, e.Obj.Spec.QueueName), "requeueReason", e.requeueReason, "added", added) if e.status == notNominated || e.status == skipped { - workload.UnsetQuotaReservationWithCondition(e.Obj, "Pending", e.inadmissibleMsg) - err := workload.ApplyAdmissionStatus(ctx, s.client, e.Obj, true) - if err != nil { - log.Error(err, "Could not update Workload status") + if workload.UnsetQuotaReservationWithCondition(e.Obj, "Pending", e.inadmissibleMsg) { + err := 
workload.ApplyAdmissionStatus(ctx, s.client, e.Obj, true) + if err != nil { + log.Error(err, "Could not update Workload status") + } } s.recorder.Eventf(e.Obj, corev1.EventTypeNormal, "Pending", api.TruncateEventMessage(e.inadmissibleMsg)) } diff --git a/pkg/scheduler/scheduler_test.go b/pkg/scheduler/scheduler_test.go index fc30490347..641d7863c3 100644 --- a/pkg/scheduler/scheduler_test.go +++ b/pkg/scheduler/scheduler_test.go @@ -36,6 +36,7 @@ import ( "k8s.io/client-go/tools/record" "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/interceptor" config "sigs.k8s.io/kueue/apis/config/v1beta1" kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" @@ -2004,11 +2005,12 @@ func TestRequeueAndUpdate(t *testing.T) { w1 := utiltesting.MakeWorkload("w1", "ns1").Queue(q1.Name).Obj() cases := []struct { - name string - e entry - wantWorkloads map[string][]string - wantInadmissible map[string][]string - wantStatus kueue.WorkloadStatus + name string + e entry + wantWorkloads map[string][]string + wantInadmissible map[string][]string + wantStatus kueue.WorkloadStatus + wantStatusUpdates int }{ { name: "workload didn't fit", @@ -2028,6 +2030,7 @@ func TestRequeueAndUpdate(t *testing.T) { wantInadmissible: map[string][]string{ "cq": {workload.Key(w1)}, }, + wantStatusUpdates: 1, }, { name: "assumed", @@ -2068,6 +2071,7 @@ func TestRequeueAndUpdate(t *testing.T) { wantWorkloads: map[string][]string{ "cq": {workload.Key(w1)}, }, + wantStatusUpdates: 1, }, } @@ -2076,9 +2080,14 @@ func TestRequeueAndUpdate(t *testing.T) { ctx, log := utiltesting.ContextWithLog(t) scheme := runtime.NewScheme() + updates := 0 objs := []client.Object{w1, q1, &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "ns1"}}} - clientBuilder := utiltesting.NewClientBuilder().WithObjects(objs...).WithStatusSubresource(objs...) - cl := clientBuilder.Build() + cl := utiltesting.NewClientBuilder().WithInterceptorFuncs(interceptor.Funcs{ + SubResourcePatch: func(ctx context.Context, client client.Client, subResourceName string, obj client.Object, patch client.Patch, opts ...client.SubResourcePatchOption) error { + updates++ + return client.SubResource(subResourceName).Patch(ctx, obj, patch, opts...) + }, + }).WithObjects(objs...).WithStatusSubresource(objs...).Build() broadcaster := record.NewBroadcaster() recorder := broadcaster.NewRecorder(scheme, corev1.EventSource{Component: constants.AdmissionName}) cqCache := cache.New(cl) @@ -2121,6 +2130,11 @@ func TestRequeueAndUpdate(t *testing.T) { if diff := cmp.Diff(tc.wantStatus, updatedWl.Status, ignoreConditionTimestamps); diff != "" { t.Errorf("Unexpected status after updating (-want,+got):\n%s", diff) } + // Make sure a second call doesn't make unnecessary updates. + scheduler.requeueAndUpdate(log, ctx, tc.e) + if updates != tc.wantStatusUpdates { + t.Errorf("Observed %d status updates, want %d", updates, tc.wantStatusUpdates) + } }) } } diff --git a/pkg/workload/admissionchecks.go b/pkg/workload/admissionchecks.go index b1d3ebcb3d..e831144681 100644 --- a/pkg/workload/admissionchecks.go +++ b/pkg/workload/admissionchecks.go @@ -58,8 +58,7 @@ func SyncAdmittedCondition(w *kueue.Workload) bool { newCondition.Message = "The workload has not all checks ready" } - apimeta.SetStatusCondition(&w.Status.Conditions, newCondition) - return true + return apimeta.SetStatusCondition(&w.Status.Conditions, newCondition) } // FindAdmissionCheck - returns a pointer to the check identified by checkName if found in checks. 
diff --git a/pkg/workload/workload.go b/pkg/workload/workload.go index 67c7fc9f37..e002b0226a 100644 --- a/pkg/workload/workload.go +++ b/pkg/workload/workload.go @@ -301,7 +301,10 @@ func UpdateStatus(ctx context.Context, return c.Status().Patch(ctx, newWl, client.Apply, client.FieldOwner(managerPrefix+"-"+condition.Type)) } -func UnsetQuotaReservationWithCondition(wl *kueue.Workload, reason, message string) { +// UnsetQuotaReservationWithCondition sets the QuotaReserved condition to false and clears +// the admission. +// Returns whether any change was done. +func UnsetQuotaReservationWithCondition(wl *kueue.Workload, reason, message string) bool { condition := metav1.Condition{ Type: kueue.WorkloadQuotaReserved, Status: metav1.ConditionFalse, @@ -309,11 +312,17 @@ func UnsetQuotaReservationWithCondition(wl *kueue.Workload, reason, message stri Reason: reason, Message: api.TruncateConditionMessage(message), } - apimeta.SetStatusCondition(&wl.Status.Conditions, condition) - wl.Status.Admission = nil + changed := apimeta.SetStatusCondition(&wl.Status.Conditions, condition) + if wl.Status.Admission != nil { + wl.Status.Admission = nil + changed = true + } // Reset the admitted condition if necessary. - _ = SyncAdmittedCondition(wl) + if SyncAdmittedCondition(wl) { + changed = true + } + return changed } // BaseSSAWorkload creates a new object based on the input workload that From eb01ce90e02dbbcc46df0d1f4246aae0c9f825a1 Mon Sep 17 00:00:00 2001 From: Aldo Culquicondor <1299064+alculquicondor@users.noreply.github.com> Date: Thu, 14 Mar 2024 12:34:54 -0700 Subject: [PATCH 18/48] Prepare release v0.6.1 (#1847) Change-Id: I7cc659b2b013cd4a0b907b0e3002b877dac2e4ae --- CHANGELOG/CHANGELOG-0.6.md | 22 ++++++++++++++++++++++ Makefile | 2 +- README.md | 2 +- charts/kueue/Chart.yaml | 2 +- site/config.toml | 2 +- 5 files changed, 26 insertions(+), 4 deletions(-) diff --git a/CHANGELOG/CHANGELOG-0.6.md b/CHANGELOG/CHANGELOG-0.6.md index b39a971b1a..3755d13687 100644 --- a/CHANGELOG/CHANGELOG-0.6.md +++ b/CHANGELOG/CHANGELOG-0.6.md @@ -1,3 +1,25 @@ +## v0.6.1 + +Changes Since `v0.6.0`: + +### Feature + +- Added MultiKueue worker connection monitoring and reconnect. (#1809, @trasc) +- The Failed pods in a pod-group are finalized once a replacement pods are created. (#1801, @trasc) + +### Bug or Regression + +- Exclude Pod labels, preemptionPolicy and container images when determining whether pods in a pod group have the same shape. (#1760, @alculquicondor) +- Fix incorrect quota management when lendingLimit enabled in preemption (#1826, @kerthcet, @B1F030) +- Fix the configuration for the number of reconcilers for the Pod integration. It was only reconciling one group at a time. (#1837, @alculquicondor) +- Kueue visibility API is no longer installed by default. Users can install it via helm or applying the visibility-api.yaml artifact. (#1764, @trasc) +- WaitForPodsReady: Fix a bug that the requeueState isn't reset. (#1843, @tenzen-y) + +### Other (Cleanup or Flake) + +- Avoid API calls for admission attempts when Workload already has condition Admitted=false (#1845, @alculquicondor) +- Skip requeueing of Workloads when there is a status update for a ClusterQueue, saving on API calls for Workloads that were already attempted for admission. 
(#1832, @alculquicondor) + ## v0.6.0 Changes since `v0.5.0`: diff --git a/Makefile b/Makefile index f6732d900c..cfa51ee78c 100644 --- a/Makefile +++ b/Makefile @@ -88,7 +88,7 @@ LD_FLAGS += -X '$(version_pkg).GitCommit=$(shell git rev-parse HEAD)' # Update these variables when preparing a new release or a release branch. # Then run `make prepare-release-branch` -RELEASE_VERSION=v0.6.0 +RELEASE_VERSION=v0.6.1 RELEASE_BRANCH=release-0.6 .PHONY: all diff --git a/README.md b/README.md index 5ce449faf5..dbe6ae2820 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ Read the [overview](https://kueue.sigs.k8s.io/docs/overview/) to learn more. To install the latest release of Kueue in your cluster, run the following command: ```shell -kubectl apply --server-side -f https://github.com/kubernetes-sigs/kueue/releases/download/v0.6.0/manifests.yaml +kubectl apply --server-side -f https://github.com/kubernetes-sigs/kueue/releases/download/v0.6.1/manifests.yaml ``` The controller runs in the `kueue-system` namespace. diff --git a/charts/kueue/Chart.yaml b/charts/kueue/Chart.yaml index 3c9ed1ba6d..ab98fe240f 100644 --- a/charts/kueue/Chart.yaml +++ b/charts/kueue/Chart.yaml @@ -18,4 +18,4 @@ version: 0.1.0 # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "v0.6.0" +appVersion: "v0.6.1" diff --git a/site/config.toml b/site/config.toml index a90adbddd7..7d8733915d 100644 --- a/site/config.toml +++ b/site/config.toml @@ -92,7 +92,7 @@ ignoreFiles = [] # The major.minor version tag for the version of the docs represented in this # branch of the repository. Used in the "version-banner" partial to display a # version number for this doc set. - version = "v0.6.0" + version = "v0.6.1" # Flag used in the "version-banner" partial to decide whether to display a # banner on every page indicating that this is an archived version of the docs. 
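
Note on the "Avoid API calls for admission attempts when Workload already has condition Admitted=false" entry in the changelog above: the change boils down to one calling convention, visible in the workload controller and scheduler diffs of patch 17. Status helpers such as workload.UnsetQuotaReservationWithCondition now report whether they actually mutated the Workload, and callers issue a status patch only when something changed. The Go sketch below illustrates that convention; it is a simplified, self-contained illustration using the signatures shown in the diffs, and the function name markInadmissible and its arguments are invented for this example, not code taken from the repository.

package example

import (
	"context"

	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"

	kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1"
	"sigs.k8s.io/kueue/pkg/workload"
)

// markInadmissible sketches the pattern from the "Admitted=false" change:
// the status helper returns true only if it changed the conditions or
// cleared the admission, so repeated reconciles of an already-inadmissible
// Workload cost no extra API calls.
func markInadmissible(ctx context.Context, c client.Client, wl *kueue.Workload, msg string) (ctrl.Result, error) {
	if workload.UnsetQuotaReservationWithCondition(wl, "Inadmissible", msg) {
		// Patch the status only when the helper reported a change.
		err := workload.ApplyAdmissionStatus(ctx, c, wl, true)
		return ctrl.Result{}, client.IgnoreNotFound(err)
	}
	return ctrl.Result{}, nil
}

The same guard appears in both the workload controller's switch over missing or inactive queues and the scheduler's requeueAndUpdate path in the diffs above.
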
From 48a19b460e21c7073db0deb234e4f712cf9df609 Mon Sep 17 00:00:00 2001 From: k8s-infra-cherrypick-robot <90416843+k8s-infra-cherrypick-robot@users.noreply.github.com> Date: Fri, 15 Mar 2024 15:44:29 -0700 Subject: [PATCH 19/48] updated ExpectPendingWorkloadsMetric inadmissible count for no fit cluster (#1854) Co-authored-by: Vicente Ferrara --- .../controller/jobs/raycluster/raycluster_controller_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/integration/controller/jobs/raycluster/raycluster_controller_test.go b/test/integration/controller/jobs/raycluster/raycluster_controller_test.go index ffc19e4d9d..ef3e063564 100644 --- a/test/integration/controller/jobs/raycluster/raycluster_controller_test.go +++ b/test/integration/controller/jobs/raycluster/raycluster_controller_test.go @@ -581,7 +581,7 @@ var _ = ginkgo.Describe("RayCluster Job controller interacting with scheduler", Should(gomega.Succeed()) return *createdJob2.Spec.Suspend }, util.Timeout, util.Interval).Should(gomega.BeTrue()) - util.ExpectPendingWorkloadsMetric(clusterQueue, 0, 0) + util.ExpectPendingWorkloadsMetric(clusterQueue, 0, 1) util.ExpectReservingActiveWorkloadsMetric(clusterQueue, 1) ginkgo.By("deleting the job", func() { From 0c92bbffd9229db9a09d1baf7e315cb3ce831f3d Mon Sep 17 00:00:00 2001 From: k8s-infra-cherrypick-robot <90416843+k8s-infra-cherrypick-robot@users.noreply.github.com> Date: Tue, 19 Mar 2024 11:20:15 -0700 Subject: [PATCH 20/48] Fix preemption blocked by earlier pending Workload (#1868) Change-Id: I69584dd95c57539e163067a4fb93cdb32fc57461 Co-authored-by: Aldo Culquicondor --- pkg/scheduler/scheduler.go | 20 +++-- pkg/scheduler/scheduler_test.go | 54 +++++++++++++ test/integration/scheduler/preemption_test.go | 80 ++++++++++++++++++- 3 files changed, 148 insertions(+), 6 deletions(-) diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go index b2e454aa9a..752ddbca1a 100644 --- a/pkg/scheduler/scheduler.go +++ b/pkg/scheduler/scheduler.go @@ -202,19 +202,23 @@ func (s *Scheduler) schedule(ctx context.Context) { // head got admitted that should be scheduled in the cohort before the heads // of other clusterQueues. cycleCohortsUsage := cohortsUsage{} + cycleCohortsSkipPreemption := sets.New[string]() for i := range entries { e := &entries[i] - if e.assignment.RepresentativeMode() == flavorassigner.NoFit { + mode := e.assignment.RepresentativeMode() + if mode == flavorassigner.NoFit { continue } cq := snapshot.ClusterQueues[e.ClusterQueue] if cq.Cohort != nil { sum := cycleCohortsUsage.totalUsageForCommonFlavorResources(cq.Cohort.Name, e.assignment.Usage) - // If the workload uses resources that were potentially assumed in this cycle and will no longer fit in the - // cohort. If a resource of a flavor is used only once or for the first time in the cycle the checks done by - // the flavorassigner are still valid. - if cycleCohortsUsage.hasCommonFlavorResources(cq.Cohort.Name, e.assignment.Usage) && !cq.FitInCohort(sum) { + // Check whether there was an assignment in this cycle that could render the next assignments invalid: + // - If the workload no longer fits in the cohort. + // - If there was another assignment in the cohort, then the preemption calculation is no longer valid. 
+ if cycleCohortsUsage.hasCommonFlavorResources(cq.Cohort.Name, e.assignment.Usage) && + ((mode == flavorassigner.Fit && !cq.FitInCohort(sum)) || + (mode == flavorassigner.Preempt && cycleCohortsSkipPreemption.Has(cq.Cohort.Name))) { e.status = skipped e.inadmissibleMsg = "other workloads in the cohort were prioritized" // When the workload needs borrowing and there is another workload in cohort doesn't @@ -241,6 +245,9 @@ func (s *Scheduler) schedule(ctx context.Context) { e.inadmissibleMsg += fmt.Sprintf(". Pending the preemption of %d workload(s)", preempted) e.requeueReason = queue.RequeueReasonPendingPreemption } + if cq.Cohort != nil { + cycleCohortsSkipPreemption.Insert(cq.Cohort.Name) + } } else { log.V(2).Info("Workload requires preemption, but there are no candidate workloads allowed for preemption", "preemption", cq.Preemption) } @@ -262,6 +269,9 @@ func (s *Scheduler) schedule(ctx context.Context) { if err := s.admit(ctx, e, cq.AdmissionChecks); err != nil { e.inadmissibleMsg = fmt.Sprintf("Failed to admit workload: %v", err) } + if cq.Cohort != nil { + cycleCohortsSkipPreemption.Insert(cq.Cohort.Name) + } } // 6. Requeue the heads that were not scheduled. diff --git a/pkg/scheduler/scheduler_test.go b/pkg/scheduler/scheduler_test.go index 641d7863c3..f969bbf894 100644 --- a/pkg/scheduler/scheduler_test.go +++ b/pkg/scheduler/scheduler_test.go @@ -716,6 +716,60 @@ func TestSchedule(t *testing.T) { "eng-alpha/borrower": *utiltesting.MakeAdmission("eng-alpha").Assignment(corev1.ResourceCPU, "on-demand", "60").Obj(), }, }, + "multiple CQs need preemption": { + additionalClusterQueues: []kueue.ClusterQueue{ + *utiltesting.MakeClusterQueue("other-alpha"). + Cohort("other"). + ResourceGroup( + *utiltesting.MakeFlavorQuotas("on-demand"). + Resource(corev1.ResourceCPU, "50", "50").Obj(), + ). + Obj(), + *utiltesting.MakeClusterQueue("other-beta"). + Cohort("other"). + Preemption(kueue.ClusterQueuePreemption{ + ReclaimWithinCohort: kueue.PreemptionPolicyAny, + WithinClusterQueue: kueue.PreemptionPolicyLowerPriority, + }). + ResourceGroup( + *utiltesting.MakeFlavorQuotas("on-demand"). + Resource(corev1.ResourceCPU, "50", "10").Obj(), + ). + Obj(), + }, + additionalLocalQueues: []kueue.LocalQueue{ + *utiltesting.MakeLocalQueue("other", "eng-alpha").ClusterQueue("other-alpha").Obj(), + *utiltesting.MakeLocalQueue("other", "eng-beta").ClusterQueue("other-beta").Obj(), + }, + workloads: []kueue.Workload{ + *utiltesting.MakeWorkload("preemptor", "eng-beta"). + Priority(-1). + Queue("other"). + Request(corev1.ResourceCPU, "1"). + Obj(), + *utiltesting.MakeWorkload("pending", "eng-alpha"). + Priority(1). + Queue("other"). + Request(corev1.ResourceCPU, "1"). + Obj(), + *utiltesting.MakeWorkload("use-all", "eng-alpha"). + Request(corev1.ResourceCPU, "100"). + ReserveQuota(utiltesting.MakeAdmission("other-alpha").Assignment(corev1.ResourceCPU, "on-demand", "100").Obj()). + Obj(), + }, + wantLeft: map[string][]string{ + // Preemptor is not admitted in this cycle. 
+ "other-beta": {"eng-beta/preemptor"}, + }, + wantInadmissibleLeft: map[string][]string{ + "other-alpha": {"eng-alpha/pending"}, + }, + wantPreempted: sets.New("eng-alpha/use-all"), + wantAssignments: map[string]kueue.Admission{ + // Removal from cache for the preempted workloads is deferred until we receive Workload updates + "eng-alpha/use-all": *utiltesting.MakeAdmission("other-alpha").Assignment(corev1.ResourceCPU, "on-demand", "100").Obj(), + }, + }, "cannot borrow resource not listed in clusterQueue": { workloads: []kueue.Workload{ *utiltesting.MakeWorkload("new", "eng-alpha"). diff --git a/test/integration/scheduler/preemption_test.go b/test/integration/scheduler/preemption_test.go index e1e1f89d65..5caf8b9245 100644 --- a/test/integration/scheduler/preemption_test.go +++ b/test/integration/scheduler/preemption_test.go @@ -358,6 +358,84 @@ var _ = ginkgo.Describe("Preemption", func() { }) }) + ginkgo.Context("In a cohort with StrictFIFO", func() { + var ( + alphaCQ, betaCQ *kueue.ClusterQueue + alphaLQ, betaLQ *kueue.LocalQueue + oneFlavor *kueue.ResourceFlavor + ) + + ginkgo.BeforeEach(func() { + oneFlavor = testing.MakeResourceFlavor("one").Obj() + gomega.Expect(k8sClient.Create(ctx, oneFlavor)).To(gomega.Succeed()) + + alphaCQ = testing.MakeClusterQueue("alpha-cq"). + Cohort("all"). + QueueingStrategy(kueue.StrictFIFO). + ResourceGroup(*testing.MakeFlavorQuotas("one").Resource(corev1.ResourceCPU, "2").Obj()). + Preemption(kueue.ClusterQueuePreemption{ + WithinClusterQueue: kueue.PreemptionPolicyLowerPriority, + ReclaimWithinCohort: kueue.PreemptionPolicyAny, + }). + Obj() + gomega.Expect(k8sClient.Create(ctx, alphaCQ)).To(gomega.Succeed()) + alphaLQ = testing.MakeLocalQueue("alpha-lq", ns.Name).ClusterQueue("alpha-cq").Obj() + gomega.Expect(k8sClient.Create(ctx, alphaLQ)).To(gomega.Succeed()) + betaCQ = testing.MakeClusterQueue("beta-cq"). + Cohort("all"). + QueueingStrategy(kueue.StrictFIFO). + ResourceGroup(*testing.MakeFlavorQuotas("one").Resource(corev1.ResourceCPU, "2").Obj()). + Preemption(kueue.ClusterQueuePreemption{ + WithinClusterQueue: kueue.PreemptionPolicyLowerPriority, + ReclaimWithinCohort: kueue.PreemptionPolicyAny, + }). + Obj() + gomega.Expect(k8sClient.Create(ctx, betaCQ)).To(gomega.Succeed()) + betaLQ = testing.MakeLocalQueue("beta-lq", ns.Name).ClusterQueue("beta-cq").Obj() + gomega.Expect(k8sClient.Create(ctx, betaLQ)).To(gomega.Succeed()) + }) + + ginkgo.AfterEach(func() { + gomega.Expect(util.DeleteWorkloadsInNamespace(ctx, k8sClient, ns)).To(gomega.Succeed()) + gomega.Expect(util.DeleteLocalQueue(ctx, k8sClient, alphaLQ)).To(gomega.Succeed()) + gomega.Expect(util.DeleteClusterQueue(ctx, k8sClient, alphaCQ)).To(gomega.Succeed()) + gomega.Expect(util.DeleteLocalQueue(ctx, k8sClient, betaLQ)).To(gomega.Succeed()) + gomega.Expect(util.DeleteClusterQueue(ctx, k8sClient, betaCQ)).To(gomega.Succeed()) + }) + + ginkgo.It("Should reclaim from cohort even if another CQ has pending workloads", func() { + useAllAlphaWl := testing.MakeWorkload("use-all", ns.Name). + Queue("alpha-lq"). + Priority(1). + Request(corev1.ResourceCPU, "4"). + Obj() + gomega.Expect(k8sClient.Create(ctx, useAllAlphaWl)).To(gomega.Succeed()) + util.ExpectWorkloadsToBeAdmitted(ctx, k8sClient, useAllAlphaWl) + + pendingAlphaWl := testing.MakeWorkload("pending", ns.Name). + Queue("alpha-lq"). + Priority(0). + Request(corev1.ResourceCPU, "1"). 
+ Obj() + gomega.Expect(k8sClient.Create(ctx, pendingAlphaWl)).To(gomega.Succeed()) + util.ExpectWorkloadsToBePending(ctx, k8sClient, pendingAlphaWl) + + ginkgo.By("Creating a workload to reclaim quota") + + preemptorBetaWl := testing.MakeWorkload("preemptor", ns.Name). + Queue("beta-lq"). + Priority(-1). + Request(corev1.ResourceCPU, "1"). + Obj() + gomega.Expect(k8sClient.Create(ctx, preemptorBetaWl)).To(gomega.Succeed()) + util.ExpectWorkloadsToBePreempted(ctx, k8sClient, useAllAlphaWl) + util.FinishEvictionForWorkloads(ctx, k8sClient, useAllAlphaWl) + util.ExpectWorkloadsToBeAdmitted(ctx, k8sClient, preemptorBetaWl) + util.ExpectWorkloadsToBePending(ctx, k8sClient, useAllAlphaWl, pendingAlphaWl) + }) + + }) + ginkgo.Context("When most quota is in a shared ClusterQueue in a cohort", func() { var ( aStandardCQ, aBestEffortCQ, bStandardCQ, bBestEffortCQ, sharedCQ *kueue.ClusterQueue @@ -550,7 +628,7 @@ var _ = ginkgo.Describe("Preemption", func() { }) }) - ginkgo.Context("Should be able to preempt when lending limit enabled", func() { + ginkgo.Context("When lending limit enabled", func() { var ( prodCQ *kueue.ClusterQueue devCQ *kueue.ClusterQueue From 4cebd3e6867c4ecd2425b9b77bad2b9e71bfb8b4 Mon Sep 17 00:00:00 2001 From: k8s-infra-cherrypick-robot <90416843+k8s-infra-cherrypick-robot@users.noreply.github.com> Date: Thu, 21 Mar 2024 14:51:52 -0700 Subject: [PATCH 21/48] Avoid unnecessary preemptions when candidates have the same timestamp (#1880) Change-Id: I68155a467880059c65deb75b00939b14551924cd Co-authored-by: Aldo Culquicondor --- pkg/scheduler/preemption/preemption.go | 14 ++++- pkg/scheduler/preemption/preemption_test.go | 18 +++++-- pkg/util/testing/wrappers.go | 12 ++++- test/integration/scheduler/preemption_test.go | 52 ++++++++++++++++++- test/util/util.go | 26 ++++++++-- 5 files changed, 110 insertions(+), 12 deletions(-) diff --git a/pkg/scheduler/preemption/preemption.go b/pkg/scheduler/preemption/preemption.go index 71dbdccfb1..36cb2555d4 100644 --- a/pkg/scheduler/preemption/preemption.go +++ b/pkg/scheduler/preemption/preemption.go @@ -389,6 +389,7 @@ func workloadFits(wlReq cache.FlavorResourceQuantities, cq *cache.ClusterQueue, } // candidatesOrdering criteria: +// 0. Workloads already marked for preemption first. // 1. Workloads from other ClusterQueues in the cohort before the ones in the // same ClusterQueue as the preemptor. // 2. Workloads with lower priority first. @@ -397,6 +398,11 @@ func candidatesOrdering(candidates []*workload.Info, cq string, now time.Time) f return func(i, j int) bool { a := candidates[i] b := candidates[j] + aEvicted := meta.IsStatusConditionTrue(a.Obj.Status.Conditions, kueue.WorkloadEvicted) + bEvicted := meta.IsStatusConditionTrue(b.Obj.Status.Conditions, kueue.WorkloadEvicted) + if aEvicted != bEvicted { + return aEvicted + } aInCQ := a.ClusterQueue == cq bInCQ := b.ClusterQueue == cq if aInCQ != bInCQ { @@ -407,7 +413,13 @@ func candidatesOrdering(candidates []*workload.Info, cq string, now time.Time) f if pa != pb { return pa < pb } - return quotaReservationTime(b.Obj, now).Before(quotaReservationTime(a.Obj, now)) + timeA := quotaReservationTime(a.Obj, now) + timeB := quotaReservationTime(b.Obj, now) + if !timeA.Equal(timeB) { + return timeA.After(timeB) + } + // Arbitrary comparison for deterministic sorting. 
+ return a.Obj.UID < b.Obj.UID } } diff --git a/pkg/scheduler/preemption/preemption_test.go b/pkg/scheduler/preemption/preemption_test.go index 36f948ec79..bdd8ae3fd1 100644 --- a/pkg/scheduler/preemption/preemption_test.go +++ b/pkg/scheduler/preemption/preemption_test.go @@ -1127,15 +1127,25 @@ func TestCandidatesOrdering(t *testing.T) { Obj()), workload.NewInfo(utiltesting.MakeWorkload("low", ""). ReserveQuota(utiltesting.MakeAdmission("self").Obj()). - Priority(10). Priority(-10). Obj()), workload.NewInfo(utiltesting.MakeWorkload("other", ""). ReserveQuota(utiltesting.MakeAdmission("other").Obj()). Priority(10). Obj()), - workload.NewInfo(utiltesting.MakeWorkload("old", ""). - ReserveQuota(utiltesting.MakeAdmission("self").Obj()). + workload.NewInfo(utiltesting.MakeWorkload("evicted", ""). + SetOrReplaceCondition(metav1.Condition{ + Type: kueue.WorkloadEvicted, + Status: metav1.ConditionTrue, + }). + Obj()), + workload.NewInfo(utiltesting.MakeWorkload("old-a", ""). + UID("old-a"). + ReserveQuotaAt(utiltesting.MakeAdmission("self").Obj(), now). + Obj()), + workload.NewInfo(utiltesting.MakeWorkload("old-b", ""). + UID("old-b"). + ReserveQuotaAt(utiltesting.MakeAdmission("self").Obj(), now). Obj()), workload.NewInfo(utiltesting.MakeWorkload("current", ""). ReserveQuota(utiltesting.MakeAdmission("self").Obj()). @@ -1151,7 +1161,7 @@ func TestCandidatesOrdering(t *testing.T) { for i, c := range candidates { gotNames[i] = workload.Key(c.Obj) } - wantCandidates := []string{"/other", "/low", "/current", "/old", "/high"} + wantCandidates := []string{"/evicted", "/other", "/low", "/current", "/old-a", "/old-b", "/high"} if diff := cmp.Diff(wantCandidates, gotNames); diff != "" { t.Errorf("Sorted with wrong order (-want,+got):\n%s", diff) } diff --git a/pkg/util/testing/wrappers.go b/pkg/util/testing/wrappers.go index cd668927f2..1ac7d18398 100644 --- a/pkg/util/testing/wrappers.go +++ b/pkg/util/testing/wrappers.go @@ -82,6 +82,11 @@ func (w *WorkloadWrapper) Clone() *WorkloadWrapper { return &WorkloadWrapper{Workload: *w.DeepCopy()} } +func (w *WorkloadWrapper) UID(uid types.UID) *WorkloadWrapper { + w.Workload.UID = uid + return w +} + func (w *WorkloadWrapper) Finalizers(fin ...string) *WorkloadWrapper { w.ObjectMeta.Finalizers = fin return w @@ -116,11 +121,16 @@ func (w *WorkloadWrapper) Active(a bool) *WorkloadWrapper { // ReserveQuota sets workload admission and adds a "QuotaReserved" status condition func (w *WorkloadWrapper) ReserveQuota(a *kueue.Admission) *WorkloadWrapper { + return w.ReserveQuotaAt(a, time.Now()) +} + +// ReserveQuotaAt sets workload admission and adds a "QuotaReserved" status condition +func (w *WorkloadWrapper) ReserveQuotaAt(a *kueue.Admission, now time.Time) *WorkloadWrapper { w.Status.Admission = a w.Status.Conditions = []metav1.Condition{{ Type: kueue.WorkloadQuotaReserved, Status: metav1.ConditionTrue, - LastTransitionTime: metav1.Now(), + LastTransitionTime: metav1.NewTime(now), Reason: "AdmittedByTest", Message: fmt.Sprintf("Admitted by ClusterQueue %s", w.Status.Admission.ClusterQueue), }} diff --git a/test/integration/scheduler/preemption_test.go b/test/integration/scheduler/preemption_test.go index 5caf8b9245..3d92ef0188 100644 --- a/test/integration/scheduler/preemption_test.go +++ b/test/integration/scheduler/preemption_test.go @@ -17,6 +17,7 @@ limitations under the License. 
package scheduler import ( + "fmt" "time" "github.com/onsi/ginkgo/v2" @@ -306,7 +307,7 @@ var _ = ginkgo.Describe("Preemption", func() { util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, betaCQ.Name, betaLowWl) }) - ginkgo.It("Should preempt all necessary workloads in concurrent scheduling", func() { + ginkgo.It("Should preempt all necessary workloads in concurrent scheduling with different priorities", func() { ginkgo.By("Creating workloads in beta-cq that borrow quota") betaMidWl := testing.MakeWorkload("beta-mid", ns.Name). @@ -345,7 +346,7 @@ var _ = ginkgo.Describe("Preemption", func() { util.FinishEvictionForWorkloads(ctx, k8sClient, betaMidWl) // one of alpha-mid and gamma-mid should be admitted - gomega.Eventually(func() []*kueue.Workload { return util.FilterAdmittedWorkloads(ctx, k8sClient, alphaMidWl, gammaMidWl) }, util.Interval*4, util.Interval).Should(gomega.HaveLen(1)) + util.ExpectWorkloadsToBeAdmittedCount(ctx, k8sClient, 1, alphaMidWl, gammaMidWl) // betaHighWl remains admitted util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, betaCQ.Name, betaHighWl) @@ -356,6 +357,53 @@ var _ = ginkgo.Describe("Preemption", func() { util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, alphaCQ.Name, alphaMidWl) util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, gammaCQ.Name, gammaMidWl) }) + + ginkgo.It("Should preempt all necessary workloads in concurrent scheduling with the same priority", func() { + var betaWls []*kueue.Workload + for i := 0; i < 3; i++ { + wl := testing.MakeWorkload(fmt.Sprintf("beta-%d", i), ns.Name). + Queue(betaQ.Name). + Request(corev1.ResourceCPU, "2"). + Obj() + gomega.Expect(k8sClient.Create(ctx, wl)).To(gomega.Succeed()) + betaWls = append(betaWls, wl) + } + util.ExpectWorkloadsToBeAdmitted(ctx, k8sClient, betaWls...) + + ginkgo.By("Creating preempting pods") + + alphaWl := testing.MakeWorkload("alpha", ns.Name). + Queue(alphaQ.Name). + Request(corev1.ResourceCPU, "2"). + Obj() + gomega.Expect(k8sClient.Create(ctx, alphaWl)).To(gomega.Succeed()) + + gammaWl := testing.MakeWorkload("gamma", ns.Name). + Queue(gammaQ.Name). + Request(corev1.ResourceCPU, "2"). + Obj() + gomega.Expect(k8sClient.Create(ctx, gammaWl)).To(gomega.Succeed()) + + var evictedWorkloads []*kueue.Workload + gomega.Eventually(func() int { + evictedWorkloads = util.FilterEvictedWorkloads(ctx, k8sClient, betaWls...) + return len(evictedWorkloads) + }, util.Timeout, util.Interval).Should(gomega.Equal(1), "Number of evicted workloads") + + ginkgo.By("Finishing eviction for first set of preempted workloads") + util.FinishEvictionForWorkloads(ctx, k8sClient, evictedWorkloads...) + util.ExpectWorkloadsToBeAdmittedCount(ctx, k8sClient, 1, alphaWl, gammaWl) + + gomega.Eventually(func() int { + evictedWorkloads = util.FilterEvictedWorkloads(ctx, k8sClient, betaWls...) + return len(evictedWorkloads) + }, util.Timeout, util.Interval).Should(gomega.Equal(2), "Number of evicted workloads") + + ginkgo.By("Finishing eviction for second set of preempted workloads") + util.FinishEvictionForWorkloads(ctx, k8sClient, evictedWorkloads...) + util.ExpectWorkloadsToBeAdmitted(ctx, k8sClient, alphaWl, gammaWl) + util.ExpectWorkloadsToBeAdmittedCount(ctx, k8sClient, 1, betaWls...) 
+ }) }) ginkgo.Context("In a cohort with StrictFIFO", func() { diff --git a/test/util/util.go b/test/util/util.go index 2c38cf86cb..315ce860fe 100644 --- a/test/util/util.go +++ b/test/util/util.go @@ -244,11 +244,21 @@ func ExpectWorkloadsToHaveQuotaReservation(ctx context.Context, k8sClient client } func FilterAdmittedWorkloads(ctx context.Context, k8sClient client.Client, wls ...*kueue.Workload) []*kueue.Workload { + return filterWorkloads(ctx, k8sClient, workload.HasQuotaReservation, wls...) +} + +func FilterEvictedWorkloads(ctx context.Context, k8sClient client.Client, wls ...*kueue.Workload) []*kueue.Workload { + return filterWorkloads(ctx, k8sClient, func(wl *kueue.Workload) bool { + return apimeta.IsStatusConditionTrue(wl.Status.Conditions, kueue.WorkloadEvicted) + }, wls...) +} + +func filterWorkloads(ctx context.Context, k8sClient client.Client, filter func(*kueue.Workload) bool, wls ...*kueue.Workload) []*kueue.Workload { ret := make([]*kueue.Workload, 0, len(wls)) var updatedWorkload kueue.Workload for _, wl := range wls { err := k8sClient.Get(ctx, client.ObjectKeyFromObject(wl), &updatedWorkload) - if err == nil && workload.HasQuotaReservation(&updatedWorkload) { + if err == nil && filter(&updatedWorkload) { ret = append(ret, wl) } } @@ -274,17 +284,25 @@ func ExpectWorkloadsToBePending(ctx context.Context, k8sClient client.Client, wl } func ExpectWorkloadsToBeAdmitted(ctx context.Context, k8sClient client.Client, wls ...*kueue.Workload) { - gomega.EventuallyWithOffset(1, func() int { + expectWorkloadsToBeAdmittedCountWithOffset(ctx, 2, k8sClient, len(wls), wls...) +} + +func ExpectWorkloadsToBeAdmittedCount(ctx context.Context, k8sClient client.Client, count int, wls ...*kueue.Workload) { + expectWorkloadsToBeAdmittedCountWithOffset(ctx, 2, k8sClient, count, wls...) +} + +func expectWorkloadsToBeAdmittedCountWithOffset(ctx context.Context, offset int, k8sClient client.Client, count int, wls ...*kueue.Workload) { + gomega.EventuallyWithOffset(offset, func() int { admitted := 0 var updatedWorkload kueue.Workload for _, wl := range wls { - gomega.ExpectWithOffset(1, k8sClient.Get(ctx, client.ObjectKeyFromObject(wl), &updatedWorkload)).To(gomega.Succeed()) + gomega.ExpectWithOffset(offset, k8sClient.Get(ctx, client.ObjectKeyFromObject(wl), &updatedWorkload)).To(gomega.Succeed()) if apimeta.IsStatusConditionTrue(updatedWorkload.Status.Conditions, kueue.WorkloadAdmitted) { admitted++ } } return admitted - }, Timeout, Interval).Should(gomega.Equal(len(wls)), "Not enough workloads are admitted") + }, Timeout, Interval).Should(gomega.Equal(count), "Not enough workloads are admitted") } func ExpectWorkloadToFinish(ctx context.Context, k8sClient client.Client, wlKey client.ObjectKey) { From e628d0053cde2cb6f5a83ddcd68066fa88a45d6f Mon Sep 17 00:00:00 2001 From: k8s-infra-cherrypick-robot <90416843+k8s-infra-cherrypick-robot@users.noreply.github.com> Date: Tue, 26 Mar 2024 05:27:20 -0700 Subject: [PATCH 22/48] Use the setup-evnvtest version compatible with go 1.21 (#1914) Co-authored-by: Patryk Bundyra --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index cfa51ee78c..fbac3d8aa4 100644 --- a/Makefile +++ b/Makefile @@ -332,7 +332,7 @@ kustomize: ## Download kustomize locally if necessary. ENVTEST = $(PROJECT_DIR)/bin/setup-envtest .PHONY: envtest envtest: ## Download envtest-setup locally if necessary. 
- @GOBIN=$(PROJECT_DIR)/bin GO111MODULE=on $(GO_CMD) install sigs.k8s.io/controller-runtime/tools/setup-envtest@latest + @GOBIN=$(PROJECT_DIR)/bin GO111MODULE=on $(GO_CMD) install sigs.k8s.io/controller-runtime/tools/setup-envtest@v0.0.0-20240320141353-395cfc7486e6 GINKGO = $(PROJECT_DIR)/bin/ginkgo .PHONY: ginkgo From 3968266820f538e8fc6a2c8f26d1ded1852b88f3 Mon Sep 17 00:00:00 2001 From: k8s-infra-cherrypick-robot <90416843+k8s-infra-cherrypick-robot@users.noreply.github.com> Date: Tue, 26 Mar 2024 07:37:20 -0700 Subject: [PATCH 23/48] [release-0.6] Dereference StopPolicy before equality check (#1917) * Dereference StopPolicy before equality check * Update pkg/controller/core/workload_controller.go Co-authored-by: Yuki Iwai * Update pkg/controller/core/workload_controller.go Co-authored-by: Yuki Iwai --------- Co-authored-by: Gabe <15304068+gabesaba@users.noreply.github.com> Co-authored-by: Yuki Iwai --- pkg/controller/core/workload_controller.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/controller/core/workload_controller.go b/pkg/controller/core/workload_controller.go index 75fb07589f..beb372c2f9 100644 --- a/pkg/controller/core/workload_controller.go +++ b/pkg/controller/core/workload_controller.go @@ -750,7 +750,7 @@ func (w *workloadCqHandler) Update(ctx context.Context, ev event.UpdateEvent, wq if !newCq.DeletionTimestamp.IsZero() || !slices.CmpNoOrder(oldCq.Spec.AdmissionChecks, newCq.Spec.AdmissionChecks) || - oldCq.Spec.StopPolicy != newCq.Spec.StopPolicy { + !ptr.Equal(oldCq.Spec.StopPolicy, newCq.Spec.StopPolicy) { w.queueReconcileForWorkloads(ctx, newCq.Name, wq) } } From 54a6b43ea626b067c796082dfc550b6c32c2642d Mon Sep 17 00:00:00 2001 From: Yuki Iwai Date: Tue, 26 Mar 2024 16:33:20 +0100 Subject: [PATCH 24/48] Introduce the ClusterAutoscaler APIs module (#1915) Signed-off-by: Yuki Iwai --- Makefile | 2 +- cmd/kueue/main.go | 2 +- go.mod | 16 +++++----- go.sum | 32 +++++++++---------- .../provisioning/controller.go | 2 +- .../provisioning/controller_test.go | 2 +- .../admissionchecks/provisioning/indexer.go | 2 +- .../provisioning/indexer_test.go | 2 +- .../provisioning/provisioning_test.go | 2 +- .../provisioning/suite_test.go | 2 +- 10 files changed, 32 insertions(+), 32 deletions(-) diff --git a/Makefile b/Makefile index fbac3d8aa4..e88b5c3923 100644 --- a/Makefile +++ b/Makefile @@ -389,7 +389,7 @@ jobset-operator-crd: cp -f $(JOBSETROOT)/config/components/crd/bases/* $(PROJECT_DIR)/dep-crds/jobset-operator/ -CAROOT = $(shell $(GO_CMD) list -m -f "{{.Dir}}" k8s.io/autoscaler/cluster-autoscaler) +CAROOT = $(shell $(GO_CMD) list -m -f "{{.Dir}}" k8s.io/autoscaler/cluster-autoscaler/apis) .PHONY: cluster-autoscaler-crd cluster-autoscaler-crd: mkdir -p $(PROJECT_DIR)/dep-crds/cluster-autoscaler/ diff --git a/cmd/kueue/main.go b/cmd/kueue/main.go index 56e5c59e36..7137d32bb0 100644 --- a/cmd/kueue/main.go +++ b/cmd/kueue/main.go @@ -35,7 +35,7 @@ import ( utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/validation/field" utilfeature "k8s.io/apiserver/pkg/util/feature" - autoscaling "k8s.io/autoscaler/cluster-autoscaler/provisioningrequest/apis/autoscaling.x-k8s.io/v1beta1" + autoscaling "k8s.io/autoscaler/cluster-autoscaler/apis/provisioningrequest/autoscaling.x-k8s.io/v1beta1" "k8s.io/client-go/discovery" clientgoscheme "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/rest" diff --git a/go.mod b/go.mod index ab96339d49..43ba6dceaf 100644 --- a/go.mod +++ b/go.mod @@ -8,19 +8,19 @@ require ( 
github.com/kubeflow/common v0.4.7 github.com/kubeflow/mpi-operator v0.4.0 github.com/kubeflow/training-operator v1.7.0 - github.com/onsi/ginkgo/v2 v2.15.0 + github.com/onsi/ginkgo/v2 v2.16.0 github.com/onsi/gomega v1.31.1 github.com/open-policy-agent/cert-controller v0.10.1 github.com/prometheus/client_golang v1.18.0 github.com/prometheus/client_model v0.5.0 github.com/ray-project/kuberay/ray-operator v1.1.0-alpha.0 go.uber.org/zap v1.26.0 - k8s.io/api v0.29.1 - k8s.io/apimachinery v0.29.1 + k8s.io/api v0.29.2 + k8s.io/apimachinery v0.29.2 k8s.io/apiserver v0.29.1 - k8s.io/autoscaler/cluster-autoscaler v0.0.0-20230925095857-cf8c507d2421 - k8s.io/client-go v0.29.1 - k8s.io/code-generator v0.29.1 + k8s.io/autoscaler/cluster-autoscaler/apis v0.0.0-20240325113845-0130d33747bb + k8s.io/client-go v0.29.2 + k8s.io/code-generator v0.29.2 k8s.io/component-base v0.29.1 k8s.io/component-helpers v0.29.1 k8s.io/klog/v2 v2.110.1 @@ -104,12 +104,12 @@ require ( golang.org/x/mod v0.14.0 // indirect golang.org/x/net v0.20.0 // indirect golang.org/x/oauth2 v0.12.0 // indirect - golang.org/x/sync v0.5.0 // indirect + golang.org/x/sync v0.6.0 // indirect golang.org/x/sys v0.16.0 // indirect golang.org/x/term v0.16.0 // indirect golang.org/x/text v0.14.0 // indirect golang.org/x/time v0.3.0 // indirect - golang.org/x/tools v0.16.1 // indirect + golang.org/x/tools v0.17.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect google.golang.org/appengine v1.6.8 // indirect google.golang.org/genproto v0.0.0-20230803162519-f966b187b2e5 // indirect diff --git a/go.sum b/go.sum index 49931f4ec3..4b1c37dd2c 100644 --- a/go.sum +++ b/go.sum @@ -153,8 +153,8 @@ github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU= -github.com/onsi/ginkgo/v2 v2.15.0 h1:79HwNRBAZHOEwrczrgSOPy+eFTTlIGELKy5as+ClttY= -github.com/onsi/ginkgo/v2 v2.15.0/go.mod h1:HlxMHtYF57y6Dpf+mc5529KKmSq9h2FpCF+/ZkwUxKM= +github.com/onsi/ginkgo/v2 v2.16.0 h1:7q1w9frJDzninhXxjZd+Y/x54XNjG/UlRLIYPZafsPM= +github.com/onsi/ginkgo/v2 v2.16.0/go.mod h1:llBI3WDLL9Z6taip6f33H76YcWtJv+7R3HigUjbIBOs= github.com/onsi/gomega v1.31.1 h1:KYppCUK+bUgAZwHOu7EXVBKyQA6ILvOESHkn/tgoqvo= github.com/onsi/gomega v1.31.1/go.mod h1:y40C95dwAD1Nz36SsEnxvfFe8FFfNxzI5eJ0EYGyAy0= github.com/open-policy-agent/cert-controller v0.10.1 h1:RXSYoyn8FdCenWecRP//UV5nbVfmstNpj4kHQFkvPK4= @@ -275,8 +275,8 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE= -golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= +golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -307,8 +307,8 @@ golang.org/x/tools v0.0.0-20200505023115-26f46d2f7ef8/go.mod h1:EkVYQZoAsY45+roY golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= -golang.org/x/tools v0.16.1 h1:TLyB3WofjdOEepBHAU20JdNC1Zbg87elYofWYAY5oZA= -golang.org/x/tools v0.16.1/go.mod h1:kYVVN6I1mBNoB1OX+noeBjbRk4IUEPa7JJ+TJMEooJ0= +golang.org/x/tools v0.17.0 h1:FvmRgNOcs3kOa+T20R1uhfP9F6HgG2mfxDv1vrx1Htc= +golang.org/x/tools v0.17.0/go.mod h1:xsh6VxdV005rRVaS6SSAf9oiAqljS7UZUacMZ8Bnsps= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -345,20 +345,20 @@ gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -k8s.io/api v0.29.1 h1:DAjwWX/9YT7NQD4INu49ROJuZAAAP/Ijki48GUPzxqw= -k8s.io/api v0.29.1/go.mod h1:7Kl10vBRUXhnQQI8YR/R327zXC8eJ7887/+Ybta+RoQ= +k8s.io/api v0.29.2 h1:hBC7B9+MU+ptchxEqTNW2DkUosJpp1P+Wn6YncZ474A= +k8s.io/api v0.29.2/go.mod h1:sdIaaKuU7P44aoyyLlikSLayT6Vb7bvJNCX105xZXY0= k8s.io/apiextensions-apiserver v0.29.0 h1:0VuspFG7Hj+SxyF/Z/2T0uFbI5gb5LRgEyUVE3Q4lV0= k8s.io/apiextensions-apiserver v0.29.0/go.mod h1:TKmpy3bTS0mr9pylH0nOt/QzQRrW7/h7yLdRForMZwc= -k8s.io/apimachinery v0.29.1 h1:KY4/E6km/wLBguvCZv8cKTeOwwOBqFNjwJIdMkMbbRc= -k8s.io/apimachinery v0.29.1/go.mod h1:6HVkd1FwxIagpYrHSwJlQqZI3G9LfYWRPAkUvLnXTKU= +k8s.io/apimachinery v0.29.2 h1:EWGpfJ856oj11C52NRCHuU7rFDwxev48z+6DSlGNsV8= +k8s.io/apimachinery v0.29.2/go.mod h1:6HVkd1FwxIagpYrHSwJlQqZI3G9LfYWRPAkUvLnXTKU= k8s.io/apiserver v0.29.1 h1:e2wwHUfEmMsa8+cuft8MT56+16EONIEK8A/gpBSco+g= k8s.io/apiserver v0.29.1/go.mod h1:V0EpkTRrJymyVT3M49we8uh2RvXf7fWC5XLB0P3SwRw= -k8s.io/autoscaler/cluster-autoscaler v0.0.0-20230925095857-cf8c507d2421 h1:h8E8WHuSP3aArI7k9n/E4r98S5PkXQ+hiQSprylSy7Q= -k8s.io/autoscaler/cluster-autoscaler v0.0.0-20230925095857-cf8c507d2421/go.mod h1:CIDuB0kVY16I0kzpUJa+eYDbj9xbclVbs8aJtgL3WzA= -k8s.io/client-go v0.29.1 h1:19B/+2NGEwnFLzt0uB5kNJnfTsbV8w6TgQRz9l7ti7A= -k8s.io/client-go v0.29.1/go.mod h1:TDG/psL9hdet0TI9mGyHJSgRkW3H9JZk2dNEUS7bRks= -k8s.io/code-generator v0.29.1 h1:8ba8BdtSmAVHgAMpzThb/fuyQeTRtN7NtN7VjMcDLew= -k8s.io/code-generator v0.29.1/go.mod h1:FwFi3C9jCrmbPjekhaCYcYG1n07CYiW1+PAPCockaos= +k8s.io/autoscaler/cluster-autoscaler/apis v0.0.0-20240325113845-0130d33747bb h1:ycQ/tSpcJEUHHx0pv6MXdq4NcRflCvFX6SMwmKROiis= +k8s.io/autoscaler/cluster-autoscaler/apis v0.0.0-20240325113845-0130d33747bb/go.mod h1:LPhCVj3E5Lp9W6HGVlW664m/X+KN2firfF3wtBBji54= +k8s.io/client-go v0.29.2 h1:FEg85el1TeZp+/vYJM7hkDlSTFZ+c5nnK44DJ4FyoRg= +k8s.io/client-go v0.29.2/go.mod h1:knlvFZE58VpqbQpJNbCbctTVXcd35mMyAAwBdpt4jrA= +k8s.io/code-generator v0.29.2 h1:c9/iw2KnNpw2IRV+wwuG/Wns2TjPSgjWzbbjTevyiHI= +k8s.io/code-generator 
v0.29.2/go.mod h1:FwFi3C9jCrmbPjekhaCYcYG1n07CYiW1+PAPCockaos= k8s.io/component-base v0.29.1 h1:MUimqJPCRnnHsskTTjKD+IC1EHBbRCVyi37IoFBrkYw= k8s.io/component-base v0.29.1/go.mod h1:fP9GFjxYrLERq1GcWWZAE3bqbNcDKDytn2srWuHTtKc= k8s.io/component-helpers v0.29.1 h1:54MMEDu6xeJmMtAKztsPwu0kJKr4+jCUzaEIn2UXRoc= diff --git a/pkg/controller/admissionchecks/provisioning/controller.go b/pkg/controller/admissionchecks/provisioning/controller.go index f94ce6f8ee..9c3910ac6c 100644 --- a/pkg/controller/admissionchecks/provisioning/controller.go +++ b/pkg/controller/admissionchecks/provisioning/controller.go @@ -32,7 +32,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/sets" - autoscaling "k8s.io/autoscaler/cluster-autoscaler/provisioningrequest/apis/autoscaling.x-k8s.io/v1beta1" + autoscaling "k8s.io/autoscaler/cluster-autoscaler/apis/provisioningrequest/autoscaling.x-k8s.io/v1beta1" "k8s.io/client-go/tools/record" "k8s.io/client-go/util/workqueue" "k8s.io/klog/v2" diff --git a/pkg/controller/admissionchecks/provisioning/controller_test.go b/pkg/controller/admissionchecks/provisioning/controller_test.go index 673c42a0ba..1e14bb1a5b 100644 --- a/pkg/controller/admissionchecks/provisioning/controller_test.go +++ b/pkg/controller/admissionchecks/provisioning/controller_test.go @@ -27,7 +27,7 @@ import ( "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" - autoscaling "k8s.io/autoscaler/cluster-autoscaler/provisioningrequest/apis/autoscaling.x-k8s.io/v1beta1" + autoscaling "k8s.io/autoscaler/cluster-autoscaler/apis/provisioningrequest/autoscaling.x-k8s.io/v1beta1" "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/reconcile" diff --git a/pkg/controller/admissionchecks/provisioning/indexer.go b/pkg/controller/admissionchecks/provisioning/indexer.go index cfbe1301da..91f48e02ca 100644 --- a/pkg/controller/admissionchecks/provisioning/indexer.go +++ b/pkg/controller/admissionchecks/provisioning/indexer.go @@ -21,7 +21,7 @@ import ( "fmt" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - autoscaling "k8s.io/autoscaler/cluster-autoscaler/provisioningrequest/apis/autoscaling.x-k8s.io/v1beta1" + autoscaling "k8s.io/autoscaler/cluster-autoscaler/apis/provisioningrequest/autoscaling.x-k8s.io/v1beta1" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/apiutil" "sigs.k8s.io/controller-runtime/pkg/manager" diff --git a/pkg/controller/admissionchecks/provisioning/indexer_test.go b/pkg/controller/admissionchecks/provisioning/indexer_test.go index 584cefe866..24e9ddaaf6 100644 --- a/pkg/controller/admissionchecks/provisioning/indexer_test.go +++ b/pkg/controller/admissionchecks/provisioning/indexer_test.go @@ -25,7 +25,7 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" - autoscaling "k8s.io/autoscaler/cluster-autoscaler/provisioningrequest/apis/autoscaling.x-k8s.io/v1beta1" + autoscaling "k8s.io/autoscaler/cluster-autoscaler/apis/provisioningrequest/autoscaling.x-k8s.io/v1beta1" clientgoscheme "k8s.io/client-go/kubernetes/scheme" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" diff --git a/test/integration/controller/admissionchecks/provisioning/provisioning_test.go b/test/integration/controller/admissionchecks/provisioning/provisioning_test.go index 65c820f8a0..18ebcdaae9 100644 --- 
a/test/integration/controller/admissionchecks/provisioning/provisioning_test.go +++ b/test/integration/controller/admissionchecks/provisioning/provisioning_test.go @@ -26,7 +26,7 @@ import ( "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" - autoscaling "k8s.io/autoscaler/cluster-autoscaler/provisioningrequest/apis/autoscaling.x-k8s.io/v1beta1" + autoscaling "k8s.io/autoscaler/cluster-autoscaler/apis/provisioningrequest/autoscaling.x-k8s.io/v1beta1" "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" diff --git a/test/integration/controller/admissionchecks/provisioning/suite_test.go b/test/integration/controller/admissionchecks/provisioning/suite_test.go index 817a376c65..8f13d61fc2 100644 --- a/test/integration/controller/admissionchecks/provisioning/suite_test.go +++ b/test/integration/controller/admissionchecks/provisioning/suite_test.go @@ -23,7 +23,7 @@ import ( "github.com/onsi/ginkgo/v2" "github.com/onsi/gomega" - autoscaling "k8s.io/autoscaler/cluster-autoscaler/provisioningrequest/apis/autoscaling.x-k8s.io/v1beta1" + autoscaling "k8s.io/autoscaler/cluster-autoscaler/apis/provisioningrequest/autoscaling.x-k8s.io/v1beta1" "k8s.io/client-go/rest" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/manager" From 61e74267832b44467090612d6427c25fd6ac18d5 Mon Sep 17 00:00:00 2001 From: k8s-infra-cherrypick-robot <90416843+k8s-infra-cherrypick-robot@users.noreply.github.com> Date: Tue, 26 Mar 2024 08:33:26 -0700 Subject: [PATCH 25/48] Fix Pod finalizers for succeeded groups (#1916) Change-Id: I7f17d51df66de6766af44e48f45b4165e8bd3e30 Co-authored-by: Aldo Culquicondor --- pkg/controller/jobframework/reconciler.go | 7 +-- .../jobs/pod/pod_controller_test.go | 3 +- .../jobs/pod/pod_controller_test.go | 60 +++++++++++++++++++ 3 files changed, 64 insertions(+), 6 deletions(-) diff --git a/pkg/controller/jobframework/reconciler.go b/pkg/controller/jobframework/reconciler.go index 1a40c6555d..5370048a72 100644 --- a/pkg/controller/jobframework/reconciler.go +++ b/pkg/controller/jobframework/reconciler.go @@ -270,11 +270,8 @@ func (r *JobReconciler) ReconcileGenericJob(ctx context.Context, req ctrl.Reques } if wl != nil && apimeta.IsStatusConditionTrue(wl.Status.Conditions, kueue.WorkloadFinished) { - // Finalize the job if it's finished - if _, finished := job.Finished(); finished { - if err := r.finalizeJob(ctx, job); err != nil { - return ctrl.Result{}, err - } + if err := r.finalizeJob(ctx, job); err != nil { + return ctrl.Result{}, err } r.record.Eventf(object, corev1.EventTypeNormal, ReasonFinishedWorkload, diff --git a/pkg/controller/jobs/pod/pod_controller_test.go b/pkg/controller/jobs/pod/pod_controller_test.go index 196e6e4ba0..4a7a597a88 100644 --- a/pkg/controller/jobs/pod/pod_controller_test.go +++ b/pkg/controller/jobs/pod/pod_controller_test.go @@ -977,10 +977,11 @@ func TestReconciler(t *testing.T) { }, workloadCmpOpts: defaultWorkloadCmpOpts, }, - "workload is not deleted if one of the pods in the finished group is absent": { + "Pods are finalized even if one of the pods in the finished group is absent": { pods: []corev1.Pod{ *basePodWrapper. Clone(). + KueueFinalizer(). Label("kueue.x-k8s.io/managed", "true"). Group("test-group"). GroupTotalCount("2"). 
diff --git a/test/integration/controller/jobs/pod/pod_controller_test.go b/test/integration/controller/jobs/pod/pod_controller_test.go index 5c8c69557a..3a52ee545b 100644 --- a/test/integration/controller/jobs/pod/pod_controller_test.go +++ b/test/integration/controller/jobs/pod/pod_controller_test.go @@ -18,6 +18,7 @@ package pod import ( "fmt" + "strconv" "time" "github.com/google/go-cmp/cmp" @@ -1024,6 +1025,65 @@ var _ = ginkgo.Describe("Pod controller", ginkgo.Ordered, ginkgo.ContinueOnFailu }) }) + ginkgo.It("Should finalize all Succeeded Pods when deleted", func() { + ginkgo.By("Creating pods with queue name") + // Use a number of Pods big enough to cause conflicts when removing finalizers >50% of the time. + const podCount = 7 + pods := make([]*corev1.Pod, podCount) + for i := range pods { + pods[i] = testingpod.MakePod(fmt.Sprintf("test-pod-%d", i), ns.Name). + Group("test-group"). + GroupTotalCount(strconv.Itoa(podCount)). + Request(corev1.ResourceCPU, "1"). + Queue("test-queue"). + Obj() + gomega.Expect(k8sClient.Create(ctx, pods[i])).Should(gomega.Succeed()) + } + + ginkgo.By("checking that workload is created for the pod group") + wlLookupKey := types.NamespacedName{ + Namespace: pods[0].Namespace, + Name: "test-group", + } + createdWorkload := &kueue.Workload{} + gomega.Eventually(func() error { + return k8sClient.Get(ctx, wlLookupKey, createdWorkload) + }, util.Timeout, util.Interval).Should(gomega.Succeed()) + + ginkgo.By("Admitting workload", func() { + admission := testing.MakeAdmission(clusterQueue.Name).PodSets( + kueue.PodSetAssignment{ + Name: "4b0469f7", + Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ + corev1.ResourceCPU: "default", + }, + Count: ptr.To[int32](podCount), + }, + ).Obj() + gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, createdWorkload, admission)).Should(gomega.Succeed()) + util.SyncAdmittedConditionForWorkloads(ctx, k8sClient, createdWorkload) + + for i := range pods { + util.ExpectPodUnsuspendedWithNodeSelectors(ctx, k8sClient, client.ObjectKeyFromObject(pods[i]), map[string]string{"kubernetes.io/arch": "arm64"}) + } + }) + + ginkgo.By("Finishing and deleting Pods", func() { + util.SetPodsPhase(ctx, k8sClient, corev1.PodSucceeded, pods...) + for i := range pods { + gomega.Expect(k8sClient.Delete(ctx, pods[i])).To(gomega.Succeed()) + } + + gomega.Eventually(func(g gomega.Gomega) { + for i := range pods { + key := types.NamespacedName{Namespace: ns.Name, Name: fmt.Sprintf("test-pod-%d", i)} + g.Expect(k8sClient.Get(ctx, key, &corev1.Pod{})).To(testing.BeNotFoundError()) + } + }, util.Timeout, util.Interval).Should(gomega.Succeed()) + }) + + }) + ginkgo.It("Should finalize workload if pods are absent", func() { ginkgo.By("Creating pods with queue name") pod1 := testingpod.MakePod("test-pod1", ns.Name). 
From 406a08875d2e685e1c4e0439af9099203ff65d9c Mon Sep 17 00:00:00 2001 From: k8s-infra-cherrypick-robot <90416843+k8s-infra-cherrypick-robot@users.noreply.github.com> Date: Fri, 5 Apr 2024 14:02:30 -0700 Subject: [PATCH 26/48] Index PodGroupName (#1953) Co-authored-by: Gabe <15304068+gabesaba@users.noreply.github.com> --- pkg/controller/jobs/pod/indexer.go | 38 +++++++++++++++++++++++ pkg/controller/jobs/pod/pod_controller.go | 16 +++++++--- 2 files changed, 49 insertions(+), 5 deletions(-) create mode 100644 pkg/controller/jobs/pod/indexer.go diff --git a/pkg/controller/jobs/pod/indexer.go b/pkg/controller/jobs/pod/indexer.go new file mode 100644 index 0000000000..24f1831942 --- /dev/null +++ b/pkg/controller/jobs/pod/indexer.go @@ -0,0 +1,38 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package pod + +import ( + corev1 "k8s.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +const ( + PodGroupNameCacheKey = "PodGroupNameCacheKey" +) + +func IndexPodGroupName(o client.Object) []string { + pod, ok := o.(*corev1.Pod) + if !ok { + return nil + } + + if labelValue, exists := pod.GetLabels()[GroupNameLabel]; exists { + return []string{labelValue} + } + return nil +} diff --git a/pkg/controller/jobs/pod/pod_controller.go b/pkg/controller/jobs/pod/pod_controller.go index d2eb86e040..7b8c8410fd 100644 --- a/pkg/controller/jobs/pod/pod_controller.go +++ b/pkg/controller/jobs/pod/pod_controller.go @@ -476,7 +476,13 @@ func (p *Pod) Stop(ctx context.Context, c client.Client, _ []podset.PodSetInfo, } func SetupIndexes(ctx context.Context, indexer client.FieldIndexer) error { - return jobframework.SetupWorkloadOwnerIndex(ctx, indexer, gvk) + if err := indexer.IndexField(ctx, &corev1.Pod{}, PodGroupNameCacheKey, IndexPodGroupName); err != nil { + return err + } + if err := jobframework.SetupWorkloadOwnerIndex(ctx, indexer, gvk); err != nil { + return err + } + return nil } func CanSupportIntegration(opts ...jobframework.Option) (bool, error) { @@ -496,8 +502,8 @@ func (p *Pod) Finalize(ctx context.Context, c client.Client) error { if groupName == "" { podsInGroup.Items = append(podsInGroup.Items, *p.Object().(*corev1.Pod)) } else { - if err := c.List(ctx, &podsInGroup, client.MatchingLabels{ - GroupNameLabel: groupName, + if err := c.List(ctx, &podsInGroup, client.MatchingFields{ + PodGroupNameCacheKey: groupName, }, client.InNamespace(p.pod.Namespace)); err != nil { return err } @@ -611,8 +617,8 @@ func (p *Pod) Load(ctx context.Context, c client.Client, key *types.NamespacedNa // and update the expectations after we've retrieved active pods from the store. 
p.satisfiedExcessPods = p.excessPodExpectations.Satisfied(ctrl.LoggerFrom(ctx), *key) - if err := c.List(ctx, &p.list, client.MatchingLabels{ - GroupNameLabel: key.Name, + if err := c.List(ctx, &p.list, client.MatchingFields{ + PodGroupNameCacheKey: key.Name, }, client.InNamespace(key.Namespace)); err != nil { return false, err } From 223106e443fc3a247010bc055918d9aab372b5bb Mon Sep 17 00:00:00 2001 From: Aldo Culquicondor <1299064+alculquicondor@users.noreply.github.com> Date: Wed, 10 Apr 2024 01:44:48 -0400 Subject: [PATCH 27/48] Prepare release v0.6.2 (#1965) Change-Id: Icabeb0cca6f7680173d72be29a293731f93d8f9b --- CHANGELOG/CHANGELOG-0.6.md | 15 +++++++++++++++ Makefile | 2 +- README.md | 2 +- charts/kueue/Chart.yaml | 2 +- site/config.toml | 2 +- 5 files changed, 19 insertions(+), 4 deletions(-) diff --git a/CHANGELOG/CHANGELOG-0.6.md b/CHANGELOG/CHANGELOG-0.6.md index 3755d13687..9f5bdf7137 100644 --- a/CHANGELOG/CHANGELOG-0.6.md +++ b/CHANGELOG/CHANGELOG-0.6.md @@ -1,3 +1,18 @@ +## v0.6.2 + +Changes since `v0.6.1`: + +### Bug or Regression + +- Avoid unnecessary preemptions when there are multiple candidates for preemption with the same admission timestamp (#1880, @alculquicondor) +- Fix Pods in Pod groups stuck with finalizers when deleted immediately after Succeeded (#1916, @alculquicondor) +- Fix preemption to reclaim quota that is blocked by an earlier pending Workload from another ClusterQueue in the same cohort. (#1868, @alculquicondor) +- Reduce number of Workload reconciliations due to wrong equality check. (#1917, @gabesaba) + +### Other (Cleanup or Flake) + +- Improve pod integration performance (#1953, @gabesaba) + ## v0.6.1 Changes Since `v0.6.0`: diff --git a/Makefile b/Makefile index e88b5c3923..5ebe93fed6 100644 --- a/Makefile +++ b/Makefile @@ -88,7 +88,7 @@ LD_FLAGS += -X '$(version_pkg).GitCommit=$(shell git rev-parse HEAD)' # Update these variables when preparing a new release or a release branch. # Then run `make prepare-release-branch` -RELEASE_VERSION=v0.6.1 +RELEASE_VERSION=v0.6.2 RELEASE_BRANCH=release-0.6 .PHONY: all diff --git a/README.md b/README.md index dbe6ae2820..553bfe6626 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ Read the [overview](https://kueue.sigs.k8s.io/docs/overview/) to learn more. To install the latest release of Kueue in your cluster, run the following command: ```shell -kubectl apply --server-side -f https://github.com/kubernetes-sigs/kueue/releases/download/v0.6.1/manifests.yaml +kubectl apply --server-side -f https://github.com/kubernetes-sigs/kueue/releases/download/v0.6.2/manifests.yaml ``` The controller runs in the `kueue-system` namespace. diff --git a/charts/kueue/Chart.yaml b/charts/kueue/Chart.yaml index ab98fe240f..5604645330 100644 --- a/charts/kueue/Chart.yaml +++ b/charts/kueue/Chart.yaml @@ -18,4 +18,4 @@ version: 0.1.0 # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "v0.6.1" +appVersion: "v0.6.2" diff --git a/site/config.toml b/site/config.toml index 7d8733915d..0675d4e167 100644 --- a/site/config.toml +++ b/site/config.toml @@ -92,7 +92,7 @@ ignoreFiles = [] # The major.minor version tag for the version of the docs represented in this # branch of the repository. Used in the "version-banner" partial to display a # version number for this doc set. 
- version = "v0.6.1" + version = "v0.6.2" # Flag used in the "version-banner" partial to decide whether to display a # banner on every page indicating that this is an archived version of the docs. From 3552c014cf302ed1928183ea7060e28657096487 Mon Sep 17 00:00:00 2001 From: Dimitri Saridakis Date: Wed, 24 Jan 2024 09:52:24 +0000 Subject: [PATCH 28/48] CARRY: Edit args for FIPS compliant build --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index ef863e4494..8e16cd690c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,7 +3,7 @@ ARG BASE_IMAGE # Build the manager binary FROM --platform=${BUILDPLATFORM} ${BUILDER_IMAGE} as builder -ARG CGO_ENABLED +ARG CGO_ENABLED=1 ARG TARGETARCH WORKDIR /workspace From d9409cb1a0d59029cec11088c2205b0dc4f9e3cb Mon Sep 17 00:00:00 2001 From: Antonin Stefanutti Date: Wed, 31 Jan 2024 16:09:39 +0100 Subject: [PATCH 29/48] CARRY: Add RHOAI Kustomize overlay --- config/rhoai/kustomization.yaml | 28 +++++++++++++++++++++++++ config/rhoai/manager_config_patch.yaml | 21 +++++++++++++++++++ config/rhoai/manager_webhook_patch.yaml | 23 ++++++++++++++++++++ 3 files changed, 72 insertions(+) create mode 100644 config/rhoai/kustomization.yaml create mode 100644 config/rhoai/manager_config_patch.yaml create mode 100644 config/rhoai/manager_webhook_patch.yaml diff --git a/config/rhoai/kustomization.yaml b/config/rhoai/kustomization.yaml new file mode 100644 index 0000000000..40e91f9a80 --- /dev/null +++ b/config/rhoai/kustomization.yaml @@ -0,0 +1,28 @@ +# Adds namespace to all resources. +namespace: opendatahub + +# Value of this field is prepended to the +# names of all resources, e.g. a deployment named +# "wordpress" becomes "alices-wordpress". +# Note that it should also match with the prefix (text before '-') of the namespace +# field above. +namePrefix: kueue- + +# Labels to add to all resources and selectors. +#commonLabels: +# someName: someValue + +resources: +- ../components/crd +- ../components/rbac +- ../components/manager +- ../components/internalcert +- ../components/webhook +# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'. 
+#- ../components/prometheus + +patches: +# Mount the controller config file for loading manager configurations +# through a ComponentConfig type +- path: manager_config_patch.yaml +- path: manager_webhook_patch.yaml diff --git a/config/rhoai/manager_config_patch.yaml b/config/rhoai/manager_config_patch.yaml new file mode 100644 index 0000000000..2e451fbf2c --- /dev/null +++ b/config/rhoai/manager_config_patch.yaml @@ -0,0 +1,21 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: controller-manager + namespace: system +spec: + template: + spec: + containers: + - name: manager + args: + - "--config=/controller_manager_config.yaml" + - "--zap-log-level=2" + volumeMounts: + - name: manager-config + mountPath: /controller_manager_config.yaml + subPath: controller_manager_config.yaml + volumes: + - name: manager-config + configMap: + name: manager-config diff --git a/config/rhoai/manager_webhook_patch.yaml b/config/rhoai/manager_webhook_patch.yaml new file mode 100644 index 0000000000..738de350b7 --- /dev/null +++ b/config/rhoai/manager_webhook_patch.yaml @@ -0,0 +1,23 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: controller-manager + namespace: system +spec: + template: + spec: + containers: + - name: manager + ports: + - containerPort: 9443 + name: webhook-server + protocol: TCP + volumeMounts: + - mountPath: /tmp/k8s-webhook-server/serving-certs + name: cert + readOnly: true + volumes: + - name: cert + secret: + defaultMode: 420 + secretName: webhook-server-cert From 24098e7688f9e95e25b4222d45554d13d77132df Mon Sep 17 00:00:00 2001 From: Antonin Stefanutti Date: Wed, 31 Jan 2024 17:38:20 +0100 Subject: [PATCH 30/48] PATCH: Remove Pod webhooks --- config/components/webhook/kustomization.yaml | 30 --------------- config/components/webhook/manifests.yaml | 39 -------------------- 2 files changed, 69 deletions(-) diff --git a/config/components/webhook/kustomization.yaml b/config/components/webhook/kustomization.yaml index 1c17012696..9cf26134e4 100644 --- a/config/components/webhook/kustomization.yaml +++ b/config/components/webhook/kustomization.yaml @@ -4,33 +4,3 @@ resources: configurations: - kustomizeconfig.yaml - -patches: -- patch: |- - apiVersion: admissionregistration.k8s.io/v1 - kind: MutatingWebhookConfiguration - metadata: - name: mutating-webhook-configuration - webhooks: - - name: mpod.kb.io - namespaceSelector: - matchExpressions: - - key: kubernetes.io/metadata.name - operator: NotIn - values: - - kube-system - - kueue-system -- patch: |- - apiVersion: admissionregistration.k8s.io/v1 - kind: ValidatingWebhookConfiguration - metadata: - name: validating-webhook-configuration - webhooks: - - name: vpod.kb.io - namespaceSelector: - matchExpressions: - - key: kubernetes.io/metadata.name - operator: NotIn - values: - - kube-system - - kueue-system diff --git a/config/components/webhook/manifests.yaml b/config/components/webhook/manifests.yaml index c7bed96edc..63d2f8b82e 100644 --- a/config/components/webhook/manifests.yaml +++ b/config/components/webhook/manifests.yaml @@ -156,25 +156,6 @@ webhooks: resources: - mpijobs sideEffects: None -- admissionReviewVersions: - - v1 - clientConfig: - service: - name: webhook-service - namespace: system - path: /mutate--v1-pod - failurePolicy: Fail - name: mpod.kb.io - rules: - - apiGroups: - - "" - apiVersions: - - v1 - operations: - - CREATE - resources: - - pods - sideEffects: None - admissionReviewVersions: - v1 clientConfig: @@ -436,26 +417,6 @@ webhooks: resources: - mpijobs sideEffects: None -- 
admissionReviewVersions: - - v1 - clientConfig: - service: - name: webhook-service - namespace: system - path: /validate--v1-pod - failurePolicy: Fail - name: vpod.kb.io - rules: - - apiGroups: - - "" - apiVersions: - - v1 - operations: - - CREATE - - UPDATE - resources: - - pods - sideEffects: None - admissionReviewVersions: - v1 clientConfig: From 6d7b754b06eed2c67e122790aac96dcd4312a34a Mon Sep 17 00:00:00 2001 From: Antonin Stefanutti Date: Fri, 2 Feb 2024 09:43:36 +0100 Subject: [PATCH 31/48] CARRY: Integrate with OpenShift user workload monitoring --- config/rhoai/auth_proxy_service_patch.yaml | 6 ++++++ config/rhoai/kustomization.yaml | 5 +++-- config/rhoai/manager_metrics_patch.yaml | 14 ++++++++++++++ config/rhoai/monitor.yaml | 14 ++++++++++++++ 4 files changed, 37 insertions(+), 2 deletions(-) create mode 100644 config/rhoai/auth_proxy_service_patch.yaml create mode 100644 config/rhoai/manager_metrics_patch.yaml create mode 100644 config/rhoai/monitor.yaml diff --git a/config/rhoai/auth_proxy_service_patch.yaml b/config/rhoai/auth_proxy_service_patch.yaml new file mode 100644 index 0000000000..7836a7eeb7 --- /dev/null +++ b/config/rhoai/auth_proxy_service_patch.yaml @@ -0,0 +1,6 @@ +$patch: delete +apiVersion: v1 +kind: Service +metadata: + name: controller-manager-metrics-service + namespace: system diff --git a/config/rhoai/kustomization.yaml b/config/rhoai/kustomization.yaml index 40e91f9a80..d48bd01c05 100644 --- a/config/rhoai/kustomization.yaml +++ b/config/rhoai/kustomization.yaml @@ -18,11 +18,12 @@ resources: - ../components/manager - ../components/internalcert - ../components/webhook -# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'. -#- ../components/prometheus +- monitor.yaml patches: # Mount the controller config file for loading manager configurations # through a ComponentConfig type - path: manager_config_patch.yaml - path: manager_webhook_patch.yaml +- path: manager_metrics_patch.yaml +- path: auth_proxy_service_patch.yaml diff --git a/config/rhoai/manager_metrics_patch.yaml b/config/rhoai/manager_metrics_patch.yaml new file mode 100644 index 0000000000..f4e3c86f1a --- /dev/null +++ b/config/rhoai/manager_metrics_patch.yaml @@ -0,0 +1,14 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: controller-manager + namespace: system +spec: + template: + spec: + containers: + - name: manager + ports: + - containerPort: 8080 + protocol: TCP + name: metrics diff --git a/config/rhoai/monitor.yaml b/config/rhoai/monitor.yaml new file mode 100644 index 0000000000..da660010c1 --- /dev/null +++ b/config/rhoai/monitor.yaml @@ -0,0 +1,14 @@ +# Prometheus Pod Monitor (Metrics) +apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: + labels: + control-plane: controller-manager + name: controller-manager-metrics-monitor + namespace: system +spec: + selector: + matchLabels: + control-plane: controller-manager + podMetricsEndpoints: + - port: metrics From 247d3ea45ddaff5f88cc22c8d6cf67db34b5a233 Mon Sep 17 00:00:00 2001 From: Antonin Stefanutti Date: Mon, 5 Feb 2024 11:43:51 +0100 Subject: [PATCH 32/48] CARRY: Add RHOAI container image override (#9) --- config/rhoai/kustomization.yaml | 17 +++++++++++++++++ config/rhoai/manager_config_patch.yaml | 1 + config/rhoai/params.env | 1 + config/rhoai/params.yaml | 3 +++ 4 files changed, 22 insertions(+) create mode 100644 config/rhoai/params.env create mode 100644 config/rhoai/params.yaml diff --git a/config/rhoai/kustomization.yaml b/config/rhoai/kustomization.yaml index 
d48bd01c05..1e1a89dde4 100644 --- a/config/rhoai/kustomization.yaml +++ b/config/rhoai/kustomization.yaml @@ -8,6 +8,23 @@ namespace: opendatahub # field above. namePrefix: kueue- +configMapGenerator: +- name: rhoai-config + envs: + - params.env + +configurations: + - params.yaml + +vars: +- name: image + objref: + kind: ConfigMap + name: rhoai-config + apiVersion: v1 + fieldref: + fieldpath: data.odh-kueue-controller-image + # Labels to add to all resources and selectors. #commonLabels: # someName: someValue diff --git a/config/rhoai/manager_config_patch.yaml b/config/rhoai/manager_config_patch.yaml index 2e451fbf2c..fe17c5a694 100644 --- a/config/rhoai/manager_config_patch.yaml +++ b/config/rhoai/manager_config_patch.yaml @@ -8,6 +8,7 @@ spec: spec: containers: - name: manager + image: $(image) args: - "--config=/controller_manager_config.yaml" - "--zap-log-level=2" diff --git a/config/rhoai/params.env b/config/rhoai/params.env new file mode 100644 index 0000000000..5884c42113 --- /dev/null +++ b/config/rhoai/params.env @@ -0,0 +1 @@ +odh-kueue-controller-image=gcr.io/k8s-staging-kueue/kueue:main diff --git a/config/rhoai/params.yaml b/config/rhoai/params.yaml new file mode 100644 index 0000000000..43509ff293 --- /dev/null +++ b/config/rhoai/params.yaml @@ -0,0 +1,3 @@ +varReference: + - path: spec/template/spec/containers[]/image + kind: Deployment From 9a73f05df55f7026a1a390127679b0689cb421ff Mon Sep 17 00:00:00 2001 From: Shilpa Chugh Date: Wed, 7 Feb 2024 20:19:36 +0530 Subject: [PATCH 33/48] CARRY: Update OWNERS file for openshift CI Job setup (#13) Co-authored-by: Shilpa Chugh --- OWNERS | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/OWNERS b/OWNERS index c4f13a002b..8add8425da 100644 --- a/OWNERS +++ b/OWNERS @@ -1,17 +1,23 @@ # See the OWNERS docs at https://go.k8s.io/owners approvers: -- alculquicondor -- tenzen-y +- anishasthana +- astefanutti +- jbusche +- kpostoffice +- maxusmusti +- sutaakar +- tedhtchang reviewers: -- alculquicondor -- denkensk -- kerthcet -- mimowo -- tenzen-y -- trasc - -emeritus_approvers: -- ahg-g -- denkensk +- anishasthana +- astefanutti +- ChristianZaccaria +- dimakis +- Fiona-Waters +- jbusche +- kpostoffice +- maxusmusti +- MichaelClifford +- sutaakar +- tedhtchang From c5a1a4152cb2e09a440da989913096498f21e7bf Mon Sep 17 00:00:00 2001 From: Eoin Gallinagh <34043122+VanillaSpoon@users.noreply.github.com> Date: Fri, 9 Feb 2024 12:58:16 +0000 Subject: [PATCH 34/48] CARRY: Configure Kueue Pod webhooks so they do not interfere with infra Pods (#10) --- config/components/webhook/kustomization.yaml | 30 +++++++++++++++ config/components/webhook/manifests.yaml | 39 ++++++++++++++++++++ config/rhoai/kustomization.yaml | 2 + config/rhoai/mutating_webhook_patch.yaml | 7 ++++ config/rhoai/validating_webhook_patch.yaml | 7 ++++ 5 files changed, 85 insertions(+) create mode 100644 config/rhoai/mutating_webhook_patch.yaml create mode 100644 config/rhoai/validating_webhook_patch.yaml diff --git a/config/components/webhook/kustomization.yaml b/config/components/webhook/kustomization.yaml index 9cf26134e4..1c17012696 100644 --- a/config/components/webhook/kustomization.yaml +++ b/config/components/webhook/kustomization.yaml @@ -4,3 +4,33 @@ resources: configurations: - kustomizeconfig.yaml + +patches: +- patch: |- + apiVersion: admissionregistration.k8s.io/v1 + kind: MutatingWebhookConfiguration + metadata: + name: mutating-webhook-configuration + webhooks: + - name: mpod.kb.io + namespaceSelector: + 
matchExpressions: + - key: kubernetes.io/metadata.name + operator: NotIn + values: + - kube-system + - kueue-system +- patch: |- + apiVersion: admissionregistration.k8s.io/v1 + kind: ValidatingWebhookConfiguration + metadata: + name: validating-webhook-configuration + webhooks: + - name: vpod.kb.io + namespaceSelector: + matchExpressions: + - key: kubernetes.io/metadata.name + operator: NotIn + values: + - kube-system + - kueue-system diff --git a/config/components/webhook/manifests.yaml b/config/components/webhook/manifests.yaml index 63d2f8b82e..c7bed96edc 100644 --- a/config/components/webhook/manifests.yaml +++ b/config/components/webhook/manifests.yaml @@ -156,6 +156,25 @@ webhooks: resources: - mpijobs sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: webhook-service + namespace: system + path: /mutate--v1-pod + failurePolicy: Fail + name: mpod.kb.io + rules: + - apiGroups: + - "" + apiVersions: + - v1 + operations: + - CREATE + resources: + - pods + sideEffects: None - admissionReviewVersions: - v1 clientConfig: @@ -417,6 +436,26 @@ webhooks: resources: - mpijobs sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: webhook-service + namespace: system + path: /validate--v1-pod + failurePolicy: Fail + name: vpod.kb.io + rules: + - apiGroups: + - "" + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - pods + sideEffects: None - admissionReviewVersions: - v1 clientConfig: diff --git a/config/rhoai/kustomization.yaml b/config/rhoai/kustomization.yaml index 1e1a89dde4..3a7025e5f3 100644 --- a/config/rhoai/kustomization.yaml +++ b/config/rhoai/kustomization.yaml @@ -44,3 +44,5 @@ patches: - path: manager_webhook_patch.yaml - path: manager_metrics_patch.yaml - path: auth_proxy_service_patch.yaml +- path: mutating_webhook_patch.yaml +- path: validating_webhook_patch.yaml diff --git a/config/rhoai/mutating_webhook_patch.yaml b/config/rhoai/mutating_webhook_patch.yaml new file mode 100644 index 0000000000..31a80969a3 --- /dev/null +++ b/config/rhoai/mutating_webhook_patch.yaml @@ -0,0 +1,7 @@ +apiVersion: admissionregistration.k8s.io/v1 +kind: MutatingWebhookConfiguration +metadata: + name: mutating-webhook-configuration +webhooks: + - $patch: delete + name: mpod.kb.io diff --git a/config/rhoai/validating_webhook_patch.yaml b/config/rhoai/validating_webhook_patch.yaml new file mode 100644 index 0000000000..711b05dc29 --- /dev/null +++ b/config/rhoai/validating_webhook_patch.yaml @@ -0,0 +1,7 @@ +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingWebhookConfiguration +metadata: + name: validating-webhook-configuration +webhooks: + - $patch: delete + name: vpod.kb.io From a0c796a00c76934091b4203e85cb5f13dcb6c31f Mon Sep 17 00:00:00 2001 From: oksanabaza Date: Wed, 7 Feb 2024 10:34:10 +0000 Subject: [PATCH 35/48] CARRY: Bind admin roles in Kueue --- config/rhoai/binding_admin_roles.yaml | 18 ++++++++++++++++++ config/rhoai/kustomization.yaml | 3 +++ 2 files changed, 21 insertions(+) create mode 100644 config/rhoai/binding_admin_roles.yaml diff --git a/config/rhoai/binding_admin_roles.yaml b/config/rhoai/binding_admin_roles.yaml new file mode 100644 index 0000000000..61be865881 --- /dev/null +++ b/config/rhoai/binding_admin_roles.yaml @@ -0,0 +1,18 @@ +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: admin-rolebinding +subjects: + - kind: Group + apiGroup: rbac.authorization.k8s.io + name: rhods-admins + - kind: Group + apiGroup: rbac.authorization.k8s.io + 
name: odh-admins + - kind: Group + apiGroup: rbac.authorization.k8s.io + name: dedicated-admins +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kueue-batch-admin-role diff --git a/config/rhoai/kustomization.yaml b/config/rhoai/kustomization.yaml index 3a7025e5f3..069b5e0d6d 100644 --- a/config/rhoai/kustomization.yaml +++ b/config/rhoai/kustomization.yaml @@ -1,3 +1,5 @@ +# RHOAI configuration for Kueue. + # Adds namespace to all resources. namespace: opendatahub @@ -36,6 +38,7 @@ resources: - ../components/internalcert - ../components/webhook - monitor.yaml +- binding_admin_roles.yaml patches: # Mount the controller config file for loading manager configurations From 20727dca9f06c28ae6f255178720cac39ac8faa9 Mon Sep 17 00:00:00 2001 From: Shilpa Chugh Date: Mon, 19 Feb 2024 16:47:10 +0530 Subject: [PATCH 36/48] PATCH:Change func to wait until the admission check is properly removed --- test/e2e/singlecluster/e2e_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e/singlecluster/e2e_test.go b/test/e2e/singlecluster/e2e_test.go index 98db10cae2..6f62f3aa2a 100644 --- a/test/e2e/singlecluster/e2e_test.go +++ b/test/e2e/singlecluster/e2e_test.go @@ -357,7 +357,7 @@ var _ = ginkgo.Describe("Kueue", func() { gomega.Expect(util.DeleteAllJobsInNamespace(ctx, k8sClient, ns)).Should(gomega.Succeed()) util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, clusterQueue, true) util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, onDemandRF, true) - gomega.Expect(k8sClient.Delete(ctx, check)).Should(gomega.Succeed()) + util.ExpectAdmissionCheckToBeDeleted(ctx, k8sClient, check, true) }) ginkgo.It("Should unsuspend a job only after all checks are cleared", func() { From 323ee05bb86396b6c9a9c04e01a32e47cc36e6a7 Mon Sep 17 00:00:00 2001 From: Antonin Stefanutti Date: Wed, 21 Feb 2024 10:30:30 +0100 Subject: [PATCH 37/48] DROP: Use uniquely identifying labels --- config/components/manager/manager.yaml | 7 ------- config/components/webhook/service.yaml | 2 -- config/rhoai/kustomization.yaml | 5 +++-- config/rhoai/monitor.yaml | 5 ++--- 4 files changed, 5 insertions(+), 14 deletions(-) diff --git a/config/components/manager/manager.yaml b/config/components/manager/manager.yaml index 6121c3672d..d15d0c2dc3 100644 --- a/config/components/manager/manager.yaml +++ b/config/components/manager/manager.yaml @@ -10,19 +10,12 @@ kind: Deployment metadata: name: controller-manager namespace: system - labels: - control-plane: controller-manager spec: - selector: - matchLabels: - control-plane: controller-manager replicas: 1 template: metadata: annotations: kubectl.kubernetes.io/default-container: manager - labels: - control-plane: controller-manager spec: securityContext: runAsNonRoot: true diff --git a/config/components/webhook/service.yaml b/config/components/webhook/service.yaml index c9f09d4b8c..acd6493f95 100644 --- a/config/components/webhook/service.yaml +++ b/config/components/webhook/service.yaml @@ -8,5 +8,3 @@ spec: - port: 443 protocol: TCP targetPort: 9443 - selector: - control-plane: controller-manager diff --git a/config/rhoai/kustomization.yaml b/config/rhoai/kustomization.yaml index 069b5e0d6d..5bb37200bf 100644 --- a/config/rhoai/kustomization.yaml +++ b/config/rhoai/kustomization.yaml @@ -28,8 +28,9 @@ vars: fieldpath: data.odh-kueue-controller-image # Labels to add to all resources and selectors. 
-#commonLabels: -# someName: someValue +commonLabels: + app.kubernetes.io/name: kueue + app.kubernetes.io/component: controller resources: - ../components/crd diff --git a/config/rhoai/monitor.yaml b/config/rhoai/monitor.yaml index da660010c1..e8fd0618d8 100644 --- a/config/rhoai/monitor.yaml +++ b/config/rhoai/monitor.yaml @@ -2,13 +2,12 @@ apiVersion: monitoring.coreos.com/v1 kind: PodMonitor metadata: - labels: - control-plane: controller-manager name: controller-manager-metrics-monitor namespace: system spec: selector: matchLabels: - control-plane: controller-manager + app.kubernetes.io/name: kueue + app.kubernetes.io/component: controller podMetricsEndpoints: - port: metrics From b13c3d516cfdf95847c7a943a650c3588a12ef4f Mon Sep 17 00:00:00 2001 From: Antonin Stefanutti Date: Wed, 21 Feb 2024 10:43:23 +0100 Subject: [PATCH 38/48] CARRY: Add Kueue webhook NetworkPolicy --- config/rhoai/kustomization.yaml | 1 + config/rhoai/webhook_network_policy.yaml | 14 ++++++++++++++ 2 files changed, 15 insertions(+) create mode 100644 config/rhoai/webhook_network_policy.yaml diff --git a/config/rhoai/kustomization.yaml b/config/rhoai/kustomization.yaml index 5bb37200bf..d7f94b62be 100644 --- a/config/rhoai/kustomization.yaml +++ b/config/rhoai/kustomization.yaml @@ -40,6 +40,7 @@ resources: - ../components/webhook - monitor.yaml - binding_admin_roles.yaml +- webhook_network_policy.yaml patches: # Mount the controller config file for loading manager configurations diff --git a/config/rhoai/webhook_network_policy.yaml b/config/rhoai/webhook_network_policy.yaml new file mode 100644 index 0000000000..08845704c2 --- /dev/null +++ b/config/rhoai/webhook_network_policy.yaml @@ -0,0 +1,14 @@ +kind: NetworkPolicy +apiVersion: networking.k8s.io/v1 +metadata: + name: webhook-server + namespace: system +spec: + podSelector: + matchLabels: {} + ingress: + - ports: + - protocol: TCP + port: 9443 + policyTypes: + - Ingress From 91eed92a093e9c65b3860c83895071ea07fc336c Mon Sep 17 00:00:00 2001 From: Shilpa Chugh Date: Mon, 19 Feb 2024 20:16:36 +0530 Subject: [PATCH 39/48] CARRY: Use env variable 'NAMESPACE' for e2e tests --- test/util/e2e.go | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/test/util/e2e.go b/test/util/e2e.go index af537c445c..d4549f750b 100644 --- a/test/util/e2e.go +++ b/test/util/e2e.go @@ -22,6 +22,22 @@ import ( visibilityv1alpha1 "sigs.k8s.io/kueue/client-go/clientset/versioned/typed/visibility/v1alpha1" ) +const ( + // The environment variable for namespace where Kueue is installed + namespaceEnvVar = "NAMESPACE" + + // The namespace where kueue is installed in opendatahub + odhNamespace = "opendatahub" + + // The namespace where kueue is installed in rhoai + rhoaiNamespace = "redhat-ods-applications" + + // The default namespace where kueue is installed + kueueNamespace = "kueue-system" + + undefinedNamespace = "undefined" +) + func CreateClientUsingCluster(kContext string) client.WithWatch { cfg, err := config.GetConfigWithContext(kContext) if err != nil { @@ -70,14 +86,14 @@ func CreateVisibilityClient(user string) visibilityv1alpha1.VisibilityV1alpha1In func WaitForKueueAvailability(ctx context.Context, k8sClient client.Client) { kcmKey := types.NamespacedName{ - Namespace: "kueue-system", + Namespace: GetNamespace(), Name: "kueue-controller-manager", } deployment := &appsv1.Deployment{} pods := corev1.PodList{} gomega.EventuallyWithOffset(1, func(g gomega.Gomega) error { g.Expect(k8sClient.Get(ctx, kcmKey, 
deployment)).To(gomega.Succeed()) - g.Expect(k8sClient.List(ctx, &pods, client.InNamespace("kueue-system"), client.MatchingLabels(deployment.Spec.Selector.MatchLabels))).To(gomega.Succeed()) + g.Expect(k8sClient.List(ctx, &pods, client.InNamespace(GetNamespace()), client.MatchingLabels(deployment.Spec.Selector.MatchLabels))).To(gomega.Succeed()) for _, pod := range pods.Items { for _, cs := range pod.Status.ContainerStatuses { if cs.RestartCount > 0 { @@ -95,3 +111,23 @@ func WaitForKueueAvailability(ctx context.Context, k8sClient client.Client) { }, StartUpTimeout, Interval).Should(gomega.Succeed()) } + +func GetNamespace() string { + namespace, ok := os.LookupEnv(namespaceEnvVar) + if !ok { + fmt.Printf("Expected environment variable %s is unset, please use this environment variable to specify in which namespace Kueue is installed", namespaceEnvVar) + os.Exit(1) + } + switch namespace { + case "opendatahub": + return odhNamespace + case "redhat-ods-applications": + return rhoaiNamespace + case "kueue-system": + return kueueNamespace + default: + fmt.Printf("Expected environment variable %s contains an incorrect value", namespaceEnvVar) + os.Exit(1) + return undefinedNamespace + } +} From 5bf2730068fefac9f46eadaa42a122df365daac9 Mon Sep 17 00:00:00 2001 From: Fiona Waters Date: Mon, 4 Mar 2024 15:43:57 +0000 Subject: [PATCH 40/48] CARRY: Allow non-admin user access to view clusterqueue(metrics) --- config/rhoai/batch-user-rolebinding.yaml | 12 ++++++++++++ config/rhoai/clusterqueue_viewer_role_patch.yaml | 7 +++++++ config/rhoai/kustomization.yaml | 2 ++ 3 files changed, 21 insertions(+) create mode 100644 config/rhoai/batch-user-rolebinding.yaml create mode 100644 config/rhoai/clusterqueue_viewer_role_patch.yaml diff --git a/config/rhoai/batch-user-rolebinding.yaml b/config/rhoai/batch-user-rolebinding.yaml new file mode 100644 index 0000000000..eb532afffc --- /dev/null +++ b/config/rhoai/batch-user-rolebinding.yaml @@ -0,0 +1,12 @@ +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: batch-user-rolebinding +subjects: + - kind: Group + apiGroup: rbac.authorization.k8s.io + name: 'system:authenticated' +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: batch-user-role diff --git a/config/rhoai/clusterqueue_viewer_role_patch.yaml b/config/rhoai/clusterqueue_viewer_role_patch.yaml new file mode 100644 index 0000000000..bdb73407ca --- /dev/null +++ b/config/rhoai/clusterqueue_viewer_role_patch.yaml @@ -0,0 +1,7 @@ +# patch to add clusterqueue-viewer-role to batch-user +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: clusterqueue-viewer-role + labels: + rbac.kueue.x-k8s.io/batch-user: 'true' diff --git a/config/rhoai/kustomization.yaml b/config/rhoai/kustomization.yaml index d7f94b62be..077fb341eb 100644 --- a/config/rhoai/kustomization.yaml +++ b/config/rhoai/kustomization.yaml @@ -41,6 +41,7 @@ resources: - monitor.yaml - binding_admin_roles.yaml - webhook_network_policy.yaml +- batch-user-rolebinding.yaml patches: # Mount the controller config file for loading manager configurations @@ -51,3 +52,4 @@ patches: - path: auth_proxy_service_patch.yaml - path: mutating_webhook_patch.yaml - path: validating_webhook_patch.yaml +- path: clusterqueue_viewer_role_patch.yaml From d6d413a8f05a86a1dfa5abaff6edf3f3a1bc1d68 Mon Sep 17 00:00:00 2001 From: Karel Suta Date: Fri, 15 Mar 2024 08:46:14 +0100 Subject: [PATCH 41/48] CARRY: Add workflow to release ODH/Kueue with compiled test binaries --- 
.github/workflows/odh-release.yml | 54 +++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 .github/workflows/odh-release.yml diff --git a/.github/workflows/odh-release.yml b/.github/workflows/odh-release.yml new file mode 100644 index 0000000000..fd303d217b --- /dev/null +++ b/.github/workflows/odh-release.yml @@ -0,0 +1,54 @@ +# This workflow will compile e2e tests and release them + +name: ODH Release +on: + workflow_dispatch: + inputs: + version: + description: 'Tag to be used for release, i.e.: v0.0.1' + required: true + push: + tags: + - '*' + +env: + KUEUE_RELEASE_VERSION: ${{ github.event.inputs.version || github.ref_name }} + +jobs: + release-odh: + runs-on: ubuntu-latest + + # Permission required to create a release + permissions: + contents: write + + steps: + - uses: actions/checkout@v4 + + - name: Set Go + uses: actions/setup-go@v5 + with: + go-version: v1.21 + + - name: Verify that release doesn't exist yet + shell: bash {0} + run: | + gh release view $KUEUE_RELEASE_VERSION + status=$? + if [[ $status -eq 0 ]]; then + echo "Release $KUEUE_RELEASE_VERSION already exists." + exit 1 + fi + env: + GITHUB_TOKEN: ${{ github.TOKEN }} + + - name: Compile tests + run: | + go test -c -o compiled-tests/e2e-singlecluster ./test/e2e/singlecluster/ + + - name: Creates a release in GitHub + run: | + gh release create $KUEUE_RELEASE_VERSION --target $GITHUB_SHA compiled-tests/* + env: + GITHUB_TOKEN: ${{ secrets.CODEFLARE_MACHINE_ACCOUNT_TOKEN }} + shell: bash From d7083e9cb860a6f3d3310d7b1426743af80d4e25 Mon Sep 17 00:00:00 2001 From: Mark Campbell Date: Fri, 5 Apr 2024 13:07:05 +0100 Subject: [PATCH 42/48] CARRY: Set waitForPodsReady to true by default (#23) --- config/components/manager/controller_manager_config.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/config/components/manager/controller_manager_config.yaml b/config/components/manager/controller_manager_config.yaml index e698ecf762..0c93b6957b 100644 --- a/config/components/manager/controller_manager_config.yaml +++ b/config/components/manager/controller_manager_config.yaml @@ -22,8 +22,9 @@ clientConnection: qps: 50 burst: 100 #pprofBindAddress: :8082 -#waitForPodsReady: -# enable: true +waitForPodsReady: + enable: true + blockAdmission: false #manageJobsWithoutQueueName: true #internalCertManagement: # enable: false From 4cf5cceab32e329ac30df3460c5c462f22b87175 Mon Sep 17 00:00:00 2001 From: ChristianZaccaria Date: Wed, 17 Apr 2024 17:10:46 +0100 Subject: [PATCH 43/48] PATCH: Rebase on Kueue v0.6.2 --- config/rhoai/params.env | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/rhoai/params.env b/config/rhoai/params.env index 5884c42113..ca431060e4 100644 --- a/config/rhoai/params.env +++ b/config/rhoai/params.env @@ -1 +1 @@ -odh-kueue-controller-image=gcr.io/k8s-staging-kueue/kueue:main +odh-kueue-controller-image=registry.k8s.io/kueue/kueue:v0.6.2 From d3d458de872834d8e082202ae37eb132d6f4b83e Mon Sep 17 00:00:00 2001 From: Kevin Date: Fri, 19 Apr 2024 14:37:06 -0400 Subject: [PATCH 44/48] CARRY: Add delete patch to remove default namespace Signed-off-by: Kevin --- config/rhoai/kustomization.yaml | 1 + config/rhoai/remove_default_namespace.yaml | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 config/rhoai/remove_default_namespace.yaml diff --git a/config/rhoai/kustomization.yaml b/config/rhoai/kustomization.yaml index 077fb341eb..dc75c514eb 100644 --- a/config/rhoai/kustomization.yaml +++ b/config/rhoai/kustomization.yaml @@ -53,3 +53,4 
@@ patches: - path: mutating_webhook_patch.yaml - path: validating_webhook_patch.yaml - path: clusterqueue_viewer_role_patch.yaml +- path: remove_default_namespace.yaml diff --git a/config/rhoai/remove_default_namespace.yaml b/config/rhoai/remove_default_namespace.yaml new file mode 100644 index 0000000000..9a52c0573d --- /dev/null +++ b/config/rhoai/remove_default_namespace.yaml @@ -0,0 +1,5 @@ +$patch: delete +apiVersion: v1 +kind: Namespace +metadata: + name: system From c312b2d137ef6c4508f6ce2bb886fd6ed607ba41 Mon Sep 17 00:00:00 2001 From: Kevin Date: Wed, 8 May 2024 10:36:19 -0400 Subject: [PATCH 45/48] PATCH: add service for kueue metrics Signed-off-by: Kevin --- config/rhoai/kueue-metrics-service.yaml | 15 +++++++++++++++ config/rhoai/kustomization.yaml | 1 + 2 files changed, 16 insertions(+) create mode 100644 config/rhoai/kueue-metrics-service.yaml diff --git a/config/rhoai/kueue-metrics-service.yaml b/config/rhoai/kueue-metrics-service.yaml new file mode 100644 index 0000000000..a82f7bea1c --- /dev/null +++ b/config/rhoai/kueue-metrics-service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: kueue-metrics-service +spec: + ports: + - name: metrics + port: 8080 + protocol: TCP + targetPort: metrics + selector: + app.kubernetes.io/name: kueue + app.kubernetes.io/part-of: kueue +status: + loadBalancer: {} diff --git a/config/rhoai/kustomization.yaml b/config/rhoai/kustomization.yaml index dc75c514eb..6c01d7e073 100644 --- a/config/rhoai/kustomization.yaml +++ b/config/rhoai/kustomization.yaml @@ -54,3 +54,4 @@ patches: - path: validating_webhook_patch.yaml - path: clusterqueue_viewer_role_patch.yaml - path: remove_default_namespace.yaml +- path: kueue-metrics-service.yaml From 00db5546389f3a8033ad956a921acc5af5591458 Mon Sep 17 00:00:00 2001 From: Kevin Date: Mon, 13 May 2024 13:43:09 -0400 Subject: [PATCH 46/48] PATCH: move service to resources from patch Signed-off-by: Kevin --- config/rhoai/kueue-metrics-service.yaml | 2 +- config/rhoai/kustomization.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/config/rhoai/kueue-metrics-service.yaml b/config/rhoai/kueue-metrics-service.yaml index a82f7bea1c..1f0a18d17f 100644 --- a/config/rhoai/kueue-metrics-service.yaml +++ b/config/rhoai/kueue-metrics-service.yaml @@ -1,7 +1,7 @@ apiVersion: v1 kind: Service metadata: - name: kueue-metrics-service + name: metrics-service spec: ports: - name: metrics diff --git a/config/rhoai/kustomization.yaml b/config/rhoai/kustomization.yaml index 6c01d7e073..fec8dff5fa 100644 --- a/config/rhoai/kustomization.yaml +++ b/config/rhoai/kustomization.yaml @@ -42,6 +42,7 @@ resources: - binding_admin_roles.yaml - webhook_network_policy.yaml - batch-user-rolebinding.yaml +- kueue-metrics-service.yaml patches: # Mount the controller config file for loading manager configurations @@ -54,4 +55,3 @@ patches: - path: validating_webhook_patch.yaml - path: clusterqueue_viewer_role_patch.yaml - path: remove_default_namespace.yaml -- path: kueue-metrics-service.yaml From fdafd7a7b575f4722f0f06ec99a558f120331d19 Mon Sep 17 00:00:00 2001 From: abhijeet-dhumal Date: Tue, 21 May 2024 16:10:46 +0530 Subject: [PATCH 47/48] CARRY: Add dedicated Dockerfile for Konflux --- Dockerfile.rhoai | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 Dockerfile.rhoai diff --git a/Dockerfile.rhoai b/Dockerfile.rhoai new file mode 100644 index 0000000000..cb2fcee4df --- /dev/null +++ b/Dockerfile.rhoai @@ -0,0 +1,28 @@ +# Build the manager binary +FROM 
registry.access.redhat.com/ubi9/go-toolset:1.21 as builder + +WORKDIR /workspace + +RUN git config --global --add safe.directory /workspace + +# Copy the Go Modules manifests +COPY go.mod go.mod +COPY go.sum go.sum +# cache deps before building and copying source so that we don't need to re-download as much +# and so that source changes don't invalidate our downloaded layer +RUN go mod download + +# Copy the go source +COPY . . + +# Build +RUN make build GO_BUILD_ENV='CGO_ENABLED=1 GOOS=linux' + +FROM registry.access.redhat.com/ubi9/ubi:latest + +WORKDIR / +USER root +COPY --from=builder /workspace/bin/manager /manager +USER 65532:65532 + +ENTRYPOINT ["/manager"] \ No newline at end of file From 5bec43389f43dc7a71d644d4efa9d6f456f77fc4 Mon Sep 17 00:00:00 2001 From: ChristianZaccaria Date: Thu, 30 May 2024 16:14:03 +0100 Subject: [PATCH 48/48] CARRY: Add watcher to controller and webhook --- config/components/rbac/role.yaml | 8 ++ go.mod | 4 +- pkg/controller/jobframework/setup.go | 146 +++++++++++++++++++++++---- 3 files changed, 139 insertions(+), 19 deletions(-) diff --git a/config/components/rbac/role.yaml b/config/components/rbac/role.yaml index c7ed22eb2d..f634072dfd 100644 --- a/config/components/rbac/role.yaml +++ b/config/components/rbac/role.yaml @@ -92,6 +92,14 @@ rules: - list - update - watch +- apiGroups: + - apiextensions.k8s.io + resources: + - customresourcedefinitions + verbs: + - get + - list + - watch - apiGroups: - autoscaling.x-k8s.io resources: diff --git a/go.mod b/go.mod index 43ba6dceaf..dfcc91b11d 100644 --- a/go.mod +++ b/go.mod @@ -15,7 +15,9 @@ require ( github.com/prometheus/client_model v0.5.0 github.com/ray-project/kuberay/ray-operator v1.1.0-alpha.0 go.uber.org/zap v1.26.0 + golang.org/x/exp v0.0.0-20230905200255-921286631fa9 k8s.io/api v0.29.2 + k8s.io/apiextensions-apiserver v0.29.0 k8s.io/apimachinery v0.29.2 k8s.io/apiserver v0.29.1 k8s.io/autoscaler/cluster-autoscaler/apis v0.0.0-20240325113845-0130d33747bb @@ -100,7 +102,6 @@ require ( go.uber.org/atomic v1.11.0 // indirect go.uber.org/multierr v1.11.0 // indirect golang.org/x/crypto v0.18.0 // indirect - golang.org/x/exp v0.0.0-20230905200255-921286631fa9 // indirect golang.org/x/mod v0.14.0 // indirect golang.org/x/net v0.20.0 // indirect golang.org/x/oauth2 v0.12.0 // indirect @@ -121,7 +122,6 @@ require ( gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - k8s.io/apiextensions-apiserver v0.29.0 // indirect k8s.io/gengo v0.0.0-20230829151522-9cce18d56c01 // indirect k8s.io/kms v0.29.1 // indirect sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.28.0 // indirect diff --git a/pkg/controller/jobframework/setup.go b/pkg/controller/jobframework/setup.go index 39541ad3c4..2109ee7da5 100644 --- a/pkg/controller/jobframework/setup.go +++ b/pkg/controller/jobframework/setup.go @@ -22,19 +22,38 @@ import ( "fmt" "os" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/tools/cache" + "github.com/go-logr/logr" + "golang.org/x/exp/slices" "k8s.io/apimachinery/pkg/api/meta" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/apiutil" + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + apiextensionsclientset "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" + 
"k8s.io/apimachinery/pkg/watch" + retrywatch "k8s.io/client-go/tools/watch" + "sigs.k8s.io/kueue/pkg/controller/jobs/noop" ) +const ( + pytorchjobAPI = "pytorchjobs.kubeflow.org" + rayclusterAPI = "rayclusters.ray.io" +) + var ( errFailedMappingResource = errors.New("restMapper failed mapping resource") ) +// +kubebuilder:rbac:groups="apiextensions.k8s.io",resources=customresourcedefinitions,verbs=get;list;watch + // SetupControllers setups all controllers and webhooks for integrations. // When the platform developers implement a separate kueue-manager to manage the in-house custom jobs, // they can easily setup controllers and webhooks for the in-house custom jobs. @@ -62,24 +81,14 @@ func SetupControllers(mgr ctrl.Manager, log logr.Logger, opts ...Option) error { if err != nil { return fmt.Errorf("%s: %w: %w", fwkNamePrefix, errFailedMappingResource, err) } - if _, err = mgr.GetRESTMapper().RESTMapping(gvk.GroupKind(), gvk.Version); err != nil { - if !meta.IsNoMatchError(err) { - return fmt.Errorf("%s: %w", fwkNamePrefix, err) - } - logger.Info("No matching API in the server for job framework, skipped setup of controller and webhook") + if !isAPIAvailable(context.TODO(), mgr, rayclusterAPI) { + logger.Info("API not available, waiting for it to become available... - Skipping setup of controller and webhook") + waitForAPI(context.TODO(), logger, mgr, rayclusterAPI, func() { + setupComponents(mgr, logger, gvk, fwkNamePrefix, cb, opts...) + }) } else { - if err = cb.NewReconciler( - mgr.GetClient(), - mgr.GetEventRecorderFor(fmt.Sprintf("%s-%s-controller", name, options.ManagerName)), - opts..., - ).SetupWithManager(mgr); err != nil { - return fmt.Errorf("%s: %w", fwkNamePrefix, err) - } - if err = cb.SetupWebhook(mgr, opts...); err != nil { - return fmt.Errorf("%s: unable to create webhook: %w", fwkNamePrefix, err) - } - logger.Info("Set up controller and webhook for job framework") - return nil + logger.Info("API is available, setting up components...") + setupComponents(mgr, logger, gvk, fwkNamePrefix, cb, opts...) 
} } if err := noop.SetupWebhook(mgr, cb.JobType); err != nil { @@ -89,6 +98,39 @@ func SetupControllers(mgr ctrl.Manager, log logr.Logger, opts ...Option) error { }) } +func setupComponents(mgr ctrl.Manager, log logr.Logger, gvk schema.GroupVersionKind, fwkNamePrefix string, cb IntegrationCallbacks, opts ...Option) { + // Attempt to get the REST mapping for the GVK + if _, err := mgr.GetRESTMapper().RESTMapping(gvk.GroupKind(), gvk.Version); err != nil { + if !meta.IsNoMatchError(err) { + log.Error(err, fmt.Sprintf("%s: unable to get REST mapping", fwkNamePrefix)) + return + } + log.Info("No matching API in the server for job framework, skipped setup of controller and webhook") + } else { + if err := setupControllerAndWebhook(mgr, gvk, fwkNamePrefix, cb, opts...); err != nil { + log.Error(err, "Failed to set up controller and webhook") + } else { + log.Info("Set up controller and webhook for job framework") + } + } +} + +func setupControllerAndWebhook(mgr ctrl.Manager, gvk schema.GroupVersionKind, fwkNamePrefix string, cb IntegrationCallbacks, opts ...Option) error { + if err := cb.NewReconciler( + mgr.GetClient(), + mgr.GetEventRecorderFor(fmt.Sprintf("%s-%s-controller", gvk.Kind, "managerName")), // Ensure managerName is defined or fetched + opts..., + ).SetupWithManager(mgr); err != nil { + return fmt.Errorf("%s: %w", fwkNamePrefix, err) + } + + if err := cb.SetupWebhook(mgr, opts...); err != nil { + return fmt.Errorf("%s: unable to create webhook: %w", fwkNamePrefix, err) + } + + return nil +} + // SetupIndexes setups the indexers for integrations. // When the platform developers implement a separate kueue-manager to manage the in-house custom jobs, // they can easily setup indexers for the in-house custom jobs. @@ -105,3 +147,73 @@ func SetupIndexes(ctx context.Context, indexer client.FieldIndexer, opts ...Opti return nil }) } + +func isAPIAvailable(ctx context.Context, mgr ctrl.Manager, apiName string) bool { + crdClient, err := apiextensionsclientset.NewForConfig(mgr.GetConfig()) + exitOnError(err, "unable to create CRD client") + + crdList, err := crdClient.ApiextensionsV1().CustomResourceDefinitions().List(ctx, metav1.ListOptions{}) + exitOnError(err, "unable to list CRDs") + + return slices.ContainsFunc(crdList.Items, func(crd apiextensionsv1.CustomResourceDefinition) bool { + return crd.Name == apiName + }) +} + +func waitForAPI(ctx context.Context, log logr.Logger, mgr ctrl.Manager, apiName string, action func()) { + crdClient, err := apiextensionsclientset.NewForConfig(mgr.GetConfig()) + exitOnError(err, "unable to create CRD client") + + crdList, err := crdClient.ApiextensionsV1().CustomResourceDefinitions().List(ctx, metav1.ListOptions{}) + exitOnError(err, "unable to list CRDs") + + // If API is already available, just invoke action + if slices.ContainsFunc(crdList.Items, func(crd apiextensionsv1.CustomResourceDefinition) bool { + return crd.Name == apiName + }) { + action() + return + } + + // Wait for the API to become available then invoke action + log.Info(fmt.Sprintf("API %v not available, setting up retry watcher", apiName)) + retryWatcher, err := retrywatch.NewRetryWatcher(crdList.ResourceVersion, &cache.ListWatch{ + ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { + return crdClient.ApiextensionsV1().CustomResourceDefinitions().List(ctx, metav1.ListOptions{}) + }, + WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { + return crdClient.ApiextensionsV1().CustomResourceDefinitions().Watch(ctx, metav1.ListOptions{}) + }, + }) + 
exitOnError(err, "unable to create retry watcher") + + defer retryWatcher.Stop() + for { + select { + case <-ctx.Done(): + return + case event := <-retryWatcher.ResultChan(): + switch event.Type { + case watch.Error: + exitOnError(apierrors.FromObject(event.Object), fmt.Sprintf("error watching for API %v", apiName)) + + case watch.Added, watch.Modified: + if crd := event.Object.(*apiextensionsv1.CustomResourceDefinition); crd.Name == apiName && + slices.ContainsFunc(crd.Status.Conditions, func(condition apiextensionsv1.CustomResourceDefinitionCondition) bool { + return condition.Type == apiextensionsv1.Established && condition.Status == apiextensionsv1.ConditionTrue + }) { + log.Info(fmt.Sprintf("API %v installed, invoking deferred action", apiName)) + action() + return + } + } + } + } +} + +func exitOnError(err error, msg string) { + if err != nil { + fmt.Sprint(err, msg) + os.Exit(1) + } +}