diff --git a/ray-operator/apis/ray/v1/raycluster_types.go b/ray-operator/apis/ray/v1/raycluster_types.go index e7fe2f25fe..a317a3b479 100644 --- a/ray-operator/apis/ray/v1/raycluster_types.go +++ b/ray-operator/apis/ray/v1/raycluster_types.go @@ -169,6 +169,7 @@ type RayClusterConditionType string // Custom Reason for RayClusterCondition const ( AllPodRunningAndReadyFirstTime = "AllPodRunningAndReadyFirstTime" + RayClusterPodsProvisioning = "RayClusterPodsProvisioning" HeadPodNotFound = "HeadPodNotFound" HeadPodRunningAndReady = "HeadPodRunningAndReady" // UnknownReason says that the reason for the condition is unknown. diff --git a/ray-operator/controllers/ray/raycluster_controller.go b/ray-operator/controllers/ray/raycluster_controller.go index 56633bc91f..af7cb03e09 100644 --- a/ray-operator/controllers/ray/raycluster_controller.go +++ b/ray-operator/controllers/ray/raycluster_controller.go @@ -1210,9 +1210,9 @@ func (r *RayClusterReconciler) calculateStatus(ctx context.Context, instance *ra meta.SetStatusCondition(&newInstance.Status.Conditions, headPodReadyCondition) } - if meta.FindStatusCondition(newInstance.Status.Conditions, string(rayv1.RayClusterProvisioned)) == nil { + if !meta.IsStatusConditionTrue(newInstance.Status.Conditions, string(rayv1.RayClusterProvisioned)) { // RayClusterProvisioned indicates whether all Ray Pods are ready when the RayCluster is first created. - // Note RayClusterProvisioned StatusCondition will not be added to Raycluster until all Ray Pods are ready for the first time. + // Note RayClusterProvisioned StatusCondition will not be updated after all Ray Pods are ready for the first time. if utils.CheckAllPodsRunning(ctx, runtimePods) { meta.SetStatusCondition(&newInstance.Status.Conditions, metav1.Condition{ Type: string(rayv1.RayClusterProvisioned), @@ -1220,6 +1220,13 @@ func (r *RayClusterReconciler) calculateStatus(ctx context.Context, instance *ra Reason: rayv1.AllPodRunningAndReadyFirstTime, Message: "All Ray Pods are ready for the first time", }) + } else { + meta.SetStatusCondition(&newInstance.Status.Conditions, metav1.Condition{ + Type: string(rayv1.RayClusterProvisioned), + Status: metav1.ConditionFalse, + Reason: rayv1.RayClusterPodsProvisioning, + Message: "RayCluster Pods are being provisioned for first time", + }) } } diff --git a/ray-operator/controllers/ray/raycluster_controller_test.go b/ray-operator/controllers/ray/raycluster_controller_test.go index 9699ef149a..bdeca65562 100644 --- a/ray-operator/controllers/ray/raycluster_controller_test.go +++ b/ray-operator/controllers/ray/raycluster_controller_test.go @@ -898,15 +898,15 @@ var _ = Context("Inside the default namespace", func() { time.Second*3, time.Millisecond*500).Should(BeTrue()) By("Check RayCluster RayClusterProvisioned condition is false") - // But the worker pod is not ready yet, RayClusterProvisioned condition should still be absent. + // But the worker pod is not ready yet, RayClusterProvisioned condition should be false. Consistently( - func() *metav1.Condition { + func() bool { if err := getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: namespace}, rayCluster)(); err != nil { - return nil + return false } - return meta.FindStatusCondition(rayCluster.Status.Conditions, string(rayv1.RayClusterProvisioned)) + return meta.IsStatusConditionFalse(rayCluster.Status.Conditions, string(rayv1.RayClusterProvisioned)) }, - time.Second*3, time.Millisecond*500).Should(BeNil()) + time.Second*3, time.Millisecond*500).Should(BeTrue()) By("Update the worker pod to Running") workerPod.Status.Phase = corev1.PodRunning diff --git a/ray-operator/controllers/ray/raycluster_controller_unit_test.go b/ray-operator/controllers/ray/raycluster_controller_unit_test.go index a944f4ac6e..e0f49f8d00 100644 --- a/ray-operator/controllers/ray/raycluster_controller_unit_test.go +++ b/ray-operator/controllers/ray/raycluster_controller_unit_test.go @@ -1829,7 +1829,9 @@ func TestRayClusterProvisionedCondition(t *testing.T) { _ = fakeClient.Status().Update(ctx, headPod) _ = fakeClient.Status().Update(ctx, workerPod) testRayCluster, _ = r.calculateStatus(ctx, testRayCluster, nil) - assert.Nil(t, meta.FindStatusCondition(testRayCluster.Status.Conditions, string(rayv1.RayClusterProvisioned))) + rayClusterProvisionedCondition := meta.FindStatusCondition(testRayCluster.Status.Conditions, string(rayv1.RayClusterProvisioned)) + assert.Equal(t, rayClusterProvisionedCondition.Status, metav1.ConditionFalse) + assert.Equal(t, rayClusterProvisionedCondition.Reason, rayv1.RayClusterPodsProvisioning) // After a while, all Ray Pods are ready for the first time, RayClusterProvisioned condition should be added and set to True. headPod.Status = ReadyStatus @@ -1837,7 +1839,7 @@ func TestRayClusterProvisionedCondition(t *testing.T) { _ = fakeClient.Status().Update(ctx, headPod) _ = fakeClient.Status().Update(ctx, workerPod) testRayCluster, _ = r.calculateStatus(ctx, testRayCluster, nil) - rayClusterProvisionedCondition := meta.FindStatusCondition(testRayCluster.Status.Conditions, string(rayv1.RayClusterProvisioned)) + rayClusterProvisionedCondition = meta.FindStatusCondition(testRayCluster.Status.Conditions, string(rayv1.RayClusterProvisioned)) assert.Equal(t, rayClusterProvisionedCondition.Status, metav1.ConditionTrue) assert.Equal(t, rayClusterProvisionedCondition.Reason, rayv1.AllPodRunningAndReadyFirstTime)