Skip to content

Commit

Permalink
[PWX-27401] Disable metrics collector if it's not already running
Browse files Browse the repository at this point in the history
  • Loading branch information
pure-jliao committed Oct 14, 2022
1 parent 91a1339 commit f766bb2
Show file tree
Hide file tree
Showing 3 changed files with 321 additions and 0 deletions.
37 changes: 37 additions & 0 deletions drivers/storage/portworx/component/telemetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ type telemetry struct {
isDeploymentRegistrationServiceCreated bool
isDaemonSetTelemetryPhonehonmeCreated bool
usePxProxy bool
reconcileMetricsCollector *bool
}

func (t *telemetry) Name() string {
Expand Down Expand Up @@ -157,6 +158,9 @@ func (t *telemetry) Reconcile(cluster *corev1.StorageCluster) error {
if err := t.setTelemetryCertOwnerRef(cluster, ownerRef); err != nil {
return err
}
if err := t.shouldReconcileMetricsCollector(cluster); err != nil {
return err
}
t.isCCMGoSupported = pxutil.IsCCMGoSupported(pxutil.GetPortworxVersion(cluster))
if t.isCCMGoSupported {
return t.reconcileCCMGo(cluster, ownerRef)
Expand Down Expand Up @@ -404,10 +408,43 @@ func (t *telemetry) deleteCCMGoPhonehomeCluster(
return nil
}

// shouldReconcileMetricsCollector records in t.reconcileMetricsCollector whether
// a metrics collector deployment (V1 or V2) already exists in the cluster's
// namespace. The answer is computed at most once: if the field is already set,
// the function returns immediately without querying the API server.
//
// PWX-27401: the metrics collector is only reconciled when it is already
// running. Revert this once the collector memory issue is resolved.
//
// It returns a non-nil error only when a lookup fails for a reason other than
// "not found"; in that case the field is left unset.
func (t *telemetry) shouldReconcileMetricsCollector(
	cluster *corev1.StorageCluster,
) error {
	// A previous call has already made the decision; keep it.
	if t.reconcileMetricsCollector != nil {
		return nil
	}

	// Probe for the V1 collector first, then the V2 collector.
	collectorNames := []string{CollectorDeploymentName, DeploymentNameTelemetryCollectorV2}
	found := &appsv1.Deployment{}
	for _, name := range collectorNames {
		key := types.NamespacedName{
			Name:      name,
			Namespace: cluster.Namespace,
		}
		err := t.k8sClient.Get(context.TODO(), key, found)
		switch {
		case err == nil:
			// At least one collector deployment is present.
			t.reconcileMetricsCollector = boolPtr(true)
			return nil
		case !errors.IsNotFound(err):
			// Unexpected failure: surface it and leave the decision unset.
			return err
		}
		// Not found: fall through to the next candidate name.
	}

	// Neither deployment exists, so the collector is not reconciled.
	t.reconcileMetricsCollector = boolPtr(false)
	return nil
}

func (t *telemetry) reconcileCCMGoTelemetryCollectorV2(
cluster *corev1.StorageCluster,
ownerRef *metav1.OwnerReference,
) error {
if t.reconcileMetricsCollector == nil || !*t.reconcileMetricsCollector {
return nil
}

// Delete metrics collector V1 if exists
if err := t.deleteMetricsCollectorV1(cluster.Namespace, ownerRef); err != nil {
return err
Expand Down
4 changes: 4 additions & 0 deletions drivers/storage/portworx/component/telemetry_java.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,10 @@ func (t *telemetry) deployMetricsCollectorV1(
cluster *corev1.StorageCluster,
ownerRef *metav1.OwnerReference,
) error {
if t.reconcileMetricsCollector == nil || !*t.reconcileMetricsCollector {
return nil
}

if err := t.createCollectorServiceAccount(cluster.Namespace, ownerRef); err != nil {
return err
}
Expand Down
Loading

0 comments on commit f766bb2

Please sign in to comment.