Skip to content

Commit

Permalink
PWX-35477 : Support Openshift Prometheus for portworx monitoring on O…
Browse files Browse the repository at this point in the history
…CP 4.14 (#1410)

* Create secret for autopilot auth token

* Register openshift route in operator

* Update autopilot pod to use auth-token secret

* mount AutopilotOCPToken as env variable in autopilot pod

* mount cacert as env variable in autopilot pod

* Mount secret and token as volume in autopilot pod

* Support user-workload for ocp version gte 4.14

* Add volume mounts for ocp 4.14

* Fix multiple time volume mounting issue

* Add UTs for supported version of openshift

* staticcheck fix

* Add UTs for autopilot deployment on openshift 4.14

* Add UTs for autopilot uninstall on openshift 4.14

* test commit to fix build

* addressed PR comments

* fixed hard-coded version

* Fix the build as the download url was broken

Signed-off-by: Piyush Nimbalkar <[email protected]>

* fix test failure

---------

Signed-off-by: Piyush Nimbalkar <[email protected]>
Co-authored-by: ezhang-px <[email protected]>
Co-authored-by: Piyush Nimbalkar <[email protected]>
  • Loading branch information
3 people authored Feb 1, 2024
1 parent 956b239 commit 27e72d5
Show file tree
Hide file tree
Showing 25 changed files with 5,699 additions and 21 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ getgrafanaconfigs:
cp deploy/grafana/* bin/configs/

getconfigs: cleanconfigs getccmconfigs getpluginconfigs getgrafanaconfigs getwindowsconfig
wget -q '$(PX_DOC_HOST)/samples/k8s/pxc/portworx-prometheus-rule.yaml' -P bin/configs --no-check-certificate
wget -q '$(PX_DOC_HOST)/samples/portworx-enterprise/k8s/pxc/portworx-prometheus-rule.yaml' -P bin/configs --no-check-certificate
wget -q '$(PROMETHEUS_OPERATOR_CRD_URL_PREFIX)/crd-alertmanagerconfigs.yaml' -O bin/configs/prometheus-crd-alertmanagerconfigs.yaml
wget -q '$(PROMETHEUS_OPERATOR_CRD_URL_PREFIX)/crd-alertmanagers.yaml' -O bin/configs/prometheus-crd-alertmanagers.yaml
wget -q '$(PROMETHEUS_OPERATOR_CRD_URL_PREFIX)/crd-podmonitors.yaml' -O bin/configs/prometheus-crd-podmonitors.yaml
Expand Down
5 changes: 5 additions & 0 deletions cmd/operator/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/libopenstorage/operator/pkg/version"
ocp_configv1 "github.com/openshift/api/config/v1"
consolev1 "github.com/openshift/api/console/v1"
routev1 "github.com/openshift/api/route/v1"
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
log "github.com/sirupsen/logrus"
"github.com/urfave/cli"
Expand Down Expand Up @@ -228,6 +229,10 @@ func run(c *cli.Context) {
log.Fatalf("Failed to add cluster API resources to the scheme: %v", err)
}

if err := routev1.AddToScheme(mgr.GetScheme()); err != nil {
log.Fatalf("Failed to add cluster API resources to the scheme: %v", err)
}

// Create Service and ServiceMonitor objects to expose the metrics to Prometheus
metricsPort := c.Int(flagMetricsPort)
metricsServicePorts := []v1.ServicePort{
Expand Down
148 changes: 144 additions & 4 deletions drivers/storage/portworx/component/autopilot.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ import (
"sort"
"strings"

"github.com/sirupsen/logrus"

"github.com/hashicorp/go-version"
pxutil "github.com/libopenstorage/operator/drivers/storage/portworx/util"
corev1 "github.com/libopenstorage/operator/pkg/apis/core/v1"
Expand Down Expand Up @@ -45,7 +47,11 @@ const (
AutopilotDefaultProviderEndpoint = "http://px-prometheus:9090"
// AutopilotDefaultReviewersKey is a key for default reviewers array in gitops config map
AutopilotDefaultReviewersKey = "defaultReviewers"
defaultAutopilotCPU = "0.1"
// OCPPrometheusUserWorkloadSecretPrefix name of OCP user-workload Prometheus secret
OCPPrometheusUserWorkloadSecretPrefix = "prometheus-user-workload-token"
// AutopilotSecretName is the name of the secret in which Autopilot's copy of the prometheus-user-workload-token is stored
AutopilotSecretName = "autopilot-prometheus-auth"
defaultAutopilotCPU = "0.1"
)

var (
Expand Down Expand Up @@ -80,12 +86,49 @@ var (
},
},
}

// openshiftDeploymentVolume lists the secret-backed volumes that
// getDesiredVolumesAndMounts appends to autopilotDeploymentVolumes when OCP
// user-workload monitoring is supported. Both volumes read from the
// AutopilotSecretName secret:
//   - "token-volume" projects the "token" key as the file "token" and is
//     mounted read-only at /var/local/secrets.
//   - "ca-cert-volume" projects the "cacert" key as the file
//     "ca-certificates.crt" and is mounted read-only at /etc/ssl/certs.
//
// NOTE(review): mounting a volume at /etc/ssl/certs presumably hides whatever
// CA files the container image ships in that directory — confirm this is
// intended.
openshiftDeploymentVolume = []corev1.VolumeSpec{
{
Name: "token-volume",
MountPath: "/var/local/secrets",
ReadOnly: true,
VolumeSource: v1.VolumeSource{
Secret: &v1.SecretVolumeSource{
SecretName: AutopilotSecretName,
Items: []v1.KeyToPath{
{
Key: "token",
Path: "token",
},
},
},
},
},
{
Name: "ca-cert-volume",
MountPath: "/etc/ssl/certs",
ReadOnly: true,
VolumeSource: v1.VolumeSource{
Secret: &v1.SecretVolumeSource{
SecretName: AutopilotSecretName,
Items: []v1.KeyToPath{
{
Key: "cacert",
Path: "ca-certificates.crt",
},
},
},
},
},
}
)

type autopilot struct {
isCreated bool
k8sClient client.Client
k8sVersion version.Version
isCreated bool
k8sClient client.Client
k8sVersion version.Version
isUserWorkloadSupported *bool
isVolumeMounted bool
}

func (c *autopilot) Name() string {
Expand Down Expand Up @@ -128,6 +171,11 @@ func (c *autopilot) Reconcile(cluster *corev1.StorageCluster) error {
if err := c.createClusterRoleBinding(cluster.Namespace); err != nil {
return err
}
if c.isOCPUserWorkloadSupported() {
if err := c.createSecret(cluster.Namespace, ownerRef); err != nil {
return err
}
}
if err := c.createDeployment(cluster, ownerRef); err != nil {
return err
}
Expand All @@ -151,12 +199,20 @@ func (c *autopilot) Delete(cluster *corev1.StorageCluster) error {
if err := k8sutil.DeleteDeployment(c.k8sClient, AutopilotDeploymentName, cluster.Namespace, *ownerRef); err != nil {
return err
}
if c.isOCPUserWorkloadSupported() {
if err := k8sutil.DeleteSecret(c.k8sClient, AutopilotSecretName, cluster.Namespace, *ownerRef); err != nil {
return err
}
}

c.MarkDeleted()
return nil
}

// MarkDeleted clears the component's created flag and drops the cached result
// of the OCP user-workload support check, so that check is performed again the
// next time isOCPUserWorkloadSupported is called.
//
// isVolumeMounted is deliberately NOT reset here. The flag records that the
// OpenShift volumes have already been appended to the package-level
// autopilotDeploymentVolumes slice, and that slice is not trimmed when the
// component is deleted. Clearing the flag would make
// getDesiredVolumesAndMounts append the same volumes a second time on the next
// reconcile, yielding duplicate volume entries for the deployment.
func (c *autopilot) MarkDeleted() {
	c.isCreated = false
	c.isUserWorkloadSupported = nil
}

func (c *autopilot) createConfigMap(
Expand Down Expand Up @@ -248,6 +304,30 @@ func (c *autopilot) createConfigMap(
return err
}

// createSecret fetches the Prometheus token and CA certificate via
// getPrometheusTokenAndCert and stores them under the "token" and "cacert"
// keys of the AutopilotSecretName secret in clusterNamespace. The secret is
// created or updated with ownerRef as its owner. Any error from the lookup or
// from the create/update call is returned unchanged.
func (c *autopilot) createSecret(clusterNamespace string, ownerRef *metav1.OwnerReference) error {
	promToken, promCACert, err := c.getPrometheusTokenAndCert()
	if err != nil {
		return err
	}

	secret := &v1.Secret{
		ObjectMeta: metav1.ObjectMeta{
			Name:            AutopilotSecretName,
			Namespace:       clusterNamespace,
			OwnerReferences: []metav1.OwnerReference{*ownerRef},
		},
		Data: map[string][]byte{
			"token":  []byte(promToken),
			"cacert": []byte(promCACert),
		},
	}
	return k8sutil.CreateOrUpdateSecret(c.k8sClient, secret, ownerRef)
}

func (c *autopilot) createServiceAccount(
clusterNamespace string,
ownerRef *metav1.OwnerReference,
Expand Down Expand Up @@ -643,6 +723,12 @@ func (c *autopilot) getDesiredVolumesAndMounts(
cluster *corev1.StorageCluster,
) ([]v1.Volume, []v1.VolumeMount) {
volumeSpecs := make([]corev1.VolumeSpec, 0)

if c.isOCPUserWorkloadSupported() && !c.isVolumeMounted {
c.isVolumeMounted = true
autopilotDeploymentVolumes = append(autopilotDeploymentVolumes, openshiftDeploymentVolume...)
}

for _, v := range autopilotDeploymentVolumes {
vCopy := v.DeepCopy()
volumeSpecs = append(volumeSpecs, *vCopy)
Expand All @@ -659,6 +745,60 @@ func (c *autopilot) getDesiredVolumesAndMounts(
return volumes, volumeMounts
}

// getPrometheusTokenAndCert lists the secrets in the
// "openshift-user-workload-monitoring" namespace and uses the first one whose
// name starts with OCPPrometheusUserWorkloadSecretPrefix.
//
// It returns that secret's "token" and "ca.crt" values as strings. An error is
// returned when the list call fails, when the matching secret lacks either
// key, or when no secret with the prefix exists. On error both strings are
// empty.
func (c *autopilot) getPrometheusTokenAndCert() (encodedToken, caCert string, err error) {
	secrets := &v1.SecretList{}
	if err = c.k8sClient.List(
		context.TODO(),
		secrets,
		client.InNamespace("openshift-user-workload-monitoring"),
	); err != nil {
		return "", "", err
	}

	for i := range secrets.Items {
		candidate := &secrets.Items[i]
		if !strings.HasPrefix(candidate.Name, OCPPrometheusUserWorkloadSecretPrefix) {
			continue
		}

		// Only the first secret carrying the prefix is examined; a missing
		// key in it is an error rather than a reason to keep searching.
		rawToken, hasToken := candidate.Data["token"]
		if !hasToken {
			return "", "", fmt.Errorf("token not found in secret")
		}
		rawCert, hasCert := candidate.Data["ca.crt"]
		if !hasCert {
			return "", "", fmt.Errorf("cert not found in secret")
		}
		return string(rawToken), string(rawCert), nil
	}

	return "", "", fmt.Errorf("prometheus-user-workload-token not found. Please make sure that user workload monitoring is enabled in openshift")
}

// isOCPUserWorkloadSupported reports the result of
// pxutil.IsSupportedOCPVersion for pxutil.OpenshiftPrometheusSupportedVersion.
// A successful result is cached on the component and reused until the cache
// is cleared. If the check fails, the error is logged, false is returned and
// nothing is cached, so the check is attempted again on the next call.
func (c *autopilot) isOCPUserWorkloadSupported() bool {
	if cached := c.isUserWorkloadSupported; cached != nil {
		return *cached
	}

	supported, err := pxutil.IsSupportedOCPVersion(c.k8sClient, pxutil.OpenshiftPrometheusSupportedVersion)
	if err != nil {
		logrus.Errorf("Failed to check if OCP user workload monitoring is supported: %v", err)
		return false
	}

	c.isUserWorkloadSupported = &supported
	return supported
}

// RegisterAutopilotComponent registers the Autopilot component
func RegisterAutopilotComponent() {
Register(AutopilotComponentName, &autopilot{})
Expand Down
8 changes: 4 additions & 4 deletions drivers/storage/portworx/component/plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ func (p *plugin) IsEnabled(cluster *corev1.StorageCluster) bool {
}

for _, v := range operator.Status.Versions {
if v.Name == OpenshiftAPIServer && isVersionSupported(v.Version) {
if v.Name == OpenshiftAPIServer && isVersionSupported(v.Version, OpenshiftSupportedVersion) {
p.isPluginSupported = boolPtr(true)
return true
}
Expand Down Expand Up @@ -349,14 +349,14 @@ func updateDataIfNginxConfigMap(cm *v1.ConfigMap, storageNs string) {
}
}

func isVersionSupported(v string) bool {
targetVersion, err := version.NewVersion(OpenshiftSupportedVersion)
func isVersionSupported(current, target string) bool {
targetVersion, err := version.NewVersion(target)
if err != nil {
logrus.Errorf("Error during parsing version : %s ", err)
return false
}

currentVersion, err := version.NewVersion(v)
currentVersion, err := version.NewVersion(current)
if err != nil {
logrus.Errorf("Error during parsing version : %s ", err)
return false
Expand Down
Loading

0 comments on commit 27e72d5

Please sign in to comment.