Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding felix service metric port #3534

Merged
merged 8 commits into from
Nov 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 26 additions & 16 deletions pkg/controller/installation/core_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ const (
// The default port used by calico/node to report Calico Enterprise internal metrics.
// This is separate from the calico/node prometheus metrics port, which is user configurable.
defaultNodeReporterPort = 9081

defaultFelixMetricsDefaultPort = 9091
)

const InstallationName string = "calico"
Expand Down Expand Up @@ -1135,19 +1137,25 @@ func (r *ReconcileInstallation) Reconcile(ctx context.Context, request reconcile
nodeReporterMetricsPort := defaultNodeReporterPort
var nodePrometheusTLS certificatemanagement.KeyPairInterface
calicoVersion := components.CalicoRelease

felixPrometheusMetricsPort := defaultFelixMetricsDefaultPort

if instance.Spec.Variant == operator.TigeraSecureEnterprise {

// Determine the port to use for nodeReporter metrics.
if felixConfiguration.Spec.PrometheusReporterPort != nil {
nodeReporterMetricsPort = *felixConfiguration.Spec.PrometheusReporterPort
}

if nodeReporterMetricsPort == 0 {
err := errors.New("felixConfiguration prometheusReporterPort=0 not supported")
r.status.SetDegraded(operator.InvalidConfigurationError, "invalid metrics port", err, reqLogger)
return reconcile.Result{}, err
}

if felixConfiguration.Spec.PrometheusMetricsPort != nil {
felixPrometheusMetricsPort = *felixConfiguration.Spec.PrometheusMetricsPort
}

nodePrometheusTLS, err = certificateManager.GetOrCreateKeyPair(r.client, render.NodePrometheusTLSServerSecret, common.OperatorNamespace(), dns.GetServiceDNSNames(render.CalicoNodeMetricsService, common.CalicoNamespace, r.clusterDomain))
if err != nil {
r.status.SetDegraded(operator.ResourceCreateError, "Error creating TLS certificate", err, reqLogger)
Expand Down Expand Up @@ -1349,21 +1357,23 @@ func (r *ReconcileInstallation) Reconcile(ctx context.Context, request reconcile

// Build a configuration for rendering calico/node.
nodeCfg := render.NodeConfiguration{
K8sServiceEp: k8sapi.Endpoint,
Installation: &instance.Spec,
IPPools: crdPoolsToOperator(currentPools.Items),
LogCollector: logCollector,
BirdTemplates: birdTemplates,
TLS: typhaNodeTLS,
ClusterDomain: r.clusterDomain,
NodeReporterMetricsPort: nodeReporterMetricsPort,
BGPLayouts: bgpLayout,
NodeAppArmorProfile: nodeAppArmorProfile,
MigrateNamespaces: needNsMigration,
CanRemoveCNIFinalizer: canRemoveCNI,
PrometheusServerTLS: nodePrometheusTLS,
FelixHealthPort: *felixConfiguration.Spec.HealthPort,
BindMode: bgpConfiguration.Spec.BindMode,
K8sServiceEp: k8sapi.Endpoint,
Installation: &instance.Spec,
IPPools: crdPoolsToOperator(currentPools.Items),
LogCollector: logCollector,
BirdTemplates: birdTemplates,
TLS: typhaNodeTLS,
ClusterDomain: r.clusterDomain,
NodeReporterMetricsPort: nodeReporterMetricsPort,
BGPLayouts: bgpLayout,
NodeAppArmorProfile: nodeAppArmorProfile,
MigrateNamespaces: needNsMigration,
CanRemoveCNIFinalizer: canRemoveCNI,
PrometheusServerTLS: nodePrometheusTLS,
FelixHealthPort: *felixConfiguration.Spec.HealthPort,
BindMode: bgpConfiguration.Spec.BindMode,
FelixPrometheusMetricsEnabled: utils.IsFelixPrometheusMetricsEnabled(felixConfiguration),
FelixPrometheusMetricsPort: felixPrometheusMetricsPort,
}
components = append(components, render.Node(&nodeCfg))

Expand Down
35 changes: 24 additions & 11 deletions pkg/controller/monitor/monitor_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ import (
"fmt"
"reflect"

crdv1 "github.com/tigera/operator/pkg/apis/crd.projectcalico.org/v1"

corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -143,6 +145,10 @@ func add(_ manager.Manager, c ctrlruntime.Controller) error {
return fmt.Errorf("monitor-controller failed to watch ManagementClusterConnection resource: %w", err)
}

if err = c.WatchObject(&crdv1.FelixConfiguration{}, &handler.EnqueueRequestForObject{}); err != nil {
return fmt.Errorf("monitor-controller failed to watch FelixConfiguration resource: %w", err)
}

for _, secret := range []string{
certificatemanagement.CASecretName,
esmetrics.ElasticsearchMetricsServerTLSSecret,
Expand Down Expand Up @@ -373,18 +379,25 @@ func (r *ReconcileMonitor) Reconcile(ctx context.Context, request reconcile.Requ
return reconcile.Result{}, err
}

felixConfiguration, err := utils.GetFelixConfiguration(ctx, r.client)
if err != nil {
r.status.SetDegraded(operatorv1.ResourceReadError, "Error retrieving Felix configuration", err, reqLogger)
return reconcile.Result{}, err
}

monitorCfg := &monitor.Config{
Monitor: instance.Spec,
Installation: install,
PullSecrets: pullSecrets,
AlertmanagerConfigSecret: alertmanagerConfigSecret,
KeyValidatorConfig: keyValidatorConfig,
ServerTLSSecret: serverTLSSecret,
ClientTLSSecret: clientTLSSecret,
ClusterDomain: r.clusterDomain,
TrustedCertBundle: trustedBundle,
OpenShift: r.provider.IsOpenShift(),
KubeControllerPort: kubeControllersMetricsPort,
Monitor: instance.Spec,
Installation: install,
PullSecrets: pullSecrets,
AlertmanagerConfigSecret: alertmanagerConfigSecret,
KeyValidatorConfig: keyValidatorConfig,
ServerTLSSecret: serverTLSSecret,
ClientTLSSecret: clientTLSSecret,
ClusterDomain: r.clusterDomain,
TrustedCertBundle: trustedBundle,
OpenShift: r.provider.IsOpenShift(),
KubeControllerPort: kubeControllersMetricsPort,
FelixPrometheusMetricsEnabled: utils.IsFelixPrometheusMetricsEnabled(felixConfiguration),
}

// Render prometheus component
Expand Down
16 changes: 16 additions & 0 deletions pkg/controller/utils/felix_configuration.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,19 @@ func PatchFelixConfiguration(ctx context.Context, c client.Client, patchFn func(

return fc, nil
}

// GetFelixConfiguration reads the cluster-scoped FelixConfiguration named
// "default" using the supplied client.
//
// A NotFound error from the API is not treated as a failure: the freshly
// allocated FelixConfiguration object is returned with a nil error so callers
// can inspect its (unset) fields without a nil check. Any other read error is
// wrapped and returned together with a nil object.
func GetFelixConfiguration(ctx context.Context, c client.Client) (*crdv1.FelixConfiguration, error) {
	felixConfig := &crdv1.FelixConfiguration{}
	key := types.NamespacedName{Name: "default"}

	if getErr := c.Get(ctx, key, felixConfig); getErr != nil {
		// Only a missing resource is tolerated; everything else is surfaced.
		if !errors.IsNotFound(getErr) {
			return nil, fmt.Errorf("unable to read FelixConfiguration: %w", getErr)
		}
	}

	return felixConfig, nil
}

// IsFelixPrometheusMetricsEnabled reports whether the given FelixConfiguration
// explicitly enables Felix's Prometheus metrics endpoint.
//
// It returns the value of Spec.PrometheusMetricsEnabled when that field is
// set, and false when the field is unset. A nil felixConfiguration is treated
// the same as an unset field (false) rather than causing a nil-pointer panic.
func IsFelixPrometheusMetricsEnabled(felixConfiguration *crdv1.FelixConfiguration) bool {
	// Guard against a nil object so callers that failed to load (or never
	// loaded) a FelixConfiguration get the "disabled" answer instead of a panic.
	if felixConfiguration == nil {
		return false
	}

	enabled := felixConfiguration.Spec.PrometheusMetricsEnabled
	if enabled == nil {
		return false
	}
	return *enabled
}
71 changes: 42 additions & 29 deletions pkg/render/monitor/monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,17 +120,18 @@ func MonitorPolicy(cfg *Config) render.Component {

// Config contains all the config information needed to render the Monitor component.
type Config struct {
Monitor operatorv1.MonitorSpec
Installation *operatorv1.InstallationSpec
PullSecrets []*corev1.Secret
AlertmanagerConfigSecret *corev1.Secret
KeyValidatorConfig authentication.KeyValidatorConfig
ServerTLSSecret certificatemanagement.KeyPairInterface
ClientTLSSecret certificatemanagement.KeyPairInterface
ClusterDomain string
TrustedCertBundle certificatemanagement.TrustedBundle
OpenShift bool
KubeControllerPort int
Monitor operatorv1.MonitorSpec
Installation *operatorv1.InstallationSpec
PullSecrets []*corev1.Secret
AlertmanagerConfigSecret *corev1.Secret
KeyValidatorConfig authentication.KeyValidatorConfig
ServerTLSSecret certificatemanagement.KeyPairInterface
ClientTLSSecret certificatemanagement.KeyPairInterface
ClusterDomain string
TrustedCertBundle certificatemanagement.TrustedBundle
OpenShift bool
KubeControllerPort int
// FelixPrometheusMetricsEnabled, when true, makes the calico-node
// ServiceMonitor include an additional plain-HTTP scrape endpoint for the
// "felix-metrics-port" service port.
FelixPrometheusMetricsEnabled bool
}

type monitorComponent struct {
Expand Down Expand Up @@ -806,6 +807,35 @@ func (mc *monitorComponent) prometheusRule() *monitoringv1.PrometheusRule {
}

func (mc *monitorComponent) serviceMonitorCalicoNode() *monitoringv1.ServiceMonitor {
endpoints := []monitoringv1.Endpoint{
{
HonorLabels: true,
Interval: "5s",
Port: "calico-metrics-port",
ScrapeTimeout: "5s",
Scheme: "https",
TLSConfig: mc.tlsConfig(render.CalicoNodeMetricsService),
},
{
HonorLabels: true,
Interval: "5s",
Port: "calico-bgp-metrics-port",
ScrapeTimeout: "5s",
Scheme: "https",
TLSConfig: mc.tlsConfig(render.CalicoNodeMetricsService),
},
}

if mc.cfg.FelixPrometheusMetricsEnabled {
endpoints = append(endpoints, monitoringv1.Endpoint{
HonorLabels: true,
Interval: "5s",
Port: "felix-metrics-port",
ScrapeTimeout: "5s",
Scheme: "http",
})
}

return &monitoringv1.ServiceMonitor{
TypeMeta: metav1.TypeMeta{Kind: monitoringv1.ServiceMonitorsKind, APIVersion: MonitoringAPIVersion},
ObjectMeta: metav1.ObjectMeta{
Expand All @@ -824,24 +854,7 @@ func (mc *monitorComponent) serviceMonitorCalicoNode() *monitoringv1.ServiceMoni
},
},
NamespaceSelector: monitoringv1.NamespaceSelector{MatchNames: []string{"calico-system"}},
Endpoints: []monitoringv1.Endpoint{
{
HonorLabels: true,
Interval: "5s",
Port: "calico-metrics-port",
ScrapeTimeout: "5s",
Scheme: "https",
TLSConfig: mc.tlsConfig(render.CalicoNodeMetricsService),
},
{
HonorLabels: true,
Interval: "5s",
Port: "calico-bgp-metrics-port",
ScrapeTimeout: "5s",
Scheme: "https",
TLSConfig: mc.tlsConfig(render.CalicoNodeMetricsService),
},
},
Endpoints: endpoints,
},
}
}
Expand Down
26 changes: 26 additions & 0 deletions pkg/render/monitor/monitor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -976,6 +976,32 @@ var _ = Describe("monitor rendering tests", func() {
},
}))
})

It("Should render serviceMonitor with felix endpoint if FelixPrometheusMetricsEnabled", func() {
	// Enabling Felix metrics should add a third, plain-HTTP endpoint after
	// the two HTTPS endpoints that are always rendered.
	cfg.FelixPrometheusMetricsEnabled = true
	toCreate, _ := monitor.Monitor(cfg).Objects()

	obj := rtest.GetResource(toCreate, monitor.CalicoNodeMonitor, common.TigeraPrometheusNamespace, "monitoring.coreos.com", "v1", monitoringv1.ServiceMonitorsKind)
	servicemonitorObj, ok := obj.(*monitoringv1.ServiceMonitor)
	Expect(ok).To(BeTrue())

	// Expected endpoints, in render order.
	wantEndpoints := []struct {
		port   string
		scheme string
	}{
		{port: "calico-metrics-port", scheme: "https"},
		{port: "calico-bgp-metrics-port", scheme: "https"},
		{port: "felix-metrics-port", scheme: "http"},
	}

	Expect(servicemonitorObj.Spec.Endpoints).To(HaveLen(3))
	for i, want := range wantEndpoints {
		endpoint := servicemonitorObj.Spec.Endpoints[i]
		Expect(endpoint.HonorLabels).To(BeTrue())
		Expect(endpoint.Interval).To(BeEquivalentTo("5s"))
		Expect(endpoint.Port).To(Equal(want.port))
		Expect(endpoint.ScrapeTimeout).To(BeEquivalentTo("5s"))
		Expect(endpoint.Scheme).To(Equal(want.scheme))
	}
})
})

type resource struct {
Expand Down
45 changes: 31 additions & 14 deletions pkg/render/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,10 @@ type NodeConfiguration struct {
// The bindMode read from the default BGPConfiguration. Used to trigger rolling updates
// should this value change.
BindMode string

FelixPrometheusMetricsEnabled bool

FelixPrometheusMetricsPort int
}

// Node creates the node daemonset and other resources for the daemonset to operate normally.
Expand Down Expand Up @@ -1717,6 +1721,32 @@ func (c *nodeComponent) nodeLivenessReadinessProbes() (*corev1.Probe, *corev1.Pr
// This service is used internally by Calico Enterprise and is separate from general
// Prometheus metrics which are user-configurable.
func (c *nodeComponent) nodeMetricsService() *corev1.Service {
ports := []corev1.ServicePort{
{
Name: "calico-metrics-port",
Port: int32(c.cfg.NodeReporterMetricsPort),
TargetPort: intstr.FromInt(c.cfg.NodeReporterMetricsPort),
Protocol: corev1.ProtocolTCP,
},
{
Name: "calico-bgp-metrics-port",
Port: nodeBGPReporterPort,
TargetPort: intstr.FromInt(int(nodeBGPReporterPort)),
Protocol: corev1.ProtocolTCP,
},
}

if c.cfg.FelixPrometheusMetricsEnabled {
felixMetricsPort := int32(c.cfg.FelixPrometheusMetricsPort)

ports = append(ports, corev1.ServicePort{
Name: "felix-metrics-port",
Port: felixMetricsPort,
TargetPort: intstr.FromInt(int(felixMetricsPort)),
Protocol: corev1.ProtocolTCP,
})
}

return &corev1.Service{
TypeMeta: metav1.TypeMeta{Kind: "Service", APIVersion: "v1"},
ObjectMeta: metav1.ObjectMeta{
Expand All @@ -1731,20 +1761,7 @@ func (c *nodeComponent) nodeMetricsService() *corev1.Service {
// a huge set of iptables rules for this service since there's an instance
// on every node.
ClusterIP: "None",
Ports: []corev1.ServicePort{
{
Name: "calico-metrics-port",
Port: int32(c.cfg.NodeReporterMetricsPort),
TargetPort: intstr.FromInt(c.cfg.NodeReporterMetricsPort),
Protocol: corev1.ProtocolTCP,
},
{
Name: "calico-bgp-metrics-port",
Port: nodeBGPReporterPort,
TargetPort: intstr.FromInt(int(nodeBGPReporterPort)),
Protocol: corev1.ProtocolTCP,
},
},
Ports: ports,
},
}
}
Expand Down
Loading
Loading